libstdc++
simd.h
1// Definition of the public simd interfaces -*- C++ -*-
2
3// Copyright (C) 2020-2023 Free Software Foundation, Inc.
4//
5// This file is part of the GNU ISO C++ Library. This library is free
6// software; you can redistribute it and/or modify it under the
7// terms of the GNU General Public License as published by the
8// Free Software Foundation; either version 3, or (at your option)
9// any later version.
10
11// This library is distributed in the hope that it will be useful,
12// but WITHOUT ANY WARRANTY; without even the implied warranty of
13// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14// GNU General Public License for more details.
15
16// Under Section 7 of GPL version 3, you are granted additional
17// permissions described in the GCC Runtime Library Exception, version
18// 3.1, as published by the Free Software Foundation.
19
20// You should have received a copy of the GNU General Public License and
21// a copy of the GCC Runtime Library Exception along with this program;
22// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23// <http://www.gnu.org/licenses/>.
24
25#ifndef _GLIBCXX_EXPERIMENTAL_SIMD_H
26#define _GLIBCXX_EXPERIMENTAL_SIMD_H
27
28#if __cplusplus >= 201703L
29
30#include "simd_detail.h"
31#include "numeric_traits.h"
32#include <bit>
33#include <bitset>
34#ifdef _GLIBCXX_DEBUG_UB
35#include <cstdio> // for stderr
36#endif
37#include <cstring>
38#include <cmath>
39#include <functional>
40#include <iosfwd>
41#include <utility>
42
43#if _GLIBCXX_SIMD_X86INTRIN
44#include <x86intrin.h>
45#elif _GLIBCXX_SIMD_HAVE_NEON
46#pragma GCC diagnostic push
47// narrowing conversion of '__a' from 'uint64_t' {aka 'long long unsigned int'} to
48// 'int64x1_t' {aka 'long long int'} [-Wnarrowing]
49#pragma GCC diagnostic ignored "-Wnarrowing"
50#include <arm_neon.h>
51#pragma GCC diagnostic pop
52#endif
53
54/** @ingroup ts_simd
55 * @{
56 */
57/* There are several closely related types, with the following naming
58 * convention:
59 * _Tp: vectorizable (arithmetic) type (or any type)
60 * _TV: __vector_type_t<_Tp, _Np>
61 * _TW: _SimdWrapper<_Tp, _Np>
62 * _TI: __intrinsic_type_t<_Tp, _Np>
63 * _TVT: _VectorTraits<_TV> or _VectorTraits<_TW>
64 * If one additional type is needed use _U instead of _T.
 * Otherwise use _T\d, _TV\d, _TW\d, _TI\d, _TVT\d.
66 *
67 * More naming conventions:
68 * _Ap or _Abi: An ABI tag from the simd_abi namespace
69 * _Ip: often used for integer types with sizeof(_Ip) == sizeof(_Tp),
70 * _IV, _IW as for _TV, _TW
71 * _Np: number of elements (not bytes)
72 * _Bytes: number of bytes
73 *
74 * Variable names:
75 * __k: mask object (vector- or bitmask)
76 */
77_GLIBCXX_SIMD_BEGIN_NAMESPACE
78
#if !_GLIBCXX_SIMD_X86INTRIN
// Without x86 intrinsics, define the well-known x86 register type names as GNU
// vector extensions so the rest of the implementation can name them
// unconditionally.
using __m128 [[__gnu__::__vector_size__(16)]] = float;
using __m128d [[__gnu__::__vector_size__(16)]] = double;
using __m128i [[__gnu__::__vector_size__(16)]] = long long;
using __m256 [[__gnu__::__vector_size__(32)]] = float;
using __m256d [[__gnu__::__vector_size__(32)]] = double;
using __m256i [[__gnu__::__vector_size__(32)]] = long long;
using __m512 [[__gnu__::__vector_size__(64)]] = float;
using __m512d [[__gnu__::__vector_size__(64)]] = double;
using __m512i [[__gnu__::__vector_size__(64)]] = long long;
#endif
90
namespace simd_abi {
// simd_abi forward declarations {{{
// implementation details:
struct _Scalar;

template <int _Np>
  struct _Fixed;

// There are two major ABIs that appear on different architectures.
// Both have non-boolean values packed into an N Byte register
// -> #elements = N / sizeof(T)
// Masks differ:
// 1. Use value vector registers for masks (all 0 or all 1)
// 2. Use bitmasks (mask registers) with one bit per value in the corresponding
//    value vector
//
// Both can be partially used, masking off the rest when doing horizontal
// operations or operations that can trap (e.g. FP_INVALID or integer division
// by 0). This is encoded as the number of used bytes.
template <int _UsedBytes>
  struct _VecBuiltin;

template <int _UsedBytes>
  struct _VecBltnBtmsk;

// Vector-mask ABI sized for _Np elements of _Tp.
template <typename _Tp, int _Np>
  using _VecN = _VecBuiltin<sizeof(_Tp) * _Np>;

// Register-width shorthands (defaults: SSE 16, AVX 32, AVX-512 64, NEON 16 bytes).
template <int _UsedBytes = 16>
  using _Sse = _VecBuiltin<_UsedBytes>;

template <int _UsedBytes = 32>
  using _Avx = _VecBuiltin<_UsedBytes>;

// AVX-512 is the only ABI here using bitmask (mask register) masks.
template <int _UsedBytes = 64>
  using _Avx512 = _VecBltnBtmsk<_UsedBytes>;

template <int _UsedBytes = 16>
  using _Neon = _VecBuiltin<_UsedBytes>;

// implementation-defined:
using __sse = _Sse<>;
using __avx = _Avx<>;
using __avx512 = _Avx512<>;
using __neon = _Neon<>;
using __neon128 = _Neon<16>;
using __neon64 = _Neon<8>;

// standard:
template <typename _Tp, size_t _Np, typename...>
  struct deduce;

template <int _Np>
  using fixed_size = _Fixed<_Np>;

using scalar = _Scalar;

// }}}
} // namespace simd_abi
// forward declarations is_simd(_mask), simd(_mask), simd_size {{{
// The public class templates; definitions follow later in this header.
template <typename _Tp>
  struct is_simd;

template <typename _Tp>
  struct is_simd_mask;

template <typename _Tp, typename _Abi>
  class simd;

template <typename _Tp, typename _Abi>
  class simd_mask;

template <typename _Tp, typename _Abi>
  struct simd_size;

// }}}
// load/store flags {{{
// Tag type for loads/stores that may only assume alignment to the element type.
struct element_aligned_tag
{
  template <typename _Tp, typename _Up = typename _Tp::value_type>
    static constexpr size_t _S_alignment = alignof(_Up);

  // _S_apply forwards the pointer unchanged: no alignment may be assumed.
  template <typename _Tp, typename _Up>
    _GLIBCXX_SIMD_INTRINSIC static constexpr _Up*
    _S_apply(_Up* __ptr)
    { return __ptr; }
};

// Tag type for loads/stores that may assume alignment to the full vector,
// i.e. the next power of two of sizeof(_Up) * _Tp::size().
struct vector_aligned_tag
{
  template <typename _Tp, typename _Up = typename _Tp::value_type>
    static constexpr size_t _S_alignment
      = std::__bit_ceil(sizeof(_Up) * _Tp::size());

  // Tell the optimizer about the guaranteed alignment.
  template <typename _Tp, typename _Up>
    _GLIBCXX_SIMD_INTRINSIC static constexpr _Up*
    _S_apply(_Up* __ptr)
    { return static_cast<_Up*>(__builtin_assume_aligned(__ptr, _S_alignment<_Tp, _Up>)); }
};

// Tag type for loads/stores that may assume alignment to _Np bytes.
template <size_t _Np> struct overaligned_tag
{
  template <typename _Tp, typename _Up = typename _Tp::value_type>
    static constexpr size_t _S_alignment = _Np;

  template <typename _Tp, typename _Up>
    _GLIBCXX_SIMD_INTRINSIC static constexpr _Up*
    _S_apply(_Up* __ptr)
    { return static_cast<_Up*>(__builtin_assume_aligned(__ptr, _Np)); }
};

// The tag objects users pass to load/store functions.
inline constexpr element_aligned_tag element_aligned = {};

inline constexpr vector_aligned_tag vector_aligned = {};

template <size_t _Np>
  inline constexpr overaligned_tag<_Np> overaligned = {};

// }}}
template <size_t _Xp>
  using _SizeConstant = integral_constant<size_t, _Xp>;
// constexpr feature detection{{{
// Lift the _GLIBCXX_SIMD_HAVE_* preprocessor macros into constexpr booleans so
// ISA availability can be used in if-constexpr and template arguments.
constexpr inline bool __have_mmx = _GLIBCXX_SIMD_HAVE_MMX;
constexpr inline bool __have_sse = _GLIBCXX_SIMD_HAVE_SSE;
constexpr inline bool __have_sse2 = _GLIBCXX_SIMD_HAVE_SSE2;
constexpr inline bool __have_sse3 = _GLIBCXX_SIMD_HAVE_SSE3;
constexpr inline bool __have_ssse3 = _GLIBCXX_SIMD_HAVE_SSSE3;
constexpr inline bool __have_sse4_1 = _GLIBCXX_SIMD_HAVE_SSE4_1;
constexpr inline bool __have_sse4_2 = _GLIBCXX_SIMD_HAVE_SSE4_2;
constexpr inline bool __have_xop = _GLIBCXX_SIMD_HAVE_XOP;
constexpr inline bool __have_avx = _GLIBCXX_SIMD_HAVE_AVX;
constexpr inline bool __have_avx2 = _GLIBCXX_SIMD_HAVE_AVX2;
constexpr inline bool __have_bmi = _GLIBCXX_SIMD_HAVE_BMI1;
constexpr inline bool __have_bmi2 = _GLIBCXX_SIMD_HAVE_BMI2;
constexpr inline bool __have_lzcnt = _GLIBCXX_SIMD_HAVE_LZCNT;
constexpr inline bool __have_sse4a = _GLIBCXX_SIMD_HAVE_SSE4A;
constexpr inline bool __have_fma = _GLIBCXX_SIMD_HAVE_FMA;
constexpr inline bool __have_fma4 = _GLIBCXX_SIMD_HAVE_FMA4;
constexpr inline bool __have_f16c = _GLIBCXX_SIMD_HAVE_F16C;
constexpr inline bool __have_popcnt = _GLIBCXX_SIMD_HAVE_POPCNT;
constexpr inline bool __have_avx512f = _GLIBCXX_SIMD_HAVE_AVX512F;
constexpr inline bool __have_avx512dq = _GLIBCXX_SIMD_HAVE_AVX512DQ;
constexpr inline bool __have_avx512vl = _GLIBCXX_SIMD_HAVE_AVX512VL;
constexpr inline bool __have_avx512bw = _GLIBCXX_SIMD_HAVE_AVX512BW;
constexpr inline bool __have_avx512dq_vl = __have_avx512dq && __have_avx512vl;
constexpr inline bool __have_avx512bw_vl = __have_avx512bw && __have_avx512vl;
constexpr inline bool __have_avx512bitalg = _GLIBCXX_SIMD_HAVE_AVX512BITALG;
constexpr inline bool __have_avx512vbmi2 = _GLIBCXX_SIMD_HAVE_AVX512VBMI2;
constexpr inline bool __have_avx512vbmi = _GLIBCXX_SIMD_HAVE_AVX512VBMI;
constexpr inline bool __have_avx512ifma = _GLIBCXX_SIMD_HAVE_AVX512IFMA;
constexpr inline bool __have_avx512cd = _GLIBCXX_SIMD_HAVE_AVX512CD;
constexpr inline bool __have_avx512vnni = _GLIBCXX_SIMD_HAVE_AVX512VNNI;
constexpr inline bool __have_avx512vpopcntdq = _GLIBCXX_SIMD_HAVE_AVX512VPOPCNTDQ;
constexpr inline bool __have_avx512vp2intersect = _GLIBCXX_SIMD_HAVE_AVX512VP2INTERSECT;

constexpr inline bool __have_neon = _GLIBCXX_SIMD_HAVE_NEON;
constexpr inline bool __have_neon_a32 = _GLIBCXX_SIMD_HAVE_NEON_A32;
constexpr inline bool __have_neon_a64 = _GLIBCXX_SIMD_HAVE_NEON_A64;
// True iff __GCC_IEC_559 is defined as 0, or (if it is undefined) iff
// -ffast-math is in effect; i.e. float on NEON is only supported when strict
// IEC 559 conformance is not claimed.
constexpr inline bool __support_neon_float =
#if defined __GCC_IEC_559
  __GCC_IEC_559 == 0;
#elif defined __FAST_MATH__
  true;
#else
  false;
#endif

// PowerPC: when the macro for a given vector ISA level is not defined, fall
// back to the flag of the next newer level, since each newer level implies
// the older ones.
#ifdef _ARCH_PWR10
constexpr inline bool __have_power10vec = true;
#else
constexpr inline bool __have_power10vec = false;
#endif
#ifdef __POWER9_VECTOR__
constexpr inline bool __have_power9vec = true;
#else
constexpr inline bool __have_power9vec = false;
#endif
#if defined __POWER8_VECTOR__
constexpr inline bool __have_power8vec = true;
#else
constexpr inline bool __have_power8vec = __have_power9vec;
#endif
#if defined __VSX__
constexpr inline bool __have_power_vsx = true;
#else
constexpr inline bool __have_power_vsx = __have_power8vec;
#endif
#if defined __ALTIVEC__
constexpr inline bool __have_power_vmx = true;
#else
constexpr inline bool __have_power_vmx = __have_power_vsx;
#endif

// }}}
285
286namespace __detail
287{
288#ifdef math_errhandling
289 // Determines _S_handle_fpexcept from math_errhandling if it is defined and expands to a constant
290 // expression. math_errhandling may expand to an extern symbol, in which case a constexpr value
291 // must be guessed.
292 template <int = math_errhandling>
293 constexpr bool
294 __handle_fpexcept_impl(int)
295 { return math_errhandling & MATH_ERREXCEPT; }
296#endif
297
298 // Fallback if math_errhandling doesn't work: with fast-math assume floating-point exceptions are
299 // ignored, otherwise implement correct exception behavior.
300 constexpr bool
301 __handle_fpexcept_impl(float)
302 {
303#if defined __FAST_MATH__
304 return false;
305#else
306 return true;
307#endif
308 }
309
310 /// True if math functions must raise floating-point exceptions as specified by C17.
311 static constexpr bool _S_handle_fpexcept = __handle_fpexcept_impl(0);
312
313 constexpr std::uint_least64_t
314 __floating_point_flags()
315 {
316 std::uint_least64_t __flags = 0;
317 if constexpr (_S_handle_fpexcept)
318 __flags |= 1;
319#ifdef __FAST_MATH__
320 __flags |= 1 << 1;
321#elif __FINITE_MATH_ONLY__
322 __flags |= 2 << 1;
323#elif __GCC_IEC_559 < 2
324 __flags |= 3 << 1;
325#endif
326 __flags |= (__FLT_EVAL_METHOD__ + 1) << 3;
327 return __flags;
328 }
329
330 constexpr std::uint_least64_t
331 __machine_flags()
332 {
333 if constexpr (__have_mmx || __have_sse)
334 return __have_mmx
335 | (__have_sse << 1)
336 | (__have_sse2 << 2)
337 | (__have_sse3 << 3)
338 | (__have_ssse3 << 4)
339 | (__have_sse4_1 << 5)
340 | (__have_sse4_2 << 6)
341 | (__have_xop << 7)
342 | (__have_avx << 8)
343 | (__have_avx2 << 9)
344 | (__have_bmi << 10)
345 | (__have_bmi2 << 11)
346 | (__have_lzcnt << 12)
347 | (__have_sse4a << 13)
348 | (__have_fma << 14)
349 | (__have_fma4 << 15)
350 | (__have_f16c << 16)
351 | (__have_popcnt << 17)
352 | (__have_avx512f << 18)
353 | (__have_avx512dq << 19)
354 | (__have_avx512vl << 20)
355 | (__have_avx512bw << 21)
356 | (__have_avx512bitalg << 22)
357 | (__have_avx512vbmi2 << 23)
358 | (__have_avx512vbmi << 24)
359 | (__have_avx512ifma << 25)
360 | (__have_avx512cd << 26)
361 | (__have_avx512vnni << 27)
362 | (__have_avx512vpopcntdq << 28)
363 | (__have_avx512vp2intersect << 29);
364 else if constexpr (__have_neon)
365 return __have_neon
366 | (__have_neon_a32 << 1)
367 | (__have_neon_a64 << 2)
368 | (__have_neon_a64 << 2)
369 | (__support_neon_float << 3);
370 else if constexpr (__have_power_vmx)
371 return __have_power_vmx
372 | (__have_power_vsx << 1)
373 | (__have_power8vec << 2)
374 | (__have_power9vec << 3)
375 | (__have_power10vec << 4);
376 else
377 return 0;
378 }
379
380 namespace
381 {
382 struct _OdrEnforcer {};
383 }
384
385 template <std::uint_least64_t...>
386 struct _MachineFlagsTemplate {};
387
388 /**@internal
389 * Use this type as default template argument to all function templates that
390 * are not declared always_inline. It ensures, that a function
391 * specialization, which the compiler decides not to inline, has a unique symbol
392 * (_OdrEnforcer) or a symbol matching the machine/architecture flags
393 * (_MachineFlagsTemplate). This helps to avoid ODR violations in cases where
394 * users link TUs compiled with different flags. This is especially important
395 * for using simd in libraries.
396 */
397 using __odr_helper
398 = conditional_t<__machine_flags() == 0, _OdrEnforcer,
399 _MachineFlagsTemplate<__machine_flags(), __floating_point_flags()>>;
400
401 struct _Minimum
402 {
403 template <typename _Tp>
404 _GLIBCXX_SIMD_INTRINSIC constexpr
405 _Tp
406 operator()(_Tp __a, _Tp __b) const
407 {
408 using std::min;
409 return min(__a, __b);
410 }
411 };
412
413 struct _Maximum
414 {
415 template <typename _Tp>
416 _GLIBCXX_SIMD_INTRINSIC constexpr
417 _Tp
418 operator()(_Tp __a, _Tp __b) const
419 {
420 using std::max;
421 return max(__a, __b);
422 }
423 };
424} // namespace __detail
425
// unrolled/pack execution helpers
// __execute_n_times{{{
// Invokes __f(_SizeConstant<_I>()) for each _I in the sequence, left to right.
template <typename _Fp, size_t... _I>
  [[__gnu__::__flatten__]] _GLIBCXX_SIMD_INTRINSIC constexpr
  void
  __execute_on_index_sequence(_Fp&& __f, index_sequence<_I...>)
  { ((void)__f(_SizeConstant<_I>()), ...); }

// Overload for the empty sequence: nothing to do.
template <typename _Fp>
  _GLIBCXX_SIMD_INTRINSIC constexpr void
  __execute_on_index_sequence(_Fp&&, index_sequence<>)
  { }

// Invokes __f(_SizeConstant<0>()), ..., __f(_SizeConstant<_Np - 1>()).
template <size_t _Np, typename _Fp>
  _GLIBCXX_SIMD_INTRINSIC constexpr void
  __execute_n_times(_Fp&& __f)
  {
    __execute_on_index_sequence(static_cast<_Fp&&>(__f),
				make_index_sequence<_Np>{});
  }

// }}}
// __generate_from_n_evaluations{{{
// Builds _R{__f(_SizeConstant<_I>())...} from the index sequence.
template <typename _R, typename _Fp, size_t... _I>
  [[__gnu__::__flatten__]] _GLIBCXX_SIMD_INTRINSIC constexpr
  _R
  __execute_on_index_sequence_with_return(_Fp&& __f, index_sequence<_I...>)
  { return _R{__f(_SizeConstant<_I>())...}; }

// Returns _R{__f(0), __f(1), ..., __f(_Np - 1)} (indices as _SizeConstant).
template <size_t _Np, typename _R, typename _Fp>
  _GLIBCXX_SIMD_INTRINSIC constexpr _R
  __generate_from_n_evaluations(_Fp&& __f)
  {
    return __execute_on_index_sequence_with_return<_R>(
	     static_cast<_Fp&&>(__f), make_index_sequence<_Np>{});
  }

// }}}
// __call_with_n_evaluations{{{
// Returns __f0(__fargs(_SizeConstant<_I>())...) over the index sequence.
template <size_t... _I, typename _F0, typename _FArgs>
  [[__gnu__::__flatten__]] _GLIBCXX_SIMD_INTRINSIC constexpr
  auto
  __call_with_n_evaluations(index_sequence<_I...>, _F0&& __f0, _FArgs&& __fargs)
  { return __f0(__fargs(_SizeConstant<_I>())...); }

// Returns __f0(__fargs(0), __fargs(1), ..., __fargs(_Np - 1)).
template <size_t _Np, typename _F0, typename _FArgs>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __call_with_n_evaluations(_F0&& __f0, _FArgs&& __fargs)
  {
    return __call_with_n_evaluations(make_index_sequence<_Np>{},
				     static_cast<_F0&&>(__f0),
				     static_cast<_FArgs&&>(__fargs));
  }

// }}}
// __call_with_subscripts{{{
// Returns __fun(__x[_First + _It]...) over the index sequence.
template <size_t _First = 0, size_t... _It, typename _Tp, typename _Fp>
  [[__gnu__::__flatten__]] _GLIBCXX_SIMD_INTRINSIC constexpr
  auto
  __call_with_subscripts(_Tp&& __x, index_sequence<_It...>, _Fp&& __fun)
  { return __fun(__x[_First + _It]...); }

// Returns __fun(__x[_First], __x[_First + 1], ..., __x[_First + _Np - 1]).
template <size_t _Np, size_t _First = 0, typename _Tp, typename _Fp>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __call_with_subscripts(_Tp&& __x, _Fp&& __fun)
  {
    return __call_with_subscripts<_First>(static_cast<_Tp&&>(__x),
					  make_index_sequence<_Np>(),
					  static_cast<_Fp&&>(__fun));
  }

// }}}
498
// vvv ---- type traits ---- vvv
// integer type aliases{{{
using _UChar = unsigned char;
using _SChar = signed char;
using _UShort = unsigned short;
using _UInt = unsigned int;
using _ULong = unsigned long;
using _ULLong = unsigned long long;
using _LLong = long long;

//}}}
// __first_of_pack{{{
// Yields the first type of a template parameter pack.
template <typename _T0, typename...>
  struct __first_of_pack
  { using type = _T0; };

template <typename... _Ts>
  using __first_of_pack_t = typename __first_of_pack<_Ts...>::type;

//}}}
// __value_type_or_identity_t {{{
// Yields _Tp::value_type if that is well-formed (the int overload is the
// better match), otherwise _Tp itself (float overload fallback).
template <typename _Tp>
  typename _Tp::value_type
  __value_type_or_identity_impl(int);

template <typename _Tp>
  _Tp
  __value_type_or_identity_impl(float);

template <typename _Tp>
  using __value_type_or_identity_t
    = decltype(__value_type_or_identity_impl<_Tp>(int()));

// }}}
// __is_vectorizable {{{
// Satisfied for arithmetic types, except bool.
template <typename _Tp>
  struct __is_vectorizable : public is_arithmetic<_Tp> {};

template <>
  struct __is_vectorizable<bool> : public false_type {};

template <typename _Tp>
  inline constexpr bool __is_vectorizable_v = __is_vectorizable<_Tp>::value;

// Deduces to a vectorizable type
template <typename _Tp, typename = enable_if_t<__is_vectorizable_v<_Tp>>>
  using _Vectorizable = _Tp;

// }}}
// _LoadStorePtr / __is_possible_loadstore_conversion {{{
// Both the pointee and the value type must be vectorizable — or both bool.
template <typename _Ptr, typename _ValueType>
  struct __is_possible_loadstore_conversion
  : conjunction<__is_vectorizable<_Ptr>, __is_vectorizable<_ValueType>> {};

template <>
  struct __is_possible_loadstore_conversion<bool, bool> : true_type {};

// Deduces to a type allowed for load/store with the given value type.
template <typename _Ptr, typename _ValueType,
	  typename = enable_if_t<
	    __is_possible_loadstore_conversion<_Ptr, _ValueType>::value>>
  using _LoadStorePtr = _Ptr;

// }}}
// __is_bitmask{{{
template <typename _Tp, typename = void_t<>>
  struct __is_bitmask : false_type {};

template <typename _Tp>
  inline constexpr bool __is_bitmask_v = __is_bitmask<_Tp>::value;

// the __mmaskXX case: satisfied when `unsigned& = _Tp() & 1u` is well-formed.
template <typename _Tp>
  struct __is_bitmask<_Tp,
		      void_t<decltype(declval<unsigned&>() = declval<_Tp>() & 1u)>>
  : true_type {};

// }}}
// __int_for_sizeof{{{
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wpedantic"
// Returns a value-initialized object of a signed integer type with the
// requested size. If no builtin integer type matches, falls back to an
// aggregate of ints that supports the bitwise operators (&, |, ^, ~) needed
// by the mask implementation.
template <size_t _Bytes>
  constexpr auto
  __int_for_sizeof()
  {
    static_assert(_Bytes > 0);
    if constexpr (_Bytes == sizeof(int))
      return int();
  #ifdef __clang__
    else if constexpr (_Bytes == sizeof(char))
      return char();
  #else
    else if constexpr (_Bytes == sizeof(_SChar))
      return _SChar();
  #endif
    else if constexpr (_Bytes == sizeof(short))
      return short();
  #ifndef __clang__
    else if constexpr (_Bytes == sizeof(long))
      return long();
  #endif
    else if constexpr (_Bytes == sizeof(_LLong))
      return _LLong();
  #ifdef __SIZEOF_INT128__
    else if constexpr (_Bytes == sizeof(__int128))
      return __int128();
  #endif // __SIZEOF_INT128__
    else if constexpr (_Bytes % sizeof(int) == 0)
      {
	// No single builtin integer is large enough: use an array of ints
	// wrapped in a struct with elementwise bitwise operators.
	constexpr size_t _Np = _Bytes / sizeof(int);
	struct _Ip
	{
	  int _M_data[_Np];

	  _GLIBCXX_SIMD_INTRINSIC constexpr _Ip
	  operator&(_Ip __rhs) const
	  {
	    return __generate_from_n_evaluations<_Np, _Ip>(
		     [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
		       return __rhs._M_data[__i] & _M_data[__i];
		     });
	  }

	  _GLIBCXX_SIMD_INTRINSIC constexpr _Ip
	  operator|(_Ip __rhs) const
	  {
	    return __generate_from_n_evaluations<_Np, _Ip>(
		     [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
		       return __rhs._M_data[__i] | _M_data[__i];
		     });
	  }

	  _GLIBCXX_SIMD_INTRINSIC constexpr _Ip
	  operator^(_Ip __rhs) const
	  {
	    return __generate_from_n_evaluations<_Np, _Ip>(
		     [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
		       return __rhs._M_data[__i] ^ _M_data[__i];
		     });
	  }

	  _GLIBCXX_SIMD_INTRINSIC constexpr _Ip
	  operator~() const
	  {
	    return __generate_from_n_evaluations<_Np, _Ip>(
		     [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return ~_M_data[__i]; });
	  }
	};
	return _Ip{};
      }
    else
      // Value-dependent condition: only fires if instantiated with a _Bytes
      // value not handled above.
      static_assert(_Bytes == 0, "this should be unreachable");
  }
#pragma GCC diagnostic pop

// Signed integer type with the same sizeof as _Tp.
template <typename _Tp>
  using __int_for_sizeof_t = decltype(__int_for_sizeof<sizeof(_Tp)>());

// Signed integer type with sizeof == _Np.
template <size_t _Np>
  using __int_with_sizeof_t = decltype(__int_for_sizeof<_Np>());

// }}}
// __is_fixed_size_abi{{{
template <typename _Tp>
  struct __is_fixed_size_abi : false_type {};

template <int _Np>
  struct __is_fixed_size_abi<simd_abi::fixed_size<_Np>> : true_type {};

template <typename _Tp>
  inline constexpr bool __is_fixed_size_abi_v = __is_fixed_size_abi<_Tp>::value;

// }}}
// __is_scalar_abi {{{
template <typename _Abi>
  constexpr bool
  __is_scalar_abi()
  { return is_same_v<simd_abi::scalar, _Abi>; }

// }}}
// __abi_bytes_v {{{
// Extracts the _Bytes argument of an _Abi<_Bytes> instantiation, or -1 if
// _Abi is not of that form. Overload resolution on a null pointer does the
// pattern matching.
template <template <int> class _Abi, int _Bytes>
  constexpr int
  __abi_bytes_impl(_Abi<_Bytes>*)
  { return _Bytes; }

template <typename _Tp>
  constexpr int
  __abi_bytes_impl(_Tp*)
  { return -1; }

template <typename _Abi>
  inline constexpr int __abi_bytes_v
    = __abi_bytes_impl(static_cast<_Abi*>(nullptr));

// }}}
// __is_builtin_bitmask_abi {{{
template <typename _Abi>
  constexpr bool
  __is_builtin_bitmask_abi()
  { return is_same_v<simd_abi::_VecBltnBtmsk<__abi_bytes_v<_Abi>>, _Abi>; }

// }}}
// __is_sse_abi {{{
// _VecBuiltin ABI using at most 16 bytes.
template <typename _Abi>
  constexpr bool
  __is_sse_abi()
  {
    constexpr auto _Bytes = __abi_bytes_v<_Abi>;
    return _Bytes <= 16 && is_same_v<simd_abi::_VecBuiltin<_Bytes>, _Abi>;
  }

// }}}
// __is_avx_abi {{{
// _VecBuiltin ABI using 17 to 32 bytes.
template <typename _Abi>
  constexpr bool
  __is_avx_abi()
  {
    constexpr auto _Bytes = __abi_bytes_v<_Abi>;
    return _Bytes > 16 && _Bytes <= 32
	     && is_same_v<simd_abi::_VecBuiltin<_Bytes>, _Abi>;
  }

// }}}
// __is_avx512_abi {{{
// _VecBltnBtmsk (bitmask) ABI using at most 64 bytes.
template <typename _Abi>
  constexpr bool
  __is_avx512_abi()
  {
    constexpr auto _Bytes = __abi_bytes_v<_Abi>;
    return _Bytes <= 64 && is_same_v<simd_abi::_Avx512<_Bytes>, _Abi>;
  }

// }}}
// __is_neon_abi {{{
// _VecBuiltin ABI using at most 16 bytes (same test as __is_sse_abi; which
// one applies depends on the target architecture).
template <typename _Abi>
  constexpr bool
  __is_neon_abi()
  {
    constexpr auto _Bytes = __abi_bytes_v<_Abi>;
    return _Bytes <= 16 && is_same_v<simd_abi::_VecBuiltin<_Bytes>, _Abi>;
  }

// }}}
// __make_dependent_t {{{
// Maps any type to _Up. Used to make an expression dependent on a template
// parameter, delaying its instantiation.
template <typename, typename _Up>
  struct __make_dependent
  { using type = _Up; };

template <typename _Tp, typename _Up>
  using __make_dependent_t = typename __make_dependent<_Tp, _Up>::type;

// }}}
752// ^^^ ---- type traits ---- ^^^
753
// __invoke_ub{{{
// Marks a code path as undefined behavior. With _GLIBCXX_DEBUG_UB the message
// is printed (printf-style) and the process traps; otherwise the compiler is
// told the path is unreachable.
template <typename... _Args>
  [[noreturn]] _GLIBCXX_SIMD_ALWAYS_INLINE void
  __invoke_ub([[maybe_unused]] const char* __msg, [[maybe_unused]] const _Args&... __args)
  {
#ifdef _GLIBCXX_DEBUG_UB
    __builtin_fprintf(stderr, __msg, __args...);
    __builtin_trap();
#else
    __builtin_unreachable();
#endif
  }

// }}}
// __assert_unreachable{{{
// Instantiating this template triggers a compile error; the condition is
// value-dependent so the static_assert fires only on instantiation.
template <typename _Tp>
  struct __assert_unreachable
  { static_assert(!is_same_v<_Tp, _Tp>, "this should be unreachable"); };

// }}}
// __size_or_zero_v {{{
// simd_size<_Tp, _Ap>::value if that is well-formed, otherwise 0.
template <typename _Tp, typename _Ap, size_t _Np = simd_size<_Tp, _Ap>::value>
  constexpr size_t
  __size_or_zero_dispatch(int)
  { return _Np; }

template <typename _Tp, typename _Ap>
  constexpr size_t
  __size_or_zero_dispatch(float)
  { return 0; }

template <typename _Tp, typename _Ap>
  inline constexpr size_t __size_or_zero_v
    = __size_or_zero_dispatch<_Tp, _Ap>(0);

// }}}
790// __div_roundup {{{
// Integer division rounding up: the smallest n with n * __b >= __a.
// Written without the usual (__a + __b - 1) so the addition cannot wrap
// around for very large __a. Precondition: __b != 0.
inline constexpr size_t
__div_roundup(size_t __a, size_t __b)
{ return __a == 0 ? 0 : (__a - 1) / __b + 1; }
794
795// }}}
// _ExactBool{{{
// A bool wrapper constructible from bool only: construction from int is
// deleted, so implicit integer -> bool narrowing is rejected in overload sets
// that take an _ExactBool.
class _ExactBool
{
  const bool _M_data;

public:
  _GLIBCXX_SIMD_INTRINSIC constexpr
  _ExactBool(bool __b) : _M_data(__b) {}

  _ExactBool(int) = delete;

  _GLIBCXX_SIMD_INTRINSIC constexpr
  operator bool() const
  { return _M_data; }
};

// }}}
// __may_alias{{{
/**@internal
 * Helper __may_alias<_Tp> that turns _Tp into the type to be used for an
 * aliasing pointer. This adds the __may_alias attribute to _Tp (with compilers
 * that support it).
 */
template <typename _Tp>
  using __may_alias [[__gnu__::__may_alias__]] = _Tp;

// }}}
// _UnsupportedBase {{{
// simd and simd_mask base for unsupported <_Tp, _Abi>
// All special member functions are deleted, making such simd types unusable.
struct _UnsupportedBase
{
  _UnsupportedBase() = delete;
  _UnsupportedBase(const _UnsupportedBase&) = delete;
  _UnsupportedBase& operator=(const _UnsupportedBase&) = delete;
  ~_UnsupportedBase() = delete;
};
832
833// }}}
// _InvalidTraits {{{
/**
 * @internal
 * Defines the implementation of a given <_Tp, _Abi>.
 *
 * Implementations must ensure that only valid <_Tp, _Abi> instantiations are
 * possible. Static assertions in the type definition do not suffice. It is
 * important that SFINAE works.
 */
struct _InvalidTraits
{
  using _IsValid = false_type;
  using _SimdBase = _UnsupportedBase;
  using _MaskBase = _UnsupportedBase;

  static constexpr size_t _S_full_size = 0;
  static constexpr bool _S_is_partial = false;

  static constexpr size_t _S_simd_align = 1;
  struct _SimdImpl;
  struct _SimdMember {};
  struct _SimdCastType;

  static constexpr size_t _S_mask_align = 1;
  struct _MaskImpl;
  struct _MaskMember {};
  struct _MaskCastType;
};

// }}}
// _SimdTraits {{{
// Primary template: every <_Tp, _Abi> combination is invalid unless a
// specialization (elsewhere) says otherwise.
template <typename _Tp, typename _Abi, typename = void_t<>>
  struct _SimdTraits : _InvalidTraits {};

// }}}
// __private_init, __bitset_init{{{
/**
 * @internal
 * Tag used for private init constructor of simd and simd_mask
 */
inline constexpr struct _PrivateInit {} __private_init = {};

// Tag analogous to __private_init, selecting initialization from a bitset.
inline constexpr struct _BitsetInit {} __bitset_init = {};
877
878// }}}
// __is_narrowing_conversion<_From, _To>{{{
template <typename _From, typename _To, bool = is_arithmetic_v<_From>,
	  bool = is_arithmetic_v<_To>>
  struct __is_narrowing_conversion;

// ignore "signed/unsigned mismatch" in the following trait.
// The implicit conversions will do the right thing here.
// Arithmetic -> arithmetic: narrowing if _To has fewer digits, a smaller
// range, or loses the sign.
template <typename _From, typename _To>
  struct __is_narrowing_conversion<_From, _To, true, true>
  : public __bool_constant<(
      __digits_v<_From> > __digits_v<_To>
	|| __finite_max_v<_From> > __finite_max_v<_To>
	|| __finite_min_v<_From> < __finite_min_v<_To>
	|| (is_signed_v<_From> && is_unsigned_v<_To>))> {};

// Any arithmetic type converts narrowingly to bool ...
template <typename _Tp>
  struct __is_narrowing_conversion<_Tp, bool, true, true>
  : public true_type {};

// ... except bool itself.
template <>
  struct __is_narrowing_conversion<bool, bool, true, true>
  : public false_type {};

// The identity conversion is never narrowing.
template <typename _Tp>
  struct __is_narrowing_conversion<_Tp, _Tp, true, true>
  : public false_type {};

// Non-arithmetic -> arithmetic: narrowing unless implicitly convertible.
template <typename _From, typename _To>
  struct __is_narrowing_conversion<_From, _To, false, true>
  : public negation<is_convertible<_From, _To>> {};

// }}}
// __converts_to_higher_integer_rank{{{
// True if _To has higher rank; for equal sizes, checks whether the usual
// arithmetic conversions of _From + _To yield _To.
template <typename _From, typename _To, bool = (sizeof(_From) < sizeof(_To))>
  struct __converts_to_higher_integer_rank : public true_type {};

// this may fail for char -> short if sizeof(char) == sizeof(short)
template <typename _From, typename _To>
  struct __converts_to_higher_integer_rank<_From, _To, false>
  : public is_same<decltype(declval<_From>() + declval<_To>()), _To> {};

// }}}
// __data(simd/simd_mask) {{{
// Accessors for the private data member of simd/simd_mask; defined after
// those class templates.
template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC constexpr const auto&
  __data(const simd<_Tp, _Ap>& __x);

template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto&
  __data(simd<_Tp, _Ap>& __x);

template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC constexpr const auto&
  __data(const simd_mask<_Tp, _Ap>& __x);

template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto&
  __data(simd_mask<_Tp, _Ap>& __x);

// }}}
// _SimdConverter {{{
// Conversion between simd representations; specialized elsewhere per
// <from type/ABI, to type/ABI> combination.
template <typename _FromT, typename _FromA, typename _ToT, typename _ToA,
	  typename = void>
  struct _SimdConverter;

// Identity conversion: passes the representation through unchanged.
template <typename _Tp, typename _Ap>
  struct _SimdConverter<_Tp, _Ap, _Tp, _Ap, void>
  {
    template <typename _Up>
      _GLIBCXX_SIMD_INTRINSIC const _Up&
      operator()(const _Up& __x)
      { return __x; }
  };

// }}}
// __to_value_type_or_member_type {{{
// For simd/simd_mask arguments returns the wrapped member (via __data); for
// plain value_type arguments returns the value unchanged.
template <typename _V>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __to_value_type_or_member_type(const _V& __x) -> decltype(__data(__x))
  { return __data(__x); }

template <typename _V>
  _GLIBCXX_SIMD_INTRINSIC constexpr const typename _V::value_type&
  __to_value_type_or_member_type(const typename _V::value_type& __x)
  { return __x; }

// }}}
// __bool_storage_member_type{{{
// Maps a mask size to the storage type used for bitmasks; specialized
// elsewhere.
template <size_t _Size>
  struct __bool_storage_member_type;

template <size_t _Size>
  using __bool_storage_member_type_t =
    typename __bool_storage_member_type<_Size>::type;

// }}}
// _SimdTuple {{{
// why not tuple?
// 1. tuple gives no guarantee about the storage order, but I require
//    storage equivalent to array<_Tp, _Np>
// 2. direct access to the element type (first template argument)
// 3. enforces equal element type, only different _Abi types are allowed
template <typename _Tp, typename... _Abis>
  struct _SimdTuple;

//}}}
// __fixed_size_storage_t {{{
// Storage type used for <_Tp, _Np> with the fixed_size ABI; specialized
// elsewhere.
template <typename _Tp, int _Np>
  struct __fixed_size_storage;

template <typename _Tp, int _Np>
  using __fixed_size_storage_t = typename __fixed_size_storage<_Tp, _Np>::type;

// }}}
// _SimdWrapper fwd decl{{{
template <typename _Tp, size_t _Size, typename = void_t<>>
  struct _SimdWrapper;

// Convenience aliases naming the wrapper by its total size in bytes.
template <typename _Tp>
  using _SimdWrapper8 = _SimdWrapper<_Tp, 8 / sizeof(_Tp)>;
template <typename _Tp>
  using _SimdWrapper16 = _SimdWrapper<_Tp, 16 / sizeof(_Tp)>;
template <typename _Tp>
  using _SimdWrapper32 = _SimdWrapper<_Tp, 32 / sizeof(_Tp)>;
template <typename _Tp>
  using _SimdWrapper64 = _SimdWrapper<_Tp, 64 / sizeof(_Tp)>;

// }}}
// __is_simd_wrapper {{{
template <typename _Tp>
  struct __is_simd_wrapper : false_type {};

template <typename _Tp, size_t _Np>
  struct __is_simd_wrapper<_SimdWrapper<_Tp, _Np>> : true_type {};

template <typename _Tp>
  inline constexpr bool __is_simd_wrapper_v = __is_simd_wrapper<_Tp>::value;

// }}}
1019// _BitOps {{{
// Collection of bit-manipulation helpers.
struct _BitOps
{
  // _S_bit_iteration {{{
  // Invoke __f(__i) for every set bit in __mask, where __i is the bit
  // index, iterating from least to most significant set bit.
  template <typename _Tp, typename _Fp>
    static void
    _S_bit_iteration(_Tp __mask, _Fp&& __f)
    {
      static_assert(sizeof(_ULLong) >= sizeof(_Tp));
      // Working copy in the smallest sufficient unsigned integer type.
      conditional_t<sizeof(_Tp) <= sizeof(_UInt), _UInt, _ULLong> __k;
      if constexpr (is_convertible_v<_Tp, decltype(__k)>)
	__k = __mask;
      else
	__k = __mask.to_ullong(); // e.g. a bitset-like mask type
      while(__k)
	{
	  __f(std::__countr_zero(__k));
	  __k &= (__k - 1); // clear the least significant set bit
	}
    }

  //}}}
};
1042
1043//}}}
1044// __increment, __decrement {{{
// Function object applying the pre-increment operator. The void
// specialization deduces the operand type per call.
template <typename _Tp = void>
  struct __increment
  {
    constexpr _Tp
    operator()(_Tp __v) const
    { return ++__v; }
  };

template <>
  struct __increment<void>
  {
    // Generic call operator: works for any pre-incrementable type.
    template <typename _Tp>
      constexpr _Tp
      operator()(_Tp __v) const
      { return ++__v; }
  };
1057
// Function object applying the pre-decrement operator. The void
// specialization deduces the operand type per call.
template <typename _Tp = void>
  struct __decrement
  {
    constexpr _Tp
    operator()(_Tp __v) const
    { return --__v; }
  };

template <>
  struct __decrement<void>
  {
    // Generic call operator: works for any pre-decrementable type.
    template <typename _Tp>
      constexpr _Tp
      operator()(_Tp __v) const
      { return --__v; }
  };
1070
1071// }}}
1072// _ValuePreserving(OrInt) {{{
// SFINAE alias: only valid when converting _From -> _To is not narrowing.
template <typename _From, typename _To,
	  typename = enable_if_t<negation<
	    __is_narrowing_conversion<__remove_cvref_t<_From>, _To>>::value>>
  using _ValuePreserving = _From;

// Like _ValuePreserving, but additionally accepts `int` (and `unsigned`
// for unsigned _To) as a special case, matching [simd] broadcast rules.
template <typename _From, typename _To,
	  typename _DecayedFrom = __remove_cvref_t<_From>,
	  typename = enable_if_t<conjunction<
	    is_convertible<_From, _To>,
	    disjunction<
	      is_same<_DecayedFrom, _To>, is_same<_DecayedFrom, int>,
	      conjunction<is_same<_DecayedFrom, _UInt>, is_unsigned<_To>>,
	      negation<__is_narrowing_conversion<_DecayedFrom, _To>>>>::value>>
  using _ValuePreservingOrInt = _From;

// }}}
// __intrinsic_type {{{
// Maps (element type, byte size) to the target's intrinsic vector type;
// specializations are provided in the target-specific headers.
template <typename _Tp, size_t _Bytes, typename = void_t<>>
  struct __intrinsic_type;

template <typename _Tp, size_t _Size>
  using __intrinsic_type_t =
    typename __intrinsic_type<_Tp, _Size * sizeof(_Tp)>::type;

// Aliases naming the intrinsic type by total byte size.
template <typename _Tp>
  using __intrinsic_type2_t = typename __intrinsic_type<_Tp, 2>::type;
template <typename _Tp>
  using __intrinsic_type4_t = typename __intrinsic_type<_Tp, 4>::type;
template <typename _Tp>
  using __intrinsic_type8_t = typename __intrinsic_type<_Tp, 8>::type;
template <typename _Tp>
  using __intrinsic_type16_t = typename __intrinsic_type<_Tp, 16>::type;
template <typename _Tp>
  using __intrinsic_type32_t = typename __intrinsic_type<_Tp, 32>::type;
template <typename _Tp>
  using __intrinsic_type64_t = typename __intrinsic_type<_Tp, 64>::type;
1109
1110// }}}
1111// _BitMask {{{
// _BitMask<_Np, _Sanitized>: fixed-size bitmask; _Sanitized == true
// guarantees all padding bits are zero (see definition below).
template <size_t _Np, bool _Sanitized = false>
  struct _BitMask;

template <size_t _Np, bool _Sanitized>
  struct __is_bitmask<_BitMask<_Np, _Sanitized>, void> : true_type {};

template <size_t _Np>
  using _SanitizedBitMask = _BitMask<_Np, true>;
1120
1121template <size_t _Np, bool _Sanitized>
1122 struct _BitMask
1123 {
1124 static_assert(_Np > 0);
1125
1126 static constexpr size_t _NBytes = __div_roundup(_Np, __CHAR_BIT__);
1127
1128 using _Tp = conditional_t<_Np == 1, bool,
1129 make_unsigned_t<__int_with_sizeof_t<std::min(
1130 sizeof(_ULLong), std::__bit_ceil(_NBytes))>>>;
1131
1132 static constexpr int _S_array_size = __div_roundup(_NBytes, sizeof(_Tp));
1133
1134 _Tp _M_bits[_S_array_size];
1135
1136 static constexpr int _S_unused_bits
1137 = _Np == 1 ? 0 : _S_array_size * sizeof(_Tp) * __CHAR_BIT__ - _Np;
1138
1139 static constexpr _Tp _S_bitmask = +_Tp(~_Tp()) >> _S_unused_bits;
1140
1141 constexpr _BitMask() noexcept = default;
1142
1143 constexpr _BitMask(unsigned long long __x) noexcept
1144 : _M_bits{static_cast<_Tp>(__x)} {}
1145
1146 _BitMask(bitset<_Np> __x) noexcept : _BitMask(__x.to_ullong()) {}
1147
1148 constexpr _BitMask(const _BitMask&) noexcept = default;
1149
1150 template <bool _RhsSanitized, typename = enable_if_t<_RhsSanitized == false
1151 && _Sanitized == true>>
1152 constexpr _BitMask(const _BitMask<_Np, _RhsSanitized>& __rhs) noexcept
1153 : _BitMask(__rhs._M_sanitized()) {}
1154
1155 constexpr operator _SimdWrapper<bool, _Np>() const noexcept
1156 {
1157 static_assert(_S_array_size == 1);
1158 return _M_bits[0];
1159 }
1160
1161 // precondition: is sanitized
1162 constexpr _Tp
1163 _M_to_bits() const noexcept
1164 {
1165 static_assert(_S_array_size == 1);
1166 return _M_bits[0];
1167 }
1168
1169 // precondition: is sanitized
1170 constexpr unsigned long long
1171 to_ullong() const noexcept
1172 {
1173 static_assert(_S_array_size == 1);
1174 return _M_bits[0];
1175 }
1176
1177 // precondition: is sanitized
1178 constexpr unsigned long
1179 to_ulong() const noexcept
1180 {
1181 static_assert(_S_array_size == 1);
1182 return _M_bits[0];
1183 }
1184
1185 constexpr bitset<_Np>
1186 _M_to_bitset() const noexcept
1187 {
1188 static_assert(_S_array_size == 1);
1189 return _M_bits[0];
1190 }
1191
1192 constexpr decltype(auto)
1193 _M_sanitized() const noexcept
1194 {
1195 if constexpr (_Sanitized)
1196 return *this;
1197 else if constexpr (_Np == 1)
1198 return _SanitizedBitMask<_Np>(_M_bits[0]);
1199 else
1200 {
1201 _SanitizedBitMask<_Np> __r = {};
1202 for (int __i = 0; __i < _S_array_size; ++__i)
1203 __r._M_bits[__i] = _M_bits[__i];
1204 if constexpr (_S_unused_bits > 0)
1205 __r._M_bits[_S_array_size - 1] &= _S_bitmask;
1206 return __r;
1207 }
1208 }
1209
1210 template <size_t _Mp, bool _LSanitized>
1211 constexpr _BitMask<_Np + _Mp, _Sanitized>
1212 _M_prepend(_BitMask<_Mp, _LSanitized> __lsb) const noexcept
1213 {
1214 constexpr size_t _RN = _Np + _Mp;
1215 using _Rp = _BitMask<_RN, _Sanitized>;
1216 if constexpr (_Rp::_S_array_size == 1)
1217 {
1218 _Rp __r{{_M_bits[0]}};
1219 __r._M_bits[0] <<= _Mp;
1220 __r._M_bits[0] |= __lsb._M_sanitized()._M_bits[0];
1221 return __r;
1222 }
1223 else
1224 __assert_unreachable<_Rp>();
1225 }
1226
1227 // Return a new _BitMask with size _NewSize while dropping _DropLsb least
1228 // significant bits. If the operation implicitly produces a sanitized bitmask,
1229 // the result type will have _Sanitized set.
1230 template <size_t _DropLsb, size_t _NewSize = _Np - _DropLsb>
1231 constexpr auto
1232 _M_extract() const noexcept
1233 {
1234 static_assert(_Np > _DropLsb);
1235 static_assert(_DropLsb + _NewSize <= sizeof(_ULLong) * __CHAR_BIT__,
1236 "not implemented for bitmasks larger than one ullong");
1237 if constexpr (_NewSize == 1)
1238 // must sanitize because the return _Tp is bool
1239 return _SanitizedBitMask<1>(_M_bits[0] & (_Tp(1) << _DropLsb));
1240 else
1241 return _BitMask<_NewSize,
1242 ((_NewSize + _DropLsb == sizeof(_Tp) * __CHAR_BIT__
1243 && _NewSize + _DropLsb <= _Np)
1244 || ((_Sanitized || _Np == sizeof(_Tp) * __CHAR_BIT__)
1245 && _NewSize + _DropLsb >= _Np))>(_M_bits[0]
1246 >> _DropLsb);
1247 }
1248
1249 // True if all bits are set. Implicitly sanitizes if _Sanitized == false.
1250 constexpr bool
1251 all() const noexcept
1252 {
1253 if constexpr (_Np == 1)
1254 return _M_bits[0];
1255 else if constexpr (!_Sanitized)
1256 return _M_sanitized().all();
1257 else
1258 {
1259 constexpr _Tp __allbits = ~_Tp();
1260 for (int __i = 0; __i < _S_array_size - 1; ++__i)
1261 if (_M_bits[__i] != __allbits)
1262 return false;
1263 return _M_bits[_S_array_size - 1] == _S_bitmask;
1264 }
1265 }
1266
1267 // True if at least one bit is set. Implicitly sanitizes if _Sanitized ==
1268 // false.
1269 constexpr bool
1270 any() const noexcept
1271 {
1272 if constexpr (_Np == 1)
1273 return _M_bits[0];
1274 else if constexpr (!_Sanitized)
1275 return _M_sanitized().any();
1276 else
1277 {
1278 for (int __i = 0; __i < _S_array_size - 1; ++__i)
1279 if (_M_bits[__i] != 0)
1280 return true;
1281 return _M_bits[_S_array_size - 1] != 0;
1282 }
1283 }
1284
1285 // True if no bit is set. Implicitly sanitizes if _Sanitized == false.
1286 constexpr bool
1287 none() const noexcept
1288 {
1289 if constexpr (_Np == 1)
1290 return !_M_bits[0];
1291 else if constexpr (!_Sanitized)
1292 return _M_sanitized().none();
1293 else
1294 {
1295 for (int __i = 0; __i < _S_array_size - 1; ++__i)
1296 if (_M_bits[__i] != 0)
1297 return false;
1298 return _M_bits[_S_array_size - 1] == 0;
1299 }
1300 }
1301
1302 // Returns the number of set bits. Implicitly sanitizes if _Sanitized ==
1303 // false.
1304 constexpr int
1305 count() const noexcept
1306 {
1307 if constexpr (_Np == 1)
1308 return _M_bits[0];
1309 else if constexpr (!_Sanitized)
1310 return _M_sanitized().none();
1311 else
1312 {
1313 int __result = __builtin_popcountll(_M_bits[0]);
1314 for (int __i = 1; __i < _S_array_size; ++__i)
1315 __result += __builtin_popcountll(_M_bits[__i]);
1316 return __result;
1317 }
1318 }
1319
1320 // Returns the bit at offset __i as bool.
1321 constexpr bool
1322 operator[](size_t __i) const noexcept
1323 {
1324 if constexpr (_Np == 1)
1325 return _M_bits[0];
1326 else if constexpr (_S_array_size == 1)
1327 return (_M_bits[0] >> __i) & 1;
1328 else
1329 {
1330 const size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__);
1331 const size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__);
1332 return (_M_bits[__j] >> __shift) & 1;
1333 }
1334 }
1335
1336 template <size_t __i>
1337 constexpr bool
1338 operator[](_SizeConstant<__i>) const noexcept
1339 {
1340 static_assert(__i < _Np);
1341 constexpr size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__);
1342 constexpr size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__);
1343 return static_cast<bool>(_M_bits[__j] & (_Tp(1) << __shift));
1344 }
1345
1346 // Set the bit at offset __i to __x.
1347 constexpr void
1348 set(size_t __i, bool __x) noexcept
1349 {
1350 if constexpr (_Np == 1)
1351 _M_bits[0] = __x;
1352 else if constexpr (_S_array_size == 1)
1353 {
1354 _M_bits[0] &= ~_Tp(_Tp(1) << __i);
1355 _M_bits[0] |= _Tp(_Tp(__x) << __i);
1356 }
1357 else
1358 {
1359 const size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__);
1360 const size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__);
1361 _M_bits[__j] &= ~_Tp(_Tp(1) << __shift);
1362 _M_bits[__j] |= _Tp(_Tp(__x) << __shift);
1363 }
1364 }
1365
1366 template <size_t __i>
1367 constexpr void
1368 set(_SizeConstant<__i>, bool __x) noexcept
1369 {
1370 static_assert(__i < _Np);
1371 if constexpr (_Np == 1)
1372 _M_bits[0] = __x;
1373 else
1374 {
1375 constexpr size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__);
1376 constexpr size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__);
1377 constexpr _Tp __mask = ~_Tp(_Tp(1) << __shift);
1378 _M_bits[__j] &= __mask;
1379 _M_bits[__j] |= _Tp(_Tp(__x) << __shift);
1380 }
1381 }
1382
1383 // Inverts all bits. Sanitized input leads to sanitized output.
1384 constexpr _BitMask
1385 operator~() const noexcept
1386 {
1387 if constexpr (_Np == 1)
1388 return !_M_bits[0];
1389 else
1390 {
1391 _BitMask __result{};
1392 for (int __i = 0; __i < _S_array_size - 1; ++__i)
1393 __result._M_bits[__i] = ~_M_bits[__i];
1394 if constexpr (_Sanitized)
1395 __result._M_bits[_S_array_size - 1]
1396 = _M_bits[_S_array_size - 1] ^ _S_bitmask;
1397 else
1398 __result._M_bits[_S_array_size - 1] = ~_M_bits[_S_array_size - 1];
1399 return __result;
1400 }
1401 }
1402
1403 constexpr _BitMask&
1404 operator^=(const _BitMask& __b) & noexcept
1405 {
1406 __execute_n_times<_S_array_size>(
1407 [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { _M_bits[__i] ^= __b._M_bits[__i]; });
1408 return *this;
1409 }
1410
1411 constexpr _BitMask&
1412 operator|=(const _BitMask& __b) & noexcept
1413 {
1414 __execute_n_times<_S_array_size>(
1415 [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { _M_bits[__i] |= __b._M_bits[__i]; });
1416 return *this;
1417 }
1418
1419 constexpr _BitMask&
1420 operator&=(const _BitMask& __b) & noexcept
1421 {
1422 __execute_n_times<_S_array_size>(
1423 [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { _M_bits[__i] &= __b._M_bits[__i]; });
1424 return *this;
1425 }
1426
1427 friend constexpr _BitMask
1428 operator^(const _BitMask& __a, const _BitMask& __b) noexcept
1429 {
1430 _BitMask __r = __a;
1431 __r ^= __b;
1432 return __r;
1433 }
1434
1435 friend constexpr _BitMask
1436 operator|(const _BitMask& __a, const _BitMask& __b) noexcept
1437 {
1438 _BitMask __r = __a;
1439 __r |= __b;
1440 return __r;
1441 }
1442
1443 friend constexpr _BitMask
1444 operator&(const _BitMask& __a, const _BitMask& __b) noexcept
1445 {
1446 _BitMask __r = __a;
1447 __r &= __b;
1448 return __r;
1449 }
1450
1451 _GLIBCXX_SIMD_INTRINSIC
1452 constexpr bool
1453 _M_is_constprop() const
1454 {
1455 if constexpr (_S_array_size == 0)
1456 return __builtin_constant_p(_M_bits[0]);
1457 else
1458 {
1459 for (int __i = 0; __i < _S_array_size; ++__i)
1460 if (!__builtin_constant_p(_M_bits[__i]))
1461 return false;
1462 return true;
1463 }
1464 }
1465 };
1466
1467// }}}
1468
1469// vvv ---- builtin vector types [[gnu::vector_size(N)]] and operations ---- vvv
1470// __min_vector_size {{{
// Minimum byte size used for builtin vector types; the void specialization
// gives the element-type-independent minimum (8 on NEON, 16 otherwise).
template <typename _Tp = void>
  static inline constexpr int __min_vector_size = 2 * sizeof(_Tp);

#if _GLIBCXX_SIMD_HAVE_NEON
template <>
  inline constexpr int __min_vector_size<void> = 8;
#else
template <>
  inline constexpr int __min_vector_size<void> = 16;
#endif
1481
1482// }}}
1483// __vector_type {{{
// Maps (element type, element count) to a builtin vector type.
template <typename _Tp, size_t _Np, typename = void>
  struct __vector_type_n {};

// substitution failure for 0-element case
template <typename _Tp>
  struct __vector_type_n<_Tp, 0, void> {};

// special case 1-element to be _Tp itself
template <typename _Tp>
  struct __vector_type_n<_Tp, 1, enable_if_t<__is_vectorizable_v<_Tp>>>
  { using type = _Tp; };

// else, use GNU-style builtin vector types
template <typename _Tp, size_t _Np>
  struct __vector_type_n<_Tp, _Np, enable_if_t<__is_vectorizable_v<_Tp> && _Np >= 2>>
  {
    // Requested byte size, rounded up to a power of two.
    static constexpr size_t _S_Np2 = std::__bit_ceil(_Np * sizeof(_Tp));

    static constexpr size_t _S_Bytes =
#ifdef __i386__
      // Using [[gnu::vector_size(8)]] would wreak havoc on the FPU because
      // those objects are passed via MMX registers and nothing ever calls EMMS.
      _S_Np2 == 8 ? 16 :
#endif
      _S_Np2 < __min_vector_size<_Tp> ? __min_vector_size<_Tp>
				      : _S_Np2;

    using type [[__gnu__::__vector_size__(_S_Bytes)]] = _Tp;
  };

// Maps (element type, byte size) to a builtin vector type; only valid when
// _Bytes is a multiple of sizeof(_Tp).
template <typename _Tp, size_t _Bytes, size_t = _Bytes % sizeof(_Tp)>
  struct __vector_type;

template <typename _Tp, size_t _Bytes>
  struct __vector_type<_Tp, _Bytes, 0>
  : __vector_type_n<_Tp, _Bytes / sizeof(_Tp)> {};

template <typename _Tp, size_t _Size>
  using __vector_type_t = typename __vector_type_n<_Tp, _Size>::type;

// Aliases naming the vector type by total byte size.
template <typename _Tp>
  using __vector_type2_t = typename __vector_type<_Tp, 2>::type;
template <typename _Tp>
  using __vector_type4_t = typename __vector_type<_Tp, 4>::type;
template <typename _Tp>
  using __vector_type8_t = typename __vector_type<_Tp, 8>::type;
template <typename _Tp>
  using __vector_type16_t = typename __vector_type<_Tp, 16>::type;
template <typename _Tp>
  using __vector_type32_t = typename __vector_type<_Tp, 32>::type;
template <typename _Tp>
  using __vector_type64_t = typename __vector_type<_Tp, 64>::type;

// }}}
// __is_vector_type {{{
// Trait: true iff _Tp is exactly the builtin vector type this library would
// choose for its element type and byte size.
template <typename _Tp, typename = void_t<>>
  struct __is_vector_type : false_type {};

template <typename _Tp>
  struct __is_vector_type<
    _Tp, void_t<typename __vector_type<
	   remove_reference_t<decltype(declval<_Tp>()[0])>, sizeof(_Tp)>::type>>
  : is_same<_Tp, typename __vector_type<
		   remove_reference_t<decltype(declval<_Tp>()[0])>,
		   sizeof(_Tp)>::type> {};

template <typename _Tp>
  inline constexpr bool __is_vector_type_v = __is_vector_type<_Tp>::value;
1552
1553// }}}
1554// __is_intrinsic_type {{{
#if _GLIBCXX_SIMD_HAVE_SSE_ABI
// With the SSE ABI, intrinsic types and builtin vector types coincide.
template <typename _Tp>
  using __is_intrinsic_type = __is_vector_type<_Tp>;
#else // not SSE (x86)
// Trait: true iff _Tp is exactly the target intrinsic type for its element
// type and byte size.
template <typename _Tp, typename = void_t<>>
  struct __is_intrinsic_type : false_type {};

template <typename _Tp>
  struct __is_intrinsic_type<
    _Tp, void_t<typename __intrinsic_type<
	   remove_reference_t<decltype(declval<_Tp>()[0])>, sizeof(_Tp)>::type>>
  : is_same<_Tp, typename __intrinsic_type<
		   remove_reference_t<decltype(declval<_Tp>()[0])>,
		   sizeof(_Tp)>::type> {};
#endif

template <typename _Tp>
  inline constexpr bool __is_intrinsic_type_v = __is_intrinsic_type<_Tp>::value;
1573
1574// }}}
1575// _VectorTraits{{{
// Traits describing a builtin-vector or intrinsic type: element type, full
// element count, and the matching _SimdWrapper.
template <typename _Tp, typename = void_t<>>
  struct _VectorTraitsImpl;

template <typename _Tp>
  struct _VectorTraitsImpl<_Tp, enable_if_t<__is_vector_type_v<_Tp>
					      || __is_intrinsic_type_v<_Tp>>>
  {
    using type = _Tp;
    using value_type = remove_reference_t<decltype(declval<_Tp>()[0])>;
    static constexpr int _S_full_size = sizeof(_Tp) / sizeof(value_type);
    using _Wrapper = _SimdWrapper<value_type, _S_full_size>;
    // _S_is<_Up, _W>: true iff this vector has element type _Up and width _W.
    template <typename _Up, int _W = _S_full_size>
      static constexpr bool _S_is
	= is_same_v<value_type, _Up> && _W == _S_full_size;
  };
1591
1592template <typename _Tp, size_t _Np>
1593 struct _VectorTraitsImpl<_SimdWrapper<_Tp, _Np>,
1594 void_t<__vector_type_t<_Tp, _Np>>>
1595 {
1596 using type = __vector_type_t<_Tp, _Np>;
1597 using value_type = _Tp;
1598 static constexpr int _S_full_size = sizeof(type) / sizeof(value_type);
1599 using _Wrapper = _SimdWrapper<_Tp, _Np>;
1600 static constexpr bool _S_is_partial = (_Np == _S_full_size);
1601 static constexpr int _S_partial_width = _Np;
1602 template <typename _Up, int _W = _S_full_size>
1603 static constexpr bool _S_is
1604 = is_same_v<value_type, _Up>&& _W == _S_full_size;
1605 };
1606
// SFINAE-friendly entry point: only valid when _VectorTraitsImpl<_Tp> has a
// `type` member.
template <typename _Tp, typename = typename _VectorTraitsImpl<_Tp>::type>
  using _VectorTraits = _VectorTraitsImpl<_Tp>;

// }}}
// __as_vector{{{
// Return __x as a builtin vector: pass-through for vector types, unwrap
// simd/simd_mask, wrap a scalar vectorizable value into a 2-element vector,
// and otherwise read the wrapper's _M_data member.
template <typename _V>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __as_vector(_V __x)
  {
    if constexpr (__is_vector_type_v<_V>)
      return __x;
    else if constexpr (is_simd<_V>::value || is_simd_mask<_V>::value)
      return __data(__x)._M_data;
    else if constexpr (__is_vectorizable_v<_V>)
      return __vector_type_t<_V, 2>{__x};
    else
      return __x._M_data;
  }
1625
1626// }}}
1627// __as_wrapper{{{
// Return __x as a _SimdWrapper of _Np elements (or the vector's full size
// when _Np == 0): wrap builtin vectors, unwrap simd/simd_mask, and
// pass-through objects that already have matching _S_size.
template <size_t _Np = 0, typename _V>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __as_wrapper(_V __x)
  {
    if constexpr (__is_vector_type_v<_V>)
      return _SimdWrapper<typename _VectorTraits<_V>::value_type,
			  (_Np > 0 ? _Np : _VectorTraits<_V>::_S_full_size)>(__x);
    else if constexpr (is_simd<_V>::value || is_simd_mask<_V>::value)
      {
	static_assert(_V::size() == _Np);
	return __data(__x);
      }
    else
      {
	static_assert(_V::_S_size == _Np);
	return __x;
      }
  }
1646
1647// }}}
1648// __intrin_bitcast{{{
// Bit-cast between vector/intrinsic types of possibly different sizes.
// Shrinking keeps the low bytes; growing zero-extends (the extra bytes are
// value-initialized), using AVX/AVX-512 cast builtins where available.
template <typename _To, typename _From>
  _GLIBCXX_SIMD_INTRINSIC constexpr _To
  __intrin_bitcast(_From __v)
  {
    static_assert((__is_vector_type_v<_From> || __is_intrinsic_type_v<_From>)
		    && (__is_vector_type_v<_To> || __is_intrinsic_type_v<_To>));
    if constexpr (sizeof(_To) == sizeof(_From))
      return reinterpret_cast<_To>(__v);
    else if constexpr (sizeof(_From) > sizeof(_To))
      if constexpr (sizeof(_To) >= 16)
	// aligned-compatible: reinterpret the low bytes in place
	return reinterpret_cast<const __may_alias<_To>&>(__v);
      else
	{
	  _To __r;
	  __builtin_memcpy(&__r, &__v, sizeof(_To));
	  return __r;
	}
#if _GLIBCXX_SIMD_X86INTRIN && !defined __clang__
    // widening casts via dedicated x86 builtins (upper lanes undefined/zero
    // per the builtin's semantics)
    else if constexpr (__have_avx && sizeof(_From) == 16 && sizeof(_To) == 32)
      return reinterpret_cast<_To>(__builtin_ia32_ps256_ps(
	reinterpret_cast<__vector_type_t<float, 4>>(__v)));
    else if constexpr (__have_avx512f && sizeof(_From) == 16
			 && sizeof(_To) == 64)
      return reinterpret_cast<_To>(__builtin_ia32_ps512_ps(
	reinterpret_cast<__vector_type_t<float, 4>>(__v)));
    else if constexpr (__have_avx512f && sizeof(_From) == 32
			 && sizeof(_To) == 64)
      return reinterpret_cast<_To>(__builtin_ia32_ps512_256ps(
	reinterpret_cast<__vector_type_t<float, 8>>(__v)));
#endif // _GLIBCXX_SIMD_X86INTRIN
    else if constexpr (sizeof(__v) <= 8)
      // widen a small source: place it as the first element of an integer
      // vector of matching element size
      return reinterpret_cast<_To>(
	__vector_type_t<__int_for_sizeof_t<_From>, sizeof(_To) / sizeof(_From)>{
	  reinterpret_cast<__int_for_sizeof_t<_From>>(__v)});
    else
      {
	static_assert(sizeof(_To) > sizeof(_From));
	_To __r = {};
	__builtin_memcpy(&__r, &__v, sizeof(_From));
	return __r;
      }
  }
1691
1692// }}}
1693// __vector_bitcast{{{
// Bit-cast __x to a builtin vector of _To with _Np elements (_Np defaults
// to filling the source's byte size; _NN overrides it).
template <typename _To, size_t _NN = 0, typename _From,
	  typename _FromVT = _VectorTraits<_From>,
	  size_t _Np = _NN == 0 ? sizeof(_From) / sizeof(_To) : _NN>
  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_To, _Np>
  __vector_bitcast(_From __x)
  {
    using _R = __vector_type_t<_To, _Np>;
    return __intrin_bitcast<_R>(__x);
  }

// Overload for _SimdWrapper sources; casts the wrapped vector.
template <typename _To, size_t _NN = 0, typename _Tp, size_t _Nx,
	  size_t _Np
	    = _NN == 0 ? sizeof(_SimdWrapper<_Tp, _Nx>) / sizeof(_To) : _NN>
  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_To, _Np>
  __vector_bitcast(const _SimdWrapper<_Tp, _Nx>& __x)
  {
    static_assert(_Np > 1);
    return __intrin_bitcast<__vector_type_t<_To, _Np>>(__x._M_data);
  }

// }}}
// __convert_x86 declarations {{{
// Forward declarations of the x86 conversion helpers (1, 2, 4, 8, or 16
// source vectors); definitions live in the x86 conversion header.
#ifdef _GLIBCXX_SIMD_WORKAROUND_PR85048
template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
  _To __convert_x86(_Tp);

template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
  _To __convert_x86(_Tp, _Tp);

template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
  _To __convert_x86(_Tp, _Tp, _Tp, _Tp);

template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
  _To __convert_x86(_Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp);

template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
  _To __convert_x86(_Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp,
		    _Tp, _Tp, _Tp, _Tp);
#endif // _GLIBCXX_SIMD_WORKAROUND_PR85048
1733
1734//}}}
1735// __bit_cast {{{
// constexpr-friendly bit_cast: uses __builtin_bit_cast when available,
// otherwise emulates it via vector reinterpret_casts (valid on GNU vector
// types) or memcpy as a last resort.
template <typename _To, typename _From>
  _GLIBCXX_SIMD_INTRINSIC constexpr _To
  __bit_cast(const _From __x)
  {
#if __has_builtin(__builtin_bit_cast)
    return __builtin_bit_cast(_To, __x);
#else
    static_assert(sizeof(_To) == sizeof(_From));
    constexpr bool __to_is_vectorizable
      = is_arithmetic_v<_To> || is_enum_v<_To>;
    constexpr bool __from_is_vectorizable
      = is_arithmetic_v<_From> || is_enum_v<_From>;
    if constexpr (__is_vector_type_v<_To> && __is_vector_type_v<_From>)
      return reinterpret_cast<_To>(__x);
    else if constexpr (__is_vector_type_v<_To> && __from_is_vectorizable)
      {
	// wrap the scalar in a 1-element vector so reinterpret_cast works
	using _FV [[__gnu__::__vector_size__(sizeof(_From))]] = _From;
	return reinterpret_cast<_To>(_FV{__x});
      }
    else if constexpr (__to_is_vectorizable && __from_is_vectorizable)
      {
	using _TV [[__gnu__::__vector_size__(sizeof(_To))]] = _To;
	using _FV [[__gnu__::__vector_size__(sizeof(_From))]] = _From;
	return reinterpret_cast<_TV>(_FV{__x})[0];
      }
    else if constexpr (__to_is_vectorizable && __is_vector_type_v<_From>)
      {
	using _TV [[__gnu__::__vector_size__(sizeof(_To))]] = _To;
	return reinterpret_cast<_TV>(__x)[0];
      }
    else
      {
	// fallback for class types: byte-wise copy (not constexpr)
	_To __r;
	__builtin_memcpy(reinterpret_cast<char*>(&__r),
			 reinterpret_cast<const char*>(&__x), sizeof(_To));
	return __r;
      }
#endif
  }
1775
1776// }}}
1777// __to_intrin {{{
// Convert __x to the matching intrinsic type, zero-padding if the intrinsic
// type is larger; never truncates.
template <typename _Tp, typename _TVT = _VectorTraits<_Tp>,
	  typename _R = __intrinsic_type_t<typename _TVT::value_type, _TVT::_S_full_size>>
  _GLIBCXX_SIMD_INTRINSIC constexpr _R
  __to_intrin(_Tp __x)
  {
    static_assert(sizeof(__x) <= sizeof(_R),
		  "__to_intrin may never drop values off the end");
    if constexpr (sizeof(__x) == sizeof(_R))
      return reinterpret_cast<_R>(__as_vector(__x));
    else
      {
	// place the source bits as the first element of a wider int vector
	using _Up = __int_for_sizeof_t<_Tp>;
	return reinterpret_cast<_R>(
	  __vector_type_t<_Up, sizeof(_R) / sizeof(_Up)>{__bit_cast<_Up>(__x)});
      }
  }

// }}}
// __make_vector{{{
// Build a vector with one element per argument, converting each to _Tp.
template <typename _Tp, typename... _Args>
  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, sizeof...(_Args)>
  __make_vector(const _Args&... __args)
  { return __vector_type_t<_Tp, sizeof...(_Args)>{static_cast<_Tp>(__args)...}; }
1801
1802// }}}
1803// __vector_broadcast{{{
// Build a vector with all _Np elements equal to __x.
template <size_t _Np, typename _Tp, size_t... _I>
  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, _Np>
  __vector_broadcast_impl(_Tp __x, index_sequence<_I...>)
  { return __vector_type_t<_Tp, _Np>{((void)_I, __x)...}; }

template <size_t _Np, typename _Tp>
  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, _Np>
  __vector_broadcast(_Tp __x)
  { return __vector_broadcast_impl<_Np, _Tp>(__x, make_index_sequence<_Np>()); }

// }}}
// __generate_vector{{{
// Build a vector whose element __i is __gen(_SizeConstant<__i>()),
// converted to _Tp.
 template <typename _Tp, size_t _Np, typename _Gp, size_t... _I>
   _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, _Np>
   __generate_vector_impl(_Gp&& __gen, index_sequence<_I...>)
   { return __vector_type_t<_Tp, _Np>{ static_cast<_Tp>(__gen(_SizeConstant<_I>()))...}; }

// Overload deducing element type and width from the target vector or
// wrapper type _V; partial wrappers only generate their used elements.
template <typename _V, typename _VVT = _VectorTraits<_V>, typename _Gp>
  _GLIBCXX_SIMD_INTRINSIC constexpr _V
  __generate_vector(_Gp&& __gen)
  {
    if constexpr (__is_vector_type_v<_V>)
      return __generate_vector_impl<typename _VVT::value_type,
				    _VVT::_S_full_size>(
	       static_cast<_Gp&&>(__gen), make_index_sequence<_VVT::_S_full_size>());
    else
      return __generate_vector_impl<typename _VVT::value_type,
				    _VVT::_S_partial_width>(
	       static_cast<_Gp&&>(__gen),
	       make_index_sequence<_VVT::_S_partial_width>());
  }

// Overload with explicit element type and width.
template <typename _Tp, size_t _Np, typename _Gp>
  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, _Np>
  __generate_vector(_Gp&& __gen)
  {
    return __generate_vector_impl<_Tp, _Np>(static_cast<_Gp&&>(__gen),
					    make_index_sequence<_Np>());
  }
1843
1844// }}}
1845// __xor{{{
// Bitwise XOR on vectors, wrappers, or scalars. Floating-point element
// types are processed via an unsigned-integer bitcast, since GNU vector ^
// is not defined for floating-point vectors.
template <typename _TW>
  _GLIBCXX_SIMD_INTRINSIC constexpr _TW
  __xor(_TW __a, _TW __b) noexcept
  {
    if constexpr (__is_vector_type_v<_TW> || __is_simd_wrapper_v<_TW>)
      {
	using _Tp = typename conditional_t<__is_simd_wrapper_v<_TW>, _TW,
					   _VectorTraitsImpl<_TW>>::value_type;
	if constexpr (is_floating_point_v<_Tp>)
	  {
	    using _Ip = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
	    return __vector_bitcast<_Tp>(__vector_bitcast<_Ip>(__a)
					   ^ __vector_bitcast<_Ip>(__b));
	  }
	else if constexpr (__is_vector_type_v<_TW>)
	  return __a ^ __b;
	else
	  return __a._M_data ^ __b._M_data;
      }
    else
      return __a ^ __b;
  }

// }}}
// __or{{{
// Bitwise OR; same dispatch structure as __xor above.
template <typename _TW>
  _GLIBCXX_SIMD_INTRINSIC constexpr _TW
  __or(_TW __a, _TW __b) noexcept
  {
    if constexpr (__is_vector_type_v<_TW> || __is_simd_wrapper_v<_TW>)
      {
	using _Tp = typename conditional_t<__is_simd_wrapper_v<_TW>, _TW,
					   _VectorTraitsImpl<_TW>>::value_type;
	if constexpr (is_floating_point_v<_Tp>)
	  {
	    using _Ip = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
	    return __vector_bitcast<_Tp>(__vector_bitcast<_Ip>(__a)
					   | __vector_bitcast<_Ip>(__b));
	  }
	else if constexpr (__is_vector_type_v<_TW>)
	  return __a | __b;
	else
	  return __a._M_data | __b._M_data;
      }
    else
      return __a | __b;
  }

// }}}
// __and{{{
// Bitwise AND; same dispatch structure as __xor above.
template <typename _TW>
  _GLIBCXX_SIMD_INTRINSIC constexpr _TW
  __and(_TW __a, _TW __b) noexcept
  {
    if constexpr (__is_vector_type_v<_TW> || __is_simd_wrapper_v<_TW>)
      {
	using _Tp = typename conditional_t<__is_simd_wrapper_v<_TW>, _TW,
					   _VectorTraitsImpl<_TW>>::value_type;
	if constexpr (is_floating_point_v<_Tp>)
	  {
	    using _Ip = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
	    return __vector_bitcast<_Tp>(__vector_bitcast<_Ip>(__a)
					   & __vector_bitcast<_Ip>(__b));
	  }
	else if constexpr (__is_vector_type_v<_TW>)
	  return __a & __b;
	else
	  return __a._M_data & __b._M_data;
      }
    else
      return __a & __b;
  }
1918
1919// }}}
1920// __andnot{{{
1921#if _GLIBCXX_SIMD_X86INTRIN && !defined __clang__
1922static constexpr struct
1923{
1924 _GLIBCXX_SIMD_INTRINSIC __v4sf
1925 operator()(__v4sf __a, __v4sf __b) const noexcept
1926 { return __builtin_ia32_andnps(__a, __b); }
1927
1928 _GLIBCXX_SIMD_INTRINSIC __v2df
1929 operator()(__v2df __a, __v2df __b) const noexcept
1930 { return __builtin_ia32_andnpd(__a, __b); }
1931
1932 _GLIBCXX_SIMD_INTRINSIC __v2di
1933 operator()(__v2di __a, __v2di __b) const noexcept
1934 { return __builtin_ia32_pandn128(__a, __b); }
1935
1936 _GLIBCXX_SIMD_INTRINSIC __v8sf
1937 operator()(__v8sf __a, __v8sf __b) const noexcept
1938 { return __builtin_ia32_andnps256(__a, __b); }
1939
1940 _GLIBCXX_SIMD_INTRINSIC __v4df
1941 operator()(__v4df __a, __v4df __b) const noexcept
1942 { return __builtin_ia32_andnpd256(__a, __b); }
1943
1944 _GLIBCXX_SIMD_INTRINSIC __v4di
1945 operator()(__v4di __a, __v4di __b) const noexcept
1946 {
1947 if constexpr (__have_avx2)
1948 return __builtin_ia32_andnotsi256(__a, __b);
1949 else
1950 return reinterpret_cast<__v4di>(
1951 __builtin_ia32_andnpd256(reinterpret_cast<__v4df>(__a),
1952 reinterpret_cast<__v4df>(__b)));
1953 }
1954
1955 _GLIBCXX_SIMD_INTRINSIC __v16sf
1956 operator()(__v16sf __a, __v16sf __b) const noexcept
1957 {
1958 if constexpr (__have_avx512dq)
1959 return _mm512_andnot_ps(__a, __b);
1960 else
1961 return reinterpret_cast<__v16sf>(
1962 _mm512_andnot_si512(reinterpret_cast<__v8di>(__a),
1963 reinterpret_cast<__v8di>(__b)));
1964 }
1965
1966 _GLIBCXX_SIMD_INTRINSIC __v8df
1967 operator()(__v8df __a, __v8df __b) const noexcept
1968 {
1969 if constexpr (__have_avx512dq)
1970 return _mm512_andnot_pd(__a, __b);
1971 else
1972 return reinterpret_cast<__v8df>(
1973 _mm512_andnot_si512(reinterpret_cast<__v8di>(__a),
1974 reinterpret_cast<__v8di>(__b)));
1975 }
1976
1977 _GLIBCXX_SIMD_INTRINSIC __v8di
1978 operator()(__v8di __a, __v8di __b) const noexcept
1979 { return _mm512_andnot_si512(__a, __b); }
1980} _S_x86_andnot;
1981#endif // _GLIBCXX_SIMD_X86INTRIN && !__clang__
1982
// Computes ~__a & __b for vectors, wrappers, or scalars, using the x86
// ANDNOT overload set above when available at run time (not during constant
// evaluation or for constant operands).
template <typename _TW>
  _GLIBCXX_SIMD_INTRINSIC constexpr _TW
  __andnot(_TW __a, _TW __b) noexcept
  {
    if constexpr (__is_vector_type_v<_TW> || __is_simd_wrapper_v<_TW>)
      {
	using _TVT = conditional_t<__is_simd_wrapper_v<_TW>, _TW,
				   _VectorTraitsImpl<_TW>>;
	using _Tp = typename _TVT::value_type;
#if _GLIBCXX_SIMD_X86INTRIN && !defined __clang__
	if constexpr (sizeof(_TW) >= 16)
	  {
	    const auto __ai = __to_intrin(__a);
	    const auto __bi = __to_intrin(__b);
	    // keep constant-foldable operands on the generic path below
	    if (!__builtin_is_constant_evaluated()
		  && !(__builtin_constant_p(__ai) && __builtin_constant_p(__bi)))
	      {
		const auto __r = _S_x86_andnot(__ai, __bi);
		if constexpr (is_convertible_v<decltype(__r), _TW>)
		  return __r;
		else
		  return reinterpret_cast<typename _TVT::type>(__r);
	      }
	  }
#endif // _GLIBCXX_SIMD_X86INTRIN
	// generic path: bitcast to unsigned integers so ~ and & are defined
	using _Ip = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
	return __vector_bitcast<_Tp>(~__vector_bitcast<_Ip>(__a)
				       & __vector_bitcast<_Ip>(__b));
      }
    else
      return ~__a & __b;
  }
2015
2016// }}}
2017// __not{{{
// Bitwise NOT of a vector; floating-point vectors are complemented via an
// unsigned-integer bitcast since ~ is not defined for them.
template <typename _Tp, typename _TVT = _VectorTraits<_Tp>>
  _GLIBCXX_SIMD_INTRINSIC constexpr _Tp
  __not(_Tp __a) noexcept
  {
    if constexpr (is_floating_point_v<typename _TVT::value_type>)
      return reinterpret_cast<typename _TVT::type>(
	       ~__vector_bitcast<unsigned>(__a));
    else
      return ~__a;
  }
2028
2029// }}}
2030// __concat{{{
// Concatenates two equally sized vectors into one vector of twice the
// full size: the elements of a_ followed by the elements of b_.
template <typename _Tp, typename _TVT = _VectorTraits<_Tp>,
          typename _R = __vector_type_t<typename _TVT::value_type, _TVT::_S_full_size * 2>>
  constexpr _R
  __concat(_Tp a_, _Tp b_)
  {
#ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_1
    // Workaround: reinterpret the inputs as vectors of fewer, wider
    // elements (double / long long where the size allows) so the
    // element-by-element initialization below needs fewer subscripts.
    using _W
      = conditional_t<is_floating_point_v<typename _TVT::value_type>, double,
                      conditional_t<(sizeof(_Tp) >= 2 * sizeof(long long)),
                                    long long, typename _TVT::value_type>>;
    constexpr int input_width = sizeof(_Tp) / sizeof(_W);
    const auto __a = __vector_bitcast<_W>(a_);
    const auto __b = __vector_bitcast<_W>(b_);
    using _Up = __vector_type_t<_W, sizeof(_R) / sizeof(_W)>;
#else
    constexpr int input_width = _TVT::_S_full_size;
    const _Tp& __a = a_;
    const _Tp& __b = b_;
    using _Up = _R;
#endif
    // Enumerate the supported input widths; every branch lists all
    // elements of __a followed by all elements of __b.
    if constexpr (input_width == 2)
      return reinterpret_cast<_R>(_Up{__a[0], __a[1], __b[0], __b[1]});
    else if constexpr (input_width == 4)
      return reinterpret_cast<_R>(
               _Up{__a[0], __a[1], __a[2], __a[3], __b[0], __b[1], __b[2], __b[3]});
    else if constexpr (input_width == 8)
      return reinterpret_cast<_R>(
               _Up{__a[0], __a[1], __a[2], __a[3], __a[4], __a[5], __a[6], __a[7],
                   __b[0], __b[1], __b[2], __b[3], __b[4], __b[5], __b[6], __b[7]});
    else if constexpr (input_width == 16)
      return reinterpret_cast<_R>(
               _Up{__a[0], __a[1], __a[2], __a[3], __a[4], __a[5], __a[6],
                   __a[7], __a[8], __a[9], __a[10], __a[11], __a[12], __a[13],
                   __a[14], __a[15], __b[0], __b[1], __b[2], __b[3], __b[4],
                   __b[5], __b[6], __b[7], __b[8], __b[9], __b[10], __b[11],
                   __b[12], __b[13], __b[14], __b[15]});
    else if constexpr (input_width == 32)
      return reinterpret_cast<_R>(
               _Up{__a[0], __a[1], __a[2], __a[3], __a[4], __a[5], __a[6],
                   __a[7], __a[8], __a[9], __a[10], __a[11], __a[12], __a[13],
                   __a[14], __a[15], __a[16], __a[17], __a[18], __a[19], __a[20],
                   __a[21], __a[22], __a[23], __a[24], __a[25], __a[26], __a[27],
                   __a[28], __a[29], __a[30], __a[31], __b[0], __b[1], __b[2],
                   __b[3], __b[4], __b[5], __b[6], __b[7], __b[8], __b[9],
                   __b[10], __b[11], __b[12], __b[13], __b[14], __b[15], __b[16],
                   __b[17], __b[18], __b[19], __b[20], __b[21], __b[22], __b[23],
                   __b[24], __b[25], __b[26], __b[27], __b[28], __b[29], __b[30],
                   __b[31]});
  }
2080
2081// }}}
2082// __zero_extend {{{
// Proxy returned by __zero_extend. Converts implicitly to any wider
// vector type with the same value_type; the low _Np elements are __x and
// all additional elements are zero.
template <typename _Tp, typename _TVT = _VectorTraits<_Tp>>
  struct _ZeroExtendProxy
  {
    using value_type = typename _TVT::value_type;
    static constexpr size_t _Np = _TVT::_S_full_size;
    const _Tp __x;

    // Conversion to a vector type _To with matching value_type. The
    // destination must be 1x, 2x, 4x, 8x, or 16x the size of _Tp.
    template <typename _To, typename _ToVT = _VectorTraits<_To>,
              typename
              = enable_if_t<is_same_v<typename _ToVT::value_type, value_type>>>
      _GLIBCXX_SIMD_INTRINSIC operator _To() const
      {
        constexpr size_t _ToN = _ToVT::_S_full_size;
        if constexpr (_ToN == _Np)
          return __x; // same width: nothing to extend
        else if constexpr (_ToN == 2 * _Np)
          {
#ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_3
            // Use insert intrinsics where available instead of __concat.
            if constexpr (__have_avx && _TVT::template _S_is<float, 4>)
              return __vector_bitcast<value_type>(
                       _mm256_insertf128_ps(__m256(), __x, 0));
            else if constexpr (__have_avx && _TVT::template _S_is<double, 2>)
              return __vector_bitcast<value_type>(
                       _mm256_insertf128_pd(__m256d(), __x, 0));
            else if constexpr (__have_avx2 && _Np * sizeof(value_type) == 16)
              return __vector_bitcast<value_type>(
                       _mm256_insertf128_si256(__m256i(), __to_intrin(__x), 0));
            else if constexpr (__have_avx512f && _TVT::template _S_is<float, 8>)
              {
                if constexpr (__have_avx512dq)
                  return __vector_bitcast<value_type>(
                           _mm512_insertf32x8(__m512(), __x, 0));
                else
                  // No insertf32x8 without AVX512DQ; go through double.
                  return reinterpret_cast<__m512>(
                           _mm512_insertf64x4(__m512d(),
                                              reinterpret_cast<__m256d>(__x), 0));
              }
            else if constexpr (__have_avx512f
                                 && _TVT::template _S_is<double, 4>)
              return __vector_bitcast<value_type>(
                       _mm512_insertf64x4(__m512d(), __x, 0));
            else if constexpr (__have_avx512f && _Np * sizeof(value_type) == 32)
              return __vector_bitcast<value_type>(
                       _mm512_inserti64x4(__m512i(), __to_intrin(__x), 0));
#endif
            return __concat(__x, _Tp());
          }
        else if constexpr (_ToN == 4 * _Np)
          {
#ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_3
            if constexpr (__have_avx512dq && _TVT::template _S_is<double, 2>)
              {
                return __vector_bitcast<value_type>(
                         _mm512_insertf64x2(__m512d(), __x, 0));
              }
            else if constexpr (__have_avx512f
                                 && is_floating_point_v<value_type>)
              {
                return __vector_bitcast<value_type>(
                         _mm512_insertf32x4(__m512(), reinterpret_cast<__m128>(__x),
                                            0));
              }
            else if constexpr (__have_avx512f && _Np * sizeof(value_type) == 16)
              {
                return __vector_bitcast<value_type>(
                         _mm512_inserti32x4(__m512i(), __to_intrin(__x), 0));
              }
#endif
            return __concat(__concat(__x, _Tp()),
                            __vector_type_t<value_type, _Np * 2>());
          }
        // 8x and 16x recurse via the smaller conversions above.
        else if constexpr (_ToN == 8 * _Np)
          return __concat(operator __vector_type_t<value_type, _Np * 4>(),
                          __vector_type_t<value_type, _Np * 4>());
        else if constexpr (_ToN == 16 * _Np)
          return __concat(operator __vector_type_t<value_type, _Np * 8>(),
                          __vector_type_t<value_type, _Np * 8>());
        else
          __assert_unreachable<_Tp>();
      }
  };
2164
2165template <typename _Tp, typename _TVT = _VectorTraits<_Tp>>
2166 _GLIBCXX_SIMD_INTRINSIC _ZeroExtendProxy<_Tp, _TVT>
2167 __zero_extend(_Tp __x)
2168 { return {__x}; }
2169
2170// }}}
2171// __extract<_Np, By>{{{
// Extracts the _Offset-th chunk of a vector split into _SplitBy equal
// parts, e.g. __extract<1, 2> returns the upper half.
template <int _Offset,
          int _SplitBy,
          typename _Tp,
          typename _TVT = _VectorTraits<_Tp>,
          typename _R = __vector_type_t<typename _TVT::value_type, _TVT::_S_full_size / _SplitBy>>
  _GLIBCXX_SIMD_INTRINSIC constexpr _R
  __extract(_Tp __in)
  {
    using value_type = typename _TVT::value_type;
#if _GLIBCXX_SIMD_X86INTRIN // {{{
    // 128-bit chunks of a 512-bit vector at non-zero offsets: use the
    // AVX-512 extract intrinsics directly.
    if constexpr (sizeof(_Tp) == 64 && _SplitBy == 4 && _Offset > 0)
      {
        if constexpr (__have_avx512dq && is_same_v<double, value_type>)
          return _mm512_extractf64x2_pd(__to_intrin(__in), _Offset);
        else if constexpr (is_floating_point_v<value_type>)
          return __vector_bitcast<value_type>(
                   _mm512_extractf32x4_ps(__intrin_bitcast<__m512>(__in), _Offset));
        else
          return reinterpret_cast<_R>(
                   _mm512_extracti32x4_epi32(__intrin_bitcast<__m512i>(__in),
                                             _Offset));
      }
    else
#endif // _GLIBCXX_SIMD_X86INTRIN }}}
      {
#ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_1
        // Workaround: operate on fewer, wider elements (cf. __concat).
        using _W = conditional_t<
          is_floating_point_v<value_type>, double,
          conditional_t<(sizeof(_R) >= 16), long long, value_type>>;
        static_assert(sizeof(_R) % sizeof(_W) == 0);
        constexpr int __return_width = sizeof(_R) / sizeof(_W);
        using _Up = __vector_type_t<_W, __return_width>;
        const auto __x = __vector_bitcast<_W>(__in);
#else
        constexpr int __return_width = _TVT::_S_full_size / _SplitBy;
        using _Up = _R;
        const __vector_type_t<value_type, _TVT::_S_full_size>& __x
          = __in; // only needed for _Tp = _SimdWrapper<value_type, _Np>
#endif
        // Build the result from the __return_width subscripts starting at
        // element _Offset * __return_width.
        constexpr int _O = _Offset * __return_width;
        return __call_with_subscripts<__return_width, _O>(
                 __x, [](auto... __entries) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
                   return reinterpret_cast<_R>(_Up{__entries...});
                 });
      }
  }
2218
2219// }}}
2220// __lo/__hi64[z]{{{
// Returns the low 8 bytes of __x as an 8-byte vector of the same
// value_type.
template <typename _Tp,
          typename _R = __vector_type8_t<typename _VectorTraits<_Tp>::value_type>>
  _GLIBCXX_SIMD_INTRINSIC constexpr _R
  __lo64(_Tp __x)
  {
    _R __r{};
    __builtin_memcpy(&__r, &__x, 8);
    return __r;
  }
2230
// Returns the high 8 bytes of a 16-byte vector __x as an 8-byte vector of
// the same value_type. Requires exactly 16 bytes of input.
template <typename _Tp,
          typename _R = __vector_type8_t<typename _VectorTraits<_Tp>::value_type>>
  _GLIBCXX_SIMD_INTRINSIC constexpr _R
  __hi64(_Tp __x)
  {
    static_assert(sizeof(_Tp) == 16, "use __hi64z if you meant it");
    _R __r{};
    __builtin_memcpy(&__r, reinterpret_cast<const char*>(&__x) + 8, 8);
    return __r;
  }
2241
// Like __hi64, but returns zero instead of asserting when __x is not a
// 16-byte vector.
template <typename _Tp,
          typename _R = __vector_type8_t<typename _VectorTraits<_Tp>::value_type>>
  _GLIBCXX_SIMD_INTRINSIC constexpr _R
  __hi64z([[maybe_unused]] _Tp __x)
  {
    _R __r{};
    if constexpr (sizeof(_Tp) == 16)
      __builtin_memcpy(&__r, reinterpret_cast<const char*>(&__x) + 8, 8);
    return __r;
  }
2252
2253// }}}
2254// __lo/__hi128{{{
// Returns the low 128 bits of __x (the first of sizeof(_Tp)/16 chunks).
template <typename _Tp>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __lo128(_Tp __x)
  { return __extract<0, sizeof(_Tp) / 16>(__x); }
2259
// Returns the high 128 bits of a 256-bit vector.
template <typename _Tp>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __hi128(_Tp __x)
  {
    static_assert(sizeof(__x) == 32);
    return __extract<1, 2>(__x);
  }
2267
2268// }}}
2269// __lo/__hi256{{{
// Returns the low 256 bits of a 512-bit vector.
template <typename _Tp>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __lo256(_Tp __x)
  {
    static_assert(sizeof(__x) == 64);
    return __extract<0, 2>(__x);
  }
2277
// Returns the high 256 bits of a 512-bit vector.
template <typename _Tp>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __hi256(_Tp __x)
  {
    static_assert(sizeof(__x) == 64);
    return __extract<1, 2>(__x);
  }
2285
2286// }}}
2287// __auto_bitcast{{{
// Proxy returned by __auto_bitcast: converts implicitly to any vector
// type _Up via __intrin_bitcast, so the destination type is deduced at
// the use site.
template <typename _Tp>
  struct _AutoCast
  {
    static_assert(__is_vector_type_v<_Tp>);

    const _Tp __x;

    template <typename _Up, typename _UVT = _VectorTraits<_Up>>
      _GLIBCXX_SIMD_INTRINSIC constexpr operator _Up() const
      { return __intrin_bitcast<typename _UVT::type>(__x); }
  };
2299
// Wraps a vector builtin in an _AutoCast proxy, letting the destination
// type of the bitcast be deduced from the context of the call.
template <typename _Tp>
  _GLIBCXX_SIMD_INTRINSIC constexpr _AutoCast<_Tp>
  __auto_bitcast(const _Tp& __x)
  { return {__x}; }

// Overload for _SimdWrapper: unwraps to the underlying vector builtin.
template <typename _Tp, size_t _Np>
  _GLIBCXX_SIMD_INTRINSIC constexpr
  _AutoCast<typename _SimdWrapper<_Tp, _Np>::_BuiltinType>
  __auto_bitcast(const _SimdWrapper<_Tp, _Np>& __x)
  { return {__x._M_data}; }
2310
2311// }}}
2312// ^^^ ---- builtin vector types [[gnu::vector_size(N)]] and operations ---- ^^^
2313
2314#if _GLIBCXX_SIMD_HAVE_SSE_ABI
2315// __bool_storage_member_type{{{
2316#if _GLIBCXX_SIMD_HAVE_AVX512F && _GLIBCXX_SIMD_X86INTRIN
// Maps a mask width to the __mmaskN type that stores it. The primary
// template handles non-power-of-2 sizes by rounding up to the next power
// of two and delegating to the corresponding specialization below.
template <size_t _Size>
  struct __bool_storage_member_type
  {
    static_assert((_Size & (_Size - 1)) != 0,
                  "This trait may only be used for non-power-of-2 sizes. "
                  "Power-of-2 sizes must be specialized.");
    using type =
      typename __bool_storage_member_type<std::__bit_ceil(_Size)>::type;
  };
2326
// Power-of-2 specializations: widths 2..8 share __mmask8; a width of 1
// needs only a plain bool.
template <>
  struct __bool_storage_member_type<1> { using type = bool; };

template <>
  struct __bool_storage_member_type<2> { using type = __mmask8; };

template <>
  struct __bool_storage_member_type<4> { using type = __mmask8; };

template <>
  struct __bool_storage_member_type<8> { using type = __mmask8; };

template <>
  struct __bool_storage_member_type<16> { using type = __mmask16; };

template <>
  struct __bool_storage_member_type<32> { using type = __mmask32; };

template <>
  struct __bool_storage_member_type<64> { using type = __mmask64; };
2347#endif // _GLIBCXX_SIMD_HAVE_AVX512F
2348
2349// }}}
2350// __intrinsic_type (x86){{{
2351// the following excludes bool via __is_vectorizable
2352#if _GLIBCXX_SIMD_HAVE_SSE
// x86 intrinsic type: rounds the size up to the next full SSE/AVX/AVX-512
// register width (16/32/64 bytes). All integral element types map to a
// long long vector (the __m128i/__m256i/__m512i convention).
template <typename _Tp, size_t _Bytes>
  struct __intrinsic_type<_Tp, _Bytes, enable_if_t<__is_vectorizable_v<_Tp> && _Bytes <= 64>>
  {
    static_assert(!is_same_v<_Tp, long double>,
                  "no __intrinsic_type support for long double on x86");

    static constexpr size_t _S_VBytes = _Bytes <= 16 ? 16 : _Bytes <= 32 ? 32 : 64;

    using type [[__gnu__::__vector_size__(_S_VBytes)]]
      = conditional_t<is_integral_v<_Tp>, long long int, _Tp>;
  };
2364#endif // _GLIBCXX_SIMD_HAVE_SSE
2365
2366// }}}
2367#endif // _GLIBCXX_SIMD_HAVE_SSE_ABI
2368// __intrinsic_type (ARM){{{
2369#if _GLIBCXX_SIMD_HAVE_NEON
// NEON floating-point specializations. The float64xN_t types only exist
// on AArch64 (A64), hence the conditional member in the double
// specializations: without A64 the specialization has no `type` and
// SFINAE rejects it.
template <>
  struct __intrinsic_type<float, 8, void>
  { using type = float32x2_t; };

template <>
  struct __intrinsic_type<float, 16, void>
  { using type = float32x4_t; };

template <>
  struct __intrinsic_type<double, 8, void>
  {
#if _GLIBCXX_SIMD_HAVE_NEON_A64
    using type = float64x1_t;
#endif
  };

template <>
  struct __intrinsic_type<double, 16, void>
  {
#if _GLIBCXX_SIMD_HAVE_NEON_A64
    using type = float64x2_t;
#endif
  };
2393
// Generates the signed and unsigned NEON integer specializations, mapping
// (element bits, element count) to the intBxN_t / uintBxN_t types.
#define _GLIBCXX_SIMD_ARM_INTRIN(_Bits, _Np) \
template <> \
  struct __intrinsic_type<__int_with_sizeof_t<_Bits / 8>, \
                          _Np * _Bits / 8, void> \
  { using type = int##_Bits##x##_Np##_t; }; \
template <> \
  struct __intrinsic_type<make_unsigned_t<__int_with_sizeof_t<_Bits / 8>>, \
                          _Np * _Bits / 8, void> \
  { using type = uint##_Bits##x##_Np##_t; }
// All 64-bit (D register) and 128-bit (Q register) integer combinations:
_GLIBCXX_SIMD_ARM_INTRIN(8, 8);
_GLIBCXX_SIMD_ARM_INTRIN(8, 16);
_GLIBCXX_SIMD_ARM_INTRIN(16, 4);
_GLIBCXX_SIMD_ARM_INTRIN(16, 8);
_GLIBCXX_SIMD_ARM_INTRIN(32, 2);
_GLIBCXX_SIMD_ARM_INTRIN(32, 4);
_GLIBCXX_SIMD_ARM_INTRIN(64, 1);
_GLIBCXX_SIMD_ARM_INTRIN(64, 2);
#undef _GLIBCXX_SIMD_ARM_INTRIN
2412
// NEON catch-all: normalizes the element type (via __int_for_sizeof_t,
// preserving signedness and floating-point-ness) and rounds the size up
// to a D (8-byte) or Q (16-byte) register, then delegates to the explicit
// specializations above.
template <typename _Tp, size_t _Bytes>
  struct __intrinsic_type<_Tp, _Bytes, enable_if_t<__is_vectorizable_v<_Tp> && _Bytes <= 16>>
  {
    static constexpr int _SVecBytes = _Bytes <= 8 ? 8 : 16;

    using _Ip = __int_for_sizeof_t<_Tp>;

    using _Up = conditional_t<
      is_floating_point_v<_Tp>, _Tp,
      conditional_t<is_unsigned_v<_Tp>, make_unsigned_t<_Ip>, _Ip>>;

    // Guard against infinite self-delegation: if neither the type nor the
    // size changed, the explicit specialization should have matched.
    static_assert(!is_same_v<_Tp, _Up> || _SVecBytes != _Bytes,
                  "should use explicit specialization above");

    using type = typename __intrinsic_type<_Up, _SVecBytes>::type;
  };
2429#endif // _GLIBCXX_SIMD_HAVE_NEON
2430
2431// }}}
2432// __intrinsic_type (PPC){{{
2433#ifdef __ALTIVEC__
// PowerPC: map element types to the corresponding AltiVec/VSX `__vector`
// type. double and 64-bit integers require VSX; long requires VSX or a
// 32-bit long (so it fits the 32-bit vector element).
template <typename _Tp>
  struct __intrinsic_type_impl;

#define _GLIBCXX_SIMD_PPC_INTRIN(_Tp) \
  template <> \
    struct __intrinsic_type_impl<_Tp> { using type = __vector _Tp; }
_GLIBCXX_SIMD_PPC_INTRIN(float);
#ifdef __VSX__
_GLIBCXX_SIMD_PPC_INTRIN(double);
#endif
_GLIBCXX_SIMD_PPC_INTRIN(signed char);
_GLIBCXX_SIMD_PPC_INTRIN(unsigned char);
_GLIBCXX_SIMD_PPC_INTRIN(signed short);
_GLIBCXX_SIMD_PPC_INTRIN(unsigned short);
_GLIBCXX_SIMD_PPC_INTRIN(signed int);
_GLIBCXX_SIMD_PPC_INTRIN(unsigned int);
#if defined __VSX__ || __SIZEOF_LONG__ == 4
_GLIBCXX_SIMD_PPC_INTRIN(signed long);
_GLIBCXX_SIMD_PPC_INTRIN(unsigned long);
#endif
#ifdef __VSX__
_GLIBCXX_SIMD_PPC_INTRIN(signed long long);
_GLIBCXX_SIMD_PPC_INTRIN(unsigned long long);
#endif
#undef _GLIBCXX_SIMD_PPC_INTRIN
2459
// PowerPC __intrinsic_type: selects an equivalent element type that has a
// __intrinsic_type_impl specialization (same size and signedness) and
// uses its `__vector` type.
template <typename _Tp, size_t _Bytes>
  struct __intrinsic_type<_Tp, _Bytes, enable_if_t<__is_vectorizable_v<_Tp> && _Bytes <= 16>>
  {
    static constexpr bool _S_is_ldouble = is_same_v<_Tp, long double>;

    // allow _Tp == long double with -mlong-double-64
    static_assert(!(_S_is_ldouble && sizeof(long double) > sizeof(double)),
                  "no __intrinsic_type support for 128-bit floating point on PowerPC");

#ifndef __VSX__
    static_assert(!(is_same_v<_Tp, double>
                      || (_S_is_ldouble && sizeof(long double) == sizeof(double))),
                  "no __intrinsic_type support for 64-bit floating point on PowerPC w/o VSX");
#endif

    // Returns a value whose type is the normalized element type; only the
    // type matters (used in the decltype below).
    static constexpr auto __element_type()
    {
      if constexpr (is_floating_point_v<_Tp>)
        {
          if constexpr (_S_is_ldouble)
            return double {}; // 64-bit long double is treated as double
          else
            return _Tp {};
        }
      else if constexpr (is_signed_v<_Tp>)
        {
          // Pick the standard signed type of matching size.
          if constexpr (sizeof(_Tp) == sizeof(_SChar))
            return _SChar {};
          else if constexpr (sizeof(_Tp) == sizeof(short))
            return short {};
          else if constexpr (sizeof(_Tp) == sizeof(int))
            return int {};
          else if constexpr (sizeof(_Tp) == sizeof(_LLong))
            return _LLong {};
        }
      else
        {
          // Pick the standard unsigned type of matching size.
          if constexpr (sizeof(_Tp) == sizeof(_UChar))
            return _UChar {};
          else if constexpr (sizeof(_Tp) == sizeof(_UShort))
            return _UShort {};
          else if constexpr (sizeof(_Tp) == sizeof(_UInt))
            return _UInt {};
          else if constexpr (sizeof(_Tp) == sizeof(_ULLong))
            return _ULLong {};
        }
    }

    using type = typename __intrinsic_type_impl<decltype(__element_type())>::type;
  };
2510#endif // __ALTIVEC__
2511
2512// }}}
2513// _SimdWrapper<bool>{{{1
// _SimdWrapper for AVX-512-style bitmasks: stores _Width mask bits in a
// __mmaskN integer (one bit per element, bit i = element i).
template <size_t _Width>
  struct _SimdWrapper<bool, _Width,
                      void_t<typename __bool_storage_member_type<_Width>::type>>
  {
    using _BuiltinType = typename __bool_storage_member_type<_Width>::type;
    using value_type = bool;

    // Number of bits the storage type can hold (>= _Width).
    static constexpr size_t _S_full_size = sizeof(_BuiltinType) * __CHAR_BIT__;

    _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper<bool, _S_full_size>
    __as_full_vector() const
    { return _M_data; }

    _GLIBCXX_SIMD_INTRINSIC constexpr
    _SimdWrapper() = default;

    _GLIBCXX_SIMD_INTRINSIC constexpr
    _SimdWrapper(_BuiltinType __k) : _M_data(__k) {};

    _GLIBCXX_SIMD_INTRINSIC
    operator const _BuiltinType&() const
    { return _M_data; }

    _GLIBCXX_SIMD_INTRINSIC
    operator _BuiltinType&()
    { return _M_data; }

    _GLIBCXX_SIMD_INTRINSIC _BuiltinType
    __intrin() const
    { return _M_data; }

    // Reads mask bit __i.
    _GLIBCXX_SIMD_INTRINSIC constexpr value_type
    operator[](size_t __i) const
    { return _M_data & (_BuiltinType(1) << __i); }

    template <size_t __i>
      _GLIBCXX_SIMD_INTRINSIC constexpr value_type
      operator[](_SizeConstant<__i>) const
      { return _M_data & (_BuiltinType(1) << __i); }

    // Sets mask bit __i to __x.
    _GLIBCXX_SIMD_INTRINSIC constexpr void
    _M_set(size_t __i, value_type __x)
    {
      if (__x)
        _M_data |= (_BuiltinType(1) << __i);
      else
        _M_data &= ~(_BuiltinType(1) << __i);
    }

    // True if the value is known at compile time (constant-propagated).
    _GLIBCXX_SIMD_INTRINSIC constexpr bool
    _M_is_constprop() const
    { return __builtin_constant_p(_M_data); }

    // True only if the compiler can prove all _Width mask bits are 0;
    // false means "unknown", not "some bit is set".
    _GLIBCXX_SIMD_INTRINSIC constexpr bool
    _M_is_constprop_none_of() const
    {
      if (__builtin_constant_p(_M_data))
        {
          constexpr int __nbits = sizeof(_BuiltinType) * __CHAR_BIT__;
          // Mask restricting the test to the _Width used bits.
          constexpr _BuiltinType __active_mask
            = ~_BuiltinType() >> (__nbits - _Width);
          return (_M_data & __active_mask) == 0;
        }
      return false;
    }

    // True only if the compiler can prove all _Width mask bits are 1.
    _GLIBCXX_SIMD_INTRINSIC constexpr bool
    _M_is_constprop_all_of() const
    {
      if (__builtin_constant_p(_M_data))
        {
          constexpr int __nbits = sizeof(_BuiltinType) * __CHAR_BIT__;
          constexpr _BuiltinType __active_mask
            = ~_BuiltinType() >> (__nbits - _Width);
          return (_M_data & __active_mask) == __active_mask;
        }
      return false;
    }

    _BuiltinType _M_data;
  };
2595
2596// _SimdWrapperBase{{{1
// Base of _SimdWrapper holding the vector builtin. The bool parameter
// selects whether default construction must zero-initialize: necessary
// when the wrapper has padding elements that could otherwise hold a
// signaling NaN bit pattern.
template <bool _MustZeroInitPadding, typename _BuiltinType>
  struct _SimdWrapperBase;

template <typename _BuiltinType>
  struct _SimdWrapperBase<false, _BuiltinType> // no padding or no SNaNs
  {
    _GLIBCXX_SIMD_INTRINSIC constexpr
    _SimdWrapperBase() = default;

    _GLIBCXX_SIMD_INTRINSIC constexpr
    _SimdWrapperBase(_BuiltinType __init) : _M_data(__init) {}

    _BuiltinType _M_data;
  };

template <typename _BuiltinType>
  struct _SimdWrapperBase<true, _BuiltinType> // with padding that needs to
                                              // never become SNaN
  {
    _GLIBCXX_SIMD_INTRINSIC constexpr
    _SimdWrapperBase() : _M_data() {} // zero-init instead of default-init

    _GLIBCXX_SIMD_INTRINSIC constexpr
    _SimdWrapperBase(_BuiltinType __init) : _M_data(__init) {}

    _BuiltinType _M_data;
  };
2624
2625// }}}
2626// _SimdWrapper{{{
// Primary _SimdWrapper: wraps a vector builtin of _Width elements of _Tp.
// The base class zero-initializes on default construction iff _Tp has
// IEC 559 signaling NaNs AND the vector type has padding elements
// (_Width elements don't fill the full register).
template <typename _Tp, size_t _Width>
  struct _SimdWrapper<
    _Tp, _Width,
    void_t<__vector_type_t<_Tp, _Width>, __intrinsic_type_t<_Tp, _Width>>>
    : _SimdWrapperBase<__has_iec559_behavior<__signaling_NaN, _Tp>::value
                         && sizeof(_Tp) * _Width
                              == sizeof(__vector_type_t<_Tp, _Width>),
                       __vector_type_t<_Tp, _Width>>
  {
    using _Base
      = _SimdWrapperBase<__has_iec559_behavior<__signaling_NaN, _Tp>::value
                           && sizeof(_Tp) * _Width
                                == sizeof(__vector_type_t<_Tp, _Width>),
                         __vector_type_t<_Tp, _Width>>;

    static_assert(__is_vectorizable_v<_Tp>);
    static_assert(_Width >= 2); // 1 doesn't make sense, use _Tp directly then

    using _BuiltinType = __vector_type_t<_Tp, _Width>;
    using value_type = _Tp;

    // Number of elements the underlying register holds (>= _S_size).
    static inline constexpr size_t _S_full_size
      = sizeof(_BuiltinType) / sizeof(value_type);
    static inline constexpr int _S_size = _Width;
    // True when only a prefix of the register is used.
    static inline constexpr bool _S_is_partial = _S_full_size != _S_size;

    using _Base::_M_data;

    _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper<_Tp, _S_full_size>
    __as_full_vector() const
    { return _M_data; }

    // Element-wise construction from an initializer_list (must provide at
    // least _Width values).
    _GLIBCXX_SIMD_INTRINSIC constexpr
    _SimdWrapper(initializer_list<_Tp> __init)
      : _Base(__generate_from_n_evaluations<_Width, _BuiltinType>(
                [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
                  return __init.begin()[__i.value];
                })) {}

    _GLIBCXX_SIMD_INTRINSIC constexpr
    _SimdWrapper() = default;

    _GLIBCXX_SIMD_INTRINSIC constexpr
    _SimdWrapper(const _SimdWrapper&) = default;

    _GLIBCXX_SIMD_INTRINSIC constexpr
    _SimdWrapper(_SimdWrapper&&) = default;

    _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper&
    operator=(const _SimdWrapper&) = default;

    _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper&
    operator=(_SimdWrapper&&) = default;

    // Construction from either the vector builtin or the intrinsic type
    // (e.g. __m128); anything else is rejected.
    template <typename _V, typename = enable_if_t<disjunction_v<
                             is_same<_V, __vector_type_t<_Tp, _Width>>,
                             is_same<_V, __intrinsic_type_t<_Tp, _Width>>>>>
      _GLIBCXX_SIMD_INTRINSIC constexpr
      _SimdWrapper(_V __x)
      // __vector_bitcast can convert e.g. __m128 to __vector(2) float
      : _Base(__vector_bitcast<_Tp, _Width>(__x)) {}

    // Conversion to a tuple of scalar ABIs, one element per entry.
    template <typename... _As,
              typename = enable_if_t<((is_same_v<simd_abi::scalar, _As> && ...)
                                        && sizeof...(_As) <= _Width)>>
      _GLIBCXX_SIMD_INTRINSIC constexpr
      operator _SimdTuple<_Tp, _As...>() const
      {
        return __generate_from_n_evaluations<sizeof...(_As), _SimdTuple<_Tp, _As...>>(
                 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
                 { return _M_data[int(__i)]; });
      }

    _GLIBCXX_SIMD_INTRINSIC constexpr
    operator const _BuiltinType&() const
    { return _M_data; }

    _GLIBCXX_SIMD_INTRINSIC constexpr
    operator _BuiltinType&()
    { return _M_data; }

    _GLIBCXX_SIMD_INTRINSIC constexpr _Tp
    operator[](size_t __i) const
    { return _M_data[__i]; }

    template <size_t __i>
      _GLIBCXX_SIMD_INTRINSIC constexpr _Tp
      operator[](_SizeConstant<__i>) const
      { return _M_data[__i]; }

    // Writes element __i. During constant evaluation the vector subscript
    // is not assignable, so the whole vector is regenerated instead.
    _GLIBCXX_SIMD_INTRINSIC constexpr void
    _M_set(size_t __i, _Tp __x)
    {
      if (__builtin_is_constant_evaluated())
        _M_data = __generate_from_n_evaluations<_Width, _BuiltinType>([&](auto __j) {
                    return __j == __i ? __x : _M_data[__j()];
                  });
      else
        _M_data[__i] = __x;
    }

    // True if the value is known at compile time (constant-propagated).
    _GLIBCXX_SIMD_INTRINSIC
    constexpr bool
    _M_is_constprop() const
    { return __builtin_constant_p(_M_data); }

    // True only if the compiler can prove every element is zero. For
    // floating-point elements the comparison is done on the bit pattern
    // (so -0.0 does not count as zero). Returns false when unknown.
    _GLIBCXX_SIMD_INTRINSIC constexpr bool
    _M_is_constprop_none_of() const
    {
      if (__builtin_constant_p(_M_data))
        {
          bool __r = true;
          if constexpr (is_floating_point_v<_Tp>)
            {
              using _Ip = __int_for_sizeof_t<_Tp>;
              const auto __intdata = __vector_bitcast<_Ip>(_M_data);
              __execute_n_times<_Width>(
                [&](auto __i) { __r &= __intdata[__i.value] == _Ip(); });
            }
          else
            __execute_n_times<_Width>(
              [&](auto __i) { __r &= _M_data[__i.value] == _Tp(); });
          if (__builtin_constant_p(__r))
            return __r;
        }
      return false;
    }

    // True only if the compiler can prove every element has all bits set
    // (the canonical all-true mask representation). Returns false when
    // unknown.
    _GLIBCXX_SIMD_INTRINSIC constexpr bool
    _M_is_constprop_all_of() const
    {
      if (__builtin_constant_p(_M_data))
        {
          bool __r = true;
          if constexpr (is_floating_point_v<_Tp>)
            {
              using _Ip = __int_for_sizeof_t<_Tp>;
              const auto __intdata = __vector_bitcast<_Ip>(_M_data);
              __execute_n_times<_Width>(
                [&](auto __i) { __r &= __intdata[__i.value] == ~_Ip(); });
            }
          else
            __execute_n_times<_Width>(
              [&](auto __i) { __r &= _M_data[__i.value] == ~_Tp(); });
          if (__builtin_constant_p(__r))
            return __r;
        }
      return false;
    }
  };
2777
2778// }}}
2779
2780// __vectorized_sizeof {{{
// Determines the widest register size (in bytes) usable for _Tp on the
// current target. The checks are ordered widest-first per architecture;
// the first matching one wins. Returns sizeof(_Tp) (i.e. scalar) when no
// vector ISA applies or _Tp is not vectorizable.
template <typename _Tp>
  constexpr size_t
  __vectorized_sizeof()
  {
    if constexpr (!__is_vectorizable_v<_Tp>)
      return 0;

    if constexpr (sizeof(_Tp) <= 8)
      {
        // X86:
        // AVX512BW covers 8/16-bit elements; plain AVX512F only 32/64-bit.
        if constexpr (__have_avx512bw)
          return 64;
        if constexpr (__have_avx512f && sizeof(_Tp) >= 4)
          return 64;
        if constexpr (__have_avx2)
          return 32;
        // AVX without AVX2 has 256-bit support only for floating point.
        if constexpr (__have_avx && is_floating_point_v<_Tp>)
          return 32;
        if constexpr (__have_sse2)
          return 16;
        if constexpr (__have_sse && is_same_v<_Tp, float>)
          return 16;
        /* The following is too much trouble because of mixed MMX and x87 code.
         * While nothing here explicitly calls MMX instructions of registers,
         * they are still emitted but no EMMS cleanup is done.
        if constexpr (__have_mmx && sizeof(_Tp) <= 4 && is_integral_v<_Tp>)
          return 8;
         */

        // PowerPC:
        if constexpr (__have_power8vec
                      || (__have_power_vmx && (sizeof(_Tp) < 8))
                      || (__have_power_vsx && is_floating_point_v<_Tp>) )
          return 16;

        // ARM:
        if constexpr (__have_neon_a64)
          return 16;
        if constexpr (__have_neon_a32 and (not is_floating_point_v<_Tp>
                                             or is_same_v<_Tp, float>))
          return 16;
        if constexpr (__have_neon
                        && sizeof(_Tp) < 8
                        // Only allow fp if the user allows non-ICE559 fp (e.g.
                        // via -ffast-math). ARMv7 NEON fp is not conforming to
                        // IEC559.
                        && (__support_neon_float || !is_floating_point_v<_Tp>))
          return 16;
      }

    return sizeof(_Tp);
  }
2833
2834// }}}
namespace simd_abi {
// most of simd_abi is defined in simd_detail.h

// Upper bound for fixed_size<N>: 64 only where a 64-element mask register
// exists (AVX512BW with 1-byte elements), otherwise 32.
template <typename _Tp>
  inline constexpr int max_fixed_size
    = (__have_avx512bw && sizeof(_Tp) == 1) ? 64 : 32;

// compatible {{{
// The ABI that is compatible across translation units / the platform
// baseline: 16-byte vectors on x86-64 and AArch64, otherwise scalar.
#if defined __x86_64__ || defined __aarch64__
template <typename _Tp>
  using compatible = conditional_t<(sizeof(_Tp) <= 8), _VecBuiltin<16>, scalar>;
#elif defined __ARM_NEON
// FIXME: not sure, probably needs to be scalar (or dependent on the hard-float
// ABI?)
template <typename _Tp>
  using compatible
    = conditional_t<(sizeof(_Tp) < 8
                       && (__support_neon_float || !is_floating_point_v<_Tp>)),
                    _VecBuiltin<16>, scalar>;
#else
template <typename>
  using compatible = scalar;
#endif

// }}}
// native {{{
// Selects the widest ABI tag the current target supports for _Tp; the
// result is encoded as a pointer type so it can be returned from a
// constexpr function and unwrapped with remove_pointer_t below.
template <typename _Tp>
  constexpr auto
  __determine_native_abi()
  {
    constexpr size_t __bytes = __vectorized_sizeof<_Tp>();
    if constexpr (__bytes == sizeof(_Tp))
      return static_cast<scalar*>(nullptr);
    else if constexpr (__have_avx512vl || (__have_avx512f && __bytes == 64))
      return static_cast<_VecBltnBtmsk<__bytes>*>(nullptr);
    else
      return static_cast<_VecBuiltin<__bytes>*>(nullptr);
  }

template <typename _Tp, typename = enable_if_t<__is_vectorizable_v<_Tp>>>
  using native = remove_pointer_t<decltype(__determine_native_abi<_Tp>())>;

// }}}
// __default_abi {{{
// The ABI used when none is specified; overridable via the
// _GLIBCXX_SIMD_DEFAULT_ABI macro.
#if defined _GLIBCXX_SIMD_DEFAULT_ABI
template <typename _Tp>
  using __default_abi = _GLIBCXX_SIMD_DEFAULT_ABI<_Tp>;
#else
template <typename _Tp>
  using __default_abi = compatible<_Tp>;
#endif

// }}}
} // namespace simd_abi
2888
2889// traits {{{1
// Trait identifying the load/store flag tag types: element_aligned,
// vector_aligned, and overaligned<_Np> for positive power-of-2 _Np.
template <typename _Tp>
  struct is_simd_flag_type
    : false_type
  {};

template <>
  struct is_simd_flag_type<element_aligned_tag>
    : true_type
  {};

template <>
  struct is_simd_flag_type<vector_aligned_tag>
    : true_type
  {};

// overaligned<_Np> is only a valid flag for power-of-2 alignments.
template <size_t _Np>
  struct is_simd_flag_type<overaligned_tag<_Np>>
    : __bool_constant<(_Np > 0) and __has_single_bit(_Np)>
  {};

template <typename _Tp>
  inline constexpr bool is_simd_flag_type_v = is_simd_flag_type<_Tp>::value;

// SFINAE helper: names _Tp only when it is a valid flag type.
template <typename _Tp, typename = enable_if_t<is_simd_flag_type_v<_Tp>>>
  using _IsSimdFlagType = _Tp;
2915
2916// is_abi_tag {{{2
// Trait identifying ABI tag types: a type is an ABI tag iff it declares a
// nested _IsValidAbiTag (whose value is then forwarded).
template <typename _Tp, typename = void_t<>>
  struct is_abi_tag : false_type {};

template <typename _Tp>
  struct is_abi_tag<_Tp, void_t<typename _Tp::_IsValidAbiTag>>
    : public _Tp::_IsValidAbiTag {};

template <typename _Tp>
  inline constexpr bool is_abi_tag_v = is_abi_tag<_Tp>::value;
2926
2927// is_simd(_mask) {{{2
// Traits identifying simd and simd_mask specializations; the true_type
// specializations follow the class definitions further below.
template <typename _Tp>
  struct is_simd : public false_type {};

template <typename _Tp>
  inline constexpr bool is_simd_v = is_simd<_Tp>::value;

template <typename _Tp>
  struct is_simd_mask : public false_type {};

template <typename _Tp>
inline constexpr bool is_simd_mask_v = is_simd_mask<_Tp>::value;
2939
2940// simd_size {{{2
// simd_size<_Tp, _Abi>: number of elements of _Tp in ABI _Abi. Only
// defined (has a `value`) for vectorizable _Tp combined with an ABI tag.
template <typename _Tp, typename _Abi, typename = void>
  struct __simd_size_impl {};

template <typename _Tp, typename _Abi>
  struct __simd_size_impl<
    _Tp, _Abi,
    enable_if_t<conjunction_v<__is_vectorizable<_Tp>, is_abi_tag<_Abi>>>>
    : _SizeConstant<_Abi::template _S_size<_Tp>> {};

template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>>
  struct simd_size : __simd_size_impl<_Tp, _Abi> {};

template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>>
  inline constexpr size_t simd_size_v = simd_size<_Tp, _Abi>::value;
2955
2956// simd_abi::deduce {{{2
// Implementation of simd_abi::deduce; specializations are provided by the
// ABI-specific headers.
template <typename _Tp, size_t _Np, typename = void>
  struct __deduce_impl;

namespace simd_abi {
/**
 * @tparam _Tp The requested `value_type` for the elements.
 * @tparam _Np The requested number of elements.
 * @tparam _Abis This parameter is ignored, since this implementation cannot
 * make any use of it. Either a good native ABI is matched and used as `type`
 * alias, or the `fixed_size<_Np>` ABI is used, which internally is built from
 * the best matching native ABIs.
 */
template <typename _Tp, size_t _Np, typename...>
  struct deduce : __deduce_impl<_Tp, _Np> {};

template <typename _Tp, size_t _Np, typename... _Abis>
  using deduce_t = typename deduce<_Tp, _Np, _Abis...>::type;
} // namespace simd_abi
2975
2976// }}}2
2977// rebind_simd {{{2
// rebind_simd<_Tp, _V>: the simd (or simd_mask) type with value_type _Tp
// and the same number of elements as _V.
template <typename _Tp, typename _V, typename = void>
  struct rebind_simd;

template <typename _Tp, typename _Up, typename _Abi>
  struct rebind_simd<_Tp, simd<_Up, _Abi>,
                     void_t<simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>>
  { using type = simd<_Tp, simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>; };

template <typename _Tp, typename _Up, typename _Abi>
  struct rebind_simd<_Tp, simd_mask<_Up, _Abi>,
                     void_t<simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>>
  { using type = simd_mask<_Tp, simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>; };

template <typename _Tp, typename _V>
  using rebind_simd_t = typename rebind_simd<_Tp, _V>::type;
2993
2994// resize_simd {{{2
// resize_simd<_Np, _V>: the simd (or simd_mask) type with the same
// value_type as _V but _Np elements.
template <int _Np, typename _V, typename = void>
  struct resize_simd;

template <int _Np, typename _Tp, typename _Abi>
  struct resize_simd<_Np, simd<_Tp, _Abi>, void_t<simd_abi::deduce_t<_Tp, _Np, _Abi>>>
  { using type = simd<_Tp, simd_abi::deduce_t<_Tp, _Np, _Abi>>; };

template <int _Np, typename _Tp, typename _Abi>
  struct resize_simd<_Np, simd_mask<_Tp, _Abi>, void_t<simd_abi::deduce_t<_Tp, _Np, _Abi>>>
  { using type = simd_mask<_Tp, simd_abi::deduce_t<_Tp, _Np, _Abi>>; };

template <int _Np, typename _V>
  using resize_simd_t = typename resize_simd<_Np, _V>::type;
3008
3009// }}}2
3010// memory_alignment {{{2
// The alignment (in bytes) required for vector_aligned loads and stores of a
// simd/simd_mask type _Tp with element type _Up; delegates to the alignment
// computed by vector_aligned_tag.
template <typename _Tp, typename _Up = typename _Tp::value_type>
  struct memory_alignment
  : public _SizeConstant<vector_aligned_tag::_S_alignment<_Tp, _Up>> {};

template <typename _Tp, typename _Up = typename _Tp::value_type>
  inline constexpr size_t memory_alignment_v = memory_alignment<_Tp, _Up>::value;
3017
3018// class template simd [simd] {{{1
// Class template simd, declared with the target's default ABI for _Tp.
template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>>
  class simd;

template <typename _Tp, typename _Abi>
  struct is_simd<simd<_Tp, _Abi>> : public true_type {};

// simd using the native (compiler-flag dependent) ABI for _Tp.
template <typename _Tp>
  using native_simd = simd<_Tp, simd_abi::native<_Tp>>;

// simd with exactly _Np elements via the fixed_size ABI.
template <typename _Tp, int _Np>
  using fixed_size_simd = simd<_Tp, simd_abi::fixed_size<_Np>>;

// Internal shorthand: simd with the ABI deduced for _Tp and _Np elements.
template <typename _Tp, size_t _Np>
  using __deduced_simd = simd<_Tp, simd_abi::deduce_t<_Tp, _Np>>;
3033
3034// class template simd_mask [simd_mask] {{{1
// Class template simd_mask, declared with the target's default ABI for _Tp.
template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>>
  class simd_mask;

template <typename _Tp, typename _Abi>
  struct is_simd_mask<simd_mask<_Tp, _Abi>> : public true_type {};

// simd_mask using the native (compiler-flag dependent) ABI for _Tp.
template <typename _Tp>
  using native_simd_mask = simd_mask<_Tp, simd_abi::native<_Tp>>;

// simd_mask with exactly _Np elements via the fixed_size ABI.
template <typename _Tp, int _Np>
  using fixed_size_simd_mask = simd_mask<_Tp, simd_abi::fixed_size<_Np>>;

// Internal shorthand: simd_mask with the ABI deduced for _Tp and _Np elements.
template <typename _Tp, size_t _Np>
  using __deduced_simd_mask = simd_mask<_Tp, simd_abi::deduce_t<_Tp, _Np>>;
3049
3050// casts [simd.casts] {{{1
3051// static_simd_cast {{{2
// Computes the return type of static_simd_cast<_Tp>(simd<_Up, _Ap>).
// _Tp may be a vectorizable value_type, a simd type, or a simd_mask type.
template <typename _Tp, typename _Up, typename _Ap, bool = is_simd_v<_Tp>, typename = void>
  struct __static_simd_cast_return_type;

// _Tp is a simd_mask: delegate to the corresponding simd type.
template <typename _Tp, typename _A0, typename _Up, typename _Ap>
  struct __static_simd_cast_return_type<simd_mask<_Tp, _A0>, _Up, _Ap, false, void>
  : __static_simd_cast_return_type<simd<_Tp, _A0>, _Up, _Ap> {};

// _Tp is a simd type whose size matches the source: use it unchanged.
template <typename _Tp, typename _Up, typename _Ap>
  struct __static_simd_cast_return_type<
    _Tp, _Up, _Ap, true, enable_if_t<_Tp::size() == simd_size_v<_Up, _Ap>>>
  { using type = _Tp; };

// _Tp equals the source value_type: the result is the source simd type.
template <typename _Tp, typename _Ap>
  struct __static_simd_cast_return_type<_Tp, _Tp, _Ap, false,
#ifdef _GLIBCXX_SIMD_FIX_P2TS_ISSUE66
                                        enable_if_t<__is_vectorizable_v<_Tp>>
#else
                                        void
#endif
                                        >
  { using type = simd<_Tp, _Ap>; };
3073
// Maps an integral type to the signed integer type of equal width; every
// non-integral type is passed through unchanged (hence "safe": usable on
// types make_signed_t would reject).
template <typename _Tp, typename = void>
  struct __safe_make_signed
  { using type = _Tp; };

template <typename _Tp>
  struct __safe_make_signed<_Tp, std::enable_if_t<std::is_integral_v<_Tp>>>
  {
    // Routing through make_unsigned_t first works around PR85951.
    using type = std::make_signed_t<std::make_unsigned_t<_Tp>>;
  };

template <typename _Tp>
  using safe_make_signed_t = typename __safe_make_signed<_Tp>::type;
3086
// _Tp is a different value_type: keep the source ABI when only integer
// signedness changes (same-width integers), otherwise fall back to a
// fixed_size simd with the source's element count.
template <typename _Tp, typename _Up, typename _Ap>
  struct __static_simd_cast_return_type<_Tp, _Up, _Ap, false,
#ifdef _GLIBCXX_SIMD_FIX_P2TS_ISSUE66
                                        enable_if_t<__is_vectorizable_v<_Tp>>
#else
                                        void
#endif
                                        >
  {
    using type = conditional_t<
      (is_integral_v<_Up> && is_integral_v<_Tp> &&
#ifndef _GLIBCXX_SIMD_FIX_P2TS_ISSUE65
       is_signed_v<_Up> != is_signed_v<_Tp> &&
#endif
       // same width <=> same signed counterpart
       is_same_v<safe_make_signed_t<_Up>, safe_make_signed_t<_Tp>>),
      simd<_Tp, _Ap>, fixed_size_simd<_Tp, simd_size_v<_Up, _Ap>>>;
  };
3104
// Casts simd<_Up, _Ap> to _R, the simd type computed by
// __static_simd_cast_return_type from the requested type _Tp (which may name
// a value_type, a simd, or a simd_mask).
template <typename _Tp, typename _Up, typename _Ap,
          typename _R
          = typename __static_simd_cast_return_type<_Tp, _Up, _Ap>::type>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _R
  static_simd_cast(const simd<_Up, _Ap>& __x)
  {
    // Identity cast: nothing to convert.
    if constexpr (is_same<_R, simd<_Up, _Ap>>::value)
      return __x;
    else
      {
        // Element-wise conversion through the matching _SimdConverter.
        _SimdConverter<_Up, _Ap, typename _R::value_type, typename _R::abi_type>
          __c;
        return _R(__private_init, __c(__data(__x)));
      }
  }
3120
3121namespace __proposed {
// Mask overload of static_simd_cast: converts simd_mask<_Up, _Ap> to the
// mask_type of the simd type _R the simd overload would produce.
template <typename _Tp, typename _Up, typename _Ap,
          typename _R
          = typename __static_simd_cast_return_type<_Tp, _Up, _Ap>::type>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR typename _R::mask_type
  static_simd_cast(const simd_mask<_Up, _Ap>& __x)
  {
    using _RM = typename _R::mask_type;
    // The target ABI's mask implementation performs the conversion.
    return {__private_init, _RM::abi_type::_MaskImpl::template _S_convert<
                              typename _RM::simd_type::value_type>(__x)};
  }
3132
// Reinterprets the object representation of a simd as another simd type _To
// of equal size (bit_cast for simd objects).
template <typename _To, typename _Up, typename _Abi>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
  _To
  simd_bit_cast(const simd<_Up, _Abi>& __x)
  {
    using _Tp = typename _To::value_type;
    using _ToMember = typename _SimdTraits<_Tp, typename _To::abi_type>::_SimdMember;
    using _From = simd<_Up, _Abi>;
    using _FromMember = typename _SimdTraits<_Up, _Abi>::_SimdMember;
    // with concepts, the following should be constraints
    static_assert(sizeof(_To) == sizeof(_From));
    static_assert(is_trivially_copyable_v<_Tp> && is_trivially_copyable_v<_Up>);
    static_assert(is_trivially_copyable_v<_ToMember> && is_trivially_copyable_v<_FromMember>);
#if __has_builtin(__builtin_bit_cast)
    return {__private_init, __builtin_bit_cast(_ToMember, __data(__x))};
#else
    // Fallback via the internal __bit_cast helper.
    return {__private_init, __bit_cast<_ToMember>(__data(__x))};
#endif
  }
3152
// Reinterprets the object representation of a simd_mask as _To (a simd or
// simd_mask type) of equal size.
template <typename _To, typename _Up, typename _Abi>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
  _To
  simd_bit_cast(const simd_mask<_Up, _Abi>& __x)
  {
    using _From = simd_mask<_Up, _Abi>;
    static_assert(sizeof(_To) == sizeof(_From));
    static_assert(is_trivially_copyable_v<_From>);
    // _To can be simd<T, A>, specifically simd<T, fixed_size<N>> in which case _To is not trivially
    // copyable.
    if constexpr (is_simd_v<_To>)
      {
        // Bit-cast into the target simd's member type, then wrap it.
        using _Tp = typename _To::value_type;
        using _ToMember = typename _SimdTraits<_Tp, typename _To::abi_type>::_SimdMember;
        static_assert(is_trivially_copyable_v<_ToMember>);
#if __has_builtin(__builtin_bit_cast)
        return {__private_init, __builtin_bit_cast(_ToMember, __x)};
#else
        return {__private_init, __bit_cast<_ToMember>(__x)};
#endif
      }
    else
      {
        // _To is trivially copyable itself; bit-cast the whole object.
        static_assert(is_trivially_copyable_v<_To>);
#if __has_builtin(__builtin_bit_cast)
        return __builtin_bit_cast(_To, __x);
#else
        return __bit_cast<_To>(__x);
#endif
      }
  }
3184} // namespace __proposed
3185
3186// simd_cast {{{2
// simd_cast: like static_simd_cast, but constrained (via _ValuePreserving) to
// conversions that do not lose value information.
template <typename _Tp, typename _Up, typename _Ap,
          typename _To = __value_type_or_identity_t<_Tp>>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR auto
  simd_cast(const simd<_ValuePreserving<_Up, _To>, _Ap>& __x)
  -> decltype(static_simd_cast<_Tp>(__x))
  { return static_simd_cast<_Tp>(__x); }
3193
3194namespace __proposed {
// Mask overload of simd_cast; same value-preserving constraint as above.
template <typename _Tp, typename _Up, typename _Ap,
          typename _To = __value_type_or_identity_t<_Tp>>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR auto
  simd_cast(const simd_mask<_ValuePreserving<_Up, _To>, _Ap>& __x)
  -> decltype(static_simd_cast<_Tp>(__x))
  { return static_simd_cast<_Tp>(__x); }
3201} // namespace __proposed
3202
3203// }}}2
3204// resizing_simd_cast {{{
3205namespace __proposed {
3206/* Proposed spec:
3207
3208template <class T, class U, class Abi>
3209T resizing_simd_cast(const simd<U, Abi>& x)
3210
3211p1 Constraints:
3212 - is_simd_v<T> is true and
3213 - T::value_type is the same type as U
3214
3215p2 Returns:
3216 A simd object with the i^th element initialized to x[i] for all i in the
3217 range of [0, min(T::size(), simd_size_v<U, Abi>)). If T::size() is larger
3218 than simd_size_v<U, Abi>, the remaining elements are value-initialized.
3219
3220template <class T, class U, class Abi>
3221T resizing_simd_cast(const simd_mask<U, Abi>& x)
3222
3223p1 Constraints: is_simd_mask_v<T> is true
3224
3225p2 Returns:
3226 A simd_mask object with the i^th element initialized to x[i] for all i in
3227the range of [0, min(T::size(), simd_size_v<U, Abi>)). If T::size() is larger
3228 than simd_size_v<U, Abi>, the remaining elements are initialized to false.
3229
3230 */
3231
// Converts simd<_Up, _Ap> to _Tp (a simd type with the same value_type but
// possibly different size). Elements [0, min(sizes)) are copied; any extra
// elements of the result are value-initialized (see the spec comment above).
template <typename _Tp, typename _Up, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR enable_if_t<
    conjunction_v<is_simd<_Tp>, is_same<typename _Tp::value_type, _Up>>, _Tp>
  resizing_simd_cast(const simd<_Up, _Ap>& __x)
  {
    // Same ABI: no conversion needed.
    if constexpr (is_same_v<typename _Tp::abi_type, _Ap>)
      return __x;
    // Constant evaluation: use the element-wise generator constructor.
    else if (__builtin_is_constant_evaluated())
      return _Tp([&](auto __i) constexpr {
               return __i < simd_size_v<_Up, _Ap> ? __x[__i] : _Up();
             });
    // Single source element: scalar broadcast into element 0.
    else if constexpr (simd_size_v<_Up, _Ap> == 1)
      {
        _Tp __r{};
        __r[0] = __x[0];
        return __r;
      }
    // Single destination element: just extract element 0.
    else if constexpr (_Tp::size() == 1)
      return __x[0];
    // Same object size and non-fixed-size source: reinterpret the vector,
    // after masking off the source's unused (padding) lanes.
    else if constexpr (sizeof(_Tp) == sizeof(__x)
                       && !__is_fixed_size_abi_v<_Ap>)
      return {__private_init,
              __vector_bitcast<typename _Tp::value_type, _Tp::size()>(
                _Ap::_S_masked(__data(__x))._M_data)};
    // Generic fallback: byte-wise copy of the overlapping elements into a
    // value-initialized result.
    else
      {
        _Tp __r{};
        __builtin_memcpy(&__data(__r), &__data(__x),
                         sizeof(_Up)
                           * std::min(_Tp::size(), simd_size_v<_Up, _Ap>));
        return __r;
      }
  }
3265
// Mask overload of resizing_simd_cast: elements beyond the source size are
// initialized to false by the mask conversion.
template <typename _Tp, typename _Up, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
  enable_if_t<is_simd_mask_v<_Tp>, _Tp>
  resizing_simd_cast(const simd_mask<_Up, _Ap>& __x)
  {
    return {__private_init, _Tp::abi_type::_MaskImpl::template _S_convert<
                              typename _Tp::simd_type::value_type>(__x)};
  }
3274} // namespace __proposed
3275
3276// }}}
3277// to_fixed_size {{{2
// to_fixed_size: converts any simd/simd_mask to the fixed_size ABI of the
// same element count. The fixed_size overloads are identity functions.
template <typename _Tp, int _Np>
  _GLIBCXX_SIMD_INTRINSIC fixed_size_simd<_Tp, _Np>
  to_fixed_size(const fixed_size_simd<_Tp, _Np>& __x)
  { return __x; }

template <typename _Tp, int _Np>
  _GLIBCXX_SIMD_INTRINSIC fixed_size_simd_mask<_Tp, _Np>
  to_fixed_size(const fixed_size_simd_mask<_Tp, _Np>& __x)
  { return __x; }

// Generic simd: element-wise copy via the generator constructor.
template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC fixed_size_simd<_Tp, simd_size_v<_Tp, _Ap>>
  to_fixed_size(const simd<_Tp, _Ap>& __x)
  {
    using _Rp = fixed_size_simd<_Tp, simd_size_v<_Tp, _Ap>>;
    return _Rp([&__x](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; });
  }

// Generic simd_mask: element-wise copy via the private generator init.
template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC fixed_size_simd_mask<_Tp, simd_size_v<_Tp, _Ap>>
  to_fixed_size(const simd_mask<_Tp, _Ap>& __x)
  {
    return {__private_init,
            [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; }};
  }
3303
3304// to_native {{{2
// to_native: converts a fixed_size simd/simd_mask to the native ABI;
// participates in overload resolution only when the sizes match.
template <typename _Tp, int _Np>
  _GLIBCXX_SIMD_INTRINSIC
  enable_if_t<(_Np == native_simd<_Tp>::size()), native_simd<_Tp>>
  to_native(const fixed_size_simd<_Tp, _Np>& __x)
  {
    // Round-trip through aligned memory: store fixed_size, load native.
    alignas(memory_alignment_v<native_simd<_Tp>>) _Tp __mem[_Np];
    __x.copy_to(__mem, vector_aligned);
    return {__mem, vector_aligned};
  }

template <typename _Tp, int _Np>
  _GLIBCXX_SIMD_INTRINSIC
  enable_if_t<(_Np == native_simd_mask<_Tp>::size()), native_simd_mask<_Tp>>
  to_native(const fixed_size_simd_mask<_Tp, _Np>& __x)
  {
    return native_simd_mask<_Tp>(
             __private_init,
             [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; });
  }
3324
3325// to_compatible {{{2
// to_compatible: converts a fixed_size simd/simd_mask to the default
// (compatible) ABI; enabled only when the sizes match.
template <typename _Tp, int _Np>
  _GLIBCXX_SIMD_INTRINSIC enable_if_t<(_Np == simd<_Tp>::size()), simd<_Tp>>
  to_compatible(const simd<_Tp, simd_abi::fixed_size<_Np>>& __x)
  {
    // Round-trip through aligned memory, as in to_native above.
    alignas(memory_alignment_v<simd<_Tp>>) _Tp __mem[_Np];
    __x.copy_to(__mem, vector_aligned);
    return {__mem, vector_aligned};
  }

template <typename _Tp, int _Np>
  _GLIBCXX_SIMD_INTRINSIC
  enable_if_t<(_Np == simd_mask<_Tp>::size()), simd_mask<_Tp>>
  to_compatible(const simd_mask<_Tp, simd_abi::fixed_size<_Np>>& __x)
  {
    return simd_mask<_Tp>(
             __private_init,
             [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; });
  }
3344
3345// masked assignment [simd_mask.where] {{{1
3346
3347// where_expression {{{1
3348// const_where_expression<M, T> {{{2
// Read-only masked view on a value _Tp selected by mask _M, as returned by
// where(mask, const value). Supports unary minus, masked loads (copy_from)
// and masked stores (copy_to); all member functions are &&-qualified so the
// expression can only be used directly on the where(...) result.
template <typename _M, typename _Tp>
  class const_where_expression
  {
    using _V = _Tp;
    static_assert(is_same_v<_V, __remove_cvref_t<_Tp>>);

    // Gives arithmetic _V a nested value_type for the conditional_t below.
    struct _Wrapper { using value_type = _V; };

  protected:
    using _Impl = typename _V::_Impl;

    using value_type =
      typename conditional_t<is_arithmetic_v<_V>, _Wrapper, _V>::value_type;

    // Accessors for internal friends (e.g. reduce, hmin, hmax).
    _GLIBCXX_SIMD_INTRINSIC friend const _M&
    __get_mask(const const_where_expression& __x)
    { return __x._M_k; }

    _GLIBCXX_SIMD_INTRINSIC friend const _Tp&
    __get_lvalue(const const_where_expression& __x)
    { return __x._M_value; }

    const _M& _M_k;   // the selecting mask
    _Tp& _M_value;    // the viewed object (non-const ref; writes only in derived class)

  public:
    const_where_expression(const const_where_expression&) = delete;

    const_where_expression& operator=(const const_where_expression&) = delete;

    _GLIBCXX_SIMD_INTRINSIC constexpr
    const_where_expression(const _M& __kk, const _Tp& dd)
    : _M_k(__kk), _M_value(const_cast<_Tp&>(dd)) {}

    // Masked negation: negates selected elements, passes the rest through.
    _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _V
    operator-() const&&
    {
      return {__private_init,
              _Impl::template _S_masked_unary<negate>(__data(_M_k),
                                                      __data(_M_value))};
    }

    // Masked load: selected elements read from __mem, the rest keep the
    // current value. Returns the blended result (does not modify _M_value).
    template <typename _Up, typename _Flags>
      [[nodiscard]] _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _V
      copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) const&&
      {
        return {__private_init,
                _Impl::_S_masked_load(__data(_M_value), __data(_M_k),
                                      _Flags::template _S_apply<_V>(__mem))};
      }

    // Masked store: only selected elements are written to __mem.
    template <typename _Up, typename _Flags>
      _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
      copy_to(_LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) const&&
      {
        _Impl::_S_masked_store(__data(_M_value),
                               _Flags::template _S_apply<_V>(__mem),
                               __data(_M_k));
      }
  };
3409
3410// const_where_expression<bool, T> {{{2
// Scalar specialization: the mask is a single bool, the value a scalar.
// Operations either apply fully (mask true) or not at all (mask false).
template <typename _Tp>
  class const_where_expression<bool, _Tp>
  {
    using _M = bool;
    using _V = _Tp;

    static_assert(is_same_v<_V, __remove_cvref_t<_Tp>>);

    // Gives arithmetic _V a nested value_type for the conditional_t below.
    struct _Wrapper { using value_type = _V; };

  protected:
    using value_type
      = typename conditional_t<is_arithmetic_v<_V>, _Wrapper, _V>::value_type;

    _GLIBCXX_SIMD_INTRINSIC friend const _M&
    __get_mask(const const_where_expression& __x)
    { return __x._M_k; }

    _GLIBCXX_SIMD_INTRINSIC friend const _Tp&
    __get_lvalue(const const_where_expression& __x)
    { return __x._M_value; }

    const bool _M_k;  // the scalar mask
    _Tp& _M_value;    // the viewed scalar

  public:
    const_where_expression(const const_where_expression&) = delete;
    const_where_expression& operator=(const const_where_expression&) = delete;

    _GLIBCXX_SIMD_INTRINSIC constexpr
    const_where_expression(const bool __kk, const _Tp& dd)
    : _M_k(__kk), _M_value(const_cast<_Tp&>(dd)) {}

    // Negate only if the mask is set.
    _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _V
    operator-() const&&
    { return _M_k ? -_M_value : _M_value; }

    // Load from __mem only if the mask is set; otherwise keep the value.
    template <typename _Up, typename _Flags>
      [[nodiscard]] _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _V
      copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) const&&
      { return _M_k ? static_cast<_V>(__mem[0]) : _M_value; }

    // Store to __mem only if the mask is set.
    template <typename _Up, typename _Flags>
      _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
      copy_to(_LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) const&&
      {
        if (_M_k)
          __mem[0] = _M_value;
      }
  };
3461
3462// where_expression<M, T> {{{2
3463template <typename _M, typename _Tp>
3464 class where_expression : public const_where_expression<_M, _Tp>
3465 {
3466 using _Impl = typename const_where_expression<_M, _Tp>::_Impl;
3467
3468 static_assert(!is_const<_Tp>::value,
3469 "where_expression may only be instantiated with __a non-const "
3470 "_Tp parameter");
3471
3472 using typename const_where_expression<_M, _Tp>::value_type;
3473 using const_where_expression<_M, _Tp>::_M_k;
3474 using const_where_expression<_M, _Tp>::_M_value;
3475
3476 static_assert(
3477 is_same<typename _M::abi_type, typename _Tp::abi_type>::value, "");
3478 static_assert(_M::size() == _Tp::size(), "");
3479
3480 _GLIBCXX_SIMD_INTRINSIC friend constexpr _Tp&
3481 __get_lvalue(where_expression& __x)
3482 { return __x._M_value; }
3483
3484 public:
3485 where_expression(const where_expression&) = delete;
3486 where_expression& operator=(const where_expression&) = delete;
3487
3488 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3489 where_expression(const _M& __kk, _Tp& dd)
3490 : const_where_expression<_M, _Tp>(__kk, dd) {}
3491
3492 template <typename _Up>
3493 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3494 operator=(_Up&& __x) &&
3495 {
3496 _Impl::_S_masked_assign(__data(_M_k), __data(_M_value),
3497 __to_value_type_or_member_type<_Tp>(
3498 static_cast<_Up&&>(__x)));
3499 }
3500
3501#define _GLIBCXX_SIMD_OP_(__op, __name) \
3502 template <typename _Up> \
3503 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void \
3504 operator __op##=(_Up&& __x)&& \
3505 { \
3506 _Impl::template _S_masked_cassign( \
3507 __data(_M_k), __data(_M_value), \
3508 __to_value_type_or_member_type<_Tp>(static_cast<_Up&&>(__x)), \
3509 [](auto __impl, auto __lhs, auto __rhs) \
3510 constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA \
3511 { return __impl.__name(__lhs, __rhs); }); \
3512 } \
3513 static_assert(true)
3514 _GLIBCXX_SIMD_OP_(+, _S_plus);
3515 _GLIBCXX_SIMD_OP_(-, _S_minus);
3516 _GLIBCXX_SIMD_OP_(*, _S_multiplies);
3517 _GLIBCXX_SIMD_OP_(/, _S_divides);
3518 _GLIBCXX_SIMD_OP_(%, _S_modulus);
3519 _GLIBCXX_SIMD_OP_(&, _S_bit_and);
3520 _GLIBCXX_SIMD_OP_(|, _S_bit_or);
3521 _GLIBCXX_SIMD_OP_(^, _S_bit_xor);
3522 _GLIBCXX_SIMD_OP_(<<, _S_shift_left);
3523 _GLIBCXX_SIMD_OP_(>>, _S_shift_right);
3524#undef _GLIBCXX_SIMD_OP_
3525
3526 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3527 operator++() &&
3528 {
3529 __data(_M_value)
3530 = _Impl::template _S_masked_unary<__increment>(__data(_M_k), __data(_M_value));
3531 }
3532
3533 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3534 operator++(int) &&
3535 {
3536 __data(_M_value)
3537 = _Impl::template _S_masked_unary<__increment>(__data(_M_k), __data(_M_value));
3538 }
3539
3540 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3541 operator--() &&
3542 {
3543 __data(_M_value)
3544 = _Impl::template _S_masked_unary<__decrement>(__data(_M_k), __data(_M_value));
3545 }
3546
3547 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3548 operator--(int) &&
3549 {
3550 __data(_M_value)
3551 = _Impl::template _S_masked_unary<__decrement>(__data(_M_k), __data(_M_value));
3552 }
3553
3554 // intentionally hides const_where_expression::copy_from
3555 template <typename _Up, typename _Flags>
3556 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3557 copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) &&
3558 {
3559 __data(_M_value) = _Impl::_S_masked_load(__data(_M_value), __data(_M_k),
3560 _Flags::template _S_apply<_Tp>(__mem));
3561 }
3562 };
3563
3564// where_expression<bool, T> {{{2
// Scalar specialization of where_expression: the mask is a single bool, so
// every operation is guarded by a plain `if (_M_k)`.
template <typename _Tp>
  class where_expression<bool, _Tp>
  : public const_where_expression<bool, _Tp>
  {
    using _M = bool;
    using typename const_where_expression<_M, _Tp>::value_type;
    using const_where_expression<_M, _Tp>::_M_k;
    using const_where_expression<_M, _Tp>::_M_value;

  public:
    where_expression(const where_expression&) = delete;
    where_expression& operator=(const where_expression&) = delete;

    _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
    where_expression(const _M& __kk, _Tp& dd)
    : const_where_expression<_M, _Tp>(__kk, dd) {}

    // Guarded (compound) assignment operators.
#define _GLIBCXX_SIMD_OP_(__op)                                    \
    template <typename _Up>                                        \
      _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void         \
      operator __op(_Up&& __x)&&                                   \
      { if (_M_k) _M_value __op static_cast<_Up&&>(__x); }

    _GLIBCXX_SIMD_OP_(=)
    _GLIBCXX_SIMD_OP_(+=)
    _GLIBCXX_SIMD_OP_(-=)
    _GLIBCXX_SIMD_OP_(*=)
    _GLIBCXX_SIMD_OP_(/=)
    _GLIBCXX_SIMD_OP_(%=)
    _GLIBCXX_SIMD_OP_(&=)
    _GLIBCXX_SIMD_OP_(|=)
    _GLIBCXX_SIMD_OP_(^=)
    _GLIBCXX_SIMD_OP_(<<=)
    _GLIBCXX_SIMD_OP_(>>=)
  #undef _GLIBCXX_SIMD_OP_

    // Guarded increment/decrement; pre and post forms are identical because
    // nothing is returned.
    _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
    operator++() &&
    { if (_M_k) ++_M_value; }

    _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
    operator++(int) &&
    { if (_M_k) ++_M_value; }

    _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
    operator--() &&
    { if (_M_k) --_M_value; }

    _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
    operator--(int) &&
    { if (_M_k) --_M_value; }

    // intentionally hides const_where_expression::copy_from
    template <typename _Up, typename _Flags>
      _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
      copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) &&
      { if (_M_k) _M_value = __mem[0]; }
  };
3623
3624// where {{{1
// where(mask, value): builds a (const_)where_expression viewing __value
// masked by __k. Const values yield the read-only expression type.
template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
  where_expression<simd_mask<_Tp, _Ap>, simd<_Tp, _Ap>>
  where(const typename simd<_Tp, _Ap>::mask_type& __k, simd<_Tp, _Ap>& __value)
  { return {__k, __value}; }

template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
  const_where_expression<simd_mask<_Tp, _Ap>, simd<_Tp, _Ap>>
  where(const typename simd<_Tp, _Ap>::mask_type& __k, const simd<_Tp, _Ap>& __value)
  { return {__k, __value}; }

// Masked view on a simd_mask itself.
template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
  where_expression<simd_mask<_Tp, _Ap>, simd_mask<_Tp, _Ap>>
  where(const remove_const_t<simd_mask<_Tp, _Ap>>& __k, simd_mask<_Tp, _Ap>& __value)
  { return {__k, __value}; }

template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
  const_where_expression<simd_mask<_Tp, _Ap>, simd_mask<_Tp, _Ap>>
  where(const remove_const_t<simd_mask<_Tp, _Ap>>& __k, const simd_mask<_Tp, _Ap>& __value)
  { return {__k, __value}; }

// Scalar versions: mask must be exactly bool (no implicit conversions).
template <typename _Tp>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR where_expression<bool, _Tp>
  where(_ExactBool __k, _Tp& __value)
  { return {__k, __value}; }

template <typename _Tp>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR const_where_expression<bool, _Tp>
  where(_ExactBool __k, const _Tp& __value)
  { return {__k, __value}; }

// A bool mask on a simd value is almost certainly a bug => deleted.
template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_CONSTEXPR void
  where(bool __k, simd<_Tp, _Ap>& __value) = delete;

template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_CONSTEXPR void
  where(bool __k, const simd<_Tp, _Ap>& __value) = delete;
3666
3667// proposed mask iterations {{{1
3668namespace __proposed {
3669template <size_t _Np>
3670 class where_range
3671 {
3672 const bitset<_Np> __bits;
3673
3674 public:
3675 where_range(bitset<_Np> __b) : __bits(__b) {}
3676
3677 class iterator
3678 {
3679 size_t __mask;
3680 size_t __bit;
3681
3682 _GLIBCXX_SIMD_INTRINSIC void
3683 __next_bit()
3684 { __bit = __builtin_ctzl(__mask); }
3685
3686 _GLIBCXX_SIMD_INTRINSIC void
3687 __reset_lsb()
3688 {
3689 // 01100100 - 1 = 01100011
3690 __mask &= (__mask - 1);
3691 // __asm__("btr %1,%0" : "+r"(__mask) : "r"(__bit));
3692 }
3693
3694 public:
3695 iterator(decltype(__mask) __m) : __mask(__m) { __next_bit(); }
3696 iterator(const iterator&) = default;
3697 iterator(iterator&&) = default;
3698
3699 _GLIBCXX_SIMD_ALWAYS_INLINE size_t
3700 operator->() const
3701 { return __bit; }
3702
3703 _GLIBCXX_SIMD_ALWAYS_INLINE size_t
3704 operator*() const
3705 { return __bit; }
3706
3707 _GLIBCXX_SIMD_ALWAYS_INLINE iterator&
3708 operator++()
3709 {
3710 __reset_lsb();
3711 __next_bit();
3712 return *this;
3713 }
3714
3715 _GLIBCXX_SIMD_ALWAYS_INLINE iterator
3716 operator++(int)
3717 {
3718 iterator __tmp = *this;
3719 __reset_lsb();
3720 __next_bit();
3721 return __tmp;
3722 }
3723
3724 _GLIBCXX_SIMD_ALWAYS_INLINE bool
3725 operator==(const iterator& __rhs) const
3726 { return __mask == __rhs.__mask; }
3727
3728 _GLIBCXX_SIMD_ALWAYS_INLINE bool
3729 operator!=(const iterator& __rhs) const
3730 { return __mask != __rhs.__mask; }
3731 };
3732
3733 iterator
3734 begin() const
3735 { return __bits.to_ullong(); }
3736
3737 iterator
3738 end() const
3739 { return 0; }
3740 };
3741
// where(mask): iterate over the indices of the true elements of __k.
template <typename _Tp, typename _Ap>
  where_range<simd_size_v<_Tp, _Ap>>
  where(const simd_mask<_Tp, _Ap>& __k)
  { return __k.__to_bitset(); }
3746
3747} // namespace __proposed
3748
3749// }}}1
3750// reductions [simd.reductions] {{{1
// Horizontal reduction of all elements of __v with __binary_op (default: +).
template <typename _Tp, typename _Abi, typename _BinaryOperation = plus<>>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _Tp
  reduce(const simd<_Tp, _Abi>& __v, _BinaryOperation __binary_op = _BinaryOperation())
  { return _Abi::_SimdImpl::_S_reduce(__v, __binary_op); }
3755
// Masked reduction: unselected elements contribute __identity_element.
template <typename _M, typename _V, typename _BinaryOperation = plus<>>
  _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
  reduce(const const_where_expression<_M, _V>& __x,
         typename _V::value_type __identity_element, _BinaryOperation __binary_op)
  {
    // Empty mask: result is the identity element by definition.
    if (__builtin_expect(none_of(__get_mask(__x)), false))
      return __identity_element;

    // Blend selected elements over a vector of identity elements, then
    // reduce the full vector.
    _V __tmp = __identity_element;
    _V::_Impl::_S_masked_assign(__data(__get_mask(__x)), __data(__tmp),
                                __data(__get_lvalue(__x)));
    return reduce(__tmp, __binary_op);
  }
3769
// Convenience overloads supplying the identity element for the standard
// operations: 0 for +, | and ^; 1 for *; all-ones for &.
template <typename _M, typename _V>
  _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
  reduce(const const_where_expression<_M, _V>& __x, plus<> __binary_op = {})
  { return reduce(__x, 0, __binary_op); }

template <typename _M, typename _V>
  _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
  reduce(const const_where_expression<_M, _V>& __x, multiplies<> __binary_op)
  { return reduce(__x, 1, __binary_op); }

template <typename _M, typename _V>
  _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
  reduce(const const_where_expression<_M, _V>& __x, bit_and<> __binary_op)
  { return reduce(__x, ~typename _V::value_type(), __binary_op); }

template <typename _M, typename _V>
  _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
  reduce(const const_where_expression<_M, _V>& __x, bit_or<> __binary_op)
  { return reduce(__x, 0, __binary_op); }

template <typename _M, typename _V>
  _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
  reduce(const const_where_expression<_M, _V>& __x, bit_xor<> __binary_op)
  { return reduce(__x, 0, __binary_op); }
3794
// Horizontal minimum of all elements of __v.
template <typename _Tp, typename _Abi>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _Tp
  hmin(const simd<_Tp, _Abi>& __v) noexcept
  { return _Abi::_SimdImpl::_S_reduce(__v, __detail::_Minimum()); }

// Horizontal maximum of all elements of __v.
template <typename _Tp, typename _Abi>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _Tp
  hmax(const simd<_Tp, _Abi>& __v) noexcept
  { return _Abi::_SimdImpl::_S_reduce(__v, __detail::_Maximum()); }
3804
// Masked horizontal minimum: unselected elements are replaced by the
// identity element (+inf if available, else the largest finite value).
template <typename _M, typename _V>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
  typename _V::value_type
  hmin(const const_where_expression<_M, _V>& __x) noexcept
  {
    using _Tp = typename _V::value_type;
    constexpr _Tp __id_elem =
#ifdef __FINITE_MATH_ONLY__
      // with -ffinite-math-only infinities must not be used
      __finite_max_v<_Tp>;
#else
      __value_or<__infinity, _Tp>(__finite_max_v<_Tp>);
#endif
    _V __tmp = __id_elem;
    _V::_Impl::_S_masked_assign(__data(__get_mask(__x)), __data(__tmp),
                                __data(__get_lvalue(__x)));
    return _V::abi_type::_SimdImpl::_S_reduce(__tmp, __detail::_Minimum());
  }

// Masked horizontal maximum: unselected elements are replaced by the
// identity element (-inf if available, else the smallest finite value).
template <typename _M, typename _V>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
  typename _V::value_type
  hmax(const const_where_expression<_M, _V>& __x) noexcept
  {
    using _Tp = typename _V::value_type;
    constexpr _Tp __id_elem =
#ifdef __FINITE_MATH_ONLY__
      // with -ffinite-math-only infinities must not be used
      __finite_min_v<_Tp>;
#else
      [] {
        if constexpr (__value_exists_v<__infinity, _Tp>)
          return -__infinity_v<_Tp>;
        else
          return __finite_min_v<_Tp>;
      }();
#endif
    _V __tmp = __id_elem;
    _V::_Impl::_S_masked_assign(__data(__get_mask(__x)), __data(__tmp),
                                __data(__get_lvalue(__x)));
    return _V::abi_type::_SimdImpl::_S_reduce(__tmp, __detail::_Maximum());
  }
3845
3846// }}}1
3847// algorithms [simd.alg] {{{
// Element-wise minimum of __a and __b.
template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
  min(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
  { return {__private_init, _Ap::_SimdImpl::_S_min(__data(__a), __data(__b))}; }

// Element-wise maximum of __a and __b.
template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
  max(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
  { return {__private_init, _Ap::_SimdImpl::_S_max(__data(__a), __data(__b))}; }

// Element-wise {min, max} computed in a single implementation call.
template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
  pair<simd<_Tp, _Ap>, simd<_Tp, _Ap>>
  minmax(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
  {
    const auto pair_of_members
      = _Ap::_SimdImpl::_S_minmax(__data(__a), __data(__b));
    return {simd<_Tp, _Ap>(__private_init, pair_of_members.first),
            simd<_Tp, _Ap>(__private_init, pair_of_members.second)};
  }

// Element-wise clamp of __v into [__lo, __hi], as min(hi, max(lo, v)).
template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
  clamp(const simd<_Tp, _Ap>& __v, const simd<_Tp, _Ap>& __lo, const simd<_Tp, _Ap>& __hi)
  {
    using _Impl = typename _Ap::_SimdImpl;
    return {__private_init,
            _Impl::_S_min(__data(__hi),
                          _Impl::_S_max(__data(__lo), __data(__v)))};
  }
3878
3879// }}}
3880
// Forward declaration of split: partitions __x into consecutive pieces of
// the given _Sizes, which must sum to the source's element count.
template <size_t... _Sizes, typename _Tp, typename _Ap,
          typename = enable_if_t<((_Sizes + ...) == simd<_Tp, _Ap>::size())>>
  inline tuple<simd<_Tp, simd_abi::deduce_t<_Tp, _Sizes>>...>
  split(const simd<_Tp, _Ap>&);
3885
3886// __extract_part {{{
// Extracts the _Index-th of _Total equally sized parts of __x, combining
// _Combine adjacent parts into a single result (hence _Np / _Total * _Combine
// result elements).
template <int _Index, int _Total, int _Combine = 1, typename _Tp, size_t _Np>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_CONST constexpr
  _SimdWrapper<_Tp, _Np / _Total * _Combine>
  __extract_part(const _SimdWrapper<_Tp, _Np> __x);

// Overload for fixed_size storage (_SimdTuple).
template <int _Index, int _Parts, int _Combine = 1, typename _Tp, typename _A0, typename... _As>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __extract_part(const _SimdTuple<_Tp, _A0, _As...>& __x);
3895
3896// }}}
3897// _SizeList {{{
// Compile-time list of sizes, used to implement split()/concat-style
// operations over deduced ABI pieces.
template <size_t _V0, size_t... _Values>
  struct _SizeList
  {
    // The _I-th size in the list (0-based).
    template <size_t _I>
      static constexpr size_t
      _S_at(_SizeConstant<_I> = {})
      {
        if constexpr (_I == 0)
          return _V0;
        else
          return _SizeList<_Values...>::template _S_at<_I - 1>();
      }

    // Sum of the first _I sizes (the offset of the _I-th piece).
    template <size_t _I>
      static constexpr auto
      _S_before(_SizeConstant<_I> = {})
      {
        if constexpr (_I == 0)
          return _SizeConstant<0>();
        else
          return _SizeConstant<
                   _V0 + _SizeList<_Values...>::template _S_before<_I - 1>()>();
      }

    // The list with the first _Np entries removed.
    template <size_t _Np>
      static constexpr auto
      _S_pop_front(_SizeConstant<_Np> = {})
      {
        if constexpr (_Np == 0)
          return _SizeList();
        else
          return _SizeList<_Values...>::template _S_pop_front<_Np - 1>();
      }
  };
3932
3933// }}}
3934// __extract_center {{{
// Returns the middle half of __x: viewing __x as four quarters
// {x0 x1 x2 x3}, the result is {x1 x2}.
template <typename _Tp, size_t _Np>
  _GLIBCXX_SIMD_INTRINSIC _SimdWrapper<_Tp, _Np / 2>
  __extract_center(_SimdWrapper<_Tp, _Np> __x)
  {
    static_assert(_Np >= 4);
    static_assert(_Np % 4 == 0); // x0 - x1 - x2 - x3 -> return {x1, x2}
#if _GLIBCXX_SIMD_X86INTRIN // {{{
    if constexpr (__have_avx512f && sizeof(_Tp) * _Np == 64)
      {
	// 64-byte register: shuffle the two middle 128-bit lanes to the
	// front, then truncate to 256 bits.
	const auto __intrin = __to_intrin(__x);
	if constexpr (is_integral_v<_Tp>)
	  return __vector_bitcast<_Tp>(_mm512_castsi512_si256(
	    _mm512_shuffle_i32x4(__intrin, __intrin,
				 1 + 2 * 0x4 + 2 * 0x10 + 3 * 0x40)));
	else if constexpr (sizeof(_Tp) == 4)
	  return __vector_bitcast<_Tp>(_mm512_castps512_ps256(
	    _mm512_shuffle_f32x4(__intrin, __intrin,
				 1 + 2 * 0x4 + 2 * 0x10 + 3 * 0x40)));
	else if constexpr (sizeof(_Tp) == 8)
	  return __vector_bitcast<_Tp>(_mm512_castpd512_pd256(
	    _mm512_shuffle_f64x2(__intrin, __intrin,
				 1 + 2 * 0x4 + 2 * 0x10 + 3 * 0x40)));
	else
	  __assert_unreachable<_Tp>();
      }
    else if constexpr (sizeof(_Tp) * _Np == 32 && is_floating_point_v<_Tp>)
      // 32-byte float/double: combine high half of the low lane with the low
      // half of the high lane.
      return __vector_bitcast<_Tp>(
	       _mm_shuffle_pd(__lo128(__vector_bitcast<double>(__x)),
			      __hi128(__vector_bitcast<double>(__x)), 1));
    else if constexpr (sizeof(__x) == 32 && sizeof(_Tp) * _Np <= 32)
      // Integral 32-byte case: byte-align the middle half into one register.
      return __vector_bitcast<_Tp>(
	       _mm_alignr_epi8(__hi128(__vector_bitcast<_LLong>(__x)),
			       __lo128(__vector_bitcast<_LLong>(__x)),
			       sizeof(_Tp) * _Np / 4));
    else
#endif // _GLIBCXX_SIMD_X86INTRIN }}}
      {
	// Generic fallback: copy the middle half byte-wise.
	__vector_type_t<_Tp, _Np / 2> __r;
	__builtin_memcpy(&__r,
			 reinterpret_cast<const char*>(&__x)
			   + sizeof(_Tp) * _Np / 4,
			 sizeof(_Tp) * _Np / 2);
	return __r;
      }
  }
3980
// __extract_center overload for the fixed_size storage type (_SimdTuple).
template <typename _Tp, typename _A0, typename... _As>
  _GLIBCXX_SIMD_INTRINSIC
  _SimdWrapper<_Tp, _SimdTuple<_Tp, _A0, _As...>::_S_size() / 2>
  __extract_center(const _SimdTuple<_Tp, _A0, _As...>& __x)
  {
    if constexpr (sizeof...(_As) == 0)
      // Single tuple entry: defer to the _SimdWrapper overload above.
      return __extract_center(__x.first);
    else
      // Quarters 1 and 2 combined == the middle half.
      return __extract_part<1, 4, 2>(__x);
  }
3991
3992// }}}
3993// __split_wrapper {{{
// Rewraps a _SimdTuple as a fixed_size_simd and forwards to split<_Sizes...>.
// Needed so a new _Sizes pack can be deduced when split recurses (see
// split<_Sizes...> below).
template <size_t... _Sizes, typename _Tp, typename... _As>
  auto
  __split_wrapper(_SizeList<_Sizes...>, const _SimdTuple<_Tp, _As...>& __x)
  {
    return split<_Sizes...>(
      fixed_size_simd<_Tp, _SimdTuple<_Tp, _As...>::_S_size()>(__private_init,
							       __x));
  }
4002
4003// }}}
4004
4005// split<simd>(simd) {{{
// Splits __x into _Parts chunks of the explicitly given simd type _V;
// _Parts is deduced from the sizes of _V and of __x.
template <typename _V, typename _Ap,
	  size_t _Parts = simd_size_v<typename _V::value_type, _Ap> / _V::size()>
  enable_if_t<simd_size_v<typename _V::value_type, _Ap> == _Parts * _V::size()
	      && is_simd_v<_V>, array<_V, _Parts>>
  split(const simd<typename _V::value_type, _Ap>& __x)
  {
    using _Tp = typename _V::value_type;
    if constexpr (_Parts == 1)
      {
	return {simd_cast<_V>(__x)};
      }
    else if (__x._M_is_constprop())
      {
	// Input known at compile time: build each part element-wise so the
	// whole split folds away.
	return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
		 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
		   return _V([&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
			   { return __x[__i * _V::size() + __j]; });
		 });
      }
    else if constexpr (
      __is_fixed_size_abi_v<_Ap>
      && (is_same_v<typename _V::abi_type, simd_abi::scalar>
	  || (__is_fixed_size_abi_v<typename _V::abi_type>
	      && sizeof(_V) == sizeof(_Tp) * _V::size() // _V doesn't have padding
	      )))
      {
	// fixed_size -> fixed_size (w/o padding) or scalar
#ifdef _GLIBCXX_SIMD_USE_ALIASING_LOADS
	// Reload each part directly from the source object's storage.
	const __may_alias<_Tp>* const __element_ptr
	  = reinterpret_cast<const __may_alias<_Tp>*>(&__data(__x));
	return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
		 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
		 { return _V(__element_ptr + __i * _V::size(), vector_aligned); });
#else
	// Element-wise copy via the generator constructor.
	const auto& __xx = __data(__x);
	return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
		 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
		   [[maybe_unused]] constexpr size_t __offset
		     = decltype(__i)::value * _V::size();
		   return _V([&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
			   constexpr _SizeConstant<__j + __offset> __k;
			   return __xx[__k];
			 });
		 });
#endif
      }
    else if constexpr (is_same_v<typename _V::abi_type, simd_abi::scalar>)
      {
	// normally memcpy should work here as well
	return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
		 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; });
      }
    else
      {
	return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
		 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
		   if constexpr (__is_fixed_size_abi_v<typename _V::abi_type>)
		     return _V([&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
			     return __x[__i * _V::size() + __j];
			   });
		   else
		     // Native destination ABI: extract part __i of the
		     // source register(s).
		     return _V(__private_init,
			       __extract_part<decltype(__i)::value, _Parts>(__data(__x)));
		 });
      }
  }
4072
4073// }}}
4074// split<simd_mask>(simd_mask) {{{
// split overload for simd_mask, analogous to the simd overload above.
template <typename _V, typename _Ap,
	  size_t _Parts = simd_size_v<typename _V::simd_type::value_type, _Ap> / _V::size()>
  enable_if_t<is_simd_mask_v<_V> && simd_size_v<typename
    _V::simd_type::value_type, _Ap> == _Parts * _V::size(), array<_V, _Parts>>
  split(const simd_mask<typename _V::simd_type::value_type, _Ap>& __x)
  {
    if constexpr (is_same_v<_Ap, typename _V::abi_type>)
      return {__x};
    else if constexpr (_Parts == 1)
      return {__proposed::static_simd_cast<_V>(__x)};
    else if constexpr (_Parts == 2 && __is_sse_abi<typename _V::abi_type>()
			 && __is_avx_abi<_Ap>())
      // AVX -> 2x SSE: split the 256-bit mask register into its halves.
      return {_V(__private_init, __lo128(__data(__x))),
	      _V(__private_init, __hi128(__data(__x)))};
    else if constexpr (_V::size() <= __CHAR_BIT__ * sizeof(_ULLong))
      {
	// Each part fits into an unsigned long long: go through a bitset
	// representation and shift each part down to bit 0.
	const bitset __bits = __x.__to_bitset();
	return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
		 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
		   constexpr size_t __offset = __i * _V::size();
		   return _V(__bitset_init, (__bits >> __offset).to_ullong());
		 });
      }
    else
      {
	// Fallback: element-wise copy via the generator constructor.
	return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
		 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
		   constexpr size_t __offset = __i * _V::size();
		   return _V(__private_init,
			     [&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
			       return __x[__j + __offset];
			     });
		 });
      }
  }
4110
4111// }}}
4112// split<_Sizes...>(simd) {{{
// split<_Sizes...>(simd): splits __x into parts of the given sizes. The
// forward declaration above constrains the sizes to sum to __x's size.
template <size_t... _Sizes, typename _Tp, typename _Ap, typename>
  _GLIBCXX_SIMD_ALWAYS_INLINE
  tuple<simd<_Tp, simd_abi::deduce_t<_Tp, _Sizes>>...>
  split(const simd<_Tp, _Ap>& __x)
  {
    using _SL = _SizeList<_Sizes...>;
    using _Tuple = tuple<__deduced_simd<_Tp, _Sizes>...>;
    constexpr size_t _Np = simd_size_v<_Tp, _Ap>;
    constexpr size_t _N0 = _SL::template _S_at<0>();
    using _V = __deduced_simd<_Tp, _N0>;

    // Input known at compile time: element-wise generation folds away.
    if (__x._M_is_constprop())
      return __generate_from_n_evaluations<sizeof...(_Sizes), _Tuple>(
	       [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
		 using _Vi = __deduced_simd<_Tp, _SL::_S_at(__i)>;
		 constexpr size_t __offset = _SL::_S_before(__i);
		 return _Vi([&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
			  return __x[__offset + __j];
			});
	       });
    else if constexpr (_Np == _N0)
      {
	static_assert(sizeof...(_Sizes) == 1);
	return {simd_cast<_V>(__x)};
      }
    else if constexpr // split from fixed_size, such that __x::first.size == _N0
      (__is_fixed_size_abi_v<
	 _Ap> && __fixed_size_storage_t<_Tp, _Np>::_S_first_size == _N0)
      {
	static_assert(
	  !__is_fixed_size_abi_v<typename _V::abi_type>,
	  "How can <_Tp, _Np> be __a single _SimdTuple entry but __a "
	  "fixed_size_simd "
	  "when deduced?");
	// extract first and recurse (__split_wrapper is needed to deduce a new
	// _Sizes pack)
	return tuple_cat(make_tuple(_V(__private_init, __data(__x).first)),
			 __split_wrapper(_SL::template _S_pop_front<1>(),
					 __data(__x).second));
      }
    else if constexpr ((!is_same_v<simd_abi::scalar,
				   simd_abi::deduce_t<_Tp, _Sizes>> && ...)
			 && (!__is_fixed_size_abi_v<
			       simd_abi::deduce_t<_Tp, _Sizes>> && ...))
      {
	// All parts deduce to native (non-scalar, non-fixed_size) ABIs:
	// handle the common size combinations with __extract_part.
	if constexpr (((_Sizes * 2 == _Np) && ...)) // two halves
	  return {{__private_init, __extract_part<0, 2>(__data(__x))},
		  {__private_init, __extract_part<1, 2>(__data(__x))}};
	else if constexpr (is_same_v<_SizeList<_Sizes...>,
				     _SizeList<_Np / 3, _Np / 3, _Np / 3>>)
	  return {{__private_init, __extract_part<0, 3>(__data(__x))},
		  {__private_init, __extract_part<1, 3>(__data(__x))},
		  {__private_init, __extract_part<2, 3>(__data(__x))}};
	else if constexpr (is_same_v<_SizeList<_Sizes...>,
				     _SizeList<2 * _Np / 3, _Np / 3>>)
	  return {{__private_init, __extract_part<0, 3, 2>(__data(__x))},
		  {__private_init, __extract_part<2, 3>(__data(__x))}};
	else if constexpr (is_same_v<_SizeList<_Sizes...>,
				     _SizeList<_Np / 3, 2 * _Np / 3>>)
	  return {{__private_init, __extract_part<0, 3>(__data(__x))},
		  {__private_init, __extract_part<1, 3, 2>(__data(__x))}};
	else if constexpr (is_same_v<_SizeList<_Sizes...>,
				     _SizeList<_Np / 2, _Np / 4, _Np / 4>>)
	  return {{__private_init, __extract_part<0, 2>(__data(__x))},
		  {__private_init, __extract_part<2, 4>(__data(__x))},
		  {__private_init, __extract_part<3, 4>(__data(__x))}};
	else if constexpr (is_same_v<_SizeList<_Sizes...>,
				     _SizeList<_Np / 4, _Np / 4, _Np / 2>>)
	  return {{__private_init, __extract_part<0, 4>(__data(__x))},
		  {__private_init, __extract_part<1, 4>(__data(__x))},
		  {__private_init, __extract_part<1, 2>(__data(__x))}};
	else if constexpr (is_same_v<_SizeList<_Sizes...>,
				     _SizeList<_Np / 4, _Np / 2, _Np / 4>>)
	  return {{__private_init, __extract_part<0, 4>(__data(__x))},
		  {__private_init, __extract_center(__data(__x))},
		  {__private_init, __extract_part<3, 4>(__data(__x))}};
	else if constexpr (((_Sizes * 4 == _Np) && ...)) // four quarters
	  return {{__private_init, __extract_part<0, 4>(__data(__x))},
		  {__private_init, __extract_part<1, 4>(__data(__x))},
		  {__private_init, __extract_part<2, 4>(__data(__x))},
		  {__private_init, __extract_part<3, 4>(__data(__x))}};
	// else fall through
      }
#ifdef _GLIBCXX_SIMD_USE_ALIASING_LOADS
    // Generic path: reload each part from the source object's storage,
    // stating the largest alignment provable from the part's byte offset.
    const __may_alias<_Tp>* const __element_ptr
      = reinterpret_cast<const __may_alias<_Tp>*>(&__x);
    return __generate_from_n_evaluations<sizeof...(_Sizes), _Tuple>(
	     [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
	       using _Vi = __deduced_simd<_Tp, _SL::_S_at(__i)>;
	       constexpr size_t __offset = _SL::_S_before(__i);
	       constexpr size_t __base_align = alignof(simd<_Tp, _Ap>);
	       // __a: bytes from this part's start up to the next
	       // __base_align boundary; __b: lowest set bit of __a (i.e. the
	       // largest power of two dividing __a). The part's address is
	       // aligned to at least that power of two.
	       constexpr size_t __a
		 = __base_align - ((__offset * sizeof(_Tp)) % __base_align);
	       constexpr size_t __b = ((__a - 1) & __a) ^ __a;
	       constexpr size_t __alignment = __b == 0 ? __a : __b;
	       return _Vi(__element_ptr + __offset, overaligned<__alignment>);
	     });
#else
    // Generic path: element-wise copy via the generator constructor.
    return __generate_from_n_evaluations<sizeof...(_Sizes), _Tuple>(
	     [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
	       using _Vi = __deduced_simd<_Tp, _SL::_S_at(__i)>;
	       const auto& __xx = __data(__x);
	       using _Offset = decltype(_SL::_S_before(__i));
	       return _Vi([&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
			constexpr _SizeConstant<_Offset::value + __j> __k;
			return __xx[__k];
		      });
	     });
#endif
  }
4223
4224// }}}
4225
4226// __subscript_in_pack {{{
// Returns element _I of the flattened concatenation of the simd pack
// (__x, __xs...), recursing into the tail once _I passes the head's size.
template <size_t _I, typename _Tp, typename _Ap, typename... _As>
  _GLIBCXX_SIMD_INTRINSIC constexpr _Tp
  __subscript_in_pack(const simd<_Tp, _Ap>& __x, const simd<_Tp, _As>&... __xs)
  {
    if constexpr (_I < simd_size_v<_Tp, _Ap>)
      return __x[_I];
    else
      return __subscript_in_pack<_I - simd_size_v<_Tp, _Ap>>(__xs...);
  }
4236
4237// }}}
4238// __store_pack_of_simd {{{
// Copies the bytes of every simd object in the pack contiguously to __mem,
// head first, recursing for the tail.
template <typename _Tp, typename _A0, typename... _As>
  _GLIBCXX_SIMD_INTRINSIC void
  __store_pack_of_simd(char* __mem, const simd<_Tp, _A0>& __x0, const simd<_Tp, _As>&... __xs)
  {
    constexpr size_t __n_bytes = sizeof(_Tp) * simd_size_v<_Tp, _A0>;
    __builtin_memcpy(__mem, &__data(__x0), __n_bytes);
    if constexpr (sizeof...(__xs) > 0)
      __store_pack_of_simd(__mem + __n_bytes, __xs...);
  }
4248
4249// }}}
4250// concat(simd...) {{{
// Concatenates the given simd objects into one simd of the combined size.
template <typename _Tp, typename... _As, typename = __detail::__odr_helper>
  inline _GLIBCXX_SIMD_CONSTEXPR
  simd<_Tp, simd_abi::deduce_t<_Tp, (simd_size_v<_Tp, _As> + ...)>>
  concat(const simd<_Tp, _As>&... __xs)
  {
    using _Rp = __deduced_simd<_Tp, (simd_size_v<_Tp, _As> + ...)>;
    if constexpr (sizeof...(__xs) == 1)
      return simd_cast<_Rp>(__xs...);
    else if ((... && __xs._M_is_constprop()))
      // All inputs known at compile time: generate the result element-wise so
      // it stays a constant.
      return simd<_Tp,
		  simd_abi::deduce_t<_Tp, (simd_size_v<_Tp, _As> + ...)>>(
	       [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
	       { return __subscript_in_pack<__i>(__xs...); });
    else
      {
	// Copy all inputs' bytes contiguously into the result's storage.
	_Rp __r{};
	__store_pack_of_simd(reinterpret_cast<char*>(&__data(__r)), __xs...);
	return __r;
      }
  }
4271
4272// }}}
4273// concat(array<simd>) {{{
// Concatenates an array of _Np simd objects by expanding the array into a
// pack and forwarding to the variadic concat above.
template <typename _Tp, typename _Abi, size_t _Np>
  _GLIBCXX_SIMD_ALWAYS_INLINE
  _GLIBCXX_SIMD_CONSTEXPR __deduced_simd<_Tp, simd_size_v<_Tp, _Abi> * _Np>
  concat(const array<simd<_Tp, _Abi>, _Np>& __x)
  {
    return __call_with_subscripts<_Np>(
	     __x, [](const auto&... __xs) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
	       return concat(__xs...);
	     });
  }
4284
4285// }}}
4286
4287/// @cond undocumented
4288// _SmartReference {{{
// Proxy reference returned by simd::operator[] and simd_mask::operator[].
// Reads go through _M_read() (subscript on _M_obj), writes go through
// _Accessor::_S_set. All mutating operators are rvalue-ref qualified because
// the proxy is only ever used as a temporary, and each returns a fresh proxy
// to the same element.
template <typename _Up, typename _Accessor = _Up,
	  typename _ValueType = typename _Up::value_type>
  class _SmartReference
  {
    friend _Accessor;
    int _M_index;
    _Up& _M_obj;

    // Reads the referenced element (the whole object when _Up is a plain
    // arithmetic type, e.g. for the scalar ABI).
    _GLIBCXX_SIMD_INTRINSIC constexpr _ValueType
    _M_read() const noexcept
    {
      if constexpr (is_arithmetic_v<_Up>)
	return _M_obj;
      else
	return _M_obj[_M_index];
    }

    // Writes __x to the referenced element via the accessor.
    template <typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC constexpr void
      _M_write(_Tp&& __x) const
      { _Accessor::_S_set(_M_obj, _M_index, static_cast<_Tp&&>(__x)); }

  public:
    _GLIBCXX_SIMD_INTRINSIC constexpr
    _SmartReference(_Up& __o, int __i) noexcept
    : _M_index(__i), _M_obj(__o) {}

    using value_type = _ValueType;

    // Not copyable: copying would be ambiguous between copying the proxy and
    // copying the referenced value.
    _GLIBCXX_SIMD_INTRINSIC
    _SmartReference(const _SmartReference&) = delete;

    // Implicit read conversion to the element's value type.
    _GLIBCXX_SIMD_INTRINSIC constexpr
    operator value_type() const noexcept
    { return _M_read(); }

    // Assignment accepts only value-preserving conversions (or int, per the
    // TS broadcast rules encoded in _ValuePreservingOrInt).
    template <typename _Tp, typename = _ValuePreservingOrInt<__remove_cvref_t<_Tp>, value_type>>
      _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference
      operator=(_Tp&& __x) &&
      {
	_M_write(static_cast<_Tp&&>(__x));
	return {_M_obj, _M_index};
      }

    // Compound assignment operators: read, apply __op, write back. The extra
    // template parameters constrain to value-preserving operand/result types.
#define _GLIBCXX_SIMD_OP_(__op) \
    template <typename _Tp, \
	      typename _TT = decltype(declval<value_type>() __op declval<_Tp>()), \
	      typename = _ValuePreservingOrInt<__remove_cvref_t<_Tp>, _TT>, \
	      typename = _ValuePreservingOrInt<_TT, value_type>> \
      _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference \
      operator __op##=(_Tp&& __x) && \
      { \
	const value_type& __lhs = _M_read(); \
	_M_write(__lhs __op __x); \
	return {_M_obj, _M_index}; \
      }
    _GLIBCXX_SIMD_ALL_ARITHMETICS(_GLIBCXX_SIMD_OP_);
    _GLIBCXX_SIMD_ALL_SHIFTS(_GLIBCXX_SIMD_OP_);
    _GLIBCXX_SIMD_ALL_BINARY(_GLIBCXX_SIMD_OP_);
#undef _GLIBCXX_SIMD_OP_

    // Increment/decrement, enabled only if value_type supports them. The
    // conditional_t makes the expression dependent, delaying the check to
    // instantiation.
    template <typename _Tp = void,
	      typename = decltype(++declval<conditional_t<true, value_type, _Tp>&>())>
      _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference
      operator++() &&
      {
	value_type __x = _M_read();
	_M_write(++__x);
	return {_M_obj, _M_index};
      }

    template <typename _Tp = void,
	      typename = decltype(declval<conditional_t<true, value_type, _Tp>&>()++)>
      _GLIBCXX_SIMD_INTRINSIC constexpr value_type
      operator++(int) &&
      {
	const value_type __r = _M_read();
	value_type __x = __r;
	_M_write(++__x);
	return __r;
      }

    template <typename _Tp = void,
	      typename = decltype(--declval<conditional_t<true, value_type, _Tp>&>())>
      _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference
      operator--() &&
      {
	value_type __x = _M_read();
	_M_write(--__x);
	return {_M_obj, _M_index};
      }

    template <typename _Tp = void,
	      typename = decltype(declval<conditional_t<true, value_type, _Tp>&>()--)>
      _GLIBCXX_SIMD_INTRINSIC constexpr value_type
      operator--(int) &&
      {
	const value_type __r = _M_read();
	value_type __x = __r;
	_M_write(--__x);
	return __r;
      }

    // swap overloads for all proxy/value combinations, so generic swap-based
    // code works with proxy references.
    _GLIBCXX_SIMD_INTRINSIC friend void
    swap(_SmartReference&& __a, _SmartReference&& __b) noexcept(
      conjunction<
	is_nothrow_constructible<value_type, _SmartReference&&>,
	is_nothrow_assignable<_SmartReference&&, value_type&&>>::value)
    {
      value_type __tmp = static_cast<_SmartReference&&>(__a);
      static_cast<_SmartReference&&>(__a) = static_cast<value_type>(__b);
      static_cast<_SmartReference&&>(__b) = std::move(__tmp);
    }

    _GLIBCXX_SIMD_INTRINSIC friend void
    swap(value_type& __a, _SmartReference&& __b) noexcept(
      conjunction<
	is_nothrow_constructible<value_type, value_type&&>,
	is_nothrow_assignable<value_type&, value_type&&>,
	is_nothrow_assignable<_SmartReference&&, value_type&&>>::value)
    {
      value_type __tmp(std::move(__a));
      __a = static_cast<value_type>(__b);
      static_cast<_SmartReference&&>(__b) = std::move(__tmp);
    }

    _GLIBCXX_SIMD_INTRINSIC friend void
    swap(_SmartReference&& __a, value_type& __b) noexcept(
      conjunction<
	is_nothrow_constructible<value_type, _SmartReference&&>,
	is_nothrow_assignable<value_type&, value_type&&>,
	is_nothrow_assignable<_SmartReference&&, value_type&&>>::value)
    {
      value_type __tmp(__a);
      static_cast<_SmartReference&&>(__a) = std::move(__b);
      __b = std::move(__tmp);
    }
  };
4427
4428// }}}
4429// __scalar_abi_wrapper {{{
4430template <int _Bytes>
4431 struct __scalar_abi_wrapper
4432 {
4433 template <typename _Tp> static constexpr size_t _S_full_size = 1;
4434 template <typename _Tp> static constexpr size_t _S_size = 1;
4435 template <typename _Tp> static constexpr size_t _S_is_partial = false;
4436
4437 template <typename _Tp, typename _Abi = simd_abi::scalar>
4438 static constexpr bool _S_is_valid_v
4439 = _Abi::template _IsValid<_Tp>::value && sizeof(_Tp) == _Bytes;
4440 };
4441
4442// }}}
4443// __decay_abi metafunction {{{
// Strips __scalar_abi_wrapper from an ABI tag, yielding the real ABI type;
// identity for every other tag.
template <typename _Tp>
  struct __decay_abi { using type = _Tp; };

template <int _Bytes>
  struct __decay_abi<__scalar_abi_wrapper<_Bytes>>
  { using type = simd_abi::scalar; };
4450
4451// }}}
4452// __find_next_valid_abi metafunction {{{1
4453// Given an ABI tag A<N>, find an N2 < N such that A<N2>::_S_is_valid_v<_Tp> ==
4454// true, N2 is a power-of-2, and A<N2>::_S_is_partial<_Tp> is false. Break
4455// recursion at 2 elements in the resulting ABI tag. In this case
4456// type::_S_is_valid_v<_Tp> may be false.
template <template <int> class _Abi, int _Bytes, typename _Tp>
  struct __find_next_valid_abi
  {
    static constexpr auto
    _S_choose()
    {
      // Halve the power-of-2-rounded byte count and test that ABI tag.
      constexpr int _NextBytes = std::__bit_ceil(_Bytes) / 2;
      using _NextAbi = _Abi<_NextBytes>;
      if constexpr (_NextBytes < sizeof(_Tp) * 2) // break recursion
	return _Abi<_Bytes>();
      else if constexpr (_NextAbi::template _S_is_partial<_Tp> == false
			   && _NextAbi::template _S_is_valid_v<_Tp>)
	return _NextAbi();
      else
	return __find_next_valid_abi<_Abi, _NextBytes, _Tp>::_S_choose();
    }

    using type = decltype(_S_choose());
  };

// For the scalar wrapper the search trivially ends at simd_abi::scalar.
template <int _Bytes, typename _Tp>
  struct __find_next_valid_abi<__scalar_abi_wrapper, _Bytes, _Tp>
  { using type = simd_abi::scalar; };
4480
4481// _AbiList {{{1
// _AbiList: an ordered list of ABI class templates used to find the first or
// best valid ABI tag for an element type and width. This primary template is
// the empty-list recursion terminator: nothing is valid.
template <template <int> class...>
  struct _AbiList
  {
    template <typename, int> static constexpr bool _S_has_valid_abi = false;
    template <typename, int> using _FirstValidAbi = void;
    template <typename, int> using _BestAbi = void;
  };
4489
// Non-empty list: try the head template _A0 first, then recurse into _Rest.
template <template <int> class _A0, template <int> class... _Rest>
  struct _AbiList<_A0, _Rest...>
  {
    template <typename _Tp, int _Np>
      static constexpr bool _S_has_valid_abi
	= _A0<sizeof(_Tp) * _Np>::template _S_is_valid_v<
	    _Tp> || _AbiList<_Rest...>::template _S_has_valid_abi<_Tp, _Np>;

    template <typename _Tp, int _Np>
      using _FirstValidAbi = conditional_t<
	_A0<sizeof(_Tp) * _Np>::template _S_is_valid_v<_Tp>,
	typename __decay_abi<_A0<sizeof(_Tp) * _Np>>::type,
	typename _AbiList<_Rest...>::template _FirstValidAbi<_Tp, _Np>>;

    template <typename _Tp, int _Np>
      static constexpr auto
      _S_determine_best_abi()
      {
	static_assert(_Np >= 1);
	constexpr int _Bytes = sizeof(_Tp) * _Np;
	if constexpr (_Np == 1)
	  return __make_dependent_t<_Tp, simd_abi::scalar>{};
	else
	  {
	    constexpr int __fullsize = _A0<_Bytes>::template _S_full_size<_Tp>;
	    // _A0<_Bytes> is good if:
	    // 1. The ABI tag is valid for _Tp
	    // 2. The storage overhead is no more than padding to fill the next
	    //    power-of-2 number of bytes
	    if constexpr (_A0<_Bytes>::template _S_is_valid_v<
			    _Tp> && __fullsize / 2 < _Np)
	      return typename __decay_abi<_A0<_Bytes>>::type{};
	    else
	      {
		// Otherwise look for a smaller valid _A0 tag holding at most
		// _Np elements; failing that, try the remaining templates.
		using _Bp =
		  typename __find_next_valid_abi<_A0, _Bytes, _Tp>::type;
		if constexpr (_Bp::template _S_is_valid_v<
				_Tp> && _Bp::template _S_size<_Tp> <= _Np)
		  return _Bp{};
		else
		  return
		    typename _AbiList<_Rest...>::template _BestAbi<_Tp, _Np>{};
	      }
	  }
      }

    template <typename _Tp, int _Np>
      using _BestAbi = decltype(_S_determine_best_abi<_Tp, _Np>());
  };
4539
4540// }}}1
4541
4542// the following lists all native ABIs, which makes them accessible to
4543// simd_abi::deduce and select_best_vector_type_t (for fixed_size). Order
4544// matters: Whatever comes first has higher priority.
using _AllNativeAbis = _AbiList<simd_abi::_VecBltnBtmsk, simd_abi::_VecBuiltin,
				__scalar_abi_wrapper>; // highest priority first
4547
4548// valid _SimdTraits specialization {{{1
// _SimdTraits is enabled whenever the ABI tag provides an _IsValid
// metafunction for _Tp; all traits are then inherited from the ABI tag.
template <typename _Tp, typename _Abi>
  struct _SimdTraits<_Tp, _Abi, void_t<typename _Abi::template _IsValid<_Tp>>>
  : _Abi::template __traits<_Tp> {};
4552
4553// __deduce_impl specializations {{{1
4554// try all native ABIs (including scalar) first
// Some native (or scalar) ABI is valid for <_Tp, _Np>: use the first one in
// priority order.
template <typename _Tp, size_t _Np>
  struct __deduce_impl<
    _Tp, _Np, enable_if_t<_AllNativeAbis::template _S_has_valid_abi<_Tp, _Np>>>
  { using type = _AllNativeAbis::_FirstValidAbi<_Tp, _Np>; };
4559
4560// fall back to fixed_size only if scalar and native ABIs don't match
// SFINAE helper: defines `type` only when fixed_size<_Np> is valid for _Tp.
template <typename _Tp, size_t _Np, typename = void>
  struct __deduce_fixed_size_fallback {};

template <typename _Tp, size_t _Np>
  struct __deduce_fixed_size_fallback<_Tp, _Np,
    enable_if_t<simd_abi::fixed_size<_Np>::template _S_is_valid_v<_Tp>>>
  { using type = simd_abi::fixed_size<_Np>; };

// Primary __deduce_impl: no native ABI matched above, so fall back to
// fixed_size (or no `type` at all if even that is invalid).
template <typename _Tp, size_t _Np, typename>
  struct __deduce_impl : public __deduce_fixed_size_fallback<_Tp, _Np> {};
4571
4572//}}}1
4573/// @endcond
4574
4575// simd_mask {{{
4576template <typename _Tp, typename _Abi>
4577 class simd_mask : public _SimdTraits<_Tp, _Abi>::_MaskBase
4578 {
4579 // types, tags, and friends {{{
4580 using _Traits = _SimdTraits<_Tp, _Abi>;
4581 using _MemberType = typename _Traits::_MaskMember;
4582
4583 // We map all masks with equal element sizeof to a single integer type, the
4584 // one given by __int_for_sizeof_t<_Tp>. This is the approach
4585 // [[gnu::vector_size(N)]] types take as well and it reduces the number of
4586 // template specializations in the implementation classes.
4587 using _Ip = __int_for_sizeof_t<_Tp>;
4588 static constexpr _Ip* _S_type_tag = nullptr;
4589
4590 friend typename _Traits::_MaskBase;
4591 friend class simd<_Tp, _Abi>; // to construct masks on return
4592 friend typename _Traits::_SimdImpl; // to construct masks on return and
4593 // inspect data on masked operations
4594 public:
4595 using _Impl = typename _Traits::_MaskImpl;
4596 friend _Impl;
4597
4598 // }}}
4599 // member types {{{
4600 using value_type = bool;
4601 using reference = _SmartReference<_MemberType, _Impl, value_type>;
4602 using simd_type = simd<_Tp, _Abi>;
4603 using abi_type = _Abi;
4604
4605 // }}}
4606 static constexpr size_t size() // {{{
4607 { return __size_or_zero_v<_Tp, _Abi>; }
4608
4609 // }}}
4610 // constructors & assignment {{{
4611 simd_mask() = default;
4612 simd_mask(const simd_mask&) = default;
4613 simd_mask(simd_mask&&) = default;
4614 simd_mask& operator=(const simd_mask&) = default;
4615 simd_mask& operator=(simd_mask&&) = default;
4616
4617 // }}}
4618 // access to internal representation (optional feature) {{{
4619 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR explicit
4620 simd_mask(typename _Traits::_MaskCastType __init)
4621 : _M_data{__init} {}
4622 // conversions to internal type is done in _MaskBase
4623
4624 // }}}
4625 // bitset interface (extension to be proposed) {{{
4626 // TS_FEEDBACK:
4627 // Conversion of simd_mask to and from bitset makes it much easier to
4628 // interface with other facilities. I suggest adding `static
4629 // simd_mask::from_bitset` and `simd_mask::to_bitset`.
4630 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR static simd_mask
4631 __from_bitset(bitset<size()> bs)
4632 { return {__bitset_init, bs}; }
4633
4634 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bitset<size()>
4635 __to_bitset() const
4636 { return _Impl::_S_to_bits(_M_data)._M_to_bitset(); }
4637
4638 // }}}
4639 // explicit broadcast constructor {{{
4640 _GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR
4641 simd_mask(value_type __x)
4642 : _M_data(_Impl::template _S_broadcast<_Ip>(__x)) {}
4643
4644 // }}}
4645 // implicit type conversion constructor {{{
4646 #ifdef _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
4647 // proposed improvement
4648 template <typename _Up, typename _A2,
4649 typename = enable_if_t<simd_size_v<_Up, _A2> == size()>>
4650 _GLIBCXX_SIMD_ALWAYS_INLINE explicit(sizeof(_MemberType)
4651 != sizeof(typename _SimdTraits<_Up, _A2>::_MaskMember))
4652 simd_mask(const simd_mask<_Up, _A2>& __x)
4653 : simd_mask(__proposed::static_simd_cast<simd_mask>(__x)) {}
4654 #else
4655 // conforming to ISO/IEC 19570:2018
4656 template <typename _Up, typename = enable_if_t<conjunction<
4657 is_same<abi_type, simd_abi::fixed_size<size()>>,
4658 is_same<_Up, _Up>>::value>>
4659 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
4660 simd_mask(const simd_mask<_Up, simd_abi::fixed_size<size()>>& __x)
4661 : _M_data(_Impl::_S_from_bitmask(__data(__x), _S_type_tag)) {}
4662 #endif
4663
4664 // }}}
4665 // load constructor {{{
4666 template <typename _Flags>
4667 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
4668 simd_mask(const value_type* __mem, _IsSimdFlagType<_Flags>)
4669 : _M_data(_Impl::template _S_load<_Ip>(_Flags::template _S_apply<simd_mask>(__mem))) {}
4670
4671 template <typename _Flags>
4672 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
4673 simd_mask(const value_type* __mem, simd_mask __k, _IsSimdFlagType<_Flags>)
4674 : _M_data{}
4675 {
4676 _M_data = _Impl::_S_masked_load(_M_data, __k._M_data,
4677 _Flags::template _S_apply<simd_mask>(__mem));
4678 }
4679
4680 // }}}
4681 // loads [simd_mask.load] {{{
4682 template <typename _Flags>
4683 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR void
4684 copy_from(const value_type* __mem, _IsSimdFlagType<_Flags>)
4685 { _M_data = _Impl::template _S_load<_Ip>(_Flags::template _S_apply<simd_mask>(__mem)); }
4686
4687 // }}}
4688 // stores [simd_mask.store] {{{
4689 template <typename _Flags>
4690 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR void
4691 copy_to(value_type* __mem, _IsSimdFlagType<_Flags>) const
4692 { _Impl::_S_store(_M_data, _Flags::template _S_apply<simd_mask>(__mem)); }
4693
4694 // }}}
4695 // scalar access {{{
4696 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR reference
4697 operator[](size_t __i)
4698 {
4699 if (__i >= size())
4700 __invoke_ub("Subscript %d is out of range [0, %d]", __i, size() - 1);
4701 return {_M_data, int(__i)};
4702 }
4703
4704 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR value_type
4705 operator[](size_t __i) const
4706 {
4707 if (__i >= size())
4708 __invoke_ub("Subscript %d is out of range [0, %d]", __i, size() - 1);
4709 if constexpr (__is_scalar_abi<_Abi>())
4710 return _M_data;
4711 else
4712 return static_cast<bool>(_M_data[__i]);
4713 }
4714
4715 // }}}
4716 // negation {{{
4717 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd_mask
4718 operator!() const
4719 { return {__private_init, _Impl::_S_bit_not(_M_data)}; }
4720
4721 // }}}
4722 // simd_mask binary operators [simd_mask.binary] {{{
4723 #ifdef _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
4724 // simd_mask<int> && simd_mask<uint> needs disambiguation
4725 template <typename _Up, typename _A2,
4726 typename = enable_if_t<is_convertible_v<simd_mask<_Up, _A2>, simd_mask>>>
4727 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4728 operator&&(const simd_mask& __x, const simd_mask<_Up, _A2>& __y)
4729 {
4730 return {__private_init,
4731 _Impl::_S_logical_and(__x._M_data, simd_mask(__y)._M_data)};
4732 }
4733
4734 template <typename _Up, typename _A2,
4735 typename = enable_if_t<is_convertible_v<simd_mask<_Up, _A2>, simd_mask>>>
4736 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4737 operator||(const simd_mask& __x, const simd_mask<_Up, _A2>& __y)
4738 {
4739 return {__private_init,
4740 _Impl::_S_logical_or(__x._M_data, simd_mask(__y)._M_data)};
4741 }
4742 #endif // _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
4743
4744 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4745 operator&&(const simd_mask& __x, const simd_mask& __y)
4746 { return {__private_init, _Impl::_S_logical_and(__x._M_data, __y._M_data)}; }
4747
4748 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4749 operator||(const simd_mask& __x, const simd_mask& __y)
4750 { return {__private_init, _Impl::_S_logical_or(__x._M_data, __y._M_data)}; }
4751
4752 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4753 operator&(const simd_mask& __x, const simd_mask& __y)
4754 { return {__private_init, _Impl::_S_bit_and(__x._M_data, __y._M_data)}; }
4755
4756 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4757 operator|(const simd_mask& __x, const simd_mask& __y)
4758 { return {__private_init, _Impl::_S_bit_or(__x._M_data, __y._M_data)}; }
4759
4760 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4761 operator^(const simd_mask& __x, const simd_mask& __y)
4762 { return {__private_init, _Impl::_S_bit_xor(__x._M_data, __y._M_data)}; }
4763
4764 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask&
4765 operator&=(simd_mask& __x, const simd_mask& __y)
4766 {
4767 __x._M_data = _Impl::_S_bit_and(__x._M_data, __y._M_data);
4768 return __x;
4769 }
4770
4771 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask&
4772 operator|=(simd_mask& __x, const simd_mask& __y)
4773 {
4774 __x._M_data = _Impl::_S_bit_or(__x._M_data, __y._M_data);
4775 return __x;
4776 }
4777
4778 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask&
4779 operator^=(simd_mask& __x, const simd_mask& __y)
4780 {
4781 __x._M_data = _Impl::_S_bit_xor(__x._M_data, __y._M_data);
4782 return __x;
4783 }
4784
4785 // }}}
4786 // simd_mask compares [simd_mask.comparison] {{{
4787 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4788 operator==(const simd_mask& __x, const simd_mask& __y)
4789 { return !operator!=(__x, __y); }
4790
4791 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4792 operator!=(const simd_mask& __x, const simd_mask& __y)
4793 { return {__private_init, _Impl::_S_bit_xor(__x._M_data, __y._M_data)}; }
4794
4795 // }}}
4796 // private_init ctor {{{
4797 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
4798 simd_mask(_PrivateInit, typename _Traits::_MaskMember __init)
4799 : _M_data(__init) {}
4800
4801 // }}}
4802 // private_init generator ctor {{{
    // Internal generator constructor: initializes each element i to
    // bool(__gen(i)). SFINAE requires the generator result to be convertible
    // to bool.
    template <typename _Fp, typename = decltype(bool(declval<_Fp>()(size_t())))>
      _GLIBCXX_SIMD_INTRINSIC constexpr
      simd_mask(_PrivateInit, _Fp&& __gen)
      : _M_data()
      {
        // Unrolled at compile time; __i is an integral-constant-like index.
        __execute_n_times<size()>([&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
          _Impl::_S_set(_M_data, __i, __gen(__i));
        });
      }
4812
4813 // }}}
4814 // bitset_init ctor {{{
    // Internal: construct from a std::bitset; the bitset is sanitized
    // (excess high bits cleared) before conversion to the mask representation.
    _GLIBCXX_SIMD_INTRINSIC constexpr
    simd_mask(_BitsetInit, bitset<size()> __init)
    : _M_data(_Impl::_S_from_bitmask(_SanitizedBitMask<size()>(__init), _S_type_tag))
    {}
4819
4820 // }}}
4821 // __cvt {{{
4822 // TS_FEEDBACK:
4823 // The conversion operator this implements should be a ctor on simd_mask.
4824 // Once you call .__cvt() on a simd_mask it converts conveniently.
4825 // A useful variation: add `explicit(sizeof(_Tp) != sizeof(_Up))`
    // TS_FEEDBACK:
    // The conversion operator this implements should be a ctor on simd_mask.
    // Once you call .__cvt() on a simd_mask it converts conveniently.
    // A useful variation: add `explicit(sizeof(_Tp) != sizeof(_Up))`
    //
    // Proxy returned by __cvt(); converts to any simd_mask type of equal
    // size. Holds a reference to the source mask, so it must not outlive
    // it; the rvalue-qualified conversion operator discourages storing it.
    struct _CvtProxy
    {
      template <typename _Up, typename _A2,
                typename = enable_if_t<simd_size_v<_Up, _A2> == simd_size_v<_Tp, _Abi>>>
        _GLIBCXX_SIMD_ALWAYS_INLINE
        operator simd_mask<_Up, _A2>() &&
        {
          using namespace std::experimental::__proposed;
          return static_simd_cast<simd_mask<_Up, _A2>>(_M_data);
        }

      // Non-owning reference to the mask being converted.
      const simd_mask<_Tp, _Abi>& _M_data;
    };
4839
4840 _GLIBCXX_SIMD_INTRINSIC _CvtProxy
4841 __cvt() const
4842 { return {*this}; }
4843
4844 // }}}
4845 // operator?: overloads (suggested extension) {{{
4846 #ifdef __GXX_CONDITIONAL_IS_OVERLOADABLE__
    // Element-wise ternary select on masks (only compiled when the GCC
    // extension __GXX_CONDITIONAL_IS_OVERLOADABLE__ is available): starts
    // from __where_false and overwrites the elements selected by __k.
    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
    operator?:(const simd_mask& __k, const simd_mask& __where_true,
               const simd_mask& __where_false)
    {
      auto __ret = __where_false;
      _Impl::_S_masked_assign(__k._M_data, __ret._M_data, __where_true._M_data);
      return __ret;
    }
4855
    // Element-wise ternary select producing a simd of the common type of the
    // two value operands. Requires both operands and the mask to be
    // convertible to the result simd type _Rp and its mask type.
    template <typename _U1, typename _U2,
              typename _Rp = simd<common_type_t<_U1, _U2>, _Abi>,
              typename = enable_if_t<conjunction_v<
                is_convertible<_U1, _Rp>, is_convertible<_U2, _Rp>,
                is_convertible<simd_mask, typename _Rp::mask_type>>>>
      _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend _Rp
      operator?:(const simd_mask& __k, const _U1& __where_true,
                 const _U2& __where_false)
      {
        // Convert the false-branch first, then overwrite the selected lanes.
        _Rp __ret = __where_false;
        _Rp::_Impl::_S_masked_assign(
          __data(static_cast<typename _Rp::mask_type>(__k)), __data(__ret),
          __data(static_cast<_Rp>(__where_true)));
        return __ret;
      }
4871
4872 #ifdef _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
    // Element-wise ternary select where the condition and the false-branch
    // are different (but convertible) mask types; the result has this
    // simd_mask type.
    template <typename _Kp, typename _Ak, typename _Up, typename _Au,
              typename = enable_if_t<
                conjunction_v<is_convertible<simd_mask<_Kp, _Ak>, simd_mask>,
                              is_convertible<simd_mask<_Up, _Au>, simd_mask>>>>
      _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
      operator?:(const simd_mask<_Kp, _Ak>& __k, const simd_mask& __where_true,
                 const simd_mask<_Up, _Au>& __where_false)
      {
        simd_mask __ret = __where_false;
        _Impl::_S_masked_assign(simd_mask(__k)._M_data, __ret._M_data,
                                __where_true._M_data);
        return __ret;
      }
4886 #endif // _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
4887 #endif // __GXX_CONDITIONAL_IS_OVERLOADABLE__
4888
4889 // }}}
4890 // _M_is_constprop {{{
4891 _GLIBCXX_SIMD_INTRINSIC constexpr bool
4892 _M_is_constprop() const
4893 {
4894 if constexpr (__is_scalar_abi<_Abi>())
4895 return __builtin_constant_p(_M_data);
4896 else
4897 return _M_data._M_is_constprop();
4898 }
4899
4900 // }}}
4901
4902 private:
4903 friend const auto& __data<_Tp, abi_type>(const simd_mask&);
4904 friend auto& __data<_Tp, abi_type>(simd_mask&);
4905 alignas(_Traits::_S_mask_align) _MemberType _M_data;
4906 };
4907
4908// }}}
4909
4910/// @cond undocumented
4911// __data(simd_mask) {{{
4912template <typename _Tp, typename _Ap>
4913 _GLIBCXX_SIMD_INTRINSIC constexpr const auto&
4914 __data(const simd_mask<_Tp, _Ap>& __x)
4915 { return __x._M_data; }
4916
4917template <typename _Tp, typename _Ap>
4918 _GLIBCXX_SIMD_INTRINSIC constexpr auto&
4919 __data(simd_mask<_Tp, _Ap>& __x)
4920 { return __x._M_data; }
4921
4922// }}}
4923/// @endcond
4924
4925// simd_mask reductions [simd_mask.reductions] {{{
4926template <typename _Tp, typename _Abi>
4927 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
4928 all_of(const simd_mask<_Tp, _Abi>& __k) noexcept
4929 {
4930 if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
4931 {
4932 for (size_t __i = 0; __i < simd_size_v<_Tp, _Abi>; ++__i)
4933 if (!__k[__i])
4934 return false;
4935 return true;
4936 }
4937 else
4938 return _Abi::_MaskImpl::_S_all_of(__k);
4939 }
4940
4941template <typename _Tp, typename _Abi>
4942 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
4943 any_of(const simd_mask<_Tp, _Abi>& __k) noexcept
4944 {
4945 if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
4946 {
4947 for (size_t __i = 0; __i < simd_size_v<_Tp, _Abi>; ++__i)
4948 if (__k[__i])
4949 return true;
4950 return false;
4951 }
4952 else
4953 return _Abi::_MaskImpl::_S_any_of(__k);
4954 }
4955
4956template <typename _Tp, typename _Abi>
4957 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
4958 none_of(const simd_mask<_Tp, _Abi>& __k) noexcept
4959 {
4960 if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
4961 {
4962 for (size_t __i = 0; __i < simd_size_v<_Tp, _Abi>; ++__i)
4963 if (__k[__i])
4964 return false;
4965 return true;
4966 }
4967 else
4968 return _Abi::_MaskImpl::_S_none_of(__k);
4969 }
4970
4971template <typename _Tp, typename _Abi>
4972 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
4973 some_of(const simd_mask<_Tp, _Abi>& __k) noexcept
4974 {
4975 if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
4976 {
4977 for (size_t __i = 1; __i < simd_size_v<_Tp, _Abi>; ++__i)
4978 if (__k[__i] != __k[__i - 1])
4979 return true;
4980 return false;
4981 }
4982 else
4983 return _Abi::_MaskImpl::_S_some_of(__k);
4984 }
4985
// Returns the number of set elements in the mask.
template <typename _Tp, typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
  popcount(const simd_mask<_Tp, _Abi>& __k) noexcept
  {
    if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
      {
        // Fold-expression sum over all subscripts of the mask.
        const int __r = __call_with_subscripts<simd_size_v<_Tp, _Abi>>(
          __k, [](auto... __elements) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
            return ((__elements != 0) + ...);
          });
        // Only use the scalar result if it actually folded to a constant;
        // otherwise the ABI-specific implementation below is cheaper.
        if (__builtin_is_constant_evaluated() || __builtin_constant_p(__r))
          return __r;
      }
    return _Abi::_MaskImpl::_S_popcount(__k);
  }
5001
// Returns the index of the first (lowest-index) set element.
// Precondition: at least one element is set; calling this with an empty
// mask is undefined behavior (diagnosed via __invoke_ub in debug mode).
template <typename _Tp, typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
  find_first_set(const simd_mask<_Tp, _Abi>& __k)
  {
    if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
      {
        constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
        // Map each index to itself if set, otherwise to _Np (past the end),
        // then take the minimum.
        const size_t _Idx = __call_with_n_evaluations<_Np>(
          [](auto... __indexes) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
            return std::min({__indexes...});
          }, [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
            return __k[__i] ? +__i : _Np;
          });
        if (_Idx >= _Np)
          __invoke_ub("find_first_set(empty mask) is UB");
        if (__builtin_constant_p(_Idx))
          return _Idx;
      }
    return _Abi::_MaskImpl::_S_find_first_set(__k);
  }
5022
5023template <typename _Tp, typename _Abi>
5024 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
5025 find_last_set(const simd_mask<_Tp, _Abi>& __k)
5026 {
5027 if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
5028 {
5029 constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
5030 const int _Idx = __call_with_n_evaluations<_Np>(
5031 [](auto... __indexes) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
5032 return std::max({__indexes...});
5033 }, [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
5034 return __k[__i] ? int(__i) : -1;
5035 });
5036 if (_Idx < 0)
5037 __invoke_ub("find_first_set(empty mask) is UB");
5038 if (__builtin_constant_p(_Idx))
5039 return _Idx;
5040 }
5041 return _Abi::_MaskImpl::_S_find_last_set(__k);
5042 }
5043
// all_of for a plain bool (scalar overload): true iff the value is true.
_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
all_of(_ExactBool __x) noexcept
{ return __x; }
5047
// any_of for a plain bool (scalar overload): true iff the value is true.
_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
any_of(_ExactBool __x) noexcept
{ return __x; }
5051
// none_of for a plain bool (scalar overload): true iff the value is false.
_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
none_of(_ExactBool __x) noexcept
{ return !__x; }
5055
// some_of for a plain bool: a single element can never be "mixed", so
// the result is always false.
_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
some_of(_ExactBool) noexcept
{ return false; }
5059
// popcount for a plain bool: 1 if true, 0 if false (bool -> int conversion).
_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
popcount(_ExactBool __x) noexcept
{ return __x; }
5063
// find_first_set for a plain bool: the only possible index is 0.
// (Calling with a false value is UB per the mask overload's contract,
// but is not diagnosed here.)
_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
find_first_set(_ExactBool)
{ return 0; }
5067
// find_last_set for a plain bool: the only possible index is 0.
_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
find_last_set(_ExactBool)
{ return 0; }
5071
5072// }}}
5073
5074/// @cond undocumented
5075// _SimdIntOperators{{{1
// Primary template: empty base used when _Tp is not an integral type (or
// the simd type is invalid), contributing no integer-only operators.
template <typename _V, typename _Tp, typename _Abi, bool>
  class _SimdIntOperators {};
5078
5079template <typename _V, typename _Tp, typename _Abi>
5080 class _SimdIntOperators<_V, _Tp, _Abi, true>
5081 {
5082 using _Impl = typename _SimdTraits<_Tp, _Abi>::_SimdImpl;
5083
5084 _GLIBCXX_SIMD_INTRINSIC constexpr const _V&
5085 __derived() const
5086 { return *static_cast<const _V*>(this); }
5087
5088 template <typename _Up>
5089 _GLIBCXX_SIMD_INTRINSIC static _GLIBCXX_SIMD_CONSTEXPR _V
5090 _S_make_derived(_Up&& __d)
5091 { return {__private_init, static_cast<_Up&&>(__d)}; }
5092
5093 public:
5094 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5095 _V&
5096 operator%=(_V& __lhs, const _V& __x)
5097 { return __lhs = __lhs % __x; }
5098
5099 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5100 _V&
5101 operator&=(_V& __lhs, const _V& __x)
5102 { return __lhs = __lhs & __x; }
5103
5104 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5105 _V&
5106 operator|=(_V& __lhs, const _V& __x)
5107 { return __lhs = __lhs | __x; }
5108
5109 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5110 _V&
5111 operator^=(_V& __lhs, const _V& __x)
5112 { return __lhs = __lhs ^ __x; }
5113
5114 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5115 _V&
5116 operator<<=(_V& __lhs, const _V& __x)
5117 { return __lhs = __lhs << __x; }
5118
5119 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5120 _V&
5121 operator>>=(_V& __lhs, const _V& __x)
5122 { return __lhs = __lhs >> __x; }
5123
5124 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5125 _V&
5126 operator<<=(_V& __lhs, int __x)
5127 { return __lhs = __lhs << __x; }
5128
5129 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5130 _V&
5131 operator>>=(_V& __lhs, int __x)
5132 { return __lhs = __lhs >> __x; }
5133
5134 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5135 _V
5136 operator%(const _V& __x, const _V& __y)
5137 {
5138 return _SimdIntOperators::_S_make_derived(
5139 _Impl::_S_modulus(__data(__x), __data(__y)));
5140 }
5141
5142 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5143 _V
5144 operator&(const _V& __x, const _V& __y)
5145 {
5146 return _SimdIntOperators::_S_make_derived(
5147 _Impl::_S_bit_and(__data(__x), __data(__y)));
5148 }
5149
5150 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5151 _V
5152 operator|(const _V& __x, const _V& __y)
5153 {
5154 return _SimdIntOperators::_S_make_derived(
5155 _Impl::_S_bit_or(__data(__x), __data(__y)));
5156 }
5157
5158 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5159 _V
5160 operator^(const _V& __x, const _V& __y)
5161 {
5162 return _SimdIntOperators::_S_make_derived(
5163 _Impl::_S_bit_xor(__data(__x), __data(__y)));
5164 }
5165
5166 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5167 _V
5168 operator<<(const _V& __x, const _V& __y)
5169 {
5170 return _SimdIntOperators::_S_make_derived(
5171 _Impl::_S_bit_shift_left(__data(__x), __data(__y)));
5172 }
5173
5174 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5175 _V
5176 operator>>(const _V& __x, const _V& __y)
5177 {
5178 return _SimdIntOperators::_S_make_derived(
5179 _Impl::_S_bit_shift_right(__data(__x), __data(__y)));
5180 }
5181
5182 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5183 _V
5184 operator<<(const _V& __x, int __y)
5185 {
5186 if (__y < 0)
5187 __invoke_ub("The behavior is undefined if the right operand of a "
5188 "shift operation is negative. [expr.shift]\nA shift by "
5189 "%d was requested",
5190 __y);
5191 if (size_t(__y) >= sizeof(declval<_Tp>() << __y) * __CHAR_BIT__)
5192 __invoke_ub(
5193 "The behavior is undefined if the right operand of a "
5194 "shift operation is greater than or equal to the width of the "
5195 "promoted left operand. [expr.shift]\nA shift by %d was requested",
5196 __y);
5197 return _SimdIntOperators::_S_make_derived(
5198 _Impl::_S_bit_shift_left(__data(__x), __y));
5199 }
5200
5201 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5202 _V
5203 operator>>(const _V& __x, int __y)
5204 {
5205 if (__y < 0)
5206 __invoke_ub(
5207 "The behavior is undefined if the right operand of a shift "
5208 "operation is negative. [expr.shift]\nA shift by %d was requested",
5209 __y);
5210 if (size_t(__y) >= sizeof(declval<_Tp>() << __y) * __CHAR_BIT__)
5211 __invoke_ub(
5212 "The behavior is undefined if the right operand of a shift "
5213 "operation is greater than or equal to the width of the promoted "
5214 "left operand. [expr.shift]\nA shift by %d was requested",
5215 __y);
5216 return _SimdIntOperators::_S_make_derived(
5217 _Impl::_S_bit_shift_right(__data(__x), __y));
5218 }
5219
5220 // unary operators (for integral _Tp)
5221 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
5222 _V
5223 operator~() const
5224 { return {__private_init, _Impl::_S_complement(__derived()._M_data)}; }
5225 };
5226
5227//}}}1
5228/// @endcond
5229
5230// simd {{{
// The data-parallel vector type: a pack of size() elements of value_type
// _Tp, with storage and width determined by _Abi. Integer-only operators
// are mixed in via _SimdIntOperators when _Tp is integral.
template <typename _Tp, typename _Abi>
  class simd : public _SimdIntOperators<
                 simd<_Tp, _Abi>, _Tp, _Abi,
                 conjunction<is_integral<_Tp>,
                             typename _SimdTraits<_Tp, _Abi>::_IsValid>::value>,
               public _SimdTraits<_Tp, _Abi>::_SimdBase
  {
    using _Traits = _SimdTraits<_Tp, _Abi>;
    using _MemberType = typename _Traits::_SimdMember;
    using _CastType = typename _Traits::_SimdCastType;
    // Null pointer used purely as a type tag to select _Impl overloads.
    static constexpr _Tp* _S_type_tag = nullptr;
    friend typename _Traits::_SimdBase;

  public:
    using _Impl = typename _Traits::_SimdImpl;
    friend _Impl;
    friend _SimdIntOperators<simd, _Tp, _Abi, true>;

    using value_type = _Tp;
    using reference = _SmartReference<_MemberType, _Impl, value_type>;
    using mask_type = simd_mask<_Tp, _Abi>;
    using abi_type = _Abi;

    // Number of elements; 0 if the _Tp/_Abi combination is invalid.
    static constexpr size_t size()
    { return __size_or_zero_v<_Tp, _Abi>; }

    _GLIBCXX_SIMD_CONSTEXPR simd() = default;
    _GLIBCXX_SIMD_CONSTEXPR simd(const simd&) = default;
    _GLIBCXX_SIMD_CONSTEXPR simd(simd&&) noexcept = default;
    _GLIBCXX_SIMD_CONSTEXPR simd& operator=(const simd&) = default;
    _GLIBCXX_SIMD_CONSTEXPR simd& operator=(simd&&) noexcept = default;

    // implicit broadcast constructor
    // Replicates a single value into all elements; constrained to
    // value-preserving conversions (and int literals), excluding bool.
    template <typename _Up,
              typename = enable_if_t<!is_same_v<__remove_cvref_t<_Up>, bool>>>
      _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
      simd(_ValuePreservingOrInt<_Up, value_type>&& __x)
      : _M_data(
          _Impl::_S_broadcast(static_cast<value_type>(static_cast<_Up&&>(__x))))
      {}

    // implicit type conversion constructor (convert from fixed_size to
    // fixed_size)
    // Only enabled for non-narrowing conversions between same-size
    // fixed_size simds; goes through a memory round-trip via std::array.
    template <typename _Up>
      _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
      simd(const simd<_Up, simd_abi::fixed_size<size()>>& __x,
           enable_if_t<
             conjunction<
               is_same<simd_abi::fixed_size<size()>, abi_type>,
               negation<__is_narrowing_conversion<_Up, value_type>>,
               __converts_to_higher_integer_rank<_Up, value_type>>::value,
             void*> = nullptr)
      : simd{static_cast<array<_Up, size()>>(__x).data(), vector_aligned} {}

    // explicit type conversion constructor
#ifdef _GLIBCXX_SIMD_ENABLE_STATIC_CAST
    template <typename _Up, typename _A2,
              typename = decltype(static_simd_cast<simd>(
                declval<const simd<_Up, _A2>&>()))>
      _GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR
      simd(const simd<_Up, _A2>& __x)
      : simd(static_simd_cast<simd>(__x)) {}
#endif // _GLIBCXX_SIMD_ENABLE_STATIC_CAST

    // generator constructor
    // Initializes element i to __gen(_SizeConstant<i>); the generator's
    // result must be value-preserving for value_type.
    template <typename _Fp>
      _GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR
      simd(_Fp&& __gen, _ValuePreservingOrInt<decltype(declval<_Fp>()(
                                                declval<_SizeConstant<0>&>())),
                                              value_type>* = nullptr)
      : _M_data(_Impl::_S_generator(static_cast<_Fp&&>(__gen), _S_type_tag)) {}

    // load constructor
    template <typename _Up, typename _Flags>
      _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
      simd(const _Up* __mem, _IsSimdFlagType<_Flags>)
      : _M_data(
          _Impl::_S_load(_Flags::template _S_apply<simd>(__mem), _S_type_tag))
      {}

    // loads [simd.load]
    template <typename _Up, typename _Flags>
      _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR void
      copy_from(const _Vectorizable<_Up>* __mem, _IsSimdFlagType<_Flags>)
      {
        _M_data = static_cast<decltype(_M_data)>(
          _Impl::_S_load(_Flags::template _S_apply<simd>(__mem), _S_type_tag));
      }

    // stores [simd.store]
    template <typename _Up, typename _Flags>
      _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR void
      copy_to(_Vectorizable<_Up>* __mem, _IsSimdFlagType<_Flags>) const
      {
        _Impl::_S_store(_M_data, _Flags::template _S_apply<simd>(__mem),
                        _S_type_tag);
      }

    // scalar access
    // Mutable subscript returns a smart-reference proxy that writes
    // through on assignment.
    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR reference
    operator[](size_t __i)
    { return {_M_data, int(__i)}; }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR value_type
    operator[]([[maybe_unused]] size_t __i) const
    {
      if constexpr (__is_scalar_abi<_Abi>())
        {
          _GLIBCXX_DEBUG_ASSERT(__i == 0);
          return _M_data;
        }
      else
        return _M_data[__i];
    }

    // increment and decrement:
    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd&
    operator++()
    {
      _Impl::_S_increment(_M_data);
      return *this;
    }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd
    operator++(int)
    {
      simd __r = *this;
      _Impl::_S_increment(_M_data);
      return __r;
    }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd&
    operator--()
    {
      _Impl::_S_decrement(_M_data);
      return *this;
    }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd
    operator--(int)
    {
      simd __r = *this;
      _Impl::_S_decrement(_M_data);
      return __r;
    }

    // unary operators (for any _Tp)
    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR mask_type
    operator!() const
    { return {__private_init, _Impl::_S_negate(_M_data)}; }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd
    operator+() const
    { return *this; }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd
    operator-() const
    { return {__private_init, _Impl::_S_unary_minus(_M_data)}; }

    // access to internal representation (suggested extension)
    _GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR
    simd(_CastType __init) : _M_data(__init) {}

    // compound assignment [simd.cassign]
    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd&
    operator+=(simd& __lhs, const simd& __x)
    { return __lhs = __lhs + __x; }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd&
    operator-=(simd& __lhs, const simd& __x)
    { return __lhs = __lhs - __x; }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd&
    operator*=(simd& __lhs, const simd& __x)
    { return __lhs = __lhs * __x; }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd&
    operator/=(simd& __lhs, const simd& __x)
    { return __lhs = __lhs / __x; }

    // binary operators [simd.binary]
    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
    operator+(const simd& __x, const simd& __y)
    { return {__private_init, _Impl::_S_plus(__x._M_data, __y._M_data)}; }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
    operator-(const simd& __x, const simd& __y)
    { return {__private_init, _Impl::_S_minus(__x._M_data, __y._M_data)}; }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
    operator*(const simd& __x, const simd& __y)
    { return {__private_init, _Impl::_S_multiplies(__x._M_data, __y._M_data)}; }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
    operator/(const simd& __x, const simd& __y)
    { return {__private_init, _Impl::_S_divides(__x._M_data, __y._M_data)}; }

    // compares [simd.comparison]
    // All comparisons return an element-wise mask_type.
    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
    operator==(const simd& __x, const simd& __y)
    { return simd::_S_make_mask(_Impl::_S_equal_to(__x._M_data, __y._M_data)); }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
    operator!=(const simd& __x, const simd& __y)
    {
      return simd::_S_make_mask(
        _Impl::_S_not_equal_to(__x._M_data, __y._M_data));
    }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
    operator<(const simd& __x, const simd& __y)
    { return simd::_S_make_mask(_Impl::_S_less(__x._M_data, __y._M_data)); }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
    operator<=(const simd& __x, const simd& __y)
    {
      return simd::_S_make_mask(_Impl::_S_less_equal(__x._M_data, __y._M_data));
    }

    // > and >= are implemented by swapping the operands of < and <=.
    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
    operator>(const simd& __x, const simd& __y)
    { return simd::_S_make_mask(_Impl::_S_less(__y._M_data, __x._M_data)); }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
    operator>=(const simd& __x, const simd& __y)
    {
      return simd::_S_make_mask(_Impl::_S_less_equal(__y._M_data, __x._M_data));
    }

    // operator?: overloads (suggested extension) {{{
#ifdef __GXX_CONDITIONAL_IS_OVERLOADABLE__
    // Element-wise select: result takes __where_true where __k is set,
    // __where_false elsewhere.
    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
    operator?:(const mask_type& __k, const simd& __where_true,
               const simd& __where_false)
    {
      auto __ret = __where_false;
      _Impl::_S_masked_assign(__data(__k), __data(__ret), __data(__where_true));
      return __ret;
    }

#endif // __GXX_CONDITIONAL_IS_OVERLOADABLE__
    // }}}

    // "private" because of the first argument's namespace
    _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
    simd(_PrivateInit, const _MemberType& __init)
    : _M_data(__init) {}

    // "private" because of the first argument's namespace
    // Sets every element selected by the bitset to a nonzero value by
    // complementing the zero-initialized elements under the mask.
    _GLIBCXX_SIMD_INTRINSIC
    simd(_BitsetInit, bitset<size()> __init) : _M_data()
    { where(mask_type(__bitset_init, __init), *this) = ~*this; }

    // True if the compiler can prove the value is a compile-time constant.
    _GLIBCXX_SIMD_INTRINSIC constexpr bool
    _M_is_constprop() const
    {
      if constexpr (__is_scalar_abi<_Abi>())
        return __builtin_constant_p(_M_data);
      else
        return _M_data._M_is_constprop();
    }

  private:
    // Wrap an implementation-level mask value in mask_type.
    _GLIBCXX_SIMD_INTRINSIC static constexpr mask_type
    _S_make_mask(typename mask_type::_MemberType __k)
    { return {__private_init, __k}; }

    friend const auto& __data<value_type, abi_type>(const simd&);
    friend auto& __data<value_type, abi_type>(simd&);
    alignas(_Traits::_S_simd_align) _MemberType _M_data;
  };
5502
5503// }}}
5504/// @cond undocumented
5505// __data {{{
5506template <typename _Tp, typename _Ap>
5507 _GLIBCXX_SIMD_INTRINSIC constexpr const auto&
5508 __data(const simd<_Tp, _Ap>& __x)
5509 { return __x._M_data; }
5510
5511template <typename _Tp, typename _Ap>
5512 _GLIBCXX_SIMD_INTRINSIC constexpr auto&
5513 __data(simd<_Tp, _Ap>& __x)
5514 { return __x._M_data; }
5515
5516// }}}
// Opt-in namespace providing bitwise operators on simd, intended for
// floating-point bit manipulation (sign/exponent tricks).
// NOTE(review): only operator~ is constrained to floating-point _Tp here;
// ^, | and & are unconstrained — presumably relying on overload
// resolution preferring the _SimdIntOperators members for integral simd.
namespace __float_bitwise_operators { //{{{
template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
  operator^(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
  { return {__private_init, _Ap::_SimdImpl::_S_bit_xor(__data(__a), __data(__b))}; }

template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
  operator|(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
  { return {__private_init, _Ap::_SimdImpl::_S_bit_or(__data(__a), __data(__b))}; }

template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
  operator&(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
  { return {__private_init, _Ap::_SimdImpl::_S_bit_and(__data(__a), __data(__b))}; }

// Bitwise complement; explicitly restricted to floating-point element types.
template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Ap>>
  operator~(const simd<_Tp, _Ap>& __a)
  { return {__private_init, _Ap::_SimdImpl::_S_complement(__data(__a))}; }
} // namespace __float_bitwise_operators }}}
5539/// @endcond
5540
5541/// @}
5542_GLIBCXX_SIMD_END_NAMESPACE
5543
5544#endif // __cplusplus >= 201703L
5545#endif // _GLIBCXX_EXPERIMENTAL_SIMD_H
5546
5547// vim: foldmethod=marker foldmarker={{{,}}}
constexpr _If_is_unsigned_integer< _Tp, int > popcount(_Tp __x) noexcept
The number of bits set in x.
Definition bit:426
constexpr duration< __common_rep_t< _Rep1, __disable_if_is_duration< _Rep2 > >, _Period > operator%(const duration< _Rep1, _Period > &__d, const _Rep2 &__s)
Definition chrono.h:779
constexpr complex< _Tp > operator*(const complex< _Tp > &__x, const complex< _Tp > &__y)
Return new complex value x times y.
Definition complex:395
constexpr complex< _Tp > operator-(const complex< _Tp > &__x, const complex< _Tp > &__y)
Return new complex value x minus y.
Definition complex:365
constexpr complex< _Tp > operator+(const complex< _Tp > &__x, const complex< _Tp > &__y)
Return new complex value x plus y.
Definition complex:335
constexpr complex< _Tp > operator/(const complex< _Tp > &__x, const complex< _Tp > &__y)
Return new complex value x divided by y.
Definition complex:425
typename remove_reference< _Tp >::type remove_reference_t
Alias template for remove_reference.
Definition type_traits:1640
typename make_unsigned< _Tp >::type make_unsigned_t
Alias template for make_unsigned.
Definition type_traits:1983
void void_t
A metafunction that always yields void, used for detecting valid types.
Definition type_traits:2632
integral_constant< bool, true > true_type
The type used as a compile-time boolean with true value.
Definition type_traits:82
typename conditional< _Cond, _Iftrue, _Iffalse >::type conditional_t
Alias template for conditional.
Definition type_traits:2614
integral_constant< bool, false > false_type
The type used as a compile-time boolean with false value.
Definition type_traits:85
typename enable_if< _Cond, _Tp >::type enable_if_t
Alias template for enable_if.
Definition type_traits:2610
constexpr auto tuple_cat(_Tpls &&... __tpls) -> typename __tuple_cat_result< _Tpls... >::__type
Create a tuple containing all elements from multiple tuple-like objects.
Definition tuple:2155
auto declval() noexcept -> decltype(__declval< _Tp >(0))
Definition type_traits:2386
constexpr tuple< typename __decay_and_strip< _Elements >::__type... > make_tuple(_Elements &&... __args)
Create a tuple containing copies of the arguments.
Definition tuple:2001
constexpr std::remove_reference< _Tp >::type && move(_Tp &&__t) noexcept
Convert a value to an rvalue.
Definition move.h:97
constexpr const _Tp & max(const _Tp &, const _Tp &)
This does what you think it does.
constexpr const _Tp & min(const _Tp &, const _Tp &)
This does what you think it does.
constexpr _Tp reduce(_InputIterator __first, _InputIterator __last, _Tp __init, _BinaryOperation __binary_op)
Calculate reduction of values in a range.
Definition numeric:287
constexpr bitset< _Nb > operator^(const bitset< _Nb > &__x, const bitset< _Nb > &__y) noexcept
Global bitwise operations on bitsets.
Definition bitset:1573
std::basic_istream< _CharT, _Traits > & operator>>(std::basic_istream< _CharT, _Traits > &__is, bitset< _Nb > &__x)
Global I/O operators for bitsets.
Definition bitset:1593
std::basic_ostream< _CharT, _Traits > & operator<<(std::basic_ostream< _CharT, _Traits > &__os, const bitset< _Nb > &__x)
Global I/O operators for bitsets.
Definition bitset:1683
constexpr bitset< _Nb > operator|(const bitset< _Nb > &__x, const bitset< _Nb > &__y) noexcept
Global bitwise operations on bitsets.
Definition bitset:1563
constexpr bitset< _Nb > operator&(const bitset< _Nb > &__x, const bitset< _Nb > &__y) noexcept
Global bitwise operations on bitsets.
Definition bitset:1553