19#include <riscv_vector.h>
33using DFromV =
typename DFromV_t<RemoveConst<V>>::type;
36using TFromV = TFromD<DFromV<V>>;
// SFINAE guard: enables an overload only when the descriptor D's LMUL
// exponent (Pow2) lies in the inclusive range [min, max].
39#define HWY_RVV_IF_POW2_IN(D, min, max) \
40 hwy::EnableIf<(min) <= Pow2(D()) && Pow2(D()) <= (max)>* = nullptr
42template <
typename T,
size_t N,
int kPow2>
// Invokes X_MACRO once per mask type. Arguments per row appear to be
// (SEW, SHIFT, MLEN, NAME, OP) — TODO confirm against the X_MACRO users.
59#define HWY_RVV_FOREACH_B(X_MACRO, NAME, OP) \
60 X_MACRO(64, 0, 64, NAME, OP) \
61 X_MACRO(32, 0, 32, NAME, OP) \
62 X_MACRO(16, 0, 16, NAME, OP) \
63 X_MACRO(8, 0, 8, NAME, OP) \
64 X_MACRO(8, 1, 4, NAME, OP) \
65 X_MACRO(8, 2, 2, NAME, OP) \
66 X_MACRO(8, 3, 1, NAME, OP)
// TRUNC tables: one X_MACRO row per (SEW, LMUL) combination for which a
// half-LMUL type (LMULH column) exists. Row arguments:
// (BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP),
// where `__` marks a nonexistent double/half width or LMUL.
78#define HWY_RVV_FOREACH_08_TRUNC(X_MACRO, BASE, CHAR, NAME, OP) \
79 X_MACRO(BASE, CHAR, 8, 16, __, mf4, mf2, mf8, -2, 32, NAME, OP) \
80 X_MACRO(BASE, CHAR, 8, 16, __, mf2, m1, mf4, -1, 16, NAME, OP) \
81 X_MACRO(BASE, CHAR, 8, 16, __, m1, m2, mf2, 0, 8, NAME, OP) \
82 X_MACRO(BASE, CHAR, 8, 16, __, m2, m4, m1, 1, 4, NAME, OP) \
83 X_MACRO(BASE, CHAR, 8, 16, __, m4, m8, m2, 2, 2, NAME, OP) \
84 X_MACRO(BASE, CHAR, 8, 16, __, m8, __, m4, 3, 1, NAME, OP)
// 16-bit lanes: smallest LMUL with a half is mf2 (half = mf4).
86#define HWY_RVV_FOREACH_16_TRUNC(X_MACRO, BASE, CHAR, NAME, OP) \
87 X_MACRO(BASE, CHAR, 16, 32, 8, mf2, m1, mf4, -1, 32, NAME, OP) \
88 X_MACRO(BASE, CHAR, 16, 32, 8, m1, m2, mf2, 0, 16, NAME, OP) \
89 X_MACRO(BASE, CHAR, 16, 32, 8, m2, m4, m1, 1, 8, NAME, OP) \
90 X_MACRO(BASE, CHAR, 16, 32, 8, m4, m8, m2, 2, 4, NAME, OP) \
91 X_MACRO(BASE, CHAR, 16, 32, 8, m8, __, m4, 3, 2, NAME, OP)
// 32-bit lanes: starts at m1 (half = mf2).
93#define HWY_RVV_FOREACH_32_TRUNC(X_MACRO, BASE, CHAR, NAME, OP) \
94 X_MACRO(BASE, CHAR, 32, 64, 16, m1, m2, mf2, 0, 32, NAME, OP) \
95 X_MACRO(BASE, CHAR, 32, 64, 16, m2, m4, m1, 1, 16, NAME, OP) \
96 X_MACRO(BASE, CHAR, 32, 64, 16, m4, m8, m2, 2, 8, NAME, OP) \
97 X_MACRO(BASE, CHAR, 32, 64, 16, m8, __, m4, 3, 4, NAME, OP)
// 64-bit lanes: starts at m2 (half = m1); no doubled SEW exists (SEWD = __).
99#define HWY_RVV_FOREACH_64_TRUNC(X_MACRO, BASE, CHAR, NAME, OP) \
100 X_MACRO(BASE, CHAR, 64, __, 32, m2, m4, m1, 1, 32, NAME, OP) \
101 X_MACRO(BASE, CHAR, 64, __, 32, m4, m8, m2, 2, 16, NAME, OP) \
102 X_MACRO(BASE, CHAR, 64, __, 32, m8, __, m4, 3, 8, NAME, OP)
// DEMOTE tables: rows for which a narrower source type exists (SEWH column
// is the half SEW). Same row-argument layout as the TRUNC tables above.
105#define HWY_RVV_FOREACH_08_DEMOTE(X_MACRO, BASE, CHAR, NAME, OP) \
106 X_MACRO(BASE, CHAR, 8, 16, __, mf4, mf2, mf8, -2, 32, NAME, OP) \
107 X_MACRO(BASE, CHAR, 8, 16, __, mf2, m1, mf4, -1, 16, NAME, OP) \
108 X_MACRO(BASE, CHAR, 8, 16, __, m1, m2, mf2, 0, 8, NAME, OP) \
109 X_MACRO(BASE, CHAR, 8, 16, __, m2, m4, m1, 1, 4, NAME, OP) \
110 X_MACRO(BASE, CHAR, 8, 16, __, m4, m8, m2, 2, 2, NAME, OP) \
111 X_MACRO(BASE, CHAR, 8, 16, __, m8, __, m4, 3, 1, NAME, OP)
// 16-bit lanes: includes mf4 (smaller than the TRUNC table's mf2 start).
113#define HWY_RVV_FOREACH_16_DEMOTE(X_MACRO, BASE, CHAR, NAME, OP) \
114 X_MACRO(BASE, CHAR, 16, 32, 8, mf4, mf2, mf8, -2, 64, NAME, OP) \
115 X_MACRO(BASE, CHAR, 16, 32, 8, mf2, m1, mf4, -1, 32, NAME, OP) \
116 X_MACRO(BASE, CHAR, 16, 32, 8, m1, m2, mf2, 0, 16, NAME, OP) \
117 X_MACRO(BASE, CHAR, 16, 32, 8, m2, m4, m1, 1, 8, NAME, OP) \
118 X_MACRO(BASE, CHAR, 16, 32, 8, m4, m8, m2, 2, 4, NAME, OP) \
119 X_MACRO(BASE, CHAR, 16, 32, 8, m8, __, m4, 3, 2, NAME, OP)
// 32-bit lanes: includes mf2.
121#define HWY_RVV_FOREACH_32_DEMOTE(X_MACRO, BASE, CHAR, NAME, OP) \
122 X_MACRO(BASE, CHAR, 32, 64, 16, mf2, m1, mf4, -1, 64, NAME, OP) \
123 X_MACRO(BASE, CHAR, 32, 64, 16, m1, m2, mf2, 0, 32, NAME, OP) \
124 X_MACRO(BASE, CHAR, 32, 64, 16, m2, m4, m1, 1, 16, NAME, OP) \
125 X_MACRO(BASE, CHAR, 32, 64, 16, m4, m8, m2, 2, 8, NAME, OP) \
126 X_MACRO(BASE, CHAR, 32, 64, 16, m8, __, m4, 3, 4, NAME, OP)
// 64-bit lanes: includes m1 (its half-SEW m32 half is mf2).
128#define HWY_RVV_FOREACH_64_DEMOTE(X_MACRO, BASE, CHAR, NAME, OP) \
129 X_MACRO(BASE, CHAR, 64, __, 32, m1, m2, mf2, 0, 64, NAME, OP) \
130 X_MACRO(BASE, CHAR, 64, __, 32, m2, m4, m1, 1, 32, NAME, OP) \
131 X_MACRO(BASE, CHAR, 64, __, 32, m4, m8, m2, 2, 16, NAME, OP) \
132 X_MACRO(BASE, CHAR, 64, __, 32, m8, __, m4, 3, 8, NAME, OP)
// LE2 tables: all rows with LMUL <= 2, i.e. those for which a doubled LMUL
// (LMULD column) still exists. Same row-argument layout as the tables above.
135#define HWY_RVV_FOREACH_08_LE2(X_MACRO, BASE, CHAR, NAME, OP) \
136 X_MACRO(BASE, CHAR, 8, 16, __, mf8, mf4, __, -3, 64, NAME, OP) \
137 X_MACRO(BASE, CHAR, 8, 16, __, mf4, mf2, mf8, -2, 32, NAME, OP) \
138 X_MACRO(BASE, CHAR, 8, 16, __, mf2, m1, mf4, -1, 16, NAME, OP) \
139 X_MACRO(BASE, CHAR, 8, 16, __, m1, m2, mf2, 0, 8, NAME, OP) \
140 X_MACRO(BASE, CHAR, 8, 16, __, m2, m4, m1, 1, 4, NAME, OP)
142#define HWY_RVV_FOREACH_16_LE2(X_MACRO, BASE, CHAR, NAME, OP) \
143 X_MACRO(BASE, CHAR, 16, 32, 8, mf4, mf2, mf8, -2, 64, NAME, OP) \
144 X_MACRO(BASE, CHAR, 16, 32, 8, mf2, m1, mf4, -1, 32, NAME, OP) \
145 X_MACRO(BASE, CHAR, 16, 32, 8, m1, m2, mf2, 0, 16, NAME, OP) \
146 X_MACRO(BASE, CHAR, 16, 32, 8, m2, m4, m1, 1, 8, NAME, OP)
148#define HWY_RVV_FOREACH_32_LE2(X_MACRO, BASE, CHAR, NAME, OP) \
149 X_MACRO(BASE, CHAR, 32, 64, 16, mf2, m1, mf4, -1, 64, NAME, OP) \
150 X_MACRO(BASE, CHAR, 32, 64, 16, m1, m2, mf2, 0, 32, NAME, OP) \
151 X_MACRO(BASE, CHAR, 32, 64, 16, m2, m4, m1, 1, 16, NAME, OP)
153#define HWY_RVV_FOREACH_64_LE2(X_MACRO, BASE, CHAR, NAME, OP) \
154 X_MACRO(BASE, CHAR, 64, __, 32, m1, m2, mf2, 0, 64, NAME, OP) \
155 X_MACRO(BASE, CHAR, 64, __, 32, m2, m4, m1, 1, 32, NAME, OP)
// EXT tables: LE2 rows plus the m4 row (so LMUL <= 4; m4's double m8 exists).
158#define HWY_RVV_FOREACH_08_EXT(X_MACRO, BASE, CHAR, NAME, OP) \
159 HWY_RVV_FOREACH_08_LE2(X_MACRO, BASE, CHAR, NAME, OP) \
160 X_MACRO(BASE, CHAR, 8, 16, __, m4, m8, m2, 2, 2, NAME, OP)
162#define HWY_RVV_FOREACH_16_EXT(X_MACRO, BASE, CHAR, NAME, OP) \
163 HWY_RVV_FOREACH_16_LE2(X_MACRO, BASE, CHAR, NAME, OP) \
164 X_MACRO(BASE, CHAR, 16, 32, 8, m4, m8, m2, 2, 4, NAME, OP)
166#define HWY_RVV_FOREACH_32_EXT(X_MACRO, BASE, CHAR, NAME, OP) \
167 HWY_RVV_FOREACH_32_LE2(X_MACRO, BASE, CHAR, NAME, OP) \
168 X_MACRO(BASE, CHAR, 32, 64, 16, m4, m8, m2, 2, 8, NAME, OP)
170#define HWY_RVV_FOREACH_64_EXT(X_MACRO, BASE, CHAR, NAME, OP) \
171 HWY_RVV_FOREACH_64_LE2(X_MACRO, BASE, CHAR, NAME, OP) \
172 X_MACRO(BASE, CHAR, 64, __, 32, m4, m8, m2, 2, 16, NAME, OP)
// ALL tables: EXT rows plus the m8 row (every supported LMUL for this SEW).
175#define HWY_RVV_FOREACH_08_ALL(X_MACRO, BASE, CHAR, NAME, OP) \
176 HWY_RVV_FOREACH_08_EXT(X_MACRO, BASE, CHAR, NAME, OP) \
177 X_MACRO(BASE, CHAR, 8, 16, __, m8, __, m4, 3, 1, NAME, OP)
179#define HWY_RVV_FOREACH_16_ALL(X_MACRO, BASE, CHAR, NAME, OP) \
180 HWY_RVV_FOREACH_16_EXT(X_MACRO, BASE, CHAR, NAME, OP) \
181 X_MACRO(BASE, CHAR, 16, 32, 8, m8, __, m4, 3, 2, NAME, OP)
183#define HWY_RVV_FOREACH_32_ALL(X_MACRO, BASE, CHAR, NAME, OP) \
184 HWY_RVV_FOREACH_32_EXT(X_MACRO, BASE, CHAR, NAME, OP) \
185 X_MACRO(BASE, CHAR, 32, 64, 16, m8, __, m4, 3, 4, NAME, OP)
187#define HWY_RVV_FOREACH_64_ALL(X_MACRO, BASE, CHAR, NAME, OP) \
188 HWY_RVV_FOREACH_64_EXT(X_MACRO, BASE, CHAR, NAME, OP) \
189 X_MACRO(BASE, CHAR, 64, __, 32, m8, __, m4, 3, 8, NAME, OP)
// VIRT tables: "virtual" rows whose SHIFT is below the smallest native LMUL
// for that SEW (note e.g. SHIFT -3 paired with LMUL mf4 for SEW 16). 8-bit
// lanes have no such row, so the 08 table expands to nothing.
207#define HWY_RVV_FOREACH_08_VIRT(X_MACRO, BASE, CHAR, NAME, OP)
209#define HWY_RVV_FOREACH_16_VIRT(X_MACRO, BASE, CHAR, NAME, OP) \
210 X_MACRO(BASE, CHAR, 16, 32, 8, mf4, mf2, mf8, -3, 64, NAME, OP)
212#define HWY_RVV_FOREACH_32_VIRT(X_MACRO, BASE, CHAR, NAME, OP) \
213 X_MACRO(BASE, CHAR, 32, 64, 16, mf2, m1, mf4, -2, 64, NAME, OP)
215#define HWY_RVV_FOREACH_64_VIRT(X_MACRO, BASE, CHAR, NAME, OP) \
216 X_MACRO(BASE, CHAR, 64, __, 32, m1, m2, mf2, -1, 64, NAME, OP)
// ALL_VIRT = ALL rows followed by the VIRT rows for the same SEW.
219#define HWY_RVV_FOREACH_08_ALL_VIRT(X_MACRO, BASE, CHAR, NAME, OP) \
220 HWY_RVV_FOREACH_08_ALL(X_MACRO, BASE, CHAR, NAME, OP) \
221 HWY_RVV_FOREACH_08_VIRT(X_MACRO, BASE, CHAR, NAME, OP)
223#define HWY_RVV_FOREACH_16_ALL_VIRT(X_MACRO, BASE, CHAR, NAME, OP) \
224 HWY_RVV_FOREACH_16_ALL(X_MACRO, BASE, CHAR, NAME, OP) \
225 HWY_RVV_FOREACH_16_VIRT(X_MACRO, BASE, CHAR, NAME, OP)
227#define HWY_RVV_FOREACH_32_ALL_VIRT(X_MACRO, BASE, CHAR, NAME, OP) \
228 HWY_RVV_FOREACH_32_ALL(X_MACRO, BASE, CHAR, NAME, OP) \
229 HWY_RVV_FOREACH_32_VIRT(X_MACRO, BASE, CHAR, NAME, OP)
231#define HWY_RVV_FOREACH_64_ALL_VIRT(X_MACRO, BASE, CHAR, NAME, OP) \
232 HWY_RVV_FOREACH_64_ALL(X_MACRO, BASE, CHAR, NAME, OP) \
233 HWY_RVV_FOREACH_64_VIRT(X_MACRO, BASE, CHAR, NAME, OP)
// LE2_VIRT = LE2 rows followed by the VIRT rows for the same SEW.
236#define HWY_RVV_FOREACH_08_LE2_VIRT(X_MACRO, BASE, CHAR, NAME, OP) \
237 HWY_RVV_FOREACH_08_LE2(X_MACRO, BASE, CHAR, NAME, OP) \
238 HWY_RVV_FOREACH_08_VIRT(X_MACRO, BASE, CHAR, NAME, OP)
240#define HWY_RVV_FOREACH_16_LE2_VIRT(X_MACRO, BASE, CHAR, NAME, OP) \
241 HWY_RVV_FOREACH_16_LE2(X_MACRO, BASE, CHAR, NAME, OP) \
242 HWY_RVV_FOREACH_16_VIRT(X_MACRO, BASE, CHAR, NAME, OP)
244#define HWY_RVV_FOREACH_32_LE2_VIRT(X_MACRO, BASE, CHAR, NAME, OP) \
245 HWY_RVV_FOREACH_32_LE2(X_MACRO, BASE, CHAR, NAME, OP) \
246 HWY_RVV_FOREACH_32_VIRT(X_MACRO, BASE, CHAR, NAME, OP)
248#define HWY_RVV_FOREACH_64_LE2_VIRT(X_MACRO, BASE, CHAR, NAME, OP) \
249 HWY_RVV_FOREACH_64_LE2(X_MACRO, BASE, CHAR, NAME, OP) \
250 HWY_RVV_FOREACH_64_VIRT(X_MACRO, BASE, CHAR, NAME, OP)
// EXT_VIRT = EXT rows followed by the VIRT rows for the same SEW.
253#define HWY_RVV_FOREACH_08_EXT_VIRT(X_MACRO, BASE, CHAR, NAME, OP) \
254 HWY_RVV_FOREACH_08_EXT(X_MACRO, BASE, CHAR, NAME, OP) \
255 HWY_RVV_FOREACH_08_VIRT(X_MACRO, BASE, CHAR, NAME, OP)
257#define HWY_RVV_FOREACH_16_EXT_VIRT(X_MACRO, BASE, CHAR, NAME, OP) \
258 HWY_RVV_FOREACH_16_EXT(X_MACRO, BASE, CHAR, NAME, OP) \
259 HWY_RVV_FOREACH_16_VIRT(X_MACRO, BASE, CHAR, NAME, OP)
261#define HWY_RVV_FOREACH_32_EXT_VIRT(X_MACRO, BASE, CHAR, NAME, OP) \
262 HWY_RVV_FOREACH_32_EXT(X_MACRO, BASE, CHAR, NAME, OP) \
263 HWY_RVV_FOREACH_32_VIRT(X_MACRO, BASE, CHAR, NAME, OP)
265#define HWY_RVV_FOREACH_64_EXT_VIRT(X_MACRO, BASE, CHAR, NAME, OP) \
266 HWY_RVV_FOREACH_64_EXT(X_MACRO, BASE, CHAR, NAME, OP) \
267 HWY_RVV_FOREACH_64_VIRT(X_MACRO, BASE, CHAR, NAME, OP)
// DEMOTE_VIRT = DEMOTE rows followed by the VIRT rows for the same SEW.
270#define HWY_RVV_FOREACH_08_DEMOTE_VIRT(X_MACRO, BASE, CHAR, NAME, OP) \
271 HWY_RVV_FOREACH_08_DEMOTE(X_MACRO, BASE, CHAR, NAME, OP) \
272 HWY_RVV_FOREACH_08_VIRT(X_MACRO, BASE, CHAR, NAME, OP)
274#define HWY_RVV_FOREACH_16_DEMOTE_VIRT(X_MACRO, BASE, CHAR, NAME, OP) \
275 HWY_RVV_FOREACH_16_DEMOTE(X_MACRO, BASE, CHAR, NAME, OP) \
276 HWY_RVV_FOREACH_16_VIRT(X_MACRO, BASE, CHAR, NAME, OP)
278#define HWY_RVV_FOREACH_32_DEMOTE_VIRT(X_MACRO, BASE, CHAR, NAME, OP) \
279 HWY_RVV_FOREACH_32_DEMOTE(X_MACRO, BASE, CHAR, NAME, OP) \
280 HWY_RVV_FOREACH_32_VIRT(X_MACRO, BASE, CHAR, NAME, OP)
282#define HWY_RVV_FOREACH_64_DEMOTE_VIRT(X_MACRO, BASE, CHAR, NAME, OP) \
283 HWY_RVV_FOREACH_64_DEMOTE(X_MACRO, BASE, CHAR, NAME, OP) \
284 HWY_RVV_FOREACH_64_VIRT(X_MACRO, BASE, CHAR, NAME, OP)
// Per-type dispatchers: LMULS selects which table suffix to concatenate
// (_TRUNC, _DEMOTE, _LE2, _EXT, _ALL, _VIRT, or their *_VIRT combinations).
// BASE/CHAR are fixed here: (uint, u), (int, i), (float, f).
287#define HWY_RVV_FOREACH_U08(X_MACRO, NAME, OP, LMULS) \
288 HWY_CONCAT(HWY_RVV_FOREACH_08, LMULS)(X_MACRO, uint, u, NAME, OP)
289#define HWY_RVV_FOREACH_U16(X_MACRO, NAME, OP, LMULS) \
290 HWY_CONCAT(HWY_RVV_FOREACH_16, LMULS)(X_MACRO, uint, u, NAME, OP)
291#define HWY_RVV_FOREACH_U32(X_MACRO, NAME, OP, LMULS) \
292 HWY_CONCAT(HWY_RVV_FOREACH_32, LMULS)(X_MACRO, uint, u, NAME, OP)
293#define HWY_RVV_FOREACH_U64(X_MACRO, NAME, OP, LMULS) \
294 HWY_CONCAT(HWY_RVV_FOREACH_64, LMULS)(X_MACRO, uint, u, NAME, OP)
297#define HWY_RVV_FOREACH_I08(X_MACRO, NAME, OP, LMULS) \
298 HWY_CONCAT(HWY_RVV_FOREACH_08, LMULS)(X_MACRO, int, i, NAME, OP)
299#define HWY_RVV_FOREACH_I16(X_MACRO, NAME, OP, LMULS) \
300 HWY_CONCAT(HWY_RVV_FOREACH_16, LMULS)(X_MACRO, int, i, NAME, OP)
301#define HWY_RVV_FOREACH_I32(X_MACRO, NAME, OP, LMULS) \
302 HWY_CONCAT(HWY_RVV_FOREACH_32, LMULS)(X_MACRO, int, i, NAME, OP)
303#define HWY_RVV_FOREACH_I64(X_MACRO, NAME, OP, LMULS) \
304 HWY_CONCAT(HWY_RVV_FOREACH_64, LMULS)(X_MACRO, int, i, NAME, OP)
308#define HWY_RVV_FOREACH_F16(X_MACRO, NAME, OP, LMULS) \
309 HWY_CONCAT(HWY_RVV_FOREACH_16, LMULS)(X_MACRO, float, f, NAME, OP)
// NOTE(review): F16 is defined twice here. In upstream Highway these two
// definitions are separated by #if/#else/#endif on f16 support; those guard
// lines appear to have been lost in extraction — confirm against upstream.
311#define HWY_RVV_FOREACH_F16(X_MACRO, NAME, OP, LMULS)
313#define HWY_RVV_FOREACH_F32(X_MACRO, NAME, OP, LMULS) \
314 HWY_CONCAT(HWY_RVV_FOREACH_32, LMULS)(X_MACRO, float, f, NAME, OP)
315#define HWY_RVV_FOREACH_F64(X_MACRO, NAME, OP, LMULS) \
316 HWY_CONCAT(HWY_RVV_FOREACH_64, LMULS)(X_MACRO, float, f, NAME, OP)
// Combined groups: UIxx = unsigned + signed of one SEW; the 163264 / 3264
// variants span several SEWs of the same signedness class.
319#define HWY_RVV_FOREACH_UI08(X_MACRO, NAME, OP, LMULS) \
320 HWY_RVV_FOREACH_U08(X_MACRO, NAME, OP, LMULS) \
321 HWY_RVV_FOREACH_I08(X_MACRO, NAME, OP, LMULS)
323#define HWY_RVV_FOREACH_UI16(X_MACRO, NAME, OP, LMULS) \
324 HWY_RVV_FOREACH_U16(X_MACRO, NAME, OP, LMULS) \
325 HWY_RVV_FOREACH_I16(X_MACRO, NAME, OP, LMULS)
327#define HWY_RVV_FOREACH_UI32(X_MACRO, NAME, OP, LMULS) \
328 HWY_RVV_FOREACH_U32(X_MACRO, NAME, OP, LMULS) \
329 HWY_RVV_FOREACH_I32(X_MACRO, NAME, OP, LMULS)
331#define HWY_RVV_FOREACH_UI64(X_MACRO, NAME, OP, LMULS) \
332 HWY_RVV_FOREACH_U64(X_MACRO, NAME, OP, LMULS) \
333 HWY_RVV_FOREACH_I64(X_MACRO, NAME, OP, LMULS)
335#define HWY_RVV_FOREACH_UI3264(X_MACRO, NAME, OP, LMULS) \
336 HWY_RVV_FOREACH_UI32(X_MACRO, NAME, OP, LMULS) \
337 HWY_RVV_FOREACH_UI64(X_MACRO, NAME, OP, LMULS)
339#define HWY_RVV_FOREACH_U163264(X_MACRO, NAME, OP, LMULS) \
340 HWY_RVV_FOREACH_U16(X_MACRO, NAME, OP, LMULS) \
341 HWY_RVV_FOREACH_U32(X_MACRO, NAME, OP, LMULS) \
342 HWY_RVV_FOREACH_U64(X_MACRO, NAME, OP, LMULS)
344#define HWY_RVV_FOREACH_I163264(X_MACRO, NAME, OP, LMULS) \
345 HWY_RVV_FOREACH_I16(X_MACRO, NAME, OP, LMULS) \
346 HWY_RVV_FOREACH_I32(X_MACRO, NAME, OP, LMULS) \
347 HWY_RVV_FOREACH_I64(X_MACRO, NAME, OP, LMULS)
349#define HWY_RVV_FOREACH_UI163264(X_MACRO, NAME, OP, LMULS) \
350 HWY_RVV_FOREACH_U163264(X_MACRO, NAME, OP, LMULS) \
351 HWY_RVV_FOREACH_I163264(X_MACRO, NAME, OP, LMULS)
353#define HWY_RVV_FOREACH_F3264(X_MACRO, NAME, OP, LMULS) \
354 HWY_RVV_FOREACH_F32(X_MACRO, NAME, OP, LMULS) \
355 HWY_RVV_FOREACH_F64(X_MACRO, NAME, OP, LMULS)
// Top-level iterators: all unsigned, all signed, all float, and the union.
// F routes 16-bit through FOREACH_F16, which may be empty (see note above).
358#define HWY_RVV_FOREACH_U(X_MACRO, NAME, OP, LMULS) \
359 HWY_RVV_FOREACH_U08(X_MACRO, NAME, OP, LMULS) \
360 HWY_RVV_FOREACH_U16(X_MACRO, NAME, OP, LMULS) \
361 HWY_RVV_FOREACH_U32(X_MACRO, NAME, OP, LMULS) \
362 HWY_RVV_FOREACH_U64(X_MACRO, NAME, OP, LMULS)
364#define HWY_RVV_FOREACH_I(X_MACRO, NAME, OP, LMULS) \
365 HWY_RVV_FOREACH_I08(X_MACRO, NAME, OP, LMULS) \
366 HWY_RVV_FOREACH_I16(X_MACRO, NAME, OP, LMULS) \
367 HWY_RVV_FOREACH_I32(X_MACRO, NAME, OP, LMULS) \
368 HWY_RVV_FOREACH_I64(X_MACRO, NAME, OP, LMULS)
370#define HWY_RVV_FOREACH_F(X_MACRO, NAME, OP, LMULS) \
371 HWY_RVV_FOREACH_F16(X_MACRO, NAME, OP, LMULS) \
372 HWY_RVV_FOREACH_F3264(X_MACRO, NAME, OP, LMULS)
375#define HWY_RVV_FOREACH_UI(X_MACRO, NAME, OP, LMULS) \
376 HWY_RVV_FOREACH_U(X_MACRO, NAME, OP, LMULS) \
377 HWY_RVV_FOREACH_I(X_MACRO, NAME, OP, LMULS)
379#define HWY_RVV_FOREACH(X_MACRO, NAME, OP, LMULS) \
380 HWY_RVV_FOREACH_U(X_MACRO, NAME, OP, LMULS) \
381 HWY_RVV_FOREACH_I(X_MACRO, NAME, OP, LMULS) \
382 HWY_RVV_FOREACH_F(X_MACRO, NAME, OP, LMULS)
// Token-pasting shorthands used inside the X-macros:
// T = lane type (e.g. uint8_t), D = Simd descriptor, V = RVV vector type
// (e.g. vuint8m1_t), M = RVV mask type (e.g. vbool8_t).
385#define HWY_RVV_T(BASE, SEW) BASE##SEW##_t
386#define HWY_RVV_D(BASE, SEW, N, SHIFT) Simd<HWY_RVV_T(BASE, SEW), N, SHIFT>
387#define HWY_RVV_V(BASE, SEW, LMUL) v##BASE##SEW##LMUL##_t
388#define HWY_RVV_M(MLEN) vbool##MLEN##_t
// HWY_SPECIALIZE: maps each native vector type back to its descriptor
// (DFromV_t specialization). HWY_RVV_LANES: returns the runtime lane count,
// capped at N and halved for capped 128-bit-fraction descriptors.
// NOTE(review): several original lines (closing braces, intermediate
// statements) were lost in extraction — verify against upstream rvv-inl.h.
394#define HWY_SPECIALIZE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
397 struct DFromV_t<HWY_RVV_V(BASE, SEW, LMUL)> { \
398 using Lane = HWY_RVV_T(BASE, SEW); \
399 using type = ScalableTag<Lane, SHIFT>; \
409#define HWY_RVV_LANES(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
411 template <size_t N> \
412 HWY_API size_t NAME(HWY_RVV_D(BASE, SEW, N, SHIFT) d) { \
413 size_t actual = v##OP##SEW##LMUL(); \
416 if (detail::IsFull(d)) return actual; \
420 if (detail::ScaleByPower(128 / SEW, SHIFT) == 1) actual >>= 1; \
421 return HWY_MIN(actual, N); \
427template <
size_t N,
int kPow2>
// AVL ("application vector length") = full lane count for this SEW/SHIFT,
// passed as the vl argument to the intrinsics below.
436#define HWY_RVV_AVL(SEW, SHIFT) \
437 Lanes(ScalableTag<HWY_RVV_T(uint, SEW), SHIFT>())
// Op generators: RETV_ARGV = unary v -> v, RETV_ARGVS = (vector, scalar),
// RETV_ARGVV = (vector, vector), RETM_ARGM = unary mask op, SET = splat.
// NOTE(review): the macros' closing lines were lost in extraction —
// verify against upstream rvv-inl.h before relying on exact bodies.
440#define HWY_RVV_RETV_ARGV(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
441 SHIFT, MLEN, NAME, OP) \
442 HWY_API HWY_RVV_V(BASE, SEW, LMUL) NAME(HWY_RVV_V(BASE, SEW, LMUL) v) { \
443 return v##OP##_v_##CHAR##SEW##LMUL(v, HWY_RVV_AVL(SEW, SHIFT)); \
447#define HWY_RVV_RETV_ARGVS(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
448 SHIFT, MLEN, NAME, OP) \
449 HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
450 NAME(HWY_RVV_V(BASE, SEW, LMUL) a, HWY_RVV_T(BASE, SEW) b) { \
451 return v##OP##_##CHAR##SEW##LMUL(a, b, HWY_RVV_AVL(SEW, SHIFT)); \
455#define HWY_RVV_RETV_ARGVV(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
456 SHIFT, MLEN, NAME, OP) \
457 HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
458 NAME(HWY_RVV_V(BASE, SEW, LMUL) a, HWY_RVV_V(BASE, SEW, LMUL) b) { \
459 return v##OP##_vv_##CHAR##SEW##LMUL(a, b, HWY_RVV_AVL(SEW, SHIFT)); \
463#define HWY_RVV_RETM_ARGM(SEW, SHIFT, MLEN, NAME, OP) \
464 HWY_API HWY_RVV_M(MLEN) NAME(HWY_RVV_M(MLEN) m) { \
465 return vm##OP##_m_b##MLEN(m, ~0ull); \
472#define HWY_RVV_SET(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
474 template <size_t N> \
475 HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
476 NAME(HWY_RVV_D(BASE, SEW, N, SHIFT) d, HWY_RVV_T(BASE, SEW) arg) { \
477 return v##OP##_##CHAR##SEW##LMUL(arg, Lanes(d)); \
486template <
size_t N,
int kPow2>
493using VFromD =
decltype(
Set(D(), TFromD<D>()));
// UNDEFINED: returns an uninitialized vector. TRUNC: reinterprets a vector
// as its half-LMUL type. EXT: zero-cost widening to the doubled LMUL;
// EXT_VIRT is the identity fallback for virtual (sub-native) LMULs.
// NOTE(review): bodies are truncated by extraction — verify upstream.
510#define HWY_RVV_UNDEFINED(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
511 SHIFT, MLEN, NAME, OP) \
512 template <size_t N> \
513 HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
514 NAME(HWY_RVV_D(BASE, SEW, N, SHIFT) ) { \
515 return v##OP##_##CHAR##SEW##LMUL(); \
519#undef HWY_RVV_UNDEFINED
// lmul_trunc: LMULH result from LMUL input (no data movement).
532#define HWY_RVV_TRUNC(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
534 HWY_API HWY_RVV_V(BASE, SEW, LMULH) NAME(HWY_RVV_V(BASE, SEW, LMUL) v) { \
535 return v##OP##_v_##CHAR##SEW##LMUL##_##CHAR##SEW##LMULH(v); \
541#define HWY_RVV_EXT(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
543 template <size_t N> \
544 HWY_API HWY_RVV_V(BASE, SEW, LMULD) \
545 NAME(HWY_RVV_D(BASE, SEW, N, SHIFT + 1) , \
546 HWY_RVV_V(BASE, SEW, LMUL) v) { \
547 return v##OP##_v_##CHAR##SEW##LMUL##_##CHAR##SEW##LMULD(v); \
554#define HWY_RVV_EXT_VIRT(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
555 SHIFT, MLEN, NAME, OP) \
556 template <size_t N> \
557 HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
558 NAME(HWY_RVV_D(BASE, SEW, N, SHIFT + 1) , \
559 HWY_RVV_V(BASE, SEW, LMUL) v) { \
563#undef HWY_RVV_EXT_VIRT
// BitCast plumbing: every cast goes through a byte (u8) vector.
// CAST_U8: u8 <-> u8 is the identity. CAST_I8: i8 <-> u8 reinterprets.
// CAST_U: one vreinterpret each way. CAST_IF: int/float go via a same-SEW
// unsigned intermediate (two vreinterprets). The CAST_VIRT_* variants handle
// virtual LMULs by Trunc-ing to the half-LMUL byte vector on the way out and
// Ext-ing back on the way in.
// NOTE(review): macro bodies are truncated by extraction (missing closing
// braces/lines) — verify against upstream rvv-inl.h.
569#define HWY_RVV_CAST_U8(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
570 SHIFT, MLEN, NAME, OP) \
571 template <typename T, size_t N> \
572 HWY_API vuint8##LMUL##_t BitCastToByte(Simd<T, N, SHIFT> , \
573 vuint8##LMUL##_t v) { \
576 template <size_t N> \
577 HWY_API vuint8##LMUL##_t BitCastFromByte( \
578 HWY_RVV_D(BASE, SEW, N, SHIFT) , vuint8##LMUL##_t v) { \
583#define HWY_RVV_CAST_I8(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
584 SHIFT, MLEN, NAME, OP) \
585 template <typename T, size_t N> \
586 HWY_API vuint8##LMUL##_t BitCastToByte(Simd<T, N, SHIFT> , \
587 vint8##LMUL##_t v) { \
588 return vreinterpret_v_i8##LMUL##_u8##LMUL(v); \
590 template <size_t N> \
591 HWY_API vint8##LMUL##_t BitCastFromByte( \
592 HWY_RVV_D(BASE, SEW, N, SHIFT) , vuint8##LMUL##_t v) { \
593 return vreinterpret_v_u8##LMUL##_i8##LMUL(v); \
598#define HWY_RVV_CAST_U(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
600 template <typename T, size_t N> \
601 HWY_API vuint8##LMUL##_t BitCastToByte(Simd<T, N, SHIFT> , \
602 HWY_RVV_V(BASE, SEW, LMUL) v) { \
603 return v##OP##_v_##CHAR##SEW##LMUL##_u8##LMUL(v); \
605 template <size_t N> \
606 HWY_API HWY_RVV_V(BASE, SEW, LMUL) BitCastFromByte( \
607 HWY_RVV_D(BASE, SEW, N, SHIFT) , vuint8##LMUL##_t v) { \
608 return v##OP##_v_u8##LMUL##_##CHAR##SEW##LMUL(v); \
612#define HWY_RVV_CAST_IF(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
613 SHIFT, MLEN, NAME, OP) \
614 template <typename T, size_t N> \
615 HWY_API vuint8##LMUL##_t BitCastToByte(Simd<T, N, SHIFT> , \
616 HWY_RVV_V(BASE, SEW, LMUL) v) { \
617 return v##OP##_v_u##SEW##LMUL##_u8##LMUL( \
618 v##OP##_v_##CHAR##SEW##LMUL##_u##SEW##LMUL(v)); \
620 template <size_t N> \
621 HWY_API HWY_RVV_V(BASE, SEW, LMUL) BitCastFromByte( \
622 HWY_RVV_D(BASE, SEW, N, SHIFT) , vuint8##LMUL##_t v) { \
623 return v##OP##_v_u##SEW##LMUL##_##CHAR##SEW##LMUL( \
624 v##OP##_v_u8##LMUL##_u##SEW##LMUL(v)); \
628#define HWY_RVV_CAST_VIRT_U(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
629 SHIFT, MLEN, NAME, OP) \
630 template <typename T, size_t N> \
631 HWY_API vuint8##LMULH##_t BitCastToByte(Simd<T, N, SHIFT> , \
632 HWY_RVV_V(BASE, SEW, LMUL) v) { \
633 return detail::Trunc(v##OP##_v_##CHAR##SEW##LMUL##_u8##LMUL(v)); \
635 template <size_t N> \
636 HWY_API HWY_RVV_V(BASE, SEW, LMUL) BitCastFromByte( \
637 HWY_RVV_D(BASE, SEW, N, SHIFT) , vuint8##LMULH##_t v) { \
638 HWY_RVV_D(uint, 8, N, SHIFT + 1) d2; \
639 const vuint8##LMUL##_t v2 = detail::Ext(d2, v); \
640 return v##OP##_v_u8##LMUL##_##CHAR##SEW##LMUL(v2); \
644#define HWY_RVV_CAST_VIRT_IF(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
645 SHIFT, MLEN, NAME, OP) \
646 template <typename T, size_t N> \
647 HWY_API vuint8##LMULH##_t BitCastToByte(Simd<T, N, SHIFT> , \
648 HWY_RVV_V(BASE, SEW, LMUL) v) { \
649 return detail::Trunc(v##OP##_v_u##SEW##LMUL##_u8##LMUL( \
650 v##OP##_v_##CHAR##SEW##LMUL##_u##SEW##LMUL(v))); \
652 template <size_t N> \
653 HWY_API HWY_RVV_V(BASE, SEW, LMUL) BitCastFromByte( \
654 HWY_RVV_D(BASE, SEW, N, SHIFT) , vuint8##LMULH##_t v) { \
655 HWY_RVV_D(uint, 8, N, SHIFT + 1) d2; \
656 const vuint8##LMUL##_t v2 = detail::Ext(d2, v); \
657 return v##OP##_v_u##SEW##LMUL##_##CHAR##SEW##LMUL( \
658 v##OP##_v_u8##LMUL##_u##SEW##LMUL(v2)); \
670#undef HWY_RVV_CAST_U8
671#undef HWY_RVV_CAST_I8
673#undef HWY_RVV_CAST_IF
674#undef HWY_RVV_CAST_VIRT_U
675#undef HWY_RVV_CAST_VIRT_IF
677template <
size_t N,
int kPow2>
685template <
class D,
class FromV>
692template <
class V,
class DU = RebindToUn
signed<DFromV<V>>>
// Iota: vector of ascending indices 0, 1, 2, ... (vid intrinsic family).
// NOTE(review): body truncated by extraction — closing lines missing.
703#define HWY_RVV_IOTA(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
705 template <size_t N> \
706 HWY_API HWY_RVV_V(BASE, SEW, LMUL) NAME(HWY_RVV_D(BASE, SEW, N, SHIFT) d) { \
707 return v##OP##_##CHAR##SEW##LMUL(Lanes(d)); \
713template <
class D,
class DU = RebindToUn
signed<D>>
726template <
class V, HWY_IF_FLOAT_V(V)>
742template <
class V, HWY_IF_FLOAT_V(V)>
744 using DF = DFromV<V>;
745 using DU = RebindToUnsigned<DF>;
753template <
class V, HWY_IF_FLOAT_V(V)>
755 using DF = DFromV<V>;
756 using DU = RebindToUnsigned<DF>;
769template <
class V, HWY_IF_FLOAT_V(V)>
771 using DF = DFromV<V>;
772 using DU = RebindToUnsigned<DF>;
779 return And(
Not(not_a), b);
785 return Xor(x1,
Xor(x2, x3));
791 return Or(o1,
Or(o2, o3));
797 return Or(o,
And(a1, a2));
// Shift-by-immediate (template parameter kBits) plus a *Same variant taking
// a runtime count. NOTE(review): the cast to uint8_t in *Same, and the
// macro's truncated tail, should be verified against upstream.
853#define HWY_RVV_SHIFT(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
855 template <int kBits> \
856 HWY_API HWY_RVV_V(BASE, SEW, LMUL) NAME(HWY_RVV_V(BASE, SEW, LMUL) v) { \
857 return v##OP##_vx_##CHAR##SEW##LMUL(v, kBits, HWY_RVV_AVL(SEW, SHIFT)); \
859 HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
860 NAME##Same(HWY_RVV_V(BASE, SEW, LMUL) v, int bits) { \
861 return v##OP##_vx_##CHAR##SEW##LMUL(v, static_cast<uint8_t>(bits), \
862 HWY_RVV_AVL(SEW, SHIFT)); \
881 using VU16 =
VFromD<
decltype(du16)>;
883 const VU16 vFDB97531 = ShiftRight<8>(
BitCast(du16,
v));
884 const VU16 vECA86420 = detail::AndS(
BitCast(du16,
v), 0xFF);
885 const VU16 sFE_DC_BA_98_76_54_32_10 =
Add(vFDB97531, vECA86420);
887 const VU16 szz_FE_zz_BA_zz_76_zz_32 =
888 BitCast(du16, ShiftRight<16>(
BitCast(du32, sFE_DC_BA_98_76_54_32_10)));
889 const VU16 sxx_FC_xx_B8_xx_74_xx_30 =
890 Add(sFE_DC_BA_98_76_54_32_10, szz_FE_zz_BA_zz_76_zz_32);
891 const VU16 szz_zz_xx_FC_zz_zz_xx_74 =
892 BitCast(du16, ShiftRight<32>(
BitCast(du64, sxx_FC_xx_B8_xx_74_xx_30)));
893 const VU16 sxx_xx_xx_F8_xx_xx_xx_70 =
894 Add(sxx_FC_xx_B8_xx_74_xx_30, szz_zz_xx_FC_zz_zz_xx_74);
895 return detail::AndS(
BitCast(du64, sxx_xx_xx_F8_xx_xx_xx_70), 0xFFFFull);
899template <
int kBits,
class V>
901 constexpr size_t kSizeInBits =
sizeof(TFromV<V>) * 8;
902 static_assert(0 <= kBits && kBits < kSizeInBits,
"Invalid shift count");
903 if (kBits == 0)
return v;
904 return Or(ShiftRight<kBits>(
v), ShiftLeft<kSizeInBits - kBits>(
v));
// Variable (per-lane) shifts: SHIFT_VV for unsigned counts, SHIFT_II
// bit-casts signed counts to unsigned first. Bodies truncated by extraction.
908#define HWY_RVV_SHIFT_VV(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
909 SHIFT, MLEN, NAME, OP) \
910 HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
911 NAME(HWY_RVV_V(BASE, SEW, LMUL) v, HWY_RVV_V(BASE, SEW, LMUL) bits) { \
912 return v##OP##_vv_##CHAR##SEW##LMUL(v, bits, HWY_RVV_AVL(SEW, SHIFT)); \
917#define HWY_RVV_SHIFT_II(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
918 SHIFT, MLEN, NAME, OP) \
919 HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
920 NAME(HWY_RVV_V(BASE, SEW, LMUL) v, HWY_RVV_V(BASE, SEW, LMUL) bits) { \
921 return v##OP##_vv_##CHAR##SEW##LMUL(v, detail::BitCastToUnsigned(bits), \
922 HWY_RVV_AVL(SEW, SHIFT)); \
932#undef HWY_RVV_SHIFT_II
933#undef HWY_RVV_SHIFT_VV
// Advertise native 64-bit MulLo support to generic_ops (re-define cleanly).
961#ifdef HWY_NATIVE_I64MULLO
962#undef HWY_NATIVE_I64MULLO
964#define HWY_NATIVE_I64MULLO
// FMA: NAME(mul, x, add) maps to the intrinsic's (add, mul, x) operand
// order. RETM_ARGVV / RETM_ARGVS generate comparisons returning a mask from
// (vector, vector) / (vector, scalar). Bodies truncated by extraction —
// verify closing lines against upstream rvv-inl.h.
997#define HWY_RVV_FMA(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
999 HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
1000 NAME(HWY_RVV_V(BASE, SEW, LMUL) mul, HWY_RVV_V(BASE, SEW, LMUL) x, \
1001 HWY_RVV_V(BASE, SEW, LMUL) add) { \
1002 return v##OP##_vv_##CHAR##SEW##LMUL(add, mul, x, HWY_RVV_AVL(SEW, SHIFT)); \
1025#define HWY_RVV_RETM_ARGVV(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
1026 SHIFT, MLEN, NAME, OP) \
1027 HWY_API HWY_RVV_M(MLEN) \
1028 NAME(HWY_RVV_V(BASE, SEW, LMUL) a, HWY_RVV_V(BASE, SEW, LMUL) b) { \
1029 return v##OP##_vv_##CHAR##SEW##LMUL##_b##MLEN(a, b, \
1030 HWY_RVV_AVL(SEW, SHIFT)); \
1034#define HWY_RVV_RETM_ARGVS(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
1035 SHIFT, MLEN, NAME, OP) \
1036 HWY_API HWY_RVV_M(MLEN) \
1037 NAME(HWY_RVV_V(BASE, SEW, LMUL) a, HWY_RVV_T(BASE, SEW) b) { \
1038 return v##OP##_##CHAR##SEW##LMUL##_b##MLEN(a, b, HWY_RVV_AVL(SEW, SHIFT)); \
1073#undef HWY_RVV_RETM_ARGVV
1074#undef HWY_RVV_RETM_ARGVS
// Ge/Gt are expressed via Le/Lt with swapped-argument semantics (bodies and
// template headers lost in extraction — only the signatures remain here).
1079HWY_API auto Ge(
const V a,
const V b) ->
decltype(Le(a, b)) {
1084HWY_API auto Gt(
const V a,
const V b) ->
decltype(Lt(a, b)) {
// TestBit fragment: nonzero after masking with `bit` => mask lane set.
1091 return detail::NeS(
And(a, bit), 0);
// Mask logic generator; note the intrinsic receives (b, a) — argument order
// matters for the non-commutative AndNot case.
1101#define HWY_RVV_RETM_ARGMM(SEW, SHIFT, MLEN, NAME, OP) \
1102 HWY_API HWY_RVV_M(MLEN) NAME(HWY_RVV_M(MLEN) a, HWY_RVV_M(MLEN) b) { \
1103 return vm##OP##_mm_b##MLEN(b, a, HWY_RVV_AVL(SEW, SHIFT)); \
1120#undef HWY_RVV_RETM_ARGMM
// IfThenElse(mask, yes, no): vmerge selects `yes` where mask is set.
// Intrinsic operand order is (no, yes, mask). Body truncated by extraction.
1123#define HWY_RVV_IF_THEN_ELSE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
1124 SHIFT, MLEN, NAME, OP) \
1125 HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
1126 NAME(HWY_RVV_M(MLEN) m, HWY_RVV_V(BASE, SEW, LMUL) yes, \
1127 HWY_RVV_V(BASE, SEW, LMUL) no) { \
1128 return v##OP##_vvm_##CHAR##SEW##LMUL(no, yes, m, HWY_RVV_AVL(SEW, SHIFT)); \
1133#undef HWY_RVV_IF_THEN_ELSE
1136template <
class M,
class V>
// IfThenZeroElse(mask, no): merges scalar 0 where mask is set.
// Body truncated by extraction.
1143#define HWY_RVV_IF_THEN_ZERO_ELSE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, \
1144 LMULH, SHIFT, MLEN, NAME, OP) \
1145 HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
1146 NAME(HWY_RVV_M(MLEN) m, HWY_RVV_V(BASE, SEW, LMUL) no) { \
1147 return v##OP##_##CHAR##SEW##LMUL(no, 0, m, HWY_RVV_AVL(SEW, SHIFT)); \
1153#undef HWY_RVV_IF_THEN_ZERO_ELSE
// MaskFromVec fragment: lane != 0 => mask set (enclosing function's header
// was lost in extraction).
1159 return detail::NeS(
v, 0);
1165template <
class D,
typename MFrom>
// VecFromMask: masked sub-scalar (v0 - 1 where mask set) yields all-ones
// lanes under the mask and keeps v0 (zero) elsewhere. Truncated body.
1175#define HWY_RVV_VEC_FROM_MASK(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
1176 SHIFT, MLEN, NAME, OP) \
1177 HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
1178 NAME(HWY_RVV_V(BASE, SEW, LMUL) v0, HWY_RVV_M(MLEN) m) { \
1179 return v##OP##_##CHAR##SEW##LMUL##_m(m, v0, v0, 1, \
1180 HWY_RVV_AVL(SEW, SHIFT)); \
1184#undef HWY_RVV_VEC_FROM_MASK
1187template <
class D, HWY_IF_NOT_FLOAT_D(D)>
1189 return detail::SubS(
Zero(
d), mask);
1192template <
class D, HWY_IF_FLOAT_D(D)>
1219 static_assert(IsSigned<TFromV<V>>(),
"Only works for signed/float");
// FindFirstTrue: vfirst returns the index of the first set mask bit, or -1
// if none; FindKnownFirstTrue casts that to size_t (caller guarantees a set
// bit). Bodies truncated by extraction.
1230#define HWY_RVV_FIND_FIRST_TRUE(SEW, SHIFT, MLEN, NAME, OP) \
1231 template <class D> \
1232 HWY_API intptr_t FindFirstTrue(D d, HWY_RVV_M(MLEN) m) { \
1233 static_assert(MLenFromD(d) == MLEN, "Type mismatch"); \
1234 return vfirst_m_b##MLEN(m, Lanes(d)); \
1236 template <class D> \
1237 HWY_API size_t FindKnownFirstTrue(D d, HWY_RVV_M(MLEN) m) { \
1238 static_assert(MLenFromD(d) == MLEN, "Type mismatch"); \
1239 return static_cast<size_t>(vfirst_m_b##MLEN(m, Lanes(d))); \
1243#undef HWY_RVV_FIND_FIRST_TRUE
// AllTrue = AllFalse of the complemented mask.
1253#define HWY_RVV_ALL_TRUE(SEW, SHIFT, MLEN, NAME, OP) \
1254 template <class D> \
1255 HWY_API bool AllTrue(D d, HWY_RVV_M(MLEN) m) { \
1256 static_assert(MLenFromD(d) == MLEN, "Type mismatch"); \
1257 return AllFalse(d, vmnot_m_b##MLEN(m, Lanes(d))); \
1261#undef HWY_RVV_ALL_TRUE
// CountTrue: population count of active mask bits (vcpop).
1265#define HWY_RVV_COUNT_TRUE(SEW, SHIFT, MLEN, NAME, OP) \
1266 template <class D> \
1267 HWY_API size_t CountTrue(D d, HWY_RVV_M(MLEN) m) { \
1268 static_assert(MLenFromD(d) == MLEN, "Type mismatch"); \
1269 return vcpop_m_b##MLEN(m, Lanes(d)); \
1273#undef HWY_RVV_COUNT_TRUE
// Load: unit-stride vle, vl = Lanes(d). Body truncated by extraction.
1279#define HWY_RVV_LOAD(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
1281 template <size_t N> \
1282 HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
1283 NAME(HWY_RVV_D(BASE, SEW, N, SHIFT) d, \
1284 const HWY_RVV_T(BASE, SEW) * HWY_RESTRICT p) { \
1285 return v##OP##SEW##_v_##CHAR##SEW##LMUL(p, Lanes(d)); \
1291template <
size_t N,
int kPow2>
1298template <
size_t N,
int kPow2>
// MaskedLoad: masked vle with Zero(d) as the merge value for inactive lanes.
1315#define HWY_RVV_MASKED_LOAD(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
1316 SHIFT, MLEN, NAME, OP) \
1317 template <size_t N> \
1318 HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
1319 NAME(HWY_RVV_M(MLEN) m, HWY_RVV_D(BASE, SEW, N, SHIFT) d, \
1320 const HWY_RVV_T(BASE, SEW) * HWY_RESTRICT p) { \
1321 return v##OP##SEW##_v_##CHAR##SEW##LMUL##_m(m, Zero(d), p, Lanes(d)); \
1324#undef HWY_RVV_MASKED_LOAD
// Store: unit-stride vse. BlendedStore: masked store (only active lanes
// written). StoreN: stores `count` lanes. Bodies truncated by extraction.
1328#define HWY_RVV_STORE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
1330 template <size_t N> \
1331 HWY_API void NAME(HWY_RVV_V(BASE, SEW, LMUL) v, \
1332 HWY_RVV_D(BASE, SEW, N, SHIFT) d, \
1333 HWY_RVV_T(BASE, SEW) * HWY_RESTRICT p) { \
1334 return v##OP##SEW##_v_##CHAR##SEW##LMUL(p, v, Lanes(d)); \
1341#define HWY_RVV_BLENDED_STORE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
1342 SHIFT, MLEN, NAME, OP) \
1343 template <size_t N> \
1344 HWY_API void NAME(HWY_RVV_V(BASE, SEW, LMUL) v, HWY_RVV_M(MLEN) m, \
1345 HWY_RVV_D(BASE, SEW, N, SHIFT) d, \
1346 HWY_RVV_T(BASE, SEW) * HWY_RESTRICT p) { \
1347 return v##OP##SEW##_v_##CHAR##SEW##LMUL##_m(m, p, v, Lanes(d)); \
1350#undef HWY_RVV_BLENDED_STORE
// StoreN: explicit element count instead of Lanes(d).
1354#define HWY_RVV_STOREN(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
1356 template <size_t N> \
1357 HWY_API void NAME(size_t count, HWY_RVV_V(BASE, SEW, LMUL) v, \
1358 HWY_RVV_D(BASE, SEW, N, SHIFT) , \
1359 HWY_RVV_T(BASE, SEW) * HWY_RESTRICT p) { \
1360 return v##OP##SEW##_v_##CHAR##SEW##LMUL(p, v, count); \
1363#undef HWY_RVV_STOREN
1370template <
class V,
class D>
1376template <
class V,
class D,
typename T>
// ScatterOffset: indexed store (vsuxei/vsoxei family); signed byte offsets
// are bit-cast to unsigned as the intrinsics require. Truncated body.
1383#define HWY_RVV_SCATTER(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
1384 SHIFT, MLEN, NAME, OP) \
1385 template <size_t N> \
1386 HWY_API void NAME(HWY_RVV_V(BASE, SEW, LMUL) v, \
1387 HWY_RVV_D(BASE, SEW, N, SHIFT) d, \
1388 HWY_RVV_T(BASE, SEW) * HWY_RESTRICT base, \
1389 HWY_RVV_V(int, SEW, LMUL) offset) { \
1390 return v##OP##ei##SEW##_v_##CHAR##SEW##LMUL( \
1391 base, detail::BitCastToUnsigned(offset), v, Lanes(d)); \
1398template <
class D, HWY_IF_LANE_SIZE_D(D, 4)>
1404template <
class D, HWY_IF_LANE_SIZE_D(D, 8)>
1406 const VFromD<RebindToSigned<D>> index) {
// GatherOffset: indexed load (vluxei family), mirroring SCATTER above.
// Truncated body.
1412#define HWY_RVV_GATHER(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
1414 template <size_t N> \
1415 HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
1416 NAME(HWY_RVV_D(BASE, SEW, N, SHIFT) d, \
1417 const HWY_RVV_T(BASE, SEW) * HWY_RESTRICT base, \
1418 HWY_RVV_V(int, SEW, LMUL) offset) { \
1419 return v##OP##ei##SEW##_v_##CHAR##SEW##LMUL( \
1420 base, detail::BitCastToUnsigned(offset), Lanes(d)); \
1427template <
class D, HWY_IF_LANE_SIZE_D(D, 4)>
1433template <
class D, HWY_IF_LANE_SIZE_D(D, 8)>
1435 const VFromD<RebindToSigned<D>> index) {
1442#ifdef HWY_NATIVE_LOAD_STORE_INTERLEAVED
1443#undef HWY_NATIVE_LOAD_STORE_INTERLEAVED
1445#define HWY_NATIVE_LOAD_STORE_INTERLEAVED
1448#define HWY_RVV_LOAD2(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
1450 template <size_t N> \
1451 HWY_API void NAME(HWY_RVV_D(BASE, SEW, N, SHIFT) d, \
1452 const HWY_RVV_T(BASE, SEW) * HWY_RESTRICT unaligned, \
1453 HWY_RVV_V(BASE, SEW, LMUL) & v0, \
1454 HWY_RVV_V(BASE, SEW, LMUL) & v1) { \
1455 v##OP##e##SEW##_v_##CHAR##SEW##LMUL(&v0, &v1, unaligned, Lanes(d)); \
1463#define HWY_RVV_LOAD3(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
1465 template <size_t N> \
1466 HWY_API void NAME(HWY_RVV_D(BASE, SEW, N, SHIFT) d, \
1467 const HWY_RVV_T(BASE, SEW) * HWY_RESTRICT unaligned, \
1468 HWY_RVV_V(BASE, SEW, LMUL) & v0, \
1469 HWY_RVV_V(BASE, SEW, LMUL) & v1, \
1470 HWY_RVV_V(BASE, SEW, LMUL) & v2) { \
1471 v##OP##e##SEW##_v_##CHAR##SEW##LMUL(&v0, &v1, &v2, unaligned, Lanes(d)); \
1479#define HWY_RVV_LOAD4(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
1481 template <size_t N> \
1482 HWY_API void NAME( \
1483 HWY_RVV_D(BASE, SEW, N, SHIFT) d, \
1484 const HWY_RVV_T(BASE, SEW) * HWY_RESTRICT aligned, \
1485 HWY_RVV_V(BASE, SEW, LMUL) & v0, HWY_RVV_V(BASE, SEW, LMUL) & v1, \
1486 HWY_RVV_V(BASE, SEW, LMUL) & v2, HWY_RVV_V(BASE, SEW, LMUL) & v3) { \
1487 v##OP##e##SEW##_v_##CHAR##SEW##LMUL(&v0, &v1, &v2, &v3, aligned, \
1496#define HWY_RVV_STORE2(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
1498 template <size_t N> \
1499 HWY_API void NAME(HWY_RVV_V(BASE, SEW, LMUL) v0, \
1500 HWY_RVV_V(BASE, SEW, LMUL) v1, \
1501 HWY_RVV_D(BASE, SEW, N, SHIFT) d, \
1502 HWY_RVV_T(BASE, SEW) * HWY_RESTRICT unaligned) { \
1503 v##OP##e##SEW##_v_##CHAR##SEW##LMUL(unaligned, v0, v1, Lanes(d)); \
1507#undef HWY_RVV_STORE2
1511#define HWY_RVV_STORE3(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
1513 template <size_t N> \
1514 HWY_API void NAME( \
1515 HWY_RVV_V(BASE, SEW, LMUL) v0, HWY_RVV_V(BASE, SEW, LMUL) v1, \
1516 HWY_RVV_V(BASE, SEW, LMUL) v2, HWY_RVV_D(BASE, SEW, N, SHIFT) d, \
1517 HWY_RVV_T(BASE, SEW) * HWY_RESTRICT unaligned) { \
1518 v##OP##e##SEW##_v_##CHAR##SEW##LMUL(unaligned, v0, v1, v2, Lanes(d)); \
1522#undef HWY_RVV_STORE3
1526#define HWY_RVV_STORE4(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
1528 template <size_t N> \
1529 HWY_API void NAME( \
1530 HWY_RVV_V(BASE, SEW, LMUL) v0, HWY_RVV_V(BASE, SEW, LMUL) v1, \
1531 HWY_RVV_V(BASE, SEW, LMUL) v2, HWY_RVV_V(BASE, SEW, LMUL) v3, \
1532 HWY_RVV_D(BASE, SEW, N, SHIFT) d, \
1533 HWY_RVV_T(BASE, SEW) * HWY_RESTRICT aligned) { \
1534 v##OP##e##SEW##_v_##CHAR##SEW##LMUL(aligned, v0, v1, v2, v3, Lanes(d)); \
1538#undef HWY_RVV_STORE4
1545#define HWY_RVV_PROMOTE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
1546 SHIFT, MLEN, NAME, OP) \
1547 template <size_t N> \
1548 HWY_API HWY_RVV_V(BASE, SEWD, LMULD) NAME( \
1549 HWY_RVV_D(BASE, SEWD, N, SHIFT + 1) d, HWY_RVV_V(BASE, SEW, LMUL) v) { \
1550 return OP##CHAR##SEWD##LMULD(v, Lanes(d)); \
1561#undef HWY_RVV_PROMOTE
1565#define HWY_RVV_PROMOTE(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN, LMUL, LMUL_IN, \
1567 template <size_t N> \
1568 HWY_API HWY_RVV_V(BASE, BITS, LMUL) \
1569 PromoteTo(HWY_RVV_D(BASE, BITS, N, SHIFT + ADD) d, \
1570 HWY_RVV_V(BASE_IN, BITS_IN, LMUL_IN) v) { \
1571 return OP##CHAR##BITS##LMUL(v, Lanes(d)); \
1574#define HWY_RVV_PROMOTE_X2(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN) \
1575 HWY_RVV_PROMOTE(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN, m1, mf2, -2, 1) \
1576 HWY_RVV_PROMOTE(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN, m1, mf2, -1, 1) \
1577 HWY_RVV_PROMOTE(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN, m2, m1, 0, 1) \
1578 HWY_RVV_PROMOTE(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN, m4, m2, 1, 1) \
1579 HWY_RVV_PROMOTE(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN, m8, m4, 2, 1)
1581#define HWY_RVV_PROMOTE_X4(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN) \
1582 HWY_RVV_PROMOTE(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN, mf2, mf8, -3, 2) \
1583 HWY_RVV_PROMOTE(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN, m1, mf4, -2, 2) \
1584 HWY_RVV_PROMOTE(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN, m2, mf2, -1, 2) \
1585 HWY_RVV_PROMOTE(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN, m4, m1, 0, 2) \
1586 HWY_RVV_PROMOTE(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN, m8, m2, 1, 2)
1594#undef HWY_RVV_PROMOTE_X4
1595#undef HWY_RVV_PROMOTE_X2
1596#undef HWY_RVV_PROMOTE
1599template <
size_t N,
int kPow2>
1606template <
size_t N,
int kPow2>
1613template <
size_t N,
int kPow2>
1620template <
size_t N,
int kPow2>
1625 const Rebind<uint16_t,
decltype(
d)> du16;
1632#define HWY_RVV_DEMOTE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
1634 template <size_t N> \
1635 HWY_API HWY_RVV_V(BASE, SEWH, LMULH) NAME( \
1636 HWY_RVV_D(BASE, SEWH, N, SHIFT - 1) d, HWY_RVV_V(BASE, SEW, LMUL) v) { \
1637 return OP##CHAR##SEWH##LMULH(v, 0, Lanes(d)); \
1639 template <size_t N> \
1640 HWY_API HWY_RVV_V(BASE, SEWH, LMULH) NAME##Shr16( \
1641 HWY_RVV_D(BASE, SEWH, N, SHIFT - 1) d, HWY_RVV_V(BASE, SEW, LMUL) v) { \
1642 return OP##CHAR##SEWH##LMULH(v, 16, Lanes(d)); \
1652#define HWY_RVV_DEMOTE_I_TO_U(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
1653 SHIFT, MLEN, NAME, OP) \
1654 template <size_t N> \
1655 HWY_API HWY_RVV_V(uint, SEWH, LMULH) NAME( \
1656 HWY_RVV_D(uint, SEWH, N, SHIFT - 1) d, HWY_RVV_V(int, SEW, LMUL) v) { \
1658 return detail::DemoteTo(d, detail::BitCastToUnsigned(detail::MaxS(v, 0))); \
1662#undef HWY_RVV_DEMOTE_I_TO_U
1687 return vnclipu_wx_u8mf8(vnclipu_wx_u16mf4(
v, 0, avl), 0, avl);
1691 return vnclipu_wx_u8mf4(vnclipu_wx_u16mf2(
v, 0, avl), 0, avl);
1695 return vnclipu_wx_u8mf2(vnclipu_wx_u16m1(
v, 0, avl), 0, avl);
1699 return vnclipu_wx_u8m1(vnclipu_wx_u16m2(
v, 0, avl), 0, avl);
1703 return vnclipu_wx_u8m2(vnclipu_wx_u16m4(
v, 0, avl), 0, avl);
1711 const size_t avl =
Lanes(
d);
1712 const vuint64m1_t v1 = vand(
v, 0xFF, avl);
1713 const vuint32mf2_t v2 = vnclipu_wx_u32mf2(v1, 0, avl);
1714 const vuint16mf4_t v3 = vnclipu_wx_u16mf4(v2, 0, avl);
1715 return vnclipu_wx_u8mf8(v3, 0, avl);
1721 const size_t avl =
Lanes(
d);
1722 const vuint64m2_t v1 = vand(
v, 0xFF, avl);
1723 const vuint32m1_t v2 = vnclipu_wx_u32m1(v1, 0, avl);
1724 const vuint16mf2_t v3 = vnclipu_wx_u16mf2(v2, 0, avl);
1725 return vnclipu_wx_u8mf4(v3, 0, avl);
1731 const size_t avl =
Lanes(
d);
1732 const vuint64m4_t v1 = vand(
v, 0xFF, avl);
1733 const vuint32m2_t v2 = vnclipu_wx_u32m2(v1, 0, avl);
1734 const vuint16m1_t v3 = vnclipu_wx_u16m1(v2, 0, avl);
1735 return vnclipu_wx_u8mf2(v3, 0, avl);
1741 const size_t avl =
Lanes(
d);
1742 const vuint64m8_t v1 = vand(
v, 0xFF, avl);
1743 const vuint32m4_t v2 = vnclipu_wx_u32m4(v1, 0, avl);
1744 const vuint16m2_t v3 = vnclipu_wx_u16m2(v2, 0, avl);
1745 return vnclipu_wx_u8m1(v3, 0, avl);
1751 const size_t avl =
Lanes(
d);
1752 const vuint64m1_t v1 = vand(
v, 0xFFFF, avl);
1753 const vuint32mf2_t v2 = vnclipu_wx_u32mf2(v1, 0, avl);
1754 return vnclipu_wx_u16mf4(v2, 0, avl);
1760 const size_t avl =
Lanes(
d);
1761 const vuint64m2_t v1 = vand(
v, 0xFFFF, avl);
1762 const vuint32m1_t v2 = vnclipu_wx_u32m1(v1, 0, avl);
1763 return vnclipu_wx_u16mf2(v2, 0, avl);
1769 const size_t avl =
Lanes(
d);
1770 const vuint64m4_t v1 = vand(
v, 0xFFFF, avl);
1771 const vuint32m2_t v2 = vnclipu_wx_u32m2(v1, 0, avl);
1772 return vnclipu_wx_u16m1(v2, 0, avl);
1778 const size_t avl =
Lanes(
d);
1779 const vuint64m8_t v1 = vand(
v, 0xFFFF, avl);
1780 const vuint32m4_t v2 = vnclipu_wx_u32m4(v1, 0, avl);
1781 return vnclipu_wx_u16m2(v2, 0, avl);
1787 const size_t avl =
Lanes(
d);
1788 const vuint64m1_t v1 = vand(
v, 0xFFFFFFFFu, avl);
1789 return vnclipu_wx_u32mf2(v1, 0, avl);
1795 const size_t avl =
Lanes(
d);
1796 const vuint64m2_t v1 = vand(
v, 0xFFFFFFFFu, avl);
1797 return vnclipu_wx_u32m1(v1, 0, avl);
1803 const size_t avl =
Lanes(
d);
1804 const vuint64m4_t v1 = vand(
v, 0xFFFFFFFFu, avl);
1805 return vnclipu_wx_u32m2(v1, 0, avl);
1811 const size_t avl =
Lanes(
d);
1812 const vuint64m8_t v1 = vand(
v, 0xFFFFFFFFu, avl);
1813 return vnclipu_wx_u32m4(v1, 0, avl);
1819 const size_t avl =
Lanes(
d);
1820 const vuint32mf2_t v1 = vand(
v, 0xFF, avl);
1821 const vuint16mf4_t v2 = vnclipu_wx_u16mf4(v1, 0, avl);
1822 return vnclipu_wx_u8mf8(v2, 0, avl);
1828 const size_t avl =
Lanes(
d);
1829 const vuint32m1_t v1 = vand(
v, 0xFF, avl);
1830 const vuint16mf2_t v2 = vnclipu_wx_u16mf2(v1, 0, avl);
1831 return vnclipu_wx_u8mf4(v2, 0, avl);
1837 const size_t avl =
Lanes(
d);
1838 const vuint32m2_t v1 = vand(
v, 0xFF, avl);
1839 const vuint16m1_t v2 = vnclipu_wx_u16m1(v1, 0, avl);
1840 return vnclipu_wx_u8mf2(v2, 0, avl);
1846 const size_t avl =
Lanes(
d);
1847 const vuint32m4_t v1 = vand(
v, 0xFF, avl);
1848 const vuint16m2_t v2 = vnclipu_wx_u16m2(v1, 0, avl);
1849 return vnclipu_wx_u8m1(v2, 0, avl);
1855 const size_t avl =
Lanes(
d);
1856 const vuint32m8_t v1 = vand(
v, 0xFF, avl);
1857 const vuint16m4_t v2 = vnclipu_wx_u16m4(v1, 0, avl);
1858 return vnclipu_wx_u8m2(v2, 0, avl);
1864 const size_t avl =
Lanes(
d);
1865 const vuint32mf2_t v1 = vand(
v, 0xFFFF, avl);
1866 return vnclipu_wx_u16mf4(v1, 0, avl);
1872 const size_t avl =
Lanes(
d);
1873 const vuint32m1_t v1 = vand(
v, 0xFFFF, avl);
1874 return vnclipu_wx_u16mf2(v1, 0, avl);
1880 const size_t avl =
Lanes(
d);
1881 const vuint32m2_t v1 = vand(
v, 0xFFFF, avl);
1882 return vnclipu_wx_u16m1(v1, 0, avl);
1888 const size_t avl =
Lanes(
d);
1889 const vuint32m4_t v1 = vand(
v, 0xFFFF, avl);
1890 return vnclipu_wx_u16m2(v1, 0, avl);
1896 const size_t avl =
Lanes(
d);
1897 const vuint32m8_t v1 = vand(
v, 0xFFFF, avl);
1898 return vnclipu_wx_u16m4(v1, 0, avl);
1904 const size_t avl =
Lanes(
d);
1905 const vuint16mf4_t v1 = vand(
v, 0xFF, avl);
1906 return vnclipu_wx_u8mf8(v1, 0, avl);
1912 const size_t avl =
Lanes(
d);
1913 const vuint16mf2_t v1 = vand(
v, 0xFF, avl);
1914 return vnclipu_wx_u8mf4(v1, 0, avl);
1920 const size_t avl =
Lanes(
d);
1921 const vuint16m1_t v1 = vand(
v, 0xFF, avl);
1922 return vnclipu_wx_u8mf2(v1, 0, avl);
1928 const size_t avl =
Lanes(
d);
1929 const vuint16m2_t v1 = vand(
v, 0xFF, avl);
1930 return vnclipu_wx_u8m1(v1, 0, avl);
1936 const size_t avl =
Lanes(
d);
1937 const vuint16m4_t v1 = vand(
v, 0xFF, avl);
1938 return vnclipu_wx_u8m2(v1, 0, avl);
1944 const size_t avl =
Lanes(
d);
1945 const vuint16m8_t v1 = vand(
v, 0xFF, avl);
1946 return vnclipu_wx_u8m4(v1, 0, avl);
1975#undef HWY_RVV_DEMOTE
1980#define HWY_RVV_DEMOTE_F(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
1981 SHIFT, MLEN, NAME, OP) \
1982 template <size_t N> \
1983 HWY_API HWY_RVV_V(BASE, SEWH, LMULH) NAME( \
1984 HWY_RVV_D(BASE, SEWH, N, SHIFT - 1) d, HWY_RVV_V(BASE, SEW, LMUL) v) { \
1985 return OP##SEWH##LMULH(v, Lanes(d)); \
1994#undef HWY_RVV_DEMOTE_F
1999 return vfncvt_rtz_x_f_w_i32mf2(
v,
Lanes(
d));
2003 return vfncvt_rtz_x_f_w_i32mf2(
v,
Lanes(
d));
2007 return vfncvt_rtz_x_f_w_i32m1(
v,
Lanes(
d));
2011 return vfncvt_rtz_x_f_w_i32m2(
v,
Lanes(
d));
2015 return vfncvt_rtz_x_f_w_i32m4(
v,
Lanes(
d));
2018template <
size_t N,
int kPow2>
2022 const Rebind<uint32_t,
decltype(
d)> du32;
2023 return detail::DemoteToShr16(du16,
BitCast(du32,
v));
2028#define HWY_RVV_CONVERT(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
2029 SHIFT, MLEN, NAME, OP) \
2030 template <size_t N> \
2031 HWY_API HWY_RVV_V(BASE, SEW, LMUL) ConvertTo( \
2032 HWY_RVV_D(BASE, SEW, N, SHIFT) d, HWY_RVV_V(int, SEW, LMUL) v) { \
2033 return vfcvt_f_x_v_f##SEW##LMUL(v, Lanes(d)); \
2035 template <size_t N> \
2036 HWY_API HWY_RVV_V(BASE, SEW, LMUL) ConvertTo( \
2037 HWY_RVV_D(BASE, SEW, N, SHIFT) d, HWY_RVV_V(uint, SEW, LMUL) v) {\
2038 return vfcvt_f_xu_v_f##SEW##LMUL(v, Lanes(d)); \
2041 template <size_t N> \
2042 HWY_API HWY_RVV_V(int, SEW, LMUL) ConvertTo(HWY_RVV_D(int, SEW, N, SHIFT) d, \
2043 HWY_RVV_V(BASE, SEW, LMUL) v) { \
2044 return vfcvt_rtz_x_f_v_i##SEW##LMUL(v, Lanes(d)); \
2048#undef HWY_RVV_CONVERT
2051#define HWY_RVV_NEAREST(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
2052 SHIFT, MLEN, NAME, OP) \
2053 HWY_API HWY_RVV_V(int, SEW, LMUL) NearestInt(HWY_RVV_V(BASE, SEW, LMUL) v) { \
2054 return vfcvt_x_f_v_i##SEW##LMUL(v, HWY_RVV_AVL(SEW, SHIFT)); \
2057#undef HWY_RVV_NEAREST
2065template <
typename T,
size_t N,
int kPow2>
2067 size_t lpb = 16 /
sizeof(T);
2072 if (kPow2 >= 0)
return lpb;
2077template <
class D,
class V>
2083template <
size_t kLanes,
class D>
2087 using TU =
TFromD<
decltype(du)>;
2089 return LtS(
BitCast(di, idx_mod),
static_cast<TFromD<decltype(di)
>>(kLanes));
2093#define HWY_RVV_SLIDE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
2095 HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
2096 NAME(HWY_RVV_V(BASE, SEW, LMUL) dst, HWY_RVV_V(BASE, SEW, LMUL) src, \
2098 return v##OP##_vx_##CHAR##SEW##LMUL(dst, src, lanes, \
2099 HWY_RVV_AVL(SEW, SHIFT)); \
2110template <
class D,
class V>
2116template <
class D,
class V>
2118 return detail::SlideUp(lo, hi,
Lanes(
d) / 2);
2122template <
class D,
class V>
2125 const auto lo_down = detail::SlideDown(lo, lo,
Lanes(
d) / 2);
2130template <
class D,
class V>
2133 const auto hi_up = detail::SlideUp(hi, hi,
Lanes(
d) / 2);
2134 const auto lo_down = detail::SlideDown(lo, lo,
Lanes(
d) / 2);
2139template <
class D2,
class V>
2141 return detail::SlideUp(detail::Ext(d2, lo), detail::Ext(d2, hi),
2147template <
class D2,
class V>
2166template <
class DH, hwy::EnableIf<detail::IsSupportedLMUL(DH())>* =
nullptr>
2176template <
class DH,
class V,
2197#define HWY_RVV_SLIDE1(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
2199 HWY_API HWY_RVV_V(BASE, SEW, LMUL) NAME(HWY_RVV_V(BASE, SEW, LMUL) v) { \
2200 return v##OP##_##CHAR##SEW##LMUL(v, 0, HWY_RVV_AVL(SEW, SHIFT)); \
2207#undef HWY_RVV_SLIDE1
2212#define HWY_RVV_GET_LANE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
2213 SHIFT, MLEN, NAME, OP) \
2214 HWY_API HWY_RVV_T(BASE, SEW) NAME(HWY_RVV_V(BASE, SEW, LMUL) v) { \
2215 return v##OP##_s_##CHAR##SEW##LMUL##_##CHAR##SEW(v); \
2220#undef HWY_RVV_GET_LANE
2225 return GetLane(detail::SlideDown(
v,
v, i));
2230template <
class V, HWY_IF_NOT_LANE_SIZE_V(V, 1)>
2234 using TU =
TFromD<
decltype(du)>;
2235 const auto is_i = detail::EqS(
detail::Iota0(du),
static_cast<TU
>(i));
2244template <
class V, HWY_IF_LANE_SIZE_V(V, 1)>
2247 const auto zero =
Zero(
d);
2248 const auto one =
Set(
d, 1);
2249 const auto ge_i = Eq(detail::SlideUp(zero, one, i), one);
2250 const auto is_i = detail::SetOnlyFirst(ge_i);
2258 const auto is_even = detail::EqS(detail::AndS(
detail::Iota0(du), 1), 0);
2265 const V up = detail::Slide1Up(
v);
2272 const V down = detail::Slide1Down(
v);
2279 const RebindToUnsigned<DFromV<V>> du;
2280 constexpr size_t kShift =
CeilLog2(16 /
sizeof(TFromV<V>));
2281 const auto idx_block = ShiftRight<kShift>(
detail::Iota0(du));
2282 const auto is_even = detail::EqS(detail::AndS(idx_block, 1), 0);
2292 const V down = detail::SlideDown(
v,
v, lpb);
2293 const V up = detail::SlideUp(
v,
v, lpb);
2299template <
class D,
class VI>
2301 static_assert(
sizeof(TFromD<D>) ==
sizeof(TFromV<VI>),
"Index != lane");
2303 const auto indices =
BitCast(du, vec);
2304#if HWY_IS_DEBUG_BUILD
2310template <
class D,
typename TI>
2312 static_assert(
sizeof(TFromD<D>) ==
sizeof(TI),
"Index size must match lane");
2318#define HWY_RVV_TABLE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
2320 HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
2321 NAME(HWY_RVV_V(BASE, SEW, LMUL) v, HWY_RVV_V(uint, SEW, LMUL) idx) { \
2322 return v##OP##_vv_##CHAR##SEW##LMUL(v, idx, HWY_RVV_AVL(SEW, SHIFT)); \
2329template <
class D,
class V>
2333 const auto idx = detail::AddS(
Add(iota, iota), 1);
2336 return detail::SlideUp(lo_odd, hi_odd,
Lanes(
d) / 2);
2340template <
class D,
class V>
2344 const auto idx =
Add(iota, iota);
2347 return detail::SlideUp(lo_even, hi_even,
Lanes(
d) / 2);
2354 using TU =
TFromD<
decltype(du)>;
2355 const size_t N =
Lanes(du);
2373 const Twice<
decltype(
d)> d2;
2374 const Twice<
decltype(d2)> d4;
2376 const auto vx = detail::Ext(d4, detail::Ext(d2,
v));
2383template <
class D, HWY_IF_LANE_SIZE_D(D, 4), HWY_RVV_IF_POW2_IN(D, 0, 3)>
2392 const Twice<
decltype(
d)> d2;
2393 const Twice<
decltype(d2)> d4;
2395 const auto vx = detail::Ext(d4, detail::Ext(d2,
v));
2400template <
class D,
class V = VFromD<D>, HWY_IF_LANE_SIZE_D(D, 8)>
2402 const V up = detail::Slide1Up(
v);
2403 const V down = detail::Slide1Down(
v);
2411 const RebindToUnsigned<D> du;
2420 const RebindToUnsigned<D> du;
2426template <
class D,
class V = VFromD<D>>
2428 const Repartition<uint64_t, D> du64;
2429 const size_t N =
Lanes(du64);
2431 detail::ReverseSubS(
detail::Iota0(du64),
static_cast<uint64_t
>(
N - 1));
2433 const auto idx = detail::XorS(rev, 1);
2440#ifdef HWY_NATIVE_COMPRESS8
2441#undef HWY_NATIVE_COMPRESS8
2443#define HWY_NATIVE_COMPRESS8
2446template <
typename T>
2447struct CompressIsPartition {
2451#define HWY_RVV_COMPRESS(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
2452 SHIFT, MLEN, NAME, OP) \
2453 HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
2454 NAME(HWY_RVV_V(BASE, SEW, LMUL) v, HWY_RVV_M(MLEN) mask) { \
2455 return v##OP##_vm_##CHAR##SEW##LMUL(v, v, mask, HWY_RVV_AVL(SEW, SHIFT)); \
2459#undef HWY_RVV_COMPRESS
2462template <
class V,
class M>
2468template <
class V,
class M>
2474template <
class V,
class M,
class D>
2482template <
class V,
class M,
class D>
2486 detail::StoreN(count,
Compress(
v, mask),
d, unaligned);
2493template <
size_t kBytes,
class D,
class V = VFromD<D>>
2496 const auto hi8 =
BitCast(d8, hi);
2497 const auto lo8 =
BitCast(d8, lo);
2498 const auto hi_up = detail::SlideUp(hi8, hi8, 16 - kBytes);
2499 const auto lo_down = detail::SlideDown(lo8, lo8, kBytes);
2505template <
size_t kLanes,
class D,
class V = VFromD<D>>
2507 constexpr size_t kLanesUp = 16 /
sizeof(
TFromV<V>) - kLanes;
2508 const auto hi_up = detail::SlideUp(hi, hi, kLanesUp);
2509 const auto lo_down = detail::SlideDown(lo, lo, kLanes);
2510 const auto is_lo = detail::FirstNPerBlock<kLanesUp>(
d);
2518 static_assert(
sizeof(
TFromD<
decltype(
d)>) == 4,
"Defined for 32-bit types");
2521 return BitCast(
d,
Or(ShiftRight<32>(v64), ShiftLeft<32>(v64)));
2528 static_assert(
sizeof(
TFromD<
decltype(
d)>) == 4,
"Defined for 32-bit types");
2529 return CombineShiftRightLanes<3>(
d,
v,
v);
2536 static_assert(
sizeof(
TFromD<
decltype(
d)>) == 4,
"Defined for 32-bit types");
2537 return CombineShiftRightLanes<1>(
d,
v,
v);
2544 static_assert(
sizeof(
TFromD<
decltype(
d)>) == 4,
"Defined for 32-bit types");
2545 return CombineShiftRightLanes<2>(
d,
v,
v);
2552 static_assert(
sizeof(
TFromD<
decltype(
d)>) == 8,
"Defined for 64-bit types");
2553 return CombineShiftRightLanes<1>(
d,
v,
v);
2567template <
typename T,
size_t N,
int kPow2>
2570 const Simd<T,
N, kPow2 - 1> dh;
2571 const Simd<T,
N, kPow2 - 2> dhh;
2572 return Ext(
d, Ext(dh, Ext(dhh,
v)));
2574template <
typename T,
size_t N,
int kPow2>
2577 const Simd<T,
N, kPow2 - 1> dh;
2578 return Ext(
d, Ext(dh,
v));
2580template <
typename T,
size_t N,
int kPow2>
2586template <
typename T,
size_t N,
int kPow2>
2592template <
typename T,
size_t N,
int kPow2>
2597template <
typename T,
size_t N,
int kPow2>
2602template <
typename T,
size_t N,
int kPow2>
2610template <
class VT,
class VI>
2620 constexpr int kPow2T =
Pow2(dt8);
2621 constexpr int kPow2I =
Pow2(di8);
2628 if (kPow2T < kPow2I) {
2629 offsets = detail::AndS(offsets,
static_cast<uint8_t
>(
Lanes(dt8) - 1));
2635template <
class VT,
class VI>
2639 const auto idx8 =
BitCast(di8, idx);
2645template <
int kLane,
class V>
2651 idx = detail::AddS(idx, kLane);
2658template <
size_t kLanes,
class D,
class V = VFromD<D>>
2661 using TI =
TFromD<
decltype(di)>;
2662 const auto shifted = detail::SlideUp(
v,
v, kLanes);
2664 const auto idx_mod =
2667 const auto clear = detail::LtS(idx_mod,
static_cast<TI
>(kLanes));
2671template <
size_t kLanes,
class V>
2673 return ShiftLeftLanes<kLanes>(DFromV<V>(),
v);
2678template <
int kBytes,
class D>
2684template <
int kBytes,
class V>
2686 return ShiftLeftBytes<kBytes>(DFromV<V>(),
v);
2690template <
size_t kLanes,
typename T,
size_t N,
int kPow2,
2691 class V = VFromD<Simd<T, N, kPow2>>>
2694 using TI =
TFromD<
decltype(di)>;
2696 if (
N <= 16 /
sizeof(T)) {
2700 const auto shifted = detail::SlideDown(
v,
v, kLanes);
2703 const auto idx_mod =
2705 const auto keep = detail::LtS(idx_mod,
static_cast<TI
>(lpb - kLanes));
2710template <
int kBytes,
class D,
class V = VFromD<D>>
2718template <
class D,
class V>
2720 static_assert(IsSame<TFromD<D>, TFromV<V>>(),
"D/V mismatch");
2722 using TU =
TFromD<
decltype(du)>;
2724 const auto idx_mod = ShiftRight<1>(
2727 const auto is_even = detail::EqS(detail::AndS(i, 1), 0u);
2739template <
class D,
class V>
2741 static_assert(IsSame<TFromD<D>,
TFromV<V>>(),
"D/V mismatch");
2743 using TU =
TFromD<
decltype(du)>;
2746 const auto idx_mod = ShiftRight<1>(detail::AndS(i,
static_cast<TU
>(lpb - 1)));
2748 const auto idx = detail::AddS(idx_lower,
static_cast<TU
>(lpb / 2));
2749 const auto is_even = detail::EqS(detail::AndS(i, 1), 0u);
2756template <
class V,
class DW = RepartitionToW
ide<DFromV<V>>>
2758 const RepartitionToNarrow<DW> dn;
2759 static_assert(
IsSame<
TFromD<
decltype(dn)>, TFromV<V>>(),
"D/V mismatch");
2763template <
class V,
class DW = RepartitionToW
ide<DFromV<V>>>
2769template <
class DW,
class V>
2771 const RepartitionToNarrow<DW> dn;
2772 static_assert(
IsSame<
TFromD<
decltype(dn)>, TFromV<V>>(),
"D/V mismatch");
2779#define HWY_RVV_REDUCE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
2781 template <class D> \
2782 HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
2783 NAME(D d, HWY_RVV_V(BASE, SEW, LMUL) v, HWY_RVV_V(BASE, SEW, m1) v0) { \
2784 return Set(d, GetLane(v##OP##_vs_##CHAR##SEW##LMUL##_##CHAR##SEW##m1( \
2785 v0, v, v0, Lanes(d)))); \
2798 return detail::RedSum(
d,
v, v0);
2812 const auto neutral =
Set(d1, HighestValue<T>());
2813 return detail::RedMin(
d,
v, neutral);
2827 const auto neutral =
Set(d1, LowestValue<T>());
2828 return detail::RedMax(
d,
v, neutral);
2831#undef HWY_RVV_REDUCE
2838template <
typename V,
class D = DFromV<V>, HWY_IF_LANE_SIZE_D(D, 1),
2839 hwy::EnableIf<Pow2(D()) < 1 || MaxLanes(D()) < 16>* =
nullptr>
2840HWY_API V PopulationCount(V v) {
2842 v = Sub(v, detail::AndS(ShiftRight<1>(v), 0x55));
2843 v = Add(detail::AndS(ShiftRight<2>(v), 0x33), detail::AndS(v, 0x33));
2844 return detail::AndS(Add(v, ShiftRight<4>(v)), 0x0F);
2851 const VFromD<D> loaded =
Load(
d, p);
2856 const VFromD<RebindToUnsigned<D>> idx = detail::AndS(
detail::Iota0(
d), mask);
2874#define HWY_RVV_LOAD_MASK_BITS(SEW, SHIFT, MLEN, NAME, OP) \
2875 HWY_INLINE HWY_RVV_M(MLEN) \
2876 NAME(hwy::SizeTag<MLEN> , const uint8_t* bits, size_t N) { \
2877 return OP##_v_b##MLEN(bits, N); \
2880#undef HWY_RVV_LOAD_MASK_BITS
2883template <
class D,
class MT = detail::MaskTag<D>>
2890#define HWY_RVV_STORE_MASK_BITS(SEW, SHIFT, MLEN, NAME, OP) \
2891 template <class D> \
2892 HWY_API size_t NAME(D d, HWY_RVV_M(MLEN) m, uint8_t* bits) { \
2893 const size_t N = Lanes(d); \
2894 OP##_v_b##MLEN(bits, m, N); \
2897 constexpr bool kLessThan8 = \
2898 detail::ScaleByPower(16 / sizeof(TFromD<D>), Pow2(d)) < 8; \
2899 if (MaxLanes(d) < 8 || (kLessThan8 && N < 8)) { \
2900 const int mask = (1 << N) - 1; \
2901 bits[0] = static_cast<uint8_t>(bits[0] & mask); \
2903 return (N + 7) / 8; \
2906#undef HWY_RVV_STORE_MASK_BITS
2924template <
class D, HWY_IF_NOT_LANE_SIZE_D(D, 1)>
2926 const RebindToSigned<D> di;
2927 using TI =
TFromD<
decltype(di)>;
2932template <
class D, HWY_IF_LANE_SIZE_D(D, 1)>
2934 const auto zero =
Zero(
d);
2935 const auto one =
Set(
d, 1);
2936 return Eq(detail::SlideUp(one, zero, n), one);
2941template <
class V, HWY_IF_SIGNED_V(V)>
2943 return detail::ReverseSubS(
v, 0);
2947#define HWY_RVV_RETV_ARGV2(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
2948 SHIFT, MLEN, NAME, OP) \
2949 HWY_API HWY_RVV_V(BASE, SEW, LMUL) NAME(HWY_RVV_V(BASE, SEW, LMUL) v) { \
2950 return v##OP##_vv_##CHAR##SEW##LMUL(v, v, HWY_RVV_AVL(SEW, SHIFT)); \
2957template <
class V, HWY_IF_SIGNED_V(V)>
2964#undef HWY_RVV_RETV_ARGV2
2980enum RoundingModes { kNear, kTrunc, kDown, kUp };
2994 const auto int_f =
ConvertTo(df, integer);
3006 const auto int_f =
ConvertTo(df, integer);
3014 asm volatile(
"fsrm %0" ::
"r"(detail::kUp));
3015 const auto ret =
Round(
v);
3016 asm volatile(
"fsrm %0" ::
"r"(detail::kNear));
3023 asm volatile(
"fsrm %0" ::
"r"(detail::kDown));
3024 const auto ret =
Round(
v);
3025 asm volatile(
"fsrm %0" ::
"r"(detail::kNear));
3039template <
class V,
class D = DFromV<V>>
3046 return RebindMask(
d, detail::EqS(
Add(vi, vi), hwy::MaxExponentTimes2<T>()));
3050template <
class V,
class D = DFromV<V>>
3060 const VFromD<
decltype(di)> exp =
3062 return RebindMask(
d, detail::LtS(exp, hwy::MaxExponentField<T>()));
3067template <
class D, HWY_IF_UNSIGNED_D(D)>
3072template <
class D, HWY_IF_SIGNED_D(D)>
3074 const RebindToUnsigned<D> du;
3078template <
class D, HWY_IF_FLOAT_D(D)>
3080 const RebindToUnsigned<D> du;
3081 const RebindToSigned<D> di;
3087template <
class V, HWY_IF_LANE_SIZE_V(V, 4),
class D = DFromV<V>,
3088 class DW = RepartitionToW
ide<D>>
3090 const auto lo =
Mul(a, b);
3096template <
class V, HWY_IF_LANE_SIZE_V(V, 8)>
3098 const auto lo =
Mul(a, b);
3100 return OddEven(detail::Slide1Up(hi), lo);
3103template <
class V, HWY_IF_LANE_SIZE_V(V, 8)>
3105 const auto lo =
Mul(a, b);
3107 return OddEven(hi, detail::Slide1Down(lo));
3112template <
size_t N,
int kPow2>
3119 const VFromD<
decltype(du32)> b_in_even = ShiftRight<16>(
BitCast(du32, b));
3124template <
size_t N,
int kPow2, hwy::EnableIf<(kPow2 < 3)>* =
nullptr,
3125 class D32 = RepartitionToW
ide<Simd<
int16_t, N, kPow2>>>
3126HWY_API VFromD<Simd<
int16_t, N, kPow2>> ReorderDemote2To(
3127 Simd<
int16_t, N, kPow2> d16, VFromD<D32> a, VFromD<D32> b) {
3128 const Twice<D32> d32t;
3129 const VFromD<decltype(d32t)> ab = Combine(d32t, a, b);
3130 return DemoteTo(d16, ab);
3134template <
size_t N,
class V32 = VFromD<RepartitionToW
ide<Simd<
int16_t, N, 3>>>>
3137 const Half<
decltype(d16)> d16h;
3140 return Combine(d16, a16, b16);
3149 size_t N,
int kPow2,
class DF32 = Simd<float, N, kPow2>,
3150 class VF32 = VFromD<DF32>,
3151 class DU16 = RepartitionToNarrow<RebindToUnsigned<Simd<float, N, kPow2>>>>
3154 const VF32 sum0, VF32& sum1) {
3156 using VU32 =
VFromD<
decltype(du32)>;
3157 const VU32 odd =
Set(du32, 0xFFFF0000u);
3160 const VU32 ae = ShiftLeft<16>(
BitCast(du32, a));
3162 const VU32 be = ShiftLeft<16>(
BitCast(du32, b));
3168#define HWY_RVV_WIDEN_MACC(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
3169 SHIFT, MLEN, NAME, OP) \
3170 template <size_t N> \
3171 HWY_API HWY_RVV_V(BASE, SEWD, LMULD) NAME( \
3172 HWY_RVV_D(BASE, SEWD, N, SHIFT + 1) d, HWY_RVV_V(BASE, SEWD, LMULD) sum, \
3173 HWY_RVV_V(BASE, SEW, LMUL) a, HWY_RVV_V(BASE, SEW, LMUL) b) { \
3174 return OP##CHAR##SEWD##LMULD(sum, a, b, Lanes(d)); \
3178#undef HWY_RVV_WIDEN_MACC
3181template <
size_t N,
int kPow2, hwy::EnableIf<(kPow2 < 3)>* =
nullptr,
3182 class D32 = Simd<
int32_t, N, kPow2>,
class V32 = VFromD<D32>,
3183 class D16 = RepartitionToNarrow<D32>>
3184HWY_API VFromD<D32> ReorderW
idenMulAccumulateI16(Simd<
int32_t, N, kPow2> d32,
3185 VFromD<D16> a, VFromD<D16> b,
3186 const V32 sum0, V32& sum1) {
3187 const Twice<decltype(d32)> d32t;
3188 using V32T = VFromD<decltype(d32t)>;
3189 V32T sum = Combine(d32t, sum1, sum0);
3190 sum = detail::W
idenMulAcc(d32t, sum, a, b);
3191 sum1 = UpperHalf(d32, sum);
3192 return LowerHalf(d32, sum);
3196template <
size_t N,
class D32 = Simd<
int32_t, N, 3>,
class V32 = VFromD<D32>,
3197 class D16 = RepartitionToNarrow<D32>>
3200 const V32 sum0, V32& sum1) {
3202 using V16H =
VFromD<
decltype(d16h)>;
3207 sum1 = detail::WidenMulAcc(d32, sum1, a1, b1);
3208 return detail::WidenMulAcc(d32, sum0, a0, b0);
3213template <
size_t N,
int kPow2,
class VN,
class VW>
3215 const VW sum0, VW& sum1) {
3219template <
size_t N,
int kPow2,
class VN,
class VW>
3221 const VW sum0, VW& sum1) {
3227template <
class VW, HWY_IF_SIGNED_V(VW)>
3235 const Twice<
decltype(di32)> di32x2;
3238 const auto combined =
BitCast(di64x2,
Combine(di32x2, sum1, sum0));
3240 const auto even = ShiftRight<32>(ShiftLeft<32>(combined));
3241 const auto odd = ShiftRight<32>(combined);
3248 const Half<
decltype(
d)> dh;
3249 const vint32m4_t lo =
3251 const vint32m4_t hi =
3256template <
class VW, HWY_IF_FLOAT_V(VW)>
3258 return Add(sum0, sum1);
3264 static_assert(!IsSigned<TFromD<D>>() &&
sizeof(
TFromD<D>) == 8,
3282 const VFromD<D> ltLx = detail::Slide1Up(ltHL);
3291 static_assert(!IsSigned<TFromD<D>>() &&
sizeof(
TFromD<D>) == 8,
3301 static_assert(!IsSigned<TFromD<D>>() &&
sizeof(
TFromD<D>) == 8,
3311 static_assert(!IsSigned<TFromD<D>>() &&
sizeof(
TFromD<D>) == 8,
3321 static_assert(!IsSigned<TFromD<D>>() &&
sizeof(
TFromD<D>) == 8,
3331 static_assert(!IsSigned<TFromD<D>>() &&
sizeof(
TFromD<D>) == 8,
3342 const VFromD<D> aXH = detail::Slide1Down(a);
3343 const VFromD<D> bXH = detail::Slide1Down(b);
3344 const VFromD<D> minHL =
Min(a, b);
3345 const MFromD<D> ltXH = Lt(aXH, bXH);
3346 const MFromD<D> eqXH = Eq(aXH, bXH);
3356 const VFromD<D> aXH = detail::Slide1Down(a);
3357 const VFromD<D> bXH = detail::Slide1Down(b);
3358 const VFromD<D> maxHL =
Max(a, b);
3359 const MFromD<D> ltXH = Lt(aXH, bXH);
3360 const MFromD<D> eqXH = Eq(aXH, bXH);
3382#undef HWY_RVV_FOREACH
3383#undef HWY_RVV_FOREACH_08_ALL
3384#undef HWY_RVV_FOREACH_08_ALL_VIRT
3385#undef HWY_RVV_FOREACH_08_DEMOTE
3386#undef HWY_RVV_FOREACH_08_DEMOTE_VIRT
3387#undef HWY_RVV_FOREACH_08_EXT
3388#undef HWY_RVV_FOREACH_08_EXT_VIRT
3389#undef HWY_RVV_FOREACH_08_TRUNC
3390#undef HWY_RVV_FOREACH_08_VIRT
3391#undef HWY_RVV_FOREACH_16_ALL
3392#undef HWY_RVV_FOREACH_16_ALL_VIRT
3393#undef HWY_RVV_FOREACH_16_DEMOTE
3394#undef HWY_RVV_FOREACH_16_DEMOTE_VIRT
3395#undef HWY_RVV_FOREACH_16_EXT
3396#undef HWY_RVV_FOREACH_16_EXT_VIRT
3397#undef HWY_RVV_FOREACH_16_TRUNC
3398#undef HWY_RVV_FOREACH_16_VIRT
3399#undef HWY_RVV_FOREACH_32_ALL
3400#undef HWY_RVV_FOREACH_32_ALL_VIRT
3401#undef HWY_RVV_FOREACH_32_DEMOTE
3402#undef HWY_RVV_FOREACH_32_DEMOTE_VIRT
3403#undef HWY_RVV_FOREACH_32_EXT
3404#undef HWY_RVV_FOREACH_32_EXT_VIRT
3405#undef HWY_RVV_FOREACH_32_TRUNC
3406#undef HWY_RVV_FOREACH_32_VIRT
3407#undef HWY_RVV_FOREACH_64_ALL
3408#undef HWY_RVV_FOREACH_64_ALL_VIRT
3409#undef HWY_RVV_FOREACH_64_DEMOTE
3410#undef HWY_RVV_FOREACH_64_DEMOTE_VIRT
3411#undef HWY_RVV_FOREACH_64_EXT
3412#undef HWY_RVV_FOREACH_64_EXT_VIRT
3413#undef HWY_RVV_FOREACH_64_TRUNC
3414#undef HWY_RVV_FOREACH_64_VIRT
3415#undef HWY_RVV_FOREACH_B
3416#undef HWY_RVV_FOREACH_F
3417#undef HWY_RVV_FOREACH_F16
3418#undef HWY_RVV_FOREACH_F32
3419#undef HWY_RVV_FOREACH_F3264
3420#undef HWY_RVV_FOREACH_F64
3421#undef HWY_RVV_FOREACH_I
3422#undef HWY_RVV_FOREACH_I08
3423#undef HWY_RVV_FOREACH_I16
3424#undef HWY_RVV_FOREACH_I163264
3425#undef HWY_RVV_FOREACH_I32
3426#undef HWY_RVV_FOREACH_I64
3427#undef HWY_RVV_FOREACH_U
3428#undef HWY_RVV_FOREACH_U08
3429#undef HWY_RVV_FOREACH_U16
3430#undef HWY_RVV_FOREACH_U163264
3431#undef HWY_RVV_FOREACH_U32
3432#undef HWY_RVV_FOREACH_U64
3433#undef HWY_RVV_FOREACH_UI
3434#undef HWY_RVV_FOREACH_UI08
3435#undef HWY_RVV_FOREACH_UI16
3436#undef HWY_RVV_FOREACH_UI163264
3437#undef HWY_RVV_FOREACH_UI32
3438#undef HWY_RVV_FOREACH_UI3264
3439#undef HWY_RVV_FOREACH_UI64
3441#undef HWY_RVV_RETM_ARGM
3442#undef HWY_RVV_RETV_ARGV
3443#undef HWY_RVV_RETV_ARGVS
3444#undef HWY_RVV_RETV_ARGVV
#define HWY_MAX(a, b)
Definition: base.h:135
#define HWY_RESTRICT
Definition: base.h:64
#define HWY_API
Definition: base.h:129
#define HWY_MIN(a, b)
Definition: base.h:134
#define HWY_INLINE
Definition: base.h:70
#define HWY_DASSERT(condition)
Definition: base.h:238
HWY_INLINE Vec128< T, N > Abs(SignedTag, Vec128< T, N > a)
Definition: emu128-inl.h:633
HWY_INLINE VFromD< DU > BitCastToUnsigned(V v)
Definition: rvv-inl.h:693
HWY_API VF32 ReorderWidenMulAccumulateBF16(Simd< float, N, kPow2 > df32, VFromD< DU16 > a, VFromD< DU16 > b, const VF32 sum0, VF32 &sum1)
Definition: rvv-inl.h:3152
HWY_INLINE Mask128< T, N > MaskFromVec(hwy::SizeTag< 1 >, const Vec128< T, N > v)
Definition: x86_128-inl.h:1570
HWY_INLINE V OffsetsOf128BitBlocks(const D d, const V iota0)
Definition: rvv-inl.h:2078
HWY_INLINE Vec128< T, N > Add(hwy::NonFloatTag, Vec128< T, N > a, Vec128< T, N > b)
Definition: emu128-inl.h:535
constexpr size_t LanesPerBlock(Simd< T, N, kPow2 >)
Definition: arm_sve-inl.h:2069
HWY_INLINE Mask128< T, N > And(hwy::SizeTag< 1 >, const Mask128< T, N > a, const Mask128< T, N > b)
Definition: x86_128-inl.h:815
HWY_API Vec128< uint16_t, N > Shl(hwy::UnsignedTag, Vec128< uint16_t, N > v, Vec128< uint16_t, N > bits)
Definition: x86_128-inl.h:5009
HWY_INLINE Vec128< uint8_t, N > BitCastFromByte(Simd< uint8_t, N, 0 >, Vec128< uint8_t, N > v)
Definition: arm_neon-inl.h:888
HWY_INLINE Vec128< T, N > Sub(hwy::NonFloatTag, Vec128< T, N > a, Vec128< T, N > b)
Definition: emu128-inl.h:545
HWY_INLINE Mask128< float, N > UseInt(const Vec128< float, N > v)
Definition: arm_neon-inl.h:3418
HWY_INLINE Vec128< uint8_t, N > BitCastToByte(Vec128< uint8_t, N > v)
Definition: arm_neon-inl.h:861
HWY_INLINE auto ChangeLMUL(Simd< T, N, kPow2 > d, VFromD< Simd< T, N, kPow2 - 3 > > v) -> VFromD< decltype(d)>
Definition: rvv-inl.h:2568
constexpr size_t ScaleByPower(size_t N, int pow2)
Definition: ops/shared-inl.h:123
constexpr bool IsSupportedLMUL(D d)
Definition: rvv-inl.h:2159
constexpr bool IsFull(Simd< T, N, kPow2 >)
Definition: ops/shared-inl.h:115
HWY_INLINE MFromD< D > FirstNPerBlock(D)
Definition: rvv-inl.h:2084
HWY_INLINE VFromD< DU > Iota0(const D)
Definition: rvv-inl.h:714
HWY_INLINE Mask128< T, N > LoadMaskBits(Simd< T, N, 0 > d, uint64_t mask_bits)
Definition: arm_neon-inl.h:5364
HWY_API VFromD< D32 > ReorderWidenMulAccumulateI16(Simd< int32_t, N, kPow2 > d32, VFromD< D16 > a, VFromD< D16 > b, const V32 sum0, V32 &sum1)
Definition: rvv-inl.h:3184
HWY_INLINE Vec128< T, N > Mul(hwy::FloatTag, Vec128< T, N > a, const Vec128< T, N > b)
Definition: emu128-inl.h:748
d
Definition: rvv-inl.h:1998
HWY_API Vec128< T, N > AverageRound(Vec128< T, N > a, const Vec128< T, N > b)
Definition: emu128-inl.h:619
HWY_API Vec128< T, N > CopySign(const Vec128< T, N > magn, const Vec128< T, N > sign)
Definition: arm_neon-inl.h:2190
HWY_API Vec128< T, N > OddEvenBlocks(Vec128< T, N >, Vec128< T, N > even)
Definition: arm_neon-inl.h:4697
decltype(FirstN(D(), 0)) MFromD
Definition: arm_sve-inl.h:276
HWY_API Mask128< TTo, N > RebindMask(Simd< TTo, N, 0 > dto, Mask128< TFrom, N > m)
Definition: arm_neon-inl.h:2230
HWY_API Vec128< T, N > DupOdd(Vec128< T, N > v)
Definition: arm_neon-inl.h:4662
HWY_API VFromD< DW > ZipLower(V a, V b)
Definition: arm_neon-inl.h:4272
HWY_INLINE Mask128< T, N > Ne128Upper(Simd< T, N, 0 > d, Vec128< T, N > a, Vec128< T, N > b)
Definition: arm_neon-inl.h:6685
HWY_API bool AllTrue(const Full128< T > d, const Mask128< T > m)
Definition: arm_neon-inl.h:5716
HWY_API void LoadInterleaved2(Simd< T, N, 0 >, const T *HWY_RESTRICT unaligned, Vec128< T, N > &v0, Vec128< T, N > &v1)
Definition: arm_neon-inl.h:6349
HWY_API Vec128< T > Shuffle1032(const Vec128< T > v)
Definition: arm_neon-inl.h:4131
HWY_API void StoreInterleaved4(const Vec128< T, N > v0, const Vec128< T, N > v1, const Vec128< T, N > v2, const Vec128< T, N > v3, Simd< T, N, 0 >, T *HWY_RESTRICT unaligned)
Definition: arm_neon-inl.h:6584
HWY_API Vec128< int16_t > MulHigh(const Vec128< int16_t > a, const Vec128< int16_t > b)
Definition: arm_neon-inl.h:1684
HWY_API Vec128< T > Shuffle2103(const Vec128< T > v)
Definition: arm_neon-inl.h:4147
HWY_API Vec128< float, N > Round(const Vec128< float, N > v)
Definition: arm_neon-inl.h:3436
HWY_API Vec128< T, N > ZeroExtendVector(Simd< T, N, 0 > d, Vec128< T, N/2 > lo)
Definition: arm_neon-inl.h:4448
HWY_API Mask128< T, N > IsNaN(const Vec128< T, N > v)
Definition: arm_neon-inl.h:3506
HWY_API intptr_t FindFirstTrue(const Simd< T, N, 0 > d, const Mask128< T, N > mask)
Definition: arm_neon-inl.h:5691
HWY_API V128 CombineShiftRightBytes(Full128< T > d, V128 hi, V128 lo)
Definition: arm_neon-inl.h:3592
HWY_API Vec128< T, N > ShiftLeftLanes(Simd< T, N, 0 > d, const Vec128< T, N > v)
Definition: arm_neon-inl.h:3695
HWY_API Mask128< T, N > FirstN(const Simd< T, N, 0 > d, size_t num)
Definition: arm_neon-inl.h:2456
HWY_API size_t StoreMaskBits(Simd< T, N, 0 >, const Mask128< T, N > mask, uint8_t *bits)
Definition: arm_neon-inl.h:5701
HWY_API Vec128< float, N > MulAdd(const Vec128< float, N > mul, const Vec128< float, N > x, const Vec128< float, N > add)
Definition: arm_neon-inl.h:1799
HWY_API void Stream(const Vec128< T, N > v, Simd< T, N, 0 > d, T *HWY_RESTRICT aligned)
Definition: arm_neon-inl.h:2955
HWY_API Vec128< T, N > Xor3(Vec128< T, N > x1, Vec128< T, N > x2, Vec128< T, N > x3)
Definition: arm_neon-inl.h:2025
Repartition< MakeWide< TFromD< D > >, D > RepartitionToWide
Definition: ops/shared-inl.h:221
HWY_INLINE Mask128< T, N > Eq128Upper(Simd< T, N, 0 > d, Vec128< T, N > a, Vec128< T, N > b)
Definition: arm_neon-inl.h:6668
HWY_API Vec128< T, N > And(const Vec128< T, N > a, const Vec128< T, N > b)
Definition: arm_neon-inl.h:1949
HWY_API Vec128< T, N > SumOfLanes(Simd< T, N, 0 >, const Vec128< T, N > v)
Definition: arm_neon-inl.h:5334
HWY_API Vec128< T, N > BroadcastSignBit(const Vec128< T, N > v)
Definition: arm_neon-inl.h:2207
HWY_INLINE HWY_MAYBE_UNUSED constexpr size_t MaxLanes(D)
Definition: ops/shared-inl.h:295
HWY_API Vec128< To, 1 > TruncateTo(Simd< To, 1, 0 >, const Vec128< From, 1 > v)
Definition: arm_neon-inl.h:4806
HWY_RVV_FOREACH_F64(HWY_RVV_DEMOTE_F, DemoteTo, vfncvt_rod_f_f_w_f, _DEMOTE_VIRT) template< size_t N > HWY_API vint32mf2_t DemoteTo(Simd< int32_t
HWY_API Vec128< uint64_t, N > Min(const Vec128< uint64_t, N > a, const Vec128< uint64_t, N > b)
Definition: arm_neon-inl.h:2517
HWY_API Vec128< uint64_t, N > Max(const Vec128< uint64_t, N > a, const Vec128< uint64_t, N > b)
Definition: arm_neon-inl.h:2555
HWY_API Mask128< T, N > MaskFromVec(const Vec128< T, N > v)
Definition: arm_neon-inl.h:2217
HWY_API Vec128< T, N > ConcatUpperUpper(const Simd< T, N, 0 > d, Vec128< T, N > hi, Vec128< T, N > lo)
Definition: arm_neon-inl.h:4517
HWY_INLINE Mask128< T, N > Ne128(Simd< T, N, 0 > d, Vec128< T, N > a, Vec128< T, N > b)
Definition: arm_neon-inl.h:6677
Rebind< MakeUnsigned< TFromD< D > >, D > RebindToUnsigned
Definition: ops/shared-inl.h:212
HWY_API Vec128< T, N > SaturatedAdd(Vec128< T, N > a, const Vec128< T, N > b)
Definition: emu128-inl.h:597
HWY_API Vec128< T, N > GatherIndex(const Simd< T, N, 0 > d, const T *HWY_RESTRICT base, const Vec128< Index, N > index)
Definition: arm_neon-inl.h:5037
HWY_INLINE Vec128< uint64_t > MulOdd(Vec128< uint64_t > a, Vec128< uint64_t > b)
Definition: arm_neon-inl.h:4912
HWY_INLINE Mask128< T, N > Eq128(Simd< T, N, 0 > d, Vec128< T, N > a, Vec128< T, N > b)
Definition: arm_neon-inl.h:6660
N ConcatEven(Simd< T, N, 0 >, Vec128< T, N > hi, Vec128< T, N > lo)
Definition: arm_neon-inl.h:4617
HWY_API Vec128< T > Shuffle0321(const Vec128< T > v)
Definition: arm_neon-inl.h:4141
HWY_API Vec128< T > Not(const Vec128< T > v)
Definition: arm_neon-inl.h:1931
HWY_API Mask128< T, N > IsInf(const Vec128< T, N > v)
Definition: arm_neon-inl.h:3511
HWY_API Vec128< T, N > ConcatLowerUpper(const Simd< T, N, 0 > d, Vec128< T, N > hi, Vec128< T, N > lo)
Definition: arm_neon-inl.h:4544
HWY_API Vec128< T, N/2 > LowerHalf(const Vec128< T, N > v)
Definition: arm_neon-inl.h:3540
HWY_API Vec128< uint64_t > InterleaveLower(const Vec128< uint64_t > a, const Vec128< uint64_t > b)
Definition: arm_neon-inl.h:4181
HWY_API Vec128< int64_t > MulEven(Vec128< int32_t > a, Vec128< int32_t > b)
Definition: arm_neon-inl.h:4872
HWY_API Vec128< bfloat16_t, 2 *N > ReorderDemote2To(Simd< bfloat16_t, 2 *N, 0 > dbf16, Vec128< float, N > a, Vec128< float, N > b)
Definition: arm_neon-inl.h:4719
HWY_API Vec128< T, 1 > CompressNot(Vec128< T, 1 > v, Mask128< T, 1 >)
Definition: arm_neon-inl.h:6198
HWY_API Vec128< T, N > MaskedLoad(Mask128< T, N > m, Simd< T, N, 0 > d, const T *HWY_RESTRICT aligned)
Definition: arm_neon-inl.h:2758
typename D::Twice Twice
Definition: ops/shared-inl.h:231
Rebind< MakeSigned< TFromD< D > >, D > RebindToSigned
Definition: ops/shared-inl.h:210
HWY_API Vec128< uint64_t > CompressBlocksNot(Vec128< uint64_t > v, Mask128< uint64_t >)
Definition: arm_neon-inl.h:6226
HWY_API Vec128< float, N > ReorderWidenMulAccumulate(Simd< float, N, 0 > df32, Vec128< bfloat16_t, 2 *N > a, Vec128< bfloat16_t, 2 *N > b, const Vec128< float, N > sum0, Vec128< float, N > &sum1)
Definition: arm_neon-inl.h:4288
HWY_API Vec128< T, N > IfVecThenElse(Vec128< T, N > mask, Vec128< T, N > yes, Vec128< T, N > no)
Definition: arm_neon-inl.h:2047
HWY_API void BlendedStore(Vec128< T, N > v, Mask128< T, N > m, Simd< T, N, 0 > d, T *HWY_RESTRICT p)
Definition: arm_neon-inl.h:2941
HWY_API size_t CountTrue(Full128< T >, const Mask128< T > mask)
Definition: arm_neon-inl.h:5671
HWY_API Vec128< T, N > VecFromMask(Simd< T, N, 0 > d, const Mask128< T, N > v)
Definition: arm_neon-inl.h:2223
HWY_API Vec128< T, N > DupEven(Vec128< T, N > v)
Definition: arm_neon-inl.h:4646
HWY_API Vec128< T, N > IfThenElseZero(const Mask128< T, N > mask, const Vec128< T, N > yes)
Definition: arm_neon-inl.h:2253
HWY_API Mask128< uint64_t, N > TestBit(Vec128< uint64_t, N > v, Vec128< uint64_t, N > bit)
Definition: arm_neon-inl.h:2477
HWY_API constexpr size_t Lanes(Simd< T, N, kPow2 >)
Definition: arm_sve-inl.h:243
HWY_API Vec128< T, N > Load(Simd< T, N, 0 > d, const T *HWY_RESTRICT p)
Definition: arm_neon-inl.h:2753
HWY_API Vec128< int64_t > Neg(const Vec128< int64_t > v)
Definition: arm_neon-inl.h:1413
HWY_API Vec128< TI > TableLookupBytes(const Vec128< T > bytes, const Vec128< TI > from)
Definition: arm_neon-inl.h:4922
HWY_API Vec128< T, N > IfThenElse(const Mask128< T, N > mask, const Vec128< T, N > yes, const Vec128< T, N > no)
Definition: emu128-inl.h:303
HWY_API Vec128< T, N > TableLookupLanes(Vec128< T, N > v, Indices128< T, N > idx)
Definition: arm_neon-inl.h:4019
HWY_API Vec128< T, N > Xor(const Vec128< T, N > a, const Vec128< T, N > b)
Definition: arm_neon-inl.h:1998
HWY_API Vec128< float, N > Floor(const Vec128< float, N > v)
Definition: arm_neon-inl.h:3467
Simd< typename V::PrivateT, V::kPrivateN, 0 > DFromV
Definition: arm_neon-inl.h:842
HWY_API Vec128< float, N > MulSub(const Vec128< float, N > mul, const Vec128< float, N > x, const Vec128< float, N > sub)
Definition: arm_neon-inl.h:1853
HWY_API Vec128< T, N > CopySignToAbs(const Vec128< T, N > abs, const Vec128< T, N > sign)
Definition: arm_neon-inl.h:2198
HWY_API void StoreU(const Vec128< uint8_t > v, Full128< uint8_t >, uint8_t *HWY_RESTRICT unaligned)
Definition: arm_neon-inl.h:2772
HWY_INLINE VFromD< D > Min128Upper(D d, const VFromD< D > a, const VFromD< D > b)
Definition: arm_neon-inl.h:6705
constexpr size_t MLenFromD(Simd< T, N, kPow2 >)
Definition: rvv-inl.h:43
N ConcatOdd(Simd< T, N, 0 >, Vec128< T, N > hi, Vec128< T, N > lo)
Definition: arm_neon-inl.h:4586
HWY_API Vec128< float, N > Ceil(const Vec128< float, N > v)
Definition: arm_neon-inl.h:3453
HWY_API Indices128< T, N > IndicesFromVec(Simd< T, N, 0 > d, Vec128< TI, N > vec)
Definition: arm_neon-inl.h:3973
HWY_API Vec128< T, N > SwapAdjacentBlocks(Vec128< T, N > v)
Definition: arm_neon-inl.h:4704
HWY_API Vec128< T, N > ShiftLeftBytes(Simd< T, N, 0 >, Vec128< T, N > v)
Definition: arm_neon-inl.h:3684
HWY_INLINE VFromD< D > Min128(D d, const VFromD< D > a, const VFromD< D > b)
Definition: arm_neon-inl.h:6695
HWY_API Vec128< T, N > Reverse2(Simd< T, N, 0 > d, const Vec128< T, N > v)
Definition: arm_neon-inl.h:4061
HWY_API Vec64< uint32_t > Shuffle2301(const Vec64< uint32_t > v)
Definition: arm_neon-inl.h:2326
svuint16_t Set(Simd< bfloat16_t, N, kPow2 > d, bfloat16_t arg)
Definition: arm_sve-inl.h:322
HWY_API Vec128< uint8_t > Combine(Full128< uint8_t >, Vec64< uint8_t > hi, Vec64< uint8_t > lo)
Definition: arm_neon-inl.h:4352
HWY_API Vec128< T, N > Reverse8(Simd< T, N, 0 > d, const Vec128< T, N > v)
Definition: arm_neon-inl.h:4113
HWY_API Vec128< T, N > MaxOfLanes(Simd< T, N, 0 >, const Vec128< T, N > v)
Definition: arm_neon-inl.h:5342
Vec128< T, N > Iota(const Simd< T, N, 0 > d, const T2 first)
Definition: arm_neon-inl.h:1049
HWY_API Mask128< T, N > ExclusiveNeither(const Mask128< T, N > a, Mask128< T, N > b)
Definition: arm_neon-inl.h:2314
typename V::PrivateT TFromV
Definition: arm_neon-inl.h:845
HWY_INLINE Vec128< T, N > CompressBits(Vec128< T, N > v, const uint8_t *HWY_RESTRICT bits)
Definition: arm_neon-inl.h:6234
HWY_API Mask128< T, N > LoadMaskBits(Simd< T, N, 0 > d, const uint8_t *HWY_RESTRICT bits)
Definition: arm_neon-inl.h:5407
HWY_API Vec128< T, N > ZeroIfNegative(Vec128< T, N > v)
Definition: arm_neon-inl.h:2277
HWY_API Vec128< T > Shuffle01(const Vec128< T > v)
Definition: arm_neon-inl.h:4135
HWY_INLINE VFromD< D > Max128Upper(D d, const VFromD< D > a, const VFromD< D > b)
Definition: arm_neon-inl.h:6710
HWY_INLINE Mask128< T, N > Lt128(Simd< T, N, 0 > d, Vec128< T, N > a, Vec128< T, N > b)
Definition: arm_neon-inl.h:6623
HWY_API Vec64< uint16_t > DemoteTo(Full64< uint16_t >, const Vec128< int32_t > v)
Definition: arm_neon-inl.h:3145
HWY_API Vec128< uint8_t > LoadU(Full128< uint8_t >, const uint8_t *HWY_RESTRICT unaligned)
Definition: arm_neon-inl.h:2591
HWY_API Vec128< T, N > OrAnd(Vec128< T, N > o, Vec128< T, N > a1, Vec128< T, N > a2)
Definition: arm_neon-inl.h:2040
HWY_API Vec128< T, N > IfNegativeThenElse(Vec128< T, N > v, Vec128< T, N > yes, Vec128< T, N > no)
Definition: arm_neon-inl.h:2266
HWY_API Vec128< T, N > ConcatUpperLower(Simd< T, N, 0 > d, Vec128< T, N > hi, Vec128< T, N > lo)
Definition: arm_neon-inl.h:4570
typename detail::ScalableTagChecker< T, kPow2 >::type ScalableTag
Definition: ops/shared-inl.h:173
HWY_API Vec128< T, N > BitCast(Simd< T, N, 0 > d, Vec128< FromT, N *sizeof(T)/sizeof(FromT)> v)
Definition: arm_neon-inl.h:997
HWY_API bool AllFalse(const Simd< T, N, 0 > d, const Mask128< T, N > m)
Definition: arm_neon-inl.h:5710
HWY_API Vec64< uint8_t > UpperHalf(Full64< uint8_t >, const Vec128< uint8_t > v)
Definition: arm_neon-inl.h:3739
HWY_API T ExtractLane(const Vec128< T, 1 > v, size_t i)
Definition: arm_neon-inl.h:1085
HWY_API svbool_t Gt(const V a, const V b)
Definition: arm_sve-inl.h:881
HWY_API void ScatterOffset(Vec128< T, N > v, Simd< T, N, 0 > d, T *HWY_RESTRICT base, const Vec128< Offset, N > offset)
Definition: arm_neon-inl.h:4984
HWY_API Vec128< T, N > Undefined(Simd< T, N, 0 >)
Definition: arm_neon-inl.h:1040
HWY_API VFromD< DW > ZipUpper(DW dw, V a, V b)
Definition: arm_neon-inl.h:4281
HWY_API Vec128< T, N > ShiftRight(Vec128< T, N > v)
Definition: emu128-inl.h:386
HWY_API Vec128< T, N > ConcatLowerLower(const Simd< T, N, 0 > d, Vec128< T, N > hi, Vec128< T, N > lo)
Definition: arm_neon-inl.h:4456
typename D::template Rebind< T > Rebind
Definition: ops/shared-inl.h:207
HWY_API Vec128< float, N > RearrangeToOddPlusEven(const Vec128< float, N > sum0, const Vec128< float, N > sum1)
Definition: arm_neon-inl.h:4412
HWY_API Vec128< T, N > Zero(Simd< T, N, 0 > d)
Definition: arm_neon-inl.h:1020
HWY_API V InterleaveUpper(Simd< T, N, 0 >, V a, V b)
Definition: arm_neon-inl.h:4256
HWY_API Vec128< T, N > GatherOffset(const Simd< T, N, 0 > d, const T *HWY_RESTRICT base, const Vec128< Offset, N > offset)
Definition: arm_neon-inl.h:5020
HWY_API void LoadInterleaved3(Simd< T, N, 0 >, const T *HWY_RESTRICT unaligned, Vec128< T, N > &v0, Vec128< T, N > &v1, Vec128< T, N > &v2)
Definition: arm_neon-inl.h:6387
HWY_API Vec128< T, N > IfThenZeroElse(const Mask128< T, N > mask, const Vec128< T, N > no)
Definition: arm_neon-inl.h:2260
HWY_API Vec128< T, N > Or(const Vec128< T, N > a, const Vec128< T, N > b)
Definition: arm_neon-inl.h:1986
HWY_INLINE HWY_MAYBE_UNUSED constexpr int Pow2(D)
Definition: ops/shared-inl.h:271
HWY_INLINE VFromD< D > Max128(D d, const VFromD< D > a, const VFromD< D > b)
Definition: arm_neon-inl.h:6700
HWY_API Vec128< int32_t, N > NearestInt(const Vec128< float, N > v)
Definition: arm_neon-inl.h:3497
HWY_API Vec128< float > ApproximateReciprocal(const Vec128< float > v)
Definition: arm_neon-inl.h:1734
HWY_API Vec32< uint8_t > U8FromU32(const Vec128< uint32_t > v)
Definition: arm_neon-inl.h:3287
HWY_API Indices128< T, N > SetTableIndices(Simd< T, N, 0 > d, const TI *idx)
Definition: arm_neon-inl.h:4013
HWY_API TFromV< V > GetLane(const V v)
Definition: arm_neon-inl.h:1076
HWY_API void ScatterIndex(Vec128< T, N > v, Simd< T, N, 0 > d, T *HWY_RESTRICT base, const Vec128< Index, N > index)
Definition: arm_neon-inl.h:5002
HWY_API Vec128< float, N > NegMulAdd(const Vec128< float, N > mul, const Vec128< float, N > x, const Vec128< float, N > add)
Definition: arm_neon-inl.h:1832
HWY_API Vec128< uint16_t > PromoteTo(Full128< uint16_t >, const Vec64< uint8_t > v)
Definition: arm_neon-inl.h:2965
HWY_API Vec128< T, N > Or3(Vec128< T, N > o1, Vec128< T, N > o2, Vec128< T, N > o3)
Definition: arm_neon-inl.h:2033
decltype(Zero(D())) VFromD
Definition: arm_neon-inl.h:1030
HWY_API Vec128< T, N > LoadDup128(Simd< T, N, 0 > d, const T *const HWY_RESTRICT p)
Definition: arm_neon-inl.h:2765
HWY_API Vec128< T, N > OddEven(const Vec128< T, N > a, const Vec128< T, N > b)
Definition: arm_neon-inl.h:4678
HWY_API Vec128< int16_t > MulFixedPoint15(Vec128< int16_t > a, Vec128< int16_t > b)
Definition: arm_neon-inl.h:1720
HWY_API Vec128< T > Shuffle0123(const Vec128< T > v)
Definition: arm_neon-inl.h:4153
HWY_API Vec128< float, N > Trunc(const Vec128< float, N > v)
Definition: arm_neon-inl.h:3425
typename D::Half Half
Definition: ops/shared-inl.h:227
HWY_API Vec128< T, N > MinOfLanes(Simd< T, N, 0 >, const Vec128< T, N > v)
Definition: arm_neon-inl.h:5338
HWY_API Vec128< T, N > ShiftRightBytes(Simd< T, N, 0 >, Vec128< T, N > v)
Definition: arm_neon-inl.h:3707
HWY_API size_t CompressStore(Vec128< T, N > v, const Mask128< T, N > mask, Simd< T, N, 0 > d, T *HWY_RESTRICT unaligned)
Definition: arm_neon-inl.h:6248
typename D::template Repartition< T > Repartition
Definition: ops/shared-inl.h:218
HWY_API Vec128< int8_t > Abs(const Vec128< int8_t > v)
Definition: arm_neon-inl.h:2146
HWY_API Vec128< float > ConvertTo(Full128< float >, const Vec128< int32_t > v)
Definition: arm_neon-inl.h:3327
N
Definition: rvv-inl.h:1998
HWY_API Vec128< float, N > Sqrt(const Vec128< float, N > v)
Definition: arm_neon-inl.h:1913
HWY_API size_t CompressBitsStore(Vec128< T, N > v, const uint8_t *HWY_RESTRICT bits, Simd< T, N, 0 > d, T *HWY_RESTRICT unaligned)
Definition: arm_neon-inl.h:6273
HWY_API Vec128< uint32_t, N > RotateRight(const Vec128< uint32_t, N > v)
Definition: arm_neon-inl.h:1444
HWY_API Mask128< T, N > IsFinite(const Vec128< T, N > v)
Definition: arm_neon-inl.h:3521
HWY_API Vec128< T, N > AndNot(const Vec128< T, N > not_mask, const Vec128< T, N > mask)
Definition: arm_neon-inl.h:1964
HWY_API Vec128< uint64_t > SumsOf8(const Vec128< uint8_t > v)
Definition: arm_neon-inl.h:1361
HWY_API Vec128< float > ApproximateReciprocalSqrt(const Vec128< float > v)
Definition: arm_neon-inl.h:1885
HWY_API void LoadInterleaved4(Simd< T, N, 0 >, const T *HWY_RESTRICT unaligned, Vec128< T, N > &v0, Vec128< T, N > &v1, Vec128< T, N > &v2, Vec128< T, N > &v3)
Definition: arm_neon-inl.h:6428
HWY_API Vec128< T > ReverseBlocks(Full128< T >, const Vec128< T > v)
Definition: arm_neon-inl.h:4712
HWY_API size_t CompressBlendedStore(Vec128< T, N > v, Mask128< T, N > m, Simd< T, N, 0 > d, T *HWY_RESTRICT unaligned)
Definition: arm_neon-inl.h:6257
HWY_API Vec128< T, N > Reverse4(Simd< T, N, 0 > d, const Vec128< T, N > v)
Definition: arm_neon-inl.h:4090
HWY_API void StoreInterleaved2(const Vec128< T, N > v0, const Vec128< T, N > v1, Simd< T, N, 0 >, T *HWY_RESTRICT unaligned)
Definition: arm_neon-inl.h:6517
HWY_API Vec128< T, 1 > Reverse(Simd< T, 1, 0 >, const Vec128< T, 1 > v)
Definition: arm_neon-inl.h:4030
HWY_API void Store(Vec128< T, N > v, Simd< T, N, 0 > d, T *HWY_RESTRICT aligned)
Definition: arm_neon-inl.h:2934
HWY_API Vec128< T, 1 > InsertLane(const Vec128< T, 1 > v, size_t i, T t)
Definition: arm_neon-inl.h:1225
HWY_INLINE Mask128< T, N > Lt128Upper(Simd< T, N, 0 > d, Vec128< T, N > a, Vec128< T, N > b)
Definition: arm_neon-inl.h:6651
HWY_API Vec128< T, N > SaturatedSub(Vec128< T, N > a, const Vec128< T, N > b)
Definition: emu128-inl.h:608
HWY_API Vec128< T, N > ShiftLeft(Vec128< T, N > v)
Definition: emu128-inl.h:376
HWY_API Vec128< uint16_t > Broadcast(const Vec128< uint16_t > v)
Definition: arm_neon-inl.h:3885
const vfloat64m1_t v
Definition: rvv-inl.h:1998
HWY_API V Trunc(const V v)
Definition: rvv-inl.h:3001
HWY_API Vec128< float > AbsDiff(const Vec128< float > a, const Vec128< float > b)
Definition: arm_neon-inl.h:1773
HWY_API Vec128< T, N > ShiftRightLanes(Simd< T, N, 0 > d, const Vec128< T, N > v)
Definition: arm_neon-inl.h:3713
HWY_API V CombineShiftRightLanes(const D d, const V hi, V lo)
Definition: rvv-inl.h:2506
HWY_API void StoreInterleaved3(const Vec128< T, N > v0, const Vec128< T, N > v1, const Vec128< T, N > v2, Simd< T, N, 0 >, T *HWY_RESTRICT unaligned)
Definition: arm_neon-inl.h:6549
typename D::T TFromD
Definition: ops/shared-inl.h:203
HWY_API VI TableLookupBytesOr0(const V bytes, const VI from)
Definition: arm_neon-inl.h:4977
HWY_API Vec128< T, 1 > Compress(Vec128< T, 1 > v, Mask128< T, 1 >)
Definition: arm_neon-inl.h:6174
HWY_API svbool_t Ge(const V a, const V b)
Definition: arm_sve-inl.h:885
HWY_API Vec128< float, N > NegMulSub(const Vec128< float, N > mul, const Vec128< float, N > x, const Vec128< float, N > sub)
Definition: arm_neon-inl.h:1861
Definition: aligned_allocator.h:27
constexpr T MantissaEnd()
Definition: base.h:753
HWY_API constexpr bool IsSame()
Definition: base.h:396
typename EnableIfT< Condition >::type EnableIf
Definition: base.h:383
constexpr size_t CeilLog2(TI x)
Definition: base.h:899
typename detail::Relations< T >::Unsigned MakeUnsigned
Definition: base.h:593
#define HWY_IF_LANE_SIZE_D(D, bytes)
Definition: ops/shared-inl.h:250
#define HWY_RVV_FOREACH_I163264(X_MACRO, NAME, OP, LMULS)
Definition: rvv-inl.h:344
#define HWY_RVV_GATHER(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1412
#define HWY_RVV_PROMOTE_X2(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN)
Definition: rvv-inl.h:1574
#define HWY_RVV_CAST_U8(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:569
#define HWY_RVV_RETM_ARGVV(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1025
#define HWY_RVV_FOREACH_U163264(X_MACRO, NAME, OP, LMULS)
Definition: rvv-inl.h:339
#define HWY_RVV_UNDEFINED(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:510
#define HWY_RVV_MASKED_LOAD(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1315
#define HWY_RVV_FOREACH_I16(X_MACRO, NAME, OP, LMULS)
Definition: rvv-inl.h:299
#define HWY_RVV_SCATTER(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1383
#define HWY_RVV_FOREACH_F3264(X_MACRO, NAME, OP, LMULS)
Definition: rvv-inl.h:353
#define HWY_RVV_LOAD2(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1448
#define HWY_RVV_CAST_IF(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:612
#define HWY_RVV_FOREACH_U64(X_MACRO, NAME, OP, LMULS)
Definition: rvv-inl.h:293
#define HWY_RVV_FOREACH_F(X_MACRO, NAME, OP, LMULS)
Definition: rvv-inl.h:370
#define HWY_RVV_BLENDED_STORE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1341
#define HWY_RVV_PROMOTE_X4(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN)
Definition: rvv-inl.h:1581
#define HWY_RVV_REDUCE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:2779
#define HWY_RVV_CAST_VIRT_U(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:628
#define HWY_RVV_SLIDE1(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:2197
#define HWY_RVV_NEAREST(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:2051
#define HWY_RVV_STOREN(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1354
#define HWY_RVV_EXT(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:541
#define HWY_RVV_SHIFT_II(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:917
#define HWY_RVV_FMA(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:997
#define HWY_RVV_DEMOTE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1632
#define HWY_RVV_LANES(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:409
#define HWY_RVV_PROMOTE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1565
#define HWY_RVV_SHIFT_VV(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:908
#define HWY_RVV_STORE2(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1496
#define HWY_RVV_SHIFT(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:853
#define HWY_RVV_VEC_FROM_MASK(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1175
#define HWY_RVV_IF_POW2_IN(D, min, max)
Definition: rvv-inl.h:39
#define HWY_RVV_IF_THEN_ELSE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1123
#define HWY_RVV_TABLE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:2318
#define HWY_RVV_LOAD3(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1463
#define HWY_RVV_FOREACH_B(X_MACRO, NAME, OP)
Definition: rvv-inl.h:59
#define HWY_RVV_RETV_ARGVS(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:447
#define HWY_RVV_FOREACH_F16(X_MACRO, NAME, OP, LMULS)
Definition: rvv-inl.h:311
#define HWY_RVV_FOREACH_I32(X_MACRO, NAME, OP, LMULS)
Definition: rvv-inl.h:301
#define HWY_RVV_STORE4(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1526
#define HWY_RVV_FOREACH_UI163264(X_MACRO, NAME, OP, LMULS)
Definition: rvv-inl.h:349
#define HWY_RVV_RETM_ARGM(SEW, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:463
#define HWY_RVV_RETV_ARGV(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:440
#define HWY_RVV_STORE3(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1511
#define HWY_SPECIALIZE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:394
#define HWY_RVV_LOAD_MASK_BITS(SEW, SHIFT, MLEN, NAME, OP)
#define HWY_RVV_ALL_TRUE(SEW, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1253
#define HWY_RVV_FOREACH_U08(X_MACRO, NAME, OP, LMULS)
Definition: rvv-inl.h:287
#define HWY_RVV_SLIDE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:2093
#define HWY_RVV_COMPRESS(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:2451
#define HWY_RVV_FOREACH(X_MACRO, NAME, OP, LMULS)
Definition: rvv-inl.h:379
#define HWY_RVV_SET(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:472
#define HWY_RVV_DEMOTE_F(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1980
#define HWY_RVV_TRUNC(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:532
#define HWY_RVV_FOREACH_F32(X_MACRO, NAME, OP, LMULS)
Definition: rvv-inl.h:313
#define HWY_RVV_CAST_I8(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:583
#define HWY_RVV_RETM_ARGMM(SEW, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1101
#define HWY_RVV_FOREACH_I(X_MACRO, NAME, OP, LMULS)
Definition: rvv-inl.h:364
#define HWY_RVV_LOAD(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1279
#define HWY_RVV_DEMOTE_I_TO_U(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1652
#define HWY_RVV_FOREACH_U32(X_MACRO, NAME, OP, LMULS)
Definition: rvv-inl.h:291
#define HWY_RVV_IOTA(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:703
#define HWY_RVV_CAST_VIRT_IF(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:644
#define HWY_RVV_FOREACH_UI3264(X_MACRO, NAME, OP, LMULS)
Definition: rvv-inl.h:335
#define HWY_RVV_IF_THEN_ZERO_ELSE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1143
#define HWY_RVV_STORE_MASK_BITS(SEW, SHIFT, MLEN, NAME, OP)
#define HWY_RVV_LOAD4(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1479
#define HWY_RVV_FIND_FIRST_TRUE(SEW, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1230
#define HWY_RVV_RETV_ARGV2(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
#define HWY_RVV_EXT_VIRT(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:554
#define HWY_RVV_CAST_U(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:598
#define HWY_RVV_STORE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1328
#define HWY_RVV_GET_LANE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:2212
#define HWY_RVV_WIDEN_MACC(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:3168
#define HWY_RVV_RETM_ARGVS(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1034
#define HWY_RVV_FOREACH_U(X_MACRO, NAME, OP, LMULS)
Definition: rvv-inl.h:358
#define HWY_RVV_FOREACH_UI(X_MACRO, NAME, OP, LMULS)
Definition: rvv-inl.h:375
#define HWY_RVV_CONVERT(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:2028
#define HWY_RVV_RETV_ARGVV(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:455
#define HWY_RVV_FOREACH_U16(X_MACRO, NAME, OP, LMULS)
Definition: rvv-inl.h:289
#define HWY_RVV_COUNT_TRUE(SEW, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1265
#define HWY_RVV_FOREACH_I08(X_MACRO, NAME, OP, LMULS)
Definition: rvv-inl.h:297
#define HWY_NAMESPACE
Definition: set_macros-inl.h:82
@ value
Definition: arm_neon-inl.h:5730
Definition: ops/shared-inl.h:52
uint16_t bits
Definition: base.h:297