17#if defined(HIGHWAY_HWY_CONTRIB_ALGO_TRANSFORM_INL_H_) == \
18 defined(HWY_TARGET_TOGGLE)
19#ifdef HIGHWAY_HWY_CONTRIB_ALGO_TRANSFORM_INL_H_
20#undef HIGHWAY_HWY_CONTRIB_ALGO_TRANSFORM_INL_H_
22#define HIGHWAY_HWY_CONTRIB_ALGO_TRANSFORM_INL_H_
51template <
class D,
class Func,
typename T = TFromD<D>>
54 using TU =
TFromD<
decltype(du)>;
58 Vec<
decltype(du)> vidx =
Iota(du, 0);
59 for (; idx +
N <= count; idx +=
N) {
61 vidx =
Add(vidx,
Set(du,
static_cast<TU
>(
N)));
67#if HWY_MEM_OPS_MIGHT_FAULT
71 for (; idx < count; ++idx) {
72 StoreU(func(d1,
Set(du1,
static_cast<TU
>(idx))), d1, out + idx);
75 const size_t remaining = count - idx;
84template <
class D,
class Func,
typename T = TFromD<D>>
89 for (; idx +
N <= count; idx +=
N) {
97#if HWY_MEM_OPS_MIGHT_FAULT
100 for (; idx < count; ++idx) {
101 using V1 =
Vec<
decltype(d1)>;
102 const V1
v =
LoadU(d1, inout + idx);
103 StoreU(func(d1,
v), d1, inout + idx);
106 const size_t remaining = count - idx;
116template <
class D,
class Func,
typename T = TFromD<D>>
122 for (; idx +
N <= count; idx +=
N) {
131#if HWY_MEM_OPS_MIGHT_FAULT
134 for (; idx < count; ++idx) {
135 using V1 =
Vec<
decltype(d1)>;
136 const V1
v =
LoadU(d1, inout + idx);
137 const V1 v1 =
LoadU(d1, in1 + idx);
138 StoreU(func(d1,
v, v1), d1, inout + idx);
141 const size_t remaining = count - idx;
152template <
class D,
class Func,
typename T = TFromD<D>>
159 for (; idx +
N <= count; idx +=
N) {
163 StoreU(func(
d,
v, v1, v2),
d, inout + idx);
169#if HWY_MEM_OPS_MIGHT_FAULT
172 for (; idx < count; ++idx) {
173 using V1 =
Vec<
decltype(d1)>;
174 const V1
v =
LoadU(d1, inout + idx);
175 const V1 v1 =
LoadU(d1, in1 + idx);
176 const V1 v2 =
LoadU(d1, in2 + idx);
177 StoreU(func(d1,
v, v1, v2), d1, inout + idx);
180 const size_t remaining = count - idx;
190template <
class D,
typename T = TFromD<D>>
197 for (; idx +
N <= count; idx +=
N) {
205#if HWY_MEM_OPS_MIGHT_FAULT
208 const Vec<
decltype(d1)> old_v1 =
Set(d1, old_t);
209 const Vec<
decltype(d1)> new_v1 =
Set(d1, new_t);
210 for (; idx < count; ++idx) {
211 using V1 =
Vec<
decltype(d1)>;
212 const V1 v1 =
LoadU(d1, inout + idx);
216 const size_t remaining = count - idx;
224template <
class D,
class Func,
typename T = TFromD<D>>
231 for (; idx +
N <= count; idx +=
N) {
239#if HWY_MEM_OPS_MIGHT_FAULT
242 const Vec<
decltype(d1)> new_v1 =
Set(d1, new_t);
243 for (; idx < count; ++idx) {
244 using V1 =
Vec<
decltype(d1)>;
245 const V1
v =
LoadU(d1, inout + idx);
249 const size_t remaining = count - idx;
#define HWY_RESTRICT
Definition: base.h:64
#define HWY_DASSERT(condition)
Definition: base.h:238
#define HWY_UNLIKELY(expr)
Definition: base.h:76
HWY_INLINE Vec128< T, N > Add(hwy::NonFloatTag, Vec128< T, N > a, Vec128< T, N > b)
Definition: emu128-inl.h:535
d
Definition: rvv-inl.h:1998
void Generate(D d, T *HWY_RESTRICT out, size_t count, const Func &func)
Definition: transform-inl.h:52
void ReplaceIf(D d, T *HWY_RESTRICT inout, size_t count, T new_t, const Func &func)
Definition: transform-inl.h:225
HWY_API Mask128< T, N > FirstN(const Simd< T, N, 0 > d, size_t num)
Definition: arm_neon-inl.h:2456
Rebind< MakeUnsigned< TFromD< D > >, D > RebindToUnsigned
Definition: ops/shared-inl.h:212
typename detail::CappedTagChecker< T, kLimit >::type CappedTag
Definition: ops/shared-inl.h:184
void Transform2(D d, T *HWY_RESTRICT inout, size_t count, const T *HWY_RESTRICT in1, const T *HWY_RESTRICT in2, const Func &func)
Definition: transform-inl.h:153
HWY_API Vec128< T, N > MaskedLoad(Mask128< T, N > m, Simd< T, N, 0 > d, const T *HWY_RESTRICT aligned)
Definition: arm_neon-inl.h:2758
HWY_API void BlendedStore(Vec128< T, N > v, Mask128< T, N > m, Simd< T, N, 0 > d, T *HWY_RESTRICT p)
Definition: arm_neon-inl.h:2941
HWY_API constexpr size_t Lanes(Simd< T, N, kPow2 >)
Definition: arm_sve-inl.h:243
HWY_API Vec128< T, N > IfThenElse(const Mask128< T, N > mask, const Vec128< T, N > yes, const Vec128< T, N > no)
Definition: emu128-inl.h:303
HWY_API void StoreU(const Vec128< uint8_t > v, Full128< uint8_t >, uint8_t *HWY_RESTRICT unaligned)
Definition: arm_neon-inl.h:2772
void Replace(D d, T *HWY_RESTRICT inout, size_t count, T new_t, T old_t)
Definition: transform-inl.h:191
svuint16_t Set(Simd< bfloat16_t, N, kPow2 > d, bfloat16_t arg)
Definition: arm_sve-inl.h:322
Vec128< T, N > Iota(const Simd< T, N, 0 > d, const T2 first)
Definition: arm_neon-inl.h:1049
void Transform(D d, T *HWY_RESTRICT inout, size_t count, const Func &func)
Definition: transform-inl.h:85
HWY_API Vec128< uint8_t > LoadU(Full128< uint8_t >, const uint8_t *HWY_RESTRICT unaligned)
Definition: arm_neon-inl.h:2591
decltype(MaskFromVec(Zero(D()))) Mask
Definition: generic_ops-inl.h:46
N
Definition: rvv-inl.h:1998
void Transform1(D d, T *HWY_RESTRICT inout, size_t count, const T *HWY_RESTRICT in1, const Func &func)
Definition: transform-inl.h:117
const vfloat64m1_t v
Definition: rvv-inl.h:1998
typename D::T TFromD
Definition: ops/shared-inl.h:203
decltype(Zero(D())) Vec
Definition: generic_ops-inl.h:40
Definition: aligned_allocator.h:27
FuncOutput(*)(const void *, FuncInput) Func
Definition: nanobenchmark.h:105
#define HWY_NAMESPACE
Definition: set_macros-inl.h:82