36 #ifndef C7524_DSPLIB_INLINES_H
37 #define C7524_DSPLIB_INLINES_H 1
47 #include <c7x_scalable.h>
50 template <
typename X,
typename Y>
inline typename c7x::make_full_vector<X>::type convert_long_to_int(Y vec);
52 template <>
inline c7x::make_full_vector<int32_t>::type convert_long_to_int<int8, long4>(long4 vec)
54 typedef typename c7x::make_full_vector<int32_t>::type vecRet;
55 vecRet v8bits = __as_int8(vec);
59 template <>
inline c7x::make_full_vector<uint32_t>::type convert_long_to_int<uint8, ulong4>(ulong4 vec)
61 typedef typename c7x::make_full_vector<uint32_t>::type vecRet;
62 vecRet v8bits = __as_uint8(vec);
67 template <
typename X,
typename Y>
68 inline typename c7x::make_full_vector<X>::type convert_char_to_short(Y vecIn,
bool typeIndex);
// convert_char_to_short specialization for <short16, char16>.
// Produces a full int16_t vector (vecRet) from a char16 input. Two conversion
// paths are visible below: one widens the input after viewing it as uchar16,
// the other widens the signed input directly.
// NOTE(review): the declaration of vecOut, the statement that selects between
// the two paths (presumably on typeIndex), the return and the braces are not
// present in this listing - confirm against the original header before editing.
71 inline c7x::make_full_vector<int16_t>::type convert_char_to_short<short16, char16>(char16 vecIn,
bool typeIndex)
73 typedef typename c7x::make_full_vector<int16_t>::type vecRet;
// Path A: view the input as unsigned bytes, then widen to short16.
76 uchar16 vecInConv = __as_uchar16(vecIn);
77 vecOut = __convert_short16(vecInConv);
// Path B: widen the signed bytes to short16 directly.
80 vecOut = __convert_short16(vecIn);
86 inline c7x::make_full_vector<uint16_t>::type convert_char_to_short<ushort16, uchar16>(uchar16 vecIn,
bool typeIndex)
88 typedef typename c7x::make_full_vector<uint16_t>::type vecRet;
89 vecRet vecOut = __convert_ushort16(vecIn);
/**
 * @brief Generic declaration of mul_char_to_short. No generic body is visible
 *        here; behaviour comes from the explicit specializations.
 *
 * @tparam X        Type of the two outputs (the visible specializations use a
 *                  reference type so the results reach the caller).
 * @tparam Y        Type of the two input vectors.
 * @param  vecIn1   First input vector.
 * @param  vecIn2   Second input vector.
 * @param  vecOut1  First output.
 * @param  vecOut2  Second output.
 */
template <typename X, typename Y>
inline void mul_char_to_short(Y vecIn1, Y vecIn2, X vecOut1, X vecOut2);
97 inline void mul_char_to_short<short16 &, char32>(c7x::char_vec vecIn1,
99 c7x::short_vec &vecOut1Short,
100 c7x::short_vec &vecOut2Short)
102 short16 vecInShort1 = __as_short16(vecIn1);
103 short16 vecInShort2 = __as_short16(vecIn2);
104 vecInShort2 = __shift_right(vecInShort2, c7x::short_vec(8));
105 short16 vecInShortOdd = __shift_right(vecInShort1, c7x::short_vec(8));
106 short16 vecInShortEven = __shift_right(__shift_left(vecInShort1, c7x::short_vec(8)), c7x::short_vec(8));
108 vecOut1Short = vecInShortEven * vecInShort2;
109 vecOut2Short = vecInShortOdd * vecInShort2;
113 inline void mul_char_to_short<ushort16 &, uchar32>(c7x::uchar_vec vecIn1,
114 c7x::uchar_vec vecIn2,
115 c7x::ushort_vec &vecOut1uShort,
116 c7x::ushort_vec &vecOut2uShort)
118 __vmpyubh_vvw(vecIn1, vecIn2, vecOut1uShort, vecOut2uShort);
/**
 * @brief Generic declaration of the horizontal "maximum with index" reduction.
 *        No generic body is visible here; each element type has its own
 *        definition.
 *
 * @tparam V          Vector type of the values and of the per-lane indices.
 * @tparam W          Scalar type of the reported maximum.
 * @param  maxValVec  Vector of candidate values.
 * @param  vIdx       Vector holding the index associated with each lane.
 * @param  maxVal     Output: the maximum value.
 * @param  maxIdx     Output: the index associated with the maximum.
 */
template <typename V, typename W>
inline void c7x_horizontal_max_with_index(V maxValVec, V vIdx, W *maxVal, int *maxIdx);
126 inline void c7x_horizontal_max_with_index(c7x::float_vec maxValVec, c7x::float_vec vIdx,
float *maxVal,
int *maxIdx)
129 vpMask = __cmp_lt_pred(maxValVec.even(), maxValVec.odd());
130 float4 maxValVec1 = (float4) __select(vpMask, maxValVec.odd(), maxValVec.even());
131 float4 vIdx1 = (float4) __select(vpMask, vIdx.odd(), vIdx.even());
133 vpMask = __cmp_lt_pred(maxValVec1.even(), maxValVec1.odd());
134 float2 maxValVec2 = (float2) __select(vpMask, maxValVec1.odd(), maxValVec1.even());
135 float2 vIdx2 = (float2) __select(vpMask, vIdx1.odd(), vIdx1.even());
137 vpMask = __cmp_lt_pred(maxValVec2.even(), maxValVec2.odd());
138 *maxVal = (float) __select(vpMask, maxValVec2.odd(), maxValVec2.even());
139 *maxIdx = ((int) __select(vpMask, vIdx2.odd(), vIdx2.even()));
143 inline void c7x_horizontal_max_with_index(c7x::double_vec maxValVec, c7x::double_vec vIdx,
double *maxVal,
int *maxIdx)
146 vpMask = __cmp_lt_pred(maxValVec.even(), maxValVec.odd());
147 double2 maxValVec1 = (double2) __select(vpMask, maxValVec.odd(), maxValVec.even());
148 double2 vIdx1 = (double2) __select(vpMask, vIdx.odd(), vIdx.even());
150 vpMask = __cmp_lt_pred(maxValVec1.even(), maxValVec1.odd());
151 *maxVal = (double) __select(vpMask, maxValVec1.odd(), maxValVec1.even());
152 *maxIdx = (int) (__select(vpMask, vIdx1.odd(), vIdx1.even()));
156 inline void c7x_horizontal_max_with_index(c7x::char_vec maxValVec, c7x::char_vec vIdx, int8_t *maxVal,
int *maxIdx)
158 typedef typename c7x::make_full_vector<int8_t>::type vec;
161 char16 maxValVec1 = maxValVec.even();
162 maxValVec1 = __max(maxValVec.odd(), maxValVec1);
164 char8 maxValVec2 = maxValVec1.even();
165 maxValVec2 = __max(maxValVec1.odd(), maxValVec2);
167 char4 maxValVec3 = maxValVec2.even();
168 maxValVec3 = __max(maxValVec2.odd(), maxValVec3);
170 char2 maxValVec4 = maxValVec3.even();
171 maxValVec4 = __max(maxValVec3.odd(), maxValVec4);
173 int8_t maxValVec5 = maxValVec4.even();
174 maxValVec5 = __max(maxValVec4.odd(), maxValVec5);
175 *maxVal = (int8_t) maxValVec5;
177 int8_t maxValScaler = (int8_t) maxValVec5;
179 char32 zero_vec = vec(0);
180 char32 diff_vector = vec(maxValScaler) - maxValVec;
181 vpMask = __cmp_eq_pred(diff_vector, zero_vec);
182 char32 masked_indices = __select(vpMask, vIdx, vec(-1));
183 uchar32 umasked_indices = __as_uchar32(masked_indices);
185 uchar16 vIdx1 = (uchar16) __min(umasked_indices.even(), umasked_indices.odd());
186 uchar8 vIdx2 = (uchar8) __min(vIdx1.even(), vIdx1.odd());
187 uchar4 vIdx3 = (uchar4) __min(vIdx2.even(), vIdx2.odd());
188 uchar2 vIdx4 = (uchar2) __min(vIdx3.even(), vIdx3.odd());
189 *maxIdx = (uint8_t) __min(vIdx4.even(), vIdx4.odd());
193 inline void c7x_horizontal_max_with_index(c7x::short_vec maxValVec, c7x::short_vec vIdx,
short *maxVal,
int *maxIdx)
196 typedef typename c7x::make_full_vector<int16_t>::type vec;
198 vec sortIn = __sort_desc(maxValVec);
199 *maxVal = (short) sortIn.s[0];
200 short maxValScaler = (
short) sortIn.s[0];
202 short16 zero_vec = vec(0);
203 short16 diff_vector = vec(maxValScaler) - maxValVec;
204 vpMask = __cmp_eq_pred(diff_vector, zero_vec);
205 short16 masked_indices = __select(vpMask, vIdx, vec(255));
207 short8 vIdx1 = (short8) __min(masked_indices.even(), masked_indices.odd());
208 short4 vIdx2 = (short4) __min(vIdx1.even(), vIdx1.odd());
209 short2 vIdx3 = (short2) __min(vIdx2.even(), vIdx2.odd());
210 *maxIdx = (uint16_t) __min(vIdx3.even(), vIdx3.odd());
214 inline void c7x_horizontal_max_with_index(c7x::int_vec maxValVec, c7x::int_vec vIdx,
int *maxVal,
int *maxIdx)
217 typedef typename c7x::make_full_vector<int32_t>::type vec;
219 vec sortIn = __sort_desc(maxValVec);
221 *maxVal = (int) sortIn.s[0];
223 int maxValScaler = (
int) sortIn.s[0];
225 int8 zero_vec = vec(0);
226 int8 diff_vector = vec(maxValScaler) - maxValVec;
227 vpMask = __cmp_eq_pred(diff_vector, zero_vec);
228 int8 masked_indices = __select(vpMask, vIdx, vec(255));
229 int4 vIdx1 = (int4) __min(masked_indices.even(), masked_indices.odd());
230 int2 vIdx2 = (int2) __min(vIdx1.even(), vIdx1.odd());
231 *maxIdx = (int) __min(vIdx2.even(), vIdx2.odd());
235 inline void c7x_horizontal_max_with_index(c7x::long_vec maxValVec, c7x::long_vec vIdx,
long *maxVal,
int *maxIdx)
238 typedef typename c7x::make_full_vector<int64_t>::type vec;
240 long2 maxValVec1 = maxValVec.even();
241 maxValVec1 = __max(maxValVec.odd(), maxValVec1);
243 long maxValVec2 = maxValVec1.even();
244 maxValVec2 = __max(maxValVec1.odd(), maxValVec2);
245 *maxVal = (long) maxValVec2;
247 long maxValScaler = (long) maxValVec2;
249 long4 zero_vec = vec(0);
250 long4 diff_vector = vec(maxValScaler) - maxValVec;
251 vpMask = __cmp_eq_pred(diff_vector, zero_vec);
252 long4 maxIdxVec = vec(255);
253 long4 masked_indices = __select(vpMask, vIdx, maxIdxVec);
255 long2 vIdx1 = (long2) __min(masked_indices.even(), masked_indices.odd());
256 *maxIdx = (long) __min(vIdx1.even(), vIdx1.odd());
260 inline void c7x_horizontal_max_with_index(c7x::uchar_vec maxValVec, c7x::uchar_vec vIdx, uchar *maxVal,
int *maxIdx)
263 typedef typename c7x::make_full_vector<uint8_t>::type vec;
265 uchar16 maxValVec1 = maxValVec.even();
266 maxValVec1 = __max(maxValVec.odd(), maxValVec1);
268 uchar8 maxValVec2 = maxValVec1.even();
269 maxValVec2 = __max(maxValVec1.odd(), maxValVec2);
271 uchar4 maxValVec3 = maxValVec2.even();
272 maxValVec3 = __max(maxValVec2.odd(), maxValVec3);
274 uchar2 maxValVec4 = maxValVec3.even();
275 maxValVec4 = __max(maxValVec3.odd(), maxValVec4);
277 uchar maxValVec5 = maxValVec4.even();
278 maxValVec5 = __max(maxValVec4.odd(), maxValVec5);
279 *maxVal = (uchar) maxValVec5;
281 uchar maxValScaler = (uchar) maxValVec5;
282 uchar32 zero_vec = vec(0);
283 uchar32 diff_vector = vec(maxValScaler) - maxValVec;
284 vpMask = __cmp_eq_pred(diff_vector, zero_vec);
285 uchar32 masked_indices = __select(vpMask, vIdx, vec(255));
287 uchar16 vIdx1 = (uchar16) __min(masked_indices.even(), masked_indices.odd());
288 uchar8 vIdx2 = (uchar8) __min(vIdx1.even(), vIdx1.odd());
289 uchar4 vIdx3 = (uchar4) __min(vIdx2.even(), vIdx2.odd());
290 uchar2 vIdx4 = (uchar2) __min(vIdx3.even(), vIdx3.odd());
291 *maxIdx = (uint8_t) __min(vIdx4.even(), vIdx4.odd());
295 inline void c7x_horizontal_max_with_index(c7x::ushort_vec maxValVec, c7x::ushort_vec vIdx, ushort *maxVal,
int *maxIdx)
298 typedef typename c7x::make_full_vector<uint16_t>::type vec;
300 vec sortIn = __sort_desc(maxValVec);
301 *maxVal = (ushort) sortIn.s[0];
302 ushort maxValScaler = (ushort) sortIn.s[0];
304 ushort16 zero_vec = vec(0);
305 ushort16 diff_vector = vec(maxValScaler) - maxValVec;
306 vpMask = __cmp_eq_pred(diff_vector, zero_vec);
307 ushort16 masked_indices = __select(vpMask, vIdx, vec(255));
309 ushort8 vIdx1 = (ushort8) __min(masked_indices.even(), masked_indices.odd());
310 ushort4 vIdx2 = (ushort4) __min(vIdx1.even(), vIdx1.odd());
311 ushort2 vIdx3 = (ushort2) __min(vIdx2.even(), vIdx2.odd());
312 *maxIdx = (uint16_t) __min(vIdx3.even(), vIdx3.odd());
316 inline void c7x_horizontal_max_with_index(c7x::uint_vec maxValVec, c7x::uint_vec vIdx, uint *maxVal,
int *maxIdx)
319 typedef typename c7x::make_full_vector<uint32_t>::type vec;
321 vec sortIn = __sort_desc(maxValVec);
323 *maxVal = (uint) sortIn.s[0];
325 uint maxValScaler = (uint) sortIn.s[0];
327 uint8 zero_vec = vec(0);
328 uint8 diff_vector = vec(maxValScaler) - maxValVec;
329 vpMask = __cmp_eq_pred(diff_vector, zero_vec);
330 uint8 masked_indices = __select(vpMask, vIdx, vec(255));
331 uint4 vIdx1 = (uint4) __min(masked_indices.even(), masked_indices.odd());
332 uint2 vIdx2 = (uint2) __min(vIdx1.even(), vIdx1.odd());
333 *maxIdx = (uint) __min(vIdx2.even(), vIdx2.odd());
337 inline void c7x_horizontal_max_with_index(c7x::ulong_vec maxValVec, c7x::ulong_vec vIdx, ulong *maxVal,
int *maxIdx)
340 typedef typename c7x::make_full_vector<uint64_t>::type vec;
341 ulong2 maxValVec1 = maxValVec.even();
342 maxValVec1 = __max(maxValVec.odd(), maxValVec1);
344 ulong maxValVec2 = maxValVec1.even();
345 maxValVec2 = __max(maxValVec1.odd(), maxValVec2);
346 *maxVal = (ulong) maxValVec2;
348 ulong maxValScaler = (ulong) maxValVec2;
350 ulong4 zero_vec = vec(0);
351 ulong4 diff_vector = vec(maxValScaler) - maxValVec;
352 vpMask = __cmp_eq_pred(diff_vector, zero_vec);
353 ulong4 maxIdxVec = vec(255);
354 ulong4 masked_indices = __select(vpMask, vIdx, maxIdxVec);
356 ulong2 vIdx1 = (ulong2) __min(masked_indices.even(), masked_indices.odd());
357 *maxIdx = (ulong) __min(vIdx1.even(), vIdx1.odd());
/**
 * @brief Generic declaration of the horizontal maximum for unsigned integer
 *        vectors. No generic body is visible here; each vector type has its
 *        own specialization.
 *
 * @tparam V    Vector type of the input.
 * @param  vin  Input vector.
 * @return The largest element, widened to uint64_t.
 */
template <typename V>
inline uint64_t c7x_horizontal_max(V vin);
363 template <>
inline uint64_t c7x_horizontal_max(c7x::uchar_vec vin)
365 uint32_t retVal1 = (uint32_t) (__sort_desc(c7x::as_ushort_vec(vin)).s0());
366 uint32_t retVal2 = (uint32_t) (__sort_desc(__shift_left(c7x::as_ushort_vec(vin), (ushort16) (8))).s0());
367 return (uint64_t) (retVal1 > retVal2 ? ((retVal1 >> 8U) & 0xFFU) : ((retVal2 >> 8U) & 0xFFU));
369 template <>
inline uint64_t c7x_horizontal_max(c7x::ushort_vec vin)
371 uint64_t retVal = (uint64_t) (__sort_desc((ushort16) vin).s0());
374 template <>
inline uint64_t c7x_horizontal_max(c7x::uint_vec vin)
376 uint64_t retVal = (uint64_t) (__sort_desc((uint8) vin).s0());
379 template <>
inline uint64_t c7x_horizontal_max(c7x::ulong_vec vin)
381 ulong2 m1 = __max(vin.even(), vin.odd());
382 uint64_t retVal = (uint64_t) __max(m1.even(), m1.odd());
386 template <typename dataType, typename V = typename c7x::make_full_vector<dataType>::type>
387 inline dataType c7x_horizontal_max_fp(V vin);
389 template <>
inline float c7x_horizontal_max_fp(c7x::float_vec vin)
392 float4 vin1 = __max(vin.hi(), vin.lo());
393 float2 vin2 = __max(vin1.hi(), vin1.lo());
394 float maxVal = __max(vin2.hi(), vin2.lo());
398 template <>
inline double c7x_horizontal_max_fp(c7x::double_vec vin)
400 double2 vin1 = __max(vin.hi(), vin.lo());
401 double maxVal = __max(vin1.hi(), vin1.lo());
/**
 * @brief Generic declaration of the floating-point horizontal minimum. No
 *        generic body is visible here; float and double have their own
 *        specializations.
 *
 * @tparam dataType  Scalar type of the result.
 * @tparam V         Vector type of the input.
 * @param  vin       Input vector.
 * @return The smallest element of @p vin.
 */
template <typename dataType, typename V>
inline dataType c7x_horizontal_min_fp(V vin);
406 template <>
inline float c7x_horizontal_min_fp(c7x::float_vec vin)
409 float4 vin1 = __min(vin.hi(), vin.lo());
410 float2 vin2 = __min(vin1.hi(), vin1.lo());
411 float minVal = __min(vin2.hi(), vin2.lo());
415 template <>
inline double c7x_horizontal_min_fp(c7x::double_vec vin)
417 double2 vin1 = __min(vin.hi(), vin.lo());
418 double minVal = __min(vin1.hi(), vin1.lo());
/**
 * @brief Generic declaration of the horizontal sum. No generic body is visible
 *        here; float and double have their own specializations.
 *
 * @tparam V              Vector type of the input.
 * @tparam W              Scalar type of the result.
 * @param  inVec          Input vector.
 * @param  horizontalSum  Output: sum of all elements of @p inVec.
 */
template <typename V, typename W>
inline void c7x_horizontal_add(V inVec, W *horizontalSum);
424 template <>
inline void c7x_horizontal_add(c7x::float_vec inVec,
float *horizontalSum)
426 float4 inVec1 = inVec.hi() + inVec.lo();
427 float2 inVec2 = inVec1.hi() + inVec1.lo();
428 *horizontalSum = inVec2.hi() + inVec2.lo();
431 template <>
inline void c7x_horizontal_add(c7x::double_vec inVec,
double *horizontalSum)
433 double2 inVec1 = inVec.hi() + inVec.lo();
434 *horizontalSum = inVec1.hi() + inVec1.lo();
/**
 * @brief Generic declaration of the horizontal "minimum with index" reduction.
 *        No generic body is visible here; each element type has its own
 *        definition.
 *
 * @tparam V          Vector type of the values and of the per-lane indices.
 * @tparam W          Scalar type of the reported minimum.
 * @param  minValVec  Vector of candidate values.
 * @param  vIdx       Vector holding the index associated with each lane.
 * @param  minVal     Output: the minimum value.
 * @param  minIdx     Output: the index associated with the minimum.
 */
template <typename V, typename W>
inline void c7x_horizontal_min_with_index(V minValVec, V vIdx, W *minVal, int *minIdx);
441 inline void c7x_horizontal_min_with_index(c7x::float_vec minValVec, c7x::float_vec vIdx,
float *minVal,
int *minIdx)
444 vpMask = __cmp_lt_pred(minValVec.even(), minValVec.odd());
445 float4 minValVec1 = (float4) __select(vpMask, minValVec.even(), minValVec.odd());
446 float4 vIdx1 = (float4) __select(vpMask, vIdx.even(), vIdx.odd());
448 vpMask = __cmp_lt_pred(minValVec1.even(), minValVec1.odd());
449 float2 minValVec2 = (float2) __select(vpMask, minValVec1.even(), minValVec1.odd());
450 float2 vIdx2 = (float2) __select(vpMask, vIdx1.even(), vIdx1.odd());
452 vpMask = __cmp_lt_pred(minValVec2.even(), minValVec2.odd());
453 *minVal = (float) __select(vpMask, minValVec2.even(), minValVec2.odd());
454 *minIdx = ((int) __select(vpMask, vIdx2.even(), vIdx2.odd()));
458 inline void c7x_horizontal_min_with_index(c7x::double_vec minValVec, c7x::double_vec vIdx,
double *minVal,
int *minIdx)
461 vpMask = __cmp_lt_pred(minValVec.even(), minValVec.odd());
462 double2 minValVec1 = (double2) __select(vpMask, minValVec.even(), minValVec.odd());
463 double2 vIdx1 = (double2) __select(vpMask, vIdx.even(), vIdx.odd());
465 vpMask = __cmp_lt_pred(minValVec1.even(), minValVec1.odd());
466 *minVal = (double) __select(vpMask, minValVec1.even(), minValVec1.odd());
467 *minIdx = (int) (__select(vpMask, vIdx1.even(), vIdx1.odd()));