DSPLIB User Guide
c7524/DSPLIB_inlines.h
Go to the documentation of this file.
1 /******************************************************************************/
5 /* Copyright (C) 2015 Texas Instruments Incorporated - https://www.ti.com/
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * Redistributions of source code must retain the above copyright
12  * notice, this list of conditions and the following disclaimer.
13  *
14  * Redistributions in binary form must reproduce the above copyright
15  * notice, this list of conditions and the following disclaimer in the
16  * documentation and/or other materials provided with the
17  * distribution.
18  *
19  * Neither the name of Texas Instruments Incorporated nor the names of
20  * its contributors may be used to endorse or promote products derived
21  * from this software without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
29  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
30  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
33  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34  *
35  ******************************************************************************/
36 #ifndef C7524_DSPLIB_INLINES_H
37 #define C7524_DSPLIB_INLINES_H 1
38 
39 /*******************************************************************************
40  *
41  * Inlined functions
42  *
43  ******************************************************************************/
44 
45 /******************************************************************************/
46 #ifdef __cplusplus
47 #include <c7x_scalable.h>
48 
49 // Convert long to int
50 template <typename X, typename Y> inline typename c7x::make_full_vector<X>::type convert_long_to_int(Y vec);
51 
52 template <> inline c7x::make_full_vector<int32_t>::type convert_long_to_int<int8, long4>(long4 vec)
53 {
54  typedef typename c7x::make_full_vector<int32_t>::type vecRet;
55  vecRet v8bits = __as_int8(vec);
56  return v8bits;
57 }
58 
59 template <> inline c7x::make_full_vector<uint32_t>::type convert_long_to_int<uint8, ulong4>(ulong4 vec)
60 {
61  typedef typename c7x::make_full_vector<uint32_t>::type vecRet;
62  vecRet v8bits = __as_uint8(vec);
63  return v8bits;
64 }
65 
66 // Convert char to short
67 template <typename X, typename Y>
68 inline typename c7x::make_full_vector<X>::type convert_char_to_short(Y vecIn, bool typeIndex);
69 
70 template <>
71 inline c7x::make_full_vector<int16_t>::type convert_char_to_short<short16, char16>(char16 vecIn, bool typeIndex)
72 {
73  typedef typename c7x::make_full_vector<int16_t>::type vecRet;
74  vecRet vecOut;
75  if (typeIndex) {
76  uchar16 vecInConv = __as_uchar16(vecIn);
77  vecOut = __convert_short16(vecInConv);
78  }
79  else {
80  vecOut = __convert_short16(vecIn);
81  }
82  return vecOut;
83 }
84 
85 template <>
86 inline c7x::make_full_vector<uint16_t>::type convert_char_to_short<ushort16, uchar16>(uchar16 vecIn, bool typeIndex)
87 {
88  typedef typename c7x::make_full_vector<uint16_t>::type vecRet;
89  vecRet vecOut = __convert_ushort16(vecIn);
90  return vecOut;
91 }
92 
93 // Multiply 2 char and result in short
94 template <typename X, typename Y> inline void mul_char_to_short(Y vecIn1, Y vecIn2, X vecOut1, X vecOut2);
95 
96 template <>
97 inline void mul_char_to_short<short16 &, char32>(c7x::char_vec vecIn1,
98  c7x::char_vec vecIn2,
99  c7x::short_vec &vecOut1Short,
100  c7x::short_vec &vecOut2Short)
101 {
102  short16 vecInShort1 = __as_short16(vecIn1);
103  short16 vecInShort2 = __as_short16(vecIn2);
104  vecInShort2 = __shift_right(vecInShort2, c7x::short_vec(8));
105  short16 vecInShortOdd = __shift_right(vecInShort1, c7x::short_vec(8));
106  short16 vecInShortEven = __shift_right(__shift_left(vecInShort1, c7x::short_vec(8)), c7x::short_vec(8));
107 
108  vecOut1Short = vecInShortEven * vecInShort2;
109  vecOut2Short = vecInShortOdd * vecInShort2;
110 }
111 
112 template <>
113 inline void mul_char_to_short<ushort16 &, uchar32>(c7x::uchar_vec vecIn1,
114  c7x::uchar_vec vecIn2,
115  c7x::ushort_vec &vecOut1uShort,
116  c7x::ushort_vec &vecOut2uShort)
117 {
118  __vmpyubh_vvw(vecIn1, vecIn2, vecOut1uShort, vecOut2uShort);
119 }
120 
121 // Horizontal max computation with index
122 template <typename V, typename W>
123 inline void c7x_horizontal_max_with_index(V maxValVec, V vIdx, W *maxVal, int *maxIdx);
124 
125 template <>
126 inline void c7x_horizontal_max_with_index(c7x::float_vec maxValVec, c7x::float_vec vIdx, float *maxVal, int *maxIdx)
127 {
128  __vpred vpMask;
129  vpMask = __cmp_lt_pred(maxValVec.even(), maxValVec.odd());
130  float4 maxValVec1 = (float4) __select(vpMask, maxValVec.odd(), maxValVec.even());
131  float4 vIdx1 = (float4) __select(vpMask, vIdx.odd(), vIdx.even());
132 
133  vpMask = __cmp_lt_pred(maxValVec1.even(), maxValVec1.odd());
134  float2 maxValVec2 = (float2) __select(vpMask, maxValVec1.odd(), maxValVec1.even());
135  float2 vIdx2 = (float2) __select(vpMask, vIdx1.odd(), vIdx1.even());
136 
137  vpMask = __cmp_lt_pred(maxValVec2.even(), maxValVec2.odd());
138  *maxVal = (float) __select(vpMask, maxValVec2.odd(), maxValVec2.even());
139  *maxIdx = ((int) __select(vpMask, vIdx2.odd(), vIdx2.even()));
140 }
141 
142 template <>
143 inline void c7x_horizontal_max_with_index(c7x::double_vec maxValVec, c7x::double_vec vIdx, double *maxVal, int *maxIdx)
144 {
145  __vpred vpMask;
146  vpMask = __cmp_lt_pred(maxValVec.even(), maxValVec.odd());
147  double2 maxValVec1 = (double2) __select(vpMask, maxValVec.odd(), maxValVec.even());
148  double2 vIdx1 = (double2) __select(vpMask, vIdx.odd(), vIdx.even());
149 
150  vpMask = __cmp_lt_pred(maxValVec1.even(), maxValVec1.odd());
151  *maxVal = (double) __select(vpMask, maxValVec1.odd(), maxValVec1.even());
152  *maxIdx = (int) (__select(vpMask, vIdx1.odd(), vIdx1.even()));
153 }
154 
155 template <>
156 inline void c7x_horizontal_max_with_index(c7x::char_vec maxValVec, c7x::char_vec vIdx, int8_t *maxVal, int *maxIdx)
157 {
158  typedef typename c7x::make_full_vector<int8_t>::type vec;
159  __vpred vpMask;
160 
161  char16 maxValVec1 = maxValVec.even();
162  maxValVec1 = __max(maxValVec.odd(), maxValVec1);
163 
164  char8 maxValVec2 = maxValVec1.even();
165  maxValVec2 = __max(maxValVec1.odd(), maxValVec2);
166 
167  char4 maxValVec3 = maxValVec2.even();
168  maxValVec3 = __max(maxValVec2.odd(), maxValVec3);
169 
170  char2 maxValVec4 = maxValVec3.even();
171  maxValVec4 = __max(maxValVec3.odd(), maxValVec4);
172 
173  int8_t maxValVec5 = maxValVec4.even();
174  maxValVec5 = __max(maxValVec4.odd(), maxValVec5);
175  *maxVal = (int8_t) maxValVec5;
176 
177  int8_t maxValScaler = (int8_t) maxValVec5;
178 
179  char32 zero_vec = vec(0);
180  char32 diff_vector = vec(maxValScaler) - maxValVec;
181  vpMask = __cmp_eq_pred(diff_vector, zero_vec);
182  char32 masked_indices = __select(vpMask, vIdx, vec(-1));
183  uchar32 umasked_indices = __as_uchar32(masked_indices);
184 
185  uchar16 vIdx1 = (uchar16) __min(umasked_indices.even(), umasked_indices.odd());
186  uchar8 vIdx2 = (uchar8) __min(vIdx1.even(), vIdx1.odd());
187  uchar4 vIdx3 = (uchar4) __min(vIdx2.even(), vIdx2.odd());
188  uchar2 vIdx4 = (uchar2) __min(vIdx3.even(), vIdx3.odd());
189  *maxIdx = (uint8_t) __min(vIdx4.even(), vIdx4.odd());
190 }
191 
192 template <>
193 inline void c7x_horizontal_max_with_index(c7x::short_vec maxValVec, c7x::short_vec vIdx, short *maxVal, int *maxIdx)
194 {
195  __vpred vpMask;
196  typedef typename c7x::make_full_vector<int16_t>::type vec;
197 
198  vec sortIn = __sort_desc(maxValVec);
199  *maxVal = (short) sortIn.s[0];
200  short maxValScaler = (short) sortIn.s[0];
201 
202  short16 zero_vec = vec(0);
203  short16 diff_vector = vec(maxValScaler) - maxValVec;
204  vpMask = __cmp_eq_pred(diff_vector, zero_vec);
205  short16 masked_indices = __select(vpMask, vIdx, vec(255));
206 
207  short8 vIdx1 = (short8) __min(masked_indices.even(), masked_indices.odd());
208  short4 vIdx2 = (short4) __min(vIdx1.even(), vIdx1.odd());
209  short2 vIdx3 = (short2) __min(vIdx2.even(), vIdx2.odd());
210  *maxIdx = (uint16_t) __min(vIdx3.even(), vIdx3.odd());
211 }
212 
213 template <>
214 inline void c7x_horizontal_max_with_index(c7x::int_vec maxValVec, c7x::int_vec vIdx, int *maxVal, int *maxIdx)
215 {
216  __vpred vpMask;
217  typedef typename c7x::make_full_vector<int32_t>::type vec;
218 
219  vec sortIn = __sort_desc(maxValVec);
220  ;
221  *maxVal = (int) sortIn.s[0];
222 
223  int maxValScaler = (int) sortIn.s[0];
224 
225  int8 zero_vec = vec(0);
226  int8 diff_vector = vec(maxValScaler) - maxValVec;
227  vpMask = __cmp_eq_pred(diff_vector, zero_vec);
228  int8 masked_indices = __select(vpMask, vIdx, vec(255));
229  int4 vIdx1 = (int4) __min(masked_indices.even(), masked_indices.odd());
230  int2 vIdx2 = (int2) __min(vIdx1.even(), vIdx1.odd());
231  *maxIdx = (int) __min(vIdx2.even(), vIdx2.odd());
232 }
233 
234 template <>
235 inline void c7x_horizontal_max_with_index(c7x::long_vec maxValVec, c7x::long_vec vIdx, long *maxVal, int *maxIdx)
236 {
237  __vpred vpMask;
238  typedef typename c7x::make_full_vector<int64_t>::type vec;
239 
240  long2 maxValVec1 = maxValVec.even();
241  maxValVec1 = __max(maxValVec.odd(), maxValVec1);
242 
243  long maxValVec2 = maxValVec1.even();
244  maxValVec2 = __max(maxValVec1.odd(), maxValVec2);
245  *maxVal = (long) maxValVec2;
246 
247  long maxValScaler = (long) maxValVec2;
248 
249  long4 zero_vec = vec(0);
250  long4 diff_vector = vec(maxValScaler) - maxValVec;
251  vpMask = __cmp_eq_pred(diff_vector, zero_vec);
252  long4 maxIdxVec = vec(255);
253  long4 masked_indices = __select(vpMask, vIdx, maxIdxVec);
254 
255  long2 vIdx1 = (long2) __min(masked_indices.even(), masked_indices.odd());
256  *maxIdx = (long) __min(vIdx1.even(), vIdx1.odd());
257 }
258 
259 template <>
260 inline void c7x_horizontal_max_with_index(c7x::uchar_vec maxValVec, c7x::uchar_vec vIdx, uchar *maxVal, int *maxIdx)
261 {
262  __vpred vpMask;
263  typedef typename c7x::make_full_vector<uint8_t>::type vec;
264 
265  uchar16 maxValVec1 = maxValVec.even();
266  maxValVec1 = __max(maxValVec.odd(), maxValVec1);
267 
268  uchar8 maxValVec2 = maxValVec1.even();
269  maxValVec2 = __max(maxValVec1.odd(), maxValVec2);
270 
271  uchar4 maxValVec3 = maxValVec2.even();
272  maxValVec3 = __max(maxValVec2.odd(), maxValVec3);
273 
274  uchar2 maxValVec4 = maxValVec3.even();
275  maxValVec4 = __max(maxValVec3.odd(), maxValVec4);
276 
277  uchar maxValVec5 = maxValVec4.even();
278  maxValVec5 = __max(maxValVec4.odd(), maxValVec5);
279  *maxVal = (uchar) maxValVec5;
280 
281  uchar maxValScaler = (uchar) maxValVec5;
282  uchar32 zero_vec = vec(0);
283  uchar32 diff_vector = vec(maxValScaler) - maxValVec;
284  vpMask = __cmp_eq_pred(diff_vector, zero_vec);
285  uchar32 masked_indices = __select(vpMask, vIdx, vec(255));
286 
287  uchar16 vIdx1 = (uchar16) __min(masked_indices.even(), masked_indices.odd());
288  uchar8 vIdx2 = (uchar8) __min(vIdx1.even(), vIdx1.odd());
289  uchar4 vIdx3 = (uchar4) __min(vIdx2.even(), vIdx2.odd());
290  uchar2 vIdx4 = (uchar2) __min(vIdx3.even(), vIdx3.odd());
291  *maxIdx = (uint8_t) __min(vIdx4.even(), vIdx4.odd());
292 }
293 
294 template <>
295 inline void c7x_horizontal_max_with_index(c7x::ushort_vec maxValVec, c7x::ushort_vec vIdx, ushort *maxVal, int *maxIdx)
296 {
297  __vpred vpMask;
298  typedef typename c7x::make_full_vector<uint16_t>::type vec;
299 
300  vec sortIn = __sort_desc(maxValVec);
301  *maxVal = (ushort) sortIn.s[0];
302  ushort maxValScaler = (ushort) sortIn.s[0];
303 
304  ushort16 zero_vec = vec(0);
305  ushort16 diff_vector = vec(maxValScaler) - maxValVec;
306  vpMask = __cmp_eq_pred(diff_vector, zero_vec);
307  ushort16 masked_indices = __select(vpMask, vIdx, vec(255));
308 
309  ushort8 vIdx1 = (ushort8) __min(masked_indices.even(), masked_indices.odd());
310  ushort4 vIdx2 = (ushort4) __min(vIdx1.even(), vIdx1.odd());
311  ushort2 vIdx3 = (ushort2) __min(vIdx2.even(), vIdx2.odd());
312  *maxIdx = (uint16_t) __min(vIdx3.even(), vIdx3.odd());
313 }
314 
315 template <>
316 inline void c7x_horizontal_max_with_index(c7x::uint_vec maxValVec, c7x::uint_vec vIdx, uint *maxVal, int *maxIdx)
317 {
318  __vpred vpMask;
319  typedef typename c7x::make_full_vector<uint32_t>::type vec;
320 
321  vec sortIn = __sort_desc(maxValVec);
322  ;
323  *maxVal = (uint) sortIn.s[0];
324 
325  uint maxValScaler = (uint) sortIn.s[0];
326 
327  uint8 zero_vec = vec(0);
328  uint8 diff_vector = vec(maxValScaler) - maxValVec;
329  vpMask = __cmp_eq_pred(diff_vector, zero_vec);
330  uint8 masked_indices = __select(vpMask, vIdx, vec(255));
331  uint4 vIdx1 = (uint4) __min(masked_indices.even(), masked_indices.odd());
332  uint2 vIdx2 = (uint2) __min(vIdx1.even(), vIdx1.odd());
333  *maxIdx = (uint) __min(vIdx2.even(), vIdx2.odd());
334 }
335 
336 template <>
337 inline void c7x_horizontal_max_with_index(c7x::ulong_vec maxValVec, c7x::ulong_vec vIdx, ulong *maxVal, int *maxIdx)
338 {
339  __vpred vpMask;
340  typedef typename c7x::make_full_vector<uint64_t>::type vec;
341  ulong2 maxValVec1 = maxValVec.even();
342  maxValVec1 = __max(maxValVec.odd(), maxValVec1);
343 
344  ulong maxValVec2 = maxValVec1.even();
345  maxValVec2 = __max(maxValVec1.odd(), maxValVec2);
346  *maxVal = (ulong) maxValVec2;
347 
348  ulong maxValScaler = (ulong) maxValVec2;
349 
350  ulong4 zero_vec = vec(0);
351  ulong4 diff_vector = vec(maxValScaler) - maxValVec;
352  vpMask = __cmp_eq_pred(diff_vector, zero_vec);
353  ulong4 maxIdxVec = vec(255);
354  ulong4 masked_indices = __select(vpMask, vIdx, maxIdxVec);
355 
356  ulong2 vIdx1 = (ulong2) __min(masked_indices.even(), masked_indices.odd());
357  *maxIdx = (ulong) __min(vIdx1.even(), vIdx1.odd());
358 }
359 
360 // Horizontal max computation
361 template <typename V> inline uint64_t c7x_horizontal_max(V vin);
362 
363 template <> inline uint64_t c7x_horizontal_max(c7x::uchar_vec vin)
364 {
365  uint32_t retVal1 = (uint32_t) (__sort_desc(c7x::as_ushort_vec(vin)).s0());
366  uint32_t retVal2 = (uint32_t) (__sort_desc(__shift_left(c7x::as_ushort_vec(vin), (ushort16) (8))).s0());
367  return (uint64_t) (retVal1 > retVal2 ? ((retVal1 >> 8U) & 0xFFU) : ((retVal2 >> 8U) & 0xFFU));
368 }
369 template <> inline uint64_t c7x_horizontal_max(c7x::ushort_vec vin)
370 {
371  uint64_t retVal = (uint64_t) (__sort_desc((ushort16) vin).s0());
372  return retVal;
373 }
374 template <> inline uint64_t c7x_horizontal_max(c7x::uint_vec vin)
375 {
376  uint64_t retVal = (uint64_t) (__sort_desc((uint8) vin).s0());
377  return retVal;
378 }
379 template <> inline uint64_t c7x_horizontal_max(c7x::ulong_vec vin)
380 {
381  ulong2 m1 = __max(vin.even(), vin.odd());
382  uint64_t retVal = (uint64_t) __max(m1.even(), m1.odd());
383  return retVal;
384 }
385 
386 template <typename dataType, typename V = typename c7x::make_full_vector<dataType>::type>
387 inline dataType c7x_horizontal_max_fp(V vin);
388 
389 template <> inline float c7x_horizontal_max_fp(c7x::float_vec vin)
390 {
391 
392  float4 vin1 = __max(vin.hi(), vin.lo());
393  float2 vin2 = __max(vin1.hi(), vin1.lo());
394  float maxVal = __max(vin2.hi(), vin2.lo());
395  return maxVal;
396 }
397 
398 template <> inline double c7x_horizontal_max_fp(c7x::double_vec vin)
399 {
400  double2 vin1 = __max(vin.hi(), vin.lo());
401  double maxVal = __max(vin1.hi(), vin1.lo());
402  return maxVal;
403 }
404 
405 template <typename dataType, typename V> inline dataType c7x_horizontal_min_fp(V vin);
406 template <> inline float c7x_horizontal_min_fp(c7x::float_vec vin)
407 {
408 
409  float4 vin1 = __min(vin.hi(), vin.lo());
410  float2 vin2 = __min(vin1.hi(), vin1.lo());
411  float minVal = __min(vin2.hi(), vin2.lo());
412  return minVal;
413 }
414 
415 template <> inline double c7x_horizontal_min_fp(c7x::double_vec vin)
416 {
417  double2 vin1 = __min(vin.hi(), vin.lo());
418  double minVal = __min(vin1.hi(), vin1.lo());
419  return minVal;
420 }
421 
422 template <typename V, typename W> inline void c7x_horizontal_add(V inVec, W *horizontalSum);
423 
424 template <> inline void c7x_horizontal_add(c7x::float_vec inVec, float *horizontalSum)
425 {
426  float4 inVec1 = inVec.hi() + inVec.lo();
427  float2 inVec2 = inVec1.hi() + inVec1.lo();
428  *horizontalSum = inVec2.hi() + inVec2.lo();
429 }
430 
431 template <> inline void c7x_horizontal_add(c7x::double_vec inVec, double *horizontalSum)
432 {
433  double2 inVec1 = inVec.hi() + inVec.lo();
434  *horizontalSum = inVec1.hi() + inVec1.lo();
435 }
436 
437 template <typename V, typename W>
438 inline void c7x_horizontal_min_with_index(V minValVec, V vIdx, W *minVal, int *minIdx);
439 
440 template <>
441 inline void c7x_horizontal_min_with_index(c7x::float_vec minValVec, c7x::float_vec vIdx, float *minVal, int *minIdx)
442 {
443  __vpred vpMask;
444  vpMask = __cmp_lt_pred(minValVec.even(), minValVec.odd());
445  float4 minValVec1 = (float4) __select(vpMask, minValVec.even(), minValVec.odd());
446  float4 vIdx1 = (float4) __select(vpMask, vIdx.even(), vIdx.odd());
447 
448  vpMask = __cmp_lt_pred(minValVec1.even(), minValVec1.odd());
449  float2 minValVec2 = (float2) __select(vpMask, minValVec1.even(), minValVec1.odd());
450  float2 vIdx2 = (float2) __select(vpMask, vIdx1.even(), vIdx1.odd());
451 
452  vpMask = __cmp_lt_pred(minValVec2.even(), minValVec2.odd());
453  *minVal = (float) __select(vpMask, minValVec2.even(), minValVec2.odd());
454  *minIdx = ((int) __select(vpMask, vIdx2.even(), vIdx2.odd()));
455 }
456 
457 template <>
458 inline void c7x_horizontal_min_with_index(c7x::double_vec minValVec, c7x::double_vec vIdx, double *minVal, int *minIdx)
459 {
460  __vpred vpMask;
461  vpMask = __cmp_lt_pred(minValVec.even(), minValVec.odd());
462  double2 minValVec1 = (double2) __select(vpMask, minValVec.even(), minValVec.odd());
463  double2 vIdx1 = (double2) __select(vpMask, vIdx.even(), vIdx.odd());
464 
465  vpMask = __cmp_lt_pred(minValVec1.even(), minValVec1.odd());
466  *minVal = (double) __select(vpMask, minValVec1.even(), minValVec1.odd());
467  *minIdx = (int) (__select(vpMask, vIdx1.even(), vIdx1.odd()));
468 }
469 #endif
470 
471 #endif