DSPLIB User Guide
c7100/DSPLIB_inlines.h
Go to the documentation of this file.
1 /******************************************************************************/
5 /* Copyright (C) 2015 Texas Instruments Incorporated - https://www.ti.com/
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * Redistributions of source code must retain the above copyright
12  * notice, this list of conditions and the following disclaimer.
13  *
14  * Redistributions in binary form must reproduce the above copyright
15  * notice, this list of conditions and the following disclaimer in the
16  * documentation and/or other materials provided with the
17  * distribution.
18  *
19  * Neither the name of Texas Instruments Incorporated nor the names of
20  * its contributors may be used to endorse or promote products derived
21  * from this software without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
29  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
30  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
33  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34  *
35  ******************************************************************************/
36 #ifndef C7100_MMA_INLINES_H
37 #define C7100_MMA_INLINES_H
38 
39 /*******************************************************************************
40  *
41  * Inlined functions
42  *
43  ******************************************************************************/
44 #ifdef __cplusplus
45 #include <c7x_scalable.h>
46 
47 /* Horizontal maximum value implementation */
48 template <typename V> inline uint64_t c7x_horizontal_max(V vin);
49 
50 template <> inline uint64_t c7x_horizontal_max(c7x::uchar_vec vin)
51 {
52  uchar32 m1 = __max(vin.even(), vin.odd());
53  uchar16 m2 = __max(m1.even(), m1.odd());
54  uchar8 m3 = __max(m2.even(), m2.odd());
55  uchar4 m4 = __max(m3.even(), m3.odd());
56  uchar2 m5 = __max(m4.even(), m4.odd());
57  uint64_t retVal = (uint64_t) __max(m5.even(), m5.odd());
58  return retVal;
59 }
60 template <> inline uint64_t c7x_horizontal_max(c7x::ushort_vec vin)
61 {
62  ushort16 m1 = __max(vin.even(), vin.odd());
63  ushort8 m2 = __max(m1.even(), m1.odd());
64  ushort4 m3 = __max(m2.even(), m2.odd());
65  ushort2 m4 = __max(m3.even(), m3.odd());
66  uint64_t retVal = (uint64_t) __max(m4.even(), m4.odd());
67  return retVal;
68 }
69 template <> inline uint64_t c7x_horizontal_max(c7x::uint_vec vin)
70 {
71  uint64_t retVal = (uint64_t) (__sort_asc((uint16) vin).sf());
72  return retVal;
73 }
74 template <> inline uint64_t c7x_horizontal_max(c7x::ulong_vec vin)
75 {
76  ulong4 m1 = __max(vin.even(), vin.odd());
77  ulong2 m2 = __max(m1.even(), m1.odd());
78  uint64_t retVal = (uint64_t) __max(m2.even(), m2.odd());
79  return retVal;
80 }
81 
82 // Horizontal max computation with index
83 
84 template <typename V, typename W>
85 inline void c7x_horizontal_max_with_index(V maxValVec, V vIdx, W *maxVal, int *maxIdx);
86 
87 template <>
88 inline void c7x_horizontal_max_with_index(c7x::float_vec maxValVec, c7x::float_vec vIdx, float *maxVal, int *maxIdx)
89 {
90  __vpred vpMask;
91  vpMask = __cmp_lt_pred(maxValVec.even(), maxValVec.odd());
92  float8 maxValVec1 = (float8) __select(vpMask, maxValVec.odd(), maxValVec.even());
93  float8 vIdx1 = (float8) __select(vpMask, vIdx.odd(), vIdx.even());
94 
95  vpMask = __cmp_lt_pred(maxValVec1.even(), maxValVec1.odd());
96  float4 maxValVec2 = (float4) __select(vpMask, maxValVec1.odd(), maxValVec1.even());
97  float4 vIdx2 = (float4) __select(vpMask, vIdx1.odd(), vIdx1.even());
98 
99  vpMask = __cmp_lt_pred(maxValVec2.even(), maxValVec2.odd());
100  float2 maxValVec3 = (float2) __select(vpMask, maxValVec2.odd(), maxValVec2.even());
101  float2 vIdx3 = (float2) __select(vpMask, vIdx2.odd(), vIdx2.even());
102 
103  vpMask = __cmp_lt_pred(maxValVec3.even(), maxValVec3.odd());
104  *maxVal = (float) __select(vpMask, maxValVec3.odd(), maxValVec3.even());
105  *maxIdx = ((int) __select(vpMask, vIdx3.odd(), vIdx3.even()));
106 }
107 
108 template <>
109 inline void c7x_horizontal_max_with_index(c7x::double_vec maxValVec, c7x::double_vec vIdx, double *maxVal, int *maxIdx)
110 {
111  __vpred vpMask;
112  vpMask = __cmp_lt_pred(maxValVec.even(), maxValVec.odd());
113  double4 maxValVec1 = (double4) __select(vpMask, maxValVec.odd(), maxValVec.even());
114  double4 vIdx1 = (double4) __select(vpMask, vIdx.odd(), vIdx.even());
115 
116  vpMask = __cmp_lt_pred(maxValVec1.even(), maxValVec1.odd());
117  double2 maxValVec2 = (double2) __select(vpMask, maxValVec1.odd(), maxValVec1.even());
118  double2 vIdx2 = (double2) __select(vpMask, vIdx1.odd(), vIdx1.even());
119 
120  vpMask = __cmp_lt_pred(maxValVec2.even(), maxValVec2.odd());
121  *maxVal = (double) __select(vpMask, maxValVec2.odd(), maxValVec2.even());
122  *maxIdx = ((int) __select(vpMask, vIdx2.odd(), vIdx2.even()));
123 }
124 
125 template <>
126 inline void c7x_horizontal_max_with_index(c7x::char_vec maxValVec, c7x::char_vec vIdx, int8_t *maxVal, int *maxIdx)
127 {
128  __vpred vpMask;
129  typedef typename c7x::make_full_vector<int8_t>::type vec;
130 
131  char32 maxValVec1 = maxValVec.even();
132  maxValVec1 = __max(maxValVec.odd(), maxValVec1);
133 
134  char16 maxValVec2 = maxValVec1.even();
135  maxValVec2 = __max(maxValVec1.odd(), maxValVec2);
136 
137  char8 maxValVec3 = maxValVec2.even();
138  maxValVec3 = __max(maxValVec2.odd(), maxValVec3);
139 
140  char4 maxValVec4 = maxValVec3.even();
141  maxValVec4 = __max(maxValVec3.odd(), maxValVec4);
142 
143  char2 maxValVec5 = maxValVec4.even();
144  maxValVec5 = __max(maxValVec4.odd(), maxValVec5);
145 
146  int8_t maxValVec6 = maxValVec5.even();
147  maxValVec6 = __max(maxValVec5.odd(), maxValVec6);
148  *maxVal = (int8_t) maxValVec6;
149  int8_t maxValScaler = (int8_t) maxValVec6;
150 
151  char64 zero_vec = vec(0);
152  char64 diff_vector = vec(maxValScaler) - maxValVec;
153  vpMask = __cmp_eq_pred(diff_vector, zero_vec);
154  char64 masked_indices = __select(vpMask, vIdx, vec(-1));
155  uchar64 umasked_indices = __as_uchar64(masked_indices);
156 
157  uchar32 vIdx1 = (uchar32) __min(umasked_indices.even(), umasked_indices.odd());
158  uchar16 vIdx2 = (uchar16) __min(vIdx1.even(), vIdx1.odd());
159  uchar8 vIdx3 = (uchar8) __min(vIdx2.even(), vIdx2.odd());
160  uchar4 vIdx4 = (uchar4) __min(vIdx3.even(), vIdx3.odd());
161  uchar2 vIdx5 = (uchar2) __min(vIdx4.even(), vIdx4.odd());
162  *maxIdx = (uint8_t) __min(vIdx5.even(), vIdx5.odd());
163 }
164 
165 template <>
166 inline void c7x_horizontal_max_with_index(c7x::short_vec maxValVec, c7x::short_vec vIdx, int16_t *maxVal, int *maxIdx)
167 {
168  __vpred vpMask;
169  typedef typename c7x::make_full_vector<int16_t>::type vec;
170 
171  vec sortIn = __vdsortdd16h_vv(maxValVec);
172  *maxVal = (short) (sortIn.lo().s[0] > sortIn.hi().s[0] ? sortIn.lo().s[0] : sortIn.hi().s[0]);
173 
174  short maxValScaler = (short) (sortIn.lo().s[0] > sortIn.hi().s[0] ? sortIn.lo().s[0] : sortIn.hi().s[0]);
175 
176  short32 zero_vec = vec(0);
177  short32 diff_vector = vec(maxValScaler) - maxValVec;
178  vpMask = __cmp_eq_pred(diff_vector, zero_vec);
179  short32 masked_indices = __select(vpMask, vIdx, vec(255));
180  short32 sorted_indices = __vdsortii16h_vv(masked_indices);
181 
182  *maxIdx = (sorted_indices.s[0] < sorted_indices.s[16]) ? sorted_indices.s[0] : sorted_indices.s[16];
183 }
184 
185 template <>
186 inline void c7x_horizontal_max_with_index(c7x::int_vec maxValVec, c7x::int_vec vIdx, int32_t *maxVal, int *maxIdx)
187 {
188  __vpred vpMask;
189  typedef typename c7x::make_full_vector<int32_t>::type vec;
190 
191  vec sortIn = __sort_desc(maxValVec);
192  *maxVal = (int) sortIn.s[0];
193 
194  int maxValScaler = (int) sortIn.s[0];
195 
196  int16 zero_vec = vec(0);
197  int16 diff_vector = vec(maxValScaler) - maxValVec;
198  vpMask = __cmp_eq_pred(diff_vector, zero_vec);
199  int16 masked_indices = __select(vpMask, vIdx, vec(255));
200  int16 sorted_indices = __sort_asc(masked_indices);
201 
202  *maxIdx = sorted_indices.s[0];
203 }
204 
205 template <>
206 inline void c7x_horizontal_max_with_index(c7x::long_vec maxValVec, c7x::long_vec vIdx, int64_t *maxVal, int *maxIdx)
207 {
208  __vpred vpMask;
209  typedef typename c7x::make_full_vector<int64_t>::type vec;
210 
211  long4 maxValVec1 = maxValVec.even();
212  maxValVec1 = __max(maxValVec.odd(), maxValVec1);
213 
214  long2 maxValVec2 = maxValVec1.even();
215  maxValVec2 = __max(maxValVec1.odd(), maxValVec2);
216 
217  long maxValVec3 = maxValVec2.even();
218  maxValVec3 = __max(maxValVec2.odd(), maxValVec3);
219  *maxVal = (long) maxValVec3;
220 
221  long maxValScaler = (long) maxValVec3;
222 
223  long8 zero_vec = vec(0);
224  long8 diff_vector = vec(maxValScaler) - maxValVec;
225  vpMask = __cmp_eq_pred(diff_vector, zero_vec);
226  long8 maxIdxVec = vec(255);
227  long8 masked_indices = __select(vpMask, vIdx, maxIdxVec);
228 
229  long4 vIdx1 = (long4) __min(masked_indices.even(), masked_indices.odd());
230  long2 vIdx2 = (long2) __min(vIdx1.even(), vIdx1.odd());
231  *maxIdx = (long) __min(vIdx2.even(), vIdx2.odd());
232 }
233 
234 template <>
235 inline void c7x_horizontal_max_with_index(c7x::uchar_vec maxValVec, c7x::uchar_vec vIdx, uint8_t *maxVal, int *maxIdx)
236 {
237  __vpred vpMask;
238  typedef typename c7x::make_full_vector<uint8_t>::type vec;
239 
240  uchar32 maxValVec1 = maxValVec.even();
241  maxValVec1 = __max(maxValVec.odd(), maxValVec1);
242 
243  uchar16 maxValVec2 = maxValVec1.even();
244  maxValVec2 = __max(maxValVec1.odd(), maxValVec2);
245 
246  uchar8 maxValVec3 = maxValVec2.even();
247  maxValVec3 = __max(maxValVec2.odd(), maxValVec3);
248 
249  uchar4 maxValVec4 = maxValVec3.even();
250  maxValVec4 = __max(maxValVec3.odd(), maxValVec4);
251 
252  uchar2 maxValVec5 = maxValVec4.even();
253  maxValVec5 = __max(maxValVec4.odd(), maxValVec5);
254 
255  uint8_t maxValVec6 = maxValVec5.even();
256  maxValVec6 = __max(maxValVec5.odd(), maxValVec6);
257  *maxVal = (uint8_t) maxValVec6;
258  uint8_t maxValScaler = (uint8_t) maxValVec6;
259 
260  uchar64 zero_vec = vec(0);
261  uchar64 diff_vector = vec(maxValScaler) - maxValVec;
262  vpMask = __cmp_eq_pred(diff_vector, zero_vec);
263  uchar64 maxIdxVec = vec(255);
264  uchar64 masked_indices = __select(vpMask, vIdx, maxIdxVec);
265 
266  uchar32 vIdx1 = (uchar32) __min(masked_indices.even(), masked_indices.odd());
267  uchar16 vIdx2 = (uchar16) __min(vIdx1.even(), vIdx1.odd());
268  uchar8 vIdx3 = (uchar8) __min(vIdx2.even(), vIdx2.odd());
269  uchar4 vIdx4 = (uchar4) __min(vIdx3.even(), vIdx3.odd());
270  uchar2 vIdx5 = (uchar2) __min(vIdx4.even(), vIdx4.odd());
271  *maxIdx = (int) __min(vIdx5.even(), vIdx5.odd());
272 }
273 
274 template <>
275 inline void
276 c7x_horizontal_max_with_index(c7x::ushort_vec maxValVec, c7x::ushort_vec vIdx, uint16_t *maxVal, int *maxIdx)
277 {
278  __vpred vpMask;
279  typedef typename c7x::make_full_vector<uint16_t>::type vec;
280 
281  vec sortIn = __vdsortddu16h_vv(maxValVec);
282  *maxVal = (ushort) (sortIn.lo().s[0] > sortIn.hi().s[0] ? sortIn.lo().s[0] : sortIn.hi().s[0]);
283 
284  ushort maxValScaler = (ushort) (sortIn.lo().s[0] > sortIn.hi().s[0] ? sortIn.lo().s[0] : sortIn.hi().s[0]);
285 
286  ushort32 zero_vec = vec(0);
287  ushort32 diff_vector = vec(maxValScaler) - maxValVec;
288  vpMask = __cmp_eq_pred(diff_vector, zero_vec);
289  ushort32 masked_indices = __select(vpMask, vIdx, vec(255));
290  ushort32 sorted_indices = __vdsortiiu16h_vv(masked_indices);
291 
292  *maxIdx = (sorted_indices.s[0] < sorted_indices.s[16]) ? sorted_indices.s[0] : sorted_indices.s[16];
293 }
294 
295 template <>
296 inline void c7x_horizontal_max_with_index(c7x::uint_vec maxValVec, c7x::uint_vec vIdx, uint32_t *maxVal, int *maxIdx)
297 {
298  __vpred vpMask;
299  typedef typename c7x::make_full_vector<uint32_t>::type vec;
300 
301  vec sortIn = __sort_desc(maxValVec);
302  *maxVal = (uint) sortIn.s[0];
303 
304  uint maxValScaler = (uint) sortIn.s[0];
305 
306  uint16 zero_vec = vec(0);
307  uint16 diff_vector = vec(maxValScaler) - maxValVec;
308  vpMask = __cmp_eq_pred(diff_vector, zero_vec);
309  uint16 masked_indices = __select(vpMask, vIdx, vec(255));
310  uint16 sorted_indices = __sort_asc(masked_indices);
311 
312  *maxIdx = sorted_indices.s[0];
313 }
314 
315 template <>
316 inline void c7x_horizontal_max_with_index(c7x::ulong_vec maxValVec, c7x::ulong_vec vIdx, uint64_t *maxVal, int *maxIdx)
317 {
318  __vpred vpMask;
319  typedef typename c7x::make_full_vector<uint64_t>::type vec;
320  ulong4 maxValVec1 = maxValVec.even();
321  maxValVec1 = __max(maxValVec.odd(), maxValVec1);
322 
323  ulong2 maxValVec2 = maxValVec1.even();
324  maxValVec2 = __max(maxValVec1.odd(), maxValVec2);
325 
326  ulong maxValVec3 = maxValVec2.even();
327  maxValVec3 = __max(maxValVec2.odd(), maxValVec3);
328  *maxVal = (ulong) maxValVec3;
329 
330  ulong maxValScaler = (ulong) maxValVec3;
331 
332  ulong8 zero_vec = vec(0);
333  ulong8 diff_vector = vec(maxValScaler) - maxValVec;
334  vpMask = __cmp_eq_pred(diff_vector, zero_vec);
335  ulong8 maxIdxVec = vec(255);
336  ulong8 masked_indices = __select(vpMask, vIdx, maxIdxVec);
337 
338  ulong4 vIdx1 = (ulong4) __min(masked_indices.even(), masked_indices.odd());
339  ulong2 vIdx2 = (ulong2) __min(vIdx1.even(), vIdx1.odd());
340  *maxIdx = (ulong) __min(vIdx2.even(), vIdx2.odd());
341 }
342 
343 // Convert long to int
344 
345 template <typename X, typename Y> inline typename c7x::make_full_vector<X>::type convert_long_to_int(Y vec);
346 
347 template <> inline c7x::make_full_vector<int32_t>::type convert_long_to_int<int16, long8>(long8 vec)
348 {
349  typedef typename c7x::make_full_vector<int32_t>::type vecRet;
350  vecRet v8bits = __as_int16(vec);
351  return v8bits;
352 }
353 
354 template <> inline c7x::make_full_vector<uint32_t>::type convert_long_to_int<uint16, ulong8>(ulong8 vec)
355 {
356  typedef typename c7x::make_full_vector<uint32_t>::type vecRet;
357  vecRet v8bits = __as_uint16(vec);
358  return v8bits;
359 }
360 
361 // Convert char to short
362 
363 template <typename X, typename Y>
364 inline typename c7x::make_full_vector<X>::type convert_char_to_short(Y vecIn, bool typeIndex);
365 
366 template <>
367 inline c7x::make_full_vector<int16_t>::type convert_char_to_short<short32, char32>(char32 vecIn, bool typeIndex)
368 {
369  typedef typename c7x::make_full_vector<int16_t>::type vecRet;
370  vecRet vecOut;
371  if (typeIndex) {
372  uchar32 vecInConv = __as_uchar32(vecIn);
373  vecOut = __convert_short32(vecInConv);
374  }
375  else {
376  vecOut = __convert_short32(vecIn);
377  }
378  return vecOut;
379 }
380 
381 template <>
382 inline c7x::make_full_vector<uint16_t>::type convert_char_to_short<ushort32, uchar32>(uchar32 vecIn, bool typeIndex)
383 {
384  typedef typename c7x::make_full_vector<uint16_t>::type vecRet;
385  vecRet vecOut = __convert_ushort32(vecIn);
386  return vecOut;
387 }
388 
389 // Multiply two char vectors, producing the products as shorts
390 
391 template <typename X, typename Y> inline void mul_char_to_short(Y vecIn1, Y vecIn2, X vecOut1, X vecOut2);
392 
393 template <>
394 inline void
395 mul_char_to_short<short32 &, char32>(char32 vecIn1, char32 vecIn2, c7x::short_vec &vecOut1Short, c7x::short_vec &vecOut2)
396 {
397  short32 vecInShort1 = __convert_short32(vecIn1);
398  short32 vecInShort2 = __convert_short32(vecIn2);
399  vecOut1Short = vecInShort1 * vecInShort2;
400 }
401 
402 template <>
403 inline void
404 mul_char_to_short<ushort32 &, uchar32>(uchar32 vecIn1, uchar32 vecIn2, c7x::ushort_vec &vecOut1uShort, c7x::ushort_vec &vecOut2)
405 {
406  vecOut1uShort = __mpy_ext(vecIn1, vecIn2);
407 }
408 
409 template <typename dataType, typename V> inline dataType c7x_horizontal_max_fp(V vin);
410 
411 template <> inline float c7x_horizontal_max_fp(c7x::float_vec vin)
412 {
413 
414  float8 vin1 = __max(vin.hi(), vin.lo());
415  float4 vin2 = __max(vin1.hi(), vin1.lo());
416  float2 vin3 = __max(vin2.hi(), vin2.lo());
417  float maxVal = __max(vin3.hi(), vin3.lo());
418  return maxVal;
419 }
420 
421 template <> inline double c7x_horizontal_max_fp(c7x::double_vec vin)
422 {
423  double4 vin1 = __max(vin.hi(), vin.lo());
424  double2 vin2 = __max(vin1.hi(), vin1.lo());
425  double maxVal = __max(vin2.hi(), vin2.lo());
426  return maxVal;
427 }
428 
429 template <typename dataType, typename V> inline dataType c7x_horizontal_min_fp(V vin);
430 template <> inline float c7x_horizontal_min_fp(c7x::float_vec vin)
431 {
432 
433  float8 vin1 = __min(vin.hi(), vin.lo());
434  float4 vin2 = __min(vin1.hi(), vin1.lo());
435  float2 vin3 = __min(vin2.hi(), vin2.lo());
436  float minVal = __min(vin3.hi(), vin3.lo());
437  return minVal;
438 }
439 
440 template <> inline double c7x_horizontal_min_fp(c7x::double_vec vin)
441 {
442  double4 vin1 = __min(vin.hi(), vin.lo());
443  double2 vin2 = __min(vin1.hi(), vin1.lo());
444  double minVal = __min(vin2.hi(), vin2.lo());
445  return minVal;
446 }
447 
448 
449 template <typename V, typename W> inline void c7x_horizontal_add(V inVec, W *horizontalSum);
450 
451 template <> inline void c7x_horizontal_add(c7x::float_vec inVec, float *horizontalSum)
452 {
453  float8 inVec1 = inVec.hi() + inVec.lo();
454  float4 inVec2 = inVec1.hi() + inVec1.lo();
455  float2 inVec3 = inVec2.hi() + inVec2.lo();
456  *horizontalSum = inVec3.hi() + inVec3.lo();
457 }
458 
459 template <> inline void c7x_horizontal_add(c7x::double_vec inVec, double *horizontalSum)
460 {
461  double4 inVec1 = inVec.hi() + inVec.lo();
462  double2 inVec2 = inVec1.hi() + inVec1.lo();
463  *horizontalSum = inVec2.hi() + inVec2.lo();
464 }
465 template <typename V, typename W>
466 inline void c7x_horizontal_min_with_index(V minValVec, V vIdx, W *minVal, int *minIdx);
467 template <>
468 inline void c7x_horizontal_min_with_index(c7x::float_vec minValVec, c7x::float_vec vIdx, float *minVal, int *minIdx)
469 {
470  __vpred vpMask;
471  vpMask = __cmp_lt_pred(minValVec.even(), minValVec.odd());
472  float8 minValVec1 = (float8) __select(vpMask, minValVec.even(), minValVec.odd());
473  float8 vIdx1 = (float8) __select(vpMask, vIdx.even(), vIdx.odd());
474 
475  vpMask = __cmp_lt_pred(minValVec1.even(), minValVec1.odd());
476  float4 minValVec2 = (float4) __select(vpMask, minValVec1.even(), minValVec1.odd());
477  float4 vIdx2 = (float4) __select(vpMask, vIdx1.even(), vIdx1.odd());
478 
479  vpMask = __cmp_lt_pred(minValVec2.even(), minValVec2.odd());
480  float2 minValVec3 = (float2) __select(vpMask, minValVec2.even(), minValVec2.odd());
481  float2 vIdx3 = (float2) __select(vpMask, vIdx2.even(), vIdx2.odd());
482 
483  vpMask = __cmp_lt_pred(minValVec3.even(), minValVec3.odd());
484  *minVal = (float) __select(vpMask, minValVec3.even(), minValVec3.odd());
485  *minIdx = ((int) __select(vpMask, vIdx3.even(), vIdx3.odd()));
486 }
487 
488 template <>
489 inline void c7x_horizontal_min_with_index(c7x::double_vec minValVec, c7x::double_vec vIdx, double *minVal, int *minIdx)
490 {
491  __vpred vpMask;
492  vpMask = __cmp_lt_pred(minValVec.even(), minValVec.odd());
493  double4 minValVec1 = (double4) __select(vpMask, minValVec.even(), minValVec.odd());
494  double4 vIdx1 = (double4) __select(vpMask, vIdx.even(), vIdx.odd());
495 
496  vpMask = __cmp_lt_pred(minValVec1.even(), minValVec1.odd());
497  double2 minValVec2 = (double2) __select(vpMask, minValVec1.even(), minValVec1.odd());
498  double2 vIdx2 = (double2) __select(vpMask, vIdx1.even(), vIdx1.odd());
499 
500  vpMask = __cmp_lt_pred(minValVec2.even(), minValVec2.odd());
501  *minVal = (double) __select(vpMask, minValVec2.even(), minValVec2.odd());
502  *minIdx = ((int) __select(vpMask, vIdx2.even(), vIdx2.odd()));
503 }
504 #endif
505 
506 /******************************************************************************/
507 
508 #endif