DSPLIB User Guide
c7120/DSPLIB_inlines.h
Go to the documentation of this file.
1 /******************************************************************************/
5 /* Copyright (C) 2015 Texas Instruments Incorporated - https://www.ti.com/
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * Redistributions of source code must retain the above copyright
12  * notice, this list of conditions and the following disclaimer.
13  *
14  * Redistributions in binary form must reproduce the above copyright
15  * notice, this list of conditions and the following disclaimer in the
16  * documentation and/or other materials provided with the
17  * distribution.
18  *
19  * Neither the name of Texas Instruments Incorporated nor the names of
20  * its contributors may be used to endorse or promote products derived
21  * from this software without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
29  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
30  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
33  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34  *
35  ******************************************************************************/
36 #ifndef C7120_MMA_INLINES_H
37 #define C7120_MMA_INLINES_H
38 
39 /*******************************************************************************
40  *
41  * Inlined functions
42  *
43  ******************************************************************************/
44 #ifdef __cplusplus
45 #include <c7x_scalable.h>
46 
47 // Horizontal max computation
48 
49 template <typename V> inline uint64_t c7x_horizontal_max(V vin);
50 
51 template <> inline uint64_t c7x_horizontal_max(c7x::uchar_vec vin)
52 {
53  uchar32 m1 = __max(vin.even(), vin.odd());
54  uchar16 m2 = __max(m1.even(), m1.odd());
55  uchar8 m3 = __max(m2.even(), m2.odd());
56  uchar4 m4 = __max(m3.even(), m3.odd());
57  uchar2 m5 = __max(m4.even(), m4.odd());
58  uint64_t retVal = (uint64_t) __max(m5.even(), m5.odd());
59  return retVal;
60 }
61 template <> inline uint64_t c7x_horizontal_max(c7x::ushort_vec vin)
62 {
63  ushort16 m1 = __max(vin.even(), vin.odd());
64  ushort8 m2 = __max(m1.even(), m1.odd());
65  ushort4 m3 = __max(m2.even(), m2.odd());
66  ushort2 m4 = __max(m3.even(), m3.odd());
67  uint64_t retVal = (uint64_t) __max(m4.even(), m4.odd());
68  return retVal;
69 }
70 template <> inline uint64_t c7x_horizontal_max(c7x::uint_vec vin)
71 {
72  uint64_t retVal = (uint64_t) (__sort_asc((uint16) vin).sf());
73  return retVal;
74 }
75 template <> inline uint64_t c7x_horizontal_max(c7x::ulong_vec vin)
76 {
77  ulong4 m1 = __max(vin.even(), vin.odd());
78  ulong2 m2 = __max(m1.even(), m1.odd());
79  uint64_t retVal = (uint64_t) __max(m2.even(), m2.odd());
80  return retVal;
81 }
82 
83 // Horizontal max computation with index
84 
85 template <typename V, typename W>
86 inline void c7x_horizontal_max_with_index(V maxValVec, V vIdx, W *maxVal, int *maxIdx);
87 
88 template <>
89 inline void c7x_horizontal_max_with_index(c7x::float_vec maxValVec, c7x::float_vec vIdx, float *maxVal, int *maxIdx)
90 {
91  __vpred vpMask;
92  vpMask = __cmp_lt_pred(maxValVec.even(), maxValVec.odd());
93  float8 maxValVec1 = (float8) __select(vpMask, maxValVec.odd(), maxValVec.even());
94  float8 vIdx1 = (float8) __select(vpMask, vIdx.odd(), vIdx.even());
95 
96  vpMask = __cmp_lt_pred(maxValVec1.even(), maxValVec1.odd());
97  float4 maxValVec2 = (float4) __select(vpMask, maxValVec1.odd(), maxValVec1.even());
98  float4 vIdx2 = (float4) __select(vpMask, vIdx1.odd(), vIdx1.even());
99 
100  vpMask = __cmp_lt_pred(maxValVec2.even(), maxValVec2.odd());
101  float2 maxValVec3 = (float2) __select(vpMask, maxValVec2.odd(), maxValVec2.even());
102  float2 vIdx3 = (float2) __select(vpMask, vIdx2.odd(), vIdx2.even());
103 
104  vpMask = __cmp_lt_pred(maxValVec3.even(), maxValVec3.odd());
105  *maxVal = (float) __select(vpMask, maxValVec3.odd(), maxValVec3.even());
106  *maxIdx = ((int) __select(vpMask, vIdx3.odd(), vIdx3.even()));
107 }
108 
109 template <>
110 inline void c7x_horizontal_max_with_index(c7x::double_vec maxValVec, c7x::double_vec vIdx, double *maxVal, int *maxIdx)
111 {
112  __vpred vpMask;
113  vpMask = __cmp_lt_pred(maxValVec.even(), maxValVec.odd());
114  double4 maxValVec1 = (double4) __select(vpMask, maxValVec.odd(), maxValVec.even());
115  double4 vIdx1 = (double4) __select(vpMask, vIdx.odd(), vIdx.even());
116 
117  vpMask = __cmp_lt_pred(maxValVec1.even(), maxValVec1.odd());
118  double2 maxValVec2 = (double2) __select(vpMask, maxValVec1.odd(), maxValVec1.even());
119  double2 vIdx2 = (double2) __select(vpMask, vIdx1.odd(), vIdx1.even());
120 
121  vpMask = __cmp_lt_pred(maxValVec2.even(), maxValVec2.odd());
122  *maxVal = (double) __select(vpMask, maxValVec2.odd(), maxValVec2.even());
123  *maxIdx = ((int) __select(vpMask, vIdx2.odd(), vIdx2.even()));
124 }
125 
126 template <>
127 inline void c7x_horizontal_max_with_index(c7x::char_vec maxValVec, c7x::char_vec vIdx, int8_t *maxVal, int *maxIdx)
128 {
129  __vpred vpMask;
130  typedef typename c7x::make_full_vector<int8_t>::type vec;
131 
132  char32 maxValVec1 = maxValVec.even();
133  maxValVec1 = __max(maxValVec.odd(), maxValVec1);
134 
135  char16 maxValVec2 = maxValVec1.even();
136  maxValVec2 = __max(maxValVec1.odd(), maxValVec2);
137 
138  char8 maxValVec3 = maxValVec2.even();
139  maxValVec3 = __max(maxValVec2.odd(), maxValVec3);
140 
141  char4 maxValVec4 = maxValVec3.even();
142  maxValVec4 = __max(maxValVec3.odd(), maxValVec4);
143 
144  char2 maxValVec5 = maxValVec4.even();
145  maxValVec5 = __max(maxValVec4.odd(), maxValVec5);
146 
147  int8_t maxValVec6 = maxValVec5.even();
148  maxValVec6 = __max(maxValVec5.odd(), maxValVec6);
149  *maxVal = (int8_t) maxValVec6;
150  int8_t maxValScaler = (int8_t) maxValVec6;
151 
152  char64 zero_vec = vec(0);
153  char64 diff_vector = vec(maxValScaler) - maxValVec;
154  vpMask = __cmp_eq_pred(diff_vector, zero_vec);
155  char64 masked_indices = __select(vpMask, vIdx, vec(-1));
156  uchar64 umasked_indices = __as_uchar64(masked_indices);
157 
158  uchar32 vIdx1 = (uchar32) __min(umasked_indices.even(), umasked_indices.odd());
159  uchar16 vIdx2 = (uchar16) __min(vIdx1.even(), vIdx1.odd());
160  uchar8 vIdx3 = (uchar8) __min(vIdx2.even(), vIdx2.odd());
161  uchar4 vIdx4 = (uchar4) __min(vIdx3.even(), vIdx3.odd());
162  uchar2 vIdx5 = (uchar2) __min(vIdx4.even(), vIdx4.odd());
163  *maxIdx = (uint8_t) __min(vIdx5.even(), vIdx5.odd());
164 }
165 
166 template <>
167 inline void c7x_horizontal_max_with_index(c7x::short_vec maxValVec, c7x::short_vec vIdx, int16_t *maxVal, int *maxIdx)
168 {
169  __vpred vpMask;
170  typedef typename c7x::make_full_vector<int16_t>::type vec;
171 
172  vec sortIn = __vdsortdd16h_vv(maxValVec);
173  *maxVal = (short) (sortIn.lo().s[0] > sortIn.hi().s[0] ? sortIn.lo().s[0] : sortIn.hi().s[0]);
174 
175  short maxValScaler = (short) (sortIn.lo().s[0] > sortIn.hi().s[0] ? sortIn.lo().s[0] : sortIn.hi().s[0]);
176 
177  short32 zero_vec = vec(0);
178  short32 diff_vector = vec(maxValScaler) - maxValVec;
179  vpMask = __cmp_eq_pred(diff_vector, zero_vec);
180  short32 masked_indices = __select(vpMask, vIdx, vec(255));
181  short32 sorted_indices = __vdsortii16h_vv(masked_indices);
182 
183  *maxIdx = (sorted_indices.s[0] < sorted_indices.s[16]) ? sorted_indices.s[0] : sorted_indices.s[16];
184 }
185 
186 template <>
187 inline void c7x_horizontal_max_with_index(c7x::int_vec maxValVec, c7x::int_vec vIdx, int32_t *maxVal, int *maxIdx)
188 {
189  __vpred vpMask;
190  typedef typename c7x::make_full_vector<int32_t>::type vec;
191 
192  vec sortIn = __sort_desc(maxValVec);
193  *maxVal = (int) sortIn.s[0];
194 
195  int maxValScaler = (int) sortIn.s[0];
196 
197  int16 zero_vec = vec(0);
198  int16 diff_vector = vec(maxValScaler) - maxValVec;
199  vpMask = __cmp_eq_pred(diff_vector, zero_vec);
200  int16 masked_indices = __select(vpMask, vIdx, vec(255));
201  int16 sorted_indices = __sort_asc(masked_indices);
202 
203  *maxIdx = sorted_indices.s[0];
204 }
205 
206 template <>
207 inline void c7x_horizontal_max_with_index(c7x::long_vec maxValVec, c7x::long_vec vIdx, int64_t *maxVal, int *maxIdx)
208 {
209  __vpred vpMask;
210  typedef typename c7x::make_full_vector<int64_t>::type vec;
211 
212  long4 maxValVec1 = maxValVec.even();
213  maxValVec1 = __max(maxValVec.odd(), maxValVec1);
214 
215  long2 maxValVec2 = maxValVec1.even();
216  maxValVec2 = __max(maxValVec1.odd(), maxValVec2);
217 
218  long maxValVec3 = maxValVec2.even();
219  maxValVec3 = __max(maxValVec2.odd(), maxValVec3);
220  *maxVal = (long) maxValVec3;
221 
222  long maxValScaler = (long) maxValVec3;
223 
224  long8 zero_vec = vec(0);
225  long8 diff_vector = vec(maxValScaler) - maxValVec;
226  vpMask = __cmp_eq_pred(diff_vector, zero_vec);
227  long8 maxIdxVec = vec(255);
228  long8 masked_indices = __select(vpMask, vIdx, maxIdxVec);
229 
230  long4 vIdx1 = (long4) __min(masked_indices.even(), masked_indices.odd());
231  long2 vIdx2 = (long2) __min(vIdx1.even(), vIdx1.odd());
232  *maxIdx = (long) __min(vIdx2.even(), vIdx2.odd());
233 }
234 
235 template <>
236 inline void c7x_horizontal_max_with_index(c7x::uchar_vec maxValVec, c7x::uchar_vec vIdx, uint8_t *maxVal, int *maxIdx)
237 {
238  __vpred vpMask;
239  typedef typename c7x::make_full_vector<uint8_t>::type vec;
240 
241  uchar32 maxValVec1 = maxValVec.even();
242  maxValVec1 = __max(maxValVec.odd(), maxValVec1);
243 
244  uchar16 maxValVec2 = maxValVec1.even();
245  maxValVec2 = __max(maxValVec1.odd(), maxValVec2);
246 
247  uchar8 maxValVec3 = maxValVec2.even();
248  maxValVec3 = __max(maxValVec2.odd(), maxValVec3);
249 
250  uchar4 maxValVec4 = maxValVec3.even();
251  maxValVec4 = __max(maxValVec3.odd(), maxValVec4);
252 
253  uchar2 maxValVec5 = maxValVec4.even();
254  maxValVec5 = __max(maxValVec4.odd(), maxValVec5);
255 
256  uint8_t maxValVec6 = maxValVec5.even();
257  maxValVec6 = __max(maxValVec5.odd(), maxValVec6);
258  *maxVal = (uint8_t) maxValVec6;
259  uint8_t maxValScaler = (uint8_t) maxValVec6;
260 
261  uchar64 zero_vec = vec(0);
262  uchar64 diff_vector = vec(maxValScaler) - maxValVec;
263  vpMask = __cmp_eq_pred(diff_vector, zero_vec);
264  uchar64 maxIdxVec = vec(255);
265  uchar64 masked_indices = __select(vpMask, vIdx, maxIdxVec);
266 
267  uchar32 vIdx1 = (uchar32) __min(masked_indices.even(), masked_indices.odd());
268  uchar16 vIdx2 = (uchar16) __min(vIdx1.even(), vIdx1.odd());
269  uchar8 vIdx3 = (uchar8) __min(vIdx2.even(), vIdx2.odd());
270  uchar4 vIdx4 = (uchar4) __min(vIdx3.even(), vIdx3.odd());
271  uchar2 vIdx5 = (uchar2) __min(vIdx4.even(), vIdx4.odd());
272  *maxIdx = (int) __min(vIdx5.even(), vIdx5.odd());
273 }
274 
275 template <>
276 inline void
277 c7x_horizontal_max_with_index(c7x::ushort_vec maxValVec, c7x::ushort_vec vIdx, uint16_t *maxVal, int *maxIdx)
278 {
279  __vpred vpMask;
280  typedef typename c7x::make_full_vector<uint16_t>::type vec;
281 
282  vec sortIn = __vdsortddu16h_vv(maxValVec);
283  *maxVal = (ushort) (sortIn.lo().s[0] > sortIn.hi().s[0] ? sortIn.lo().s[0] : sortIn.hi().s[0]);
284 
285  ushort maxValScaler = (ushort) (sortIn.lo().s[0] > sortIn.hi().s[0] ? sortIn.lo().s[0] : sortIn.hi().s[0]);
286 
287  ushort32 zero_vec = vec(0);
288  ushort32 diff_vector = vec(maxValScaler) - maxValVec;
289  vpMask = __cmp_eq_pred(diff_vector, zero_vec);
290  ushort32 masked_indices = __select(vpMask, vIdx, vec(255));
291  ushort32 sorted_indices = __vdsortiiu16h_vv(masked_indices);
292 
293  *maxIdx = (sorted_indices.s[0] < sorted_indices.s[16]) ? sorted_indices.s[0] : sorted_indices.s[16];
294 }
295 
296 template <>
297 inline void c7x_horizontal_max_with_index(c7x::uint_vec maxValVec, c7x::uint_vec vIdx, uint32_t *maxVal, int *maxIdx)
298 {
299  __vpred vpMask;
300  typedef typename c7x::make_full_vector<uint32_t>::type vec;
301 
302  vec sortIn = __sort_desc(maxValVec);
303  *maxVal = (uint) sortIn.s[0];
304 
305  uint maxValScaler = (uint) sortIn.s[0];
306 
307  uint16 zero_vec = vec(0);
308  uint16 diff_vector = vec(maxValScaler) - maxValVec;
309  vpMask = __cmp_eq_pred(diff_vector, zero_vec);
310  uint16 masked_indices = __select(vpMask, vIdx, vec(255));
311  uint16 sorted_indices = __sort_asc(masked_indices);
312 
313  *maxIdx = sorted_indices.s[0];
314 }
315 
316 template <>
317 inline void c7x_horizontal_max_with_index(c7x::ulong_vec maxValVec, c7x::ulong_vec vIdx, uint64_t *maxVal, int *maxIdx)
318 {
319  __vpred vpMask;
320  typedef typename c7x::make_full_vector<uint64_t>::type vec;
321  ulong4 maxValVec1 = maxValVec.even();
322  maxValVec1 = __max(maxValVec.odd(), maxValVec1);
323 
324  ulong2 maxValVec2 = maxValVec1.even();
325  maxValVec2 = __max(maxValVec1.odd(), maxValVec2);
326 
327  ulong maxValVec3 = maxValVec2.even();
328  maxValVec3 = __max(maxValVec2.odd(), maxValVec3);
329  *maxVal = (ulong) maxValVec3;
330 
331  ulong maxValScaler = (ulong) maxValVec3;
332 
333  ulong8 zero_vec = vec(0);
334  ulong8 diff_vector = vec(maxValScaler) - maxValVec;
335  vpMask = __cmp_eq_pred(diff_vector, zero_vec);
336  ulong8 maxIdxVec = vec(255);
337  ulong8 masked_indices = __select(vpMask, vIdx, maxIdxVec);
338 
339  ulong4 vIdx1 = (ulong4) __min(masked_indices.even(), masked_indices.odd());
340  ulong2 vIdx2 = (ulong2) __min(vIdx1.even(), vIdx1.odd());
341  *maxIdx = (ulong) __min(vIdx2.even(), vIdx2.odd());
342 }
343 
344 // Convert long to int
345 
346 template <typename X, typename Y> inline typename c7x::make_full_vector<X>::type convert_long_to_int(Y vec);
347 
348 template <> inline c7x::make_full_vector<int32_t>::type convert_long_to_int<int16, long8>(long8 vec)
349 {
350  typedef typename c7x::make_full_vector<int32_t>::type vecRet;
351  vecRet v8bits = __as_int16(vec);
352  return v8bits;
353 }
354 
355 template <> inline c7x::make_full_vector<uint32_t>::type convert_long_to_int<uint16, ulong8>(ulong8 vec)
356 {
357  typedef typename c7x::make_full_vector<uint32_t>::type vecRet;
358  vecRet v8bits = __as_uint16(vec);
359  return v8bits;
360 }
361 
362 // Convert char to short
363 
364 template <typename X, typename Y>
365 inline typename c7x::make_full_vector<X>::type convert_char_to_short(Y vecIn, bool typeIndex);
366 
367 template <>
368 inline c7x::make_full_vector<int16_t>::type convert_char_to_short<short32, char32>(char32 vecIn, bool typeIndex)
369 {
370  typedef typename c7x::make_full_vector<int16_t>::type vecRet;
371  vecRet vecOut;
372  if (typeIndex) {
373  uchar32 vecInConv = __as_uchar32(vecIn);
374  vecOut = __convert_short32(vecInConv);
375  }
376  else {
377  vecOut = __convert_short32(vecIn);
378  }
379  return vecOut;
380 }
381 
382 template <>
383 inline c7x::make_full_vector<uint16_t>::type convert_char_to_short<ushort32, uchar32>(uchar32 vecIn, bool typeIndex)
384 {
385  typedef typename c7x::make_full_vector<uint16_t>::type vecRet;
386  vecRet vecOut = __convert_ushort32(vecIn);
387  return vecOut;
388 }
389 
// Multiply two char vectors lane by lane, producing short results
391 
392 template <typename X, typename Y> inline void mul_char_to_short(Y vecIn1, Y vecIn2, X vecOut1, X vecOut2);
393 
394 template <>
395 inline void
396 mul_char_to_short<short32 &, char32>(char32 vecIn1, char32 vecIn2, c7x::short_vec &vecOut1Short, c7x::short_vec &vecOut2)
397 {
398  short32 vecInShort1 = __convert_short32(vecIn1);
399  short32 vecInShort2 = __convert_short32(vecIn2);
400  vecOut1Short = vecInShort1 * vecInShort2;
401 }
402 
403 template <>
404 inline void
405 mul_char_to_short<ushort32 &, uchar32>(uchar32 vecIn1, uchar32 vecIn2, c7x::ushort_vec &vecOut1uShort, c7x::ushort_vec &vecEx)
406 {
407  vecOut1uShort = __mpy_ext(vecIn1, vecIn2);
408 }
409 template <typename dataType, typename V> inline dataType c7x_horizontal_max_fp(V vin);
410 
411 template <> inline float c7x_horizontal_max_fp(c7x::float_vec vin)
412 {
413 
414  float8 vin1 = __max(vin.hi(), vin.lo());
415  float4 vin2 = __max(vin1.hi(), vin1.lo());
416  float2 vin3 = __max(vin2.hi(), vin2.lo());
417  float maxVal = __max(vin3.hi(), vin3.lo());
418  return maxVal;
419 }
420 
421 template <> inline double c7x_horizontal_max_fp(c7x::double_vec vin)
422 {
423  double4 vin1 = __max(vin.hi(), vin.lo());
424  double2 vin2 = __max(vin1.hi(), vin1.lo());
425  double maxVal = __max(vin2.hi(), vin2.lo());
426  return maxVal;
427 }
428 
429 template <typename dataType, typename V> inline dataType c7x_horizontal_min_fp(V vin);
430 template <> inline float c7x_horizontal_min_fp(c7x::float_vec vin)
431 {
432 
433  float8 vin1 = __min(vin.hi(), vin.lo());
434  float4 vin2 = __min(vin1.hi(), vin1.lo());
435  float2 vin3 = __min(vin2.hi(), vin2.lo());
436  float minVal = __min(vin3.hi(), vin3.lo());
437  return minVal;
438 }
439 
440 template <> inline double c7x_horizontal_min_fp(c7x::double_vec vin)
441 {
442  double4 vin1 = __min(vin.hi(), vin.lo());
443  double2 vin2 = __min(vin1.hi(), vin1.lo());
444  double minVal = __min(vin2.hi(), vin2.lo());
445  return minVal;
446 }
447 
448 
449 template <typename V, typename W> inline void c7x_horizontal_add(V inVec, W *horizontalSum);
450 
451 template <> inline void c7x_horizontal_add(c7x::float_vec inVec, float *horizontalSum)
452 {
453  float8 inVec1 = inVec.hi() + inVec.lo();
454  float4 inVec2 = inVec1.hi() + inVec1.lo();
455  float2 inVec3 = inVec2.hi() + inVec2.lo();
456  *horizontalSum = inVec3.hi() + inVec3.lo();
457 }
458 
459 template <> inline void c7x_horizontal_add(c7x::double_vec inVec, double *horizontalSum)
460 {
461  double4 inVec1 = inVec.hi() + inVec.lo();
462  double2 inVec2 = inVec1.hi() + inVec1.lo();
463  *horizontalSum = inVec2.hi() + inVec2.lo();
464 }
465 
466 template <typename V, typename W>
467 inline void c7x_horizontal_min_with_index(V minValVec, V vIdx, W *minVal, int *minIdx);
468 template <>
469 inline void c7x_horizontal_min_with_index(c7x::float_vec minValVec, c7x::float_vec vIdx, float *minVal, int *minIdx)
470 {
471  __vpred vpMask;
472  vpMask = __cmp_lt_pred(minValVec.even(), minValVec.odd());
473  float8 minValVec1 = (float8) __select(vpMask, minValVec.even(), minValVec.odd());
474  float8 vIdx1 = (float8) __select(vpMask, vIdx.even(), vIdx.odd());
475 
476  vpMask = __cmp_lt_pred(minValVec1.even(), minValVec1.odd());
477  float4 minValVec2 = (float4) __select(vpMask, minValVec1.even(), minValVec1.odd());
478  float4 vIdx2 = (float4) __select(vpMask, vIdx1.even(), vIdx1.odd());
479 
480  vpMask = __cmp_lt_pred(minValVec2.even(), minValVec2.odd());
481  float2 minValVec3 = (float2) __select(vpMask, minValVec2.even(), minValVec2.odd());
482  float2 vIdx3 = (float2) __select(vpMask, vIdx2.even(), vIdx2.odd());
483 
484  vpMask = __cmp_lt_pred(minValVec3.even(), minValVec3.odd());
485  *minVal = (float) __select(vpMask, minValVec3.even(), minValVec3.odd());
486  *minIdx = ((int) __select(vpMask, vIdx3.even(), vIdx3.odd()));
487 }
488 
489 template <>
490 inline void c7x_horizontal_min_with_index(c7x::double_vec minValVec, c7x::double_vec vIdx, double *minVal, int *minIdx)
491 {
492  __vpred vpMask;
493  vpMask = __cmp_lt_pred(minValVec.even(), minValVec.odd());
494  double4 minValVec1 = (double4) __select(vpMask, minValVec.even(), minValVec.odd());
495  double4 vIdx1 = (double4) __select(vpMask, vIdx.even(), vIdx.odd());
496 
497  vpMask = __cmp_lt_pred(minValVec1.even(), minValVec1.odd());
498  double2 minValVec2 = (double2) __select(vpMask, minValVec1.even(), minValVec1.odd());
499  double2 vIdx2 = (double2) __select(vpMask, vIdx1.even(), vIdx1.odd());
500 
501  vpMask = __cmp_lt_pred(minValVec2.even(), minValVec2.odd());
502  *minVal = (double) __select(vpMask, minValVec2.even(), minValVec2.odd());
503  *minIdx = ((int) __select(vpMask, vIdx2.even(), vIdx2.odd()));
504 }
505 #endif
506 
507 #endif