DSPLIB User Guide
DSPLIB_max_ci.cpp
Go to the documentation of this file.
1 /******************************************************************************/
5 /* Copyright (C) 2017 Texas Instruments Incorporated - https://www.ti.com/
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * Redistributions of source code must retain the above copyright
12  * notice, this list of conditions and the following disclaimer.
13  *
14  * Redistributions in binary form must reproduce the above copyright
15  * notice, this list of conditions and the following disclaimer in the
16  * documentation and/or other materials provided with the
17  * distribution.
18  *
19  * Neither the name of Texas Instruments Incorporated nor the names of
20  * its contributors may be used to endorse or promote products derived
21  * from this software without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
29  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
30  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
33  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34  *
35  ******************************************************************************/
36 
37 /******************************************************************************
38  * Version 1.0 Date 10/2/22 Author: Asheesh Bhardwaj
39  *****************************************************************************/
40 
41 /*******************************************************************************
42  *
43  * INCLUDES
44  *
45  ******************************************************************************/
46 
47 #include "../common/c71/DSPLIB_inlines.h"
48 #include "DSPLIB_max_priv.h"
49 #include <float.h>
50 #include <limits>
51 
52 #define UNROLL_FACTOR 4
53 
/**
 * @brief Initialization routine of the C7x max kernel: builds the streaming
 *        engine templates for SE0 and SE1 from the configured block size and
 *        stores them in the parameter block held by the kernel private args.
 *
 * NOTE(review): this listing has lost the line that carries the return type,
 * the function name and the first parameter (listing line 55) as well as the
 * declaration of `status` (listing line 60). The cross-reference index of this
 * page names the function DSPLIB_max_init_ci and shows `handle` as its first
 * parameter.
 *
 * @tparam dataType      element type; selects the full vector type and thereby
 *                       the SE element type and vector length
 * @param  bufParamsIn   not referenced in the visible body
 * @param  bufParamsOut  not referenced in the visible body
 * @param  pKerInitArgs  not referenced in the visible body
 * @return `status` (its declaration and initial value are not visible here)
 */
54 template <typename dataType>
56  const DSPLIB_bufParams1D_t *bufParamsIn,
57  const DSPLIB_bufParams1D_t *bufParamsOut,
58  const DSPLIB_max_InitArgs *pKerInitArgs)
59 {
61  __SE_TEMPLATE_v1 se0Params, se1Params;
62 
63  __SE_ELETYPE SE_ELETYPE;
64  __SE_VECLEN SE_VECLEN;
65 
    // The opaque handle is the kernel private argument structure.
66  DSPLIB_max_PrivArgs *pKerPrivArgs = (DSPLIB_max_PrivArgs *) handle;
67 
68  uint8_t *pBlock = pKerPrivArgs->bufPblock;
69  uint32_t blockSize = pKerPrivArgs->blockSize;
70 
    // Derive the number of elements per full vector and the matching SE settings from dataType.
71  typedef typename c7x::make_full_vector<dataType>::type vec;
72  uint32_t eleCount = c7x::element_count_of<vec>::value;
73  SE_VECLEN = c7x::se_veclen<vec>::value;
74  SE_ELETYPE = c7x::se_eletype<vec>::value;
75 
    // length = number of input elements, width = elements per vector.
76  uint32_t length = blockSize;
77  uint32_t width = eleCount;
78 
79 #if DSPLIB_DEBUGPRINT
80  printf("Enter eleCount %d\n", eleCount);
81 #endif
82 
83  /**********************************************************************/
84  /* Prepare streaming engine 0,1 to fetch the input */
85  /**********************************************************************/
86  se0Params = __gen_SE_TEMPLATE_v1();
87 
88  // default SE0 parameters
89  se0Params.ICNT0 = width;
90  se0Params.ELETYPE = SE_ELETYPE;
91  se0Params.VECLEN = SE_VECLEN;
92  se0Params.DIMFMT = __SE_DIMFMT_1D;
93 
94  se1Params = __gen_SE_TEMPLATE_v1();
95 
96  // default SE1 parameters
97  se1Params.ICNT0 = width;
98  se1Params.ELETYPE = SE_ELETYPE;
99  se1Params.VECLEN = SE_VECLEN;
100  se1Params.DIMFMT = __SE_DIMFMT_1D;
101 
102  // printf("\nsetup se done\n");
103 
    // numBlocks = ceil(length / width); remBlocksSize = elements in the trailing partial vector.
    // NOTE(review): numBlocks is not used again in the visible body; only remBlocksSize is.
104  uint32_t numBlocks = length / width;
105  uint32_t remBlocksSize = length % width;
106  if (remBlocksSize){
107  numBlocks++;
108  }
109 
110  // case: length of input <= width
111  // one SE fetch is length elements, rest of vec filled with '0'
112  if (length <= width) {
113 
114  // printf("\ninit len < width\n");
115  // SE0 fetch length
116  se0Params.ICNT0 = length;
117  // SE1 not used
118  }
119 
120  // case: length of input is > width but < 2*width
121  // SE0 fetch is one full width, SE1 fetch is partial fetch, rest of vec filled with '0'
122  else if (length < 2 * width) {
123  // printf("\ninit between 1 and 2\n");
124 
125  // SE0 full fetch
126  se0Params.ICNT0 = width;
127  // SE1 partial fetch
128  se1Params.ICNT0 = remBlocksSize;
129  }
130 
131  // case: len >= 2*width but < 3*width
132  // SE0 and SE1 full fetch, remainder from direct memory
133  else if (length < 3 * width) {
134  // SE0 full fetch
135  se0Params.ICNT0 = width;
136  // SE1 full fetch
137  se1Params.ICNT0 = width;
138  }
139 
140  // case: len >= 3*width but < 4*width
141  // 3 full fetches - SE0 2 times, SE1 once
142  else if (length < 4 * width) {
143  // SE0 2 full fetches
    // 2D pattern: two rows of width elements, rows 2 * width elements apart.
144  se0Params.DIMFMT = __SE_DIMFMT_2D;
145  se0Params.DIM1 = 2 * width;
146  se0Params.ICNT1 = 2;
147  se0Params.ICNT0 = width;
148  // SE1 full fetch (one vector of width elements)
149  se1Params.ICNT0 = width;
150  }
151 
152  // case: len >= 4*width
153  // SE0 and SE1 fetches are full widths only
154  else {
155 
156  // printf("\ninside len > 4SIMD\n");
157  // SE0 Dim is 2D
158  se0Params.DIMFMT = __SE_DIMFMT_2D;
159  // SE0 jump length each get_adv is 2 widths
160  se0Params.DIM1 = 2 * width;
161  // SE only performs full fetches in multiples of UNROLL_FACTOR, i.e. 4
    // (UNROLL_FACTOR >> 1) * width equals 2 * width, so ICNT1 is the number of
    // whole 2 * width strides that fit in length.
162  se0Params.ICNT1 = length / (((uint32_t)UNROLL_FACTOR >> (uint32_t)1) * width);
163  // SE0 fetches full widths
164  se0Params.ICNT0 = width;
165 
166  // SE1 fetches in same manner as SE0, but starts 1 width ahead
    // (the one-width offset itself is applied where SE1 is opened, not in this template)
167  se1Params = se0Params;
168  }
169 
    // Persist both templates in the parameter block at their fixed offsets.
170  *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_SE0_PARAM_OFFSET) = se0Params;
171  *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_SE1_PARAM_OFFSET) = se1Params;
172  // printf("\ninit done\n");
173 
174  return status;
175 }
176 
178  const DSPLIB_bufParams1D_t *bufParamsIn,
179  const DSPLIB_bufParams1D_t *bufParamsOut,
180  const DSPLIB_max_InitArgs *pKerInitArgs);
181 
183  const DSPLIB_bufParams1D_t *bufParamsIn,
184  const DSPLIB_bufParams1D_t *bufParamsOut,
185  const DSPLIB_max_InitArgs *pKerInitArgs);
186 
188  const DSPLIB_bufParams1D_t *bufParamsIn,
189  const DSPLIB_bufParams1D_t *bufParamsOut,
190  const DSPLIB_max_InitArgs *pKerInitArgs);
191 
193  const DSPLIB_bufParams1D_t *bufParamsIn,
194  const DSPLIB_bufParams1D_t *bufParamsOut,
195  const DSPLIB_max_InitArgs *pKerInitArgs);
196 
198  const DSPLIB_bufParams1D_t *bufParamsIn,
199  const DSPLIB_bufParams1D_t *bufParamsOut,
200  const DSPLIB_max_InitArgs *pKerInitArgs);
201 
203  const DSPLIB_bufParams1D_t *bufParamsIn,
204  const DSPLIB_bufParams1D_t *bufParamsOut,
205  const DSPLIB_max_InitArgs *pKerInitArgs);
206 
208  const DSPLIB_bufParams1D_t *bufParamsIn,
209  const DSPLIB_bufParams1D_t *bufParamsOut,
210  const DSPLIB_max_InitArgs *pKerInitArgs);
211 
213  const DSPLIB_bufParams1D_t *bufParamsIn,
214  const DSPLIB_bufParams1D_t *bufParamsOut,
215  const DSPLIB_max_InitArgs *pKerInitArgs);
216 
/**
 * @brief Generic execution routine of the C7x max kernel: finds the largest
 *        element of the input block and writes it to the output.
 *
 * Loads the SE0/SE1 templates from the parameter block in the kernel private
 * args, opens the streaming engines on the input, obtains a vector of
 * candidate maxima from DSPLIB_max_loopLogic and reduces that vector to one
 * scalar with a plain element loop.
 *
 * @tparam dataType element type of the input and output buffers
 * @tparam dataIn   not referenced in the body; it only distinguishes the
 *                  specializations and instantiations of this template
 * @param  handle   kernel handle, cast to DSPLIB_max_PrivArgs
 * @param  pIn      input buffer, read as dataType elements
 * @param  pOut     output location; exactly one dataType value is written
 * @return DSPLIB_SUCCESS (status is never changed after initialization)
 */
217 template <typename dataType, int32_t dataIn>
218 DSPLIB_STATUS DSPLIB_max_exec_ci(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
219 {
220  DSPLIB_max_PrivArgs *pKerPrivArgs = (DSPLIB_max_PrivArgs *) handle;
221  uint32_t blockSize = pKerPrivArgs->blockSize;
222 
223  DSPLIB_STATUS status = DSPLIB_SUCCESS;
224 
225  __SE_TEMPLATE_v1 se0Params, se1Params;
226  // __SA_TEMPLATE_v1 sa0Params;
227 
228  dataType *restrict pInLocal = (dataType *) pIn;
229  dataType *restrict pOutLocal = (dataType *) pOut;
230 
231 #if DSPLIB_DEBUGPRINT
232  printf("Enter DSPLIB_max_exec_ci\n");
233 #endif
234 
235  typedef typename c7x::make_full_vector<dataType>::type vec;
236  uint32_t eleCount = c7x::element_count_of<vec>::value;
237 
238 #if DSPLIB_DEBUGPRINT
239  printf("Enter eleCount %d\n", eleCount);
240 #endif
241 
    // Read the SE templates from the parameter block at their fixed offsets.
242  uint8_t *pBlock = pKerPrivArgs->bufPblock;
243  se0Params = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_SE0_PARAM_OFFSET);
244  se1Params = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_SE1_PARAM_OFFSET);
245 
246  uint32_t length = blockSize;
247  uint32_t width = eleCount;
248 
249  // Input samples
    // SE0 starts at the first element; SE1 starts one vector width later and is
    // only opened when the input is longer than one vector.
250  __SE0_OPEN(pInLocal, se0Params);
251  if (length > width){
252  __SE1_OPEN(pInLocal + eleCount, se1Params);
253  }
254 
255  // // Output samples
256  // __SA0_OPEN(sa0Params);
257 
258 #if DSPLIB_DEBUGPRINT
259  printf("DSPLIB_DEBUGPRINT blockSize %d\n", blockSize);
260 #endif
261 
262  // call loop logic method
263  vec maxVec = DSPLIB_max_loopLogic<dataType, vec>(blockSize, eleCount, pInLocal);
264 
    // Horizontal reduction: walk the elements of maxVec through a scalar pointer
    // and keep the largest one.
265  dataType *currentMax = (dataType *) &maxVec;
266  dataType largest = *currentMax++;
267  dataType currentValue;
268 
269  for (size_t i = 1; i < c7x::element_count_of<vec>::value; i++) {
270  currentValue = *currentMax;
271 
272  if (currentValue > largest) {
273  largest = currentValue;
274  }
275 
276  currentMax++;
277  }
278 
279  *pOutLocal = largest;
280 
    // Close the engines under the same condition they were opened with.
281  __SE0_CLOSE();
282  if (length > width){
283  __SE1_CLOSE();
284  }
285 
286  return status;
287 }
288 
/**
 * @brief Specialization of the max execution routine for int8_t input.
 *
 * Same flow as the generic version, but the final reduction widens the result
 * vector to 32-bit lanes and uses vector intrinsics instead of a scalar loop.
 *
 * NOTE(review): the line carrying the return type (listing line 290) is
 * missing from this listing; the cross-reference index shows DSPLIB_STATUS.
 * NOTE(review): unlike the generic version, SE1 is opened unconditionally and
 * with the SE0 template; the SE1 template stored by the init routine is not
 * read here. Confirm this is intended for inputs shorter than two vectors.
 *
 * @param handle kernel handle, cast to DSPLIB_max_PrivArgs
 * @param pIn    input buffer, read as int8_t elements
 * @param pOut   output location; one int8_t value is written
 * @return DSPLIB_SUCCESS
 */
289 template <>
291 DSPLIB_max_exec_ci<int8_t, DSPLIB_INT8>(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
292 {
293 
294  DSPLIB_max_PrivArgs *pKerPrivArgs = (DSPLIB_max_PrivArgs *) handle;
295  uint32_t blockSize = pKerPrivArgs->blockSize;
296 
297  DSPLIB_STATUS status = DSPLIB_SUCCESS;
298 
299  __SE_TEMPLATE_v1 se0Params;
300 
301  int8_t *restrict pInLocal = (int8_t *) pIn;
302  int8_t *restrict pOutLocal = (int8_t *) pOut;
303 
304 #if DSPLIB_DEBUGPRINT
305  printf("Enter DSPLIB_max_exec_ci\n");
306 #endif
307 
308  typedef typename c7x::make_full_vector<int8_t>::type vec;
309  uint32_t eleCount = c7x::element_count_of<vec>::value;
310 
311 #if DSPLIB_DEBUGPRINT
312  printf("Enter eleCount %d\n", eleCount);
313 #endif
314 
315  uint8_t *pBlock = pKerPrivArgs->bufPblock;
316  se0Params = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_SE0_PARAM_OFFSET);
317 
318  // Input samples
    // SE1 starts one vector width after SE0 and reuses the SE0 template.
319  __SE0_OPEN(pInLocal, se0Params);
320  __SE1_OPEN(pInLocal + eleCount, se0Params);
321 
322  // call loop logic method
323  vec maxVec = DSPLIB_max_loopLogic<int8_t, vec>(blockSize, eleCount, pInLocal);
324 
    // Widen char -> short -> int in halves, producing four int vectors that
    // together cover every lane of maxVec.
325  c7x::short_vec lowShorts = __low_char_to_short(maxVec);
326  c7x::short_vec highShorts = __high_char_to_short(maxVec);
327  c7x::int_vec lowlowInts = __low_short_to_int(lowShorts);
328  c7x::int_vec highlowInts = __high_short_to_int(lowShorts);
329  c7x::int_vec lowhighInts = __low_short_to_int(highShorts);
330  c7x::int_vec highhighInts = __high_short_to_int(highShorts);
    // Pairwise maxima collapse the four int vectors into one.
331  c7x::int_vec lowmax = __max(lowlowInts, highlowInts);
332  c7x::int_vec highmax = __max(lowhighInts, highhighInts);
333  c7x::int_vec maxOfInts = __max(lowmax, highmax);
    // Presumably sorts the lanes in descending order so that lane 0 holds the
    // largest value - confirm against the intrinsic documentation.
334  maxOfInts = __sort_desc(maxOfInts); // this intrinsic contains valgrind errors
335  // convert back to char
336  *pOutLocal = (int8_t) maxOfInts.s[0];
337  // close SE0, SE1
338  __SE0_CLOSE();
339  __SE1_CLOSE();
340 
341  return status;
342 }
343 
/**
 * @brief Specialization of the max execution routine for uint8_t input.
 *
 * Same flow as the generic version, but the final reduction widens the result
 * vector to unsigned 32-bit lanes and uses vector intrinsics.
 *
 * NOTE(review): the line carrying the return type (listing line 345) is
 * missing from this listing; the cross-reference index shows DSPLIB_STATUS.
 * NOTE(review): unlike the generic version, SE1 is opened unconditionally and
 * with the SE0 template; the SE1 template stored by the init routine is not
 * read here. Confirm this is intended for inputs shorter than two vectors.
 *
 * @param handle kernel handle, cast to DSPLIB_max_PrivArgs
 * @param pIn    input buffer, read as uint8_t elements
 * @param pOut   output location; one uint8_t value is written
 * @return DSPLIB_SUCCESS
 */
344 template <>
346 DSPLIB_max_exec_ci<uint8_t, DSPLIB_UINT8>(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
347 {
348 
349  DSPLIB_max_PrivArgs *pKerPrivArgs = (DSPLIB_max_PrivArgs *) handle;
350  uint32_t blockSize = pKerPrivArgs->blockSize;
351 
352  DSPLIB_STATUS status = DSPLIB_SUCCESS;
353 
354  __SE_TEMPLATE_v1 se0Params;
355 
356  uint8_t *restrict pInLocal = (uint8_t *) pIn;
357  uint8_t *restrict pOutLocal = (uint8_t *) pOut;
358 
359 #if DSPLIB_DEBUGPRINT
360  printf("Enter DSPLIB_max_exec_ci\n");
361 #endif
362 
363  typedef typename c7x::make_full_vector<uint8_t>::type vec;
364  uint32_t eleCount = c7x::element_count_of<vec>::value;
365 
366 #if DSPLIB_DEBUGPRINT
367  printf("Enter eleCount %d\n", eleCount);
368 #endif
369 
370  uint8_t *pBlock = pKerPrivArgs->bufPblock;
371  se0Params = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_SE0_PARAM_OFFSET);
372 
373  // Input samples
    // SE1 starts one vector width after SE0 and reuses the SE0 template.
374  __SE0_OPEN(pInLocal, se0Params);
375  __SE1_OPEN(pInLocal + eleCount, se0Params);
376 
377  // call loop logic method
378  vec maxVec = DSPLIB_max_loopLogic<uint8_t, vec>(blockSize, eleCount, pInLocal);
379 
    // Widen uchar -> ushort -> uint in halves and take pairwise maxima so that a
    // single uint vector remains.
380  c7x::ushort_vec lowShorts = __low_uchar_to_ushort(maxVec);
381  c7x::ushort_vec highShorts = __high_uchar_to_ushort(maxVec);
382  c7x::uint_vec maxOfInts = __max((__max((__low_ushort_to_uint(lowShorts)), (__high_ushort_to_uint(lowShorts)))),
383  (__max((__low_ushort_to_uint(highShorts)), (__high_ushort_to_uint(highShorts)))));
    // Presumably sorts the lanes in descending order so that lane 0 holds the
    // largest value - confirm against the intrinsic documentation.
384  maxOfInts = __sort_desc(maxOfInts);
385  // convert back to char
386  *pOutLocal = (uint8_t) maxOfInts.s[0];
387  // close SE0, SE1
388  __SE0_CLOSE();
389  __SE1_CLOSE();
390 
391  return status;
392 }
393 
/**
 * @brief Specialization of the max execution routine for int16_t input.
 *
 * Same flow as the generic version, but the final reduction widens the result
 * vector to 32-bit lanes and uses vector intrinsics.
 *
 * NOTE(review): the line carrying the return type (listing line 395) is
 * missing from this listing; the cross-reference index shows DSPLIB_STATUS.
 * NOTE(review): unlike the generic version, SE1 is opened unconditionally and
 * with the SE0 template; the SE1 template stored by the init routine is not
 * read here. Confirm this is intended for inputs shorter than two vectors.
 *
 * @param handle kernel handle, cast to DSPLIB_max_PrivArgs
 * @param pIn    input buffer, read as int16_t elements
 * @param pOut   output location; one int16_t value is written
 * @return DSPLIB_SUCCESS
 */
394 template <>
396 DSPLIB_max_exec_ci<int16_t, DSPLIB_INT16>(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
397 {
398 
399  DSPLIB_max_PrivArgs *pKerPrivArgs = (DSPLIB_max_PrivArgs *) handle;
400  uint32_t blockSize = pKerPrivArgs->blockSize;
401 
402  DSPLIB_STATUS status = DSPLIB_SUCCESS;
403 
404  __SE_TEMPLATE_v1 se0Params;
405 
406  int16_t *restrict pInLocal = (int16_t *) pIn;
407  int16_t *restrict pOutLocal = (int16_t *) pOut;
408 
409 #if DSPLIB_DEBUGPRINT
410  printf("Enter DSPLIB_max_exec_ci\n");
411 #endif
412 
413  typedef typename c7x::make_full_vector<int16_t>::type vec;
414  uint32_t eleCount = c7x::element_count_of<vec>::value;
415 
416 #if DSPLIB_DEBUGPRINT
417  printf("Enter eleCount %d\n", eleCount);
418 #endif
419 
420  uint8_t *pBlock = pKerPrivArgs->bufPblock;
421  se0Params = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_SE0_PARAM_OFFSET);
422 
423  // Input samples
    // SE1 starts one vector width after SE0 and reuses the SE0 template.
424  __SE0_OPEN(pInLocal, se0Params);
425  __SE1_OPEN(pInLocal + eleCount, se0Params);
426 
427  // call loop logic method
428  vec maxVec = DSPLIB_max_loopLogic<int16_t, vec>(blockSize, eleCount, pInLocal);
429 
    // Widen the low and high halves short -> int and merge them with one max.
430  c7x::int_vec lowVec = __low_short_to_int(maxVec);
431  c7x::int_vec highVec = __high_short_to_int(maxVec);
432 
433  c7x::int_vec maxOfInts = __max(lowVec, highVec);
    // Presumably sorts the lanes in descending order so that lane 0 holds the
    // largest value - confirm against the intrinsic documentation.
434  maxOfInts = __sort_desc(maxOfInts);
435  // convert back to short (int16_t)
436  *pOutLocal = (int16_t) maxOfInts.s[0];
437  // close SE0, SE1
438  __SE0_CLOSE();
439  __SE1_CLOSE();
440 
441  return status;
442 }
443 
/**
 * @brief Specialization of the max execution routine for uint16_t input.
 *
 * Same flow as the generic version, but the final reduction widens the result
 * vector to unsigned 32-bit lanes and uses vector intrinsics.
 *
 * NOTE(review): the line carrying the return type (listing line 445) is
 * missing from this listing; the cross-reference index shows DSPLIB_STATUS.
 * NOTE(review): unlike the generic version, SE1 is opened unconditionally and
 * with the SE0 template; the SE1 template stored by the init routine is not
 * read here. Confirm this is intended for inputs shorter than two vectors.
 *
 * @param handle kernel handle, cast to DSPLIB_max_PrivArgs
 * @param pIn    input buffer, read as uint16_t elements
 * @param pOut   output location; one uint16_t value is written
 * @return DSPLIB_SUCCESS
 */
444 template <>
446 DSPLIB_max_exec_ci<uint16_t, DSPLIB_UINT16>(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
447 {
448 
449  DSPLIB_max_PrivArgs *pKerPrivArgs = (DSPLIB_max_PrivArgs *) handle;
450  uint32_t blockSize = pKerPrivArgs->blockSize;
451 
452  DSPLIB_STATUS status = DSPLIB_SUCCESS;
453 
454  __SE_TEMPLATE_v1 se0Params;
455 
456  uint16_t *restrict pInLocal = (uint16_t *) pIn;
457  uint16_t *restrict pOutLocal = (uint16_t *) pOut;
458 
459 #if DSPLIB_DEBUGPRINT
460  printf("Enter DSPLIB_max_exec_ci\n");
461 #endif
462 
463  typedef typename c7x::make_full_vector<uint16_t>::type vec;
464  uint32_t eleCount = c7x::element_count_of<vec>::value;
465 
466 #if DSPLIB_DEBUGPRINT
467  printf("Enter eleCount %d\n", eleCount);
468 #endif
469 
470  uint8_t *pBlock = pKerPrivArgs->bufPblock;
471  se0Params = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_SE0_PARAM_OFFSET);
472 
473  // Input samples
    // SE1 starts one vector width after SE0 and reuses the SE0 template.
474  __SE0_OPEN(pInLocal, se0Params);
475  __SE1_OPEN(pInLocal + eleCount, se0Params);
476 
477  // call loop logic method
478  vec maxVec = DSPLIB_max_loopLogic<uint16_t, vec>(blockSize, eleCount, pInLocal);
479 
    // Widen the low and high halves ushort -> uint and merge them with one max.
480  c7x::uint_vec lowVec = __low_ushort_to_uint(maxVec);
481  c7x::uint_vec highVec = __high_ushort_to_uint(maxVec);
482  c7x::uint_vec maxOfInts = __max(lowVec, highVec);
    // Presumably sorts the lanes in descending order so that lane 0 holds the
    // largest value - confirm against the intrinsic documentation.
483  maxOfInts = __sort_desc(maxOfInts);
484  // convert back to unsigned short (uint16_t)
485  *pOutLocal = (uint16_t) maxOfInts.s[0];
486  // close SE0, SE1
487  __SE0_CLOSE();
488  __SE1_CLOSE();
489 
490  return status;
491 }
492 
494 DSPLIB_max_exec_ci<int32_t, DSPLIB_INT32>(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut);
495 
497 DSPLIB_max_exec_ci<uint32_t, DSPLIB_UINT32>(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut);
498 
500 DSPLIB_max_exec_ci<float, DSPLIB_FLOAT32>(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut);
501 
503 DSPLIB_max_exec_ci<double, DSPLIB_FLOAT64>(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut);
504 
/**
 * @brief Loop helper of the max kernel: returns a vector whose largest lane is
 *        the maximum of the first `length` elements of the input.
 *
 * Data is taken from streaming engines 0 and 1 through get_adv(), and in the
 * longer cases also from direct vector loads relative to the end of pSrc. The
 * exec routines in this file open the engines before calling this helper.
 * Five cases are distinguished by how many vector widths fit in `length`:
 * at most 1, between 1 and 2, between 2 and 3, between 3 and 4, and 4 or more.
 *
 * Tail handling in the last three cases loads the final `width` elements of
 * the buffer directly. That load may overlap elements already seen through
 * the engines; taking the max of a value twice does not change the result.
 *
 * @tparam T      element type
 * @tparam vec    vector type holding `width` elements of T
 * @param  length number of input elements
 * @param  width  number of elements per vector
 * @param  pSrc   start of the input; only dereferenced when length >= 2 * width
 * @return vector of candidate maxima; the caller reduces it to a scalar
 */
505 template <typename T, typename vec> vec DSPLIB_max_loopLogic(size_t length, size_t width, T *pSrc)
506 {
    // Every accumulator starts at the lowest representable value of T.
507  vec maxVal0 = (vec) std::numeric_limits<T>::lowest();
508  vec maxVal1 = maxVal0;
509  vec maxVec = maxVal1; // vector that holds the maximum values at the end of the loops
510 
511  // can only fill part of one width
512  if (length <= width) {
513  // printf("\nlength <= width\n");
514  maxVec = c7x::strm_eng<0, vec>::get_adv();
515  // for (size_t i = 0; i < length; i++) {
516  // maxVec.s[i] = pSrc[i];
517  // }
518  // maxVec.print();
519  // fill the uninitialized values with MIN_VAL (different from ::min() because ::min() is 0 for float type)
520  for (size_t i = length; i < width; i++) {
521  maxVec.s[i] = std::numeric_limits<T>::lowest();
522  }
523 
524  // debug
525  // maxVec.print();
526  }
527  // can fill one width, but only part of a second
528  else if (length < 2 * width) {
529  // printf("\nbetween 1 and 2\n");
530 
531  vec inVec0 = c7x::strm_eng<0, vec>::get_adv();
532  maxVal0 = __max(inVec0, maxVal0);
533  vec inVec1 = c7x::strm_eng<1, vec>::get_adv();
534  // fill the uninitialized values with MIN_VAL
    // Lanes at and beyond length % width are not part of the input and are
    // overwritten so they cannot win the max.
535  size_t remElements = length % width;
536  for (size_t i = remElements; i < width; i++) {
537  inVec1.s[i] = std::numeric_limits<T>::lowest();
538  }
539  maxVal1 = __max(inVec1, maxVal1);
540  maxVec = __max(maxVal0, maxVal1);
541  }
542  // can fill two widths, but only part of a third
543  else if (length < 3 * width) {
544  // printf("\nbetween 2 and 3\n");
545 
546  vec inVec0 = c7x::strm_eng<0, vec>::get_adv();
547  vec inVec1 = c7x::strm_eng<1, vec>::get_adv();
548  vec maxOfFirstTwo = __max(inVec0, inVec1);
    // Direct load of the last width elements; overlaps the second vector
    // unless length is exactly 3 * width - no masking needed.
549  vec inVec2 = *(vec *) (pSrc + length - width);
550  // fill the uninitialized values with MIN_VAL
551  // size_t remainder = length % width;
552  // for (size_t i = remainder; i < width; i++) {
553  // inVec2.s[i] = std::numeric_limits<T>::lowest();
554  // }
555  maxVec = __max(maxOfFirstTwo, inVec2);
556 
557  // debug
558  // maxVec.print();
559  }
560  // can fill three widths, but only part of a fourth
561  else if (length < 4 * width) {
562  // printf("\nbetween 3 and 4\n");
563 
564  vec inVec0 = c7x::strm_eng<0, vec>::get_adv();
565  vec inVec1 = c7x::strm_eng<1, vec>::get_adv();
566  // inVec0.print();
567  // inVec1.print();
568  vec maxOfFirstTwo = __max(inVec0, inVec1);
569  // maxOfFirstTwo.print();
570 
    // Second fetch from SE0 supplies the third vector; the last width
    // elements come from a direct, possibly overlapping load.
571  vec inVec2 = c7x::strm_eng<0, vec>::get_adv();
572  vec inVec3 = *(vec *) (pSrc + length - width);
573  // inVec2.print();
574  // inVec3.print();
575 
576  // fill the uninitialized values with MIN_VAL
577  // size_t remainder = length % width;
578  // for (size_t i = remainder; i < width; i++) {
579  // inVec3.s[i] = std::numeric_limits<T>::lowest();
580  // }
581  vec maxOfLastTwo = __max(inVec2, inVec3);
582  maxVec = __max(maxOfFirstTwo, maxOfLastTwo);
583 
584  // debug
585  // maxVec.print();
586  }
587  else {
588  // redefine all of the max vectors for large loops since .s[i] calls make random calls to the stack even when not
589  // being used, which will increase the ii
590  vec maxValA = (vec) std::numeric_limits<T>::lowest();
591  vec maxValB = maxValA;
592  vec maxValC = maxValB;
593  vec maxValD = maxValC;
594  // loop through all the SIMD width blocks
595  size_t numIterations = length / (width * UNROLL_FACTOR); // always read a multiple of SIMD width values
596 
    // Each iteration consumes two vectors from SE0 and two from SE1 and keeps
    // four independent running maxima.
597  for (size_t i = 0; i < numIterations; i += 1) {
598  // put in parenthesis around the get advance call when it's being used in an argument to a c7x intrinsic
599  // because the Ninja compiler will not compile due to "not enough arguments"
600  maxValA = __max((c7x::strm_eng<0, vec>::get_adv()), maxValA);
601  maxValB = __max((c7x::strm_eng<1, vec>::get_adv()), maxValB);
602  maxValC = __max((c7x::strm_eng<0, vec>::get_adv()), maxValC);
603  maxValD = __max((c7x::strm_eng<1, vec>::get_adv()), maxValD);
604  }
605 
606  // number of remaining elements is less than 4 vector lengths
607  // read remaining vector elements after full iterations
608  // best case scenario: remaining is one vector length
609  // worst case scenario: remaining is three vectors
610  // printf("len: %d width: %d\n", length, width);
    // remBlockSize = elements not covered by the unrolled loop;
    // remVecLen = presumably ceil(remBlockSize / width) - confirm DSPLIB_ceilingDiv.
611  int32_t remBlockSize = length - (UNROLL_FACTOR * numIterations * width);
612  // printf("\nremblocksize: %d\n", remBlockSize);
613  int32_t remVecLen = DSPLIB_ceilingDiv(remBlockSize, width);
614  // printf("\nremveclen: %d\n", remVecLen);
615 
616  // vec remVec = (vec) std::numeric_limits<T>::max();
617 
    // The remainder is read backwards from the end of the buffer with direct
    // loads, so the first of these loads may overlap data already processed.
    // remVec is loaded unconditionally, including when remBlockSize is 0.
618  T *remStart = pSrc + length - width;
619  vec remVec = *(vec *) remStart;
620  // 1 vector remainder
621  if (remBlockSize != 0 && remVecLen == 1) {
622 
623  // printf("\ninside length rem 1\n");
    // Same load as the initialization of remVec above.
624  remVec = *(vec *) remStart;
625  // memcpy(&remVec, remStart, remBlockSize);
626  }
627 
628  // 2 vector remainder
629  else if (remBlockSize != 0 && remVecLen == 2) {
630 
631  // printf("\ninside length rem 2\n");
632  // vec remVec0 = c7x::strm_eng<0, vec>::get_adv();
633  vec remVec0 = *(vec *) (remStart - width);
634  remVec = __max(remVec0, remVec);
635  }
636 
637  // 3 vector remainder
638  else if (remBlockSize != 0 && remVecLen == 3) {
639 
640  // printf("\ninside length rem 3\n");
641  // vec remVec0 = c7x::strm_eng<0, vec>::get_adv();
642  // vec remVec1 = c7x::strm_eng<1, vec>::get_adv();
643  vec remVec0 = *(vec *) (remStart - width);
644  vec remVec1 = *(vec *) (remStart - 2 * width);
645 
646  vec remVec0_1 = __max(remVec0, remVec1);
647  remVec = __max(remVec0_1, remVec);
648  }
649 
650  // 4 vector remainder
651  else if (remBlockSize != 0 && remVecLen == 4) {
652 
653  // printf("\ninside length rem 4\n");
654  // vec remVec0 = c7x::strm_eng<0, vec>::get_adv();
655  // vec remVec1 = c7x::strm_eng<1, vec>::get_adv();
656  // vec remVec2 = c7x::strm_eng<0, vec>::get_adv();
657 
658  vec remVec0 = *(vec *) (remStart - width);
659  vec remVec1 = *(vec *) (remStart - 2 * width);
660  vec remVec2 = *(vec *) (remStart - 3 * width);
661  vec remVec0_1 = __max(remVec0, remVec1);
662  vec remVec2_3 = __max(remVec, remVec2);
663  remVec = __max(remVec0_1, remVec2_3);
664  }
665 
666  else {
667  /* Nothing to do here */
668  }
669 
670  // std::cout << "print maxvals" << std::endl;
671  // printf("\nmaxValA\n");
672  // maxValA.print();
673  // printf("\nmaxValB\n");
674  // maxValB.print();
675  // printf("\nmaxValC\n");
676  // maxValC.print();
677  // printf("\nmaxValD\n");
678  // maxValD.print();
    // Merge the four running maxima and the remainder into the result vector.
679  vec temp1 = __max(maxValA, maxValB);
680  vec temp2 = __max(maxValC, maxValD);
681  vec maxVecA = __max(temp1, temp2);
682  vec maxRemVec = __max(maxVecA, remVec);
683  maxVec = maxRemVec;
684  }
685  return maxVec;
686 }
#define SE_SE0_PARAM_OFFSET
#define SE_SE1_PARAM_OFFSET
DSPLIB_STATUS DSPLIB_max_exec_ci(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
This function is the main execution function for the C7x implementation of the kernel....
template DSPLIB_STATUS DSPLIB_max_init_ci< float >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_max_InitArgs *pKerInitArgs)
DSPLIB_STATUS DSPLIB_max_exec_ci< int8_t, DSPLIB_INT8 >(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
vec DSPLIB_max_loopLogic(size_t length, size_t width, T *pSrc)
This function is the kernel loop helper function for the optimized implementation of the kernel....
template DSPLIB_STATUS DSPLIB_max_exec_ci< float, DSPLIB_FLOAT32 >(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
#define UNROLL_FACTOR
DSPLIB_STATUS DSPLIB_max_exec_ci< uint16_t, DSPLIB_UINT16 >(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
template DSPLIB_STATUS DSPLIB_max_init_ci< int8_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_max_InitArgs *pKerInitArgs)
template DSPLIB_STATUS DSPLIB_max_init_ci< uint16_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_max_InitArgs *pKerInitArgs)
template DSPLIB_STATUS DSPLIB_max_init_ci< double >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_max_InitArgs *pKerInitArgs)
DSPLIB_STATUS DSPLIB_max_init_ci(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_max_InitArgs *pKerInitArgs)
This function is the initialization function for the C7x implementation of the kernel....
template DSPLIB_STATUS DSPLIB_max_init_ci< uint8_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_max_InitArgs *pKerInitArgs)
template DSPLIB_STATUS DSPLIB_max_exec_ci< uint32_t, DSPLIB_UINT32 >(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
template DSPLIB_STATUS DSPLIB_max_init_ci< int16_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_max_InitArgs *pKerInitArgs)
template DSPLIB_STATUS DSPLIB_max_init_ci< uint32_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_max_InitArgs *pKerInitArgs)
template DSPLIB_STATUS DSPLIB_max_init_ci< int32_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_max_InitArgs *pKerInitArgs)
template DSPLIB_STATUS DSPLIB_max_exec_ci< int32_t, DSPLIB_INT32 >(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
DSPLIB_STATUS DSPLIB_max_exec_ci< uint8_t, DSPLIB_UINT8 >(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
template DSPLIB_STATUS DSPLIB_max_exec_ci< double, DSPLIB_FLOAT64 >(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
DSPLIB_STATUS DSPLIB_max_exec_ci< int16_t, DSPLIB_INT16 >(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
Header file for kernel's internal use. For the kernel's interface, please see DSPLIB_max.
DSPLIB_STATUS_NAME
The enumeration of all status codes.
Definition: DSPLIB_types.h:151
void * DSPLIB_kernelHandle
Handle type for DSPLIB operations.
Definition: DSPLIB_types.h:172
@ DSPLIB_SUCCESS
Definition: DSPLIB_types.h:152
A structure for a 1 dimensional buffer descriptor.
Structure containing the parameters to initialize the kernel.
Definition: DSPLIB_max.h:105
Structure that is reserved for internal use by the kernel.
uint8_t bufPblock[DSPLIB_MAX_IXX_IXX_OXX_PBLOCK_SIZE]
int32_t blockSize
Size of input buffer for different batches DSPLIB_max_init that will be retrieved and used by DSPLIB_...