47 #include "../common/c71/DSPLIB_inlines.h"
// Unroll factor of the main reduction loop. In this file it is used to size
// the 2D stream (UNROLL_FACTOR >> 1 vectors per engine per iteration) and to
// compute how many elements the unrolled loop has already consumed.
52 #define UNROLL_FACTOR 4
// NOTE(review): this listing omits the function signature, closing braces and
// other short lines. The fragment appears to be the body of
// DSPLIB_max_init_ci<dataType> -- confirm against the full source.
//
// Purpose (from the visible lines): build two streaming-engine templates,
// se0Params and se1Params, whose shape depends on how blockSize compares with
// the element count of one full vector of dataType.
54 template <
typename dataType>
61 __SE_TEMPLATE_v1 se0Params, se1Params;
63 __SE_ELETYPE SE_ELETYPE;
64 __SE_VECLEN SE_VECLEN;
// pBlock is not referenced again in the visible lines; presumably the finished
// templates are stored into it -- TODO confirm.
68 uint8_t *pBlock = pKerPrivArgs->
bufPblock;
69 uint32_t blockSize = pKerPrivArgs->
blockSize;
// Vector type, lane count and stream-engine type codes all derive from the
// full-width vector of dataType.
71 typedef typename c7x::make_full_vector<dataType>::type vec;
72 uint32_t eleCount = c7x::element_count_of<vec>::value;
73 SE_VECLEN = c7x::se_veclen<vec>::value;
74 SE_ELETYPE = c7x::se_eletype<vec>::value;
// length = number of input elements, width = elements per vector.
76 uint32_t length = blockSize;
77 uint32_t width = eleCount;
// NOTE(review): no `#if DSPLIB_DEBUGPRINT` guard is visible around this printf
// in the listing, unlike the debug prints elsewhere in the file -- confirm.
80 printf(
"Enter eleCount %d\n", eleCount);
// Default for both engines: a 1D stream of exactly one full vector.
86 se0Params = __gen_SE_TEMPLATE_v1();
89 se0Params.ICNT0 = width;
90 se0Params.ELETYPE = SE_ELETYPE;
91 se0Params.VECLEN = SE_VECLEN;
92 se0Params.DIMFMT = __SE_DIMFMT_1D;
94 se1Params = __gen_SE_TEMPLATE_v1();
97 se1Params.ICNT0 = width;
98 se1Params.ELETYPE = SE_ELETYPE;
99 se1Params.VECLEN = SE_VECLEN;
100 se1Params.DIMFMT = __SE_DIMFMT_1D;
// numBlocks is not used in the visible lines; remBlocksSize is the element
// count left over after the last whole vector.
104 uint32_t numBlocks = length / width;
105 uint32_t remBlocksSize = length % width;
// Case 1: input fits in one vector -- SE0 streams only `length` elements.
112 if (length <= width) {
116 se0Params.ICNT0 = length;
// Case 2: more than one but fewer than two vectors -- SE0 streams a full
// vector, SE1 streams only the leftover elements.
122 else if (length < 2 * width) {
126 se0Params.ICNT0 = width;
128 se1Params.ICNT0 = remBlocksSize;
// Case 3: fewer than three vectors -- each engine streams one full vector.
133 else if (length < 3 * width) {
135 se0Params.ICNT0 = width;
137 se1Params.ICNT0 = width;
// Case 4: fewer than four vectors -- SE0 becomes a 2D stream whose rows start
// 2 * width elements apart; SE1 stays a 1D single-vector stream.
// NOTE(review): no ICNT1 assignment for SE0 is visible in this branch -- confirm.
142 else if (length < 4 * width) {
144 se0Params.DIMFMT = __SE_DIMFMT_2D;
145 se0Params.DIM1 = 2 * width;
147 se0Params.ICNT0 = width;
149 se1Params.ICNT0 = width;
// Remaining case (the `else` line itself is not visible in this listing):
// a 2D stream of full vectors with a row stride of 2 * width and
// length / (2 * width) rows; SE1 reuses the identical template.
158 se0Params.DIMFMT = __SE_DIMFMT_2D;
160 se0Params.DIM1 = 2 * width;
162 se0Params.ICNT1 = length / (((uint32_t)
UNROLL_FACTOR >> (uint32_t)1) * width);
164 se0Params.ICNT0 = width;
167 se1Params = se0Params;
// NOTE(review): signature, braces, `#endif` and other short lines are missing
// from this listing. The fragment appears to be the generic
// DSPLIB_max_exec_ci<dataType, dataIn> -- confirm against the full source.
//
// Purpose (from the visible lines): open both streaming engines on the input,
// obtain a per-lane maximum vector from DSPLIB_max_loopLogic, reduce it to a
// single scalar and store that scalar to the output.
217 template <
typename dataType,
int32_t dataIn>
221 uint32_t blockSize = pKerPrivArgs->
blockSize;
// The lines that fill se0Params / se1Params are not visible; presumably they
// are loaded from pBlock -- TODO confirm.
225 __SE_TEMPLATE_v1 se0Params, se1Params;
228 dataType *restrict pInLocal = (dataType *) pIn;
229 dataType *restrict pOutLocal = (dataType *) pOut;
231 #if DSPLIB_DEBUGPRINT
232 printf(
"Enter DSPLIB_max_exec_ci\n");
235 typedef typename c7x::make_full_vector<dataType>::type vec;
236 uint32_t eleCount = c7x::element_count_of<vec>::value;
238 #if DSPLIB_DEBUGPRINT
239 printf(
"Enter eleCount %d\n", eleCount);
242 uint8_t *pBlock = pKerPrivArgs->
bufPblock;
// length and width are not referenced in the visible lines below.
246 uint32_t length = blockSize;
247 uint32_t width = eleCount;
// SE0 starts at the first input element, SE1 one vector (eleCount elements)
// further in, each with its own template.
250 __SE0_OPEN(pInLocal, se0Params);
252 __SE1_OPEN(pInLocal + eleCount, se1Params);
258 #if DSPLIB_DEBUGPRINT
259 printf(
"DSPLIB_DEBUGPRINT blockSize %d\n", blockSize);
// Per-lane maximum vector produced by the helper.
263 vec maxVec = DSPLIB_max_loopLogic<dataType, vec>(blockSize, eleCount, pInLocal);
// Scalar reduction: read the lanes of maxVec through a dataType pointer.
// `largest` starts as lane 0 and currentMax is left pointing at lane 1.
265 dataType *currentMax = (dataType *) &maxVec;
266 dataType largest = *currentMax++;
267 dataType currentValue;
// NOTE(review): no advance of currentMax is visible inside this loop. If the
// full source has none, every iteration re-reads lane 1 and the remaining
// lanes are never compared -- confirm (the line may simply be missing from
// this listing).
269 for (
size_t i = 1; i < c7x::element_count_of<vec>::value; i++) {
270 currentValue = *currentMax;
272 if (currentValue > largest) {
273 largest = currentValue;
// Store the scalar maximum to the first output element.
279 *pOutLocal = largest;
// NOTE(review): signature and short lines are missing from this listing. The
// int8_t types used below suggest this is the
// DSPLIB_max_exec_ci<int8_t, DSPLIB_INT8> specialization -- confirm.
//
// Purpose (from the visible lines): same flow as the generic version, but the
// final reduction uses vector widening and max intrinsics instead of a scalar
// loop.
295 uint32_t blockSize = pKerPrivArgs->
blockSize;
// Only one template is declared here; how it is filled is not visible.
299 __SE_TEMPLATE_v1 se0Params;
301 int8_t *restrict pInLocal = (int8_t *) pIn;
302 int8_t *restrict pOutLocal = (int8_t *) pOut;
304 #if DSPLIB_DEBUGPRINT
305 printf(
"Enter DSPLIB_max_exec_ci\n");
308 typedef typename c7x::make_full_vector<int8_t>::type vec;
309 uint32_t eleCount = c7x::element_count_of<vec>::value;
311 #if DSPLIB_DEBUGPRINT
312 printf(
"Enter eleCount %d\n", eleCount);
315 uint8_t *pBlock = pKerPrivArgs->
bufPblock;
// NOTE(review): SE1 is opened with se0Params, whereas the generic version
// uses a separate se1Params. The init code gives SE1 a different ICNT0 when
// width < length < 2 * width -- confirm this reuse is intended.
319 __SE0_OPEN(pInLocal, se0Params);
320 __SE1_OPEN(pInLocal + eleCount, se0Params);
// Per-lane maximum vector produced by the helper.
323 vec maxVec = DSPLIB_max_loopLogic<int8_t, vec>(blockSize, eleCount, pInLocal);
// Widen the char lanes to short and then to int (presumably low/high halves
// of the vector each time), giving four int vectors that together hold every lane.
325 c7x::short_vec lowShorts = __low_char_to_short(maxVec);
326 c7x::short_vec highShorts = __high_char_to_short(maxVec);
327 c7x::int_vec lowlowInts = __low_short_to_int(lowShorts);
328 c7x::int_vec highlowInts = __high_short_to_int(lowShorts);
329 c7x::int_vec lowhighInts = __low_short_to_int(highShorts);
330 c7x::int_vec highhighInts = __high_short_to_int(highShorts);
// Pairwise lane-wise max collapses the four int vectors into one.
331 c7x::int_vec lowmax = __max(lowlowInts, highlowInts);
332 c7x::int_vec highmax = __max(lowhighInts, highhighInts);
333 c7x::int_vec maxOfInts = __max(lowmax, highmax);
// After the (presumably descending) sort, lane 0 is taken as the overall maximum.
334 maxOfInts = __sort_desc(maxOfInts);
// Narrow back to the element type and store to the first output element.
336 *pOutLocal = (int8_t) maxOfInts.s[0];
// NOTE(review): signature and short lines are missing from this listing. The
// uint8_t types used below suggest this is the
// DSPLIB_max_exec_ci<uint8_t, DSPLIB_UINT8> specialization -- confirm.
//
// Purpose (from the visible lines): open both engines, get the per-lane
// maximum vector, reduce it with unsigned widening + max intrinsics, store
// one uint8_t result.
350 uint32_t blockSize = pKerPrivArgs->
blockSize;
// Only one template is declared here; how it is filled is not visible.
354 __SE_TEMPLATE_v1 se0Params;
356 uint8_t *restrict pInLocal = (uint8_t *) pIn;
357 uint8_t *restrict pOutLocal = (uint8_t *) pOut;
359 #if DSPLIB_DEBUGPRINT
360 printf(
"Enter DSPLIB_max_exec_ci\n");
363 typedef typename c7x::make_full_vector<uint8_t>::type vec;
364 uint32_t eleCount = c7x::element_count_of<vec>::value;
366 #if DSPLIB_DEBUGPRINT
367 printf(
"Enter eleCount %d\n", eleCount);
370 uint8_t *pBlock = pKerPrivArgs->
bufPblock;
// NOTE(review): both engines are opened with se0Params (the generic version
// uses a separate se1Params) -- confirm this is intended.
374 __SE0_OPEN(pInLocal, se0Params);
375 __SE1_OPEN(pInLocal + eleCount, se0Params);
// Per-lane maximum vector produced by the helper.
378 vec maxVec = DSPLIB_max_loopLogic<uint8_t, vec>(blockSize, eleCount, pInLocal);
// Widen uchar lanes to ushort, then to uint inside the nested expression,
// taking the lane-wise max of the four resulting uint vectors in one statement.
380 c7x::ushort_vec lowShorts = __low_uchar_to_ushort(maxVec);
381 c7x::ushort_vec highShorts = __high_uchar_to_ushort(maxVec);
382 c7x::uint_vec maxOfInts = __max((__max((__low_ushort_to_uint(lowShorts)), (__high_ushort_to_uint(lowShorts)))),
383 (__max((__low_ushort_to_uint(highShorts)), (__high_ushort_to_uint(highShorts)))));
// After the (presumably descending) sort, lane 0 is taken as the overall maximum.
384 maxOfInts = __sort_desc(maxOfInts);
// Narrow back to the element type and store to the first output element.
386 *pOutLocal = (uint8_t) maxOfInts.s[0];
// NOTE(review): signature and short lines are missing from this listing. The
// int16_t types used below suggest this is the
// DSPLIB_max_exec_ci<int16_t, DSPLIB_INT16> specialization -- confirm.
//
// Purpose (from the visible lines): open both engines, get the per-lane
// maximum vector, widen it to int, reduce, and store one int16_t result.
400 uint32_t blockSize = pKerPrivArgs->
blockSize;
// Only one template is declared here; how it is filled is not visible.
404 __SE_TEMPLATE_v1 se0Params;
406 int16_t *restrict pInLocal = (int16_t *) pIn;
407 int16_t *restrict pOutLocal = (int16_t *) pOut;
409 #if DSPLIB_DEBUGPRINT
410 printf(
"Enter DSPLIB_max_exec_ci\n");
413 typedef typename c7x::make_full_vector<int16_t>::type vec;
414 uint32_t eleCount = c7x::element_count_of<vec>::value;
416 #if DSPLIB_DEBUGPRINT
417 printf(
"Enter eleCount %d\n", eleCount);
420 uint8_t *pBlock = pKerPrivArgs->
bufPblock;
// NOTE(review): both engines are opened with se0Params (the generic version
// uses a separate se1Params) -- confirm this is intended.
424 __SE0_OPEN(pInLocal, se0Params);
425 __SE1_OPEN(pInLocal + eleCount, se0Params);
// Per-lane maximum vector produced by the helper.
428 vec maxVec = DSPLIB_max_loopLogic<int16_t, vec>(blockSize, eleCount, pInLocal);
// Widen the short lanes to int (presumably low and high halves of the vector).
430 c7x::int_vec lowVec = __low_short_to_int(maxVec);
431 c7x::int_vec highVec = __high_short_to_int(maxVec);
// Lane-wise max of the two halves, then a (presumably descending) sort so that
// lane 0 is taken as the overall maximum.
433 c7x::int_vec maxOfInts = __max(lowVec, highVec);
434 maxOfInts = __sort_desc(maxOfInts);
// Narrow back to the element type and store to the first output element.
436 *pOutLocal = (int16_t) maxOfInts.s[0];
// NOTE(review): signature and short lines are missing from this listing. The
// uint16_t types used below suggest this is the
// DSPLIB_max_exec_ci<uint16_t, DSPLIB_UINT16> specialization -- confirm.
//
// Purpose (from the visible lines): open both engines, get the per-lane
// maximum vector, widen it to uint, reduce, and store one uint16_t result.
450 uint32_t blockSize = pKerPrivArgs->
blockSize;
// Only one template is declared here; how it is filled is not visible.
454 __SE_TEMPLATE_v1 se0Params;
456 uint16_t *restrict pInLocal = (uint16_t *) pIn;
457 uint16_t *restrict pOutLocal = (uint16_t *) pOut;
459 #if DSPLIB_DEBUGPRINT
460 printf(
"Enter DSPLIB_max_exec_ci\n");
463 typedef typename c7x::make_full_vector<uint16_t>::type vec;
464 uint32_t eleCount = c7x::element_count_of<vec>::value;
466 #if DSPLIB_DEBUGPRINT
467 printf(
"Enter eleCount %d\n", eleCount);
470 uint8_t *pBlock = pKerPrivArgs->
bufPblock;
// NOTE(review): both engines are opened with se0Params (the generic version
// uses a separate se1Params) -- confirm this is intended.
474 __SE0_OPEN(pInLocal, se0Params);
475 __SE1_OPEN(pInLocal + eleCount, se0Params);
// Per-lane maximum vector produced by the helper.
478 vec maxVec = DSPLIB_max_loopLogic<uint16_t, vec>(blockSize, eleCount, pInLocal);
// Widen the ushort lanes to uint (presumably low and high halves of the vector).
480 c7x::uint_vec lowVec = __low_ushort_to_uint(maxVec);
481 c7x::uint_vec highVec = __high_ushort_to_uint(maxVec);
// Lane-wise max of the two halves, then a (presumably descending) sort so that
// lane 0 is taken as the overall maximum.
482 c7x::uint_vec maxOfInts = __max(lowVec, highVec);
483 maxOfInts = __sort_desc(maxOfInts);
// Narrow back to the element type and store to the first output element.
485 *pOutLocal = (uint16_t) maxOfInts.s[0];
// NOTE(review): signature, braces, the final `else`, the declaration of
// numIterations and the return statement are missing from this listing. The
// fragment appears to be the body of
//   vec DSPLIB_max_loopLogic(size_t length, size_t width, T *pSrc)
// -- confirm against the full source.
//
// Purpose (from the visible lines): produce one vector whose lanes together
// contain the maximum of the `length` input elements. Short inputs are handled
// by dedicated branches; longer inputs use a 4-way unrolled loop fed by both
// streaming engines plus direct loads for the tail.
//
// Accumulators start at the lowest representable value of T.
507 vec maxVal0 = (vec) std::numeric_limits<T>::lowest();
508 vec maxVal1 = maxVal0;
509 vec maxVec = maxVal1;
// Case 1: at most one vector. Lanes at index >= length are overwritten with
// the lowest value so they cannot win the max.
512 if (length <= width) {
514 maxVec = c7x::strm_eng<0, vec>::get_adv();
520 for (
size_t i = length; i < width; i++) {
521 maxVec.s[i] = std::numeric_limits<T>::lowest();
// Case 2: one full vector from SE0 plus a partial vector from SE1. Lanes of
// the second vector beyond length % width are replaced with the lowest value.
528 else if (length < 2 * width) {
531 vec inVec0 = c7x::strm_eng<0, vec>::get_adv();
532 maxVal0 = __max(inVec0, maxVal0);
533 vec inVec1 = c7x::strm_eng<1, vec>::get_adv();
535 size_t remElements = length % width;
536 for (
size_t i = remElements; i < width; i++) {
537 inVec1.s[i] = std::numeric_limits<T>::lowest();
539 maxVal1 = __max(inVec1, maxVal1);
540 maxVec = __max(maxVal0, maxVal1);
// Case 3: two full vectors from the engines; the rest is covered by loading
// the last `width` elements directly. That load may overlap elements already
// seen, which does not change a maximum.
543 else if (length < 3 * width) {
546 vec inVec0 = c7x::strm_eng<0, vec>::get_adv();
547 vec inVec1 = c7x::strm_eng<1, vec>::get_adv();
548 vec maxOfFirstTwo = __max(inVec0, inVec1);
549 vec inVec2 = *(vec *) (pSrc + length - width);
555 maxVec = __max(maxOfFirstTwo, inVec2);
// Case 4: SE0, SE1, then SE0 again for three vectors, plus a direct load of
// the last `width` elements for the tail.
561 else if (length < 4 * width) {
564 vec inVec0 = c7x::strm_eng<0, vec>::get_adv();
565 vec inVec1 = c7x::strm_eng<1, vec>::get_adv();
568 vec maxOfFirstTwo = __max(inVec0, inVec1);
571 vec inVec2 = c7x::strm_eng<0, vec>::get_adv();
572 vec inVec3 = *(vec *) (pSrc + length - width);
581 vec maxOfLastTwo = __max(inVec2, inVec3);
582 maxVec = __max(maxOfFirstTwo, maxOfLastTwo);
// General case (the `else` line is not visible): four independent
// accumulators, each iteration consuming two vectors from SE0 and two from SE1.
590 vec maxValA = (vec) std::numeric_limits<T>::lowest();
591 vec maxValB = maxValA;
592 vec maxValC = maxValB;
593 vec maxValD = maxValC;
// numIterations is declared on a line not visible in this listing.
597 for (
size_t i = 0; i < numIterations; i += 1) {
600 maxValA = __max((c7x::strm_eng<0, vec>::get_adv()), maxValA);
601 maxValB = __max((c7x::strm_eng<1, vec>::get_adv()), maxValB);
602 maxValC = __max((c7x::strm_eng<0, vec>::get_adv()), maxValC);
603 maxValD = __max((c7x::strm_eng<1, vec>::get_adv()), maxValD);
// Elements not consumed by the unrolled loop, and how many vectors are needed
// to cover them (DSPLIB_ceilingDiv is presumably a ceiling division).
// NOTE(review): the result is stored in int32_t although the operands appear
// to be size_t -- confirm the narrowing is safe for the supported sizes.
611 int32_t remBlockSize = length - (
UNROLL_FACTOR * numIterations * width);
613 int32_t remVecLen = DSPLIB_ceilingDiv(remBlockSize, width);
// The tail is read with direct loads counted backwards from the end of the
// buffer. remVec is loaded unconditionally, so it is valid even when
// remBlockSize is 0 (it then re-reads elements already seen).
618 T *remStart = pSrc + length - width;
619 vec remVec = *(vec *) remStart;
// One remaining vector: the same tail vector is loaded again.
621 if (remBlockSize != 0 && remVecLen == 1) {
624 remVec = *(vec *) remStart;
// Two remaining vectors: the tail plus the vector immediately before it.
629 else if (remBlockSize != 0 && remVecLen == 2) {
633 vec remVec0 = *(vec *) (remStart - width);
634 remVec = __max(remVec0, remVec);
// Three remaining vectors: the tail plus the two vectors before it.
638 else if (remBlockSize != 0 && remVecLen == 3) {
643 vec remVec0 = *(vec *) (remStart - width);
644 vec remVec1 = *(vec *) (remStart - 2 * width);
646 vec remVec0_1 = __max(remVec0, remVec1);
647 remVec = __max(remVec0_1, remVec);
// Four remaining vectors: the tail plus the three vectors before it.
651 else if (remBlockSize != 0 && remVecLen == 4) {
658 vec remVec0 = *(vec *) (remStart - width);
659 vec remVec1 = *(vec *) (remStart - 2 * width);
660 vec remVec2 = *(vec *) (remStart - 3 * width);
661 vec remVec0_1 = __max(remVec0, remVec1);
662 vec remVec2_3 = __max(remVec, remVec2);
663 remVec = __max(remVec0_1, remVec2_3);
// Fold the four loop accumulators together and then with the tail result.
// maxRemVec is presumably the value returned (the return is not visible).
679 vec temp1 = __max(maxValA, maxValB);
680 vec temp2 = __max(maxValC, maxValD);
681 vec maxVecA = __max(temp1, temp2);
682 vec maxRemVec = __max(maxVecA, remVec);
#define SE_SE0_PARAM_OFFSET
#define SE_SE1_PARAM_OFFSET
DSPLIB_STATUS DSPLIB_max_exec_ci(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
This function is the main execution function for the C7x implementation of the kernel....
template DSPLIB_STATUS DSPLIB_max_init_ci< float >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_max_InitArgs *pKerInitArgs)
DSPLIB_STATUS DSPLIB_max_exec_ci< int8_t, DSPLIB_INT8 >(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
vec DSPLIB_max_loopLogic(size_t length, size_t width, T *pSrc)
This function is the kernel loop helper function for the optimized implementation of the kernel....
template DSPLIB_STATUS DSPLIB_max_exec_ci< float, DSPLIB_FLOAT32 >(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
DSPLIB_STATUS DSPLIB_max_exec_ci< uint16_t, DSPLIB_UINT16 >(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
template DSPLIB_STATUS DSPLIB_max_init_ci< int8_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_max_InitArgs *pKerInitArgs)
template DSPLIB_STATUS DSPLIB_max_init_ci< uint16_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_max_InitArgs *pKerInitArgs)
template DSPLIB_STATUS DSPLIB_max_init_ci< double >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_max_InitArgs *pKerInitArgs)
DSPLIB_STATUS DSPLIB_max_init_ci(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_max_InitArgs *pKerInitArgs)
This function is the initialization function for the C7x implementation of the kernel....
template DSPLIB_STATUS DSPLIB_max_init_ci< uint8_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_max_InitArgs *pKerInitArgs)
template DSPLIB_STATUS DSPLIB_max_exec_ci< uint32_t, DSPLIB_UINT32 >(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
template DSPLIB_STATUS DSPLIB_max_init_ci< int16_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_max_InitArgs *pKerInitArgs)
template DSPLIB_STATUS DSPLIB_max_init_ci< uint32_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_max_InitArgs *pKerInitArgs)
template DSPLIB_STATUS DSPLIB_max_init_ci< int32_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_max_InitArgs *pKerInitArgs)
template DSPLIB_STATUS DSPLIB_max_exec_ci< int32_t, DSPLIB_INT32 >(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
DSPLIB_STATUS DSPLIB_max_exec_ci< uint8_t, DSPLIB_UINT8 >(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
template DSPLIB_STATUS DSPLIB_max_exec_ci< double, DSPLIB_FLOAT64 >(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
DSPLIB_STATUS DSPLIB_max_exec_ci< int16_t, DSPLIB_INT16 >(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
Header file for kernel's internal use. For the kernel's interface, please see DSPLIB_max.
DSPLIB_STATUS_NAME
The enumeration of all status codes.
void * DSPLIB_kernelHandle
Handle type for DSPLIB operations.
A structure for a 1 dimensional buffer descriptor.
Structure containing the parameters to initialize the kernel.
Structure that is reserved for internal use by the kernel.
uint8_t bufPblock[DSPLIB_MAX_IXX_IXX_OXX_PBLOCK_SIZE]
int32_t blockSize
Size of input buffer for different batches DSPLIB_max_init that will be retrieved and used by DSPLIB_...