47 #include "../common/c71/DSPLIB_inlines.h"
50 #include "c7x_scalable.h"
// Compile-time constant with value 2. No use of it is visible in this excerpt.
59 #define INDEX_UNROLL_FACTOR 2
// Lane offsets 0..15 held in an unsigned-short vector (one value per lane, 16 lanes).
// NOTE(review): the use site is not visible in this excerpt; presumably added to a
// broadcast base index when the tail vector of a 16-bit input is processed - confirm.
65 const c7x::ushort_vec
lastRunOffsetsShort = c7x::ushort_vec(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
// Lane offsets 0..31 held in an unsigned-char vector (one value per lane, 32 lanes).
// NOTE(review): the use site is not visible in this excerpt; presumably the 8-bit
// counterpart of lastRunOffsetsShort - confirm.
71 const c7x::uchar_vec
lastRunOffsetsChar = c7x::uchar_vec(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
72 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
// Fragment of a templated init routine. The signature is not part of this excerpt; it
// appears to be DSPLIB_maxIndex_init_ci<dataType> - confirm. The visible lines fill in
// two streaming-engine templates (se0Params, se1Params) from the kernel's block size.
75 template <
typename dataType>
82 __SE_TEMPLATE_v1 se0Params, se1Params;
84 __SE_ELETYPE SE_ELETYPE;
85 __SE_VECLEN SE_VECLEN;
// Buffer pointer and block size are read from the kernel's private arguments.
// NOTE(review): no write through pBlock is visible here; presumably the finished
// templates are stored into it in lines not shown - confirm.
89 uint8_t *pBlock = pKerPrivArgs->
bufPblock;
90 uint32_t blocksSize = pKerPrivArgs->
blockSize;
// Full-width vector type for dataType, its lane count, and the matching SE settings.
92 typedef typename c7x::make_full_vector<dataType>::type vec;
93 uint32_t eleCount = c7x::element_count_of<vec>::value;
94 SE_VECLEN = c7x::se_veclen<vec>::value;
95 SE_ELETYPE = c7x::se_eletype<vec>::value;
// length = number of input elements, width = elements per vector.
96 uint32_t length = blocksSize;
97 uint32_t width = eleCount;
// NOTE(review): no DSPLIB_DEBUGPRINT guard is visible around this printf in the excerpt
// (the other debug prints in this file are guarded) - confirm it is guarded.
100 printf(
"Enter eleCount %d\n", eleCount);
// Default configuration for SE0: a 1D stream of one vector's worth of elements.
106 se0Params = __gen_SE_TEMPLATE_v1();
109 se0Params.ICNT0 = width;
110 se0Params.ELETYPE = SE_ELETYPE;
111 se0Params.VECLEN = SE_VECLEN;
112 se0Params.DIMFMT = __SE_DIMFMT_1D;
// SE1 starts with the same 1D configuration.
114 se1Params = __gen_SE_TEMPLATE_v1();
117 se1Params.ICNT0 = width;
118 se1Params.ELETYPE = SE_ELETYPE;
119 se1Params.VECLEN = SE_VECLEN;
120 se1Params.DIMFMT = __SE_DIMFMT_1D;
// Whole vectors in the input and the leftover element count.
123 uint32_t numBlocks = length / width;
124 uint32_t remBlocksSize = length % width;
// Case 1: the input fits in a single vector, so SE0 streams only `length` elements.
133 if (length <= width) {
136 se0Params.ICNT0 = length;
// Case 2: more than one vector but fewer than two; SE0 streams a full vector and
// SE1 streams the leftover elements.
142 else if (length < 2 * width) {
145 se0Params.ICNT0 = width;
147 se1Params.ICNT0 = remBlocksSize;
// NOTE(review): the branch header for the lines below is not visible; presumably the
// final `else` (two or more vectors). SE0 becomes a 2D stream with DIM1 = 2 * width,
// and SE1 is given an identical template.
156 se0Params.DIMFMT = __SE_DIMFMT_2D;
158 se0Params.DIM1 = 2 * width;
162 se0Params.ICNT0 = width;
165 se1Params = se0Params;
// Fragment of the templated execute routine (the debug print below names it
// DSPLIB_maxIndex_exec_ci; the signature is not part of this excerpt). The visible lines
// open the streaming engines, process the input in chunks through
// DSPLIB_maxIndex_loopLogic, reduce each chunk's per-lane results to a single
// (value, index) pair, and write the index of the overall maximum to the output.
214 template <
typename T,
typename TIndex>
218 uint32_t blockSize = pKerPrivArgs->
blockSize;
219 uint32_t length = blockSize;
222 __SE_TEMPLATE_v1 se0Params, se1Params;
// Typed views of the input buffer and of the single uint32_t output slot.
225 T *restrict pInLocal = (T *) pIn;
226 uint32_t *restrict pOutLocal = (uint32_t *) pOut;
228 #if DSPLIB_DEBUGPRINT
229 printf(
"Enter DSPLIB_maxIndex_exec_ci\n");
// width = number of T elements in one full vector.
232 typedef typename c7x::make_full_vector<T>::type vec;
233 uint32_t eleCount = c7x::element_count_of<vec>::value;
234 uint32_t width = eleCount;
235 #if DSPLIB_DEBUGPRINT
236 printf(
"Enter eleCount %d\n", eleCount);
// NOTE(review): the lines that load se0Params/se1Params (presumably from pBlock) are not
// visible in this excerpt - confirm.
239 uint8_t *pBlock = pKerPrivArgs->
bufPblock;
// SE0 starts at the first element; SE1 is opened one vector further on, and only when
// the input is longer than one vector.
244 __SE0_OPEN(pInLocal, se0Params);
245 if (length > width) {
246 __SE1_OPEN(pInLocal + eleCount, se1Params);
249 #if DSPLIB_DEBUGPRINT
250 printf(
"DSPLIB_DEBUGPRINT blockSize %d\n", blockSize);
// Chunk size is 2^(bits in T), with the bit count capped at 32.
// NOTE(review): presumably chosen so that a chunk-local index fits in TIndex - confirm.
253 size_t bitsInType =
sizeof(T) * 8;
254 bitsInType = (bitsInType > 32) ? 32 : bitsInType;
// NOTE(review): pow() computes this in floating point; an integer shift would give the
// same power of two without the double round trip.
256 size_t maxSingleBufferSize = pow(2, bitsInType);
257 uint32_t numBufferIterations = DSPLIB_ceilingDiv(length, maxSingleBufferSize);
// One (value, index) result per chunk.
259 std::vector<T> maxVals(numBufferIterations);
260 std::vector<uint32_t> maxIndices(numBufferIterations);
266 size_t currentIterationSize;
268 TIndex *currentIndexPtr;
271 for (uint32_t buffer = 0; buffer < numBufferIterations; buffer++) {
// Size of this chunk: a full chunk, or whatever is left at the end of the input.
273 currentIterationSize = std::min((
size_t) maxSingleBufferSize, (
size_t) (length - (maxSingleBufferSize * buffer)));
// NOTE(review): pInLocal is passed unchanged on every iteration in the visible lines;
// confirm whether it is advanced per chunk in lines not shown.
275 loopOutput = DSPLIB_maxIndex_loopLogic<T, TIndex>(currentIterationSize, pInLocal);
// Reduce the per-lane results: start from lane 0, then scan the remaining lanes.
278 currentValuePtr = (T *) &loopOutput.
maxVals;
279 largest = *currentValuePtr++;
280 currentIndexPtr = (TIndex *) &loopOutput.
maxIndices;
281 maxIndex = *currentIndexPtr++;
// NOTE(review): no pointer advance is visible inside this loop; presumably it happens in
// lines not shown - confirm.
282 for (i = 1; i < c7x::element_count_of<vec>::value; i++) {
283 currentValue = *currentValuePtr;
284 currentIndex = *currentIndexPtr;
// A strictly larger value replaces the current best.
285 if (currentValue > largest) {
286 largest = currentValue;
287 maxIndex = currentIndex;
// On equal values the smaller index is kept.
291 else if (currentValue == largest) {
292 if (currentIndex < maxIndex) {
293 maxIndex = currentIndex;
// Store this chunk's result; the chunk's starting offset turns the chunk-local index
// into an index into the whole input.
302 maxVals[buffer] = largest;
303 maxIndices[buffer] = ((uint32_t) maxIndex) + (buffer * maxSingleBufferSize);
// Reduce across chunks. The comparison is strict, so on equal values the earlier chunk's
// entry is kept.
306 T largestVal = maxVals[0];
307 uint32_t largestIndex = maxIndices[0];
308 for (i = 1; i < maxVals.size(); i++) {
309 if (maxVals[i] > largestVal) {
310 largestVal = maxVals[i];
311 largestIndex = maxIndices[i];
// Single result written to the output buffer.
315 *pOutLocal = largestIndex;
// Same condition as the SE1 open above.
// NOTE(review): the body is not visible; presumably it closes SE1 - confirm.
318 if (length > width) {
// Fragment of what appears to be the generic DSPLIB_maxIndex_loopLogic<T, TIndex> body
// (the signature and several declarations are not part of this excerpt). It produces a
// per-lane maximum vector (maxVals) and the matching per-lane index vector (maxIndices).
// NOTE(review): the index vectors are hard-coded as 8-lane c7x::uint_vec, which matches
// a vector of 32-bit elements - confirm T is restricted accordingly.
//
// Lane indices for the first vector (0..7) and the second vector (8..15).
352 c7x::uint_vec maxIndices = c7x::uint_vec(0, 1, 2, 3, 4, 5, 6, 7);
353 c7x::uint_vec maxIndices0 = c7x::uint_vec(0, 1, 2, 3, 4, 5, 6, 7);
354 c7x::uint_vec maxIndices1 = c7x::uint_vec(8, 9, 10, 11, 12, 13, 14, 15);
356 c7x::uint_vec maxIndicesA = c7x::uint_vec(0, 1, 2, 3, 4, 5, 6, 7);
357 c7x::uint_vec maxIndicesB = c7x::uint_vec(8, 9, 10, 11, 12, 13, 14, 15);
358 c7x::uint_vec firstHalfIndices = c7x::uint_vec(0, 1, 2, 3, 4, 5, 6, 7);
359 c7x::uint_vec secondHalfIndices = c7x::uint_vec(8, 9, 10, 11, 12, 13, 14, 15);
361 typedef typename c7x::make_full_vector<T>::type vec;
362 typedef typename c7x::make_full_vector<TIndex>::type index_vec;
371 size_t width = c7x::element_count_of<vec>::value;
// Case 1: the input fits in one vector. Lanes at or beyond `length` are overwritten with
// the lowest representable T.
373 if (length <= width) {
374 maxVals = c7x::strm_eng<0, vec>::get_adv();
376 for (
size_t i = length; i < width; i++) {
377 maxVals.s[i] = std::numeric_limits<T>::lowest();
// Case 2: more than one vector but fewer than two. One vector is read from each stream
// and the lanes of the second vector from `remElements` onward are overwritten with the
// lowest value.
381 else if (length < 2 * width) {
382 maxVals0 = c7x::strm_eng<0, vec>::get_adv();
383 maxVals1 = c7x::strm_eng<1, vec>::get_adv();
385 size_t remElements = length % width;
386 for (
size_t i = remElements; i < width; i++) {
387 maxVals1.s[i] = std::numeric_limits<T>::lowest();
// NOTE(review): two alternative merges follow in the visible lines; presumably they sit
// in different preprocessor or conditional branches that are not shown - confirm.
// Variant without tie-break: keep vector 0's value and index where it is strictly larger.
389 maskOfMaxs = __cmp_gt_pred(maxVals0, maxVals1);
390 maxVals = __select(maskOfMaxs, maxVals0, maxVals1);
391 maxIndices = __select(maskOfMaxs, maxIndices0, maxIndices1);
// Variant with tie-break. The comments below assume __select returns its second argument
// where the predicate is set and its third otherwise - confirm.
395 maskOfMaxs = __cmp_gt_pred(maxVals0, maxVals1);
// Per lane, the lower of the two candidate indices.
396 __vpred maskOfSmallerIndices = __cmp_ge_pred(maxIndices1, maxIndices0);
397 index_vec smallestIndices = __select(maskOfSmallerIndices, maxIndices0, maxIndices1);
// Lanes where both candidates hold the same value.
398 __vpred maskOfTiebreakerValues = __cmp_eq_pred(maxVals0, maxVals1);
399 maxVals = __select(maskOfMaxs, maxVals0, maxVals1);
400 index_vec maxIndicesIgnoringTiebreaker = __select(maskOfMaxs, maxIndices0, maxIndices1);
// Tied lanes take the lower index, other lanes take the index of the larger value. The
// two partial vectors are zero in complementary lanes, so their sum is the merged result.
401 index_vec zeroVec = c7x::uint_vec(0);
402 index_vec nonTiebreakerVec = __select(maskOfTiebreakerValues, zeroVec, maxIndicesIgnoringTiebreaker);
403 index_vec tiebreakerVec = __select(maskOfTiebreakerValues, smallestIndices, zeroVec);
405 maxIndices = nonTiebreakerVec + tiebreakerVec;
// Case 3 (branch header not visible): two running maxima, A fed from stream 0 and B fed
// from stream 1, both seeded with the lowest representable T.
410 __vpred mask0, mask1;
413 vec maxValsA = T(std::numeric_limits<T>::lowest());
414 vec maxValsB = maxValsA;
417 vec maxValsLarge = T(std::numeric_limits<T>::lowest());
// Main loop; numIterations is declared in lines not shown.
421 for (
size_t i = 0; i < numIterations; i += 1) {
422 inVec0 = c7x::strm_eng<0, vec>::get_adv();
423 mask0 = __cmp_gt_pred(inVec0, maxValsA);
// NOTE(review): the assignment targets of the next two __select calls are on lines not
// shown; presumably maxValsA and maxIndicesA, mirroring the B updates below - confirm.
425 __select(mask0, inVec0, maxValsA);
428 __select(mask0, firstHalfIndices, maxIndicesA);
431 inVec1 = c7x::strm_eng<1, vec>::get_adv();
432 mask1 = __cmp_gt_pred(inVec1, maxValsB);
433 maxValsB = __select(mask1, inVec1, maxValsB);
434 maxIndicesB = __select(mask1, secondHalfIndices, maxIndicesB);
// Tail handling. remStart points one vector before the end of the input, so the tail is
// read as full vectors that end exactly at the last element. remBlockSize is declared in
// lines not shown.
444 int32_t remVecLen = DSPLIB_ceilingDiv(remBlockSize, width);
445 T *remStart = (T *) pSrc + length - width;
// One tail vector: its lane indices are (length - lanes) + lastRunOffsets.
// NOTE(review): lastRunOffsets is not defined in this excerpt.
447 if (remBlockSize != 0 && remVecLen == 1) {
449 inVec0 = *(vec *) remStart;
450 firstHalfIndices = c7x::uint_vec(length - (c7x::element_count_of<index_vec>::value)) +
lastRunOffsets;
451 mask0 = __cmp_gt_pred(inVec0, maxValsA);
// NOTE(review): assignment target not visible (presumably maxValsA), and the statement
// starting with maxIndicesA continues on a line not shown.
453 __select(mask0, inVec0, maxValsA);
455 maxIndicesA = __select(mask0, firstHalfIndices,
// Two tail vectors: the last two full vectors of the input, the first folded into A and
// the second into B.
460 else if (remBlockSize != 0 && remVecLen == 2) {
461 inVec0 = *(vec *) (remStart - width);
462 firstHalfIndices = c7x::uint_vec(length - (2 * c7x::element_count_of<index_vec>::value)) +
lastRunOffsets;
463 mask0 = __cmp_gt_pred(inVec0, maxValsA);
465 __select(mask0, inVec0, maxValsA);
467 maxIndicesA = __select(mask0, firstHalfIndices,
472 inVec1 = *(vec *) remStart;
473 secondHalfIndices = c7x::uint_vec(length - (c7x::element_count_of<index_vec>::value)) +
lastRunOffsets;
474 mask1 = __cmp_gt_pred(inVec1, maxValsB);
475 maxValsB = __select(mask1, inVec1, maxValsB);
476 maxIndicesB = __select(mask1, secondHalfIndices, maxIndicesB);
// Merge A and B per lane: larger value wins, and on equal values the lower index is kept
// (same scheme as the tie-break merge in case 2).
484 __vpred maskOfMaxValues = __cmp_gt_pred(maxValsA, maxValsB);
485 __vpred maskOfSmallerIndices = __cmp_ge_pred(maxIndicesB, maxIndicesA);
486 index_vec smallestIndices = __select(maskOfSmallerIndices, maxIndicesA, maxIndicesB);
487 __vpred maskOfTiebreakerValues = __cmp_eq_pred(maxValsA, maxValsB);
488 maxValsLarge = __select(maskOfMaxValues, maxValsA, maxValsB);
489 index_vec maxIndicesIgnoringTiebreaker = __select(maskOfMaxValues, maxIndicesA, maxIndicesB);
490 index_vec zeroVec = c7x::uint_vec(0);
491 index_vec nonTiebreakerVec = __select(maskOfTiebreakerValues, zeroVec, maxIndicesIgnoringTiebreaker);
492 index_vec tiebreakerVec = __select(maskOfTiebreakerValues, smallestIndices, zeroVec);
494 index_vec maxIndicesLarge = nonTiebreakerVec + tiebreakerVec;
// Publish the merged vectors as this path's result.
496 maxVals = maxValsLarge;
497 maxIndices = maxIndicesLarge;
// Fragment of what appears to be the int8_t variant of DSPLIB_maxIndex_loopLogic (values
// are int8_t vectors, indices are uint8_t vectors; the signature is not part of this
// excerpt). It produces a per-lane maximum vector and the matching per-lane index vector.
//
// Lane indices for the first vector (0..31) and the second vector (32..63).
510 c7x::uchar_vec maxIndices = c7x::uchar_vec(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
511 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
512 c7x::uchar_vec maxIndices0 = c7x::uchar_vec(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
513 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
514 c7x::uchar_vec maxIndices1 = c7x::uchar_vec(32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49,
515 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
517 c7x::uchar_vec maxIndicesA = c7x::uchar_vec(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
518 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
519 c7x::uchar_vec maxIndicesB = c7x::uchar_vec(32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49,
520 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
521 c7x::uchar_vec firstHalfIndices = c7x::uchar_vec(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
522 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
523 c7x::uchar_vec secondHalfIndices = c7x::uchar_vec(32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
524 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
526 typedef typename c7x::make_full_vector<int8_t>::type vec;
527 typedef typename c7x::make_full_vector<uint8_t>::type index_vec;
536 size_t width = c7x::element_count_of<vec>::value;
// Case 1: the input fits in one vector. Lanes at or beyond `length` are overwritten with
// the lowest int8_t value.
539 if (length <= width) {
540 maxVals = c7x::strm_eng<0, vec>::get_adv();
542 for (
size_t i = length; i < width; i++) {
543 maxVals.s[i] = std::numeric_limits<int8_t>::lowest();
// Case 2: more than one vector but fewer than two. The lanes of the second vector from
// `remElements` onward are overwritten with the lowest value.
547 else if (length < 2 * width) {
548 maxVals0 = c7x::strm_eng<0, vec>::get_adv();
549 maxVals1 = c7x::strm_eng<1, vec>::get_adv();
551 size_t remElements = length % width;
552 for (
size_t i = remElements; i < width; i++) {
553 maxVals1.s[i] = std::numeric_limits<int8_t>::lowest();
// Merge with tie-break. The comments assume __select returns its second argument where
// the predicate is set and its third otherwise - confirm.
557 maskOfMaxs = __cmp_gt_pred(maxVals0, maxVals1);
// Per lane, the lower of the two candidate indices.
558 __vpred maskOfSmallerIndices = __cmp_ge_pred(maxIndices1, maxIndices0);
559 index_vec smallestIndices = __select(maskOfSmallerIndices, maxIndices0, maxIndices1);
// Lanes where both candidates hold the same value.
560 __vpred maskOfTiebreakerValues = __cmp_eq_pred(maxVals0, maxVals1);
561 maxVals = __select(maskOfMaxs, maxVals0, maxVals1);
562 index_vec maxIndicesIgnoringTiebreaker = __select(maskOfMaxs, maxIndices0, maxIndices1);
// Tied lanes take the lower index, other lanes the index of the larger value; the two
// partial vectors are zero in complementary lanes, so their sum is the merged result.
563 index_vec zeroVec = c7x::uchar_vec(0);
564 index_vec nonTiebreakerVec = __select(maskOfTiebreakerValues, zeroVec, maxIndicesIgnoringTiebreaker);
565 index_vec tiebreakerVec = __select(maskOfTiebreakerValues, smallestIndices, zeroVec);
567 maxIndices = nonTiebreakerVec + tiebreakerVec;
// Case 3 (branch header not visible): two running maxima, A fed from stream 0 and B fed
// from stream 1, both seeded with the lowest int8_t value.
572 __vpred mask0, mask1;
575 vec maxValsA = int8_t(std::numeric_limits<int8_t>::lowest());
576 vec maxValsB = maxValsA;
579 vec maxValsLarge = int8_t(std::numeric_limits<int8_t>::lowest());
// Main loop; numIterations is declared in lines not shown.
583 for (
size_t i = 0; i < numIterations; i += 1) {
584 inVec0 = c7x::strm_eng<0, vec>::get_adv();
585 mask0 = __cmp_gt_pred(inVec0, maxValsA);
// NOTE(review): the assignment targets of the next two __select calls are on lines not
// shown; presumably maxValsA and maxIndicesA, mirroring the B updates below - confirm.
587 __select(mask0, inVec0, maxValsA);
590 __select(mask0, firstHalfIndices, maxIndicesA);
593 inVec1 = c7x::strm_eng<1, vec>::get_adv();
594 mask1 = __cmp_gt_pred(inVec1, maxValsB);
595 maxValsB = __select(mask1, inVec1, maxValsB);
596 maxIndicesB = __select(mask1, secondHalfIndices, maxIndicesB);
// Tail handling. remStart points one vector before the end of the input, so the tail is
// read as full vectors that end exactly at the last element. remBlockSize is declared in
// lines not shown.
605 int32_t remVecLen = DSPLIB_ceilingDiv(remBlockSize, width);
606 int8_t *remStart = (int8_t *) pSrc + length - width;
// One tail vector, folded into A.
// NOTE(review): the lines that recompute firstHalfIndices for the tail are not visible
// in this variant, and the maxIndicesA statement continues on a line not shown.
608 if (remBlockSize != 0 && remVecLen == 1) {
609 inVec0 = *(vec *) remStart;
612 mask0 = __cmp_gt_pred(inVec0, maxValsA);
614 __select(mask0, inVec0, maxValsA);
616 maxIndicesA = __select(mask0, firstHalfIndices,
// Two tail vectors: the last two full vectors of the input, the first folded into A and
// the second into B.
621 else if (remBlockSize != 0 && remVecLen == 2) {
622 inVec0 = *(vec *) (remStart - width);
624 mask0 = __cmp_gt_pred(inVec0, maxValsA);
626 __select(mask0, inVec0, maxValsA);
628 maxIndicesA = __select(mask0, firstHalfIndices,
632 inVec1 = *(vec *) remStart;
634 mask1 = __cmp_gt_pred(inVec1, maxValsB);
635 maxValsB = __select(mask1, inVec1, maxValsB);
636 maxIndicesB = __select(mask1, secondHalfIndices, maxIndicesB);
// Merge A and B per lane: larger value wins, and on equal values the lower index is kept.
644 __vpred maskOfMaxValues = __cmp_gt_pred(maxValsA, maxValsB);
645 __vpred maskOfSmallerIndices = __cmp_ge_pred(maxIndicesB, maxIndicesA);
646 index_vec smallestIndices = __select(maskOfSmallerIndices, maxIndicesA, maxIndicesB);
647 __vpred maskOfTiebreakerValues = __cmp_eq_pred(maxValsA, maxValsB);
648 maxValsLarge = __select(maskOfMaxValues, maxValsA, maxValsB);
649 index_vec maxIndicesIgnoringTiebreaker = __select(maskOfMaxValues, maxIndicesA, maxIndicesB);
650 index_vec zeroVec = c7x::uchar_vec(0);
651 index_vec nonTiebreakerVec = __select(maskOfTiebreakerValues, zeroVec, maxIndicesIgnoringTiebreaker);
652 index_vec tiebreakerVec = __select(maskOfTiebreakerValues, smallestIndices, zeroVec);
654 index_vec maxIndicesLarge = nonTiebreakerVec + tiebreakerVec;
// Publish the merged vectors as this path's result.
656 maxVals = maxValsLarge;
657 maxIndices = maxIndicesLarge;
// Fragment of what appears to be the uint8_t variant of DSPLIB_maxIndex_loopLogic (values
// and indices are both uint8_t vectors; the signature is not part of this excerpt). It
// produces a per-lane maximum vector and the matching per-lane index vector.
//
// Lane indices for the first vector (0..31) and the second vector (32..63).
669 c7x::uchar_vec maxIndices = c7x::uchar_vec(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
670 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
671 c7x::uchar_vec maxIndices0 = c7x::uchar_vec(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
672 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
673 c7x::uchar_vec maxIndices1 = c7x::uchar_vec(32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49,
674 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
676 c7x::uchar_vec maxIndicesA = c7x::uchar_vec(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
677 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
678 c7x::uchar_vec maxIndicesB = c7x::uchar_vec(32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49,
679 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
680 c7x::uchar_vec firstHalfIndices = c7x::uchar_vec(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
681 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
682 c7x::uchar_vec secondHalfIndices = c7x::uchar_vec(32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
683 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
685 typedef typename c7x::make_full_vector<uint8_t>::type vec;
686 typedef typename c7x::make_full_vector<uint8_t>::type index_vec;
695 size_t width = c7x::element_count_of<vec>::value;
// Case 1: the input fits in one vector. Lanes at or beyond `length` are overwritten with
// the lowest uint8_t value.
698 if (length <= width) {
699 maxVals = c7x::strm_eng<0, vec>::get_adv();
701 for (
size_t i = length; i < width; i++) {
702 maxVals.s[i] = std::numeric_limits<uint8_t>::lowest();
// Case 2: more than one vector but fewer than two. The lanes of the second vector from
// `remElements` onward are overwritten with the lowest value.
706 else if (length < 2 * width) {
707 maxVals0 = c7x::strm_eng<0, vec>::get_adv();
708 maxVals1 = c7x::strm_eng<1, vec>::get_adv();
710 size_t remElements = length % width;
711 for (
size_t i = remElements; i < width; i++) {
712 maxVals1.s[i] = std::numeric_limits<uint8_t>::lowest();
// Merge with tie-break. The comments assume __select returns its second argument where
// the predicate is set and its third otherwise - confirm.
716 maskOfMaxs = __cmp_gt_pred(maxVals0, maxVals1);
// Per lane, the lower of the two candidate indices.
717 __vpred maskOfSmallerIndices = __cmp_ge_pred(maxIndices1, maxIndices0);
718 index_vec smallestIndices = __select(maskOfSmallerIndices, maxIndices0, maxIndices1);
// Lanes where both candidates hold the same value.
719 __vpred maskOfTiebreakerValues = __cmp_eq_pred(maxVals0, maxVals1);
720 maxVals = __select(maskOfMaxs, maxVals0, maxVals1);
721 index_vec maxIndicesIgnoringTiebreaker = __select(maskOfMaxs, maxIndices0, maxIndices1);
// Tied lanes take the lower index, other lanes the index of the larger value; the two
// partial vectors are zero in complementary lanes, so their sum is the merged result.
722 index_vec zeroVec = c7x::uchar_vec(0);
723 index_vec nonTiebreakerVec = __select(maskOfTiebreakerValues, zeroVec, maxIndicesIgnoringTiebreaker);
724 index_vec tiebreakerVec = __select(maskOfTiebreakerValues, smallestIndices, zeroVec);
726 maxIndices = nonTiebreakerVec + tiebreakerVec;
// Case 3 (branch header not visible): two running maxima, A fed from stream 0 and B fed
// from stream 1, both seeded with the lowest uint8_t value.
731 __vpred mask0, mask1;
734 vec maxValsA = uint8_t(std::numeric_limits<uint8_t>::lowest());
735 vec maxValsB = maxValsA;
738 vec maxValsLarge = uint8_t(std::numeric_limits<uint8_t>::lowest());
// Main loop; numIterations is declared in lines not shown.
742 for (
size_t i = 0; i < numIterations; i += 1) {
743 inVec0 = c7x::strm_eng<0, vec>::get_adv();
744 mask0 = __cmp_gt_pred(inVec0, maxValsA);
// NOTE(review): the assignment targets of the next two __select calls are on lines not
// shown; presumably maxValsA and maxIndicesA, mirroring the B updates below - confirm.
746 __select(mask0, inVec0, maxValsA);
749 __select(mask0, firstHalfIndices, maxIndicesA);
752 inVec1 = c7x::strm_eng<1, vec>::get_adv();
753 mask1 = __cmp_gt_pred(inVec1, maxValsB);
754 maxValsB = __select(mask1, inVec1, maxValsB);
755 maxIndicesB = __select(mask1, secondHalfIndices, maxIndicesB);
// Tail handling. remStart points one vector before the end of the input, so the tail is
// read as full vectors that end exactly at the last element. remBlockSize is declared in
// lines not shown.
// NOTE(review): remStart is typed int8_t * although this variant works on uint8_t data.
// In the visible lines it is only used as an address (cast to vec *), so the element
// type does not affect the loads; the uint16_t variant uses a matching uint16_t *.
764 int32_t remVecLen = DSPLIB_ceilingDiv(remBlockSize, width);
765 int8_t *remStart = (int8_t *) pSrc + length - width;
// One tail vector, folded into A.
// NOTE(review): the lines that recompute firstHalfIndices for the tail are not visible
// in this variant, and the maxIndicesA statement continues on a line not shown.
767 if (remBlockSize != 0 && remVecLen == 1) {
768 inVec0 = *(vec *) remStart;
771 mask0 = __cmp_gt_pred(inVec0, maxValsA);
773 __select(mask0, inVec0, maxValsA);
775 maxIndicesA = __select(mask0, firstHalfIndices,
// Two tail vectors: the last two full vectors of the input, the first folded into A and
// the second into B.
780 else if (remBlockSize != 0 && remVecLen == 2) {
781 inVec0 = *(vec *) (remStart - width);
783 mask0 = __cmp_gt_pred(inVec0, maxValsA);
785 __select(mask0, inVec0, maxValsA);
787 maxIndicesA = __select(mask0, firstHalfIndices,
791 inVec1 = *(vec *) remStart;
793 mask1 = __cmp_gt_pred(inVec1, maxValsB);
794 maxValsB = __select(mask1, inVec1, maxValsB);
795 maxIndicesB = __select(mask1, secondHalfIndices, maxIndicesB);
// Merge A and B per lane: larger value wins, and on equal values the lower index is kept.
802 __vpred maskOfMaxValues = __cmp_gt_pred(maxValsA, maxValsB);
803 __vpred maskOfSmallerIndices = __cmp_ge_pred(maxIndicesB, maxIndicesA);
804 index_vec smallestIndices = __select(maskOfSmallerIndices, maxIndicesA, maxIndicesB);
805 __vpred maskOfTiebreakerValues = __cmp_eq_pred(maxValsA, maxValsB);
806 maxValsLarge = __select(maskOfMaxValues, maxValsA, maxValsB);
807 index_vec maxIndicesIgnoringTiebreaker = __select(maskOfMaxValues, maxIndicesA, maxIndicesB);
808 index_vec zeroVec = c7x::uchar_vec(0);
809 index_vec nonTiebreakerVec = __select(maskOfTiebreakerValues, zeroVec, maxIndicesIgnoringTiebreaker);
810 index_vec tiebreakerVec = __select(maskOfTiebreakerValues, smallestIndices, zeroVec);
812 index_vec maxIndicesLarge = nonTiebreakerVec + tiebreakerVec;
// Publish the merged vectors as this path's result.
814 maxVals = maxValsLarge;
815 maxIndices = maxIndicesLarge;
// Fragment of what appears to be the DSPLIB_maxIndex_loopLogic<int16_t, uint16_t> variant
// (values are int16_t vectors, indices are uint16_t vectors; the signature is not part of
// this excerpt). It produces a per-lane maximum vector and the matching index vector.
//
// Lane indices for the first vector (0..15) and the second vector (16..31).
828 c7x::ushort_vec maxIndices = c7x::ushort_vec(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
829 c7x::ushort_vec maxIndices0 = c7x::ushort_vec(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
830 c7x::ushort_vec maxIndices1 = c7x::ushort_vec(16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
832 c7x::ushort_vec maxIndicesA = c7x::ushort_vec(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
833 c7x::ushort_vec maxIndicesB = c7x::ushort_vec(16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
834 c7x::ushort_vec firstHalfIndices = c7x::ushort_vec(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
835 c7x::ushort_vec secondHalfIndices = c7x::ushort_vec(16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
837 typedef typename c7x::make_full_vector<int16_t>::type vec;
838 typedef typename c7x::make_full_vector<uint16_t>::type index_vec;
847 size_t width = c7x::element_count_of<vec>::value;
// Case 1: the input fits in one vector. Lanes at or beyond `length` are overwritten with
// the lowest int16_t value.
850 if (length <= width) {
851 maxVals = c7x::strm_eng<0, vec>::get_adv();
854 for (
size_t i = length; i < width; i++) {
855 maxVals.s[i] = std::numeric_limits<int16_t>::lowest();
// Case 2: more than one vector but fewer than two. The lanes of the second vector from
// `remElements` onward are overwritten with the lowest value.
859 else if (length < 2 * width) {
860 maxVals0 = c7x::strm_eng<0, vec>::get_adv();
861 maxVals1 = c7x::strm_eng<1, vec>::get_adv();
864 size_t remElements = length % width;
866 for (
size_t i = remElements; i < width; i++) {
867 maxVals1.s[i] = std::numeric_limits<int16_t>::lowest();
// Merge with tie-break. The comments assume __select returns its second argument where
// the predicate is set and its third otherwise - confirm.
871 maskOfMaxs = __cmp_gt_pred(maxVals0, maxVals1);
// Per lane, the lower of the two candidate indices.
872 __vpred maskOfSmallerIndices = __cmp_ge_pred(maxIndices1, maxIndices0);
873 c7x::ushort_vec smallestIndices = __select(maskOfSmallerIndices, maxIndices0, maxIndices1);
// Lanes where both candidates hold the same value.
874 __vpred maskOfTiebreakerValues = __cmp_eq_pred(maxVals0, maxVals1);
875 maxVals = __select(maskOfMaxs, maxVals0, maxVals1);
876 c7x::ushort_vec maxIndicesIgnoringTiebreaker = __select(maskOfMaxs, maxIndices0, maxIndices1);
// Tied lanes take the lower index, other lanes the index of the larger value; the two
// partial vectors are zero in complementary lanes, so their sum is the merged result.
877 c7x::ushort_vec zeroVec = c7x::ushort_vec(0);
878 c7x::ushort_vec nonTiebreakerVec = __select(maskOfTiebreakerValues, zeroVec, maxIndicesIgnoringTiebreaker);
879 c7x::ushort_vec tiebreakerVec = __select(maskOfTiebreakerValues, smallestIndices, zeroVec);
881 maxIndices = nonTiebreakerVec + tiebreakerVec;
// Case 3 (branch header not visible): two running maxima, A fed from stream 0 and B fed
// from stream 1, both seeded with the lowest int16_t value.
885 c7x::short_vec inVec0, inVec1;
886 __vpred mask0, mask1;
889 c7x::short_vec maxValsA = int16_t(std::numeric_limits<int16_t>::lowest());
890 c7x::short_vec maxValsB = maxValsA;
893 c7x::short_vec maxValsLarge = int16_t(std::numeric_limits<int16_t>::lowest());
// Main loop; numIterations is declared in lines not shown.
897 for (
size_t i = 0; i < numIterations; i += 1) {
898 inVec0 = c7x::strm_eng<0, vec>::get_adv();
899 mask0 = __cmp_gt_pred(inVec0, maxValsA);
// NOTE(review): the assignment targets of the next two __select calls are on lines not
// shown; presumably maxValsA and maxIndicesA, mirroring the B updates below - confirm.
901 __select(mask0, inVec0, maxValsA);
904 __select(mask0, firstHalfIndices, maxIndicesA);
907 inVec1 = c7x::strm_eng<1, vec>::get_adv();
908 mask1 = __cmp_gt_pred(inVec1, maxValsB);
909 maxValsB = __select(mask1, inVec1, maxValsB);
910 maxIndicesB = __select(mask1, secondHalfIndices, maxIndicesB);
// Tail handling. remStart points one vector before the end of the input, so the tail is
// read as full vectors that end exactly at the last element. remBlockSize is declared in
// lines not shown.
921 int32_t remVecLen = DSPLIB_ceilingDiv(remBlockSize, width);
922 int16_t *remStart = (int16_t *) pSrc + length - width;
// One tail vector, folded into A.
// NOTE(review): the lines that recompute firstHalfIndices for the tail are not visible
// in this variant, and the maxIndicesA statement continues on a line not shown.
924 if (remBlockSize != 0 && remVecLen == 1) {
925 inVec0 = *(vec *) remStart;
928 mask0 = __cmp_gt_pred(inVec0, maxValsA);
930 __select(mask0, inVec0, maxValsA);
932 maxIndicesA = __select(mask0, firstHalfIndices,
// Two tail vectors: the last two full vectors of the input, the first folded into A and
// the second into B.
937 else if (remBlockSize != 0 && remVecLen == 2) {
938 inVec0 = *(vec *) (remStart - width);
940 mask0 = __cmp_gt_pred(inVec0, maxValsA);
942 __select(mask0, inVec0, maxValsA);
944 maxIndicesA = __select(mask0, firstHalfIndices,
947 inVec1 = *(vec *) remStart;
949 mask1 = __cmp_gt_pred(inVec1, maxValsB);
950 maxValsB = __select(mask1, inVec1, maxValsB);
951 maxIndicesB = __select(mask1, secondHalfIndices, maxIndicesB);
// Merge A and B per lane: larger value wins, and on equal values the lower index is kept.
956 __vpred maskOfMaxValues = __cmp_gt_pred(maxValsA, maxValsB);
957 __vpred maskOfSmallerIndices = __cmp_ge_pred(maxIndicesB, maxIndicesA);
958 c7x::ushort_vec smallestIndices = __select(maskOfSmallerIndices, maxIndicesA, maxIndicesB);
959 __vpred maskOfTiebreakerValues = __cmp_eq_pred(maxValsA, maxValsB);
960 maxValsLarge = __select(maskOfMaxValues, maxValsA, maxValsB);
961 c7x::ushort_vec maxIndicesIgnoringTiebreaker = __select(maskOfMaxValues, maxIndicesA, maxIndicesB);
962 c7x::ushort_vec zeroVec = c7x::ushort_vec(0);
963 c7x::ushort_vec nonTiebreakerVec = __select(maskOfTiebreakerValues, zeroVec, maxIndicesIgnoringTiebreaker);
964 c7x::ushort_vec tiebreakerVec = __select(maskOfTiebreakerValues, smallestIndices, zeroVec);
966 c7x::ushort_vec maxIndicesLarge = nonTiebreakerVec + tiebreakerVec;
// Publish the merged vectors as this path's result.
968 maxVals = maxValsLarge;
969 maxIndices = maxIndicesLarge;
// Fragment of what appears to be the DSPLIB_maxIndex_loopLogic<uint16_t, uint16_t>
// variant (values and indices are both uint16_t vectors; the signature is not part of
// this excerpt). It produces a per-lane maximum vector and the matching index vector.
//
// Lane indices for the first vector (0..15) and the second vector (16..31).
982 c7x::ushort_vec maxIndices = c7x::ushort_vec(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
983 c7x::ushort_vec maxIndices0 = c7x::ushort_vec(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
984 c7x::ushort_vec maxIndices1 = c7x::ushort_vec(16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
986 c7x::ushort_vec maxIndicesA = c7x::ushort_vec(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
987 c7x::ushort_vec maxIndicesB = c7x::ushort_vec(16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
988 c7x::ushort_vec firstHalfIndices = c7x::ushort_vec(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
989 c7x::ushort_vec secondHalfIndices = c7x::ushort_vec(16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
991 typedef typename c7x::make_full_vector<uint16_t>::type vec;
992 typedef typename c7x::make_full_vector<uint16_t>::type index_vec;
1001 size_t width = c7x::element_count_of<vec>::value;
// Case 1: the input fits in one vector. Lanes at or beyond `length` are overwritten with
// the lowest uint16_t value.
1004 if (length <= width) {
1005 maxVals = c7x::strm_eng<0, vec>::get_adv();
1008 for (
size_t i = length; i < width; i++) {
1009 maxVals.s[i] = std::numeric_limits<uint16_t>::lowest();
// Case 2: more than one vector but fewer than two. The lanes of the second vector from
// `remElements` onward are overwritten with the lowest value.
1013 else if (length < 2 * width) {
1014 maxVals0 = c7x::strm_eng<0, vec>::get_adv();
1015 maxVals1 = c7x::strm_eng<1, vec>::get_adv();
1018 size_t remElements = length % width;
1020 for (
size_t i = remElements; i < width; i++) {
1021 maxVals1.s[i] = std::numeric_limits<uint16_t>::lowest();
// Merge with tie-break. The comments assume __select returns its second argument where
// the predicate is set and its third otherwise - confirm.
1025 maskOfMaxs = __cmp_gt_pred(maxVals0, maxVals1);
// Per lane, the lower of the two candidate indices.
1026 __vpred maskOfSmallerIndices = __cmp_ge_pred(maxIndices1, maxIndices0);
1027 c7x::ushort_vec smallestIndices = __select(maskOfSmallerIndices, maxIndices0, maxIndices1);
// Lanes where both candidates hold the same value.
1028 __vpred maskOfTiebreakerValues = __cmp_eq_pred(maxVals0, maxVals1);
1029 maxVals = __select(maskOfMaxs, maxVals0, maxVals1);
1030 c7x::ushort_vec maxIndicesIgnoringTiebreaker = __select(maskOfMaxs, maxIndices0, maxIndices1);
// Tied lanes take the lower index, other lanes the index of the larger value; the two
// partial vectors are zero in complementary lanes, so their sum is the merged result.
1031 c7x::ushort_vec zeroVec = c7x::ushort_vec(0);
1032 c7x::ushort_vec nonTiebreakerVec = __select(maskOfTiebreakerValues, zeroVec, maxIndicesIgnoringTiebreaker);
1033 c7x::ushort_vec tiebreakerVec = __select(maskOfTiebreakerValues, smallestIndices, zeroVec);
1035 maxIndices = nonTiebreakerVec + tiebreakerVec;
// Case 3 (branch header not visible): two running maxima, A fed from stream 0 and B fed
// from stream 1, both seeded with the lowest uint16_t value.
1039 c7x::ushort_vec inVec0, inVec1;
1040 __vpred mask0, mask1;
1043 c7x::ushort_vec maxValsA = uint16_t(std::numeric_limits<uint16_t>::lowest());
1044 c7x::ushort_vec maxValsB = maxValsA;
1047 c7x::ushort_vec maxValsLarge = uint16_t(std::numeric_limits<uint16_t>::lowest());
// Main loop; numIterations is declared in lines not shown.
1051 for (
size_t i = 0; i < numIterations; i += 1) {
1052 inVec0 = c7x::strm_eng<0, c7x::ushort_vec>::get_adv();
1053 mask0 = __cmp_gt_pred(inVec0, maxValsA);
// NOTE(review): the assignment targets of the next two __select calls are on lines not
// shown; presumably maxValsA and maxIndicesA, mirroring the B updates below - confirm.
1055 __select(mask0, inVec0, maxValsA);
1058 __select(mask0, firstHalfIndices, maxIndicesA);
1061 inVec1 = c7x::strm_eng<1, c7x::ushort_vec>::get_adv();
1062 mask1 = __cmp_gt_pred(inVec1, maxValsB);
1063 maxValsB = __select(mask1, inVec1, maxValsB);
1064 maxIndicesB = __select(mask1, secondHalfIndices, maxIndicesB);
// Tail handling. remStart points one vector before the end of the input, so the tail is
// read as full vectors that end exactly at the last element. remBlockSize is declared in
// lines not shown.
1075 int32_t remVecLen = DSPLIB_ceilingDiv(remBlockSize, width);
1076 uint16_t *remStart = (uint16_t *) pSrc + length - width;
// One tail vector, folded into A.
// NOTE(review): the lines that recompute firstHalfIndices for the tail are not visible
// in this variant, and the maxIndicesA statement continues on a line not shown.
1078 if (remBlockSize != 0 && remVecLen == 1) {
1079 inVec0 = *(vec *) remStart;
1082 mask0 = __cmp_gt_pred(inVec0, maxValsA);
1084 __select(mask0, inVec0, maxValsA);
1086 maxIndicesA = __select(mask0, firstHalfIndices,
// Two tail vectors: the last two full vectors of the input, the first folded into A and
// the second into B.
1090 else if (remBlockSize != 0 && remVecLen == 2) {
1091 inVec0 = *(vec *) (remStart - width);
1093 mask0 = __cmp_gt_pred(inVec0, maxValsA);
1095 __select(mask0, inVec0, maxValsA);
1097 maxIndicesA = __select(mask0, firstHalfIndices,
1100 inVec1 = *(vec *) remStart;
1102 mask1 = __cmp_gt_pred(inVec1, maxValsB);
1103 maxValsB = __select(mask1, inVec1, maxValsB);
1104 maxIndicesB = __select(mask1, secondHalfIndices, maxIndicesB);
// Merge A and B per lane: larger value wins, and on equal values the lower index is kept.
1109 __vpred maskOfMaxValues = __cmp_gt_pred(maxValsA, maxValsB);
1110 __vpred maskOfSmallerIndices = __cmp_ge_pred(maxIndicesB, maxIndicesA);
1111 c7x::ushort_vec smallestIndices = __select(maskOfSmallerIndices, maxIndicesA, maxIndicesB);
1112 __vpred maskOfTiebreakerValues = __cmp_eq_pred(maxValsA, maxValsB);
1113 maxValsLarge = __select(maskOfMaxValues, maxValsA, maxValsB);
1114 c7x::ushort_vec maxIndicesIgnoringTiebreaker = __select(maskOfMaxValues, maxIndicesA, maxIndicesB);
1115 c7x::ushort_vec zeroVec = c7x::ushort_vec(0);
1116 c7x::ushort_vec nonTiebreakerVec = __select(maskOfTiebreakerValues, zeroVec, maxIndicesIgnoringTiebreaker);
1117 c7x::ushort_vec tiebreakerVec = __select(maskOfTiebreakerValues, smallestIndices, zeroVec);
1119 c7x::ushort_vec maxIndicesLarge = nonTiebreakerVec + tiebreakerVec;
// Publish the merged vectors as this path's result.
1121 maxVals = maxValsLarge;
1122 maxIndices = maxIndicesLarge;
// Fragment of what appears to be the float variant of DSPLIB_maxIndex_loopLogic (values
// are c7x::float_vec, indices are c7x::uint_vec; the signature is not part of this
// excerpt). It produces a per-lane maximum vector and the matching index vector.
// Comparisons here use __cmp_lt_pred with the operands swapped where the integer
// variants use __cmp_gt_pred.
//
// Lane indices for the first vector (0..7) and the second vector (8..15).
1135 c7x::uint_vec maxIndices = c7x::uint_vec(0, 1, 2, 3, 4, 5, 6, 7);
1136 c7x::uint_vec maxIndices0 = c7x::uint_vec(0, 1, 2, 3, 4, 5, 6, 7);
1137 c7x::uint_vec maxIndices1 = c7x::uint_vec(8, 9, 10, 11, 12, 13, 14, 15);
1139 c7x::uint_vec maxIndicesA = c7x::uint_vec(0, 1, 2, 3, 4, 5, 6, 7);
1140 c7x::uint_vec maxIndicesB = c7x::uint_vec(8, 9, 10, 11, 12, 13, 14, 15);
1141 c7x::uint_vec firstHalfIndices = c7x::uint_vec(0, 1, 2, 3, 4, 5, 6, 7);
1142 c7x::uint_vec secondHalfIndices = c7x::uint_vec(8, 9, 10, 11, 12, 13, 14, 15);
1144 c7x::float_vec maxVals0;
1145 c7x::float_vec maxVals1;
1148 size_t width = c7x::element_count_of<c7x::float_vec>::value;
1151 c7x::float_vec maxVals;
// Case 1: the input fits in one vector. Lanes at or beyond `length` are overwritten with
// the lowest finite float.
1153 if (length <= width) {
1154 maxVals = c7x::strm_eng<0, c7x::float_vec>::get_adv();
1156 for (
size_t i = length; i < width; i++) {
1157 maxVals.s[i] = std::numeric_limits<float>::lowest();
// Case 2: more than one vector but fewer than two. The lanes of the second vector from
// `remElements` onward are overwritten with the lowest value.
1161 else if (length < 2 * width) {
1162 maxVals0 = c7x::strm_eng<0, c7x::float_vec>::get_adv();
1163 maxVals1 = c7x::strm_eng<1, c7x::float_vec>::get_adv();
1165 size_t remElements = length % width;
1166 for (
size_t i = remElements; i < width; i++) {
1167 maxVals1.s[i] = std::numeric_limits<float>::lowest();
// The mask is set where vector 0 is strictly larger; those lanes keep vector 0's value
// and index (assuming __select returns its second argument where the predicate is set -
// confirm). No separate tie-break is applied in this variant.
1169 maskOfMaxs = __cmp_lt_pred(maxVals1, maxVals0);
1170 maxVals = __select(maskOfMaxs, maxVals0, maxVals1);
1171 maxIndices = __select(maskOfMaxs, maxIndices0, maxIndices1);
// Case 3 (branch header not visible): two running maxima, A fed from stream 0 and B fed
// from stream 1, both seeded with the lowest finite float.
1175 c7x::float_vec inVec0, inVec1;
1176 __vpred mask0, mask1, maskOfMaxsLarge;
1179 c7x::float_vec maxValsA = std::numeric_limits<float>::lowest();
1180 c7x::float_vec maxValsB = maxValsA;
1183 c7x::float_vec maxValsLarge = std::numeric_limits<float>::lowest();
// Main loop; numIterations is declared in lines not shown.
1188 for (
size_t i = 0; i < numIterations; i += 1) {
1189 inVec0 = c7x::strm_eng<0, c7x::float_vec>::get_adv();
1190 mask0 = __cmp_lt_pred(maxValsA, inVec0);
// NOTE(review): the assignment targets of the next two __select calls are on lines not
// shown; presumably maxValsA and maxIndicesA, mirroring the B updates below - confirm.
1192 __select(mask0, inVec0, maxValsA);
1195 __select(mask0, firstHalfIndices, maxIndicesA);
1198 inVec1 = c7x::strm_eng<1, c7x::float_vec>::get_adv();
1199 mask1 = __cmp_lt_pred(maxValsB, inVec1);
1200 maxValsB = __select(mask1, inVec1, maxValsB);
1201 maxIndicesB = __select(mask1, secondHalfIndices, maxIndicesB);
// Tail handling. remStart points one vector before the end of the input, so the tail is
// read as full vectors that end exactly at the last element. remBlockSize is declared in
// lines not shown.
1213 int32_t remVecLen = DSPLIB_ceilingDiv(remBlockSize, width);
1214 float *remStart = (
float *) pSrc + length - width;
// One tail vector, folded into A; its lane indices are (length - width) + lastRunOffsets.
// NOTE(review): lastRunOffsets is not defined in this excerpt, and the maxIndicesA
// statement continues on a line not shown.
1217 if (remBlockSize != 0 && remVecLen == 1) {
1220 inVec0 = *(c7x::float_vec *) remStart;
1221 firstHalfIndices = c7x::uint_vec(length - (width)) +
lastRunOffsets;
1222 mask0 = __cmp_lt_pred(maxValsA, inVec0);
1224 __select(mask0, inVec0, maxValsA);
1226 maxIndicesA = __select(mask0, firstHalfIndices,
// Two tail vectors: the last two full vectors of the input, the first folded into A and
// the second into B.
1232 else if (remBlockSize != 0 && remVecLen == 2) {
1236 inVec0 = *(c7x::float_vec *) (remStart - width);
1237 firstHalfIndices = c7x::uint_vec(length - (2 * width)) +
lastRunOffsets;
1238 mask0 = __cmp_lt_pred(maxValsA, inVec0);
1240 __select(mask0, inVec0, maxValsA);
1242 maxIndicesA = __select(mask0, firstHalfIndices,
1247 inVec1 = *(c7x::float_vec *) remStart;
1248 secondHalfIndices = c7x::uint_vec(length - (width)) +
lastRunOffsets;
1249 mask1 = __cmp_lt_pred(maxValsB, inVec1);
1250 maxValsB = __select(mask1, inVec1, maxValsB);
1251 maxIndicesB = __select(mask1, secondHalfIndices, maxIndicesB);
// Merge A and B per lane: A is kept only where it is strictly larger than B.
// NOTE(review): unlike the integer variants there is no smaller-index tie-break here, so
// on equal values the B index is taken even if A's index is lower - confirm intended.
1256 maskOfMaxsLarge = __cmp_lt_pred(maxValsB, maxValsA);
1257 maxValsLarge = __select(maskOfMaxsLarge, maxValsA, maxValsB);
1258 c7x::uint_vec maxIndicesLarge = __select(maskOfMaxsLarge, maxIndicesA, maxIndicesB);
// Publish the merged vectors as this path's result.
1263 maxVals = maxValsLarge;
1264 maxIndices = maxIndicesLarge;
// Fragment of what appears to be the DSPLIB_maxIndex_loopLogic<double, uint64_t> variant
// (values are c7x::double_vec, indices are c7x::ulong_vec; the signature is not part of
// this excerpt). It produces a per-lane maximum vector and the matching index vector.
// Comparisons here use __cmp_lt_pred with the operands swapped where the integer
// variants use __cmp_gt_pred.
//
// Lane indices for the first vector (0..3) and the second vector (4..7).
1276 c7x::ulong_vec maxIndices = c7x::ulong_vec(0, 1, 2, 3);
1277 c7x::ulong_vec maxIndices0 = c7x::ulong_vec(0, 1, 2, 3);
1278 c7x::ulong_vec maxIndices1 = c7x::ulong_vec(4, 5, 6, 7);
1280 c7x::ulong_vec maxIndicesA = c7x::ulong_vec(0, 1, 2, 3);
1281 c7x::ulong_vec maxIndicesB = c7x::ulong_vec(4, 5, 6, 7);
1282 c7x::ulong_vec firstHalfIndices = c7x::ulong_vec(0, 1, 2, 3);
1283 c7x::ulong_vec secondHalfIndices = c7x::ulong_vec(4, 5, 6, 7);
1285 c7x::double_vec maxVals0;
1286 c7x::double_vec maxVals1;
1289 size_t width = c7x::element_count_of<c7x::double_vec>::value;
1292 c7x::double_vec maxVals;
// Case 1: the input fits in one vector. Lanes at or beyond `length` are overwritten with
// the lowest finite double.
1294 if (length <= width) {
1295 maxVals = c7x::strm_eng<0, c7x::double_vec>::get_adv();
1297 for (
size_t i = length; i < width; i++) {
1298 maxVals.s[i] = std::numeric_limits<double>::lowest();
// Case 2: more than one vector but fewer than two. The lanes of the second vector from
// `remainingElement` onward are overwritten with the lowest value.
1302 else if (length < 2 * width) {
1303 maxVals0 = c7x::strm_eng<0, c7x::double_vec>::get_adv();
1304 maxVals1 = c7x::strm_eng<1, c7x::double_vec>::get_adv();
1306 size_t remainingElement = length % width;
1307 for (
size_t i = remainingElement; i < width; i++) {
1308 maxVals1.s[i] = std::numeric_limits<double>::lowest();
// The mask is set where vector 0 is strictly larger; those lanes keep vector 0's value
// and index (assuming __select returns its second argument where the predicate is set -
// confirm). No separate tie-break is applied in this variant.
1310 maskOfMaxs = __cmp_lt_pred(maxVals1, maxVals0);
1311 maxVals = __select(maskOfMaxs, maxVals0, maxVals1);
1312 maxIndices = __select(maskOfMaxs, maxIndices0, maxIndices1);
// Case 3 (branch header not visible): two running maxima, A fed from stream 0 and B fed
// from stream 1, both seeded with the lowest finite double.
1316 c7x::double_vec inVec0, inVec1;
1317 __vpred mask0, mask1, maskOfMaxsLarge;
1320 c7x::double_vec maxValsA = std::numeric_limits<double>::lowest();
1321 c7x::double_vec maxValsB = maxValsA;
1324 c7x::double_vec maxValsLarge = std::numeric_limits<double>::lowest();
// Main loop; numIterations is declared in lines not shown.
1329 for (
size_t i = 0; i < numIterations; i += 1) {
1330 inVec0 = c7x::strm_eng<0, c7x::double_vec>::get_adv();
1331 mask0 = __cmp_lt_pred(maxValsA, inVec0);
// NOTE(review): the assignment targets of the next two __select calls are on lines not
// shown; presumably maxValsA and maxIndicesA, mirroring the B updates below - confirm.
1333 __select(mask0, inVec0, maxValsA);
1336 __select(mask0, firstHalfIndices, maxIndicesA);
1339 inVec1 = c7x::strm_eng<1, c7x::double_vec>::get_adv();
1340 mask1 = __cmp_lt_pred(maxValsB, inVec1);
1341 maxValsB = __select(mask1, inVec1, maxValsB);
1342 maxIndicesB = __select(mask1, secondHalfIndices, maxIndicesB);
// Tail handling. remStart points one vector before the end of the input, so the tail is
// read as full vectors that end exactly at the last element. remBlockSize is declared in
// lines not shown.
1354 int32_t remVecLen = DSPLIB_ceilingDiv(remBlockSize, width);
1355 double *remStart = (
double *) pSrc + length - width;
// One tail vector, folded into A.
// NOTE(review): the line that recomputes firstHalfIndices for this case is not visible,
// and the maxIndicesA statement continues on a line not shown.
1358 if (remBlockSize != 0 && remVecLen == 1) {
1361 inVec0 = *(c7x::double_vec *) remStart;
1363 mask0 = __cmp_lt_pred(maxValsA, inVec0);
1365 __select(mask0, inVec0, maxValsA);
1367 maxIndicesA = __select(mask0, firstHalfIndices,
// Two tail vectors: the last two full vectors of the input, the first folded into A and
// the second into B. The first vector's lane indices are (length - 2 * width) +
// lastRunOffsetsDp.
// NOTE(review): lastRunOffsetsDp is not defined in this excerpt, and the line that
// recomputes secondHalfIndices is not visible.
1373 else if (remBlockSize != 0 && remVecLen == 2) {
1377 inVec0 = *(c7x::double_vec *) (remStart - width);
1378 firstHalfIndices = c7x::ulong_vec(length - (2 * width)) +
lastRunOffsetsDp;
1379 mask0 = __cmp_lt_pred(maxValsA, inVec0);
1381 __select(mask0, inVec0, maxValsA);
1383 maxIndicesA = __select(mask0, firstHalfIndices,
1388 inVec1 = *(c7x::double_vec *) remStart;
1390 mask1 = __cmp_lt_pred(maxValsB, inVec1);
1391 maxValsB = __select(mask1, inVec1, maxValsB);
1392 maxIndicesB = __select(mask1, secondHalfIndices, maxIndicesB);
// Merge A and B per lane: A is kept only where it is strictly larger than B.
// NOTE(review): unlike the integer variants there is no smaller-index tie-break here, so
// on equal values the B index is taken even if A's index is lower - confirm intended.
1398 maskOfMaxsLarge = __cmp_lt_pred(maxValsB, maxValsA);
1399 maxValsLarge = __select(maskOfMaxsLarge, maxValsA, maxValsB);
1400 c7x::ulong_vec maxIndicesLarge = __select(maskOfMaxsLarge, maxIndicesA, maxIndicesB);
// Publish the merged vectors as this path's result.
1402 maxVals = maxValsLarge;
1403 maxIndices = maxIndicesLarge;
#define SE_SE0_PARAM_OFFSET
#define SE_SE1_PARAM_OFFSET
metadata< uint16_t, uint16_t > DSPLIB_maxIndex_loopLogic< uint16_t, uint16_t >(size_t length, void *pSrc)
template DSPLIB_STATUS DSPLIB_maxIndex_init_ci< float >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_maxIndex_InitArgs *pKerInitArgs)
template DSPLIB_STATUS DSPLIB_maxIndex_exec_ci< float, uint32_t >(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
metadata< int16_t, uint16_t > DSPLIB_maxIndex_loopLogic< int16_t, uint16_t >(size_t length, void *pSrc)
template DSPLIB_STATUS DSPLIB_maxIndex_init_ci< int16_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_maxIndex_InitArgs *pKerInitArgs)
metadata< double, uint64_t > DSPLIB_maxIndex_loopLogic< double, uint64_t >(size_t length, void *pSrc)
template DSPLIB_STATUS DSPLIB_maxIndex_exec_ci< uint32_t, uint32_t >(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
template DSPLIB_STATUS DSPLIB_maxIndex_exec_ci< int32_t, uint32_t >(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
DSPLIB_STATUS DSPLIB_maxIndex_init_ci(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_maxIndex_InitArgs *pKerInitArgs)
This function is the initialization function for the C7x implementation of the kernel....
template DSPLIB_STATUS DSPLIB_maxIndex_init_ci< uint8_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_maxIndex_InitArgs *pKerInitArgs)
template DSPLIB_STATUS DSPLIB_maxIndex_exec_ci< double, uint64_t >(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
template DSPLIB_STATUS DSPLIB_maxIndex_init_ci< uint16_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_maxIndex_InitArgs *pKerInitArgs)
template DSPLIB_STATUS DSPLIB_maxIndex_init_ci< double >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_maxIndex_InitArgs *pKerInitArgs)
const c7x::uint_vec jumpFactor
template DSPLIB_STATUS DSPLIB_maxIndex_exec_ci< int16_t, uint16_t >(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
metadata< float, uint32_t > DSPLIB_maxIndex_loopLogic< float, uint32_t >(size_t length, void *pSrc)
const c7x::ushort_vec jumpFactorShort
template DSPLIB_STATUS DSPLIB_maxIndex_init_ci< int32_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_maxIndex_InitArgs *pKerInitArgs)
const c7x::uchar_vec jumpFactorChar
template DSPLIB_STATUS DSPLIB_maxIndex_exec_ci< uint16_t, uint16_t >(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
template DSPLIB_STATUS DSPLIB_maxIndex_exec_ci< int8_t, uint8_t >(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
const c7x::uint_vec lastRunOffsets
template DSPLIB_STATUS DSPLIB_maxIndex_init_ci< int8_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_maxIndex_InitArgs *pKerInitArgs)
template DSPLIB_STATUS DSPLIB_maxIndex_exec_ci< uint8_t, uint8_t >(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
metadata< uint8_t, uint8_t > DSPLIB_maxIndex_loopLogic< uint8_t, uint8_t >(size_t length, void *pSrc)
const c7x::uchar_vec lastRunOffsetsChar
#define INDEX_UNROLL_FACTOR
const c7x::ushort_vec lastRunOffsetsShort
metadata< T, TIndex > DSPLIB_maxIndex_loopLogic(size_t length, void *pSrc)
This function is the kernel loop helper function for the optimized implementation of the kernel....
const c7x::ulong_vec jumpFactorDp
metadata< int8_t, uint8_t > DSPLIB_maxIndex_loopLogic< int8_t, uint8_t >(size_t length, void *pSrc)
DSPLIB_STATUS DSPLIB_maxIndex_exec_ci(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
This function is the main execution function for the C7x implementation of the kernel....
const c7x::ulong_vec lastRunOffsetsDp
template DSPLIB_STATUS DSPLIB_maxIndex_init_ci< uint32_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_maxIndex_InitArgs *pKerInitArgs)
Header file for kernel's internal use. For the kernel's interface, please see DSPLIB_maxIndex.
DSPLIB_STATUS_NAME
The enumeration of all status codes.
void * DSPLIB_kernelHandle
Handle type for DSPLIB operations.
A structure for a 1 dimensional buffer descriptor.
Structure containing the parameters to initialize the kernel.
Structure that is reserved for internal use by the kernel.
uint8_t bufPblock[DSPLIB_MAXINDEX_IXX_IXX_OXX_PBLOCK_SIZE]
int32_t blockSize
Size of input buffer for different batches DSPLIB_maxIndex_init that will be retrieved and used by DS...