// Step of the unrolled accumulation loops (they advance lRow by UNROLL_COUNT).
// It is also used as the mask (UNROLL_COUNT - 1) applied to rowNumber, which
// equals rowNumber % UNROLL_COUNT only because the value is a power of two.
55 #define UNROLL_COUNT 4
// NOTE(review): not referenced in the visible part of this listing -- confirm its use.
56 #define MIN_HORIZONTAL_COLUMNS_FOR_UNROLL 2
// NOTE(review): not referenced in the visible part of this listing -- confirm its use.
57 #define NUM_VECS_IN_TILE 6
// Stream-template setup (presumably the body of DSPLIB_cholesky_c7x_PingPong_init;
// the function header is not visible in this listing -- TODO confirm).
// Builds two streaming-engine (SE) fetch templates and two streaming-address-
// generator (SA) templates that the compute kernel later adjusts and opens.
71 __SE_TEMPLATE_v1 seParamFetchL;
72 __SE_TEMPLATE_v1 seParamFetchR;
73 __SA_TEMPLATE_v1 saParamMulStore;
74 __SA_TEMPLATE_v1 saParamLStore;
// Element-type / vector-length settings, derived below from the full vector of dataType.
76 __SE_ELETYPE SE_ELETYPE;
77 __SE_VECLEN SE_VECLEN;
78 __SA_VECLEN SA_VECLEN;
82 typedef typename c7x::make_full_vector<dataType>::type vec;
84 SE_VECLEN = c7x::se_veclen<vec>::value;
85 SE_ELETYPE = c7x::se_eletype<vec>::value;
86 SA_VECLEN = c7x::sa_veclen<vec>::value;
// Number of dataType elements held by one full vector.
88 uint32_t eleCount = c7x::element_count_of<vec>::value;
// NOTE(review): the loop body is not visible here; presumably it derives the
// shift amount used elsewhere to divide by the vector length -- confirm.
91 uint32_t vecLenValue = eleCount;
92 while (vecLenValue != 0) {
// Row stride expressed in elements (stride is divided by the element size, so it
// is presumably given in bytes -- TODO confirm).
97 int32_t yStride = pKerPrivArgs->
stride /
sizeof(dataType);
// Left fetch template: dimension 0 is one vector of contiguous elements,
// dimension 1 steps by one row (yStride), dimension 2 steps by two vector widths.
// ICNT1 and ICNT2 are placeholders here; the kernel sets them before each open.
101 seParamFetchL = __gen_SE_TEMPLATE_v1();
103 seParamFetchL.ICNT0 = eleCount;
104 seParamFetchL.ICNT1 = 0;
105 seParamFetchL.DIM1 = yStride;
106 seParamFetchL.ICNT2 = 0;
107 seParamFetchL.DIM2 = eleCount << 1;
109 seParamFetchL.ELETYPE = SE_ELETYPE;
110 seParamFetchL.VECLEN = SE_VECLEN;
111 seParamFetchL.DIMFMT = __SE_DIMFMT_3D;
// Right fetch template: identical settings to the left one; the kernel opens it
// at a different start address.
116 seParamFetchR = __gen_SE_TEMPLATE_v1();
118 seParamFetchR.ICNT0 = eleCount;
119 seParamFetchR.ICNT1 = 0;
120 seParamFetchR.DIM1 = yStride;
121 seParamFetchR.ICNT2 = 0;
122 seParamFetchR.DIM2 = eleCount << 1;
124 seParamFetchR.ELETYPE = SE_ELETYPE;
125 seParamFetchR.VECLEN = SE_VECLEN;
126 seParamFetchR.DIMFMT = __SE_DIMFMT_3D;
// 1-D address generator advancing one element at a time over `order` elements.
// NOTE(review): judging by the name it addresses the multiplier buffer; its use
// is not visible in this listing -- confirm.
131 saParamMulStore = __gen_SA_TEMPLATE_v1();
133 saParamMulStore.ICNT0 = pKerPrivArgs->
order;
134 saParamMulStore.VECLEN = __SA_VECLEN_1ELEM;
135 saParamMulStore.DIMFMT = __SA_DIMFMT_1D;
// 1-D address generator advancing one full vector at a time; ICNT0 starts at
// `order` and is overwritten per block by the kernel.
140 saParamLStore = __gen_SA_TEMPLATE_v1();
142 saParamLStore.ICNT0 = pKerPrivArgs->
order;
143 saParamLStore.VECLEN = SA_VECLEN;
144 saParamLStore.DIMFMT = __SA_DIMFMT_1D;
// Initialization entry point for the C7x implementation (fragment; the signature
// and the return path are not visible in this listing).
159 template <
typename dataType>
// Values read from the kernel's private arguments.
169 uint8_t *pBlock = pKerPrivArgs->
bufPblock;
170 int32_t order = pKerPrivArgs->
order;
171 int32_t strideA = pKerPrivArgs->
stride;
// Stride converted from strideA units (presumably bytes -- TODO confirm) to elements.
172 int32_t colAStride = strideA /
sizeof(dataType);
// Set up the stream templates for the decomposition kernel, then those for the
// positive-definiteness check.
174 DSPLIB_cholesky_c7x_PingPong_init<dataType>(handle);
175 DSPLIB_cholesky_inplace_isPosDefinite_init<dataType>(order, colAStride, pBlock);
// Refinement of a reciprocal square root estimate x of a (fragment; the initial
// estimate of x and the return statement are not visible in this listing).
201 const dataType Half = 0.5f;
202 const dataType OneP5 = 1.5f;
// Two Newton-Raphson steps for 1/sqrt(a): x <- x * (1.5 - 0.5 * a * x * x).
207 x = x * (OneP5 - (a * x * x * Half));
208 x = x * (OneP5 - (a * x * x * Half));
// Decomposition kernel (fragment; the leading parameters and several declarations
// are not visible in this listing).
// pInALocal  - input matrix, read through address generator 2.
// pOutULocal - output factor; written through address generator 1 and read back
//              through the streaming engines as earlier rows are consumed.
// pMulBuffer - scratch buffer holding per-row scalar multipliers.
224 template <
typename dataType>
227 dataType *restrict pInALocal,
228 dataType *restrict pOutULocal,
229 dataType *restrict pMulBuffer)
233 typedef typename c7x::make_full_vector<dataType>::type vec;
234 int32_t eleCount = c7x::element_count_of<vec>::value;
// The load generator for A starts as a copy of the store generator for the output.
240 __SA_TEMPLATE_v1 saParamALoad = saParamLStore;
242 int32_t order = pKerPrivArgs->
order;
243 int32_t vecLen = eleCount;
245 int32_t row, fetch, lRow;
247 int32_t stride = pKerPrivArgs->
stride;
// Row stride in elements (stride presumably in bytes -- TODO confirm).
248 int32_t yStride = stride /
sizeof(dataType);
// Start of the region of the output that the streaming engines read from; it is
// advanced by vecLen after every block of rows.
250 dataType *pLFirstRow = pOutULocal;
251 dataType recipDiagValue;
// Permute mask applied to fetched vectors; it is advanced by vMaskIncrement once
// per row.
253 c7x::uchar_vec vMask, vMaskInit;
263 c7x::uchar_vec vMaskIncrement = DSPLIB_cholesky_getMaskIncrement<dataType>();
// Number of vecLen-wide blocks covering `order` (rounded up, assuming
// shiftForVecLenDiv is log2(vecLen) -- TODO confirm).
265 int32_t blockMax = int32_t((uint32_t) (order + vecLen - 1) >> (uint32_t) shiftForVecLenDiv);
// Rows by which the last block overshoots `order`; the expression yields vecLen
// when order is a multiple of vecLen, which the following `if` special-cases
// (its body is not visible here).
266 int32_t extraRows = vecLen - (int32_t) ((uint32_t) order & (uint32_t) (vecLen - 1));
269 if (extraRows == vecLen) {
// Running state: elements left per row, index of the row being produced, and the
// element count pre-biased for round-up division by vecLen.
273 int32_t elemsPerRow = order;
275 int32_t rowNumber = 0;
277 int32_t elemsPerRowCeil = elemsPerRow + vecLen - 1;
// Tables of LEZR settings, later indexed by rowNumber & (UNROLL_COUNT - 1).
// NOTE(review): the table declarations and the code that fills the counts are
// not visible here.
282 int32_t *lezrCountPtr = lezrCount;
283 __SE_LEZR *lezrDimPtr = lezrDim;
286 *lezrDimPtr = __SE_LEZR_OFF;
292 *lezrDimPtr = __SE_LEZR_ICNT1;
// Main phase: all blocks of vecLen rows except the last two (see the loop bound).
// Each row is produced from a left and a right vector column, and may need
// several such pairs.
297 for (block = 0; block < blockMax - 2; block++) {
// Limit the store/load address generators to the elements remaining per row.
299 saParamLStore.ICNT0 = elemsPerRow;
300 saParamALoad.ICNT0 = elemsPerRow;
303 for (row = 0; row < vecLen; row++) {
// Vectors needed to span the row, split between the left engine (rounded-up half)
// and the right engine (the remainder).
304 int32_t fetchesPerRow =
305 (int32_t) ((uint32_t) elemsPerRowCeil >> (uint32_t) shiftForVecLenDiv);
306 int32_t leftFetchesPerRow = (int32_t) ((uint32_t) (fetchesPerRow + 1) >> 1u);
307 int32_t rightFetchesPerRow = fetchesPerRow - leftFetchesPerRow;
// rowNumber modulo UNROLL_COUNT (the assignment target is on an elided line,
// presumably lezrIndex -- TODO confirm).
309 (int32_t) ((uint32_t) rowNumber & (uint32_t) (
UNROLL_COUNT - 1));
// Both engines walk the rowNumber previously produced rows; the LEZR settings are
// selected by lezrIndex.
312 seParamFetchL.ICNT1 = rowNumber;
313 seParamFetchL.ICNT2 = leftFetchesPerRow;
314 seParamFetchL.LEZR = lezrDim[lezrIndex];
315 seParamFetchL.LEZR_CNT = lezrCount[lezrIndex];
317 seParamFetchR.ICNT1 = rowNumber;
318 seParamFetchR.ICNT2 = rightFetchesPerRow;
319 seParamFetchR.LEZR = lezrDim[lezrIndex];
320 seParamFetchR.LEZR_CNT = lezrCount[lezrIndex];
// Engine 1 starts one vector to the right of engine 0.
323 __SE0_OPEN(pLFirstRow, seParamFetchL);
324 __SE1_OPEN(pLFirstRow + vecLen, seParamFetchR);
// Address generator 1 stores the output row; address generator 2 loads the row of A.
327 __SA1_OPEN(saParamLStore);
328 __SA2_OPEN(saParamALoad);
// NOTE(review): `offset` is computed on a line not visible in this listing.
330 dataType *ptrL = pOutULocal + offset;
331 dataType *ptrA = pInALocal + offset;
// Left and right vectors of the current row of A.
338 vec vLA = *(c7x::strm_agen<2, vec>::get_adv(ptrA));
345 vec vRA = *(c7x::strm_agen<2, vec>::get_adv(ptrA));
// First left/right pair: for each earlier row the scalar multiplier is taken from
// lane 0 of the permuted left vector, used for both accumulations, and written to
// the multiplier buffer for reuse by the remaining pairs.
// NOTE(review): the pMulStore increments and the accumulator declarations are on
// elided lines.
347 dataType *pMulStore = pMulBuffer;
348 for (lRow = 0; lRow < rowNumber; lRow +=
UNROLL_COUNT) {
349 vec vLL0 = c7x::strm_eng<0, vec>::get_adv();
350 vec vLL0Temp = c7x::reinterpret<vec>(__permute(vMask, __as_uchar64(vLL0)));
351 vLSum0 += vLL0 * vLL0Temp.s[0];
352 vec vLR0 = c7x::strm_eng<1, vec>::get_adv();
353 vRSum0 += vLR0 * vLL0Temp.s[0];
354 *pMulStore = vLL0Temp.s[0];
357 vec vLL1 = c7x::strm_eng<0, vec>::get_adv();
358 vec vLL1Temp = c7x::reinterpret<vec>(__permute(vMask, __as_uchar64(vLL1)));
359 vLSum1 += vLL1 * vLL1Temp.s[0];
360 vec vLR1 = c7x::strm_eng<1, vec>::get_adv();
361 vRSum1 += vLR1 * vLL1Temp.s[0];
362 *pMulStore = vLL1Temp.s[0];
365 vec vLL2 = c7x::strm_eng<0, vec>::get_adv();
366 vec vLL2Temp = c7x::reinterpret<vec>(__permute(vMask, __as_uchar64(vLL2)));
367 vLSum2 += vLL2 * vLL2Temp.s[0];
368 vec vLR2 = c7x::strm_eng<1, vec>::get_adv();
369 vRSum2 += vLR2 * vLL2Temp.s[0];
370 *pMulStore = vLL2Temp.s[0];
373 vec vLL3 = c7x::strm_eng<0, vec>::get_adv();
374 vec vLL3Temp = c7x::reinterpret<vec>(__permute(vMask, __as_uchar64(vLL3)));
375 vLSum3 += vLL3 * vLL3Temp.s[0];
376 vec vLR3 = c7x::strm_eng<1, vec>::get_adv();
377 vRSum3 += vLR3 * vLL3Temp.s[0];
378 *pMulStore = vLL3Temp.s[0];
// Subtract the accumulated products from A.
// NOTE(review): only the *Sum0 accumulators appear here; the other partial sums
// are presumably folded into them on elided lines -- confirm.
388 vec vLDiff = vLA - vLSum0;
391 vec vRDiff = vRA - vRSum0;
// NOTE(review): presumably the source of recipDiagValue (diagonal element moved
// to lane 0); the lines that compute it are not visible.
393 vec vLDiffTemp = c7x::reinterpret<vec>(__permute(vMask, __as_uchar64(vLDiff)));
// Scale by the reciprocal of the diagonal and store both vectors under the
// predicate supplied by address generator 1.
397 __vpred vpStoreL = c7x::strm_agen<1, vec>::get_vpred();
398 vec *outVecPtrL = c7x::strm_agen<1, vec>::get_adv(ptrL);
399 __vstore_pred(vpStoreL, outVecPtrL, vLDiff * recipDiagValue);
401 __vpred vpStoreR = c7x::strm_agen<1, vec>::get_vpred();
402 vec *outVecPtrR = c7x::strm_agen<1, vec>::get_adv(ptrL);
403 __vstore_pred(vpStoreR, outVecPtrR, vRDiff * recipDiagValue);
// Remaining left/right pairs of the same row: the multipliers are reloaded from
// the multiplier buffer instead of being extracted again.
406 for (fetch = 0; fetch < leftFetchesPerRow - 1; fetch++) {
412 vLA = *(c7x::strm_agen<2, vec>::get_adv(ptrA));
419 vRA = *(c7x::strm_agen<2, vec>::get_adv(ptrA));
421 pMulStore = pMulBuffer;
422 for (lRow = 0; lRow < rowNumber; lRow +=
UNROLL_COUNT) {
// Load one stored multiplier and replicate it across the vector.
423 vec multiplier0 = __vload_dup(pMulStore);
425 vec vLL0 = c7x::strm_eng<0, vec>::get_adv();
426 vec vLR0 = c7x::strm_eng<1, vec>::get_adv();
427 vLSum0 += vLL0 * multiplier0;
428 vRSum0 += vLR0 * multiplier0;
430 vec multiplier1 = __vload_dup(pMulStore);
432 vec vLL1 = c7x::strm_eng<0, vec>::get_adv();
433 vec vLR1 = c7x::strm_eng<1, vec>::get_adv();
434 vLSum1 += vLL1 * multiplier1;
435 vRSum1 += vLR1 * multiplier1;
437 vec multiplier2 = __vload_dup(pMulStore);
439 vec vLL2 = c7x::strm_eng<0, vec>::get_adv();
440 vec vLR2 = c7x::strm_eng<1, vec>::get_adv();
441 vLSum2 += vLL2 * multiplier2;
442 vRSum2 += vLR2 * multiplier2;
444 vec multiplier3 = __vload_dup(pMulStore);
446 vec vLL3 = c7x::strm_eng<0, vec>::get_adv();
447 vec vLR3 = c7x::strm_eng<1, vec>::get_adv();
448 vLSum3 += vLL3 * multiplier3;
449 vRSum3 += vLR3 * multiplier3;
// Same subtract / scale / predicated-store sequence as for the first pair.
458 vec vLDiff1 = vLA - vLSum0;
461 vec vRDiff1 = vRA - vRSum0;
463 __vpred vpStoreL1 = c7x::strm_agen<1, vec>::get_vpred();
464 vec *outVecPtrL1 = c7x::strm_agen<1, vec>::get_adv(ptrL);
465 __vstore_pred(vpStoreL1, outVecPtrL1, vLDiff1 * recipDiagValue);
467 __vpred vpStoreR1 = c7x::strm_agen<1, vec>::get_vpred();
468 vec *outVecPtrR1 = c7x::strm_agen<1, vec>::get_adv(ptrL);
469 __vstore_pred(vpStoreR1, outVecPtrR1, vRDiff1 * recipDiagValue);
// Advance the permute mask for the next row.
474 vMask += vMaskIncrement;
// Next block: move the fetch origin one vector to the right and shrink the
// per-row element counts by one vector.
476 pLFirstRow += vecLen;
478 elemsPerRow -= vecLen;
479 elemsPerRowCeil -= vecLen;
// Next-to-last block: each row needs exactly one left and one right vector
// (ICNT2 is 1 for both engines), so no multiplier buffer is used.
482 for (; block < blockMax - 1; block++) {
484 saParamLStore.ICNT0 = elemsPerRow;
485 saParamALoad.ICNT0 = elemsPerRow;
488 for (row = 0; row < vecLen; row++) {
// rowNumber modulo UNROLL_COUNT (assignment target on an elided line, presumably
// lezrIndex -- TODO confirm).
490 (int32_t) ((uint32_t) rowNumber & (uint32_t) (
UNROLL_COUNT - 1));
// Both engines walk the rowNumber previously produced rows, one vector each.
493 seParamFetchL.ICNT1 = rowNumber;
494 seParamFetchL.ICNT2 = 1;
495 seParamFetchL.LEZR = lezrDim[lezrIndex];
496 seParamFetchL.LEZR_CNT = lezrCount[lezrIndex];
498 seParamFetchR.ICNT1 = rowNumber;
499 seParamFetchR.ICNT2 = 1;
500 seParamFetchR.LEZR = lezrDim[lezrIndex];
501 seParamFetchR.LEZR_CNT = lezrCount[lezrIndex];
// Engine 1 starts one vector to the right of engine 0.
504 __SE0_OPEN(pLFirstRow, seParamFetchL);
505 __SE1_OPEN(pLFirstRow + vecLen, seParamFetchR);
508 __SA1_OPEN(saParamLStore);
509 __SA2_OPEN(saParamALoad);
// NOTE(review): `offset` is computed on a line not visible in this listing.
510 dataType *ptrL = pOutULocal + offset;
511 dataType *ptrA = pInALocal + offset;
// Left and right vectors of the current row of A.
518 vec vLA = *(c7x::strm_agen<2, vec>::get_adv(ptrA));
525 vec vRA = *(c7x::strm_agen<2, vec>::get_adv(ptrA));
// For each earlier row, lane 0 of the permuted left vector is the scalar
// multiplier applied to both the left and the right accumulation.
526 for (lRow = 0; lRow < rowNumber; lRow +=
UNROLL_COUNT) {
527 vec vLL0 = c7x::strm_eng<0, vec>::get_adv();
528 vec vLL0Temp = c7x::reinterpret<vec>(__permute(vMask, __as_uchar64(vLL0)));
529 vLSum0 += vLL0 * vLL0Temp.s[0];
530 vec vLR0 = c7x::strm_eng<1, vec>::get_adv();
531 vRSum0 += vLR0 * vLL0Temp.s[0];
533 vec vLL1 = c7x::strm_eng<0, vec>::get_adv();
534 vec vLL1Temp = c7x::reinterpret<vec>(__permute(vMask, __as_uchar64(vLL1)));
535 vLSum1 += vLL1 * vLL1Temp.s[0];
536 vec vLR1 = c7x::strm_eng<1, vec>::get_adv();
537 vRSum1 += vLR1 * vLL1Temp.s[0];
539 vec vLL2 = c7x::strm_eng<0, vec>::get_adv();
540 vec vLL2Temp = c7x::reinterpret<vec>(__permute(vMask, __as_uchar64(vLL2)));
541 vLSum2 += vLL2 * vLL2Temp.s[0];
542 vec vLR2 = c7x::strm_eng<1, vec>::get_adv();
543 vRSum2 += vLR2 * vLL2Temp.s[0];
545 vec vLL3 = c7x::strm_eng<0, vec>::get_adv();
546 vec vLL3Temp = c7x::reinterpret<vec>(__permute(vMask, __as_uchar64(vLL3)));
547 vLSum3 += vLL3 * vLL3Temp.s[0];
548 vec vLR3 = c7x::strm_eng<1, vec>::get_adv();
549 vRSum3 += vLR3 * vLL3Temp.s[0];
// Subtract the accumulated products from A.
// NOTE(review): the other partial sums are presumably folded into *Sum0 on elided
// lines -- confirm.
558 vec vLDiff = vLA - vLSum0;
561 vec vRDiff = vRA - vRSum0;
// NOTE(review): presumably the source of recipDiagValue; the lines that compute
// it are not visible.
563 vec vLDiffTemp = c7x::reinterpret<vec>(__permute(vMask, __as_uchar64(vLDiff)));
// Scale by the reciprocal of the diagonal and store under the predicate supplied
// by address generator 1.
567 __vpred vpStoreL = c7x::strm_agen<1, vec>::get_vpred();
568 vec *outVecPtrL = c7x::strm_agen<1, vec>::get_adv(ptrL);
569 __vstore_pred(vpStoreL, outVecPtrL, vLDiff * recipDiagValue);
571 __vpred vpStoreR = c7x::strm_agen<1, vec>::get_vpred();
572 vec *outVecPtrR = c7x::strm_agen<1, vec>::get_adv(ptrL);
573 __vstore_pred(vpStoreR, outVecPtrR, vRDiff * recipDiagValue);
// Advance the permute mask for the next row.
577 vMask += vMaskIncrement;
// Next block: move the fetch origin one vector to the right and shrink the row.
580 pLFirstRow += vecLen;
582 elemsPerRow -= vecLen;
// Last block: each row is a single vector wide, so the two engines split the
// earlier rows between them instead of splitting columns.
// NOTE(review): the conditions selecting between the three row variants below
// are on elided lines.
585 for (; block < blockMax; block++) {
587 saParamLStore.ICNT0 = elemsPerRow;
588 saParamALoad.ICNT0 = elemsPerRow;
// Switch both engines to 2-D fetches that advance two rows per step.
591 seParamFetchL.ICNT0 = vecLen;
592 seParamFetchL.DIM1 = (int32_t) ((uint32_t) yStride << 1u);
593 seParamFetchR.ICNT0 = vecLen;
594 seParamFetchR.DIM1 = (int32_t) ((uint32_t) yStride << 1u);
596 seParamFetchL.DIMFMT = __SE_DIMFMT_2D;
597 seParamFetchR.DIMFMT = __SE_DIMFMT_2D;
// Only the rows of this block that lie inside the matrix are processed.
601 for (row = 0; row < vecLen - extraRows; row++) {
// Variant using engine 0 only, with a single fetch.
606 seParamFetchL.ICNT1 = rowNumber;
609 __SE0_OPEN(pLFirstRow, seParamFetchL);
611 __SA1_OPEN(saParamLStore);
612 __SA2_OPEN(saParamALoad);
614 dataType *ptrA = pInALocal + offset;
616 vec vLA = *(c7x::strm_agen<2, vec>::get_adv(ptrA));
619 dataType *ptrL = pOutULocal + offset;
// Read the engine's current vector without advancing the stream.
621 vec vLL0 = c7x::strm_eng<0, vec>::get();
622 vec vLL0Temp = c7x::reinterpret<vec>(__permute(vMask, __as_uchar64(vLL0)));
623 vLSum0 += vLL0 * vLL0Temp.s[0];
625 vec vLDiff = vLA - vLSum0;
627 vec vLDiffTemp = c7x::reinterpret<vec>(__permute(vMask, __as_uchar64(vLDiff)));
631 __vpred vpStoreL = c7x::strm_agen<1, vec>::get_vpred();
632 vec *outVecPtrL = c7x::strm_agen<1, vec>::get_adv(ptrL);
633 __vstore_pred(vpStoreL, outVecPtrL, vLDiff * recipDiagValue);
// Variant that opens no streaming engine: only the row of A is loaded.
// NOTE(review): the definition of vLDiff for this variant is on an elided line.
640 __SA1_OPEN(saParamLStore);
641 __SA2_OPEN(saParamALoad);
643 dataType *ptrA = pInALocal + offset;
644 vec vLA = *(c7x::strm_agen<2, vec>::get_adv(ptrA));
645 dataType *ptrL = pOutULocal + offset;
648 vec vLDiffTemp = c7x::reinterpret<vec>(__permute(vMask, __as_uchar64(vLDiff)));
651 __vpred vpStoreL = c7x::strm_agen<1, vec>::get_vpred();
652 vec *outVecPtrL = c7x::strm_agen<1, vec>::get_adv(ptrL);
653 __vstore_pred(vpStoreL, outVecPtrL, vLDiff * recipDiagValue);
// General variant: the rowNumber earlier rows are split between the engines;
// engine 0 gets the rounded-up half, engine 1 the rounded-down half.
660 int32_t upFetchesPerRow = (int32_t) ((uint32_t) (rowNumber + 1) >> 1u);
661 int32_t downFetchesPerRow = (int32_t) ((uint32_t) rowNumber >> 1u);
664 seParamFetchL.ICNT1 = upFetchesPerRow;
665 seParamFetchR.ICNT1 = downFetchesPerRow;
// Engine 1 starts one row below engine 0; with the two-row step set above, the
// engines read alternating rows.
667 __SE0_OPEN(pLFirstRow, seParamFetchL);
668 __SE1_OPEN(pLFirstRow + yStride, seParamFetchR);
670 __SA1_OPEN(saParamLStore);
671 __SA2_OPEN(saParamALoad);
672 dataType *ptrL = pOutULocal + offset;
673 dataType *ptrA = pInALocal + offset;
684 vec vLA = *(c7x::strm_agen<2, vec>::get_adv(ptrA));
// The engines deliver different rows here, so each fetched vector supplies its
// own scalar multiplier (lane 0 after the permute).
686 for (lRow = 0; lRow < upFetchesPerRow; lRow +=
UNROLL_COUNT) {
687 vec vLL0 = c7x::strm_eng<0, vec>::get_adv();
688 vec vLL0Temp = c7x::reinterpret<vec>(__permute(vMask, __as_uchar64(vLL0)));
689 vLSum0 += vLL0 * vLL0Temp.s[0];
691 vec vLR0 = c7x::strm_eng<1, vec>::get_adv();
692 vec vLR0Temp = c7x::reinterpret<vec>(__permute(vMask, __as_uchar64(vLR0)));
693 vRSum0 += vLR0 * vLR0Temp.s[0];
695 vec vLL1 = c7x::strm_eng<0, vec>::get_adv();
696 vec vLL1Temp = c7x::reinterpret<vec>(__permute(vMask, __as_uchar64(vLL1)));
697 vLSum1 += vLL1 * vLL1Temp.s[0];
698 vec vLR1 = c7x::strm_eng<1, vec>::get_adv();
699 vec vLR1Temp = c7x::reinterpret<vec>(__permute(vMask, __as_uchar64(vLR1)));
700 vRSum1 += vLR1 * vLR1Temp.s[0];
702 vec vLL2 = c7x::strm_eng<0, vec>::get_adv();
703 vec vLL2Temp = c7x::reinterpret<vec>(__permute(vMask, __as_uchar64(vLL2)));
704 vLSum2 += vLL2 * vLL2Temp.s[0];
705 vec vLR2 = c7x::strm_eng<1, vec>::get_adv();
706 vec vLR2Temp = c7x::reinterpret<vec>(__permute(vMask, __as_uchar64(vLR2)));
707 vRSum2 += vLR2 * vLR2Temp.s[0];
709 vec vLL3 = c7x::strm_eng<0, vec>::get_adv();
710 vec vLL3Temp = c7x::reinterpret<vec>(__permute(vMask, __as_uchar64(vLL3)));
711 vLSum3 += vLL3 * vLL3Temp.s[0];
712 vec vLR3 = c7x::strm_eng<1, vec>::get_adv();
713 vec vLR3Temp = c7x::reinterpret<vec>(__permute(vMask, __as_uchar64(vLR3)));
714 vRSum3 += vLR3 * vLR3Temp.s[0];
// Both engines contribute to the same output vector, so both sums are subtracted.
// NOTE(review): only vLSum2 / vRSum2 appear here; the other partial sums are
// presumably folded into them on elided lines -- confirm.
725 vec vLDiff = vLA - vLSum2 - vRSum2;
727 vec vLDiffTemp = c7x::reinterpret<vec>(__permute(vMask, __as_uchar64(vLDiff)));
731 __vpred vpStoreL = c7x::strm_agen<1, vec>::get_vpred();
732 vec *outVecPtrL = c7x::strm_agen<1, vec>::get_adv(ptrL);
733 __vstore_pred(vpStoreL, outVecPtrL, vLDiff * recipDiagValue);
// Advance the permute mask for the next row.
738 vMask += vMaskIncrement;
741 pLFirstRow += vecLen;
743 elemsPerRow -= vecLen;
// Main execution entry point for the C7x implementation (fragment; the leading
// parameters and the calls that use these locals are not visible in this listing).
755 template <
typename dataType>
758 void *restrict pOutU,
759 void *restrict pMulBuffer)
// Typed views of the untyped buffers passed by the caller.
766 dataType *pLocalA = (dataType *) pInA;
767 dataType *pOutULocal = (dataType *) pOutU;
769 dataType *pLocalMul = (dataType *) pMulBuffer;
// Values read from the kernel's private arguments.
770 uint8_t *pBlock = pKerPrivArgs->
bufPblock;
771 int32_t order = pKerPrivArgs->
order;
772 int32_t enable_test = pKerPrivArgs->
enableTest;
773 typedef typename c7x::make_full_vector<dataType>::type vec;
// Number of dataType elements held by one full vector.
774 int32_t eleCount = c7x::element_count_of<vec>::value;
797 void *restrict pOutU,
798 void *restrict pMulBuffer);
802 void *restrict pOutU,
803 void *restrict pMulBuffer);
template DSPLIB_STATUS DSPLIB_cholesky_exec_ci< float >(DSPLIB_kernelHandle handle, void *restrict pInA, void *restrict pOutU, void *restrict pMulBuffer)
c7x::uchar_vec DSPLIB_cholesky_getMaskIncrement()
c7x::uchar_vec DSPLIB_cholesky_getMaskIncrement< float >()
template DSPLIB_STATUS DSPLIB_cholesky_init_ci< float >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams2D_t *bufParamsIn, const DSPLIB_bufParams2D_t *bufParamsOut, const DSPLIB_cholesky_InitArgs *pKerInitArgs)
c7x::uchar_vec DSPLIB_cholesky_getMaskIncrement< double >()
DSPLIB_STATUS DSPLIB_cholesky_init_ci(DSPLIB_kernelHandle handle, const DSPLIB_bufParams2D_t *bufParamsIn, const DSPLIB_bufParams2D_t *bufParamsOut, const DSPLIB_cholesky_InitArgs *pKerInitArgs)
This function is the initialization function for the C7x implementation of the kernel....
template DSPLIB_STATUS DSPLIB_cholesky_init_ci< double >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams2D_t *bufParamsIn, const DSPLIB_bufParams2D_t *bufParamsOut, const DSPLIB_cholesky_InitArgs *pKerInitArgs)
DSPLIB_STATUS DSPLIB_cholesky_c7x_PingPong_init(DSPLIB_kernelHandle handle)
DSPLIB_STATUS DSPLIB_cholesky_exec_ci(DSPLIB_kernelHandle handle, void *restrict pInA, void *restrict pOutU, void *restrict pMulBuffer)
This function is the main execution function for the C7x implementation of the kernel....
template DSPLIB_STATUS DSPLIB_cholesky_c7x_PingPong_init< double >(DSPLIB_kernelHandle handle)
template DSPLIB_STATUS DSPLIB_cholesky_exec_ci< double >(DSPLIB_kernelHandle handle, void *restrict pInA, void *restrict pOutU, void *restrict pMulBuffer)
dataType getRecipSqrt(dataType a)
DSPLIB_STATUS DSPLIB_cholesky_c7x_PingPong(int enable_test, DSPLIB_cholesky_PrivArgs *pKerPrivArgs, dataType *restrict pInALocal, dataType *restrict pOutULocal, dataType *restrict pMulBuffer)
template DSPLIB_STATUS DSPLIB_cholesky_c7x_PingPong_init< float >(DSPLIB_kernelHandle handle)
dataType DSPLIB_cholesky_inplace_isPosDefinite(dataType *A, const int32_t order, const int32_t eleCount, uint8_t *pBlock)
#define SA_SA0_PARAM_OFFSET
#define SA_SA1_PARAM_OFFSET
#define SE_SE2_PARAM_OFFSET
#define SE_SE3_PARAM_OFFSET
Header file for kernel's internal use. For the kernel's interface, please see DSPLIB_cholesky.
#define DSPLIB_DEBUGPRINTFN(N, fmt,...)
DSPLIB_STATUS_NAME
The enumeration of all status codes.
void * DSPLIB_kernelHandle
Handle type for DSPLIB operations.
A structure for a 2 dimensional buffer descriptor.
Structure containing the parameters to initialize the kernel.
Structure that is reserved for internal use by the kernel.
int32_t order
Order of input buffer for different batches DSPLIB_cholesky_init that will be retrieved and used by D...
uint8_t bufPblock[DSPLIB_cholesky_IXX_IXX_OXX_PBLOCK_SIZE]
int32_t shiftForVecLenDiv