// Step of the `lRow` accumulation loops in this file (`lRow += UNROLL_COUNT`).
// The loop bodies visible here are hand-unrolled into four fetch/accumulate
// groups (suffixes 0..3), so this value and those bodies have to change together.
// It is also used as a bit mask, `rowNumber & (UNROLL_COUNT - 1)`; that form
// only equals `rowNumber % UNROLL_COUNT` while the value is a power of two.
55 #define UNROLL_COUNT 4
// Not referenced in the portion of the file visible here.
// NOTE(review): the name suggests a minimum column count below which the
// unrolled path is not taken - confirm against the code that uses it.
56 #define MIN_HORIZONTAL_COLUMNS_FOR_UNROLL 2
// Not referenced in the portion of the file visible here.
// NOTE(review): presumably the number of vectors making up one processing
// tile - confirm against the code that uses it.
57 #define NUM_VECS_IN_TILE 6
71 __SE_TEMPLATE_v1 seParamFetchL;
72 __SE_TEMPLATE_v1 seParamFetchR;
73 __SA_TEMPLATE_v1 saParamMulStore;
74 __SA_TEMPLATE_v1 saParamLStore;
76 __SE_ELETYPE SE_ELETYPE;
77 __SE_VECLEN SE_VECLEN;
78 __SA_VECLEN SA_VECLEN;
82 typedef typename c7x::make_full_vector<dataType>::type vec;
84 SE_VECLEN = c7x::se_veclen<vec>::value;
85 SE_ELETYPE = c7x::se_eletype<vec>::value;
86 SA_VECLEN = c7x::sa_veclen<vec>::value;
88 uint32_t eleCount = c7x::element_count_of<vec>::value;
91 uint32_t vecLenValue = eleCount;
92 while (vecLenValue != 0) {
97 int32_t yStride = pKerPrivArgs->
stride /
sizeof(dataType);
101 seParamFetchL = __gen_SE_TEMPLATE_v1();
103 seParamFetchL.ICNT0 = eleCount;
104 seParamFetchL.ICNT1 = 0;
105 seParamFetchL.DIM1 = yStride;
106 seParamFetchL.ICNT2 = 0;
107 seParamFetchL.DIM2 = eleCount << 1;
109 seParamFetchL.ELETYPE = SE_ELETYPE;
110 seParamFetchL.VECLEN = SE_VECLEN;
111 seParamFetchL.DIMFMT = __SE_DIMFMT_3D;
116 seParamFetchR = __gen_SE_TEMPLATE_v1();
118 seParamFetchR.ICNT0 = eleCount;
119 seParamFetchR.ICNT1 = 0;
120 seParamFetchR.DIM1 = yStride;
121 seParamFetchR.ICNT2 = 0;
122 seParamFetchR.DIM2 = eleCount << 1;
124 seParamFetchR.ELETYPE = SE_ELETYPE;
125 seParamFetchR.VECLEN = SE_VECLEN;
126 seParamFetchR.DIMFMT = __SE_DIMFMT_3D;
131 saParamMulStore = __gen_SA_TEMPLATE_v1();
133 saParamMulStore.ICNT0 = pKerPrivArgs->
order;
134 saParamMulStore.VECLEN = __SA_VECLEN_1ELEM;
135 saParamMulStore.DIMFMT = __SA_DIMFMT_1D;
140 saParamLStore = __gen_SA_TEMPLATE_v1();
142 saParamLStore.ICNT0 = pKerPrivArgs->
order;
143 saParamLStore.VECLEN = SA_VECLEN;
144 saParamLStore.DIMFMT = __SA_DIMFMT_1D;
159 template <
typename dataType>
169 uint8_t *pBlock = pKerPrivArgs->
bufPblock;
170 int32_t order = pKerPrivArgs->
order;
171 int32_t strideA = pKerPrivArgs->
stride;
172 int32_t colAStride = strideA /
sizeof(dataType);
174 DSPLIB_cholesky_inplace_c7x_PingPong_init<dataType>(handle);
175 DSPLIB_cholesky_inplace_isPosDefinite_init<dataType>(order, colAStride, pBlock);
201 const dataType Half = 0.5f;
202 const dataType OneP5 = 1.5f;
207 x = x * (OneP5 - (a * x * x * Half));
208 x = x * (OneP5 - (a * x * x * Half));
224 template <
typename dataType>
227 dataType *restrict pInALocal,
228 dataType *restrict pOutULocal,
229 dataType *restrict pMulBuffer)
234 typedef typename c7x::make_full_vector<dataType>::type vec;
235 int32_t eleCount = c7x::element_count_of<vec>::value;
241 __SA_TEMPLATE_v1 saParamALoad = saParamLStore;
243 int32_t order = pKerPrivArgs->
order;
244 int32_t vecLen = eleCount;
246 int32_t row, fetch, lRow;
248 int32_t stride = pKerPrivArgs->
stride;
249 int32_t yStride = stride /
sizeof(dataType);
251 dataType *pLFirstRow = pOutULocal;
252 dataType recipDiagValue;
254 c7x::uchar_vec vMask, vMaskInit;
264 c7x::uchar_vec vMaskIncrement = DSPLIB_cholesky_inplace_getMaskIncrement<dataType>();
266 int32_t blockMax = int32_t((uint32_t) (order + vecLen - 1) >> (uint32_t) shiftForVecLenDiv);
267 int32_t extraRows = vecLen - (int32_t) ((uint32_t) order & (uint32_t) (vecLen - 1));
270 if (extraRows == vecLen) {
274 int32_t elemsPerRow = order;
276 int32_t rowNumber = 0;
278 int32_t elemsPerRowCeil = elemsPerRow + vecLen - 1;
283 int32_t *lezrCountPtr = lezrCount;
284 __SE_LEZR *lezrDimPtr = lezrDim;
287 *lezrDimPtr = __SE_LEZR_OFF;
293 *lezrDimPtr = __SE_LEZR_ICNT1;
298 for (block = 0; block < blockMax - 2; block++) {
300 saParamLStore.ICNT0 = elemsPerRow;
301 saParamALoad.ICNT0 = elemsPerRow;
304 for (row = 0; row < vecLen; row++) {
305 int32_t fetchesPerRow =
306 (int32_t) ((uint32_t) elemsPerRowCeil >> (uint32_t) shiftForVecLenDiv);
307 int32_t leftFetchesPerRow = (int32_t) ((uint32_t) (fetchesPerRow + 1) >> 1u);
308 int32_t rightFetchesPerRow = fetchesPerRow - leftFetchesPerRow;
310 (int32_t) ((uint32_t) rowNumber & (uint32_t) (
UNROLL_COUNT - 1));
313 seParamFetchL.ICNT1 = rowNumber;
314 seParamFetchL.ICNT2 = leftFetchesPerRow;
315 seParamFetchL.LEZR = lezrDim[lezrIndex];
316 seParamFetchL.LEZR_CNT = lezrCount[lezrIndex];
318 seParamFetchR.ICNT1 = rowNumber;
319 seParamFetchR.ICNT2 = rightFetchesPerRow;
320 seParamFetchR.LEZR = lezrDim[lezrIndex];
321 seParamFetchR.LEZR_CNT = lezrCount[lezrIndex];
324 __SE0_OPEN(pLFirstRow, seParamFetchL);
325 __SE1_OPEN(pLFirstRow + vecLen, seParamFetchR);
328 __SA1_OPEN(saParamLStore);
329 __SA2_OPEN(saParamALoad);
331 dataType *ptrL = pOutULocal + offset;
332 dataType *ptrA = pInALocal + offset;
339 vec vLA = *(c7x::strm_agen<2, vec>::get_adv(ptrA));
346 vec vRA = *(c7x::strm_agen<2, vec>::get_adv(ptrA));
348 dataType *pMulStore = pMulBuffer;
350 for (lRow = 0; lRow < rowNumber; lRow +=
UNROLL_COUNT) {
351 vec vLL0 = c7x::strm_eng<0, vec>::get_adv();
352 vec vLL0Temp = c7x::reinterpret<vec>(__permute(vMask, __as_uchar64(vLL0)));
353 vLSum0 += vLL0 * vLL0Temp.s[0];
354 vec vLR0 = c7x::strm_eng<1, vec>::get_adv();
355 vRSum0 += vLR0 * vLL0Temp.s[0];
356 *pMulStore = vLL0Temp.s[0];
359 vec vLL1 = c7x::strm_eng<0, vec>::get_adv();
360 vec vLL1Temp = c7x::reinterpret<vec>(__permute(vMask, __as_uchar64(vLL1)));
361 vLSum1 += vLL1 * vLL1Temp.s[0];
362 vec vLR1 = c7x::strm_eng<1, vec>::get_adv();
363 vRSum1 += vLR1 * vLL1Temp.s[0];
364 *pMulStore = vLL1Temp.s[0];
367 vec vLL2 = c7x::strm_eng<0, vec>::get_adv();
368 vec vLL2Temp = c7x::reinterpret<vec>(__permute(vMask, __as_uchar64(vLL2)));
369 vLSum2 += vLL2 * vLL2Temp.s[0];
370 vec vLR2 = c7x::strm_eng<1, vec>::get_adv();
371 vRSum2 += vLR2 * vLL2Temp.s[0];
372 *pMulStore = vLL2Temp.s[0];
375 vec vLL3 = c7x::strm_eng<0, vec>::get_adv();
376 vec vLL3Temp = c7x::reinterpret<vec>(__permute(vMask, __as_uchar64(vLL3)));
377 vLSum3 += vLL3 * vLL3Temp.s[0];
378 vec vLR3 = c7x::strm_eng<1, vec>::get_adv();
379 vRSum3 += vLR3 * vLL3Temp.s[0];
380 *pMulStore = vLL3Temp.s[0];
391 vec vLDiff = vLA - vLSum0;
394 vec vRDiff = vRA - vRSum0;
396 vec vLDiffTemp = c7x::reinterpret<vec>(__permute(vMask, __as_uchar64(vLDiff)));
400 __vpred vpStoreL = c7x::strm_agen<1, vec>::get_vpred();
401 vec *outVecPtrL = c7x::strm_agen<1, vec>::get_adv(ptrL);
402 __vstore_pred(vpStoreL, outVecPtrL, vLDiff * recipDiagValue);
404 __vpred vpStoreR = c7x::strm_agen<1, vec>::get_vpred();
405 vec *outVecPtrR = c7x::strm_agen<1, vec>::get_adv(ptrL);
406 __vstore_pred(vpStoreR, outVecPtrR, vRDiff * recipDiagValue);
410 for (fetch = 0; fetch < leftFetchesPerRow - 1; fetch++) {
416 vLA = *(c7x::strm_agen<2, vec>::get_adv(ptrA));
423 vRA = *(c7x::strm_agen<2, vec>::get_adv(ptrA));
425 pMulStore = pMulBuffer;
427 for (lRow = 0; lRow < rowNumber; lRow +=
UNROLL_COUNT) {
428 vec multiplier0 = __vload_dup(pMulStore);
430 vec vLL0 = c7x::strm_eng<0, vec>::get_adv();
431 vec vLR0 = c7x::strm_eng<1, vec>::get_adv();
432 vLSum0 += vLL0 * multiplier0;
433 vRSum0 += vLR0 * multiplier0;
435 vec multiplier1 = __vload_dup(pMulStore);
437 vec vLL1 = c7x::strm_eng<0, vec>::get_adv();
438 vec vLR1 = c7x::strm_eng<1, vec>::get_adv();
439 vLSum1 += vLL1 * multiplier1;
440 vRSum1 += vLR1 * multiplier1;
442 vec multiplier2 = __vload_dup(pMulStore);
444 vec vLL2 = c7x::strm_eng<0, vec>::get_adv();
445 vec vLR2 = c7x::strm_eng<1, vec>::get_adv();
446 vLSum2 += vLL2 * multiplier2;
447 vRSum2 += vLR2 * multiplier2;
449 vec multiplier3 = __vload_dup(pMulStore);
451 vec vLL3 = c7x::strm_eng<0, vec>::get_adv();
452 vec vLR3 = c7x::strm_eng<1, vec>::get_adv();
453 vLSum3 += vLL3 * multiplier3;
454 vRSum3 += vLR3 * multiplier3;
464 vec vLDiff1 = vLA - vLSum0;
467 vec vRDiff1 = vRA - vRSum0;
469 __vpred vpStoreL1 = c7x::strm_agen<1, vec>::get_vpred();
470 vec *outVecPtrL1 = c7x::strm_agen<1, vec>::get_adv(ptrL);
471 __vstore_pred(vpStoreL1, outVecPtrL1, vLDiff1 * recipDiagValue);
473 __vpred vpStoreR1 = c7x::strm_agen<1, vec>::get_vpred();
474 vec *outVecPtrR1 = c7x::strm_agen<1, vec>::get_adv(ptrL);
475 __vstore_pred(vpStoreR1, outVecPtrR1, vRDiff1 * recipDiagValue);
480 vMask += vMaskIncrement;
483 pLFirstRow += vecLen;
485 elemsPerRow -= vecLen;
486 elemsPerRowCeil -= vecLen;
489 for (; block < blockMax - 1; block++) {
491 saParamLStore.ICNT0 = elemsPerRow;
492 saParamALoad.ICNT0 = elemsPerRow;
495 for (row = 0; row < vecLen; row++) {
497 (int32_t) ((uint32_t) rowNumber & (uint32_t) (
UNROLL_COUNT - 1));
500 seParamFetchL.ICNT1 = rowNumber;
501 seParamFetchL.ICNT2 = 1;
502 seParamFetchL.LEZR = lezrDim[lezrIndex];
503 seParamFetchL.LEZR_CNT = lezrCount[lezrIndex];
505 seParamFetchR.ICNT1 = rowNumber;
506 seParamFetchR.ICNT2 = 1;
507 seParamFetchR.LEZR = lezrDim[lezrIndex];
508 seParamFetchR.LEZR_CNT = lezrCount[lezrIndex];
511 __SE0_OPEN(pLFirstRow, seParamFetchL);
512 __SE1_OPEN(pLFirstRow + vecLen, seParamFetchR);
515 __SA1_OPEN(saParamLStore);
516 __SA2_OPEN(saParamALoad);
518 dataType *ptrL = pOutULocal + offset;
519 dataType *ptrA = pInALocal + offset;
526 vec vLA = *(c7x::strm_agen<2, vec>::get_adv(ptrA));
533 vec vRA = *(c7x::strm_agen<2, vec>::get_adv(ptrA));
535 for (lRow = 0; lRow < rowNumber; lRow +=
UNROLL_COUNT) {
536 vec vLL0 = c7x::strm_eng<0, vec>::get_adv();
537 vec vLL0Temp = c7x::reinterpret<vec>(__permute(vMask, __as_uchar64(vLL0)));
538 vLSum0 += vLL0 * vLL0Temp.s[0];
539 vec vLR0 = c7x::strm_eng<1, vec>::get_adv();
540 vRSum0 += vLR0 * vLL0Temp.s[0];
542 vec vLL1 = c7x::strm_eng<0, vec>::get_adv();
543 vec vLL1Temp = c7x::reinterpret<vec>(__permute(vMask, __as_uchar64(vLL1)));
544 vLSum1 += vLL1 * vLL1Temp.s[0];
545 vec vLR1 = c7x::strm_eng<1, vec>::get_adv();
546 vRSum1 += vLR1 * vLL1Temp.s[0];
548 vec vLL2 = c7x::strm_eng<0, vec>::get_adv();
549 vec vLL2Temp = c7x::reinterpret<vec>(__permute(vMask, __as_uchar64(vLL2)));
550 vLSum2 += vLL2 * vLL2Temp.s[0];
551 vec vLR2 = c7x::strm_eng<1, vec>::get_adv();
552 vRSum2 += vLR2 * vLL2Temp.s[0];
554 vec vLL3 = c7x::strm_eng<0, vec>::get_adv();
555 vec vLL3Temp = c7x::reinterpret<vec>(__permute(vMask, __as_uchar64(vLL3)));
556 vLSum3 += vLL3 * vLL3Temp.s[0];
557 vec vLR3 = c7x::strm_eng<1, vec>::get_adv();
558 vRSum3 += vLR3 * vLL3Temp.s[0];
568 vec vLDiff = vLA - vLSum0;
571 vec vRDiff = vRA - vRSum0;
573 vec vLDiffTemp = c7x::reinterpret<vec>(__permute(vMask, __as_uchar64(vLDiff)));
577 __vpred vpStoreL = c7x::strm_agen<1, vec>::get_vpred();
578 vec *outVecPtrL = c7x::strm_agen<1, vec>::get_adv(ptrL);
579 __vstore_pred(vpStoreL, outVecPtrL, vLDiff * recipDiagValue);
581 __vpred vpStoreR = c7x::strm_agen<1, vec>::get_vpred();
582 vec *outVecPtrR = c7x::strm_agen<1, vec>::get_adv(ptrL);
583 __vstore_pred(vpStoreR, outVecPtrR, vRDiff * recipDiagValue);
587 vMask += vMaskIncrement;
590 pLFirstRow += vecLen;
592 elemsPerRow -= vecLen;
595 for (; block < blockMax; block++) {
597 saParamLStore.ICNT0 = elemsPerRow;
598 saParamALoad.ICNT0 = elemsPerRow;
601 seParamFetchL.ICNT0 = vecLen;
602 seParamFetchL.DIM1 = (int32_t) ((uint32_t) yStride << 1u);
603 seParamFetchR.ICNT0 = vecLen;
604 seParamFetchR.DIM1 = (int32_t) ((uint32_t) yStride << 1u);
606 seParamFetchL.DIMFMT = __SE_DIMFMT_2D;
607 seParamFetchR.DIMFMT = __SE_DIMFMT_2D;
611 for (row = 0; row < vecLen - extraRows; row++) {
616 seParamFetchL.ICNT1 = rowNumber;
620 __SE0_OPEN(pLFirstRow, seParamFetchL);
622 __SA1_OPEN(saParamLStore);
623 __SA2_OPEN(saParamALoad);
625 dataType *ptrA = pInALocal + offset;
627 vec vLA = *(c7x::strm_agen<2, vec>::get_adv(ptrA));
630 dataType *ptrL = pOutULocal + offset;
632 vec vLL0 = c7x::strm_eng<0, vec>::get();
633 vec vLL0Temp = c7x::reinterpret<vec>(__permute(vMask, __as_uchar64(vLL0)));
634 vLSum0 += vLL0 * vLL0Temp.s[0];
636 vec vLDiff = vLA - vLSum0;
638 vec vLDiffTemp = c7x::reinterpret<vec>(__permute(vMask, __as_uchar64(vLDiff)));
642 __vpred vpStoreL = c7x::strm_agen<1, vec>::get_vpred();
643 vec *outVecPtrL = c7x::strm_agen<1, vec>::get_adv(ptrL);
644 __vstore_pred(vpStoreL, outVecPtrL, vLDiff * recipDiagValue);
650 __SA1_OPEN(saParamLStore);
651 __SA2_OPEN(saParamALoad);
653 dataType *ptrA = pInALocal + offset;
654 vec vLA = *(c7x::strm_agen<2, vec>::get_adv(ptrA));
655 dataType *ptrL = pOutULocal + offset;
658 vec vLDiffTemp = c7x::reinterpret<vec>(__permute(vMask, __as_uchar64(vLDiff)));
661 __vpred vpStoreL = c7x::strm_agen<1, vec>::get_vpred();
662 vec *outVecPtrL = c7x::strm_agen<1, vec>::get_adv(ptrL);
663 __vstore_pred(vpStoreL, outVecPtrL, vLDiff * recipDiagValue);
670 int32_t upFetchesPerRow = (int32_t) ((uint32_t) (rowNumber + 1) >> 1u);
671 int32_t downFetchesPerRow = (int32_t) ((uint32_t) rowNumber >> 1u);
674 seParamFetchL.ICNT1 = upFetchesPerRow;
675 seParamFetchR.ICNT1 = downFetchesPerRow;
677 __SE0_OPEN(pLFirstRow, seParamFetchL);
678 __SE1_OPEN(pLFirstRow + yStride, seParamFetchR);
680 __SA1_OPEN(saParamLStore);
681 __SA2_OPEN(saParamALoad);
683 dataType *ptrL = pOutULocal + offset;
684 dataType *ptrA = pInALocal + offset;
696 vec vLA = *(c7x::strm_agen<2, vec>::get_adv(ptrA));
698 for (lRow = 0; lRow < upFetchesPerRow; lRow +=
UNROLL_COUNT) {
699 vec vLL0 = c7x::strm_eng<0, vec>::get_adv();
700 vec vLL0Temp = c7x::reinterpret<vec>(__permute(vMask, __as_uchar64(vLL0)));
701 vLSum0 += vLL0 * vLL0Temp.s[0];
703 vec vLR0 = c7x::strm_eng<1, vec>::get_adv();
704 vec vLR0Temp = c7x::reinterpret<vec>(__permute(vMask, __as_uchar64(vLR0)));
705 vRSum0 += vLR0 * vLR0Temp.s[0];
707 vec vLL1 = c7x::strm_eng<0, vec>::get_adv();
708 vec vLL1Temp = c7x::reinterpret<vec>(__permute(vMask, __as_uchar64(vLL1)));
709 vLSum1 += vLL1 * vLL1Temp.s[0];
710 vec vLR1 = c7x::strm_eng<1, vec>::get_adv();
711 vec vLR1Temp = c7x::reinterpret<vec>(__permute(vMask, __as_uchar64(vLR1)));
712 vRSum1 += vLR1 * vLR1Temp.s[0];
714 vec vLL2 = c7x::strm_eng<0, vec>::get_adv();
715 vec vLL2Temp = c7x::reinterpret<vec>(__permute(vMask, __as_uchar64(vLL2)));
716 vLSum2 += vLL2 * vLL2Temp.s[0];
717 vec vLR2 = c7x::strm_eng<1, vec>::get_adv();
718 vec vLR2Temp = c7x::reinterpret<vec>(__permute(vMask, __as_uchar64(vLR2)));
719 vRSum2 += vLR2 * vLR2Temp.s[0];
721 vec vLL3 = c7x::strm_eng<0, vec>::get_adv();
722 vec vLL3Temp = c7x::reinterpret<vec>(__permute(vMask, __as_uchar64(vLL3)));
723 vLSum3 += vLL3 * vLL3Temp.s[0];
724 vec vLR3 = c7x::strm_eng<1, vec>::get_adv();
725 vec vLR3Temp = c7x::reinterpret<vec>(__permute(vMask, __as_uchar64(vLR3)));
726 vRSum3 += vLR3 * vLR3Temp.s[0];
738 vec vLDiff = vLA - vLSum2 - vRSum2;
740 vec vLDiffTemp = c7x::reinterpret<vec>(__permute(vMask, __as_uchar64(vLDiff)));
744 __vpred vpStoreL = c7x::strm_agen<1, vec>::get_vpred();
745 vec *outVecPtrL = c7x::strm_agen<1, vec>::get_adv(ptrL);
746 __vstore_pred(vpStoreL, outVecPtrL, vLDiff * recipDiagValue);
751 vMask += vMaskIncrement;
754 pLFirstRow += vecLen;
756 elemsPerRow -= vecLen;
768 template <
typename dataType>
776 dataType *pLocalA = (dataType *) pA;
777 dataType *pLocalMul = (dataType *) pMul;
778 uint8_t *pBlock = pKerPrivArgs->
bufPblock;
779 int32_t order = pKerPrivArgs->
order;
780 int32_t enable_test = pKerPrivArgs->
enableTest;
781 typedef typename c7x::make_full_vector<dataType>::type vec;
782 int32_t eleCount = c7x::element_count_of<vec>::value;
dataType DSPLIB_cholesky_inplace_isPosDefinite(dataType *A, const int32_t order, const int32_t eleCount, uint8_t *pBlock)
#define SA_SA0_PARAM_OFFSET
#define SA_SA1_PARAM_OFFSET
#define SE_SE2_PARAM_OFFSET
#define SE_SE3_PARAM_OFFSET
c7x::uchar_vec DSPLIB_cholesky_inplace_getMaskIncrement< float >()
c7x::uchar_vec DSPLIB_cholesky_inplace_getMaskIncrement< double >()
DSPLIB_STATUS DSPLIB_cholesky_inplace_init_ci(DSPLIB_kernelHandle handle, DSPLIB_bufParams2D_t *bufParamsA, DSPLIB_bufParams1D_t *bufParamsMul, const DSPLIB_cholesky_inplace_InitArgs *pKerInitArgs)
This function is the initialization function for the C7x implementation of the kernel....
template DSPLIB_STATUS DSPLIB_cholesky_inplace_exec_ci< float >(DSPLIB_kernelHandle handle, void *restrict pA, void *restrict pMul)
DSPLIB_STATUS DSPLIB_cholesky_inplace_c7x_PingPong_init(DSPLIB_kernelHandle handle)
template DSPLIB_STATUS DSPLIB_cholesky_inplace_c7x_PingPong_init< float >(DSPLIB_kernelHandle handle)
c7x::uchar_vec DSPLIB_cholesky_inplace_getMaskIncrement()
template DSPLIB_STATUS DSPLIB_cholesky_inplace_init_ci< double >(DSPLIB_kernelHandle handle, DSPLIB_bufParams2D_t *bufParamsA, DSPLIB_bufParams1D_t *bufParamsMul, const DSPLIB_cholesky_inplace_InitArgs *pKerInitArgs)
DSPLIB_STATUS DSPLIB_cholesky_inplace_exec_ci(DSPLIB_kernelHandle handle, void *restrict pA, void *restrict pMul)
This function is the main execution function for the C7x implementation of the kernel....
DSPLIB_STATUS DSPLIB_cholesky_inplace_c7x_PingPong(int enable_test, DSPLIB_cholesky_inplace_PrivArgs *pKerPrivArgs, dataType *restrict pInALocal, dataType *restrict pOutULocal, dataType *restrict pMulBuffer)
dataType getRecipSqrt(dataType a)
template DSPLIB_STATUS DSPLIB_cholesky_inplace_init_ci< float >(DSPLIB_kernelHandle handle, DSPLIB_bufParams2D_t *bufParamsA, DSPLIB_bufParams1D_t *bufParamsMul, const DSPLIB_cholesky_inplace_InitArgs *pKerInitArgs)
template DSPLIB_STATUS DSPLIB_cholesky_inplace_exec_ci< double >(DSPLIB_kernelHandle handle, void *restrict pA, void *restrict pMul)
template DSPLIB_STATUS DSPLIB_cholesky_inplace_c7x_PingPong_init< double >(DSPLIB_kernelHandle handle)
Header file for the kernel's internal use. For the kernel's interface, please see DSPLIB_cholesky_inplace...
#define DSPLIB_DEBUGPRINTFN(N, fmt,...)
DSPLIB_STATUS_NAME
The enumeration of all status codes.
void * DSPLIB_kernelHandle
Handle type for DSPLIB operations.
A structure for a 1 dimensional buffer descriptor.
A structure for a 2 dimensional buffer descriptor.
Structure containing the parameters to initialize the kernel.
Structure that is reserved for internal use by the kernel.
uint8_t bufPblock[DSPLIB_CHOLESKY_INPLACE_IXX_IXX_OXX_PBLOCK_SIZE]
int32_t order
Order of input buffer for different batches DSPLIB_cholesky_inplace_init that will be retrieved and u...
int32_t shiftForVecLenDiv