47 #include "../common/c71/DSPLIB_inlines.h"
// Body fragment of a setup routine (its signature is not present in this
// listing; presumably DSPLIB_cholesky_solver_backSubstitution_init_ci -- confirm).
// It builds seven SE (strm_eng) / SA (strm_agen) templates and stores them in
// consecutive SE_PARAM_SIZE-sized slots of pBlock:
//   slot 0: seDiagReadParams   slot 1: saWriteXParams   slot 2: seDivReadParams
//   slot 3: saDivStoreParams   slot 4: seReadXParams    slot 5: seBlockParams
//   slot 6: saReverseParams
// Fields not assigned here (e.g. ICNT1 of slots 4 and 5) are filled in by the
// routines that later load the templates back from pBlock.
60 uint8_t *pBlock = pKerPrivArgs->
bufPblock;
61 int32_t order = pKerPrivArgs->
order;
62 int32_t strideL = pKerPrivArgs->
stride;
// strideL is divided by sizeof(dataType), so colLstride is the stride counted
// in dataType elements (assumes the stored stride is in bytes -- TODO confirm).
63 int32_t colLstride = strideL /
sizeof(dataType);
// Full-width vector type for dataType and the number of elements it holds.
65 typedef typename c7x::make_full_vector<dataType>::type vec;
66 uint32_t eleCount = c7x::element_count_of<vec>::value;
// Template field values derived from the vector / element type.
68 __SE_ELETYPE SE_ELETYPE = c7x::se_eletype<vec>::value;
69 __SE_VECLEN SE_VECLEN = c7x::se_veclen<vec>::value;
70 __SA_VECLEN SA_VECLEN = c7x::sa_veclen<vec>::value;
71 __SE_ELEDUP SE_ELEDUP = c7x::se_eledup<dataType>::value;
// Slot 0: one element per fetch, `order` fetches, advancing colLstride + 1
// each time. With a row stride of colLstride elements that step moves one row
// down and one column right (assuming DIM1 is expressed in elements).
73 __SE_TEMPLATE_v1 seDiagReadParams = __gen_SE_TEMPLATE_v1();
74 seDiagReadParams.ICNT0 = 1;
75 seDiagReadParams.ICNT1 = order;
76 seDiagReadParams.DIM1 = colLstride + 1;
77 seDiagReadParams.DIMFMT = __SE_DIMFMT_2D;
78 seDiagReadParams.ELETYPE = SE_ELETYPE;
79 seDiagReadParams.VECLEN = __SE_VECLEN_1ELEM;
80 *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (0 * SE_PARAM_SIZE)) = seDiagReadParams;
// Slot 1: SA template addressing one element at a time, `order` times, with a
// DIM1 of -1. The consumers visible in this file that want the opposite
// direction overwrite DIM1 with 1 after loading the template.
82 __SA_TEMPLATE_v1 saWriteXParams = __gen_SA_TEMPLATE_v1();
83 saWriteXParams.ICNT0 = 1;
84 saWriteXParams.ICNT1 = order;
85 saWriteXParams.DIM1 = -1;
86 saWriteXParams.DIMFMT = __SA_DIMFMT_2D;
87 saWriteXParams.VECLEN = SA_VECLEN;
88 *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (1 * SE_PARAM_SIZE)) = saWriteXParams;
// Slot 2: 1-D SE read of `order` elements using full vectors.
90 __SE_TEMPLATE_v1 seDivReadParams = __gen_SE_TEMPLATE_v1();
91 seDivReadParams.ICNT0 = order;
92 seDivReadParams.DIMFMT = __SE_DIMFMT_1D;
93 seDivReadParams.ELETYPE = SE_ELETYPE;
94 seDivReadParams.VECLEN = SE_VECLEN;
95 *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (2 * SE_PARAM_SIZE)) = seDivReadParams;
// Slot 3: 1-D SA template over `order` elements, the SA counterpart of slot 2.
97 __SA_TEMPLATE_v1 saDivStoreParams = __gen_SA_TEMPLATE_v1();
98 saDivStoreParams.ICNT0 = order;
99 saDivStoreParams.DIMFMT = __SA_DIMFMT_1D;
100 saDivStoreParams.VECLEN = SA_VECLEN;
101 *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (3 * SE_PARAM_SIZE)) = saDivStoreParams;
// Slot 4: SE read of one element per fetch with ELEDUP taken from
// c7x::se_eledup<dataType> and DIM1 = -1. ICNT1 is left unset here.
103 __SE_TEMPLATE_v1 seReadXParams = __gen_SE_TEMPLATE_v1();
104 seReadXParams.ICNT0 = 1;
105 seReadXParams.DIM1 = -1;
106 seReadXParams.DIMFMT = __SE_DIMFMT_2D;
107 seReadXParams.VECLEN = SE_VECLEN;
108 seReadXParams.ELETYPE = SE_ELETYPE;
109 seReadXParams.ELEDUP = SE_ELEDUP;
110 *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (4 * SE_PARAM_SIZE)) = seReadXParams;
// Slot 5: SE read of eleCount elements per fetch, stepping by -colLstride
// between fetches. ICNT1 is left unset here.
112 __SE_TEMPLATE_v1 seBlockParams = __gen_SE_TEMPLATE_v1();
113 seBlockParams.ICNT0 = eleCount;
114 seBlockParams.DIM1 = -colLstride;
115 seBlockParams.DIMFMT = __SE_DIMFMT_2D;
116 seBlockParams.ELETYPE = SE_ELETYPE;
117 seBlockParams.VECLEN = SE_VECLEN;
118 *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (5 * SE_PARAM_SIZE)) = seBlockParams;
// Slot 6: SA template addressing eleCount elements at a time and stepping by
// -eleCount between groups. eleCount is unsigned, hence the cast before
// negation. ICNT1 is left unset here.
120 __SA_TEMPLATE_v1 saReverseParams = __gen_SA_TEMPLATE_v1();
121 saReverseParams.ICNT0 = eleCount;
122 saReverseParams.DIM1 = -((int32_t)eleCount);
123 saReverseParams.DIMFMT = __SA_DIMFMT_2D;
124 saReverseParams.VECLEN = SA_VECLEN;
125 *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (6 * SE_PARAM_SIZE)) = saReverseParams;
// Fragment of a templated initialization routine (presumably
// DSPLIB_cholesky_solver_init_ci; its signature and local declarations are not
// present in this listing).
132 template <
typename dataType>
// Run the SE/SA parameter-block setup for this handle.
149 DSPLIB_cholesky_solver_backSubstitution_init_ci<dataType>(handle);
// The nested matrix-transpose kernel is configured with both dimensions set
// to the solver's order.
151 matTransInitArgs.
dimX = pKerPrivArgs->
order;
152 matTransInitArgs.
dimY = pKerPrivArgs->
order;
// Initialize the transpose kernel with the U and scratch buffer descriptors.
160 DSPLIB_matTrans_init_ci<dataType>(matTransPrivArgs, bufParamsU, bufParamsScratch, &matTransInitArgs);
// getElement: helper that writes one value taken from vector inVec, selected by
// index, to *element. The primary template is only declared; float and double
// versions follow (their braces are not present in this listing).
191 template <typename dataType, typename V = typename c7x::make_full_vector<dataType>::type>
192 inline void getElement(V inVec, uint32_t index, dataType *element);
// float version: view the vector as an int vector, call __vgetw_vrd with
// index, and reinterpret the result as float.
193 template <
typename V>
inline void getElement(V inVec, uint32_t index,
float *element)
195 *element = __as_float(__vgetw_vrd(c7x::as_int_vec(inVec), index));
// double version: same pattern using the long-vector view, __vgetd_vrd and
// __as_double.
198 template <
typename V>
inline void getElement(V inVec, uint32_t index,
double *element)
200 *element = __as_double(__vgetd_vrd(c7x::as_long_vec(inVec), index));
// Fragment of the forward-substitution routine (presumably
// DSPLIB_cholesky_solver_forwardSubstitution_ci; its signature, braces and
// several statements are not present in this listing). Visible stages:
//   1. copy `order` elements read from pL through the slot-0 template into pDiv,
//   2. replace each pDiv entry by its reciprocal, in place,
//   3. solve block by block, eleCount unknowns per block, storing through SA1.
203 template <
typename dataType>
214 __SE_TEMPLATE_v1 seDivReadParams;
215 __SA_TEMPLATE_v1 saDivStoreParams;
216 __SE_TEMPLATE_v1 seDiagReadParams;
217 __SA_TEMPLATE_v1 saDiagWriteParams;
// Load the templates prepared by the setup code from their pBlock slots.
218 seDiagReadParams = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (0 * SE_PARAM_SIZE));
219 saDiagWriteParams = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (1 * SE_PARAM_SIZE));
220 seDivReadParams = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (2 * SE_PARAM_SIZE));
221 saDivStoreParams = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (3 * SE_PARAM_SIZE));
// Slot 1 is stored with DIM1 = -1; here the writes must advance forward.
223 saDiagWriteParams.DIM1 = 1;
225 typedef typename c7x::make_full_vector<dataType>::type vec;
226 int32_t eleCount = c7x::element_count_of<vec>::value;
// Number of full-width vectors needed to cover `order` elements (rounded up).
227 int32_t nVec = DSPLIB_ceilingDiv(order, eleCount);
// Stage 1: SE1 streams single elements from pL using the slot-0 template;
// SA1 generates the matching store addresses in pDiv.
230 __SE1_OPEN(pL, seDiagReadParams);
231 __SA1_OPEN(saDiagWriteParams);
234 for (row = 0; row < order; row++) {
235 vec vecDiag = c7x::strm_eng<1, vec>::get_adv();
// Predicated store: the predicate comes from the SA1 state.
237 __vpred predDiag = c7x::strm_agen<1, vec>::get_vpred();
238 vec *pStoreDiag = c7x::strm_agen<1, vec>::get_adv(pDiv);
239 __vstore_pred(predDiag, pStoreDiag, vecDiag);
// Stage 2: SE0 reads pDiv and SA0 writes pDiv, so the reciprocals overwrite
// the values gathered in stage 1.
244 __SE0_OPEN(pDiv, seDivReadParams);
245 __SA0_OPEN(saDivStoreParams);
246 dataType TwoP0 = 2.0;
// Main loop handles four vectors per iteration. Each reciprocal starts from
// the __recip() estimate and is refined twice with y = y * (2 - v * y)
// (Newton-Raphson step for 1/v).
249 for (ii = 0; ii < nVec - 3; ii += 4) {
250 vec v1 = c7x::strm_eng<0, vec>::get_adv();
251 vec v2 = c7x::strm_eng<0, vec>::get_adv();
252 vec v3 = c7x::strm_eng<0, vec>::get_adv();
253 vec v4 = c7x::strm_eng<0, vec>::get_adv();
255 vec yy1 = __recip(v1);
256 yy1 = yy1 * (TwoP0 - v1 * yy1);
257 yy1 = yy1 * (TwoP0 - v1 * yy1);
259 vec yy2 = __recip(v2);
260 yy2 = yy2 * (TwoP0 - v2 * yy2);
261 yy2 = yy2 * (TwoP0 - v2 * yy2);
263 vec yy3 = __recip(v3);
264 yy3 = yy3 * (TwoP0 - v3 * yy3);
265 yy3 = yy3 * (TwoP0 - v3 * yy3);
267 vec yy4 = __recip(v4);
268 yy4 = yy4 * (TwoP0 - v4 * yy4);
269 yy4 = yy4 * (TwoP0 - v4 * yy4);
// Store the four results in the same order they were read.
271 __vpred predDiv1 = c7x::strm_agen<0, vec>::get_vpred();
272 vec *pStoreDiv1 = c7x::strm_agen<0, vec>::get_adv(pDiv);
273 __vstore_pred(predDiv1, pStoreDiv1, yy1);
275 __vpred predDiv2 = c7x::strm_agen<0, vec>::get_vpred();
276 vec *pStoreDiv2 = c7x::strm_agen<0, vec>::get_adv(pDiv);
277 __vstore_pred(predDiv2, pStoreDiv2, yy2);
279 __vpred predDiv3 = c7x::strm_agen<0, vec>::get_vpred();
280 vec *pStoreDiv3 = c7x::strm_agen<0, vec>::get_adv(pDiv);
281 __vstore_pred(predDiv3, pStoreDiv3, yy3);
283 __vpred predDiv4 = c7x::strm_agen<0, vec>::get_vpred();
284 vec *pStoreDiv4 = c7x::strm_agen<0, vec>::get_adv(pDiv);
285 __vstore_pred(predDiv4, pStoreDiv4, yy4);
// Remainder loop: the vectors left over when nVec is not a multiple of four.
289 for (; ii < nVec; ii++) {
290 vec v1 = c7x::strm_eng<0, vec>::get_adv();
292 vec yy1 = __recip(v1);
293 yy1 = yy1 * (TwoP0 - v1 * yy1);
294 yy1 = yy1 * (TwoP0 - v1 * yy1);
296 __vpred predDiv1 = c7x::strm_agen<0, vec>::get_vpred();
297 vec *pStoreDiv1 = c7x::strm_agen<0, vec>::get_adv(pDiv);
298 __vstore_pred(predDiv1, pStoreDiv1, yy1);
// Stage 3: forward substitution.
308 __SE_TEMPLATE_v1 seBlockParams;
309 __SE_TEMPLATE_v1 seReadYParams;
310 __SA_TEMPLATE_v1 saWriteYParams;
311 __SA_TEMPLATE_v1 sa1DReadParams;
313 saWriteYParams = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (1 * SE_PARAM_SIZE));
314 seReadYParams = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (4 * SE_PARAM_SIZE));
315 seBlockParams = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (5 * SE_PARAM_SIZE));
// NOTE(review): slot 2 is the slot the setup code fills with seDivReadParams,
// an __SE_TEMPLATE_v1; the 1-D __SA_TEMPLATE_v1 (saDivStoreParams) is stored in
// slot 3. Confirm that reinterpreting slot 2 as an SA template is intended.
316 sa1DReadParams = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (2 * SE_PARAM_SIZE));
// The stored templates carry negative DIM1 values; this pass walks forward,
// so the steps are overridden with positive ones.
318 saWriteYParams.DIM1 = 1;
319 seReadYParams.DIM1 = 1;
320 seBlockParams.DIM1 = colLstride;
// pSA3 addresses the reciprocals computed in stage 2. pSE0, pSA1 and pSA2
// are set up on lines that are not present in this listing.
325 dataType *pSA3 = pDiv;
327 __SA1_OPEN(saWriteYParams);
328 __SA2_OPEN(sa1DReadParams);
329 __SA3_OPEN(sa1DReadParams);
// One iteration per vector-wide block of unknowns.
332 for (int32_t block = 0; block < nVec; block++) {
// Load this block's values from pSA2 and its reciprocals from pSA3.
334 __vpred predB = c7x::strm_agen<2, vec>::get_vpred();
335 vec *pLoadB = c7x::strm_agen<2, vec>::get_adv(pSA2);
336 vec vecB = __vload_pred(predB, pLoadB);
338 __vpred predDiv = c7x::strm_agen<3, vec>::get_vpred();
339 vec *pLoadDiv = c7x::strm_agen<3, vec>::get_adv(pSA3);
340 vec vecDiv = __vload_pred(predDiv, pLoadDiv);
// sumRows: number of unknowns solved by earlier blocks.
// totalRows: sumRows plus the eleCount unknowns of this block.
// NOTE(review): in the last block totalRows can exceed `order` when order is
// not a multiple of eleCount -- confirm the SE0 reads stay inside the buffer.
342 int32_t sumRows = block * eleCount;
343 int32_t totalRows = sumRows + eleCount;
345 seBlockParams.ICNT1 = totalRows;
346 seReadYParams.ICNT1 = sumRows;
// SE0 streams matrix data; SE1 streams the already computed entries of pY.
348 __SE0_OPEN(pSE0, seBlockParams);
350 __SE1_OPEN(pY, seReadYParams);
// Four partial accumulators for the 4x unrolled loop below.
354 vec vecSum = (vec) 0;
355 vec vecSum1 = (vec) 0;
356 vec vecSum2 = (vec) 0;
357 vec vecSum3 = (vec) 0;
358 vec vecSum4 = (vec) 0;
359 int32_t vertical = 0;
// Contribution of previously solved unknowns, four SE0/SE1 pairs per
// iteration. The statements that update vecSum1..vecSum4 are not present in
// this listing.
361 for (vertical = 0; vertical < sumRows - 3; vertical += 4) {
362 vec v1 = c7x::strm_eng<0, vec>::get_adv();
363 vec y1 = c7x::strm_eng<1, vec>::get_adv();
366 vec v2 = c7x::strm_eng<0, vec>::get_adv();
367 vec y2 = c7x::strm_eng<1, vec>::get_adv();
370 vec v3 = c7x::strm_eng<0, vec>::get_adv();
371 vec y3 = c7x::strm_eng<1, vec>::get_adv();
374 vec v4 = c7x::strm_eng<0, vec>::get_adv();
375 vec y4 = c7x::strm_eng<1, vec>::get_adv();
380 vecSum = vecSum1 + vecSum2 + vecSum3 + vecSum4;
// Solve the eleCount unknowns of this block one at a time. Each step scales
// (vecB - vecSum) by the reciprocals, folds the new value into vecSum and
// stores it through SA1. resultEle1 is defined on a line that is not present
// in this listing.
384 for (vertical = 0; vertical < eleCount; vertical++) {
385 vec v1 = c7x::strm_eng<0, vec>::get_adv();
386 vec result1 = (vecB - vecSum) * vecDiv;
388 vecSum += v1 * (resultEle1);
389 __vpred predYCalc = c7x::strm_agen<1, vec>::get_vpred();
390 vec *pStoreY = c7x::strm_agen<1, vec>::get_adv(pSA1);
391 __vstore_pred(predYCalc, pStoreY, (vec) resultEle1);
// Fragment of the back-substitution routine (presumably
// DSPLIB_cholesky_solver_backSubstitution_ci; its signature, braces and several
// statements are not present in this listing). It starts from the last
// element and works backwards: first in full blocks of eleCount unknowns, then
// in one partial block of order % eleCount unknowns.
419 template <
typename dataType>
430 typedef typename c7x::make_full_vector<dataType>::type vec;
431 int32_t eleCount = c7x::element_count_of<vec>::value;
// Integer division: number of complete vector-wide blocks, and what is left.
435 int32_t totalBlocks = order / eleCount;
436 int32_t remainingEle = order - (totalBlocks * eleCount);
438 __SE_TEMPLATE_v1 seBlockParams;
439 __SE_TEMPLATE_v1 seReadXParams;
440 __SA_TEMPLATE_v1 saWriteXParams;
441 __SA_TEMPLATE_v1 saReverseParams;
// Load the templates prepared by the setup code from their pBlock slots.
443 saWriteXParams = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (1 * SE_PARAM_SIZE));
444 seReadXParams = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (4 * SE_PARAM_SIZE));
445 seBlockParams = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (5 * SE_PARAM_SIZE));
446 saReverseParams = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (6 * SE_PARAM_SIZE));
// These two assignments repeat the values the setup code already stored in
// slot 4; ICNT1 of the reverse SA template is set to the full-block count.
448 seReadXParams.ICNT0 = 1;
449 seReadXParams.DIM1 = -1;
450 saReverseParams.ICNT1 = totalBlocks;
// Starting addresses: the last diagonal element of pL, the last element of
// pX, and the last eleCount-wide groups of pL's final row, pY and pDiv.
452 dataType *pLLastElem = &pL[(order - 1) + ((order - 1) * colLstride)];
453 dataType *pXLastElem = &pX[order - 1];
454 dataType *pSE0 = pLLastElem - (eleCount - 1);
455 dataType *pSA1 = pX + order - 1;
456 dataType *pSA2 = pY + order - eleCount;
457 dataType *pSA3 = pDiv + order - eleCount;
// SA1 produces the store addresses for the results, starting at pSA1 and
// using the slot-1 template as stored (DIM1 = -1).
459 __SA1_OPEN(saWriteXParams);
// Full blocks.
461 if (totalBlocks > 0) {
462 __SA2_OPEN(saReverseParams);
463 __SA3_OPEN(saReverseParams);
466 for (int32_t block = 0; block < totalBlocks; block++) {
// Load this block's pY values and pDiv reciprocals.
467 __vpred predY = c7x::strm_agen<2, vec>::get_vpred();
468 vec *pLoadY = c7x::strm_agen<2, vec>::get_adv(pSA2);
469 vec vecY = __vload_pred(predY, pLoadY);
471 __vpred predDiv = c7x::strm_agen<3, vec>::get_vpred();
472 vec *pLoadDiv = c7x::strm_agen<3, vec>::get_adv(pSA3);
473 vec vecDiv = __vload_pred(predDiv, pLoadDiv);
// sumRows: unknowns already solved; totalRows additionally covers this block.
475 int32_t sumRows = block * eleCount;
476 int32_t totalRows = sumRows + eleCount;
478 seBlockParams.ICNT1 = totalRows;
479 seReadXParams.ICNT1 = sumRows;
// SE0 streams matrix data from pSE0; SE1 streams solved values starting at
// the last element of pX and stepping by DIM1 = -1.
481 __SE0_OPEN(pSE0, seBlockParams);
483 __SE1_OPEN(pXLastElem, seReadXParams);
// Four partial accumulators for the 4x unrolled loop below.
487 vec vecSum = (vec) 0;
488 vec vecSum1 = (vec) 0;
489 vec vecSum2 = (vec) 0;
490 vec vecSum3 = (vec) 0;
491 vec vecSum4 = (vec) 0;
492 int32_t vertical = 0;
// Contribution of already solved unknowns. The statements that update
// vecSum1..vecSum4 are not present in this listing.
495 for (vertical = 0; vertical < sumRows - 3; vertical += 4) {
496 vec v1 = c7x::strm_eng<0, vec>::get_adv();
497 vec x1 = c7x::strm_eng<1, vec>::get_adv();
500 vec v2 = c7x::strm_eng<0, vec>::get_adv();
501 vec x2 = c7x::strm_eng<1, vec>::get_adv();
504 vec v3 = c7x::strm_eng<0, vec>::get_adv();
505 vec x3 = c7x::strm_eng<1, vec>::get_adv();
508 vec v4 = c7x::strm_eng<0, vec>::get_adv();
509 vec x4 = c7x::strm_eng<1, vec>::get_adv();
514 vecSum = vecSum1 + vecSum2 + vecSum3 + vecSum4;
// Initialised to the highest element index of a vector; how it is used is on
// lines that are not present in this listing.
517 uint32_t vecIndex = eleCount - 1;
// Solve the eleCount unknowns of this block one at a time and store each as a
// scalar through SA1. resultEle1 is defined on a line not present here.
520 for (vertical = 0; vertical < eleCount; vertical++) {
521 vec v1 = c7x::strm_eng<0, vec>::get_adv();
522 vec result1 = (vecY - vecSum) * vecDiv;
524 vecSum += v1 * (resultEle1);
525 dataType *pStoreX1 = c7x::strm_agen<1, dataType>::get_adv(pSA1);
526 *pStoreX1 = resultEle1;
// Partial block: the remainingEle unknowns not covered by the full blocks.
539 if (remainingEle > 0) {
// Narrow the matrix stream to remainingEle elements per fetch over all rows.
540 seBlockParams.ICNT0 = remainingEle;
541 seBlockParams.ICNT1 = order;
543 seReadXParams.ICNT1 = order;
// A single group of remainingEle elements, no stepping.
545 saReverseParams.ICNT0 = remainingEle;
546 saReverseParams.ICNT1 = 1;
547 saReverseParams.DIM1 = 0;
// The matrix stream now starts at the first element of the last row.
549 pSE0 = &pL[(order - 1) * colLstride];
550 __SE0_OPEN(pSE0, seBlockParams);
551 __SE1_OPEN(pXLastElem, seReadXParams);
552 __SA2_OPEN(saReverseParams);
553 __SA3_OPEN(saReverseParams);
// Everything handled by the full blocks counts as already solved.
555 int32_t sumRows = totalBlocks * eleCount;
// Predicated loads from the base of pY and pDiv, limited by the SA templates
// to remainingEle elements.
557 __vpred predY = c7x::strm_agen<2, vec>::get_vpred();
558 vec *pLoadY = c7x::strm_agen<2, vec>::get_adv(pY);
559 vec vecY = __vload_pred(predY, pLoadY);
561 __vpred predDiv = c7x::strm_agen<3, vec>::get_vpred();
562 vec *pLoadDiv = c7x::strm_agen<3, vec>::get_adv(pDiv);
563 vec vecDiv = __vload_pred(predDiv, pLoadDiv);
565 vec vecSum = (vec) 0;
566 vec vecSum1 = (vec) 0;
567 vec vecSum2 = (vec) 0;
568 vec vecSum3 = (vec) 0;
569 vec vecSum4 = (vec) 0;
570 int32_t vertical = 0;
// Same 4x unrolled accumulation as in the full-block path; the accumulation
// statements are likewise not present in this listing.
572 for (vertical = 0; vertical < sumRows - 3; vertical += 4) {
573 vec v1 = c7x::strm_eng<0, vec>::get_adv();
574 vec x1 = c7x::strm_eng<1, vec>::get_adv();
577 vec v2 = c7x::strm_eng<0, vec>::get_adv();
578 vec x2 = c7x::strm_eng<1, vec>::get_adv();
581 vec v3 = c7x::strm_eng<0, vec>::get_adv();
582 vec x3 = c7x::strm_eng<1, vec>::get_adv();
585 vec v4 = c7x::strm_eng<0, vec>::get_adv();
586 vec x4 = c7x::strm_eng<1, vec>::get_adv();
590 vecSum = vecSum1 + vecSum2 + vecSum3 + vecSum4;
// Counterpart of vecIndex in the full-block path, here based on remainingEle
// (declared int32_t here, uint32_t there).
592 int32_t vecIndex = remainingEle - 1;
// Solve the remaining unknowns one at a time and store each through SA1.
// resultEle is defined on a line that is not present in this listing.
595 for (vertical = 0; vertical < remainingEle; vertical++) {
596 vec v1 = c7x::strm_eng<0, vec>::get_adv();
597 vec result = (vecY - vecSum) * vecDiv;
600 vecSum += v1 * (resultEle);
602 dataType *pStoreX = c7x::strm_agen<1, dataType>::get_adv(pSA1);
603 *pStoreX = resultEle;
// Fragment of the templated execution routine (presumably
// DSPLIB_cholesky_solver_exec_ci; most of its parameter list and its braces are
// not present in this listing). Visible sequence: forward substitution,
// transpose of U into the scratch buffer, then back substitution.
629 template <
typename dataType>
632 void *restrict pScratch,
// Values kept in the kernel's private arguments.
644 uint8_t *pBlock = pKerPrivArgs->
bufPblock;
645 int32_t order = pKerPrivArgs->
order;
646 int32_t strideL = pKerPrivArgs->
stride;
// Stride counted in dataType elements (assumes the stored stride is in
// bytes -- TODO confirm).
647 int32_t colLStride = strideL /
sizeof(dataType);
// Typed views of the untyped buffer pointers.
649 dataType *pLocalU = (dataType *) pU;
650 dataType *pLocaltransU = (dataType *) pScratch;
651 dataType *pLocalY = (dataType *) pY;
652 dataType *pLocalB = (dataType *) pB;
653 dataType *pLocalX = (dataType *) pX;
654 dataType *pLocalDiv = (dataType *) pDiv;
// Step 1: forward substitution using U, Y, B and the Div buffer (the call's
// remaining arguments are on a line not present in this listing).
661 DSPLIB_cholesky_solver_forwardSubstitution_ci<dataType>(pLocalU, pLocalY, pLocalB, pLocalDiv, order, colLStride,
// Step 2: transpose U into the scratch buffer.
667 DSPLIB_matTrans_exec_ci<dataType>(matTransPrivArgs, pLocalU, pLocaltransU);
// Step 3: back substitution on the transposed matrix, writing X from Y.
669 DSPLIB_cholesky_solver_backSubstitution_ci<dataType>(pLocaltransU, pLocalX, pLocalY, pLocalDiv, order, colLStride,
// The lines below are parameter fragments of two further declarations whose
// remaining lines are not present in this listing.
678 void *restrict pScratch,
682 void *restrict pDiv);
686 void *restrict pScratch,
690 void *restrict pDiv);
template void DSPLIB_cholesky_solver_forwardSubstitution_ci< double >(double *pL, double *pX, double *pY, double *pDiv, int32_t order, int32_t colLstride, uint8_t *pBlock)
static void DSPLIB_cholesky_solver_backSubstitution_ci(dataType *pL, dataType *pX, dataType *pY, dataType *pDiv, int32_t order, int32_t colLstride, uint8_t *pBlock)
template void DSPLIB_cholesky_solver_backSubstitution_init_ci< float >(DSPLIB_kernelHandle handle)
void getElement(V inVec, uint32_t index, dataType *element)
template DSPLIB_STATUS DSPLIB_cholesky_solver_exec_ci< float >(DSPLIB_kernelHandle handle, void *restrict pU, void *restrict pScratch, void *restrict pY, void *restrict pB, void *restrict pX, void *restrict pDiv)
void DSPLIB_cholesky_solver_backSubstitution_init_ci(DSPLIB_kernelHandle handle)
template DSPLIB_STATUS DSPLIB_cholesky_solver_init_ci< float >(DSPLIB_kernelHandle handle, DSPLIB_bufParams2D_t *bufParamsU, DSPLIB_bufParams2D_t *bufParamsScratch, DSPLIB_bufParams1D_t *bufParamsY, DSPLIB_bufParams1D_t *bufParamsB, DSPLIB_bufParams1D_t *bufParamsX, DSPLIB_bufParams1D_t *bufParamsDiv, const DSPLIB_cholesky_solver_InitArgs *pKerInitArgs)
static void DSPLIB_cholesky_solver_forwardSubstitution_ci(dataType *pL, dataType *pY, dataType *pB, dataType *pDiv, int32_t order, int32_t colLstride, uint8_t *pBlock)
template void DSPLIB_cholesky_solver_forwardSubstitution_ci< float >(float *pL, float *pX, float *pY, float *pDiv, int32_t order, int32_t colLstride, uint8_t *pBlock)
template void DSPLIB_cholesky_solver_backSubstitution_ci< double >(double *pL, double *pX, double *pY, double *pDiv, int32_t order, int32_t colLstride, uint8_t *pBlock)
template void DSPLIB_cholesky_solver_backSubstitution_init_ci< double >(DSPLIB_kernelHandle handle)
template DSPLIB_STATUS DSPLIB_cholesky_solver_init_ci< double >(DSPLIB_kernelHandle handle, DSPLIB_bufParams2D_t *bufParamsU, DSPLIB_bufParams2D_t *bufParamsScratch, DSPLIB_bufParams1D_t *bufParamsY, DSPLIB_bufParams1D_t *bufParamsB, DSPLIB_bufParams1D_t *bufParamsX, DSPLIB_bufParams1D_t *bufParamsDiv, const DSPLIB_cholesky_solver_InitArgs *pKerInitArgs)
template DSPLIB_STATUS DSPLIB_cholesky_solver_exec_ci< double >(DSPLIB_kernelHandle handle, void *restrict pU, void *restrict pScratch, void *restrict pY, void *restrict pB, void *restrict pX, void *restrict pDiv)
DSPLIB_STATUS DSPLIB_cholesky_solver_exec_ci(DSPLIB_kernelHandle handle, void *restrict pU, void *restrict pScratch, void *restrict pY, void *restrict pB, void *restrict pX, void *restrict pDiv)
This function is the main execution function for the C7x implementation of the kernel....
template void DSPLIB_cholesky_solver_backSubstitution_ci< float >(float *pL, float *pX, float *pY, float *pDiv, int32_t order, int32_t colLstride, uint8_t *pBlock)
DSPLIB_STATUS DSPLIB_cholesky_solver_init_ci(DSPLIB_kernelHandle handle, DSPLIB_bufParams2D_t *bufParamsU, DSPLIB_bufParams2D_t *bufParamsScratch, DSPLIB_bufParams1D_t *bufParamsY, DSPLIB_bufParams1D_t *bufParamsB, DSPLIB_bufParams1D_t *bufParamsX, DSPLIB_bufParams1D_t *bufParamsDiv, const DSPLIB_cholesky_solver_InitArgs *pKerInitArgs)
This function is the initialization function for the C7x implementation of the kernel....
Header file for kernel's internal use. For the kernel's interface, please see DSPLIB_cholesky_solver.
#define DSPLIB_DEBUGPRINTFN(N, fmt,...)
DSPLIB_STATUS_NAME
The enumeration of all status codes.
void * DSPLIB_kernelHandle
Handle type for DSPLIB operations.
@ DSPLIB_FUNCTION_OPTIMIZED
A structure for a 1 dimensional buffer descriptor.
A structure for a 2 dimensional buffer descriptor.
int32_t stride_y
Stride in Y dimension in bytes.
uint32_t dim_x
Width of buffer in X dimension in elements.
uint32_t dim_y
Height of buffer in Y dimension in elements.
Structure containing the parameters to initialize the kernel.
Structure that is reserved for internal use by the kernel.
int32_t order
Order of input buffer for different batches DSPLIB_cholesky_solver_init that will be retrieved and us...
DSPLIB_matTrans_PrivArgs matTransPrivArgs
Structure to store privArgs for matTrans kernel.
uint8_t bufPblock[DSPLIB_CHOLESKY_SOLVER_IXX_IXX_OXX_PBLOCK_SIZE]
Structure containing the parameters to initialize the kernel.
uint32_t dimX
Size of input data.
int8_t funcStyle
Variant of the function refer to DSPLIB_FUNCTION_STYLE
Structure that is reserved for internal use by the kernel.
int32_t strideOut
Stride between rows of output data matrix
uint32_t heightIn
Height of input data matrix
int32_t strideIn
Stride between rows of input data matrix
uint32_t widthIn
Size of input buffer for different batches DSPLIB_matTrans_init that will be retrieved and used by DS...