// Fragment of an init routine (its signature and several body lines are not visible
// in this listing). The visible part fills one streaming-engine (SE) template and one
// streaming-address (SA) template and saves them into the kernel parameter block.
45 uint8_t *pBlock = pKerPrivArgs->
bufPblock;
47 __SE_TEMPLATE_v1 se0Params = __gen_SE_TEMPLATE_v1();
48 __SA_TEMPLATE_v1 sa0Params = __gen_SA_TEMPLATE_v1();
50 int32_t strideR = pKerPrivArgs->
strideR;
51 int32_t nCols = pKerPrivArgs->
widthR;
// strideR divided by the element size; presumably strideR is in bytes, making this a
// stride in elements -- TODO confirm against the caller.
52 int32_t colStrideR = strideR /
sizeof(dataType);
54 typedef typename c7x::make_full_vector<dataType>::type vec;
// Vector-dependent constants derived from the full-width vector type for dataType.
56 uint32_t eleCount = c7x::element_count_of<vec>::value;
57 __SE_ELETYPE SE_ELETYPE = c7x::se_eletype<vec>::value;
58 __SE_VECLEN SE_VECLEN = c7x::se_veclen<vec>::value;
59 __SA_VECLEN SA_VECLEN = c7x::sa_veclen<vec>::value;
// SE template: 3D, ICNT1 = eleCount, DIM1 = colStrideR, DIM2 = colStrideR * eleCount.
// ICNT0/ICNT2 are not set in the visible lines.
62 se0Params.ICNT1 = eleCount;
63 se0Params.DIM1 = colStrideR;
64 se0Params.DIM2 = colStrideR * eleCount;
65 se0Params.DIMFMT = __SE_DIMFMT_3D;
66 se0Params.ELETYPE = SE_ELETYPE;
67 se0Params.VECLEN = SE_VECLEN;
// Transpose granularity is chosen from the element size: 32-bit when
// sizeof(dataType) == 4. The 64-bit assignment that follows is presumably the
// else-branch; the lines between the two assignments are not visible here.
68 if (
sizeof(dataType) == 4) {
69 se0Params.TRANSPOSE = __SE_TRANSPOSE_32BIT;
72 se0Params.TRANSPOSE = __SE_TRANSPOSE_64BIT;
// SA template: 1D with ICNT0 = nCols.
75 sa0Params.ICNT0 = nCols;
76 sa0Params.VECLEN = SA_VECLEN;
77 sa0Params.DIMFMT = __SA_DIMFMT_1D;
// Save the SE template at offset 1 * SE_PARAM_SIZE and the SA template at
// offset 2 * SE_PARAM_SIZE inside pBlock.
79 *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (SE_PARAM_SIZE)) = se0Params;
80 *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (2 * SE_PARAM_SIZE)) = sa0Params;
// Fragment of an init routine (signature not visible in this listing). The visible
// part prepares three templates -- a "scalar" SE stream, a "matrix" SE stream and a
// "matrix" SA stream -- and saves them into the kernel parameter block.
91 uint8_t *pBlock = pKerPrivArgs->
bufPblock;
92 int32_t nCols = pKerPrivArgs->
widthR;
93 int32_t strideR = pKerPrivArgs->
strideR;
// strideR divided by the element size; presumably a stride in elements -- TODO confirm.
94 int32_t colStrideR = strideR /
sizeof(dataType);
96 typedef typename c7x::make_full_vector<dataType>::type vec;
99 uint32_t eleCount = c7x::element_count_of<vec>::value;
// Number of tiles of (eleCount * lenTile8) columns needed to cover nCols, rounded up.
// lenTile8 is declared on a line that is not visible here.
100 int32_t nTiles_8 = DSPLIB_ceilingDiv(nCols, (eleCount * lenTile8));
102 __SE_ELETYPE SE_ELETYPE = c7x::se_eletype<vec>::value;
103 __SE_VECLEN SE_VECLEN = c7x::se_veclen<vec>::value;
104 __SA_VECLEN SA_VECLEN = c7x::sa_veclen<vec>::value;
105 __SE_ELEDUP SE_ELEDUP = c7x::se_eledup<dataType>::value;
107 __SE_TEMPLATE_v1 seScalarParams = __gen_SE_TEMPLATE_v1();
108 __SE_TEMPLATE_v1 seMatrixParams = __gen_SE_TEMPLATE_v1();
109 __SA_TEMPLATE_v1 saMatrixParams = __gen_SA_TEMPLATE_v1();
// Scalar stream: 2D, ICNT1 = nTiles_8, DIM1 = 0, element duplication enabled.
// ICNT0 is not set in the visible lines.
111 seScalarParams.ICNT1 = nTiles_8;
112 seScalarParams.DIM1 = 0;
113 seScalarParams.ELEDUP = SE_ELEDUP;
114 seScalarParams.DIMFMT = __SE_DIMFMT_2D;
115 seScalarParams.VECLEN = SE_VECLEN;
116 seScalarParams.ELETYPE = SE_ELETYPE;
// Matrix stream (SE): 3D, ICNT0 = one tile of columns, DIM1 = colStrideR,
// ICNT2 = nTiles_8 with DIM2 = one tile of columns; DECDIM1 is tied to DIM2 with a
// width of nCols. ICNT1 is not set in the visible lines.
118 seMatrixParams.ICNT0 = (eleCount * lenTile8);
119 seMatrixParams.DIM1 = colStrideR;
120 seMatrixParams.ICNT2 = nTiles_8;
121 seMatrixParams.DIM2 = (eleCount * lenTile8);
122 seMatrixParams.DIMFMT = __SE_DIMFMT_3D;
123 seMatrixParams.ELETYPE = SE_ELETYPE;
124 seMatrixParams.VECLEN = SE_VECLEN;
125 seMatrixParams.DECDIM1 = __SE_DECDIM_DIM2;
126 seMatrixParams.DECDIM1_WIDTH = nCols;
// Matrix stream (SA): same counts, strides and DECDIM settings as the SE matrix stream.
128 saMatrixParams.ICNT0 = (eleCount * lenTile8);
129 saMatrixParams.DIM1 = colStrideR;
130 saMatrixParams.ICNT2 = nTiles_8;
131 saMatrixParams.DIM2 = (eleCount * lenTile8);
132 saMatrixParams.DIMFMT = __SA_DIMFMT_3D;
133 saMatrixParams.VECLEN = SA_VECLEN;
134 saMatrixParams.DECDIM1 = __SA_DECDIM_DIM2;
135 saMatrixParams.DECDIM1_WIDTH = nCols;
// Save the three templates at offsets 3x, 4x and 5x SE_PARAM_SIZE inside pBlock.
137 *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (3 * SE_PARAM_SIZE)) = seScalarParams;
138 *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (4 * SE_PARAM_SIZE)) = seMatrixParams;
139 *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (5 * SE_PARAM_SIZE)) = saMatrixParams;
// Fragment of the templated top-level init routine (signature and several body lines
// are not visible in this listing). The visible part fills the init arguments of the
// matrix-transpose and matrix-multiply sub-kernels and then calls each sub-kernel's
// init function.
146 template <
typename dataType>
// Transpose dimensions are taken from the Q buffer description.
165 kerInitArgsMatTrans.
dimX = bufParamsQ->
dim_x;
166 kerInitArgsMatTrans.
dimY = bufParamsQ->
dim_y;
// Strides divided by the element size. Input 0 and the output both use strideInvA;
// input 1 uses strideR. How these values are consumed is on lines not visible here.
168 uint32_t strideIn0Elements = pKerPrivArgs->
strideInvA /
sizeof(dataType);
169 uint32_t strideIn1Elements = pKerPrivArgs->
strideR /
sizeof(dataType);
170 uint32_t strideOutElements = pKerPrivArgs->
strideInvA /
sizeof(dataType);
// Matrix-multiply problem size: M = heightR, N = widthR, K = heightR.
177 pMatMulKerPrivArgs->
M = pKerPrivArgs->
heightR;
178 pMatMulKerPrivArgs->
N = pKerPrivArgs->
widthR;
179 pMatMulKerPrivArgs->
K = pKerPrivArgs->
heightR;
// Sub-kernel initialisation. The remaining arguments of the identity-matrix call are
// on a continuation line that is not visible here.
185 DSPLIB_qrd_identity_matrix_generate_init_ci<dataType>(pKerPrivArgs->
heightR, pKerPrivArgs->
strideR,
187 DSPLIB_qrd_inverse_R_invA_init_ci<dataType>(handle);
189 DSPLIB_matTrans_init_ci<dataType>(pMatTransKerPrivArgs, bufParamsQ, bufParamsR, &kerInitArgsMatTrans);
191 DSPLIB_matMul_init_ci<dataType>(pMatMulKerPrivArgs, bufParamsInvA, bufParamsR, bufParamsInvA, &kerInitArgsMatMul);
192 DSPLIB_qrd_inverse_factor_init_ci<dataType>(handle);
// Fragment of the factor-computation routine (parts of the signature, the closing
// braces and several body lines are not visible in this listing). The visible part
// reads vectors from two streaming engines, multiplies each by scaleVec, and stores
// the products with predicated stores into pFactor, using two address generators
// that each cover one part of the output.
215 template <typename dataType, typename vec = typename c7x::make_full_vector<dataType>::type>
222 __SE_TEMPLATE_v1 se0Params,
223 __SE_TEMPLATE_v1 se1Params,
224 __SA_TEMPLATE_v1 sa0Params,
225 __SA_TEMPLATE_v1 sa1Params)
231 uint32_t eleCount = c7x::element_count_of<vec>::value;
// nVec = number of vectors needed for nRows values (rounded up). SE0 gets the first
// floor(nVec / 2) vectors and SE1 gets the rest, so SE1 has the same count or one more.
233 int32_t nVec = DSPLIB_ceilingDiv(nRows, eleCount);
234 int32_t se0ICNT2 = nVec / 2;
235 int32_t se1ICNT2 = nVec - se0ICNT2;
237 se0Params.ICNT2 = se0ICNT2;
239 se1Params.ICNT2 = se1ICNT2;
// SE1 starts se0ICNT2 * eleCount rows (of colStrideR elements each) past pR.
// pSE0 is set on a line that is not visible here.
242 dataType *pSE1 = pR + (se0ICNT2 * colStrideR * eleCount);
244 __SE1_OPEN(pSE1, se1Params);
246 __SE0_OPEN(pSE0, se0Params);
// SA0 covers the first se0ICNT2 * eleCount outputs; SA1 covers the remaining
// nRows - se0ICNT2 * eleCount outputs, addressed relative to pFactorHalf.
251 sa0Params.ICNT0 = (se0ICNT2 * eleCount);
253 sa1Params.ICNT0 = nRows - ((se0ICNT2 * eleCount));
255 dataType *pFactorHalf = pFactor + (se0ICNT2 * eleCount);
// SA0 is only opened when it has at least one element to address.
256 if (sa0Params.ICNT0){
257 __SA0_OPEN(sa0Params);
260 __SA1_OPEN(sa1Params);
// Main loop, two vectors per engine per iteration. `vertical` is declared on a line
// that is not visible here.
262 for (vertical = 0; vertical < se0ICNT2 - 1; vertical += 2) {
263 vec v1 = c7x::strm_eng<0, vec>::get_adv();
264 vec v2 = c7x::strm_eng<1, vec>::get_adv();
265 vec v3 = c7x::strm_eng<0, vec>::get_adv();
266 vec v4 = c7x::strm_eng<1, vec>::get_adv();
268 __vpred pred = c7x::strm_agen<0, vec>::get_vpred();
269 vec *pStoreVec = c7x::strm_agen<0, vec>::get_adv((dataType *) pFactor);
270 __vstore_pred(pred, pStoreVec, scaleVec * v1);
272 pred = c7x::strm_agen<1, vec>::get_vpred();
273 pStoreVec = c7x::strm_agen<1, vec>::get_adv((dataType *) pFactorHalf);
274 __vstore_pred(pred, pStoreVec, scaleVec * v2);
276 pred = c7x::strm_agen<0, vec>::get_vpred();
277 pStoreVec = c7x::strm_agen<0, vec>::get_adv((dataType *) pFactor);
278 __vstore_pred(pred, pStoreVec, scaleVec * v3);
280 pred = c7x::strm_agen<1, vec>::get_vpred();
281 pStoreVec = c7x::strm_agen<1, vec>::get_adv((dataType *) pFactorHalf);
282 __vstore_pred(pred, pStoreVec, scaleVec * v4);
// Remainder loop: one vector per engine per iteration until SE0's share is consumed.
285 for (; vertical < se0ICNT2; vertical++) {
286 vec v1 = c7x::strm_eng<0, vec>::get_adv();
287 vec v2 = c7x::strm_eng<1, vec>::get_adv();
289 __vpred pred = c7x::strm_agen<0, vec>::get_vpred();
290 vec *pStoreVec = c7x::strm_agen<0, vec>::get_adv((dataType *) pFactor);
291 __vstore_pred(pred, pStoreVec, scaleVec * v1);
293 pred = c7x::strm_agen<1, vec>::get_vpred();
294 pStoreVec = c7x::strm_agen<1, vec>::get_adv((dataType *) pFactorHalf);
295 __vstore_pred(pred, pStoreVec, scaleVec * v2);
// When nVec is odd, SE1 holds one extra vector; process it here.
297 if (se0ICNT2 != se1ICNT2) {
298 vec v1 = c7x::strm_eng<1, vec>::get_adv();
300 __vpred pred = c7x::strm_agen<1, vec>::get_vpred();
301 vec *pStoreVec = c7x::strm_agen<1, vec>::get_adv((dataType *) pFactorHalf);
302 __vstore_pred(pred, pStoreVec, scaleVec * v1);
// Same guard as at the SA0 open; the guarded statement (presumably the SA0 close) is
// on a line that is not visible here.
305 if (sa0Params.ICNT0){
// Explicit instantiations of DSPLIB_qrd_inverse_factor_exec_ci for float and double,
// each with the full-width vector type of that element type. Some parameter lines of
// each declaration are not visible in this listing.
320 template float DSPLIB_qrd_inverse_factor_exec_ci<float, typename c7x::make_full_vector<float>::type>(
325 typename c7x::make_full_vector<float>::type scale,
327 __SE_TEMPLATE_v1 se0Params,
328 __SE_TEMPLATE_v1 se1Params,
329 __SA_TEMPLATE_v1 sa0Params,
330 __SA_TEMPLATE_v1 sa1Params);
331 template double DSPLIB_qrd_inverse_factor_exec_ci<double, typename c7x::make_full_vector<double>::type>(
336 typename c7x::make_full_vector<double>::type scale,
338 __SE_TEMPLATE_v1 se0Params,
339 __SE_TEMPLATE_v1 se1Params,
340 __SA_TEMPLATE_v1 sa0Params,
341 __SA_TEMPLATE_v1 sa1Params);
// Fragment of the routine that updates pLocalR and pLocalInvA column by column
// (parts of the signature, all closing braces and a number of body lines are not
// visible in this listing). For every column index `col`, from the last down to 0,
// the visible code:
//   1. computes a refined reciprocal of the diagonal element R[col][col],
//   2. calls DSPLIB_qrd_inverse_factor_exec_ci to fill factArray,
//   3. for each of the `col` rows above row `col`, subtracts (row col) * (streamed
//      factor) from that row of pLocalR, then does the same for pLocalInvA.
// The column range is processed in tiles of 8, 4, 2 and 1 vectors.
343 template <
typename dataType>
345 dataType *pLocalInvA,
348 int32_t colInvAStride,
353 typedef typename c7x::make_full_vector<dataType>::type vec;
// Templates for the factor routine: both SE copies come from the slot at
// 1 * SE_PARAM_SIZE and both SA copies from the slot at 2 * SE_PARAM_SIZE.
355 __SE_TEMPLATE_v1 se0ParamsFact = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (SE_PARAM_SIZE));
356 __SE_TEMPLATE_v1 se1ParamsFact = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (SE_PARAM_SIZE));
357 __SA_TEMPLATE_v1 sa0ParamsFact = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (2 * SE_PARAM_SIZE));
358 __SA_TEMPLATE_v1 sa1ParamsFact = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (2 * SE_PARAM_SIZE));
360 uint32_t eleCount = c7x::element_count_of<vec>::value;
// Scalar/matrix templates from slots 3x, 4x and 5x; the two "ref" SA templates reuse
// the SA template stored at slot 2x.
362 __SE_TEMPLATE_v1 seScalarParams = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (3 * SE_PARAM_SIZE));
363 __SE_TEMPLATE_v1 seMatrixParams = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (4 * SE_PARAM_SIZE));
364 __SA_TEMPLATE_v1 saMatrixParams = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (5 * SE_PARAM_SIZE));
365 __SA_TEMPLATE_v1 saRefParams = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (2 * SE_PARAM_SIZE));
366 __SA_TEMPLATE_v1 saRefStoreParams = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (2 * SE_PARAM_SIZE));
// Tile widths, counted in vectors.
368 int32_t lenTile8 = 8;
369 int32_t lenTile4 = 4;
370 int32_t lenTile2 = 2;
371 int32_t lenTile1 = 1;
// Split ceil(nCols / eleCount) vectors greedily into tiles of 8, then 4, then 2;
// whatever is left stays in nTiles1.
373 int32_t nTiles1 = DSPLIB_ceilingDiv(nCols, (eleCount));
374 int32_t nTiles8 = nTiles1 / lenTile8;
375 nTiles1 -= nTiles8 * lenTile8;
376 int32_t nTiles4 = nTiles1 / lenTile4;
377 nTiles1 -= nTiles4 * lenTile4;
378 int32_t nTiles2 = nTiles1 / lenTile2;
379 nTiles1 -= nTiles2 * lenTile2;
// Number of columns handled by each tile class, clamped to the columns still remaining.
// colLimit1 is simply what is left after the 8/4/2 classes.
381 int32_t remainingCols = nCols;
382 int32_t colLimit8 = nTiles8 * lenTile8 * eleCount;
383 colLimit8 = (remainingCols < colLimit8) ? remainingCols : colLimit8;
385 remainingCols = remainingCols - colLimit8;
386 int32_t colLimit4 = nTiles4 * lenTile4 * eleCount;
387 colLimit4 = (remainingCols < colLimit4) ? remainingCols : colLimit4;
389 remainingCols = remainingCols - colLimit4;
390 int32_t colLimit2 = nTiles2 * lenTile2 * eleCount;
391 colLimit2 = (remainingCols < colLimit2) ? remainingCols : colLimit2;
393 int32_t colLimit1 = remainingCols - colLimit2;
// Twice the total tile count; presumably because the scalar stream is opened once per
// column and read by both the R pass and the invA pass -- TODO confirm.
394 seScalarParams.ICNT1 = 2 * (nTiles8 + nTiles4 + nTiles2 + nTiles1);
// Walk the columns from the last one down to column 0.
396 for (int32_t col = nCols - 1; col >= 0; col--) {
// Start of row `col` in each matrix.
// NOTE(review): pLastInvA is offset with colStrideR, and colInvAStride is not used on
// any visible line of this routine -- confirm both matrices are required to share the
// same stride.
397 dataType *pLastR = pLocalR + (colStrideR * col);
398 dataType *pLastInvA = pLocalInvA + (colStrideR * col);
400 dataType diag = pLocalR[col + col * colStrideR];
// Reciprocal estimate of the diagonal, refined with two Newton-Raphson steps
// r = r * (2 - diag * r).
402 dataType recipScalar = __recip(diag);
403 dataType twoP0 = 2.0;
404 recipScalar = recipScalar * (twoP0 - (diag * recipScalar));
405 recipScalar = recipScalar * (twoP0 - (diag * recipScalar));
407 vec divVec = (vec) recipScalar;
// Fill factArray from data starting at &pLocalR[col], with nRows = col and the
// reciprocal vector as the scale.
410 DSPLIB_qrd_inverse_factor_exec_ci<dataType, vec>(&pLocalR[col], colStrideR, col, factArray, divVec, pBlock,
411 se0ParamsFact, se1ParamsFact, sa0ParamsFact, sa1ParamsFact);
// SE0 streams the `col` factors out of factArray.
412 seScalarParams.ICNT0 = col;
413 __SE0_OPEN(factArray, seScalarParams);
// ---- Pass over pLocalR ----
// SA0 addresses the loads of row `col`; SA2 addresses the stores back to row `col`.
416 __SA0_OPEN(saRefParams);
417 __SA2_OPEN(saRefStoreParams);
// 8-vector tiles: SE1 reads and SA1 writes the `col` rows above row `col`.
422 seMatrixParams.ICNT0 = saMatrixParams.ICNT0 = eleCount * lenTile8;
423 seMatrixParams.ICNT1 = saMatrixParams.ICNT1 = col;
424 seMatrixParams.ICNT2 = saMatrixParams.ICNT2 = nTiles8;
425 seMatrixParams.DIM2 = saMatrixParams.DIM2 = eleCount * lenTile8;
426 seMatrixParams.DECDIM1_WIDTH = saMatrixParams.DECDIM1_WIDTH = colLimit8;
429 __SE1_OPEN(pLocalR, seMatrixParams);
430 __SA1_OPEN(saMatrixParams);
433 for (int32_t tile = 0; tile < nTiles8; tile++) {
// Load the eight vectors of row `col` that belong to this tile.
434 __vpred lPred = c7x::strm_agen<0, vec>::get_vpred();
435 vec *pLoadVec = c7x::strm_agen<0, vec>::get_adv(pLastR);
436 vec sV1 = __vload_pred(lPred, pLoadVec);
438 lPred = c7x::strm_agen<0, vec>::get_vpred();
439 pLoadVec = c7x::strm_agen<0, vec>::get_adv(pLastR);
440 vec sV2 = __vload_pred(lPred, pLoadVec);
442 lPred = c7x::strm_agen<0, vec>::get_vpred();
443 pLoadVec = c7x::strm_agen<0, vec>::get_adv(pLastR);
444 vec sV3 = __vload_pred(lPred, pLoadVec);
446 lPred = c7x::strm_agen<0, vec>::get_vpred();
447 pLoadVec = c7x::strm_agen<0, vec>::get_adv(pLastR);
448 vec sV4 = __vload_pred(lPred, pLoadVec);
450 lPred = c7x::strm_agen<0, vec>::get_vpred();
451 pLoadVec = c7x::strm_agen<0, vec>::get_adv(pLastR);
452 vec sV5 = __vload_pred(lPred, pLoadVec);
454 lPred = c7x::strm_agen<0, vec>::get_vpred();
455 pLoadVec = c7x::strm_agen<0, vec>::get_adv(pLastR);
456 vec sV6 = __vload_pred(lPred, pLoadVec);
458 lPred = c7x::strm_agen<0, vec>::get_vpred();
459 pLoadVec = c7x::strm_agen<0, vec>::get_adv(pLastR);
460 vec sV7 = __vload_pred(lPred, pLoadVec);
462 lPred = c7x::strm_agen<0, vec>::get_vpred();
463 pLoadVec = c7x::strm_agen<0, vec>::get_adv(pLastR);
464 vec sV8 = __vload_pred(lPred, pLoadVec);
// For each row above row `col`: row -= (row col) * factor, one factor vector per row.
466 for (int32_t vertical = 0; vertical < col; vertical++) {
467 vec scalarDup = c7x::strm_eng<0, vec>::get_adv();
469 vec v1 = c7x::strm_eng<1, vec>::get_adv();
470 vec v2 = c7x::strm_eng<1, vec>::get_adv();
471 vec v3 = c7x::strm_eng<1, vec>::get_adv();
472 vec v4 = c7x::strm_eng<1, vec>::get_adv();
473 vec v5 = c7x::strm_eng<1, vec>::get_adv();
474 vec v6 = c7x::strm_eng<1, vec>::get_adv();
475 vec v7 = c7x::strm_eng<1, vec>::get_adv();
476 vec v8 = c7x::strm_eng<1, vec>::get_adv();
478 v1 -= sV1 * scalarDup;
479 v2 -= sV2 * scalarDup;
480 v3 -= sV3 * scalarDup;
481 v4 -= sV4 * scalarDup;
482 v5 -= sV5 * scalarDup;
483 v6 -= sV6 * scalarDup;
484 v7 -= sV7 * scalarDup;
485 v8 -= sV8 * scalarDup;
487 __vpred sPred = c7x::strm_agen<1, vec>::get_vpred();
488 vec *pStoreVec = c7x::strm_agen<1, vec>::get_adv(pLocalR);
489 __vstore_pred(sPred, pStoreVec, v1);
491 sPred = c7x::strm_agen<1, vec>::get_vpred();
492 pStoreVec = c7x::strm_agen<1, vec>::get_adv(pLocalR);
493 __vstore_pred(sPred, pStoreVec, v2);
495 sPred = c7x::strm_agen<1, vec>::get_vpred();
496 pStoreVec = c7x::strm_agen<1, vec>::get_adv(pLocalR);
497 __vstore_pred(sPred, pStoreVec, v3);
499 sPred = c7x::strm_agen<1, vec>::get_vpred();
500 pStoreVec = c7x::strm_agen<1, vec>::get_adv(pLocalR);
501 __vstore_pred(sPred, pStoreVec, v4);
503 sPred = c7x::strm_agen<1, vec>::get_vpred();
504 pStoreVec = c7x::strm_agen<1, vec>::get_adv(pLocalR);
505 __vstore_pred(sPred, pStoreVec, v5);
507 sPred = c7x::strm_agen<1, vec>::get_vpred();
508 pStoreVec = c7x::strm_agen<1, vec>::get_adv(pLocalR);
509 __vstore_pred(sPred, pStoreVec, v6);
511 sPred = c7x::strm_agen<1, vec>::get_vpred();
512 pStoreVec = c7x::strm_agen<1, vec>::get_adv(pLocalR);
513 __vstore_pred(sPred, pStoreVec, v7);
515 sPred = c7x::strm_agen<1, vec>::get_vpred();
516 pStoreVec = c7x::strm_agen<1, vec>::get_adv(pLocalR);
517 __vstore_pred(sPred, pStoreVec, v8);
// Write sV1..sV8 to row `col` through SA2. Lines between the inner loop and these
// stores are not visible here, so any change made to sV1..sV8 in between is unknown.
529 lPred = c7x::strm_agen<2, vec>::get_vpred();
530 vec *psV = c7x::strm_agen<2, vec>::get_adv(pLastR);
531 __vstore_pred(lPred, psV, sV1);
533 lPred = c7x::strm_agen<2, vec>::get_vpred();
534 psV = c7x::strm_agen<2, vec>::get_adv(pLastR);
535 __vstore_pred(lPred, psV, sV2);
537 lPred = c7x::strm_agen<2, vec>::get_vpred();
538 psV = c7x::strm_agen<2, vec>::get_adv(pLastR);
539 __vstore_pred(lPred, psV, sV3);
541 lPred = c7x::strm_agen<2, vec>::get_vpred();
542 psV = c7x::strm_agen<2, vec>::get_adv(pLastR);
543 __vstore_pred(lPred, psV, sV4);
545 lPred = c7x::strm_agen<2, vec>::get_vpred();
546 psV = c7x::strm_agen<2, vec>::get_adv(pLastR);
547 __vstore_pred(lPred, psV, sV5);
549 lPred = c7x::strm_agen<2, vec>::get_vpred();
550 psV = c7x::strm_agen<2, vec>::get_adv(pLastR);
551 __vstore_pred(lPred, psV, sV6);
553 lPred = c7x::strm_agen<2, vec>::get_vpred();
554 psV = c7x::strm_agen<2, vec>::get_adv(pLastR);
555 __vstore_pred(lPred, psV, sV7);
557 lPred = c7x::strm_agen<2, vec>::get_vpred();
558 psV = c7x::strm_agen<2, vec>::get_adv(pLastR);
559 __vstore_pred(lPred, psV, sV8);
// 4-vector tiles of pLocalR: same scheme, starting colLimit8 columns into each row.
568 seMatrixParams.ICNT0 = saMatrixParams.ICNT0 = eleCount * lenTile4;
569 seMatrixParams.ICNT1 = saMatrixParams.ICNT1 = col;
570 seMatrixParams.ICNT2 = saMatrixParams.ICNT2 = nTiles4;
571 seMatrixParams.DIM2 = saMatrixParams.DIM2 = eleCount * lenTile4;
572 seMatrixParams.DECDIM1_WIDTH = saMatrixParams.DECDIM1_WIDTH = colLimit4;
574 dataType *pSE1 = pLocalR + colLimit8;
575 dataType *pSA1 = pLocalR + colLimit8;
576 dataType *pSA0 = pLastR;
577 dataType *pSA2 = pLastR;
580 __SE1_OPEN(pSE1, seMatrixParams);
581 __SA1_OPEN(saMatrixParams);
584 for (int32_t tile = 0; tile < nTiles4; tile++) {
585 __vpred lPred = c7x::strm_agen<0, vec>::get_vpred();
586 vec *pLoadVec = c7x::strm_agen<0, vec>::get_adv(pSA0);
587 vec sV1 = __vload_pred(lPred, pLoadVec);
589 lPred = c7x::strm_agen<0, vec>::get_vpred();
590 pLoadVec = c7x::strm_agen<0, vec>::get_adv(pSA0);
591 vec sV2 = __vload_pred(lPred, pLoadVec);
593 lPred = c7x::strm_agen<0, vec>::get_vpred();
594 pLoadVec = c7x::strm_agen<0, vec>::get_adv(pSA0);
595 vec sV3 = __vload_pred(lPred, pLoadVec);
597 lPred = c7x::strm_agen<0, vec>::get_vpred();
598 pLoadVec = c7x::strm_agen<0, vec>::get_adv(pSA0);
599 vec sV4 = __vload_pred(lPred, pLoadVec);
601 for (int32_t vertical = 0; vertical < col; vertical++) {
602 vec scalarDup = c7x::strm_eng<0, vec>::get_adv();
604 vec v1 = c7x::strm_eng<1, vec>::get_adv();
605 vec v2 = c7x::strm_eng<1, vec>::get_adv();
606 vec v3 = c7x::strm_eng<1, vec>::get_adv();
607 vec v4 = c7x::strm_eng<1, vec>::get_adv();
609 v1 -= sV1 * scalarDup;
610 v2 -= sV2 * scalarDup;
611 v3 -= sV3 * scalarDup;
612 v4 -= sV4 * scalarDup;
614 __vpred sPred = c7x::strm_agen<1, vec>::get_vpred();
615 vec *pStoreVec = c7x::strm_agen<1, vec>::get_adv(pSA1);
616 __vstore_pred(sPred, pStoreVec, v1);
618 sPred = c7x::strm_agen<1, vec>::get_vpred();
619 pStoreVec = c7x::strm_agen<1, vec>::get_adv(pSA1);
620 __vstore_pred(sPred, pStoreVec, v2);
622 sPred = c7x::strm_agen<1, vec>::get_vpred();
623 pStoreVec = c7x::strm_agen<1, vec>::get_adv(pSA1);
624 __vstore_pred(sPred, pStoreVec, v3);
626 sPred = c7x::strm_agen<1, vec>::get_vpred();
627 pStoreVec = c7x::strm_agen<1, vec>::get_adv(pSA1);
628 __vstore_pred(sPred, pStoreVec, v4);
636 lPred = c7x::strm_agen<2, vec>::get_vpred();
637 vec *psV = c7x::strm_agen<2, vec>::get_adv(pSA2);
638 __vstore_pred(lPred, psV, sV1);
640 lPred = c7x::strm_agen<2, vec>::get_vpred();
641 psV = c7x::strm_agen<2, vec>::get_adv(pSA2);
642 __vstore_pred(lPred, psV, sV2);
644 lPred = c7x::strm_agen<2, vec>::get_vpred();
645 psV = c7x::strm_agen<2, vec>::get_adv(pSA2);
646 __vstore_pred(lPred, psV, sV3);
648 lPred = c7x::strm_agen<2, vec>::get_vpred();
649 psV = c7x::strm_agen<2, vec>::get_adv(pSA2);
650 __vstore_pred(lPred, psV, sV4);
// 2-vector tiles of pLocalR, starting colLimit8 + colLimit4 columns into each row.
659 seMatrixParams.ICNT0 = saMatrixParams.ICNT0 = eleCount * lenTile2;
660 seMatrixParams.ICNT1 = saMatrixParams.ICNT1 = col;
661 seMatrixParams.ICNT2 = saMatrixParams.ICNT2 = nTiles2;
662 seMatrixParams.DIM2 = saMatrixParams.DIM2 = eleCount * lenTile2;
663 seMatrixParams.DECDIM1_WIDTH = saMatrixParams.DECDIM1_WIDTH = colLimit2;
665 dataType *pSE1 = pLocalR + colLimit8 + colLimit4;
666 dataType *pSA1 = pLocalR + colLimit8 + colLimit4;
667 dataType *pSA0 = pLastR;
668 dataType *pSA2 = pLastR;
671 __SE1_OPEN(pSE1, seMatrixParams);
672 __SA1_OPEN(saMatrixParams);
675 for (int32_t tile = 0; tile < nTiles2; tile++) {
676 __vpred lPred = c7x::strm_agen<0, vec>::get_vpred();
677 vec *pLoadVec = c7x::strm_agen<0, vec>::get_adv(pSA0);
678 vec sV1 = __vload_pred(lPred, pLoadVec);
680 lPred = c7x::strm_agen<0, vec>::get_vpred();
681 pLoadVec = c7x::strm_agen<0, vec>::get_adv(pSA0);
682 vec sV2 = __vload_pred(lPred, pLoadVec);
683 for (int32_t vertical = 0; vertical < col; vertical++) {
684 vec scalarDup = c7x::strm_eng<0, vec>::get_adv();
686 vec v1 = c7x::strm_eng<1, vec>::get_adv();
687 vec v2 = c7x::strm_eng<1, vec>::get_adv();
689 v1 -= sV1 * scalarDup;
690 v2 -= sV2 * scalarDup;
692 __vpred sPred = c7x::strm_agen<1, vec>::get_vpred();
693 vec *pStoreVec = c7x::strm_agen<1, vec>::get_adv(pSA1);
694 __vstore_pred(sPred, pStoreVec, v1);
696 sPred = c7x::strm_agen<1, vec>::get_vpred();
697 pStoreVec = c7x::strm_agen<1, vec>::get_adv(pSA1);
698 __vstore_pred(sPred, pStoreVec, v2);
704 lPred = c7x::strm_agen<2, vec>::get_vpred();
705 vec *psV = c7x::strm_agen<2, vec>::get_adv(pSA2);
706 __vstore_pred(lPred, psV, sV1);
708 lPred = c7x::strm_agen<2, vec>::get_vpred();
709 psV = c7x::strm_agen<2, vec>::get_adv(pSA2);
710 __vstore_pred(lPred, psV, sV2);
// 1-vector tiles of pLocalR, starting after the columns of the 8/4/2 tile classes.
718 seMatrixParams.ICNT0 = saMatrixParams.ICNT0 = eleCount * lenTile1;
719 seMatrixParams.ICNT1 = saMatrixParams.ICNT1 = col;
720 seMatrixParams.ICNT2 = saMatrixParams.ICNT2 = nTiles1;
721 seMatrixParams.DIM2 = saMatrixParams.DIM2 = eleCount * lenTile1;
722 seMatrixParams.DECDIM1_WIDTH = saMatrixParams.DECDIM1_WIDTH = colLimit1;
724 dataType *pSE1 = pLocalR + colLimit8 + colLimit4 + colLimit2;
725 dataType *pSA1 = pLocalR + colLimit8 + colLimit4 + colLimit2;
726 dataType *pSA0 = pLastR;
727 dataType *pSA2 = pLastR;
730 __SE1_OPEN(pSE1, seMatrixParams);
731 __SA1_OPEN(saMatrixParams);
734 for (int32_t tile = 0; tile < nTiles1; tile++) {
735 __vpred lPred = c7x::strm_agen<0, vec>::get_vpred();
736 vec *pLoadVec = c7x::strm_agen<0, vec>::get_adv(pSA0);
737 vec sV1 = __vload_pred(lPred, pLoadVec);
739 for (int32_t vertical = 0; vertical < col; vertical++) {
740 vec scalarDup = c7x::strm_eng<0, vec>::get_adv();
742 vec v1 = c7x::strm_eng<1, vec>::get_adv();
744 v1 -= sV1 * scalarDup;
746 __vpred sPred = c7x::strm_agen<1, vec>::get_vpred();
747 vec *pStoreVec = c7x::strm_agen<1, vec>::get_adv(pSA1);
748 __vstore_pred(sPred, pStoreVec, v1);
753 lPred = c7x::strm_agen<2, vec>::get_vpred();
754 vec *psV = c7x::strm_agen<2, vec>::get_adv(pSA2);
755 __vstore_pred(lPred, psV, sV1);
// ---- Pass over pLocalInvA ----
// Same tile sequence as the R pass, with row `col` taken from pLastInvA. SA0 and SA2
// are reopened; SE0 is not reopened on any visible line.
763 __SA0_OPEN(saRefParams);
764 __SA2_OPEN(saRefStoreParams);
// 8-vector tiles of pLocalInvA.
769 seMatrixParams.ICNT0 = saMatrixParams.ICNT0 = eleCount * lenTile8;
770 seMatrixParams.ICNT1 = saMatrixParams.ICNT1 = col;
771 seMatrixParams.ICNT2 = saMatrixParams.ICNT2 = nTiles8;
772 seMatrixParams.DIM2 = saMatrixParams.DIM2 = eleCount * lenTile8;
773 seMatrixParams.DECDIM1_WIDTH = saMatrixParams.DECDIM1_WIDTH = colLimit8;
776 __SE1_OPEN(pLocalInvA, seMatrixParams);
777 __SA1_OPEN(saMatrixParams);
780 for (int32_t tile = 0; tile < nTiles8; tile++) {
781 __vpred lPred = c7x::strm_agen<0, vec>::get_vpred();
782 vec *pLoadVec = c7x::strm_agen<0, vec>::get_adv(pLastInvA);
783 vec sV1 = __vload_pred(lPred, pLoadVec);
785 lPred = c7x::strm_agen<0, vec>::get_vpred();
786 pLoadVec = c7x::strm_agen<0, vec>::get_adv(pLastInvA);
787 vec sV2 = __vload_pred(lPred, pLoadVec);
789 lPred = c7x::strm_agen<0, vec>::get_vpred();
790 pLoadVec = c7x::strm_agen<0, vec>::get_adv(pLastInvA);
791 vec sV3 = __vload_pred(lPred, pLoadVec);
793 lPred = c7x::strm_agen<0, vec>::get_vpred();
794 pLoadVec = c7x::strm_agen<0, vec>::get_adv(pLastInvA);
795 vec sV4 = __vload_pred(lPred, pLoadVec);
797 lPred = c7x::strm_agen<0, vec>::get_vpred();
798 pLoadVec = c7x::strm_agen<0, vec>::get_adv(pLastInvA);
799 vec sV5 = __vload_pred(lPred, pLoadVec);
801 lPred = c7x::strm_agen<0, vec>::get_vpred();
802 pLoadVec = c7x::strm_agen<0, vec>::get_adv(pLastInvA);
803 vec sV6 = __vload_pred(lPred, pLoadVec);
805 lPred = c7x::strm_agen<0, vec>::get_vpred();
806 pLoadVec = c7x::strm_agen<0, vec>::get_adv(pLastInvA);
807 vec sV7 = __vload_pred(lPred, pLoadVec);
809 lPred = c7x::strm_agen<0, vec>::get_vpred();
810 pLoadVec = c7x::strm_agen<0, vec>::get_adv(pLastInvA);
811 vec sV8 = __vload_pred(lPred, pLoadVec);
813 for (int32_t vertical = 0; vertical < col; vertical++) {
814 vec scalarDup = c7x::strm_eng<0, vec>::get_adv();
816 vec v1 = c7x::strm_eng<1, vec>::get_adv();
817 vec v2 = c7x::strm_eng<1, vec>::get_adv();
818 vec v3 = c7x::strm_eng<1, vec>::get_adv();
819 vec v4 = c7x::strm_eng<1, vec>::get_adv();
820 vec v5 = c7x::strm_eng<1, vec>::get_adv();
821 vec v6 = c7x::strm_eng<1, vec>::get_adv();
822 vec v7 = c7x::strm_eng<1, vec>::get_adv();
823 vec v8 = c7x::strm_eng<1, vec>::get_adv();
825 v1 -= sV1 * scalarDup;
826 v2 -= sV2 * scalarDup;
827 v3 -= sV3 * scalarDup;
828 v4 -= sV4 * scalarDup;
829 v5 -= sV5 * scalarDup;
830 v6 -= sV6 * scalarDup;
831 v7 -= sV7 * scalarDup;
832 v8 -= sV8 * scalarDup;
834 __vpred sPred = c7x::strm_agen<1, vec>::get_vpred();
835 vec *pStoreVec = c7x::strm_agen<1, vec>::get_adv(pLocalInvA);
836 __vstore_pred(sPred, pStoreVec, v1);
838 sPred = c7x::strm_agen<1, vec>::get_vpred();
839 pStoreVec = c7x::strm_agen<1, vec>::get_adv(pLocalInvA);
840 __vstore_pred(sPred, pStoreVec, v2);
842 sPred = c7x::strm_agen<1, vec>::get_vpred();
843 pStoreVec = c7x::strm_agen<1, vec>::get_adv(pLocalInvA);
844 __vstore_pred(sPred, pStoreVec, v3);
846 sPred = c7x::strm_agen<1, vec>::get_vpred();
847 pStoreVec = c7x::strm_agen<1, vec>::get_adv(pLocalInvA);
848 __vstore_pred(sPred, pStoreVec, v4);
850 sPred = c7x::strm_agen<1, vec>::get_vpred();
851 pStoreVec = c7x::strm_agen<1, vec>::get_adv(pLocalInvA);
852 __vstore_pred(sPred, pStoreVec, v5);
854 sPred = c7x::strm_agen<1, vec>::get_vpred();
855 pStoreVec = c7x::strm_agen<1, vec>::get_adv(pLocalInvA);
856 __vstore_pred(sPred, pStoreVec, v6);
858 sPred = c7x::strm_agen<1, vec>::get_vpred();
859 pStoreVec = c7x::strm_agen<1, vec>::get_adv(pLocalInvA);
860 __vstore_pred(sPred, pStoreVec, v7);
862 sPred = c7x::strm_agen<1, vec>::get_vpred();
863 pStoreVec = c7x::strm_agen<1, vec>::get_adv(pLocalInvA);
864 __vstore_pred(sPred, pStoreVec, v8);
// Write sV1..sV8 to row `col` of pLocalInvA through SA2 (lines between the inner loop
// and these stores are not visible here).
876 lPred = c7x::strm_agen<2, vec>::get_vpred();
877 vec *psV = c7x::strm_agen<2, vec>::get_adv(pLastInvA);
878 __vstore_pred(lPred, psV, sV1);
880 lPred = c7x::strm_agen<2, vec>::get_vpred();
881 psV = c7x::strm_agen<2, vec>::get_adv(pLastInvA);
882 __vstore_pred(lPred, psV, sV2);
884 lPred = c7x::strm_agen<2, vec>::get_vpred();
885 psV = c7x::strm_agen<2, vec>::get_adv(pLastInvA);
886 __vstore_pred(lPred, psV, sV3);
888 lPred = c7x::strm_agen<2, vec>::get_vpred();
889 psV = c7x::strm_agen<2, vec>::get_adv(pLastInvA);
890 __vstore_pred(lPred, psV, sV4);
892 lPred = c7x::strm_agen<2, vec>::get_vpred();
893 psV = c7x::strm_agen<2, vec>::get_adv(pLastInvA);
894 __vstore_pred(lPred, psV, sV5);
896 lPred = c7x::strm_agen<2, vec>::get_vpred();
897 psV = c7x::strm_agen<2, vec>::get_adv(pLastInvA);
898 __vstore_pred(lPred, psV, sV6);
900 lPred = c7x::strm_agen<2, vec>::get_vpred();
901 psV = c7x::strm_agen<2, vec>::get_adv(pLastInvA);
902 __vstore_pred(lPred, psV, sV7);
904 lPred = c7x::strm_agen<2, vec>::get_vpred();
905 psV = c7x::strm_agen<2, vec>::get_adv(pLastInvA);
906 __vstore_pred(lPred, psV, sV8);
// 4-vector tiles of pLocalInvA, starting colLimit8 columns into each row.
915 seMatrixParams.ICNT0 = saMatrixParams.ICNT0 = eleCount * lenTile4;
916 seMatrixParams.ICNT1 = saMatrixParams.ICNT1 = col;
917 seMatrixParams.ICNT2 = saMatrixParams.ICNT2 = nTiles4;
918 seMatrixParams.DIM2 = saMatrixParams.DIM2 = eleCount * lenTile4;
919 seMatrixParams.DECDIM1_WIDTH = saMatrixParams.DECDIM1_WIDTH = colLimit4;
921 dataType *pSE1 = pLocalInvA + colLimit8;
922 dataType *pSA1 = pLocalInvA + colLimit8;
923 dataType *pSA0 = pLastInvA;
924 dataType *pSA2 = pLastInvA;
927 __SE1_OPEN(pSE1, seMatrixParams);
928 __SA1_OPEN(saMatrixParams);
931 for (int32_t tile = 0; tile < nTiles4; tile++) {
932 __vpred lPred = c7x::strm_agen<0, vec>::get_vpred();
933 vec *pLoadVec = c7x::strm_agen<0, vec>::get_adv(pSA0);
934 vec sV1 = __vload_pred(lPred, pLoadVec);
936 lPred = c7x::strm_agen<0, vec>::get_vpred();
937 pLoadVec = c7x::strm_agen<0, vec>::get_adv(pSA0);
938 vec sV2 = __vload_pred(lPred, pLoadVec);
940 lPred = c7x::strm_agen<0, vec>::get_vpred();
941 pLoadVec = c7x::strm_agen<0, vec>::get_adv(pSA0);
942 vec sV3 = __vload_pred(lPred, pLoadVec);
944 lPred = c7x::strm_agen<0, vec>::get_vpred();
945 pLoadVec = c7x::strm_agen<0, vec>::get_adv(pSA0);
946 vec sV4 = __vload_pred(lPred, pLoadVec);
948 for (int32_t vertical = 0; vertical < col; vertical++) {
949 vec scalarDup = c7x::strm_eng<0, vec>::get_adv();
951 vec v1 = c7x::strm_eng<1, vec>::get_adv();
952 vec v2 = c7x::strm_eng<1, vec>::get_adv();
953 vec v3 = c7x::strm_eng<1, vec>::get_adv();
954 vec v4 = c7x::strm_eng<1, vec>::get_adv();
956 v1 -= sV1 * scalarDup;
957 v2 -= sV2 * scalarDup;
958 v3 -= sV3 * scalarDup;
959 v4 -= sV4 * scalarDup;
961 __vpred sPred = c7x::strm_agen<1, vec>::get_vpred();
962 vec *pStoreVec = c7x::strm_agen<1, vec>::get_adv(pSA1);
963 __vstore_pred(sPred, pStoreVec, v1);
965 sPred = c7x::strm_agen<1, vec>::get_vpred();
966 pStoreVec = c7x::strm_agen<1, vec>::get_adv(pSA1);
967 __vstore_pred(sPred, pStoreVec, v2);
969 sPred = c7x::strm_agen<1, vec>::get_vpred();
970 pStoreVec = c7x::strm_agen<1, vec>::get_adv(pSA1);
971 __vstore_pred(sPred, pStoreVec, v3);
973 sPred = c7x::strm_agen<1, vec>::get_vpred();
974 pStoreVec = c7x::strm_agen<1, vec>::get_adv(pSA1);
975 __vstore_pred(sPred, pStoreVec, v4);
983 lPred = c7x::strm_agen<2, vec>::get_vpred();
984 vec *psV = c7x::strm_agen<2, vec>::get_adv(pSA2);
985 __vstore_pred(lPred, psV, sV1);
987 lPred = c7x::strm_agen<2, vec>::get_vpred();
988 psV = c7x::strm_agen<2, vec>::get_adv(pSA2);
989 __vstore_pred(lPred, psV, sV2);
991 lPred = c7x::strm_agen<2, vec>::get_vpred();
992 psV = c7x::strm_agen<2, vec>::get_adv(pSA2);
993 __vstore_pred(lPred, psV, sV3);
995 lPred = c7x::strm_agen<2, vec>::get_vpred();
996 psV = c7x::strm_agen<2, vec>::get_adv(pSA2);
997 __vstore_pred(lPred, psV, sV4);
// 2-vector tiles of pLocalInvA, starting colLimit8 + colLimit4 columns into each row.
1005 seMatrixParams.ICNT0 = saMatrixParams.ICNT0 = eleCount * lenTile2;
1006 seMatrixParams.ICNT1 = saMatrixParams.ICNT1 = col;
1007 seMatrixParams.ICNT2 = saMatrixParams.ICNT2 = nTiles2;
1008 seMatrixParams.DIM2 = saMatrixParams.DIM2 = eleCount * lenTile2;
1009 seMatrixParams.DECDIM1_WIDTH = saMatrixParams.DECDIM1_WIDTH = colLimit2;
1011 dataType *pSE1 = pLocalInvA + colLimit8 + colLimit4;
1012 dataType *pSA1 = pLocalInvA + colLimit8 + colLimit4;
1013 dataType *pSA0 = pLastInvA;
1014 dataType *pSA2 = pLastInvA;
1017 __SE1_OPEN(pSE1, seMatrixParams);
1018 __SA1_OPEN(saMatrixParams);
1021 for (int32_t tile = 0; tile < nTiles2; tile++) {
1022 __vpred lPred = c7x::strm_agen<0, vec>::get_vpred();
1023 vec *pLoadVec = c7x::strm_agen<0, vec>::get_adv(pSA0);
1024 vec sV1 = __vload_pred(lPred, pLoadVec);
1026 lPred = c7x::strm_agen<0, vec>::get_vpred();
1027 pLoadVec = c7x::strm_agen<0, vec>::get_adv(pSA0);
1028 vec sV2 = __vload_pred(lPred, pLoadVec);
1030 for (int32_t vertical = 0; vertical < col; vertical++) {
1031 vec scalarDup = c7x::strm_eng<0, vec>::get_adv();
1033 vec v1 = c7x::strm_eng<1, vec>::get_adv();
1034 vec v2 = c7x::strm_eng<1, vec>::get_adv();
1036 v1 -= sV1 * scalarDup;
1037 v2 -= sV2 * scalarDup;
1039 __vpred sPred = c7x::strm_agen<1, vec>::get_vpred();
1040 vec *pStoreVec = c7x::strm_agen<1, vec>::get_adv(pSA1);
1041 __vstore_pred(sPred, pStoreVec, v1);
1043 sPred = c7x::strm_agen<1, vec>::get_vpred();
1044 pStoreVec = c7x::strm_agen<1, vec>::get_adv(pSA1);
1045 __vstore_pred(sPred, pStoreVec, v2);
1051 lPred = c7x::strm_agen<2, vec>::get_vpred();
1052 vec *psV = c7x::strm_agen<2, vec>::get_adv(pSA2);
1053 __vstore_pred(lPred, psV, sV1);
1055 lPred = c7x::strm_agen<2, vec>::get_vpred();
1056 psV = c7x::strm_agen<2, vec>::get_adv(pSA2);
1057 __vstore_pred(lPred, psV, sV2);
// 1-vector tiles of pLocalInvA, starting after the columns of the 8/4/2 tile classes.
1065 seMatrixParams.ICNT0 = saMatrixParams.ICNT0 = eleCount * lenTile1;
1066 seMatrixParams.ICNT1 = saMatrixParams.ICNT1 = col;
1067 seMatrixParams.ICNT2 = saMatrixParams.ICNT2 = nTiles1;
1068 seMatrixParams.DIM2 = saMatrixParams.DIM2 = eleCount * lenTile1;
1069 seMatrixParams.DECDIM1_WIDTH = saMatrixParams.DECDIM1_WIDTH = colLimit1;
1071 dataType *pSE1 = pLocalInvA + colLimit8 + colLimit4 + colLimit2;
1072 dataType *pSA1 = pLocalInvA + colLimit8 + colLimit4 + colLimit2;
1073 dataType *pSA0 = pLastInvA;
1074 dataType *pSA2 = pLastInvA;
1077 __SE1_OPEN(pSE1, seMatrixParams);
1078 __SA1_OPEN(saMatrixParams);
1081 for (int32_t tile = 0; tile < nTiles1; tile++) {
1082 __vpred lPred = c7x::strm_agen<0, vec>::get_vpred();
1083 vec *pLoadVec = c7x::strm_agen<0, vec>::get_adv(pSA0);
1084 vec sV1 = __vload_pred(lPred, pLoadVec);
1086 for (int32_t vertical = 0; vertical < col; vertical++) {
1087 vec scalarDup = c7x::strm_eng<0, vec>::get_adv();
1089 vec v1 = c7x::strm_eng<1, vec>::get_adv();
1091 v1 -= sV1 * scalarDup;
1093 __vpred sPred = c7x::strm_agen<1, vec>::get_vpred();
1094 vec *pStoreVec = c7x::strm_agen<1, vec>::get_adv(pSA1);
1095 __vstore_pred(sPred, pStoreVec, v1);
1100 lPred = c7x::strm_agen<2, vec>::get_vpred();
1101 vec *psV = c7x::strm_agen<2, vec>::get_adv(pSA2);
1102 __vstore_pred(lPred, psV, sV1);
// Isolated parameter lines from two declarations whose remaining lines are not visible
// in this listing; presumably the float and double explicit instantiations of
// DSPLIB_qrd_inverse_R_invA_exec_ci -- TODO confirm against the full file.
1119 int32_t colInvAStride,
1126 int32_t colInvAStride,
// Fragment of the templated top-level execution routine (parts of the signature, the
// return statement and several body lines are not visible in this listing). The visible
// part reads the kernel's private arguments, casts the untyped buffers to dataType
// pointers, and runs four sub-kernels in sequence.
1130 template <
typename dataType>
1134 void *restrict pInvA,
1135 void *restrict pInvAScratch,
1136 void *restrict pScratch)
1143 int32_t strideR = pKerPrivArgs->
strideR;
1144 int32_t heightR = pKerPrivArgs->
heightR;
1145 int32_t widthR = pKerPrivArgs->
widthR;
1146 int32_t strideInvA = pKerPrivArgs->
strideInvA;
1147 int32_t dataSize =
sizeof(dataType);
1148 uint8_t *pBlock = pKerPrivArgs->
bufPblock;
// Typed views of the caller's buffers; pScratch is used as the factor array.
1153 dataType *pLocalQ = (dataType *) pQ;
1154 dataType *pLocalR = (dataType *) pR;
1155 dataType *pLocalInvA = (dataType *) pInvA;
1156 dataType *pLocalInvAScratch = (dataType *) pInvAScratch;
1157 dataType *pFactArray = (dataType *) pScratch;
// Strides divided by the element size; presumably strides in elements -- TODO confirm.
1159 int32_t colStrideR = strideR / dataSize;
1160 int32_t colInvAStride = strideInvA / dataSize;
1162 DSPLIB_DEBUGPRINTFN(0,
"pLocalQ: %p pLocalR: %p pLocalInvA: %p widthR: %d heightR: %d\n", pLocalQ, pLocalR,
1163 pLocalInvA, widthR, heightR);
// Step 1: generate an identity matrix in the invA scratch buffer.
// Step 2: run the column-by-column update on R and the scratch matrix.
1171 DSPLIB_qrd_identity_matrix_generate_exec_ci<dataType>(pLocalInvAScratch, heightR, colInvAStride, pBlock);
1172 DSPLIB_qrd_inverse_R_invA_exec_ci<dataType>(pLocalR, pLocalInvAScratch, widthR, colStrideR, colInvAStride,
1173 pFactArray, pBlock);
// Step 3: matrix transpose called with pLocalQ and pLocalR; presumably Q is transposed
// into the R buffer -- TODO confirm the argument order.
// Step 4: matrix multiply called with the scratch matrix, pLocalR and pLocalInvA;
// presumably pLocalInvA receives the product. pMatTransKerPrivArgs and
// pMatMulKerPrivArgs are set on lines not visible here.
1175 DSPLIB_matTrans_exec_ci<dataType>(pMatTransKerPrivArgs, pLocalQ, pLocalR);
1176 DSPLIB_matMul_exec_ci<dataType>(pMatMulKerPrivArgs, pLocalInvAScratch, pLocalR, pLocalInvA);
// Trailing parameter lines of two declarations whose leading lines are not visible in
// this listing; presumably the float and double explicit instantiations of
// DSPLIB_qrd_inverse_exec_ci. The fourth buffer is named pInvScratch here but
// pInvAScratch in the template definition.
1186 void *restrict pInvA,
1187 void *restrict pInvScratch,
1188 void *restrict pScratch);
1193 void *restrict pInvA,
1194 void *restrict pInvScratch,
1195 void *restrict pScratch);
template DSPLIB_STATUS DSPLIB_qrd_inverse_exec_ci< float >(DSPLIB_kernelHandle handle, void *restrict pQ, void *restrict pR, void *restrict pInvA, void *restrict pInvScratch, void *restrict pScratch)
void DSPLIB_qrd_inverse_R_invA_exec_ci(dataType *pLocalR, dataType *pLocalInvA, int32_t nCols, int32_t colStrideR, int32_t colInvAStride, dataType *factArray, uint8_t *pBlock)
static dataType DSPLIB_qrd_inverse_factor_exec_ci(dataType *pR, int32_t colStrideR, int32_t nRows, dataType *pFactor, vec scaleVec, uint8_t *pBlock, __SE_TEMPLATE_v1 se0Params, __SE_TEMPLATE_v1 se1Params, __SA_TEMPLATE_v1 sa0Params, __SA_TEMPLATE_v1 sa1Params)
template void DSPLIB_qrd_inverse_R_invA_init_ci< double >(DSPLIB_kernelHandle handle)
DSPLIB_STATUS DSPLIB_qrd_inverse_init_ci(DSPLIB_kernelHandle handle, DSPLIB_bufParams2D_t *bufParamsQ, DSPLIB_bufParams2D_t *bufParamsR, DSPLIB_bufParams2D_t *bufParamsInvA, DSPLIB_bufParams2D_t *bufParamsInvAFinal, const DSPLIB_qrdInvInitArgs *pKerInitArgs)
This function is the initialization function for the C7x implementation of the kernel....
template void DSPLIB_qrd_inverse_factor_init_ci< float >(DSPLIB_kernelHandle handle)
template DSPLIB_STATUS DSPLIB_qrd_inverse_exec_ci< double >(DSPLIB_kernelHandle handle, void *restrict pQ, void *restrict pR, void *restrict pInvA, void *restrict pInvScratch, void *restrict pScratch)
template void DSPLIB_qrd_inverse_R_invA_exec_ci< float >(float *pLocalR, float *pLocalInvA, int32_t nCols, int32_t colStrideR, int32_t colInvAStride, float *factArray, uint8_t *pBlock)
DSPLIB_STATUS DSPLIB_qrd_inverse_exec_ci(DSPLIB_kernelHandle handle, void *restrict pQ, void *restrict pR, void *restrict pInvA, void *restrict pInvAScratch, void *restrict pScratch)
This function is the main execution function for the C7x implementation of the kernel....
void DSPLIB_qrd_inverse_R_invA_init_ci(DSPLIB_kernelHandle handle)
template void DSPLIB_qrd_inverse_R_invA_init_ci< float >(DSPLIB_kernelHandle handle)
template DSPLIB_STATUS DSPLIB_qrd_inverse_init_ci< float >(DSPLIB_kernelHandle handle, DSPLIB_bufParams2D_t *bufParamsQ, DSPLIB_bufParams2D_t *bufParamsR, DSPLIB_bufParams2D_t *bufParamsInvA, DSPLIB_bufParams2D_t *bufParamsInvAFinal, const DSPLIB_qrdInvInitArgs *pKerInitArgs)
template void DSPLIB_qrd_inverse_factor_init_ci< double >(DSPLIB_kernelHandle handle)
template DSPLIB_STATUS DSPLIB_qrd_inverse_init_ci< double >(DSPLIB_kernelHandle handle, DSPLIB_bufParams2D_t *bufParamsQ, DSPLIB_bufParams2D_t *bufParamsR, DSPLIB_bufParams2D_t *bufParamsInvA, DSPLIB_bufParams2D_t *bufParamsInvAFinal, const DSPLIB_qrdInvInitArgs *pKerInitArgs)
void DSPLIB_qrd_inverse_factor_init_ci(DSPLIB_kernelHandle handle)
template void DSPLIB_qrd_inverse_R_invA_exec_ci< double >(double *pLocalR, double *pLocalInvA, int32_t nCols, int32_t colStrideR, int32_t colInvAStride, double *factArray, uint8_t *pBlock)
Header file for kernel's internal use. For the kernel's interface, please see DSPLIB_qrd_inverse.
#define DSPLIB_DEBUGPRINTFN(N, fmt,...)
DSPLIB_STATUS_NAME
The enumeration of all status codes.
void * DSPLIB_kernelHandle
Handle type for DSPLIB operations.
A structure for a 2 dimensional buffer descriptor.
int32_t stride_y
Stride in Y dimension in bytes.
uint32_t dim_x
Width of buffer in X dimension in elements.
uint32_t dim_y
Height of buffer in Y dimension in elements.
Structure containing the parameters to initialize the kernel.
int8_t funcStyle
Variant of the function; refer to DSPLIB_FUNCTION_STYLE.
Structure that is reserved for internal use by the kernel.
int32_t strideIn1Elements
int32_t strideIn0Elements
int32_t strideOutElements
Structure containing the parameters to initialize the kernel.
uint32_t dimX
Size of input data.
int8_t funcStyle
Variant of the function; refer to DSPLIB_FUNCTION_STYLE.
Structure that is reserved for internal use by the kernel.
int32_t strideOut
Stride between rows of output data matrix
uint32_t heightIn
Height of input data matrix
int32_t strideIn
Stride between rows of input data matrix
uint32_t widthIn
Size of input buffer for different batches DSPLIB_matTrans_init that will be retrieved and used by DS...
Structure containing the parameters to initialize the kernel.
int8_t funcStyle
Variant of the function; refer to DSPLIB_FUNCTION_STYLE.
Structure that is reserved for internal use by the kernel.
uint32_t heightR
Height of input data matrix
DSPLIB_matMul_PrivArgs pMatMulKerPrivArgs
Privargs for the matMul kernel.
int32_t strideR
Stride between rows of R output data matrix
DSPLIB_matTrans_PrivArgs pMatTransKerPrivArgs
Privargs for the matTrans kernel.
uint8_t bufPblock[DSPLIB_QRD_INVERSE_IXX_IXX_OXX_PBLOCK_SIZE]
Buffer to save SE & SA configuration parameters
uint32_t widthR
Size of input buffer for different batches DSPLIB_qrd_inverse_init that will be retrieved and used by...
int32_t strideInvA
Stride between rows of input data matrix