// Body fragment of an init routine (presumably DSPLIB_qrd_solver_y_init_ci; the listing
// omits the signature, braces and some declarations, and prefixes each statement with its
// original file line number).
// It builds two SE templates and two SA templates and stores them in slots 0..3 of the
// parameter block pBlock, each slot being SE_PARAM_SIZE bytes apart.
37 uint8_t *pBlock = pKerPrivArgs->
bufPblock;
// NOTE(review): nCols and nRows are both read from widthR here, whereas the exec routine
// further down reads nRows from heightR - confirm this is intended.
38 int32_t nCols = pKerPrivArgs->
widthR;
39 int32_t nRows = pKerPrivArgs->
widthR;
40 int32_t strideQ = pKerPrivArgs->
strideQ;
// strideQ divided by the element size; presumably converts a byte stride to an element stride.
41 int32_t colStrideQ = strideQ /
sizeof(dataType);
43 typedef typename c7x::make_full_vector<dataType>::type vec;
46 uint32_t eleCount = c7x::element_count_of<vec>::value;
// lenTile8 is not declared in the visible lines; one tile covers eleCount * lenTile8 elements.
47 int32_t nTiles_8 = DSPLIB_ceilingDiv(nCols, (eleCount * lenTile8));
49 __SE_ELETYPE SE_ELETYPE = c7x::se_eletype<vec>::value;
50 __SE_VECLEN SE_VECLEN = c7x::se_veclen<vec>::value;
51 __SA_VECLEN SA_VECLEN = c7x::sa_veclen<vec>::value;
52 __SE_ELEDUP SE_ELEDUP = c7x::se_eledup<dataType>::value;
54 __SE_TEMPLATE_v1 seScalarParams = __gen_SE_TEMPLATE_v1();
55 __SE_TEMPLATE_v1 seMatrixParams = __gen_SE_TEMPLATE_v1();
56 __SA_TEMPLATE_v1 saMatrixParams = __gen_SA_TEMPLATE_v1();
57 __SA_TEMPLATE_v1 saRefParams = __gen_SA_TEMPLATE_v1();
// Scalar stream: 2D, ICNT0 = nRows elements with element duplication enabled (ELEDUP),
// repeated ICNT1 = nTiles_8 times with DIM1 = 0 (no address step between repetitions).
59 seScalarParams.DIM1 = 0;
60 seScalarParams.ELEDUP = SE_ELEDUP;
61 seScalarParams.DIMFMT = __SE_DIMFMT_2D;
62 seScalarParams.VECLEN = SE_VECLEN;
63 seScalarParams.ELETYPE = SE_ELETYPE;
64 seScalarParams.ICNT1 = nTiles_8;
65 seScalarParams.ICNT0 = nRows;
// Matrix stream: 3D. Innermost dimension is one tile (eleCount * lenTile8 elements),
// dimension 1 walks nRows steps of colStrideQ, dimension 2 walks nTiles_8 tiles.
// DECDIM1 is tied to DIM2 with DECDIM1_WIDTH = nCols; presumably this clips the last,
// partial tile to the nCols valid elements - confirm against the SE documentation.
67 seMatrixParams.ICNT0 = (eleCount * lenTile8);
68 seMatrixParams.DIM1 = colStrideQ;
69 seMatrixParams.DIM2 = (eleCount * lenTile8);
70 seMatrixParams.DIMFMT = __SE_DIMFMT_3D;
71 seMatrixParams.ELETYPE = SE_ELETYPE;
72 seMatrixParams.VECLEN = SE_VECLEN;
73 seMatrixParams.DECDIM1 = __SE_DECDIM_DIM2;
74 seMatrixParams.ICNT2 = nTiles_8;
75 seMatrixParams.DECDIM1_WIDTH = nCols;
76 seMatrixParams.ICNT1 = nRows;
// SA template with the same counts, strides and DECDIM settings as seMatrixParams.
78 saMatrixParams.ICNT0 = (eleCount * lenTile8);
79 saMatrixParams.DIM1 = colStrideQ;
80 saMatrixParams.DIM2 = (eleCount * lenTile8);
81 saMatrixParams.DIMFMT = __SA_DIMFMT_3D;
82 saMatrixParams.VECLEN = SA_VECLEN;
83 saMatrixParams.DECDIM1 = __SA_DECDIM_DIM2;
84 saMatrixParams.ICNT2 = nTiles_8;
85 saMatrixParams.DECDIM1_WIDTH = nCols;
86 saMatrixParams.ICNT1 = nRows;
// Linear (1D) SA template over nCols elements.
88 saRefParams.ICNT0 = nCols;
89 saRefParams.DIMFMT = __SA_DIMFMT_1D;
90 saRefParams.VECLEN = SA_VECLEN;
// Persist the templates: slot 0 = seScalarParams, 1 = seMatrixParams,
// 2 = saMatrixParams, 3 = saRefParams.
92 *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (0 * SE_PARAM_SIZE)) = seScalarParams;
93 *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (1 * SE_PARAM_SIZE)) = seMatrixParams;
95 *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (2 * SE_PARAM_SIZE)) = saMatrixParams;
96 *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (3 * SE_PARAM_SIZE)) = saRefParams;
// Fragment of a templated exec routine (presumably DSPLIB_qrd_solver_y_exec_ci; the
// signature line, braces and the declarations of the accumulators sV1..sV8 are not visible).
// For each tile it accumulates, over nRows iterations, eight products of a vector read
// from SE1 (opened on pLocalQ) with a vector read from SE0 (opened on pLocalB), then
// stores the eight accumulators to pLocalY with predicated stores.
103 template <
typename dataType>
112 typedef typename c7x::make_full_vector<dataType>::type vec;
// Reload the four templates from slots 0..3 of the parameter block.
114 __SE_TEMPLATE_v1 seScalarParams = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (0 * SE_PARAM_SIZE));
115 __SE_TEMPLATE_v1 seMatrixParams = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (1 * SE_PARAM_SIZE));
116 __SA_TEMPLATE_v1 saMatrixParams = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (2 * SE_PARAM_SIZE));
117 __SA_TEMPLATE_v1 saRefParams = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (3 * SE_PARAM_SIZE));
119 uint32_t eleCount = c7x::element_count_of<vec>::value;
// Eight vectors per tile, matching the eight reads/stores unrolled below.
121 int32_t lenTile8 = 8;
122 int32_t nTiles_8 = DSPLIB_ceilingDiv(nCols, (eleCount * lenTile8));
// SE0 streams pLocalB, SE1 streams pLocalQ.
// NOTE(review): SA0 and SA1 are opened here but only SA2 is used in the visible lines.
124 __SE0_OPEN(pLocalB, seScalarParams);
125 __SE1_OPEN(pLocalQ, seMatrixParams);
126 __SA1_OPEN(saMatrixParams);
127 __SA0_OPEN(saRefParams);
128 __SA2_OPEN(saRefParams);
131 for (int32_t tile = 0; tile < nTiles_8; tile++) {
140 for (int32_t vertical = 0; vertical < nRows; vertical++) {
// One vector from SE0 per iteration (the stored template sets ELEDUP), shared by all
// eight multiplies.
141 vec scalarDup = c7x::strm_eng<0, vec>::get_adv();
// Eight consecutive vectors of the current tile from SE1.
143 vec v1 = c7x::strm_eng<1, vec>::get_adv();
144 vec v2 = c7x::strm_eng<1, vec>::get_adv();
145 vec v3 = c7x::strm_eng<1, vec>::get_adv();
146 vec v4 = c7x::strm_eng<1, vec>::get_adv();
147 vec v5 = c7x::strm_eng<1, vec>::get_adv();
148 vec v6 = c7x::strm_eng<1, vec>::get_adv();
149 vec v7 = c7x::strm_eng<1, vec>::get_adv();
150 vec v8 = c7x::strm_eng<1, vec>::get_adv();
// Multiply-accumulate into the per-tile accumulators.
152 sV1 += v1 * scalarDup;
153 sV2 += v2 * scalarDup;
154 sV3 += v3 * scalarDup;
155 sV4 += v4 * scalarDup;
156 sV5 += v5 * scalarDup;
157 sV6 += v6 * scalarDup;
158 sV7 += v7 * scalarDup;
159 sV8 += v8 * scalarDup;
// Write the eight accumulators to pLocalY through SA2; each store takes its predicate
// from the address generator before advancing it.
162 __vpred lPred = c7x::strm_agen<2, vec>::get_vpred();
163 vec *psV = c7x::strm_agen<2, vec>::get_adv(pLocalY);
164 __vstore_pred(lPred, psV, sV1);
166 lPred = c7x::strm_agen<2, vec>::get_vpred();
167 psV = c7x::strm_agen<2, vec>::get_adv(pLocalY);
168 __vstore_pred(lPred, psV, sV2);
170 lPred = c7x::strm_agen<2, vec>::get_vpred();
171 psV = c7x::strm_agen<2, vec>::get_adv(pLocalY);
172 __vstore_pred(lPred, psV, sV3);
174 lPred = c7x::strm_agen<2, vec>::get_vpred();
175 psV = c7x::strm_agen<2, vec>::get_adv(pLocalY);
176 __vstore_pred(lPred, psV, sV4);
178 lPred = c7x::strm_agen<2, vec>::get_vpred();
179 psV = c7x::strm_agen<2, vec>::get_adv(pLocalY);
180 __vstore_pred(lPred, psV, sV5);
182 lPred = c7x::strm_agen<2, vec>::get_vpred();
183 psV = c7x::strm_agen<2, vec>::get_adv(pLocalY);
184 __vstore_pred(lPred, psV, sV6);
186 lPred = c7x::strm_agen<2, vec>::get_vpred();
187 psV = c7x::strm_agen<2, vec>::get_adv(pLocalY);
188 __vstore_pred(lPred, psV, sV7);
190 lPred = c7x::strm_agen<2, vec>::get_vpred();
191 psV = c7x::strm_agen<2, vec>::get_adv(pLocalY);
192 __vstore_pred(lPred, psV, sV8);
// Body fragment of an init routine (presumably DSPLIB_qrd_solver_backSubstitution_init_ci;
// signature and braces are not visible).
// It builds the SE/SA templates stored in slots 4..8 of the parameter block pBlock.
218 uint8_t *pBlock = pKerPrivArgs->
bufPblock;
// nRows is taken from widthR here.
219 int32_t nRows = pKerPrivArgs->
widthR;
220 int32_t strideR = pKerPrivArgs->
strideR;
// strideR divided by the element size; presumably a byte stride converted to elements.
221 int32_t colRstride = strideR /
sizeof(dataType);
223 typedef typename c7x::make_full_vector<dataType>::type vec;
224 int32_t eleCount = c7x::element_count_of<vec>::value;
226 __SE_ELETYPE SE_ELETYPE = c7x::se_eletype<vec>::value;
227 __SE_VECLEN SE_VECLEN = c7x::se_veclen<vec>::value;
228 __SA_VECLEN SA_VECLEN = c7x::sa_veclen<vec>::value;
// Linear (1D) read of nRows elements.
230 __SE_TEMPLATE_v1 seDivReadParams = __gen_SE_TEMPLATE_v1();
231 seDivReadParams.ICNT0 = nRows;
232 seDivReadParams.DIMFMT = __SE_DIMFMT_1D;
233 seDivReadParams.ELETYPE = SE_ELETYPE;
234 seDivReadParams.VECLEN = SE_VECLEN;
236 __SE_TEMPLATE_v1 seBlockParams = __gen_SE_TEMPLATE_v1();
237 __SA_TEMPLATE_v1 saWriteXParams = __gen_SA_TEMPLATE_v1();
238 __SA_TEMPLATE_v1 saReverseParams = __gen_SA_TEMPLATE_v1();
// 2D read of eleCount elements per step with a negative stride of one matrix row
// (-colRstride). ICNT1 is not set here; the back-substitution routine sets it before
// each open.
240 seBlockParams.ICNT0 = eleCount;
241 seBlockParams.DIM1 = -colRstride;
242 seBlockParams.DIMFMT = __SE_DIMFMT_2D;
243 seBlockParams.ELETYPE = SE_ELETYPE;
244 seBlockParams.VECLEN = SE_VECLEN;
// One element per step, nRows steps, address decreasing by one element each step.
246 saWriteXParams.ICNT0 = 1;
247 saWriteXParams.ICNT1 = nRows;
248 saWriteXParams.DIM1 = -1;
249 saWriteXParams.DIMFMT = __SA_DIMFMT_2D;
250 saWriteXParams.VECLEN = SA_VECLEN;
// One full vector per step, stepping backwards by eleCount elements. ICNT1 is not set
// here; the back-substitution routine fills it in.
252 saReverseParams.ICNT0 = eleCount;
253 saReverseParams.DIM1 = -eleCount;
254 saReverseParams.DIMFMT = __SA_DIMFMT_2D;
255 saReverseParams.VECLEN = SA_VECLEN;
// Slot 4 = seDivReadParams, 5 = seBlockParams, 6 = saWriteXParams, 7 = saReverseParams.
257 *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (4 * SE_PARAM_SIZE)) = seDivReadParams;
258 *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (5 * SE_PARAM_SIZE)) = seBlockParams;
259 *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (6 * SE_PARAM_SIZE)) = saWriteXParams;
260 *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (7 * SE_PARAM_SIZE)) = saReverseParams;
// Single-element reads (VECLEN = 1 element) stepping by colRstride + 1 elements, i.e. one
// row down and one column across per step, for nRows steps. Stored in slot 8.
262 __SE_TEMPLATE_v1 seDiagReadParams = __gen_SE_TEMPLATE_v1();
263 seDiagReadParams.ICNT0 = 1;
264 seDiagReadParams.ICNT1 = nRows;
265 seDiagReadParams.DIM1 = colRstride + 1;
266 seDiagReadParams.DIMFMT = __SE_DIMFMT_2D;
267 seDiagReadParams.ELETYPE = SE_ELETYPE;
268 seDiagReadParams.VECLEN = __SE_VECLEN_1ELEM;
269 *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (8 * SE_PARAM_SIZE)) = seDiagReadParams;
// Declaration only: writes to *element a value taken from inVec at position index.
// V defaults to the full-width vector type for dataType. The float and double overloads
// that follow supply the actual bodies.
276 template <typename dataType, typename V = typename c7x::make_full_vector<dataType>::type>
277 inline void getElement(V inVec, uint32_t index, dataType *element);
// float overload of getElement (the listing omits the braces around the body).
// Views inVec as an int vector, fetches the word at position index with __vgetw_vrd and
// reinterprets it as a float before writing it to *element.
278 template <
typename V>
inline void getElement(V inVec, uint32_t index,
float *element)
280 *element = __as_float(__vgetw_vrd(c7x::as_int_vec(inVec), index));
// double overload of getElement (the listing omits the braces around the body).
// Views inVec as a long vector, fetches the double-word at position index with
// __vgetd_vrd and reinterprets it as a double before writing it to *element.
283 template <
typename V>
inline void getElement(V inVec, uint32_t index,
double *element)
285 *element = __as_double(__vgetd_vrd(c7x::as_long_vec(inVec), index));
// Fragment of a templated routine (presumably DSPLIB_qrd_solver_backSubstitution_ci; the
// signature line, braces, some declarations such as row / ii / resultEle, and several
// statements are not visible in this listing).
// Visible stages:
//   1. copy the diagonal of pR into pDiv, one element per row;
//   2. replace pDiv in place by refined reciprocals of its values;
//   3. process full vector-sized blocks starting from the end of pY / pDiv, writing results
//      one element at a time through pSA1 (which starts at the last element of pX);
//   4. handle the remainingEle leftover elements when nCols is not a multiple of eleCount.
288 template <
typename dataType>
300 __SE_TEMPLATE_v1 seDivReadParams;
301 __SA_TEMPLATE_v1 saDivStoreParams;
302 __SE_TEMPLATE_v1 seDiagReadParams;
303 __SA_TEMPLATE_v1 saDiagWriteParams;
// Slot 3 holds the 1D SA template (written as saRefParams by the y-stage init); slots
// 4, 6 and 8 were written by the back-substitution init.
304 saDivStoreParams = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (3 * SE_PARAM_SIZE));
305 seDivReadParams = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (4 * SE_PARAM_SIZE));
306 saDiagWriteParams = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (6 * SE_PARAM_SIZE));
307 seDiagReadParams = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (8 * SE_PARAM_SIZE));
// Slot 6 was stored with DIM1 = -1; this local copy steps forward instead.
309 saDiagWriteParams.DIM1 = 1;
311 typedef typename c7x::make_full_vector<dataType>::type vec;
312 int32_t eleCount = c7x::element_count_of<vec>::value;
313 int32_t nVec = DSPLIB_ceilingDiv(nRows, eleCount);
// Stage 1: SE1 reads one diagonal element of pR per row; SA1 writes it to pDiv.
317 __SE1_OPEN(pR, seDiagReadParams);
318 __SA1_OPEN(saDiagWriteParams);
321 for (row = 0; row < nRows; row++) {
322 vec vecDiag = c7x::strm_eng<1, vec>::get_adv();
323 __vpred predDiag = c7x::strm_agen<1, vec>::get_vpred();
324 vec *pStoreDiag = c7x::strm_agen<1, vec>::get_adv(pDiv);
325 __vstore_pred(predDiag, pStoreDiag, vecDiag);
// Stage 2: pDiv is read through SE0 and written back through SA0 (in place).
330 __SE0_OPEN(pDiv, seDivReadParams);
331 __SA0_OPEN(saDivStoreParams);
332 dataType TwoP0 = 2.0;
// Four vectors per iteration. Each reciprocal starts from __recip(v) and is refined
// twice with the Newton-Raphson step yy = yy * (2 - v * yy).
335 for (ii = 0; ii < nVec - 3; ii += 4) {
336 vec v1 = c7x::strm_eng<0, vec>::get_adv();
337 vec v2 = c7x::strm_eng<0, vec>::get_adv();
338 vec v3 = c7x::strm_eng<0, vec>::get_adv();
339 vec v4 = c7x::strm_eng<0, vec>::get_adv();
341 vec yy1 = __recip(v1);
342 yy1 = yy1 * (TwoP0 - v1 * yy1);
343 yy1 = yy1 * (TwoP0 - v1 * yy1);
345 vec yy2 = __recip(v2);
346 yy2 = yy2 * (TwoP0 - v2 * yy2);
347 yy2 = yy2 * (TwoP0 - v2 * yy2);
349 vec yy3 = __recip(v3);
350 yy3 = yy3 * (TwoP0 - v3 * yy3);
351 yy3 = yy3 * (TwoP0 - v3 * yy3);
353 vec yy4 = __recip(v4);
354 yy4 = yy4 * (TwoP0 - v4 * yy4);
355 yy4 = yy4 * (TwoP0 - v4 * yy4);
357 __vpred predDiv1 = c7x::strm_agen<0, vec>::get_vpred();
358 vec *pStoreDiv1 = c7x::strm_agen<0, vec>::get_adv(pDiv);
359 __vstore_pred(predDiv1, pStoreDiv1, yy1);
361 __vpred predDiv2 = c7x::strm_agen<0, vec>::get_vpred();
362 vec *pStoreDiv2 = c7x::strm_agen<0, vec>::get_adv(pDiv);
363 __vstore_pred(predDiv2, pStoreDiv2, yy2);
365 __vpred predDiv3 = c7x::strm_agen<0, vec>::get_vpred();
366 vec *pStoreDiv3 = c7x::strm_agen<0, vec>::get_adv(pDiv);
367 __vstore_pred(predDiv3, pStoreDiv3, yy3);
369 __vpred predDiv4 = c7x::strm_agen<0, vec>::get_vpred();
370 vec *pStoreDiv4 = c7x::strm_agen<0, vec>::get_adv(pDiv);
371 __vstore_pred(predDiv4, pStoreDiv4, yy4);
// Tail: the vectors left over from the 4x unrolled loop, one at a time.
374 for (; ii < nVec; ii++) {
375 vec v1 = c7x::strm_eng<0, vec>::get_adv();
377 vec yy1 = __recip(v1);
378 yy1 = yy1 * (TwoP0 - v1 * yy1);
379 yy1 = yy1 * (TwoP0 - v1 * yy1);
381 __vpred predDiv1 = c7x::strm_agen<0, vec>::get_vpred();
382 vec *pStoreDiv1 = c7x::strm_agen<0, vec>::get_adv(pDiv);
383 __vstore_pred(predDiv1, pStoreDiv1, yy1);
// Stage 3 setup: number of full vector blocks and the leftover element count.
390 int32_t totalBlocks = nCols / eleCount;
391 int32_t remainingEle = nCols - (totalBlocks * eleCount);
393 __SE_TEMPLATE_v1 seBlockParams;
394 __SE_TEMPLATE_v1 seReadXParams;
395 __SA_TEMPLATE_v1 saWriteXParams;
396 __SA_TEMPLATE_v1 saReverseParams;
// seReadXParams starts from the slot-0 template (stored by the y-stage init with ELEDUP
// set) and is re-dimensioned below.
398 seReadXParams = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (0 * SE_PARAM_SIZE));
399 seBlockParams = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (5 * SE_PARAM_SIZE));
400 saWriteXParams = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (6 * SE_PARAM_SIZE));
401 saReverseParams = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (7 * SE_PARAM_SIZE));
// Read one element per step, moving backwards by one element each step.
403 seReadXParams.ICNT0 = 1;
404 seReadXParams.DIM1 = -1;
405 saReverseParams.ICNT1 = totalBlocks;
// pRLastElem: element at row nRows-1, column nRows-1 of pR. pSE0 points eleCount-1
// elements before it, so the first block read ends on that element.
// pSA2 / pSA3 point at the last full vector of pY / pDiv; pSA1 at the last element of pX.
407 dataType *pRLastElem = &pR[(nRows - 1) + ((nRows - 1) * colRstride)];
408 dataType *pXLastElem = &pX[nRows - 1];
409 dataType *pSE0 = pRLastElem - (eleCount - 1);
410 dataType *pSA1 = pX + nRows - 1;
411 dataType *pSA2 = pY + nRows - eleCount;
412 dataType *pSA3 = pDiv + nRows - eleCount;
414 __SA1_OPEN(saWriteXParams);
416 __SA2_OPEN(saReverseParams);
417 __SA3_OPEN(saReverseParams);
// Stage 3: one iteration per full block, walking pY / pDiv backwards via SA2 / SA3.
420 for (int32_t block = 0; block < totalBlocks; block++) {
421 __vpred predY = c7x::strm_agen<2, vec>::get_vpred();
422 vec *pLoadY = c7x::strm_agen<2, vec>::get_adv(pSA2);
423 vec vecY = __vload_pred(predY, pLoadY);
425 __vpred predDiv = c7x::strm_agen<3, vec>::get_vpred();
426 vec *pLoadDiv = c7x::strm_agen<3, vec>::get_adv(pSA3);
427 vec vecDiv = __vload_pred(predDiv, pLoadDiv);
// sumRows = block * eleCount, i.e. the element count covered by earlier blocks;
// totalRows adds the current block.
429 int32_t sumRows = block * eleCount;
430 int32_t totalRows = sumRows + eleCount;
// Both streams are re-opened every block with the row counts for this block.
432 seBlockParams.ICNT1 = totalRows;
433 seReadXParams.ICNT1 = sumRows;
435 __SE0_OPEN(pSE0, seBlockParams);
437 __SE1_OPEN(pXLastElem, seReadXParams);
441 vec vecSum = (vec) 0;
442 vec vecSum1 = (vec) 0;
443 vec vecSum2 = (vec) 0;
444 vec vecSum3 = (vec) 0;
445 vec vecSum4 = (vec) 0;
446 int32_t vertical = 0;
// 4x unrolled pass over the sumRows rows: each step reads a vector from SE0 and a vector
// from SE1.
// NOTE(review): the statements that fold v*/x* into vecSum1..vecSum4 are not visible in
// this listing (original lines 451-452 etc. are omitted) - presumably vecSumN += vN * xN.
448 for (vertical = 0; vertical < sumRows - 3; vertical += 4) {
449 vec v1 = c7x::strm_eng<0, vec>::get_adv();
450 vec x1 = c7x::strm_eng<1, vec>::get_adv();
453 vec v2 = c7x::strm_eng<0, vec>::get_adv();
454 vec x2 = c7x::strm_eng<1, vec>::get_adv();
457 vec v3 = c7x::strm_eng<0, vec>::get_adv();
458 vec x3 = c7x::strm_eng<1, vec>::get_adv();
461 vec v4 = c7x::strm_eng<0, vec>::get_adv();
462 vec x4 = c7x::strm_eng<1, vec>::get_adv();
466 vecSum = vecSum1 + vecSum2 + vecSum3 + vecSum4;
// Within the block, four elements are produced per loop iteration. For each one:
// result = (vecY - vecSum) * vecDiv, then vecSum is updated with the next SE0 vector
// scaled by the scalar resultEleN, and resultEleN is stored through SA1.
// NOTE(review): the lines assigning resultEle1..4 are not visible; presumably they call
// getElement(resultN, vecIndex, &resultEleN) and step vecIndex - confirm in the full file.
468 uint32_t vecIndex = eleCount - 1;
469 dataType resultEle1, resultEle2, resultEle3, resultEle4;
471 for (vertical = 0; vertical < eleCount - 3; vertical += 4) {
472 vec v1 = c7x::strm_eng<0, vec>::get_adv();
473 vec result1 = (vecY - vecSum) * vecDiv;
475 vecSum += v1 * (resultEle1);
476 dataType *pStoreX1 = c7x::strm_agen<1, dataType>::get_adv(pSA1);
477 *pStoreX1 = resultEle1;
479 vec v2 = c7x::strm_eng<0, vec>::get_adv();
480 vec result2 = (vecY - vecSum) * vecDiv;
482 vecSum += v2 * (resultEle2);
483 dataType *pStoreX2 = c7x::strm_agen<1, dataType>::get_adv(pSA1);
484 *pStoreX2 = resultEle2;
486 vec v3 = c7x::strm_eng<0, vec>::get_adv();
487 vec result3 = (vecY - vecSum) * vecDiv;
489 vecSum += v3 * (resultEle3);
490 dataType *pStoreX3 = c7x::strm_agen<1, dataType>::get_adv(pSA1);
491 *pStoreX3 = resultEle3;
493 vec v4 = c7x::strm_eng<0, vec>::get_adv();
494 vec result4 = (vecY - vecSum) * vecDiv;
496 vecSum += v4 * (resultEle4);
497 dataType *pStoreX4 = c7x::strm_agen<1, dataType>::get_adv(pSA1);
498 *pStoreX4 = resultEle4;
// Stage 4: leftover elements that do not fill a whole vector.
511 if (remainingEle > 0) {
// Narrow the block read to remainingEle elements per row and cover all nRows rows.
512 seBlockParams.ICNT0 = remainingEle;
513 seBlockParams.ICNT1 = nRows;
515 seReadXParams.ICNT1 = nRows;
// A single step of remainingEle elements; the predicate from SA2 / SA3 gates the loads
// below.
517 saReverseParams.ICNT0 = remainingEle;
518 saReverseParams.ICNT1 = 1;
519 saReverseParams.DIM1 = 0;
// Start at column 0 of row nRows-1 of pR; vecY / vecDiv are loaded from the start of
// pY / pDiv.
521 pSE0 = &pR[(nRows - 1) * colRstride];
522 __SE0_OPEN(pSE0, seBlockParams);
523 __SE1_OPEN(pXLastElem, seReadXParams);
524 __SA2_OPEN(saReverseParams);
525 __SA3_OPEN(saReverseParams);
527 int32_t sumRows = totalBlocks * eleCount;
529 __vpred predY = c7x::strm_agen<2, vec>::get_vpred();
530 vec *pLoadY = c7x::strm_agen<2, vec>::get_adv(pY);
531 vec vecY = __vload_pred(predY, pLoadY);
533 __vpred predDiv = c7x::strm_agen<3, vec>::get_vpred();
534 vec *pLoadDiv = c7x::strm_agen<3, vec>::get_adv(pDiv);
535 vec vecDiv = __vload_pred(predDiv, pLoadDiv);
537 vec vecSum = (vec) 0;
538 vec vecSum1 = (vec) 0;
539 vec vecSum2 = (vec) 0;
540 vec vecSum3 = (vec) 0;
541 vec vecSum4 = (vec) 0;
542 int32_t vertical = 0;
// Same 4x unrolled pass as in the block loop; the accumulate statements are again not
// visible in this listing.
544 for (vertical = 0; vertical < sumRows - 3; vertical += 4) {
545 vec v1 = c7x::strm_eng<0, vec>::get_adv();
546 vec x1 = c7x::strm_eng<1, vec>::get_adv();
549 vec v2 = c7x::strm_eng<0, vec>::get_adv();
550 vec x2 = c7x::strm_eng<1, vec>::get_adv();
553 vec v3 = c7x::strm_eng<0, vec>::get_adv();
554 vec x3 = c7x::strm_eng<1, vec>::get_adv();
557 vec v4 = c7x::strm_eng<0, vec>::get_adv();
558 vec x4 = c7x::strm_eng<1, vec>::get_adv();
562 vecSum = vecSum1 + vecSum2 + vecSum3 + vecSum4;
// One leftover element per iteration; the declaration and assignment of resultEle are
// not visible in this listing.
564 int32_t vecIndex = remainingEle - 1;
567 for (vertical = 0; vertical < remainingEle; vertical++) {
568 vec v1 = c7x::strm_eng<0, vec>::get_adv();
569 vec result = (vecY - vecSum) * vecDiv;
572 vecSum += v1 * (resultEle);
574 dataType *pStoreX = c7x::strm_agen<1, dataType>::get_adv(pSA1);
575 *pStoreX = resultEle;
// Fragment of a templated init routine (presumably DSPLIB_qrd_solver_init_ci; the
// signature line, braces and the remaining statements are not visible).
603 template <
typename dataType>
// The matTrans init arguments take their dimensions from the R buffer descriptor.
620 kerInitArgsMatTrans.
dimX = bufParamsR->
dim_x;
621 kerInitArgsMatTrans.
dimY = bufParamsR->
dim_y;
// Initialise the three stages in order: the y stage, the matrix transpose (bufParamsR is
// passed for both buffer-descriptor arguments), and the back substitution.
628 DSPLIB_qrd_solver_y_init_ci<dataType>(handle);
629 DSPLIB_matTrans_init_ci<dataType>(pMatTransKerPrivArgs, bufParamsR, bufParamsR, &kerInitArgsMatTrans);
630 DSPLIB_qrd_solver_backSubstitution_init_ci<dataType>(handle);
// Fragment of a templated exec routine (presumably DSPLIB_qrd_solver_exec_ci; the
// signature line, braces, the definition of loopCnt and the end of the final call are
// not visible).
// Visible sequence: run the y stage, transpose pLocalR into pLocalR1, zero pLocalX, then
// run the back substitution on pLocalR1.
651 template <
typename dataType>
// Here nRows comes from heightR and nCols from widthR.
667 int32_t nRows = pKerPrivArgs->
heightR;
668 int32_t nCols = pKerPrivArgs->
widthR;
669 int32_t strideR = pKerPrivArgs->
strideR;
670 int32_t dataSize =
sizeof(dataType);
671 uint8_t *pBlock = pKerPrivArgs->
bufPblock;
// Typed views of the untyped buffer pointers.
675 dataType *pLocalQ = (dataType *) pQ;
676 dataType *pLocalR = (dataType *) pR;
677 dataType *pLocalB = (dataType *) pB;
678 dataType *pLocalY = (dataType *) pY;
679 dataType *pLocalX = (dataType *) pX;
680 dataType *pLocalD = (dataType *) pD;
681 dataType *pLocalR1 = (dataType *) pR1;
// strideR divided by the element size; presumably a byte stride converted to elements.
683 int32_t colRstride = strideR / dataSize;
684 DSPLIB_DEBUGPRINTFN(0,
"pLocalQ: %p pLocalR: %p pLocalB: %p pLocalY: %p pLocalX: %p nCols: %d nRows: %d\n", pLocalQ,
685 pLocalR, pLocalB, pLocalY, pLocalX, nCols, nRows);
// y stage: reads pLocalQ and pLocalB, writes pLocalY.
691 DSPLIB_qrd_solver_y_exec_ci<dataType>(pLocalQ, nCols, nRows, pLocalB, pLocalY, pBlock);
// Transpose of pLocalR written to pLocalR1.
694 DSPLIB_matTrans_exec_ci<dataType>(pMatTransKerPrivArgs, pLocalR, pLocalR1);
// Clear nCols elements of pLocalX before the back substitution.
696 memset(pLocalX, 0,
sizeof(dataType) * nCols);
// loopCnt is passed for both the nRows and nCols parameters; the call continues past the
// visible lines.
698 DSPLIB_qrd_solver_backSubstitution_ci<dataType>(pLocalR1, pLocalX, pLocalY, pLocalD, loopCnt, loopCnt, colRstride,
template void DSPLIB_qrd_solver_y_exec_ci< double >(double *pLocalQ, int32_t nCols, int32_t nRows, double *pLocalB, double *pLocalY, uint8_t *pBlock)
template void DSPLIB_qrd_solver_y_init_ci< double >(DSPLIB_kernelHandle handle)
template DSPLIB_STATUS DSPLIB_qrd_solver_init_ci< float >(DSPLIB_kernelHandle handle, DSPLIB_bufParams2D_t *bufParamsQ, DSPLIB_bufParams2D_t *bufParamsR, DSPLIB_bufParams1D_t *bufParamsB, DSPLIB_bufParams1D_t *bufParamsY, DSPLIB_bufParams1D_t *bufParamsX, const DSPLIB_qrdSolverInitArgs *pKerInitArgs)
template void DSPLIB_qrd_solver_y_exec_ci< float >(float *pLocalQ, int32_t nCols, int32_t nRows, float *pLocalB, float *pLocalY, uint8_t *pBlock)
void getElement(V inVec, uint32_t index, dataType *element)
DSPLIB_STATUS DSPLIB_qrd_solver_exec_ci(DSPLIB_kernelHandle handle, void *restrict pQ, void *restrict pR, void *restrict pB, void *restrict pY, void *restrict pX, void *restrict pD, void *restrict pR1)
This function is the main execution function for the C7x implementation of the kernel....
template void DSPLIB_qrd_solver_backSubstitution_ci< float >(float *pR, float *pX, float *pY, float *pDiv, int32_t nRows, int32_t nCols, int32_t colRstride, uint8_t *pBlock)
void DSPLIB_qrd_solver_backSubstitution_init_ci(DSPLIB_kernelHandle handle)
template DSPLIB_STATUS DSPLIB_qrd_solver_init_ci< double >(DSPLIB_kernelHandle handle, DSPLIB_bufParams2D_t *bufParamsQ, DSPLIB_bufParams2D_t *bufParamsR, DSPLIB_bufParams1D_t *bufParamsB, DSPLIB_bufParams1D_t *bufParamsY, DSPLIB_bufParams1D_t *bufParamsX, const DSPLIB_qrdSolverInitArgs *pKerInitArgs)
template void DSPLIB_qrd_solver_y_init_ci< float >(DSPLIB_kernelHandle handle)
void DSPLIB_qrd_solver_y_init_ci(DSPLIB_kernelHandle handle)
DSPLIB_STATUS DSPLIB_qrd_solver_init_ci(DSPLIB_kernelHandle handle, DSPLIB_bufParams2D_t *bufParamsQ, DSPLIB_bufParams2D_t *bufParamsR, DSPLIB_bufParams1D_t *bufParamsB, DSPLIB_bufParams1D_t *bufParamsY, DSPLIB_bufParams1D_t *bufParamsX, const DSPLIB_qrdSolverInitArgs *pKerInitArgs)
This function is the initialization function for the C7x implementation of the kernel....
template DSPLIB_STATUS DSPLIB_qrd_solver_exec_ci< float >(DSPLIB_kernelHandle handle, void *restrict pQ, void *restrict pR, void *restrict pB, void *restrict pY, void *restrict pX, void *restrict pD, void *restrict pR1)
template void DSPLIB_qrd_solver_backSubstitution_ci< double >(double *pR, double *pX, double *pY, double *pDiv, int32_t nRows, int32_t nCols, int32_t colRstride, uint8_t *pBlock)
template DSPLIB_STATUS DSPLIB_qrd_solver_exec_ci< double >(DSPLIB_kernelHandle handle, void *restrict pQ, void *restrict pR, void *restrict pB, void *restrict pY, void *restrict pX, void *restrict pD, void *restrict pR1)
void DSPLIB_qrd_solver_backSubstitution_ci(dataType *pR, dataType *pX, dataType *pY, dataType *pDiv, int32_t nRows, int32_t nCols, int32_t colRstride, uint8_t *pBlock)
void DSPLIB_qrd_solver_y_exec_ci(dataType *pLocalQ, int32_t nCols, int32_t nRows, dataType *pLocalB, dataType *pLocalY, uint8_t *pBlock)
template void DSPLIB_qrd_solver_backSubstitution_init_ci< float >(DSPLIB_kernelHandle handle)
template void DSPLIB_qrd_solver_backSubstitution_init_ci< double >(DSPLIB_kernelHandle handle)
Header file for kernel's internal use. For the kernel's interface, please see DSPLIB_qrd_solver.
#define DSPLIB_DEBUGPRINTFN(N, fmt,...)
DSPLIB_STATUS_NAME
The enumeration of all status codes.
void * DSPLIB_kernelHandle
Handle type for DSPLIB operations.
A structure for a 1 dimensional buffer descriptor.
A structure for a 2 dimensional buffer descriptor.
int32_t stride_y
Stride in Y dimension in bytes.
uint32_t dim_x
Width of buffer in X dimension in elements.
uint32_t dim_y
Height of buffer in Y dimension in elements.
Structure containing the parameters to initialize the kernel.
uint32_t dimX
Size of input data.
int8_t funcStyle
Variant of the function refer to DSPLIB_FUNCTION_STYLE
Structure that is reserved for internal use by the kernel.
int32_t strideOut
Stride between rows of output data matrix
uint32_t heightIn
Height of input data matrix
int32_t strideIn
Stride between rows of input data matrix
uint32_t widthIn
Size of input buffer for different batches DSPLIB_matTrans_init that will be retrieved and used by DS...
Structure containing the parameters to initialize the kernel.
int8_t funcStyle
Variant of the function refer to DSPLIB_FUNCTION_STYLE
Structure that is reserved for internal use by the kernel.
uint8_t bufPblock[DSPLIB_QRD_SOLVER_IXX_IXX_OXX_PBLOCK_SIZE]
Buffer to save SE & SA configuration parameters
int32_t strideR
Stride between rows of R output data matrix
int32_t strideQ
Stride between rows of Q output data matrix
uint32_t heightR
Height of input data matrix
DSPLIB_matTrans_PrivArgs pMatTransKerPrivArgs
Privargs for the matTrans kernel.
uint32_t widthR
Size of input buffer for different batches DSPLIB_qrd_solver_init that will be retrieved and used by ...