// Fragment of the kernel init routine (presumably DSPLIB_diag_proc_init_ci; its
// signature is not part of this excerpt). It builds two streaming-engine (SE)
// templates and one streaming-address (SA) template and saves them in the
// kernel's parameter block for later use.
// pBlock is the kernel-private buffer holding the SE/SA configuration templates.
40 uint8_t *pBlock = pKerPrivArgs->
bufPblock;
42 __SE_TEMPLATE_v1 se0Params = __gen_SE_TEMPLATE_v1();
43 __SA_TEMPLATE_v1 sa0Params = __gen_SA_TEMPLATE_v1();
44 __SE_TEMPLATE_v1 se1Params = __gen_SE_TEMPLATE_v1();
// Full-width vector type for dataType, its element count, and the matching
// SE/SA element-type and vector-length settings.
46 typedef typename c7x::make_full_vector<dataType>::type vec;
47 int32_t eleCount = c7x::element_count_of<vec>::value;
48 __SE_ELETYPE SE_ELETYPE = c7x::se_eletype<vec>::value;
49 __SE_VECLEN SE_VECLEN = c7x::se_veclen<vec>::value;
50 __SA_VECLEN SA_VECLEN = c7x::sa_veclen<vec>::value;
// Row stride in elements: strideVRow divided by the element size
// (so strideVRow is presumably given in bytes - TODO confirm).
52 int32_t rowVStride = strideVRow /
sizeof(dataType);
// SE0 template (3D): one vector per fetch (ICNT0), DIM1 steps by one row,
// DIM2 steps by two vectors. ICNT1/ICNT2 are not set here.
// DECDIM1 is tied to dimension 2.
54 se0Params.ICNT0 = eleCount;
55 se0Params.DIM1 = rowVStride;
56 se0Params.DIM2 = eleCount * 2;
57 se0Params.DIMFMT = __SE_DIMFMT_3D;
58 se0Params.ELETYPE = SE_ELETYPE;
59 se0Params.VECLEN = SE_VECLEN;
60 se0Params.DECDIM1 = __SE_DECDIM_DIM2;
// SE1 template (2D): one vector per fetch, with a negative DIM1 so that each
// dimension-1 step moves eleCount elements toward lower addresses.
62 se1Params.ICNT0 = eleCount;
63 se1Params.DIM1 = -eleCount;
64 se1Params.DIMFMT = __SE_DIMFMT_2D;
65 se1Params.ELETYPE = SE_ELETYPE;
66 se1Params.VECLEN = SE_VECLEN;
// SA0 template: same 3D layout as the SE0 template above.
68 sa0Params.ICNT0 = eleCount;
69 sa0Params.DIM1 = rowVStride;
70 sa0Params.DIM2 = eleCount * 2;
71 sa0Params.DIMFMT = __SA_DIMFMT_3D;
72 sa0Params.VECLEN = SA_VECLEN;
73 sa0Params.DECDIM1 = __SA_DECDIM_DIM2;
// Save the templates in parameter-block slots 19 (se1Params), 20 (se0Params)
// and 21 (sa0Params); slots are SE_PARAM_SIZE bytes apart.
75 *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (19 * SE_PARAM_SIZE)) = se1Params;
76 *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (20 * SE_PARAM_SIZE)) = se0Params;
77 *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (21 * SE_PARAM_SIZE)) = sa0Params;
// Fragment of DSPLIB_diag_epsilon_ci (signature lines are not part of this
// excerpt). Computes *epsilon = constEpsilon<dataType>() * max(|diag[i]| + |superdiag[i]|)
// over the Ncols elements streamed from "diag" and "superdiag".
97 template <
typename dataType>
102 __SE_TEMPLATE_v1 se0Params, se1Params;
// Both streams start from the same template stored in parameter-block slot 11.
104 se0Params = se1Params = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (11 * SE_PARAM_SIZE));
106 typedef typename c7x::make_full_vector<dataType>::type vec;
107 int32_t eleCount = c7x::element_count_of<vec>::value;
// Number of vectors needed to cover Ncols elements (rounded up).
109 int32_t nVec = DSPLIB_ceilingDiv(Ncols, eleCount);
111 se0Params.ICNT0 = Ncols;
112 se1Params.ICNT0 = Ncols;
// SE0 reads "diag", SE1 reads "superdiag".
114 __SE0_OPEN(diag, se0Params);
115 __SE1_OPEN(superdiag, se1Params);
// Two running-maximum accumulators, one per vector handled in the unrolled loop.
118 max1 = max2 = (vec) 0;
119 int32_t horizontal = 0;
// Main loop: two vectors from each stream per iteration.
120 for (horizontal = 0; horizontal < nVec - 1; horizontal += 2) {
121 vec v1 = c7x::strm_eng<0, vec>::get_adv();
122 vec v2 = c7x::strm_eng<1, vec>::get_adv();
124 vec v3 = c7x::strm_eng<0, vec>::get_adv();
125 vec v4 = c7x::strm_eng<1, vec>::get_adv();
127 vec add1 = __abs(v1) + __abs(v2);
128 vec add2 = __abs(v3) + __abs(v4);
130 max1 = __max(max1, add1);
131 max2 = __max(max2, add2);
// Tail: when nVec is odd the loop above stops at nVec - 1, leaving one vector.
134 if (horizontal != nVec) {
135 vec v1 = c7x::strm_eng<0, vec>::get_adv();
136 vec v2 = c7x::strm_eng<1, vec>::get_adv();
138 vec add1 = __abs(v1) + __abs(v2);
140 max1 = __max(max1, add1);
// Merge the two accumulators, then reduce across lanes to a single scalar.
143 max1 = __max(max1, max2);
145 dataType maxVal = c7x_horizontal_max_fp<dataType, vec>(max1);
147 *epsilon = constEpsilon<dataType>() * maxVal;
// Fragment of DSPLIB_diag_rotation_check_ci (most of the signature and several
// body lines are not part of this excerpt). The visible code scans "diag" and
// "superdiag" for entries whose magnitude is <= epsilon and writes an index to *m.
169 template <
typename dataType>
174 int32_t *rotation_test,
// Template from parameter-block slot 19 (the init fragment of this listing
// stores a template with DIM1 = -eleCount in that slot).
180 __SE_TEMPLATE_v1 se0Params = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (19 * SE_PARAM_SIZE));
182 typedef typename c7x::make_full_vector<dataType>::type vec;
183 int32_t eleCount = c7x::element_count_of<vec>::value;
184 int32_t horizontal = Ncols;
// Number of whole vectors processed by the vector loop below.
185 int32_t nVec = (Ncols - 1) / eleCount;
// Base pointers used by the scalar loop near the end of this fragment.
// NOTE(review): the offsets suggest diag/superdiag point at (or near) the last
// element of their arrays - confirm against the caller.
186 dataType *diagStart = diag - Ncols + 2;
187 dataType *superdiagStart = superdiag - Ncols + 1;
188 bool breakLoop =
false;
191 se0Params.ICNT1 = nVec;
// Both streams are opened eleCount - 1 elements before the pointers passed in,
// and both use the same template.
193 dataType *pSE0 = diag - (eleCount - 1);
194 dataType *pSE1 = superdiag - (eleCount - 1);
196 __SE0_OPEN(pSE0, se0Params);
197 __SE1_OPEN(pSE1, se0Params);
// Epsilon broadcast to every lane for the vector comparisons.
199 vec vecEpsilon = (vec) epsilon;
// Vector loop: "horizontal" counts down from Ncols in steps of one vector.
200 for (horizontal = Ncols; horizontal > eleCount; horizontal -= eleCount) {
// Superdiag: lanes with |x| <= epsilon, moved to a 64-bit mask; minIndexSD is
// 63 minus the result of the leftmost-set-bit detection on that mask.
202 vec v1SD = c7x::strm_eng<1, vec>::get_adv();
203 vec v2SD = __abs(v1SD);
204 __vpred predSD = __cmp_le_pred(v2SD, vecEpsilon);
206 uint64_t predStoreSD = movePredicate<dataType>(predSD);
207 uint64_t leftMostBitSD = __leftmost_bit_detect_one(predStoreSD);
208 int32_t minIndexSD = 63 - leftMostBitSD;
// Same test for diag.
211 vec v1D = c7x::strm_eng<0, vec>::get_adv();
212 vec v2D = __abs(v1D);
213 __vpred predD = __cmp_le_pred(v2D, vecEpsilon);
215 uint64_t predStoreD = movePredicate<dataType>(predD);
216 uint64_t leftMostBitD = __leftmost_bit_detect_one(predStoreD);
217 int32_t minIndexD = 63 - leftMostBitD;
// A non-negative index means at least one lane passed the test; *m is then set
// to the start of the current vector (horizontal - eleCount) plus a lane index.
219 if (minIndexD >= 0 || minIndexSD >= 0) {
222 #if !defined(ENABLE_LDRA_COVERAGE)
226 if (minIndexSD >= minIndexD) {
229 *m = horizontal - eleCount + minIndexSD;
232 *m = horizontal - eleCount + minIndexD;
237 *m = horizontal - eleCount + minIndexSD;
// Scalar loop over the remaining elements, testing superdiagStart[i] and
// diagStart[i - 1] against epsilon.
251 for (i = horizontal - 1; i > 0; i--) {
252 if (fabs(superdiagStart[i]) <= epsilon) {
256 if (fabs(diagStart[i - 1]) <= epsilon) {
// NOTE(review): the two lines below look like leftovers of separate
// declarations (presumably the explicit instantiations of this function)
// whose other lines are not part of this excerpt - confirm.
272 int32_t *rotation_test,
279 int32_t *rotation_test,
// Fragment of DSPLIB_diag_negate_v_ci (signature lines are not part of this
// excerpt). Negates Ncols elements of V in place, splitting the work between
// two stream-engine / address-generator pairs.
286 template <
typename dataType>
291 __SE_TEMPLATE_v1 se0Params, se1Params;
292 __SA_TEMPLATE_v1 sa0Params, sa1Params;
// Both SE streams start from the template in slot 11, both SA streams from slot 1.
294 se0Params = se1Params = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (11 * SE_PARAM_SIZE));
295 sa0Params = sa1Params = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (1 * SE_PARAM_SIZE));
297 typedef typename c7x::make_full_vector<dataType>::type vec;
298 int32_t eleCount = c7x::element_count_of<vec>::value;
// nVec: vectors needed to cover Ncols (rounded up).
// totalIter: loop trips when two vectors are handled per trip (rounded up).
// The "=" initializer was missing on the totalIter line, which is not a valid declaration.
300 int32_t nVec = DSPLIB_ceilingDiv(Ncols, eleCount);
301 int32_t totalIter = DSPLIB_ceilingDiv(nVec, 2);
// Stream 0 covers the first (nVec / 2) whole vectors; stream 1 covers the rest.
302 int32_t se0ICNT0 = (nVec / 2) * eleCount;
303 int32_t se1ICNT0 = Ncols - se0ICNT0;
305 se0Params.ICNT0 = sa0Params.ICNT0 = se0ICNT0;
306 se1Params.ICNT0 = sa1Params.ICNT0 = se1ICNT0;
// Stream 1 starts right after the elements assigned to stream 0.
309 dataType *pSE1 = V + se0ICNT0;
310 __SE1_OPEN(pSE1, se1Params);
311 __SA1_OPEN(sa1Params);
// pSE0 is defined on a listing line that is not part of this excerpt.
314 __SE0_OPEN(pSE0, se0Params);
315 __SA0_OPEN(sa0Params);
// Each trip reads one vector from each stream and writes its negation back
// through the matching address generator, using that generator's predicate.
317 for (int32_t horizontal = 0; horizontal < totalIter; horizontal++) {
318 vec v1 = c7x::strm_eng<0, vec>::get_adv();
319 vec v2 = c7x::strm_eng<1, vec>::get_adv();
321 __vpred pred1 = c7x::strm_agen<0, vec>::get_vpred();
322 vec *pV1 = c7x::strm_agen<0, vec>::get_adv(pSE0);
323 __vstore_pred(pred1, pV1, -v1);
325 __vpred pred2 = c7x::strm_agen<1, vec>::get_vpred();
326 vec *pV2 = c7x::strm_agen<1, vec>::get_adv(pSE1);
327 __vstore_pred(pred2, pV2, -v2);
// Fragment of DSPLIB_diag_sqrt_ci (signature lines and part of the "diag"
// computation are not part of this excerpt). For each element v of "superdiag"
// the visible code forms v * y, where y is a refined reciprocal-square-root
// estimate of v (i.e. an approximation of sqrt(v)), and stores it back in place.
338 template <
typename dataType>
// SE template from slot 11 and SA template from slot 1, shared by both arrays.
343 __SE_TEMPLATE_v1 se0Params = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (11 * SE_PARAM_SIZE));
344 __SA_TEMPLATE_v1 sa0Params = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (1 * SE_PARAM_SIZE));
346 typedef typename c7x::make_full_vector<dataType>::type vec;
347 int32_t eleCount = c7x::element_count_of<vec>::value;
348 int32_t nVec = DSPLIB_ceilingDiv(length, eleCount);
350 se0Params.ICNT0 = sa0Params.ICNT0 = length;
// SE0 / SA0 handle "superdiag"; SE1 / SA1 handle "diag".
352 __SE0_OPEN(superdiag, se0Params);
353 __SE1_OPEN(diag, se0Params);
354 __SA0_OPEN(sa0Params);
355 __SA1_OPEN(sa0Params);
// Constants of the refinement step y * (1.5 - 0.5 * v * y * y).
358 vec half = (vec) 0.5;
359 vec OneP5 = (vec) 1.5;
// Largest finite value of dataType, used for the clamp below.
361 vec maxValue = (vec) std::numeric_limits<dataType>::max();
363 for (int32_t i = 0; i < nVec; i++) {
364 vec vSD = c7x::strm_eng<0, vec>::get_adv();
365 vec vD = c7x::strm_eng<1, vec>::get_adv();
// Superdiag: initial reciprocal-sqrt estimate followed by a first refinement.
367 vec p0SD = __recip_sqrt(vSD);
368 vec d0SD = p0SD * vSD;
369 vec p1SD = OneP5 - d0SD * p0SD * half;
370 vec ySD = p0SD * p1SD;
// Second refinement; the final multiply by vSD turns the result into sqrt(vSD).
373 p1SD = OneP5 - d0SD * ySD * half;
374 ySD = vSD * ySD * p1SD;
// Diag: same scheme. d0D and yD are defined on listing lines that are not
// part of this excerpt.
376 vec p0D = __recip_sqrt(vD);
378 vec p1D = OneP5 - d0D * p0D * half;
382 p1D = OneP5 - d0D * yD * half;
// Clamp: inputs <= zero give zero, inputs >= maxValue give maxValue.
// ("zero" is defined outside this excerpt - presumably a vector of 0.)
385 __vpred cmp_lezeroSD = __cmp_le_pred(vSD, zero);
386 ySD = __select(cmp_lezeroSD, zero, ySD);
387 __vpred cmp_gtmaxSD = __cmp_le_pred(maxValue, vSD);
388 vec outSD = __select(cmp_gtmaxSD, maxValue, ySD);
390 __vpred cmp_lezeroD = __cmp_le_pred(vD, zero);
391 yD = __select(cmp_lezeroD, zero, yD);
392 __vpred cmp_gtmaxD = __cmp_le_pred(maxValue, vD);
393 vec outD = __select(cmp_gtmaxD, maxValue, yD);
// Predicated stores write the results back over the input arrays.
395 __vpred predSD = c7x::strm_agen<0, vec>::get_vpred();
396 vec *pSD = c7x::strm_agen<0, vec>::get_adv(superdiag);
397 __vstore_pred(predSD, pSD, outSD);
399 __vpred predD = c7x::strm_agen<1, vec>::get_vpred();
400 vec *pD = c7x::strm_agen<1, vec>::get_adv(diag);
401 __vstore_pred(predD, pD, outD);
// Fragment of DSPLIB_diag_proc_ci (signature lines, closing braces and a few
// declarations are not part of this excerpt). The visible code walks down the
// rows of V starting at startRow, one vector-wide column strip per stream, and
// applies a chain of plane rotations using the scalar coefficients read from
// cV and sV:
//   stored row = next * s + carry * c
//   new carry  = next * c - carry * s
412 template <
typename dataType>
423 typedef typename c7x::make_full_vector<dataType>::type vec;
424 int32_t eleCount = c7x::element_count_of<vec>::value;
// First row to process.
426 dataType *vStart = V + startRow * rowVStride;
428 __SE_TEMPLATE_v1 se0Params, se1Params;
429 __SA_TEMPLATE_v1 sa0Params, sa1Params, sa2Params, sa3Params;
// SE0/SE1 share the template in slot 20, SA0/SA1 the one in slot 21,
// SA2/SA3 the one in slot 5.
430 se0Params = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (20 * SE_PARAM_SIZE));
431 se1Params = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (20 * SE_PARAM_SIZE));
432 sa0Params = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (21 * SE_PARAM_SIZE));
433 sa1Params = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (21 * SE_PARAM_SIZE));
434 sa2Params = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (5 * SE_PARAM_SIZE));
435 sa3Params = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (5 * SE_PARAM_SIZE));
// rowPair: number of rotations per strip (Nrows - 1).
// The nVec column strips are split between the streams: nVec / 2 for stream 1,
// the remainder for stream 0 (one more than stream 1 when nVec is odd).
437 int32_t rowPair = Nrows - 1;
438 int32_t nVec = DSPLIB_ceilingDiv(Ncols, eleCount);
439 int32_t se1ICNT2 = nVec / 2;
440 int32_t se0ICNT2 = nVec - se1ICNT2;
442 se0Params.ICNT1 = sa0Params.ICNT1 = Nrows;
443 se0Params.ICNT2 = sa0Params.ICNT2 = se0ICNT2;
444 se0Params.DECDIM1_WIDTH = sa0Params.DECDIM1_WIDTH = Ncols;
// Stream 1 starts eleCount elements further right, so its width is reduced by eleCount.
446 se1Params.ICNT1 = sa1Params.ICNT1 = Nrows;
447 se1Params.ICNT2 = sa1Params.ICNT2 = se1ICNT2;
448 se1Params.DECDIM1_WIDTH = sa1Params.DECDIM1_WIDTH = Ncols - eleCount;
450 se0Params.DIM1 = sa0Params.DIM1 = se1Params.DIM1 = sa1Params.DIM1 = rowVStride;
// SA2 / SA3 address cV / sV: rowPair coefficients per strip, se0ICNT2 passes.
452 sa2Params.ICNT0 = sa3Params.ICNT0 = rowPair;
453 sa2Params.ICNT1 = sa3Params.ICNT1 = se0ICNT2;
455 __SE0_OPEN(vStart, se0Params);
456 __SA0_OPEN(sa0Params);
457 __SA2_OPEN(sa2Params);
458 __SA3_OPEN(sa3Params);
461 __SE1_OPEN(vStart + eleCount, se1Params);
462 __SA1_OPEN(sa1Params);
463 __vpred pred; vec *pStore;
// One iteration per pair of column strips (one strip from each stream).
464 for (int32_t horizontal = 0; horizontal < se1ICNT2; horizontal++) {
// v1_0 / v1_1 hold the carried row of each strip through the rotation chain.
465 vec v1_0 = c7x::strm_eng<0, vec>::get_adv();
466 vec v1_1 = c7x::strm_eng<1, vec>::get_adv();
468 int32_t vertical = 0;
// Coefficient loads for four rotations; each scalar is broadcast to a full vector.
// NOTE(review): listing lines 469-470 are not part of this excerpt, so any
// condition guarding these loads cannot be seen here - confirm.
471 dataType *pcv1 = c7x::strm_agen<2, dataType>::get_adv(cV);
472 vec cv1 = __vload_dup(pcv1);
473 dataType *psv1 = c7x::strm_agen<3, dataType>::get_adv(sV);
474 vec sv1 = __vload_dup(psv1);
475 dataType *pcv2 = c7x::strm_agen<2, dataType>::get_adv(cV);
476 vec cv2 = __vload_dup(pcv2);
477 dataType *psv2 = c7x::strm_agen<3, dataType>::get_adv(sV);
478 vec sv2 = __vload_dup(psv2);
479 dataType *pcv3 = c7x::strm_agen<2, dataType>::get_adv(cV);
480 vec cv3 = __vload_dup(pcv3);
481 dataType *psv3 = c7x::strm_agen<3, dataType>::get_adv(sV);
482 vec sv3 = __vload_dup(psv3);
483 dataType *pcv4 = c7x::strm_agen<2, dataType>::get_adv(cV);
484 vec cv4 = __vload_dup(pcv4);
485 dataType *psv4 = c7x::strm_agen<3, dataType>::get_adv(sV);
486 vec sv4 = __vload_dup(psv4);
// vs4_0 / vs4_1 are declared on a listing line that is not part of this excerpt.
487 vec v2_0, v2_1, v3_0, v3_1, v4_0, v4_1, v5_0, v5_1;
488 vec vs1_0, temps2_0, vs1_1, temps2_1;
489 vec vs2_0, temps3_0, vs2_1, temps3_1;
490 vec vs3_0, temps4_0, vs3_1, temps4_1;
// Unrolled loop. Visible body: rotate four rows, load four more coefficient
// pairs, store the four results, then rotate and store another four rows.
// NOTE(review): the lines between this body and the next loop are missing
// from the excerpt - confirm the exact loop structure against the full source.
492 for (vertical = 0; vertical < rowPair - 7; vertical += 4) {
493 v2_0 = c7x::strm_eng<0, vec>::get_adv();
494 v2_1 = c7x::strm_eng<1, vec>::get_adv();
496 v3_0 = c7x::strm_eng<0, vec>::get_adv();
497 v3_1 = c7x::strm_eng<1, vec>::get_adv();
499 v4_0 = c7x::strm_eng<0, vec>::get_adv();
500 v4_1 = c7x::strm_eng<1, vec>::get_adv();
502 v5_0 = c7x::strm_eng<0, vec>::get_adv();
503 v5_1 = c7x::strm_eng<1, vec>::get_adv();
// Four chained rotations; each "temps" value is the carry into the next one
// and the last carry goes back into v1_0 / v1_1.
505 vs1_0 = (v2_0 * sv1) + (v1_0 * cv1);
506 temps2_0 = (v2_0 * cv1) - (v1_0 * sv1);
507 vs1_1 = (v2_1 * sv1) + (v1_1 * cv1);
508 temps2_1 = (v2_1 * cv1) - (v1_1 * sv1);
510 vs2_0 = (v3_0 * sv2) + (temps2_0 * cv2);
511 temps3_0 = (v3_0 * cv2) - (temps2_0 * sv2);
512 vs2_1 = (v3_1 * sv2) + (temps2_1 * cv2);
513 temps3_1 = (v3_1 * cv2) - (temps2_1 * sv2);
515 vs3_0 = (v4_0 * sv3) + (temps3_0 * cv3);
516 temps4_0 = (v4_0 * cv3) - (temps3_0 * sv3);
517 vs3_1 = (v4_1 * sv3) + (temps3_1 * cv3);
518 temps4_1 = (v4_1 * cv3) - (temps3_1 * sv3);
520 vs4_0 = (v5_0 * sv4) + (temps4_0 * cv4);
521 v1_0 = (v5_0 * cv4) - (temps4_0 * sv4);
522 vs4_1 = (v5_1 * sv4) + (temps4_1 * cv4);
523 v1_1 = (v5_1 * cv4) - (temps4_1 * sv4);
// Coefficients for the next four rotations.
525 pcv1 = c7x::strm_agen<2, dataType>::get_adv(cV);
526 cv1 = __vload_dup(pcv1);
527 psv1 = c7x::strm_agen<3, dataType>::get_adv(sV);
528 sv1 = __vload_dup(psv1);
529 pcv2 = c7x::strm_agen<2, dataType>::get_adv(cV);
530 cv2 = __vload_dup(pcv2);
531 psv2 = c7x::strm_agen<3, dataType>::get_adv(sV);
532 sv2 = __vload_dup(psv2);
533 pcv3 = c7x::strm_agen<2, dataType>::get_adv(cV);
534 cv3 = __vload_dup(pcv3);
535 psv3 = c7x::strm_agen<3, dataType>::get_adv(sV);
536 sv3 = __vload_dup(psv3);
537 pcv4 = c7x::strm_agen<2, dataType>::get_adv(cV);
538 cv4 = __vload_dup(pcv4);
539 psv4 = c7x::strm_agen<3, dataType>::get_adv(sV);
540 sv4 = __vload_dup(psv4);
// Predicated stores of the four rotated rows, alternating between the strips.
542 pred = c7x::strm_agen<0, vec>::get_vpred();
543 pStore = c7x::strm_agen<0, vec>::get_adv(vStart);
544 __vstore_pred(pred, pStore, vs1_0);
546 pred = c7x::strm_agen<1, vec>::get_vpred();
547 pStore = c7x::strm_agen<1, vec>::get_adv(vStart + eleCount);
548 __vstore_pred(pred, pStore, vs1_1);
550 pred = c7x::strm_agen<0, vec>::get_vpred();
551 pStore = c7x::strm_agen<0, vec>::get_adv(vStart);
552 __vstore_pred(pred, pStore, vs2_0);
554 pred = c7x::strm_agen<1, vec>::get_vpred();
555 pStore = c7x::strm_agen<1, vec>::get_adv(vStart + eleCount);
556 __vstore_pred(pred, pStore, vs2_1);
558 pred = c7x::strm_agen<0, vec>::get_vpred();
559 pStore = c7x::strm_agen<0, vec>::get_adv(vStart);
560 __vstore_pred(pred, pStore, vs3_0);
562 pred = c7x::strm_agen<1, vec>::get_vpred();
563 pStore = c7x::strm_agen<1, vec>::get_adv(vStart + eleCount);
564 __vstore_pred(pred, pStore, vs3_1);
566 pred = c7x::strm_agen<0, vec>::get_vpred();
567 pStore = c7x::strm_agen<0, vec>::get_adv(vStart);
568 __vstore_pred(pred, pStore, vs4_0);
570 pred = c7x::strm_agen<1, vec>::get_vpred();
571 pStore = c7x::strm_agen<1, vec>::get_adv(vStart + eleCount);
572 __vstore_pred(pred, pStore, vs4_1);
// Second group of four rows, using the coefficients loaded above.
574 v2_0 = c7x::strm_eng<0, vec>::get_adv();
575 v2_1 = c7x::strm_eng<1, vec>::get_adv();
577 v3_0 = c7x::strm_eng<0, vec>::get_adv();
578 v3_1 = c7x::strm_eng<1, vec>::get_adv();
580 v4_0 = c7x::strm_eng<0, vec>::get_adv();
581 v4_1 = c7x::strm_eng<1, vec>::get_adv();
583 v5_0 = c7x::strm_eng<0, vec>::get_adv();
584 v5_1 = c7x::strm_eng<1, vec>::get_adv();
586 vs1_0 = (v2_0 * sv1) + (v1_0 * cv1);
587 temps2_0 = (v2_0 * cv1) - (v1_0 * sv1);
588 vs1_1 = (v2_1 * sv1) + (v1_1 * cv1);
589 temps2_1 = (v2_1 * cv1) - (v1_1 * sv1);
591 vs2_0 = (v3_0 * sv2) + (temps2_0 * cv2);
592 temps3_0 = (v3_0 * cv2) - (temps2_0 * sv2);
593 vs2_1 = (v3_1 * sv2) + (temps2_1 * cv2);
594 temps3_1 = (v3_1 * cv2) - (temps2_1 * sv2);
596 vs3_0 = (v4_0 * sv3) + (temps3_0 * cv3);
597 temps4_0 = (v4_0 * cv3) - (temps3_0 * sv3);
598 vs3_1 = (v4_1 * sv3) + (temps3_1 * cv3);
599 temps4_1 = (v4_1 * cv3) - (temps3_1 * sv3);
601 vs4_0 = (v5_0 * sv4) + (temps4_0 * cv4);
602 v1_0 = (v5_0 * cv4) - (temps4_0 * sv4);
603 vs4_1 = (v5_1 * sv4) + (temps4_1 * cv4);
604 v1_1 = (v5_1 * cv4) - (temps4_1 * sv4);
606 pred = c7x::strm_agen<0, vec>::get_vpred();
607 pStore = c7x::strm_agen<0, vec>::get_adv(vStart);
608 __vstore_pred(pred, pStore, vs1_0);
610 pred = c7x::strm_agen<1, vec>::get_vpred();
611 pStore = c7x::strm_agen<1, vec>::get_adv(vStart + eleCount);
612 __vstore_pred(pred, pStore, vs1_1);
614 pred = c7x::strm_agen<0, vec>::get_vpred();
615 pStore = c7x::strm_agen<0, vec>::get_adv(vStart);
616 __vstore_pred(pred, pStore, vs2_0);
618 pred = c7x::strm_agen<1, vec>::get_vpred();
619 pStore = c7x::strm_agen<1, vec>::get_adv(vStart + eleCount);
620 __vstore_pred(pred, pStore, vs2_1);
622 pred = c7x::strm_agen<0, vec>::get_vpred();
623 pStore = c7x::strm_agen<0, vec>::get_adv(vStart);
624 __vstore_pred(pred, pStore, vs3_0);
626 pred = c7x::strm_agen<1, vec>::get_vpred();
627 pStore = c7x::strm_agen<1, vec>::get_adv(vStart + eleCount);
628 __vstore_pred(pred, pStore, vs3_1);
630 pred = c7x::strm_agen<0, vec>::get_vpred();
631 pStore = c7x::strm_agen<0, vec>::get_adv(vStart);
632 __vstore_pred(pred, pStore, vs4_0);
634 pred = c7x::strm_agen<1, vec>::get_vpred();
635 pStore = c7x::strm_agen<1, vec>::get_adv(vStart + eleCount);
636 __vstore_pred(pred, pStore, vs4_1);
// Remaining rotations, two per iteration.
641 for (; vertical < rowPair - 1; vertical += 2) {
642 dataType *pcv1 = c7x::strm_agen<2, dataType>::get_adv(cV);
643 vec cv1 = __vload_dup(pcv1);
644 dataType *psv1 = c7x::strm_agen<3, dataType>::get_adv(sV);
645 vec sv1 = __vload_dup(psv1);
646 dataType *pcv2 = c7x::strm_agen<2, dataType>::get_adv(cV);
647 vec cv2 = __vload_dup(pcv2);
648 dataType *psv2 = c7x::strm_agen<3, dataType>::get_adv(sV);
649 vec sv2 = __vload_dup(psv2);
651 vec v2_0 = c7x::strm_eng<0, vec>::get_adv();
652 vec v2_1 = c7x::strm_eng<1, vec>::get_adv();
654 vec v3_0 = c7x::strm_eng<0, vec>::get_adv();
655 vec v3_1 = c7x::strm_eng<1, vec>::get_adv();
657 vec vs1_0 = (v2_0 * sv1) + (v1_0 * cv1);
658 vec temps2_0 = (v2_0 * cv1) - (v1_0 * sv1);
659 vec vs1_1 = (v2_1 * sv1) + (v1_1 * cv1);
660 vec temps2_1 = (v2_1 * cv1) - (v1_1 * sv1);
662 vec vs2_0 = (v3_0 * sv2) + (temps2_0 * cv2);
663 v1_0 = (v3_0 * cv2) - (temps2_0 * sv2);
664 vec vs2_1 = (v3_1 * sv2) + (temps2_1 * cv2);
665 v1_1 = (v3_1 * cv2) - (temps2_1 * sv2);
667 pred = c7x::strm_agen<0, vec>::get_vpred();
668 pStore = c7x::strm_agen<0, vec>::get_adv(vStart);
669 __vstore_pred(pred, pStore, vs1_0);
671 pred = c7x::strm_agen<1, vec>::get_vpred();
672 pStore = c7x::strm_agen<1, vec>::get_adv(vStart + eleCount);
673 __vstore_pred(pred, pStore, vs1_1);
675 pred = c7x::strm_agen<0, vec>::get_vpred();
676 pStore = c7x::strm_agen<0, vec>::get_adv(vStart);
677 __vstore_pred(pred, pStore, vs2_0);
679 pred = c7x::strm_agen<1, vec>::get_vpred();
680 pStore = c7x::strm_agen<1, vec>::get_adv(vStart + eleCount);
681 __vstore_pred(pred, pStore, vs2_1);
// One rotation left over when the loops above did not reach rowPair.
684 if (vertical != rowPair) {
685 dataType *pcv1 = c7x::strm_agen<2, dataType>::get_adv(cV);
686 vec cv1 = __vload_dup(pcv1);
687 dataType *psv1 = c7x::strm_agen<3, dataType>::get_adv(sV);
688 vec sv1 = __vload_dup(psv1);
690 vec v2_0 = c7x::strm_eng<0, vec>::get_adv();
691 vec v2_1 = c7x::strm_eng<1, vec>::get_adv();
693 vec vs1_0 = (v2_0 * sv1) + (v1_0 * cv1);
694 v1_0 = (v2_0 * cv1) - (v1_0 * sv1);
695 vec vs1_1 = (v2_1 * sv1) + (v1_1 * cv1);
696 v1_1 = (v2_1 * cv1) - (v1_1 * sv1);
698 pred = c7x::strm_agen<0, vec>::get_vpred();
699 pStore = c7x::strm_agen<0, vec>::get_adv(vStart);
700 __vstore_pred(pred, pStore, vs1_0);
702 pred = c7x::strm_agen<1, vec>::get_vpred();
703 pStore = c7x::strm_agen<1, vec>::get_adv(vStart + eleCount);
704 __vstore_pred(pred, pStore, vs1_1);
// Store the carried rows (v1_0 / v1_1) through SA0 / SA1.
707 pred = c7x::strm_agen<0, vec>::get_vpred();
708 pStore = c7x::strm_agen<0, vec>::get_adv(vStart);
709 __vstore_pred(pred, pStore, v1_0);
711 pred = c7x::strm_agen<1, vec>::get_vpred();
712 pStore = c7x::strm_agen<1, vec>::get_adv(vStart + eleCount);
713 __vstore_pred(pred, pStore, v1_1);
// Stream 0 has one more strip than stream 1 (odd nVec): process it on its own,
// using SE0 / SA0 only.
717 if (se0ICNT2 != se1ICNT2) {
718 vec v1_0 = c7x::strm_eng<0, vec>::get_adv();
719 __vpred pred; vec *pStore;
720 int32_t vertical = 0;
// Four rotations per iteration.
722 for (vertical = 0; vertical < rowPair - 3; vertical += 4) {
723 dataType *pcv1 = c7x::strm_agen<2, dataType>::get_adv(cV);
724 vec cv1 = __vload_dup(pcv1);
725 dataType *psv1 = c7x::strm_agen<3, dataType>::get_adv(sV);
726 vec sv1 = __vload_dup(psv1);
727 dataType *pcv2 = c7x::strm_agen<2, dataType>::get_adv(cV);
728 vec cv2 = __vload_dup(pcv2);
729 dataType *psv2 = c7x::strm_agen<3, dataType>::get_adv(sV);
730 vec sv2 = __vload_dup(psv2);
731 dataType *pcv3 = c7x::strm_agen<2, dataType>::get_adv(cV);
732 vec cv3 = __vload_dup(pcv3);
733 dataType *psv3 = c7x::strm_agen<3, dataType>::get_adv(sV);
734 vec sv3 = __vload_dup(psv3);
735 dataType *pcv4 = c7x::strm_agen<2, dataType>::get_adv(cV);
736 vec cv4 = __vload_dup(pcv4);
737 dataType *psv4 = c7x::strm_agen<3, dataType>::get_adv(sV);
738 vec sv4 = __vload_dup(psv4);
740 vec v2_0 = c7x::strm_eng<0, vec>::get_adv();
741 vec v3_0 = c7x::strm_eng<0, vec>::get_adv();
742 vec v4_0 = c7x::strm_eng<0, vec>::get_adv();
743 vec v5_0 = c7x::strm_eng<0, vec>::get_adv();
745 vec vs1_0 = (v2_0 * sv1) + (v1_0 * cv1);
746 vec temps2_0 = (v2_0 * cv1) - (v1_0 * sv1);
748 vec vs2_0 = (v3_0 * sv2) + (temps2_0 * cv2);
749 vec temps3_0 = (v3_0 * cv2) - (temps2_0 * sv2);
751 vec vs3_0 = (v4_0 * sv3) + (temps3_0 * cv3);
752 vec temps4_0 = (v4_0 * cv3) - (temps3_0 * sv3);
754 vec vs4_0 = (v5_0 * sv4) + (temps4_0 * cv4);
755 v1_0 = (v5_0 * cv4) - (temps4_0 * sv4);
757 pred = c7x::strm_agen<0, vec>::get_vpred();
758 pStore = c7x::strm_agen<0, vec>::get_adv(vStart);
759 __vstore_pred(pred, pStore, vs1_0);
761 pred = c7x::strm_agen<0, vec>::get_vpred();
762 pStore = c7x::strm_agen<0, vec>::get_adv(vStart);
763 __vstore_pred(pred, pStore, vs2_0);
765 pred = c7x::strm_agen<0, vec>::get_vpred();
766 pStore = c7x::strm_agen<0, vec>::get_adv(vStart);
767 __vstore_pred(pred, pStore, vs3_0);
769 pred = c7x::strm_agen<0, vec>::get_vpred();
770 pStore = c7x::strm_agen<0, vec>::get_adv(vStart);
771 __vstore_pred(pred, pStore, vs4_0);
// Two rotations per iteration.
774 for (; vertical < rowPair - 1; vertical += 2) {
775 dataType *pcv1 = c7x::strm_agen<2, dataType>::get_adv(cV);
776 vec cv1 = __vload_dup(pcv1);
777 dataType *psv1 = c7x::strm_agen<3, dataType>::get_adv(sV);
778 vec sv1 = __vload_dup(psv1);
779 dataType *pcv2 = c7x::strm_agen<2, dataType>::get_adv(cV);
780 vec cv2 = __vload_dup(pcv2);
781 dataType *psv2 = c7x::strm_agen<3, dataType>::get_adv(sV);
782 vec sv2 = __vload_dup(psv2);
784 vec v2_0 = c7x::strm_eng<0, vec>::get_adv();
785 vec v3_0 = c7x::strm_eng<0, vec>::get_adv();
787 vec vs1_0 = (v2_0 * sv1) + (v1_0 * cv1);
788 vec temps2_0 = (v2_0 * cv1) - (v1_0 * sv1);
790 vec vs2_0 = (v3_0 * sv2) + (temps2_0 * cv2);
791 v1_0 = (v3_0 * cv2) - (temps2_0 * sv2);
793 pred = c7x::strm_agen<0, vec>::get_vpred();
794 pStore = c7x::strm_agen<0, vec>::get_adv(vStart);
795 __vstore_pred(pred, pStore, vs1_0);
797 pred = c7x::strm_agen<0, vec>::get_vpred();
798 pStore = c7x::strm_agen<0, vec>::get_adv(vStart);
799 __vstore_pred(pred, pStore, vs2_0);
// Last single rotation, if any.
802 if (vertical != rowPair) {
803 dataType *pcv1 = c7x::strm_agen<2, dataType>::get_adv(cV);
804 vec cv1 = __vload_dup(pcv1);
805 dataType *psv1 = c7x::strm_agen<3, dataType>::get_adv(sV);
806 vec sv1 = __vload_dup(psv1);
808 vec v2_0 = c7x::strm_eng<0, vec>::get_adv();
810 vec vs1_0 = (v2_0 * sv1) + (v1_0 * cv1);
811 v1_0 = (v2_0 * cv1) - (v1_0 * sv1);
813 pred = c7x::strm_agen<0, vec>::get_vpred();
814 pStore = c7x::strm_agen<0, vec>::get_adv(vStart);
815 __vstore_pred(pred, pStore, vs1_0);
// Store the carried row of the unpaired strip.
818 pred = c7x::strm_agen<0, vec>::get_vpred();
819 pStore = c7x::strm_agen<0, vec>::get_adv(vStart);
820 __vstore_pred(pred, pStore, v1_0);
// Fragment of DSPLIB_diag_rotation_proc_ci (signature lines, closing braces and
// the definitions of lenTile and pSE0 are not part of this excerpt). The
// visible code processes U in tiles of eight vectors: eight "r" vectors are
// read from stream 0, and for every row read from stream 1 it computes
//   stored row = v * c - r * s   (written through SA1)
//   r          = v * s + r * c   (written through SA0)
// with c / s taken one scalar at a time from cU / sU.
844 template <
typename dataType>
856 typedef typename c7x::make_full_vector<dataType>::type vec;
857 int32_t eleCount = c7x::element_count_of<vec>::value;
859 __SE_TEMPLATE_v1 se0Params;
860 __SA_TEMPLATE_v1 sa0Params;
861 __SE_TEMPLATE_v1 se1Params;
862 __SA_TEMPLATE_v1 sa1Params;
863 __SA_TEMPLATE_v1 sa2Params;
864 __SA_TEMPLATE_v1 sa3Params;
// Templates come from parameter-block slots 2 (SE0), 1 (SA0), 3 (SE1), 4 (SA1)
// and 5 (shared by SA2 and SA3).
866 se0Params = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (2 * SE_PARAM_SIZE));
867 sa0Params = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (1 * SE_PARAM_SIZE));
869 se1Params = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (3 * SE_PARAM_SIZE));
870 sa1Params = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (4 * SE_PARAM_SIZE));
872 sa2Params = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (5 * SE_PARAM_SIZE));
873 sa3Params = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (5 * SE_PARAM_SIZE));
// Number of tiles needed to cover Ncols (rounded up). lenTile is defined on a
// listing line not part of this excerpt - presumably 8, matching the eight
// vectors handled per tile below (TODO confirm).
876 int32_t nTile = DSPLIB_ceilingDiv(Ncols, (lenTile * eleCount));
878 se0Params.ICNT0 = Ncols;
881 sa0Params.ICNT0 = Ncols;
// SE1 / SA1 walk Nrows rows (stride rowUStride) for each of the nTile tiles.
884 se1Params.ICNT1 = Nrows;
885 se1Params.DIM1 = rowUStride;
886 se1Params.ICNT2 = nTile;
888 se1Params.DECDIM1_WIDTH = Ncols;
890 sa1Params.ICNT1 = Nrows;
891 sa1Params.DIM1 = rowUStride;
892 sa1Params.ICNT2 = nTile;
893 sa1Params.DECDIM1_WIDTH = Ncols;
// SA2 / SA3 address the coefficient arrays: Nrows entries per tile, nTile passes.
895 sa2Params.ICNT0 = Nrows;
896 sa2Params.ICNT1 = nTile;
898 sa3Params.ICNT0 = Nrows;
899 sa3Params.ICNT1 = nTile;
// Stream 1 starts one row below U. pSE0 is defined on a listing line that is
// not part of this excerpt.
902 dataType *pSE1 = U + rowUStride;
903 __SE0_OPEN(pSE0, se0Params);
904 __SE1_OPEN(pSE1, se1Params);
905 __SA0_OPEN(sa0Params);
906 __SA1_OPEN(sa1Params);
907 __SA2_OPEN(sa2Params);
908 __SA3_OPEN(sa3Params);
910 for (int32_t tile = 0; tile < nTile; tile++) {
// Eight vectors of the row read through stream 0; they are updated in every
// pass of the inner loop.
911 vec r1 = c7x::strm_eng<0, vec>::get_adv();
912 vec r2 = c7x::strm_eng<0, vec>::get_adv();
913 vec r3 = c7x::strm_eng<0, vec>::get_adv();
914 vec r4 = c7x::strm_eng<0, vec>::get_adv();
915 vec r5 = c7x::strm_eng<0, vec>::get_adv();
916 vec r6 = c7x::strm_eng<0, vec>::get_adv();
917 vec r7 = c7x::strm_eng<0, vec>::get_adv();
918 vec r8 = c7x::strm_eng<0, vec>::get_adv();
// First coefficient pair, broadcast across a vector.
920 dataType *pcU = c7x::strm_agen<2, dataType>::get_adv(cU);
921 vec vcU = __vload_dup(pcU);
922 dataType *psU = c7x::strm_agen<3, dataType>::get_adv(sU);
923 vec vsU = __vload_dup(psU);
924 for (int32_t vertical = 0; vertical < Nrows; vertical++) {
// Eight vectors of the current row from stream 1.
925 vec v1 = c7x::strm_eng<1, vec>::get_adv();
926 vec v2 = c7x::strm_eng<1, vec>::get_adv();
927 vec v3 = c7x::strm_eng<1, vec>::get_adv();
928 vec v4 = c7x::strm_eng<1, vec>::get_adv();
929 vec v5 = c7x::strm_eng<1, vec>::get_adv();
930 vec v6 = c7x::strm_eng<1, vec>::get_adv();
931 vec v7 = c7x::strm_eng<1, vec>::get_adv();
932 vec v8 = c7x::strm_eng<1, vec>::get_adv();
// Rotate each (v, r) pair with the current coefficients.
934 vec vs1 = v1 * vcU - r1 * vsU;
935 r1 = v1 * vsU + r1 * vcU;
937 vec vs2 = v2 * vcU - r2 * vsU;
938 r2 = v2 * vsU + r2 * vcU;
940 vec vs3 = v3 * vcU - r3 * vsU;
941 r3 = v3 * vsU + r3 * vcU;
943 vec vs4 = v4 * vcU - r4 * vsU;
944 r4 = v4 * vsU + r4 * vcU;
946 vec vs5 = v5 * vcU - r5 * vsU;
947 r5 = v5 * vsU + r5 * vcU;
949 vec vs6 = v6 * vcU - r6 * vsU;
950 r6 = v6 * vsU + r6 * vcU;
952 vec vs7 = v7 * vcU - r7 * vsU;
953 r7 = v7 * vsU + r7 * vcU;
955 vec vs8 = v8 * vcU - r8 * vsU;
956 r8 = v8 * vsU + r8 * vcU;
// Fetch the coefficient pair for the next row.
958 pcU = c7x::strm_agen<2, dataType>::get_adv(cU);
959 vcU = __vload_dup(pcU);
960 psU = c7x::strm_agen<3, dataType>::get_adv(sU);
961 vsU = __vload_dup(psU);
// Predicated stores of the rotated row through SA1 (based at pSE1).
963 __vpred pred = c7x::strm_agen<1, vec>::get_vpred();
964 vec *pStore = c7x::strm_agen<1, vec>::get_adv(pSE1);
965 __vstore_pred(pred, pStore, vs1);
967 pred = c7x::strm_agen<1, vec>::get_vpred();
968 pStore = c7x::strm_agen<1, vec>::get_adv(pSE1);
969 __vstore_pred(pred, pStore, vs2);
971 pred = c7x::strm_agen<1, vec>::get_vpred();
972 pStore = c7x::strm_agen<1, vec>::get_adv(pSE1);
973 __vstore_pred(pred, pStore, vs3);
975 pred = c7x::strm_agen<1, vec>::get_vpred();
976 pStore = c7x::strm_agen<1, vec>::get_adv(pSE1);
977 __vstore_pred(pred, pStore, vs4);
979 pred = c7x::strm_agen<1, vec>::get_vpred();
980 pStore = c7x::strm_agen<1, vec>::get_adv(pSE1);
981 __vstore_pred(pred, pStore, vs5);
983 pred = c7x::strm_agen<1, vec>::get_vpred();
984 pStore = c7x::strm_agen<1, vec>::get_adv(pSE1);
985 __vstore_pred(pred, pStore, vs6);
987 pred = c7x::strm_agen<1, vec>::get_vpred();
988 pStore = c7x::strm_agen<1, vec>::get_adv(pSE1);
989 __vstore_pred(pred, pStore, vs7);
991 pred = c7x::strm_agen<1, vec>::get_vpred();
992 pStore = c7x::strm_agen<1, vec>::get_adv(pSE1);
993 __vstore_pred(pred, pStore, vs8);
// Predicated stores of the updated r vectors through SA0 (based at pSE0).
995 __vpred pred1 = c7x::strm_agen<0, vec>::get_vpred();
996 vec *pStore1 = c7x::strm_agen<0, vec>::get_adv(pSE0);
997 __vstore_pred(pred1, pStore1, r1);
999 pred1 = c7x::strm_agen<0, vec>::get_vpred();
1000 pStore1 = c7x::strm_agen<0, vec>::get_adv(pSE0);
1001 __vstore_pred(pred1, pStore1, r2);
1003 pred1 = c7x::strm_agen<0, vec>::get_vpred();
1004 pStore1 = c7x::strm_agen<0, vec>::get_adv(pSE0);
1005 __vstore_pred(pred1, pStore1, r3);
1007 pred1 = c7x::strm_agen<0, vec>::get_vpred();
1008 pStore1 = c7x::strm_agen<0, vec>::get_adv(pSE0);
1009 __vstore_pred(pred1, pStore1, r4);
1011 pred1 = c7x::strm_agen<0, vec>::get_vpred();
1012 pStore1 = c7x::strm_agen<0, vec>::get_adv(pSE0);
1013 __vstore_pred(pred1, pStore1, r5);
1015 pred1 = c7x::strm_agen<0, vec>::get_vpred();
1016 pStore1 = c7x::strm_agen<0, vec>::get_adv(pSE0);
1017 __vstore_pred(pred1, pStore1, r6);
1019 pred1 = c7x::strm_agen<0, vec>::get_vpred();
1020 pStore1 = c7x::strm_agen<0, vec>::get_adv(pSE0);
1021 __vstore_pred(pred1, pStore1, r7);
1023 pred1 = c7x::strm_agen<0, vec>::get_vpred();
1024 pStore1 = c7x::strm_agen<0, vec>::get_adv(pSE0);
1025 __vstore_pred(pred1, pStore1, r8);
void DSPLIB_diag_negate_v_ci(dataType *V, int32_t Ncols, int32_t colVStride, uint8_t *pBlock)
Negates the values of a row in V.
void DSPLIB_diag_proc_init_ci(DSPLIB_kernelHandle handle)
void DSPLIB_diag_rotation_check_ci(dataType *diag, dataType *superdiag, dataType epsilon, int32_t *m, int32_t *rotation_test, int32_t Ncols, uint8_t *pBlock)
Updates values of "m" and "rotation_test" flag based on the values present in "diag",...
double constEpsilon< double >()
uint64_t movePredicate< double >(__vpred pred)
template void DSPLIB_diag_rotation_proc_ci< float >(float *U, int32_t startRow, int32_t Nrows, int32_t Ncols, int32_t rowUStride, float *cU, float *sU, uint8_t *pBlock)
template void DSPLIB_diag_rotation_check_ci< double >(double *diag, double *superdiag, double epsilon, int32_t *m, int32_t *rotation_test, int32_t Ncols, uint8_t *pBlock)
template void DSPLIB_diag_sqrt_ci< double >(double *superdiag, double *diag, int32_t length, uint8_t *pBlock)
uint64_t movePredicate(__vpred pred)
Moves predicate register to a 64-bit register.
template void DSPLIB_diag_epsilon_ci< float >(float *diag, float *superdiag, float *epsilon, int32_t Ncols, uint8_t *pBlock)
float constEpsilon< float >()
template void DSPLIB_diag_epsilon_ci< double >(double *diag, double *superdiag, double *epsilon, int32_t Ncols, uint8_t *pBlock)
void DSPLIB_diag_epsilon_ci(dataType *diag, dataType *superdiag, dataType *epsilon, int32_t Ncols, uint8_t *pBlock)
Updates "epsilon" value based on absolute max values from "diag" and "superdiag" vectors.
template void DSPLIB_diag_rotation_proc_ci< double >(double *U, int32_t startRow, int32_t Nrows, int32_t Ncols, int32_t rowUStride, double *cU, double *sU, uint8_t *pBlock)
void DSPLIB_diag_proc_ci(dataType *V, int32_t startRow, int32_t Nrows, int32_t Ncols, int32_t rowVStride, dataType *cV, dataType *sV, uint8_t *pBlock)
Updates rows of V' and U' based on the precalculated cV/cU and sV/sU vectors.
template void DSPLIB_diag_sqrt_ci< float >(float *superdiag, float *diag, int32_t length, uint8_t *pBlock)
template void DSPLIB_diag_negate_v_ci< float >(float *V, int32_t Ncols, int32_t colVStride, uint8_t *pBlock)
uint64_t movePredicate< float >(__vpred pred)
template void DSPLIB_diag_negate_v_ci< double >(double *V, int32_t Ncols, int32_t colVStride, uint8_t *pBlock)
template void DSPLIB_diag_proc_init_ci< double >(DSPLIB_kernelHandle handle)
void DSPLIB_diag_sqrt_ci(dataType *superdiag, dataType *diag, int32_t length, uint8_t *pBlock)
Calculates the square roots of the "diag" and "superdiag" vectors in place, using a refined reciprocal-square-root estimate.
template void DSPLIB_diag_rotation_check_ci< float >(float *diag, float *superdiag, float epsilon, int32_t *m, int32_t *rotation_test, int32_t Ncols, uint8_t *pBlock)
template void DSPLIB_diag_proc_init_ci< float >(DSPLIB_kernelHandle handle)
void DSPLIB_diag_rotation_proc_ci(dataType *U, int32_t startRow, int32_t Nrows, int32_t Ncols, int32_t rowUStride, dataType *cU, dataType *sU, uint8_t *pBlock)
Updates rows of U' based on the precalculated cU and sU vectors.
template void DSPLIB_diag_proc_ci< double >(double *V, int32_t startRow, int32_t Nrows, int32_t Ncols, int32_t rowVStride, double *cV, double *sV, uint8_t *pBlock)
template void DSPLIB_diag_proc_ci< float >(float *V, int32_t startRow, int32_t Nrows, int32_t Ncols, int32_t rowVStride, float *cV, float *sV, uint8_t *pBlock)
Header file for kernel's internal use. For the kernel's interface, please see DSPLIB_svd.
#define DSPLIB_DEBUGPRINTFN(N, fmt,...)
void * DSPLIB_kernelHandle
Handle type for DSPLIB operations.
Structure that is reserved for internal use by the kernel.
uint8_t bufPblock[DSPLIB_SVD_IXX_IXX_OXX_PBLOCK_SIZE]
Buffer to save SE & SA configuration parameters