DSPLIB User Guide
DSPLIB_svd_diag_ci.cpp
Go to the documentation of this file.
1 /******************************************************************************
2  * *
3  * module name :DSPLIB *
4  * *
5  * module description :Digital Signal Processing Library module for C7x+MMA *
6  * *
7  * Copyright (C) 2017-2018 Texas Instruments Incorporated - https://www.ti.com/ *
8  * ALL RIGHTS RESERVED *
9  * *
10  ******************************************************************************/
11 
23 /* *****************************************************************************
24  *
25  * INCLUDES
26  *
27  ***************************************************************************** */
28 #include "DSPLIB_svd_priv.h"
29 
30 /* *****************************************************************************
31  *
32  * INITIALIZATION
33  *
34  ***************************************************************************** */
35 
36 template <typename dataType> void DSPLIB_diag_proc_init_ci(DSPLIB_kernelHandle handle)
37 {
38  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Entering function");
39  DSPLIB_svd_PrivArgs *pKerPrivArgs = (DSPLIB_svd_PrivArgs *) handle;
40  uint8_t *pBlock = pKerPrivArgs->bufPblock;
41  int32_t strideVRow = pKerPrivArgs->strideVRows;
42  __SE_TEMPLATE_v1 se0Params = __gen_SE_TEMPLATE_v1();
43  __SA_TEMPLATE_v1 sa0Params = __gen_SA_TEMPLATE_v1();
44  __SE_TEMPLATE_v1 se1Params = __gen_SE_TEMPLATE_v1();
45 
46  typedef typename c7x::make_full_vector<dataType>::type vec;
47  int32_t eleCount = c7x::element_count_of<vec>::value;
48  __SE_ELETYPE SE_ELETYPE = c7x::se_eletype<vec>::value;
49  __SE_VECLEN SE_VECLEN = c7x::se_veclen<vec>::value;
50  __SA_VECLEN SA_VECLEN = c7x::sa_veclen<vec>::value;
51 
52  int32_t rowVStride = strideVRow / sizeof(dataType);
53 
54  se0Params.ICNT0 = eleCount;
55  se0Params.DIM1 = rowVStride;
56  se0Params.DIM2 = eleCount * 2;
57  se0Params.DIMFMT = __SE_DIMFMT_3D;
58  se0Params.ELETYPE = SE_ELETYPE;
59  se0Params.VECLEN = SE_VECLEN;
60  se0Params.DECDIM1 = __SE_DECDIM_DIM2;
61 
62  se1Params.ICNT0 = eleCount;
63  se1Params.DIM1 = -eleCount;
64  se1Params.DIMFMT = __SE_DIMFMT_2D;
65  se1Params.ELETYPE = SE_ELETYPE;
66  se1Params.VECLEN = SE_VECLEN;
67 
68  sa0Params.ICNT0 = eleCount;
69  sa0Params.DIM1 = rowVStride;
70  sa0Params.DIM2 = eleCount * 2;
71  sa0Params.DIMFMT = __SA_DIMFMT_3D;
72  sa0Params.VECLEN = SA_VECLEN;
73  sa0Params.DECDIM1 = __SA_DECDIM_DIM2;
74 
75  *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (19 * SE_PARAM_SIZE)) = se1Params;
76  *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (20 * SE_PARAM_SIZE)) = se0Params;
77  *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (21 * SE_PARAM_SIZE)) = sa0Params;
78 
79  DSPLIB_DEBUGPRINTFN(0, "Exiting function with return status: %d\n", DSPLIB_SUCCESS);
80 }
83 
84 /* *****************************************************************************
85  *
86  * IMPLEMENTATION
87  *
88  ***************************************************************************** */
89 
/**
 * Machine epsilon for the supported floating-point element types.
 * Only the float and double specializations are defined; the primary
 * template is declared but intentionally left without a definition.
 */
template <typename dataType> inline dataType constEpsilon();

/* Single precision: FLT_EPSILON */
template <> inline float constEpsilon<float>()
{
   return FLT_EPSILON;
}

/* Double precision: DBL_EPSILON */
template <> inline double constEpsilon<double>()
{
   return DBL_EPSILON;
}
93 
97 template <typename dataType>
98 void DSPLIB_diag_epsilon_ci(dataType *diag, dataType *superdiag, dataType *epsilon, int32_t Ncols, uint8_t *pBlock)
99 {
100  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Entering function");
101 
102  __SE_TEMPLATE_v1 se0Params, se1Params;
103 
104  se0Params = se1Params = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (11 * SE_PARAM_SIZE));
105 
106  typedef typename c7x::make_full_vector<dataType>::type vec;
107  int32_t eleCount = c7x::element_count_of<vec>::value;
108 
109  int32_t nVec = DSPLIB_ceilingDiv(Ncols, eleCount);
110 
111  se0Params.ICNT0 = Ncols;
112  se1Params.ICNT0 = Ncols;
113 
114  __SE0_OPEN(diag, se0Params);
115  __SE1_OPEN(superdiag, se1Params);
116 
117  vec max1, max2;
118  max1 = max2 = (vec) 0;
119  int32_t horizontal = 0;
120  for (horizontal = 0; horizontal < nVec - 1; horizontal += 2) {
121  vec v1 = c7x::strm_eng<0, vec>::get_adv();
122  vec v2 = c7x::strm_eng<1, vec>::get_adv();
123 
124  vec v3 = c7x::strm_eng<0, vec>::get_adv();
125  vec v4 = c7x::strm_eng<1, vec>::get_adv();
126 
127  vec add1 = __abs(v1) + __abs(v2);
128  vec add2 = __abs(v3) + __abs(v4);
129 
130  max1 = __max(max1, add1);
131  max2 = __max(max2, add2);
132  }
133 
134  if (horizontal != nVec) {
135  vec v1 = c7x::strm_eng<0, vec>::get_adv();
136  vec v2 = c7x::strm_eng<1, vec>::get_adv();
137 
138  vec add1 = __abs(v1) + __abs(v2);
139 
140  max1 = __max(max1, add1);
141  }
142 
143  max1 = __max(max1, max2);
144 
145  dataType maxVal = c7x_horizontal_max_fp<dataType, vec>(max1);
146 
147  *epsilon = constEpsilon<dataType>() * maxVal;
148 
149  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Exiting function");
150 }
151 template void
152 DSPLIB_diag_epsilon_ci<float>(float *diag, float *superdiag, float *epsilon, int32_t Ncols, uint8_t *pBlock);
153 template void
154 DSPLIB_diag_epsilon_ci<double>(double *diag, double *superdiag, double *epsilon, int32_t Ncols, uint8_t *pBlock);
155 
159 template <typename dataType> inline uint64_t movePredicate(__vpred pred);
160 
161 template <> inline uint64_t movePredicate<float>(__vpred pred) { return _mvpw(pred); }
162 
163 template <> inline uint64_t movePredicate<double>(__vpred pred) { return _mvpd(pred); }
164 
169 template <typename dataType>
170 void DSPLIB_diag_rotation_check_ci(dataType *diag,
171  dataType *superdiag,
172  dataType epsilon,
173  int32_t *m,
174  int32_t *rotation_test,
175  int32_t Ncols,
176  uint8_t *pBlock)
177 {
178  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Entering function");
179 
180  __SE_TEMPLATE_v1 se0Params = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (19 * SE_PARAM_SIZE));
181 
182  typedef typename c7x::make_full_vector<dataType>::type vec;
183  int32_t eleCount = c7x::element_count_of<vec>::value;
184  int32_t horizontal = Ncols;
185  int32_t nVec = (Ncols - 1) / eleCount;
186  dataType *diagStart = diag - Ncols + 2;
187  dataType *superdiagStart = superdiag - Ncols + 1;
188  bool breakLoop = false;
189 
190  if (nVec > 0) {
191  se0Params.ICNT1 = nVec;
192 
193  dataType *pSE0 = diag - (eleCount - 1);
194  dataType *pSE1 = superdiag - (eleCount - 1);
195 
196  __SE0_OPEN(pSE0, se0Params);
197  __SE1_OPEN(pSE1, se0Params);
198 
199  vec vecEpsilon = (vec) epsilon;
200  for (horizontal = Ncols; horizontal > eleCount; horizontal -= eleCount) {
201  /* SUPERDIAG */
202  vec v1SD = c7x::strm_eng<1, vec>::get_adv();
203  vec v2SD = __abs(v1SD);
204  __vpred predSD = __cmp_le_pred(v2SD, vecEpsilon);
205 
206  uint64_t predStoreSD = movePredicate<dataType>(predSD);
207  uint64_t leftMostBitSD = __leftmost_bit_detect_one(predStoreSD);
208  int32_t minIndexSD = 63 - leftMostBitSD;
209 
210  /* DIAG */
211  vec v1D = c7x::strm_eng<0, vec>::get_adv();
212  vec v2D = __abs(v1D);
213  __vpred predD = __cmp_le_pred(v2D, vecEpsilon);
214 
215  uint64_t predStoreD = movePredicate<dataType>(predD);
216  uint64_t leftMostBitD = __leftmost_bit_detect_one(predStoreD);
217  int32_t minIndexD = 63 - leftMostBitD;
218 
219  if (minIndexD >= 0 || minIndexSD >= 0) {
220 
221 
222 #if !defined(ENABLE_LDRA_COVERAGE)
223 /* This part of code checks for test f convergence / cancellation condition
224  Ref. Singular Value Decomposition and Least Squares Solutions. G. H. Golub et al
225  We use these conditions in order to calculte correct results if and when they occur */
226  if (minIndexSD >= minIndexD) {
227  /* update rotation flag */
228  *rotation_test = 0;
229  *m = horizontal - eleCount + minIndexSD;
230  }
231  else {
232  *m = horizontal - eleCount + minIndexD;
233  }
234 #else
235  /* update rotation flag */
236  *rotation_test = 0;
237  *m = horizontal - eleCount + minIndexSD;
238 
239 #endif
240  breakLoop = true;
241  break;
242 
243  }
244  }
245  __SE0_CLOSE();
246  __SE1_CLOSE();
247  }
248 
249  if ((!breakLoop)) {
250  int32_t i = 0;
251  for (i = horizontal - 1; i > 0; i--) {
252  if (fabs(superdiagStart[i]) <= epsilon) {
253  *rotation_test = 0;
254  break;
255  }
256  if (fabs(diagStart[i - 1]) <= epsilon) {
257  break;
258  }
259  } /* for (m=k;m>=0;m--) */
260  if (i == 0) {
261  *rotation_test = 0;
262  }
263  *m = i;
264  }
265 
266  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Exiting function");
267 }
268 template void DSPLIB_diag_rotation_check_ci<float>(float *diag,
269  float *superdiag,
270  float epsilon,
271  int32_t *m,
272  int32_t *rotation_test,
273  int32_t Ncols,
274  uint8_t *pBlock);
275 template void DSPLIB_diag_rotation_check_ci<double>(double *diag,
276  double *superdiag,
277  double epsilon,
278  int32_t *m,
279  int32_t *rotation_test,
280  int32_t Ncols,
281  uint8_t *pBlock);
282 
286 template <typename dataType>
287 void DSPLIB_diag_negate_v_ci(dataType *V, int32_t Ncols, int32_t colVStride, uint8_t *pBlock)
288 {
289  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Entering function");
290 
291  __SE_TEMPLATE_v1 se0Params, se1Params;
292  __SA_TEMPLATE_v1 sa0Params, sa1Params;
293 
294  se0Params = se1Params = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (11 * SE_PARAM_SIZE));
295  sa0Params = sa1Params = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (1 * SE_PARAM_SIZE));
296 
297  typedef typename c7x::make_full_vector<dataType>::type vec;
298  int32_t eleCount = c7x::element_count_of<vec>::value;
299 
300  int32_t nVec = DSPLIB_ceilingDiv(Ncols, eleCount);
301  int32_t totalIter DSPLIB_ceilingDiv(nVec, 2);
302  int32_t se0ICNT0 = (nVec / 2) * eleCount;
303  int32_t se1ICNT0 = Ncols - se0ICNT0;
304 
305  se0Params.ICNT0 = sa0Params.ICNT0 = se0ICNT0;
306  se1Params.ICNT0 = sa1Params.ICNT0 = se1ICNT0;
307 
308  dataType *pSE0 = V;
309  dataType *pSE1 = V + se0ICNT0;
310  __SE1_OPEN(pSE1, se1Params);
311  __SA1_OPEN(sa1Params);
312 
313  if (se0ICNT0 > 0) {
314  __SE0_OPEN(pSE0, se0Params);
315  __SA0_OPEN(sa0Params);
316  }
317  for (int32_t horizontal = 0; horizontal < totalIter; horizontal++) {
318  vec v1 = c7x::strm_eng<0, vec>::get_adv();
319  vec v2 = c7x::strm_eng<1, vec>::get_adv();
320 
321  __vpred pred1 = c7x::strm_agen<0, vec>::get_vpred();
322  vec *pV1 = c7x::strm_agen<0, vec>::get_adv(pSE0);
323  __vstore_pred(pred1, pV1, -v1);
324 
325  __vpred pred2 = c7x::strm_agen<1, vec>::get_vpred();
326  vec *pV2 = c7x::strm_agen<1, vec>::get_adv(pSE1);
327  __vstore_pred(pred2, pV2, -v2);
328  }
329 
330  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Exiting function");
331 }
332 template void DSPLIB_diag_negate_v_ci<float>(float *V, int32_t Ncols, int32_t colVStride, uint8_t *pBlock);
333 template void DSPLIB_diag_negate_v_ci<double>(double *V, int32_t Ncols, int32_t colVStride, uint8_t *pBlock);
334 
338 template <typename dataType>
339 void DSPLIB_diag_sqrt_ci(dataType *superdiag, dataType *diag, int32_t length, uint8_t *pBlock)
340 {
341  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Entering function");
342 
343  __SE_TEMPLATE_v1 se0Params = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (11 * SE_PARAM_SIZE));
344  __SA_TEMPLATE_v1 sa0Params = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (1 * SE_PARAM_SIZE));
345 
346  typedef typename c7x::make_full_vector<dataType>::type vec;
347  int32_t eleCount = c7x::element_count_of<vec>::value;
348  int32_t nVec = DSPLIB_ceilingDiv(length, eleCount);
349 
350  se0Params.ICNT0 = sa0Params.ICNT0 = length;
351 
352  __SE0_OPEN(superdiag, se0Params);
353  __SE1_OPEN(diag, se0Params);
354  __SA0_OPEN(sa0Params);
355  __SA1_OPEN(sa0Params);
356 
357  /* Constant values */
358  vec half = (vec) 0.5;
359  vec OneP5 = (vec) 1.5;
360  vec zero = (vec) 0;
361  vec maxValue = (vec) std::numeric_limits<dataType>::max();
362 
363  for (int32_t i = 0; i < nVec; i++) {
364  vec vSD = c7x::strm_eng<0, vec>::get_adv();
365  vec vD = c7x::strm_eng<1, vec>::get_adv();
366 
367  vec p0SD = __recip_sqrt(vSD);
368  vec d0SD = p0SD * vSD;
369  vec p1SD = OneP5 - d0SD * p0SD * half;
370  vec ySD = p0SD * p1SD;
371 
372  d0SD = ySD * vSD;
373  p1SD = OneP5 - d0SD * ySD * half;
374  ySD = vSD * ySD * p1SD;
375 
376  vec p0D = __recip_sqrt(vD);
377  vec d0D = p0D * vD;
378  vec p1D = OneP5 - d0D * p0D * half;
379  vec yD = p0D * p1D;
380 
381  d0D = yD * vD;
382  p1D = OneP5 - d0D * yD * half;
383  yD = vD * yD * p1D;
384 
385  __vpred cmp_lezeroSD = __cmp_le_pred(vSD, zero);
386  ySD = __select(cmp_lezeroSD, zero, ySD);
387  __vpred cmp_gtmaxSD = __cmp_le_pred(maxValue, vSD);
388  vec outSD = __select(cmp_gtmaxSD, maxValue, ySD);
389 
390  __vpred cmp_lezeroD = __cmp_le_pred(vD, zero);
391  yD = __select(cmp_lezeroD, zero, yD);
392  __vpred cmp_gtmaxD = __cmp_le_pred(maxValue, vD);
393  vec outD = __select(cmp_gtmaxD, maxValue, yD);
394 
395  __vpred predSD = c7x::strm_agen<0, vec>::get_vpred();
396  vec *pSD = c7x::strm_agen<0, vec>::get_adv(superdiag);
397  __vstore_pred(predSD, pSD, outSD);
398 
399  __vpred predD = c7x::strm_agen<1, vec>::get_vpred();
400  vec *pD = c7x::strm_agen<1, vec>::get_adv(diag);
401  __vstore_pred(predD, pD, outD);
402  }
403 
404  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Exiting function");
405 }
406 template void DSPLIB_diag_sqrt_ci<float>(float *superdiag, float *diag, int32_t length, uint8_t *pBlock);
407 template void DSPLIB_diag_sqrt_ci<double>(double *superdiag, double *diag, int32_t length, uint8_t *pBlock);
408 
/*
 * DSPLIB_diag_proc_ci
 *
 * Applies a chain of plane rotations down consecutive rows of V, in place,
 * starting at row startRow. With `carry` initialised to the first row, each
 * step k reads the next row `x` and computes
 *
 *      out   = x * sV[k] + carry * cV[k]     (stored to the current row)
 *      carry = x * cV[k] - carry * sV[k]     (carried to the next step)
 *
 * After the last step the remaining `carry` is stored as the final row, so
 * Nrows rows are read and Nrows rows are written per vector-wide column.
 *
 * Columns are processed one full vector at a time. Vector columns are
 * handled in pairs: SE0/SA0 start at vStart and SE1/SA1 at vStart + eleCount.
 * When the number of vector columns is odd, the last one is handled by
 * SE0/SA0 alone in the second half of the function.
 *
 * Parameters
 *   V          : base of the matrix being updated
 *   startRow   : first row of the rotation chain
 *   Nrows      : number of rows taking part (Nrows - 1 rotations)
 *   Ncols      : number of columns
 *   rowVStride : row stride of V, in elements (used for pointer arithmetic
 *                and as DIM1 of the streams)
 *   cV, sV     : per-rotation coefficients, read through SA2 / SA3
 *                (presumably cosines and sines - confirm with caller)
 *   pBlock     : parameter block holding the pre-built stream templates
 */
412 template <typename dataType>
413 void DSPLIB_diag_proc_ci(dataType *V,
414  int32_t startRow,
415  int32_t Nrows,
416  int32_t Ncols,
417  int32_t rowVStride,
418  dataType *cV,
419  dataType *sV,
420  uint8_t *pBlock)
421 {
422  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Entering function");
423  typedef typename c7x::make_full_vector<dataType>::type vec;
424  int32_t eleCount = c7x::element_count_of<vec>::value;
425 
/* First row touched by the rotation chain */
426  dataType *vStart = V + startRow * rowVStride;
427 
428  __SE_TEMPLATE_v1 se0Params, se1Params;
429  __SA_TEMPLATE_v1 sa0Params, sa1Params, sa2Params, sa3Params;
/* Slots 20 / 21 are the 3D column-walking SE / SA templates written by
   DSPLIB_diag_proc_init_ci. Slot 5 is built outside this file; it is used
   here as the template for the cV / sV coefficient address generators. */
430  se0Params = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (20 * SE_PARAM_SIZE));
431  se1Params = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (20 * SE_PARAM_SIZE));
432  sa0Params = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (21 * SE_PARAM_SIZE));
433  sa1Params = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (21 * SE_PARAM_SIZE));
434  sa2Params = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (5 * SE_PARAM_SIZE));
435  sa3Params = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (5 * SE_PARAM_SIZE));
436 
/* rowPair  : number of rotations (adjacent row pairs)
   se1ICNT2 : vector columns handled by stream pair 1 (= number of column pairs)
   se0ICNT2 : vector columns handled by stream pair 0 (one more when nVec is odd) */
437  int32_t rowPair = Nrows - 1;
438  int32_t nVec = DSPLIB_ceilingDiv(Ncols, eleCount);
439  int32_t se1ICNT2 = nVec / 2;
440  int32_t se0ICNT2 = nVec - se1ICNT2;
441 
442  se0Params.ICNT1 = sa0Params.ICNT1 = Nrows;
443  se0Params.ICNT2 = sa0Params.ICNT2 = se0ICNT2;
444  se0Params.DECDIM1_WIDTH = sa0Params.DECDIM1_WIDTH = Ncols;
445 
/* Stream pair 1 starts eleCount elements into the row, hence the reduced width */
446  se1Params.ICNT1 = sa1Params.ICNT1 = Nrows;
447  se1Params.ICNT2 = sa1Params.ICNT2 = se1ICNT2;
448  se1Params.DECDIM1_WIDTH = sa1Params.DECDIM1_WIDTH = Ncols - eleCount;
449 
450  se0Params.DIM1 = sa0Params.DIM1 = se1Params.DIM1 = sa1Params.DIM1 = rowVStride;
451 
/* Coefficient generators: rowPair coefficients, replayed once per vector
   column handled by stream pair 0 */
452  sa2Params.ICNT0 = sa3Params.ICNT0 = rowPair;
453  sa2Params.ICNT1 = sa3Params.ICNT1 = se0ICNT2;
454 
455  __SE0_OPEN(vStart, se0Params);
456  __SA0_OPEN(sa0Params);
457  __SA2_OPEN(sa2Params);
458  __SA3_OPEN(sa3Params);
459 
/* ---- Part 1: vector columns processed two at a time (streams 0 and 1) ---- */
460  if (se1ICNT2 > 0) {
461  __SE1_OPEN(vStart + eleCount, se1Params);
462  __SA1_OPEN(sa1Params);
463  __vpred pred; vec *pStore;
464  for (int32_t horizontal = 0; horizontal < se1ICNT2; horizontal++) {
/* v1_0 / v1_1 hold the carried row for stream 0 / stream 1 */
465  vec v1_0 = c7x::strm_eng<0, vec>::get_adv();
466  vec v1_1 = c7x::strm_eng<1, vec>::get_adv();
467 
468  int32_t vertical = 0;
469 
/* Four rotations per pass, manually pipelined: the coefficients for the
   first pass are loaded here, each loop pass loads the coefficients for the
   following pass, and the last group of four is finished after the loop. */
470  if (rowPair >= 4) {
471  dataType *pcv1 = c7x::strm_agen<2, dataType>::get_adv(cV);
472  vec cv1 = __vload_dup(pcv1);
473  dataType *psv1 = c7x::strm_agen<3, dataType>::get_adv(sV);
474  vec sv1 = __vload_dup(psv1);
475  dataType *pcv2 = c7x::strm_agen<2, dataType>::get_adv(cV);
476  vec cv2 = __vload_dup(pcv2);
477  dataType *psv2 = c7x::strm_agen<3, dataType>::get_adv(sV);
478  vec sv2 = __vload_dup(psv2);
479  dataType *pcv3 = c7x::strm_agen<2, dataType>::get_adv(cV);
480  vec cv3 = __vload_dup(pcv3);
481  dataType *psv3 = c7x::strm_agen<3, dataType>::get_adv(sV);
482  vec sv3 = __vload_dup(psv3);
483  dataType *pcv4 = c7x::strm_agen<2, dataType>::get_adv(cV);
484  vec cv4 = __vload_dup(pcv4);
485  dataType *psv4 = c7x::strm_agen<3, dataType>::get_adv(sV);
486  vec sv4 = __vload_dup(psv4);
487  vec v2_0, v2_1, v3_0, v3_1, v4_0, v4_1, v5_0, v5_1;
488  vec vs1_0, temps2_0, vs1_1, temps2_1;
489  vec vs2_0, temps3_0, vs2_1, temps3_1;
490  vec vs3_0, temps4_0, vs3_1, temps4_1;
491  vec vs4_0, vs4_1;
/* The bound leaves one group of four for the epilogue below */
492  for (vertical = 0; vertical < rowPair - 7; vertical += 4) {
493  v2_0 = c7x::strm_eng<0, vec>::get_adv();
494  v2_1 = c7x::strm_eng<1, vec>::get_adv();
495 
496  v3_0 = c7x::strm_eng<0, vec>::get_adv();
497  v3_1 = c7x::strm_eng<1, vec>::get_adv();
498 
499  v4_0 = c7x::strm_eng<0, vec>::get_adv();
500  v4_1 = c7x::strm_eng<1, vec>::get_adv();
501 
502  v5_0 = c7x::strm_eng<0, vec>::get_adv();
503  v5_1 = c7x::strm_eng<1, vec>::get_adv();
504 
505  vs1_0 = (v2_0 * sv1) + (v1_0 * cv1);
506  temps2_0 = (v2_0 * cv1) - (v1_0 * sv1);
507  vs1_1 = (v2_1 * sv1) + (v1_1 * cv1);
508  temps2_1 = (v2_1 * cv1) - (v1_1 * sv1);
509 
510  vs2_0 = (v3_0 * sv2) + (temps2_0 * cv2);
511  temps3_0 = (v3_0 * cv2) - (temps2_0 * sv2);
512  vs2_1 = (v3_1 * sv2) + (temps2_1 * cv2);
513  temps3_1 = (v3_1 * cv2) - (temps2_1 * sv2);
514 
515  vs3_0 = (v4_0 * sv3) + (temps3_0 * cv3);
516  temps4_0 = (v4_0 * cv3) - (temps3_0 * sv3);
517  vs3_1 = (v4_1 * sv3) + (temps3_1 * cv3);
518  temps4_1 = (v4_1 * cv3) - (temps3_1 * sv3);
519 
520  vs4_0 = (v5_0 * sv4) + (temps4_0 * cv4);
521  v1_0 = (v5_0 * cv4) - (temps4_0 * sv4);
522  vs4_1 = (v5_1 * sv4) + (temps4_1 * cv4);
523  v1_1 = (v5_1 * cv4) - (temps4_1 * sv4);
524 
/* Fetch the coefficients for the next group of four rotations */
525  pcv1 = c7x::strm_agen<2, dataType>::get_adv(cV);
526  cv1 = __vload_dup(pcv1);
527  psv1 = c7x::strm_agen<3, dataType>::get_adv(sV);
528  sv1 = __vload_dup(psv1);
529  pcv2 = c7x::strm_agen<2, dataType>::get_adv(cV);
530  cv2 = __vload_dup(pcv2);
531  psv2 = c7x::strm_agen<3, dataType>::get_adv(sV);
532  sv2 = __vload_dup(psv2);
533  pcv3 = c7x::strm_agen<2, dataType>::get_adv(cV);
534  cv3 = __vload_dup(pcv3);
535  psv3 = c7x::strm_agen<3, dataType>::get_adv(sV);
536  sv3 = __vload_dup(psv3);
537  pcv4 = c7x::strm_agen<2, dataType>::get_adv(cV);
538  cv4 = __vload_dup(pcv4);
539  psv4 = c7x::strm_agen<3, dataType>::get_adv(sV);
540  sv4 = __vload_dup(psv4);
541 
542  pred = c7x::strm_agen<0, vec>::get_vpred();
543  pStore = c7x::strm_agen<0, vec>::get_adv(vStart);
544  __vstore_pred(pred, pStore, vs1_0);
545 
546  pred = c7x::strm_agen<1, vec>::get_vpred();
547  pStore = c7x::strm_agen<1, vec>::get_adv(vStart + eleCount);
548  __vstore_pred(pred, pStore, vs1_1);
549 
550  pred = c7x::strm_agen<0, vec>::get_vpred();
551  pStore = c7x::strm_agen<0, vec>::get_adv(vStart);
552  __vstore_pred(pred, pStore, vs2_0);
553 
554  pred = c7x::strm_agen<1, vec>::get_vpred();
555  pStore = c7x::strm_agen<1, vec>::get_adv(vStart + eleCount);
556  __vstore_pred(pred, pStore, vs2_1);
557 
558  pred = c7x::strm_agen<0, vec>::get_vpred();
559  pStore = c7x::strm_agen<0, vec>::get_adv(vStart);
560  __vstore_pred(pred, pStore, vs3_0);
561 
562  pred = c7x::strm_agen<1, vec>::get_vpred();
563  pStore = c7x::strm_agen<1, vec>::get_adv(vStart + eleCount);
564  __vstore_pred(pred, pStore, vs3_1);
565 
566  pred = c7x::strm_agen<0, vec>::get_vpred();
567  pStore = c7x::strm_agen<0, vec>::get_adv(vStart);
568  __vstore_pred(pred, pStore, vs4_0);
569 
570  pred = c7x::strm_agen<1, vec>::get_vpred();
571  pStore = c7x::strm_agen<1, vec>::get_adv(vStart + eleCount);
572  __vstore_pred(pred, pStore, vs4_1);
573  }
/* Epilogue: last group of four rotations, using the coefficients that are
   already loaded (no further coefficient fetch) */
574  v2_0 = c7x::strm_eng<0, vec>::get_adv();
575  v2_1 = c7x::strm_eng<1, vec>::get_adv();
576 
577  v3_0 = c7x::strm_eng<0, vec>::get_adv();
578  v3_1 = c7x::strm_eng<1, vec>::get_adv();
579 
580  v4_0 = c7x::strm_eng<0, vec>::get_adv();
581  v4_1 = c7x::strm_eng<1, vec>::get_adv();
582 
583  v5_0 = c7x::strm_eng<0, vec>::get_adv();
584  v5_1 = c7x::strm_eng<1, vec>::get_adv();
585 
586  vs1_0 = (v2_0 * sv1) + (v1_0 * cv1);
587  temps2_0 = (v2_0 * cv1) - (v1_0 * sv1);
588  vs1_1 = (v2_1 * sv1) + (v1_1 * cv1);
589  temps2_1 = (v2_1 * cv1) - (v1_1 * sv1);
590 
591  vs2_0 = (v3_0 * sv2) + (temps2_0 * cv2);
592  temps3_0 = (v3_0 * cv2) - (temps2_0 * sv2);
593  vs2_1 = (v3_1 * sv2) + (temps2_1 * cv2);
594  temps3_1 = (v3_1 * cv2) - (temps2_1 * sv2);
595 
596  vs3_0 = (v4_0 * sv3) + (temps3_0 * cv3);
597  temps4_0 = (v4_0 * cv3) - (temps3_0 * sv3);
598  vs3_1 = (v4_1 * sv3) + (temps3_1 * cv3);
599  temps4_1 = (v4_1 * cv3) - (temps3_1 * sv3);
600 
601  vs4_0 = (v5_0 * sv4) + (temps4_0 * cv4);
602  v1_0 = (v5_0 * cv4) - (temps4_0 * sv4);
603  vs4_1 = (v5_1 * sv4) + (temps4_1 * cv4);
604  v1_1 = (v5_1 * cv4) - (temps4_1 * sv4);
605 
606  pred = c7x::strm_agen<0, vec>::get_vpred();
607  pStore = c7x::strm_agen<0, vec>::get_adv(vStart);
608  __vstore_pred(pred, pStore, vs1_0);
609 
610  pred = c7x::strm_agen<1, vec>::get_vpred();
611  pStore = c7x::strm_agen<1, vec>::get_adv(vStart + eleCount);
612  __vstore_pred(pred, pStore, vs1_1);
613 
614  pred = c7x::strm_agen<0, vec>::get_vpred();
615  pStore = c7x::strm_agen<0, vec>::get_adv(vStart);
616  __vstore_pred(pred, pStore, vs2_0);
617 
618  pred = c7x::strm_agen<1, vec>::get_vpred();
619  pStore = c7x::strm_agen<1, vec>::get_adv(vStart + eleCount);
620  __vstore_pred(pred, pStore, vs2_1);
621 
622  pred = c7x::strm_agen<0, vec>::get_vpred();
623  pStore = c7x::strm_agen<0, vec>::get_adv(vStart);
624  __vstore_pred(pred, pStore, vs3_0);
625 
626  pred = c7x::strm_agen<1, vec>::get_vpred();
627  pStore = c7x::strm_agen<1, vec>::get_adv(vStart + eleCount);
628  __vstore_pred(pred, pStore, vs3_1);
629 
630  pred = c7x::strm_agen<0, vec>::get_vpred();
631  pStore = c7x::strm_agen<0, vec>::get_adv(vStart);
632  __vstore_pred(pred, pStore, vs4_0);
633 
634  pred = c7x::strm_agen<1, vec>::get_vpred();
635  pStore = c7x::strm_agen<1, vec>::get_adv(vStart + eleCount);
636  __vstore_pred(pred, pStore, vs4_1);
637 
638  vertical += 4;
639  }
640 
/* Remaining rotations, two at a time */
641  for (; vertical < rowPair - 1; vertical += 2) {
642  dataType *pcv1 = c7x::strm_agen<2, dataType>::get_adv(cV);
643  vec cv1 = __vload_dup(pcv1);
644  dataType *psv1 = c7x::strm_agen<3, dataType>::get_adv(sV);
645  vec sv1 = __vload_dup(psv1);
646  dataType *pcv2 = c7x::strm_agen<2, dataType>::get_adv(cV);
647  vec cv2 = __vload_dup(pcv2);
648  dataType *psv2 = c7x::strm_agen<3, dataType>::get_adv(sV);
649  vec sv2 = __vload_dup(psv2);
650 
651  vec v2_0 = c7x::strm_eng<0, vec>::get_adv();
652  vec v2_1 = c7x::strm_eng<1, vec>::get_adv();
653 
654  vec v3_0 = c7x::strm_eng<0, vec>::get_adv();
655  vec v3_1 = c7x::strm_eng<1, vec>::get_adv();
656 
657  vec vs1_0 = (v2_0 * sv1) + (v1_0 * cv1);
658  vec temps2_0 = (v2_0 * cv1) - (v1_0 * sv1);
659  vec vs1_1 = (v2_1 * sv1) + (v1_1 * cv1);
660  vec temps2_1 = (v2_1 * cv1) - (v1_1 * sv1);
661 
662  vec vs2_0 = (v3_0 * sv2) + (temps2_0 * cv2);
663  v1_0 = (v3_0 * cv2) - (temps2_0 * sv2);
664  vec vs2_1 = (v3_1 * sv2) + (temps2_1 * cv2);
665  v1_1 = (v3_1 * cv2) - (temps2_1 * sv2);
666 
667  pred = c7x::strm_agen<0, vec>::get_vpred();
668  pStore = c7x::strm_agen<0, vec>::get_adv(vStart);
669  __vstore_pred(pred, pStore, vs1_0);
670 
671  pred = c7x::strm_agen<1, vec>::get_vpred();
672  pStore = c7x::strm_agen<1, vec>::get_adv(vStart + eleCount);
673  __vstore_pred(pred, pStore, vs1_1);
674 
675  pred = c7x::strm_agen<0, vec>::get_vpred();
676  pStore = c7x::strm_agen<0, vec>::get_adv(vStart);
677  __vstore_pred(pred, pStore, vs2_0);
678 
679  pred = c7x::strm_agen<1, vec>::get_vpred();
680  pStore = c7x::strm_agen<1, vec>::get_adv(vStart + eleCount);
681  __vstore_pred(pred, pStore, vs2_1);
682  }
683 
/* One rotation left over when rowPair is odd */
684  if (vertical != rowPair) {
685  dataType *pcv1 = c7x::strm_agen<2, dataType>::get_adv(cV);
686  vec cv1 = __vload_dup(pcv1);
687  dataType *psv1 = c7x::strm_agen<3, dataType>::get_adv(sV);
688  vec sv1 = __vload_dup(psv1);
689 
690  vec v2_0 = c7x::strm_eng<0, vec>::get_adv();
691  vec v2_1 = c7x::strm_eng<1, vec>::get_adv();
692 
693  vec vs1_0 = (v2_0 * sv1) + (v1_0 * cv1);
694  v1_0 = (v2_0 * cv1) - (v1_0 * sv1);
695  vec vs1_1 = (v2_1 * sv1) + (v1_1 * cv1);
696  v1_1 = (v2_1 * cv1) - (v1_1 * sv1);
697 
698  pred = c7x::strm_agen<0, vec>::get_vpred();
699  pStore = c7x::strm_agen<0, vec>::get_adv(vStart);
700  __vstore_pred(pred, pStore, vs1_0);
701 
702  pred = c7x::strm_agen<1, vec>::get_vpred();
703  pStore = c7x::strm_agen<1, vec>::get_adv(vStart + eleCount);
704  __vstore_pred(pred, pStore, vs1_1);
705  }
706 
/* The final carried value becomes the last row of this column pair */
707  pred = c7x::strm_agen<0, vec>::get_vpred();
708  pStore = c7x::strm_agen<0, vec>::get_adv(vStart);
709  __vstore_pred(pred, pStore, v1_0);
710 
711  pred = c7x::strm_agen<1, vec>::get_vpred();
712  pStore = c7x::strm_agen<1, vec>::get_adv(vStart + eleCount);
713  __vstore_pred(pred, pStore, v1_1);
714  }
715  }
716 
/* ---- Part 2: odd vector-column count - the last column uses stream 0 only ---- */
717  if (se0ICNT2 != se1ICNT2) {
718  vec v1_0 = c7x::strm_eng<0, vec>::get_adv();
719  __vpred pred; vec *pStore;
720  int32_t vertical = 0;
721 
/* Four rotations per pass (coefficients fetched inside the loop here) */
722  for (vertical = 0; vertical < rowPair - 3; vertical += 4) {
723  dataType *pcv1 = c7x::strm_agen<2, dataType>::get_adv(cV);
724  vec cv1 = __vload_dup(pcv1);
725  dataType *psv1 = c7x::strm_agen<3, dataType>::get_adv(sV);
726  vec sv1 = __vload_dup(psv1);
727  dataType *pcv2 = c7x::strm_agen<2, dataType>::get_adv(cV);
728  vec cv2 = __vload_dup(pcv2);
729  dataType *psv2 = c7x::strm_agen<3, dataType>::get_adv(sV);
730  vec sv2 = __vload_dup(psv2);
731  dataType *pcv3 = c7x::strm_agen<2, dataType>::get_adv(cV);
732  vec cv3 = __vload_dup(pcv3);
733  dataType *psv3 = c7x::strm_agen<3, dataType>::get_adv(sV);
734  vec sv3 = __vload_dup(psv3);
735  dataType *pcv4 = c7x::strm_agen<2, dataType>::get_adv(cV);
736  vec cv4 = __vload_dup(pcv4);
737  dataType *psv4 = c7x::strm_agen<3, dataType>::get_adv(sV);
738  vec sv4 = __vload_dup(psv4);
739 
740  vec v2_0 = c7x::strm_eng<0, vec>::get_adv();
741  vec v3_0 = c7x::strm_eng<0, vec>::get_adv();
742  vec v4_0 = c7x::strm_eng<0, vec>::get_adv();
743  vec v5_0 = c7x::strm_eng<0, vec>::get_adv();
744 
745  vec vs1_0 = (v2_0 * sv1) + (v1_0 * cv1);
746  vec temps2_0 = (v2_0 * cv1) - (v1_0 * sv1);
747 
748  vec vs2_0 = (v3_0 * sv2) + (temps2_0 * cv2);
749  vec temps3_0 = (v3_0 * cv2) - (temps2_0 * sv2);
750 
751  vec vs3_0 = (v4_0 * sv3) + (temps3_0 * cv3);
752  vec temps4_0 = (v4_0 * cv3) - (temps3_0 * sv3);
753 
754  vec vs4_0 = (v5_0 * sv4) + (temps4_0 * cv4);
755  v1_0 = (v5_0 * cv4) - (temps4_0 * sv4);
756 
757  pred = c7x::strm_agen<0, vec>::get_vpred();
758  pStore = c7x::strm_agen<0, vec>::get_adv(vStart);
759  __vstore_pred(pred, pStore, vs1_0);
760 
761  pred = c7x::strm_agen<0, vec>::get_vpred();
762  pStore = c7x::strm_agen<0, vec>::get_adv(vStart);
763  __vstore_pred(pred, pStore, vs2_0);
764 
765  pred = c7x::strm_agen<0, vec>::get_vpred();
766  pStore = c7x::strm_agen<0, vec>::get_adv(vStart);
767  __vstore_pred(pred, pStore, vs3_0);
768 
769  pred = c7x::strm_agen<0, vec>::get_vpred();
770  pStore = c7x::strm_agen<0, vec>::get_adv(vStart);
771  __vstore_pred(pred, pStore, vs4_0);
772  }
773 
/* Remaining rotations, two at a time */
774  for (; vertical < rowPair - 1; vertical += 2) {
775  dataType *pcv1 = c7x::strm_agen<2, dataType>::get_adv(cV);
776  vec cv1 = __vload_dup(pcv1);
777  dataType *psv1 = c7x::strm_agen<3, dataType>::get_adv(sV);
778  vec sv1 = __vload_dup(psv1);
779  dataType *pcv2 = c7x::strm_agen<2, dataType>::get_adv(cV);
780  vec cv2 = __vload_dup(pcv2);
781  dataType *psv2 = c7x::strm_agen<3, dataType>::get_adv(sV);
782  vec sv2 = __vload_dup(psv2);
783 
784  vec v2_0 = c7x::strm_eng<0, vec>::get_adv();
785  vec v3_0 = c7x::strm_eng<0, vec>::get_adv();
786 
787  vec vs1_0 = (v2_0 * sv1) + (v1_0 * cv1);
788  vec temps2_0 = (v2_0 * cv1) - (v1_0 * sv1);
789 
790  vec vs2_0 = (v3_0 * sv2) + (temps2_0 * cv2);
791  v1_0 = (v3_0 * cv2) - (temps2_0 * sv2);
792 
793  pred = c7x::strm_agen<0, vec>::get_vpred();
794  pStore = c7x::strm_agen<0, vec>::get_adv(vStart);
795  __vstore_pred(pred, pStore, vs1_0);
796 
797  pred = c7x::strm_agen<0, vec>::get_vpred();
798  pStore = c7x::strm_agen<0, vec>::get_adv(vStart);
799  __vstore_pred(pred, pStore, vs2_0);
800  }
801 
/* One rotation left over when rowPair is odd */
802  if (vertical != rowPair) {
803  dataType *pcv1 = c7x::strm_agen<2, dataType>::get_adv(cV);
804  vec cv1 = __vload_dup(pcv1);
805  dataType *psv1 = c7x::strm_agen<3, dataType>::get_adv(sV);
806  vec sv1 = __vload_dup(psv1);
807 
808  vec v2_0 = c7x::strm_eng<0, vec>::get_adv();
809 
810  vec vs1_0 = (v2_0 * sv1) + (v1_0 * cv1);
811  v1_0 = (v2_0 * cv1) - (v1_0 * sv1);
812 
813  pred = c7x::strm_agen<0, vec>::get_vpred();
814  pStore = c7x::strm_agen<0, vec>::get_adv(vStart);
815  __vstore_pred(pred, pStore, vs1_0);
816  }
817 
/* The final carried value becomes the last row of this column */
818  pred = c7x::strm_agen<0, vec>::get_vpred();
819  pStore = c7x::strm_agen<0, vec>::get_adv(vStart);
820  __vstore_pred(pred, pStore, v1_0);
821  }
822  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Exiting function");
823 }
/* Explicit instantiations for the supported element types */
824 template void DSPLIB_diag_proc_ci<float>(float *V,
825  int32_t startRow,
826  int32_t Nrows,
827  int32_t Ncols,
828  int32_t rowVStride,
829  float *cV,
830  float *sV,
831  uint8_t *pBlock);
832 template void DSPLIB_diag_proc_ci<double>(double *V,
833  int32_t startRow,
834  int32_t Nrows,
835  int32_t Ncols,
836  int32_t rowVStride,
837  double *cV,
838  double *sV,
839  uint8_t *pBlock);
840 
844 template <typename dataType>
846  int32_t startRow,
847  int32_t Nrows,
848  int32_t Ncols,
849  int32_t rowUStride,
850  dataType *cU,
851  dataType *sU,
852  uint8_t *pBlock)
853 {
854  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Entering function");
855 
856  typedef typename c7x::make_full_vector<dataType>::type vec;
857  int32_t eleCount = c7x::element_count_of<vec>::value;
858 
859  __SE_TEMPLATE_v1 se0Params; /* For First Row */
860  __SA_TEMPLATE_v1 sa0Params;
861  __SE_TEMPLATE_v1 se1Params; /* For Other Rows */
862  __SA_TEMPLATE_v1 sa1Params;
863  __SA_TEMPLATE_v1 sa2Params; /* For cU */
864  __SA_TEMPLATE_v1 sa3Params; /* For sU */
865 
866  se0Params = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (2 * SE_PARAM_SIZE));
867  sa0Params = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (1 * SE_PARAM_SIZE));
868 
869  se1Params = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (3 * SE_PARAM_SIZE));
870  sa1Params = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (4 * SE_PARAM_SIZE));
871 
872  sa2Params = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (5 * SE_PARAM_SIZE));
873  sa3Params = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (5 * SE_PARAM_SIZE));
874 
875  int32_t lenTile = 8;
876  int32_t nTile = DSPLIB_ceilingDiv(Ncols, (lenTile * eleCount));
877 
878  se0Params.ICNT0 = Ncols;
879  se0Params.ICNT1 = 1;
880 
881  sa0Params.ICNT0 = Ncols;
882  sa0Params.ICNT1 = 1;
883 
884  se1Params.ICNT1 = Nrows;
885  se1Params.DIM1 = rowUStride;
886  se1Params.ICNT2 = nTile;
887  se1Params.ICNT3 = 1;
888  se1Params.DECDIM1_WIDTH = Ncols;
889 
890  sa1Params.ICNT1 = Nrows;
891  sa1Params.DIM1 = rowUStride;
892  sa1Params.ICNT2 = nTile;
893  sa1Params.DECDIM1_WIDTH = Ncols;
894 
895  sa2Params.ICNT0 = Nrows;
896  sa2Params.ICNT1 = nTile;
897 
898  sa3Params.ICNT0 = Nrows;
899  sa3Params.ICNT1 = nTile;
900 
901  dataType *pSE0 = U;
902  dataType *pSE1 = U + rowUStride;
903  __SE0_OPEN(pSE0, se0Params);
904  __SE1_OPEN(pSE1, se1Params);
905  __SA0_OPEN(sa0Params);
906  __SA1_OPEN(sa1Params);
907  __SA2_OPEN(sa2Params);
908  __SA3_OPEN(sa3Params);
909 
910  for (int32_t tile = 0; tile < nTile; tile++) {
911  vec r1 = c7x::strm_eng<0, vec>::get_adv();
912  vec r2 = c7x::strm_eng<0, vec>::get_adv();
913  vec r3 = c7x::strm_eng<0, vec>::get_adv();
914  vec r4 = c7x::strm_eng<0, vec>::get_adv();
915  vec r5 = c7x::strm_eng<0, vec>::get_adv();
916  vec r6 = c7x::strm_eng<0, vec>::get_adv();
917  vec r7 = c7x::strm_eng<0, vec>::get_adv();
918  vec r8 = c7x::strm_eng<0, vec>::get_adv();
919 
920  dataType *pcU = c7x::strm_agen<2, dataType>::get_adv(cU);
921  vec vcU = __vload_dup(pcU);
922  dataType *psU = c7x::strm_agen<3, dataType>::get_adv(sU);
923  vec vsU = __vload_dup(psU);
924  for (int32_t vertical = 0; vertical < Nrows; vertical++) {
925  vec v1 = c7x::strm_eng<1, vec>::get_adv();
926  vec v2 = c7x::strm_eng<1, vec>::get_adv();
927  vec v3 = c7x::strm_eng<1, vec>::get_adv();
928  vec v4 = c7x::strm_eng<1, vec>::get_adv();
929  vec v5 = c7x::strm_eng<1, vec>::get_adv();
930  vec v6 = c7x::strm_eng<1, vec>::get_adv();
931  vec v7 = c7x::strm_eng<1, vec>::get_adv();
932  vec v8 = c7x::strm_eng<1, vec>::get_adv();
933 
934  vec vs1 = v1 * vcU - r1 * vsU;
935  r1 = v1 * vsU + r1 * vcU;
936 
937  vec vs2 = v2 * vcU - r2 * vsU;
938  r2 = v2 * vsU + r2 * vcU;
939 
940  vec vs3 = v3 * vcU - r3 * vsU;
941  r3 = v3 * vsU + r3 * vcU;
942 
943  vec vs4 = v4 * vcU - r4 * vsU;
944  r4 = v4 * vsU + r4 * vcU;
945 
946  vec vs5 = v5 * vcU - r5 * vsU;
947  r5 = v5 * vsU + r5 * vcU;
948 
949  vec vs6 = v6 * vcU - r6 * vsU;
950  r6 = v6 * vsU + r6 * vcU;
951 
952  vec vs7 = v7 * vcU - r7 * vsU;
953  r7 = v7 * vsU + r7 * vcU;
954 
955  vec vs8 = v8 * vcU - r8 * vsU;
956  r8 = v8 * vsU + r8 * vcU;
957 
958  pcU = c7x::strm_agen<2, dataType>::get_adv(cU);
959  vcU = __vload_dup(pcU);
960  psU = c7x::strm_agen<3, dataType>::get_adv(sU);
961  vsU = __vload_dup(psU);
962 
963  __vpred pred = c7x::strm_agen<1, vec>::get_vpred();
964  vec *pStore = c7x::strm_agen<1, vec>::get_adv(pSE1);
965  __vstore_pred(pred, pStore, vs1);
966 
967  pred = c7x::strm_agen<1, vec>::get_vpred();
968  pStore = c7x::strm_agen<1, vec>::get_adv(pSE1);
969  __vstore_pred(pred, pStore, vs2);
970 
971  pred = c7x::strm_agen<1, vec>::get_vpred();
972  pStore = c7x::strm_agen<1, vec>::get_adv(pSE1);
973  __vstore_pred(pred, pStore, vs3);
974 
975  pred = c7x::strm_agen<1, vec>::get_vpred();
976  pStore = c7x::strm_agen<1, vec>::get_adv(pSE1);
977  __vstore_pred(pred, pStore, vs4);
978 
979  pred = c7x::strm_agen<1, vec>::get_vpred();
980  pStore = c7x::strm_agen<1, vec>::get_adv(pSE1);
981  __vstore_pred(pred, pStore, vs5);
982 
983  pred = c7x::strm_agen<1, vec>::get_vpred();
984  pStore = c7x::strm_agen<1, vec>::get_adv(pSE1);
985  __vstore_pred(pred, pStore, vs6);
986 
987  pred = c7x::strm_agen<1, vec>::get_vpred();
988  pStore = c7x::strm_agen<1, vec>::get_adv(pSE1);
989  __vstore_pred(pred, pStore, vs7);
990 
991  pred = c7x::strm_agen<1, vec>::get_vpred();
992  pStore = c7x::strm_agen<1, vec>::get_adv(pSE1);
993  __vstore_pred(pred, pStore, vs8);
994  }
995  __vpred pred1 = c7x::strm_agen<0, vec>::get_vpred();
996  vec *pStore1 = c7x::strm_agen<0, vec>::get_adv(pSE0);
997  __vstore_pred(pred1, pStore1, r1);
998 
999  pred1 = c7x::strm_agen<0, vec>::get_vpred();
1000  pStore1 = c7x::strm_agen<0, vec>::get_adv(pSE0);
1001  __vstore_pred(pred1, pStore1, r2);
1002 
1003  pred1 = c7x::strm_agen<0, vec>::get_vpred();
1004  pStore1 = c7x::strm_agen<0, vec>::get_adv(pSE0);
1005  __vstore_pred(pred1, pStore1, r3);
1006 
1007  pred1 = c7x::strm_agen<0, vec>::get_vpred();
1008  pStore1 = c7x::strm_agen<0, vec>::get_adv(pSE0);
1009  __vstore_pred(pred1, pStore1, r4);
1010 
1011  pred1 = c7x::strm_agen<0, vec>::get_vpred();
1012  pStore1 = c7x::strm_agen<0, vec>::get_adv(pSE0);
1013  __vstore_pred(pred1, pStore1, r5);
1014 
1015  pred1 = c7x::strm_agen<0, vec>::get_vpred();
1016  pStore1 = c7x::strm_agen<0, vec>::get_adv(pSE0);
1017  __vstore_pred(pred1, pStore1, r6);
1018 
1019  pred1 = c7x::strm_agen<0, vec>::get_vpred();
1020  pStore1 = c7x::strm_agen<0, vec>::get_adv(pSE0);
1021  __vstore_pred(pred1, pStore1, r7);
1022 
1023  pred1 = c7x::strm_agen<0, vec>::get_vpred();
1024  pStore1 = c7x::strm_agen<0, vec>::get_adv(pSE0);
1025  __vstore_pred(pred1, pStore1, r8);
1026  }
1027 
1028  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Exiting function");
1029 }
1031  int32_t startRow,
1032  int32_t Nrows,
1033  int32_t Ncols,
1034  int32_t rowUStride,
1035  float *cU,
1036  float *sU,
1037  uint8_t *pBlock);
1039  int32_t startRow,
1040  int32_t Nrows,
1041  int32_t Ncols,
1042  int32_t rowUStride,
1043  double *cU,
1044  double *sU,
1045  uint8_t *pBlock);
1046 
1047 /* ======================================================================== */
1048 /* End of file: DSPLIB_svd_diag_ci.cpp */
1049 /* ======================================================================== */
void DSPLIB_diag_negate_v_ci(dataType *V, int32_t Ncols, int32_t colVStride, uint8_t *pBlock)
Negates the values of a row in V.
void DSPLIB_diag_proc_init_ci(DSPLIB_kernelHandle handle)
void DSPLIB_diag_rotation_check_ci(dataType *diag, dataType *superdiag, dataType epsilon, int32_t *m, int32_t *rotation_test, int32_t Ncols, uint8_t *pBlock)
Updates values of "m" and "rotation_test" flag based on the values present in "diag",...
double constEpsilon< double >()
uint64_t movePredicate< double >(__vpred pred)
template void DSPLIB_diag_rotation_proc_ci< float >(float *U, int32_t startRow, int32_t Nrows, int32_t Ncols, int32_t rowUStride, float *cU, float *sU, uint8_t *pBlock)
template void DSPLIB_diag_rotation_check_ci< double >(double *diag, double *superdiag, double epsilon, int32_t *m, int32_t *rotation_test, int32_t Ncols, uint8_t *pBlock)
template void DSPLIB_diag_sqrt_ci< double >(double *superdiag, double *diag, int32_t length, uint8_t *pBlock)
uint64_t movePredicate(__vpred pred)
Moves predicate register to a 64-bit register.
template void DSPLIB_diag_epsilon_ci< float >(float *diag, float *superdiag, float *epsilon, int32_t Ncols, uint8_t *pBlock)
float constEpsilon< float >()
template void DSPLIB_diag_epsilon_ci< double >(double *diag, double *superdiag, double *epsilon, int32_t Ncols, uint8_t *pBlock)
dataType constEpsilon()
void DSPLIB_diag_epsilon_ci(dataType *diag, dataType *superdiag, dataType *epsilon, int32_t Ncols, uint8_t *pBlock)
Updates "epsilon" value based on absolute max values from "diag" and "superdiag" vectors.
template void DSPLIB_diag_rotation_proc_ci< double >(double *U, int32_t startRow, int32_t Nrows, int32_t Ncols, int32_t rowUStride, double *cU, double *sU, uint8_t *pBlock)
void DSPLIB_diag_proc_ci(dataType *V, int32_t startRow, int32_t Nrows, int32_t Ncols, int32_t rowVStride, dataType *cV, dataType *sV, uint8_t *pBlock)
Updates rows of V' and U' based on the precalculated cV/cU and sV/sU vectors.
template void DSPLIB_diag_sqrt_ci< float >(float *superdiag, float *diag, int32_t length, uint8_t *pBlock)
template void DSPLIB_diag_negate_v_ci< float >(float *V, int32_t Ncols, int32_t colVStride, uint8_t *pBlock)
uint64_t movePredicate< float >(__vpred pred)
template void DSPLIB_diag_negate_v_ci< double >(double *V, int32_t Ncols, int32_t colVStride, uint8_t *pBlock)
template void DSPLIB_diag_proc_init_ci< double >(DSPLIB_kernelHandle handle)
void DSPLIB_diag_sqrt_ci(dataType *superdiag, dataType *diag, int32_t length, uint8_t *pBlock)
Calculates the reciprocal of square-roots of "diag" and "superdiag" vectors.
template void DSPLIB_diag_rotation_check_ci< float >(float *diag, float *superdiag, float epsilon, int32_t *m, int32_t *rotation_test, int32_t Ncols, uint8_t *pBlock)
template void DSPLIB_diag_proc_init_ci< float >(DSPLIB_kernelHandle handle)
void DSPLIB_diag_rotation_proc_ci(dataType *U, int32_t startRow, int32_t Nrows, int32_t Ncols, int32_t rowUStride, dataType *cU, dataType *sU, uint8_t *pBlock)
Updates rows of U' based on the precalculated cU and sU vectors.
template void DSPLIB_diag_proc_ci< double >(double *V, int32_t startRow, int32_t Nrows, int32_t Ncols, int32_t rowVStride, double *cV, double *sV, uint8_t *pBlock)
template void DSPLIB_diag_proc_ci< float >(float *V, int32_t startRow, int32_t Nrows, int32_t Ncols, int32_t rowVStride, float *cV, float *sV, uint8_t *pBlock)
Header file for kernel's internal use. For the kernel's interface, please see DSPLIB_svd.
#define DSPLIB_DEBUGPRINTFN(N, fmt,...)
Definition: DSPLIB_types.h:83
void * DSPLIB_kernelHandle
Handle type for DSPLIB operations.
Definition: DSPLIB_types.h:172
@ DSPLIB_SUCCESS
Definition: DSPLIB_types.h:152
Structure that is reserved for internal use by the kernel.
uint8_t bufPblock[DSPLIB_SVD_IXX_IXX_OXX_PBLOCK_SIZE]
Buffer to save SE & SA configuration parameters