DSPLIB User Guide
DSPLIB_svd_bidiag_uFinal_ci.cpp
Go to the documentation of this file.
1 /******************************************************************************
2  * *
3  * module name :DSPLIB *
4  * *
5  * module description :Digital Signal Processing Library module for C7x+MMA *
6  * *
7  * Copyright (C) 2017-2018 Texas Instruments Incorporated - https://www.ti.com/ *
8  * ALL RIGHTS RESERVED *
9  * *
10  ******************************************************************************/
11 
23 /* *****************************************************************************
24  *
25  * INCLUDES
26  *
27  ***************************************************************************** */
28 
29 #include "DSPLIB_svd_priv.h"
30 
31 /* *****************************************************************************
32  *
33  * INITIALIZATION
34  *
35  ***************************************************************************** */
36 
/*
 * Prepares the stream-engine (SE) and address-generator (SA) parameter
 * templates used by the other uFinal routines in this listing and stores them
 * in slots 12..18 of the kernel's parameter block (bufPblock). Each slot lies
 * at a byte offset of slot * SE_PARAM_SIZE.
 *
 * handle : kernel handle; it is cast to DSPLIB_svd_PrivArgs* and supplies
 *          bufPblock and strideU.
 *
 * Only stride / format / vector-length fields (and a few ICNT0 values) are
 * fixed here; the routines that load a template fill in the remaining
 * iteration counts before opening the stream.
 */
40 template <typename dataType> void DSPLIB_bidiag_uFinal_init_ci(DSPLIB_kernelHandle handle)
41 {
42  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Entering function");
43  DSPLIB_svd_PrivArgs *pKerPrivArgs = (DSPLIB_svd_PrivArgs *) handle;
44  uint8_t *pBlock = pKerPrivArgs->bufPblock;
45  int32_t strideU = pKerPrivArgs->strideU;
46 
 /* Start every template from the generated default. */
47  __SE_TEMPLATE_v1 se1Params = __gen_SE_TEMPLATE_v1();
48  __SE_TEMPLATE_v1 se3Params = __gen_SE_TEMPLATE_v1();
49  __SA_TEMPLATE_v1 sa0Params = __gen_SA_TEMPLATE_v1();
50  __SA_TEMPLATE_v1 sa1Params = __gen_SA_TEMPLATE_v1();
51  __SA_TEMPLATE_v1 sa2Params = __gen_SA_TEMPLATE_v1();
52  __SA_TEMPLATE_v1 sa3Params = __gen_SA_TEMPLATE_v1();
53  __SA_TEMPLATE_v1 sa4Params = __gen_SA_TEMPLATE_v1();
54 
 /* Full-width vector type for dataType and the matching SE/SA enumerators. */
55  typedef typename c7x::make_full_vector<dataType>::type vec;
56  int32_t eleCount = c7x::element_count_of<vec>::value;
57  __SE_ELETYPE SE_ELETYPE = c7x::se_eletype<vec>::value;
58  __SA_VECLEN SA_VECLEN = c7x::sa_veclen<vec>::value;
59 
 /* Stride expressed in elements; presumably strideU is in bytes -- confirm. */
60  int32_t colUStride = strideU / sizeof(dataType);
61 
 /* Slot 12: one element per fetch, stepping by colUStride, with DIM2 = 0 and
  * group duplication enabled. Loaded as the SE1 template in the s != 0 path of
  * the initialize routine. */
62  se1Params.ICNT0 = 1;
63  se1Params.DIM1 = colUStride;
64  se1Params.DIM2 = 0;
65  se1Params.DIMFMT = __SE_DIMFMT_3D;
66  se1Params.ELETYPE = SE_ELETYPE;
67  se1Params.VECLEN = __SE_VECLEN_1ELEM;
68  se1Params.GRPDUP = __SE_GRPDUP_ON;
69 
 /* Slot 13: one vector per step, DIM1 = colUStride, DIM2 = two vectors.
  * Loaded for both SA0 and SA1 in the initialize routine, which also sets
  * DECDIM1_WIDTH. */
70  sa0Params.ICNT0 = eleCount;
71  sa0Params.DIM1 = colUStride;
72  sa0Params.DIM2 = eleCount * 2;
73  sa0Params.DIMFMT = __SA_DIMFMT_3D;
74  sa0Params.VECLEN = SA_VECLEN;
75  sa0Params.DECDIM1 = __SA_DECDIM_DIM2;
76 
 /* Slots 14 and 15: single-element SE/SA pair stepping by colUStride + 1,
  * i.e. one row down and one column right per step (the diagonal pass of the
  * initialize routine). */
77  se3Params.ICNT0 = 1;
78  se3Params.DIM1 = colUStride + 1;
79  se3Params.DIMFMT = __SE_DIMFMT_2D;
80  se3Params.ELETYPE = SE_ELETYPE;
81  se3Params.VECLEN = __SE_VECLEN_1ELEM;
82 
83  sa3Params.ICNT0 = 1;
84  sa3Params.DIM1 = colUStride + 1;
85  sa3Params.DIMFMT = __SA_DIMFMT_2D;
86  sa3Params.VECLEN = __SA_VECLEN_1ELEM;
87 
 /* Slot 16: single element stepping by colUStride; loaded by
  * DSPLIB_bidiag_uFinal_normalize_ci. */
88  sa1Params.ICNT0 = 1;
89  sa1Params.DIM1 = colUStride;
90  sa1Params.DIMFMT = __SA_DIMFMT_2D;
91  sa1Params.VECLEN = __SA_VECLEN_1ELEM;
92 
 /* Slot 17: full-vector accesses with DIM1 = colUStride; ICNT0 is left for
  * DSPLIB_bidiag_uFinal_expand_ci to fill in. */
93  sa2Params.DIM1 = colUStride;
94  sa2Params.DIMFMT = __SA_DIMFMT_2D;
95  sa2Params.VECLEN = SA_VECLEN;
96 
 /* Slot 18: single element stepping by colUStride with DIM2 = 0; loaded by
  * DSPLIB_bidiag_uFinal_ci for its SA3 stream. */
97  sa4Params.ICNT0 = 1;
98  sa4Params.DIM1 = colUStride;
99  sa4Params.DIM2 = 0;
100  sa4Params.DIMFMT = __SA_DIMFMT_3D;
101  sa4Params.VECLEN = __SA_VECLEN_1ELEM;
102 
 /* Publish the templates into the parameter block. Note that the local
  * variable names do not match the slot order (e.g. sa1Params goes to slot 16
  * and sa4Params to slot 18). */
103  *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (12 * SE_PARAM_SIZE)) = se1Params;
104  *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (13 * SE_PARAM_SIZE)) = sa0Params;
105  *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (14 * SE_PARAM_SIZE)) = se3Params;
106  *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (15 * SE_PARAM_SIZE)) = sa3Params;
107  *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (16 * SE_PARAM_SIZE)) = sa1Params;
108  *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (17 * SE_PARAM_SIZE)) = sa2Params;
109  *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (18 * SE_PARAM_SIZE)) = sa4Params;
110 
111  DSPLIB_DEBUGPRINTFN(0, "Exiting function with return status: %d\n", DSPLIB_SUCCESS);
112 }
115 
116 /* *****************************************************************************
117  *
118  * IMPLEMENTATION
119  *
120  ***************************************************************************** */
121 
/*
 * Zero-fills columns Ncols .. Nrows-1 of every row of U, i.e. the columns
 * added when U is widened to Nrows x Nrows.
 *
 * U          : base pointer of the matrix.
 * Nrows      : number of rows; also the width after expansion.
 * Ncols      : number of columns already populated.
 * colUStride : distance between consecutive rows, in elements.
 * pBlock     : parameter block; the slot-17 template is used for SA0 and SA1.
 *
 * The rows are split between two address generators: SA0 covers the first
 * Nrows / 2 rows and SA1 the remaining rows, so two rows are cleared per loop
 * iteration.
 */
126 template <typename dataType>
127 void DSPLIB_bidiag_uFinal_expand_ci(dataType *U, int32_t Nrows, int32_t Ncols, int32_t colUStride, uint8_t *pBlock)
128 {
129  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Entering function");
130 
 /* Both generators start from the same stored template (slot 17). */
131  __SA_TEMPLATE_v1 sa0Params, sa1Params;
132  sa0Params = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (17 * SE_PARAM_SIZE));
133  sa1Params = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (17 * SE_PARAM_SIZE));
134 
135  typedef typename c7x::make_full_vector<dataType>::type vec;
136  int32_t eleCount = c7x::element_count_of<vec>::value;
137 
138  int32_t expandCols = Nrows - Ncols; /* Number of columns to be filled with zeros */
 /* Row split: SA1 receives the extra row when Nrows is odd. */
139  int32_t sa0ICNT = Nrows / 2;
140  int32_t sa1ICNT = Nrows - sa0ICNT;
141 
 /* Inner dimension = elements to clear per row, outer dimension = rows. */
142  sa0Params.ICNT0 = sa1Params.ICNT0 = expandCols;
143  sa0Params.ICNT1 = sa0ICNT;
144  sa1Params.ICNT1 = sa1ICNT;
145 
 /* nVec vector stores are needed per row; totalIter covers all SA0 rows. */
146  int32_t nVec = DSPLIB_ceilingDiv(expandCols, eleCount);
147  int32_t totalIter = sa0ICNT * nVec;
148  int32_t uOffsetSA0 = Ncols;
149  int32_t uOffsetSA1 = Ncols + (sa0ICNT * colUStride); /* start halfway through rows */
150 
151  __SA1_OPEN(sa1Params);
152  /* if (sa0ICNT > 0) */ {
153  __SA0_OPEN(sa0Params);
 /* Each iteration stores one zero vector through SA0 and one through SA1.
  * The store is predicated with the generator's own predicate; presumably
  * this masks lanes beyond expandCols -- confirm against the C7x docs. */
154  for (int32_t iter = 0; iter < totalIter; iter++) {
155  __vpred pred1 = c7x::strm_agen<0, vec>::get_vpred();
156  vec *pStoreVec1 = c7x::strm_agen<0, vec>::get_adv(U + uOffsetSA0);
157  __vstore_pred(pred1, pStoreVec1, (vec) 0);
158 
159  __vpred pred2 = c7x::strm_agen<1, vec>::get_vpred();
160  vec *pStoreVec2 = c7x::strm_agen<1, vec>::get_adv(U + uOffsetSA1);
161  __vstore_pred(pred2, pStoreVec2, (vec) 0);
162  }
163  __SA0_CLOSE();
164  }
165 
 /* Odd Nrows: SA1 still has one row left, cleared with nVec more stores. */
166  if (sa0ICNT != sa1ICNT) {
167  for (int32_t horizontal = 0; horizontal < nVec; horizontal++) {
168  __vpred pred2 = c7x::strm_agen<1, vec>::get_vpred();
169  vec *pStoreVec2 = c7x::strm_agen<1, vec>::get_adv(U + uOffsetSA1);
170  __vstore_pred(pred2, pStoreVec2, (vec) 0);
171  }
172  }
173  __SA1_CLOSE();
174 
175  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Exiting function");
176 }
/* Explicit instantiations for the two supported element types. */
177 template void
178 DSPLIB_bidiag_uFinal_expand_ci<float>(float *U, int32_t Nrows, int32_t Ncols, int32_t colUStride, uint8_t *pBlock);
179 template void
180 DSPLIB_bidiag_uFinal_expand_ci<double>(double *U, int32_t Nrows, int32_t Ncols, int32_t colUStride, uint8_t *pBlock);
181 
/*
 * NOTE(review): the listing line that carries the function name and the first
 * parameter is missing here (the numbering jumps from 186 to 188). The symbol
 * index at the end of this page gives the signature as
 *   void DSPLIB_bidiag_uFinal_initalize_ci(dataType *U, int32_t Nrows,
 *        int32_t Ncols, int32_t colUStride, dataType s, dataType *U1,
 *        uint8_t *pBlock)
 * and describes it as the "initial U" loop.
 *
 * What the visible body does:
 *   s != 0 : for Nrows rows and Ncols columns starting at U + 1, stores
 *            (SE0 vector read from the row at U + colUStride) *
 *            (SE1 element read starting at U) * getRecip(U[0] * s);
 *            then adds 1 to Ncols elements starting at U + 1 + colUStride and
 *            stepping by colUStride + 1 (a diagonal).
 *   s == 0 : only writes 1 to those same Ncols diagonal positions.
 *
 * U1 is not referenced anywhere in the visible body.
 */
186 template <typename dataType>
188  int32_t Nrows,
189  int32_t Ncols,
190  int32_t colUStride,
191  dataType s,
192  dataType *U1,
193  uint8_t *pBlock)
194 {
195  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Entering function");
196 
197  typedef typename c7x::make_full_vector<dataType>::type vec;
198  int32_t eleCount = c7x::element_count_of<vec>::value;
199 
200  if (s != 0) {
201  __SE_TEMPLATE_v1 se0Params;
202  __SE_TEMPLATE_v1 se1Params;
203  __SA_TEMPLATE_v1 sa0Params;
204  __SA_TEMPLATE_v1 sa1Params;
205 
 /* Slot 0 is not written by the init routine in this file; slots 12 and 13
  * are. SA0 and SA1 share the slot-13 template. */
206  se0Params = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (0 * SE_PARAM_SIZE));
207  se1Params = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (12 * SE_PARAM_SIZE));
208  sa0Params = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (13 * SE_PARAM_SIZE));
209  sa1Params = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (13 * SE_PARAM_SIZE));
210 
 /* Ncols is covered by nVec vector-wide column groups. They are handled in
  * pairs: SA1 takes nVec / 2 groups and SA0 takes the rest (one more when
  * nVec is odd). */
211  int32_t nVec = DSPLIB_ceilingDiv(Ncols, eleCount);
212  int32_t sa1ICNT2 = nVec / 2;
213  int32_t sa0ICNT2 = nVec - sa1ICNT2;
214 
215  sa0Params.ICNT1 = Nrows;
216  sa0Params.ICNT2 = sa0ICNT2;
217  sa0Params.DECDIM1_WIDTH = Ncols;
218 
 /* SA1's base pointer is eleCount elements further along (see its get_adv
  * calls below), so its width is reduced by the same amount. */
219  sa1Params.ICNT1 = Nrows;
220  sa1Params.ICNT2 = sa1ICNT2;
221  sa1Params.DECDIM1_WIDTH = Ncols - eleCount;
222 
223  se0Params.ICNT2 = nVec;
224 
225  se1Params.ICNT1 = Nrows;
226  se1Params.ICNT2 = sa0ICNT2;
227 
 /* Common scale factor, broadcast to a vector. */
228  dataType reciprocalFactor = getRecip((U[0] * s));
229  vec reciprocalVec = (vec) reciprocalFactor;
230 
231  __SE1_OPEN(U, se1Params);
232  __SE0_OPEN(U + colUStride, se0Params);
233  __SA0_OPEN(sa0Params);
234  if (sa1ICNT2 > 0) {
235  __SA1_OPEN(sa1Params);
236 
 /* Two column groups per outer iteration: uCol1 / uCol2 come from SE0,
  * then one SE1 vector per row scales both before they are stored through
  * SA0 and SA1 respectively. */
237  for (int32_t horizontal = 0; horizontal < sa1ICNT2; horizontal++) {
238  vec uCol1 = c7x::strm_eng<0, vec>::get_adv();
239  vec uCol2 = c7x::strm_eng<0, vec>::get_adv();
240  for (int32_t vertical = 0; vertical < Nrows; vertical++) {
241  vec uEle = c7x::strm_eng<1, vec>::get_adv();
242 
243  vec v1 = uCol1 * uEle * reciprocalVec;
244  vec v2 = uCol2 * uEle * reciprocalVec;
245 
246  __vpred pred1 = c7x::strm_agen<0, vec>::get_vpred();
247  vec *pStoreVec1 = c7x::strm_agen<0, vec>::get_adv(U + 1);
248  __vstore_pred(pred1, pStoreVec1, v1);
249 
250  __vpred pred2 = c7x::strm_agen<1, vec>::get_vpred();
251  vec *pStoreVec2 = c7x::strm_agen<1, vec>::get_adv(U + 1 + eleCount);
252  __vstore_pred(pred2, pStoreVec2, v2);
253  }
254  }
255  __SA1_CLOSE();
256  }
257 
 /* Odd nVec: the last column group has no partner and goes through SA0
  * only. */
258  if (sa1ICNT2 != sa0ICNT2) {
259  vec uCol1 = c7x::strm_eng<0, vec>::get_adv();
260  for (int32_t vertical = 0; vertical < Nrows; vertical++) {
261  vec uEle = c7x::strm_eng<1, vec>::get_adv();
262 
263  vec v1 = uCol1 * uEle * reciprocalVec;
264 
265  __vpred pred1 = c7x::strm_agen<0, vec>::get_vpred();
266  vec *pStoreVec1 = c7x::strm_agen<0, vec>::get_adv(U + 1);
267  __vstore_pred(pred1, pStoreVec1, v1);
268  }
269  }
270 
271  __SE0_CLOSE();
272  __SE1_CLOSE();
273  __SA0_CLOSE();
274 
 /* Diagonal pass: SE0 reads and SA0 writes the same positions (both are
  * opened/advanced from U + 1 + colUStride; the init routine gives slots 14
  * and 15 the same DIM1 = colUStride + 1). */
275  __SE_TEMPLATE_v1 se3Params;
276  __SA_TEMPLATE_v1 sa3Params;
277 
278  se3Params = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (14 * SE_PARAM_SIZE));
279  sa3Params = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (15 * SE_PARAM_SIZE));
280 
281  se3Params.ICNT1 = sa3Params.ICNT1 = Ncols;
282 
283  __SE0_OPEN(U + 1 + colUStride, se3Params);
284  __SA0_OPEN(sa3Params);
285 
286  for (int32_t diag = 0; diag < Ncols; diag++) {
287  vec diagEle = c7x::strm_eng<0, vec>::get_adv();
288  dataType *pStore = c7x::strm_agen<0, dataType>::get_adv(U + 1 + colUStride);
289  *pStore = diagEle.s[0] + 1;
290  }
 /* NOTE(review): no __SE0_CLOSE / __SA0_CLOSE follows this pass in the
  * visible code, unlike every other open in this file -- confirm whether
  * that is intentional. */
291  }
292  else {
 /* s == 0: only the diagonal is written, with the constant 1. */
293  __SA_TEMPLATE_v1 sa3Params;
294 
295  sa3Params = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (15 * SE_PARAM_SIZE));
296 
297  sa3Params.ICNT1 = Ncols;
298 
299  __SA0_OPEN(sa3Params);
300 
301  for (int32_t diag = 0; diag < Ncols; diag++) {
302  dataType *pStore = c7x::strm_agen<0, dataType>::get_adv(U + 1 + colUStride);
303  *pStore = 1;
304  }
 /* NOTE(review): SA0 is likewise left open on this path. */
305  }
306 
307  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Exiting function");
308 }
/* NOTE(review): the two fragments below are the tails of the explicit
 * instantiations for float and double; their first lines (309 and 316) are
 * missing from this listing. */
310  int32_t Nrows,
311  int32_t Ncols,
312  int32_t colUStride,
313  float s,
314  float *U1,
315  uint8_t *pBlock);
317  int32_t Nrows,
318  int32_t Ncols,
319  int32_t colUStride,
320  double s,
321  double *U1,
322  uint8_t *pBlock);
323 
/*
 * Scales Nrows elements of U by 1 / s, or sets them to zero when s == 0.
 *
 * U          : base pointer used for both the SE0 read and the SA0 writes.
 * Nrows      : number of elements processed.
 * s          : normalization value; its reciprocal is the scale factor.
 * colUStride : not referenced in the body; the write stride comes from the
 *              slot-16 template prepared by the init routine.
 * pBlock     : parameter block; slot 0 supplies the SE0 template and slot 16
 *              the SA0 template.
 *
 * Values are read a full vector at a time through SE0 and written back one
 * element at a time through SA0.
 */
327 template <typename dataType>
328 void DSPLIB_bidiag_uFinal_normalize_ci(dataType *U, int32_t Nrows, dataType s, int32_t colUStride, uint8_t *pBlock)
329 {
330  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Entering function");
331 
332  __SE_TEMPLATE_v1 se0Params;
333  __SA_TEMPLATE_v1 sa0Params;
334  se0Params = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (0 * SE_PARAM_SIZE));
335  sa0Params = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (16 * SE_PARAM_SIZE));
336 
337  typedef typename c7x::make_full_vector<dataType>::type vec;
338  int32_t eleCount = c7x::element_count_of<vec>::value;
339 
 /* nVec counts only complete vectors; remainingRows is the partial tail. */
340  int32_t nVec = Nrows / eleCount;
341  int32_t remainingRows = Nrows - (nVec * eleCount);
 /* NOTE(review): the reciprocal is evaluated before the s != 0 test below,
  * so getRecip() is also called with s == 0 (the result is unused on that
  * path) -- confirm getRecip tolerates a zero argument. */
342  dataType normFactor = getRecip(s);
343  vec invNormFactor = (vec) normFactor;
344 
 /* SE0 delivers ceil(Nrows / eleCount) vectors; SA0 visits Nrows elements. */
345  se0Params.ICNT2 = DSPLIB_ceilingDiv(Nrows, eleCount);
346  sa0Params.ICNT1 = Nrows;
347 
348  __SA0_OPEN(sa0Params);
349 
350  if (s != 0) {
351  __SE0_OPEN(U, se0Params);
352 
 /* Complete vectors: scale, then store every lane individually. */
353  for (int32_t vertical = 0; vertical < nVec; vertical++) {
354  vec v1 = c7x::strm_eng<0, vec>::get_adv();
355 
356  v1 *= invNormFactor;
357 
358  for (int32_t i = 0; i < eleCount; i++) {
359  dataType *pStoreVec1 = c7x::strm_agen<0, dataType>::get_adv(U);
360  *pStoreVec1 = v1.s[i];
361  }
362  }
363 
 /* Partial last vector: only the first remainingRows lanes are stored. */
364  if (remainingRows > 0) {
365  vec v1 = c7x::strm_eng<0, vec>::get_adv();
366  v1 *= invNormFactor;
367  for (int32_t i = 0; i < remainingRows; i++) {
368  dataType *pStoreVec1 = c7x::strm_agen<0, dataType>::get_adv(U);
369  *pStoreVec1 = v1.s[i];
370  }
371  }
372  __SE0_CLOSE();
373  }
374  else {
375 
 /* s == 0: nothing is read; all Nrows elements are overwritten with 0. */
376  for (int32_t vertical = 0; vertical < Nrows; vertical++) {
377  dataType *pStoreVec1 = c7x::strm_agen<0, dataType>::get_adv(U);
378  *pStoreVec1 = 0;
379  }
380  }
381  __SA0_CLOSE();
382 
383  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Exiting function");
384 }
/* Explicit instantiations for the two supported element types. */
385 template void
386 DSPLIB_bidiag_uFinal_normalize_ci<float>(float *U, int32_t Nrows, float s, int32_t colUStride, uint8_t *pBlock);
387 template void
388 DSPLIB_bidiag_uFinal_normalize_ci<double>(double *U, int32_t Nrows, double s, int32_t colUStride, uint8_t *pBlock);
389 
/*
 * One "update U" step.
 *
 * U          : base pointer; U[0] and further elements addressed from U by
 *              SA3 provide the scalar factors, and the block starting at
 *              U + 1 is the data that is read and rewritten.
 * Nrows      : number of rows processed.
 * Ncols      : number of columns processed (starting at U + 1).
 * colUStride : distance between consecutive rows, in elements.
 * s          : scale value; when it is 0 only the "UPDATE 0" step runs.
 * U1         : scratch buffer that holds the per-column factors ("si")
 *              between the two passes.
 * pBlock     : parameter block; slots 1, 3, 4, 6 and 18 are read here.
 *
 * Steps visible in the body:
 *   1. UPDATE 0      - zero Ncols elements starting at U + 1.
 *   2. CALCULATE si  - per column, accumulate (column value * scalar from
 *                      SA3) over all rows, scale by getRecip(U[0] * s) and
 *                      store to U1.
 *   3. UPDATE COLUMNS- add (scalar from SA3) * si to each element and store
 *                      the result through SA0 / SA1.
 * Columns are processed in tiles of 8 vectors, and rows two at a time
 * (SE0 / SA0 for one row of the pair, SE1 / SA1 for the other).
 *
 * NOTE(review): the guards "if (se1ICNT1 > 0)" and "if (Nrows >= 2)" are
 * commented out, so the SE1 / SA1 paths and the epilogue of step 3 run even
 * when Nrows / 2 == 0 -- confirm callers guarantee Nrows >= 2.
 */
394 template <typename dataType>
395 void DSPLIB_bidiag_uFinal_ci(dataType *U,
396  int32_t Nrows,
397  int32_t Ncols,
398  int32_t colUStride,
399  dataType s,
400  dataType *U1,
401  uint8_t *pBlock)
402 {
403  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Entering function");
404  typedef typename c7x::make_full_vector<dataType>::type vec;
405  int32_t eleCount = c7x::element_count_of<vec>::value;
406 
407  __SA_TEMPLATE_v1 sa2ParamsUpdate0;
408 
409  sa2ParamsUpdate0 = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (1 * SE_PARAM_SIZE));
410 
411  int32_t nVec = DSPLIB_ceilingDiv(Ncols, eleCount);
412 
413  sa2ParamsUpdate0.ICNT0 = Ncols;
414  /************************************ UPDATE 0 *********************************************
415  ****************************************************************************************** */
 /* nVec predicated zero stores through SA2, starting at U + 1. */
416  __SA2_OPEN(sa2ParamsUpdate0);
417  for (int32_t horizontal = 0; horizontal < nVec; horizontal++) {
418  __vpred pred = c7x::strm_agen<2, vec>::get_vpred();
419  vec *pStoreVec = c7x::strm_agen<2, vec>::get_adv(U + 1);
420  __vstore_pred(pred, pStoreVec, (vec) 0);
421  }
422  __SA2_CLOSE();
423 
424  if (s != 0) {
425  /************************************ CALCULATE si *****************************************
426  ****************************************************************************************** */
427  vec siNormFactor = (vec) getRecip((U[0] * s));
428 
429  __SE_TEMPLATE_v1 se0Params;
430  __SE_TEMPLATE_v1 se1Params;
431  __SA_TEMPLATE_v1 sa2Params;
432 
 /* SE0 and SE1 share the slot-3 template; SA2 uses slot 6. These slots are
  * not written by the init routine in this file. */
433  se0Params = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (3 * SE_PARAM_SIZE));
434  se1Params = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (3 * SE_PARAM_SIZE));
435  sa2Params = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (6 * SE_PARAM_SIZE));
436 
 /* A tile is lenTile (8) vectors wide. Row pairs: SE1 gets Nrows / 2 rows,
  * SE0 gets the rest (one extra row when Nrows is odd). */
437  int32_t lenTile = 8;
438  int32_t nTiles = DSPLIB_ceilingDiv(Ncols, eleCount * lenTile);
439  int32_t se1ICNT1 = Nrows / 2;
440  int32_t se0ICNT1 = Nrows - se1ICNT1;
441 
442  se0Params.ICNT1 = se0ICNT1;
443  se0Params.ICNT2 = nTiles;
444  se0Params.DECDIM1_WIDTH = Ncols;
445 
446  se1Params.ICNT1 = se1ICNT1;
447  se1Params.ICNT2 = nTiles;
448  se1Params.DECDIM1_WIDTH = Ncols;
449 
450  sa2Params.ICNT1 = lenTile * nTiles;
451  sa2Params.DECDIM1_WIDTH = Ncols;
452 
453  dataType *siStore = (dataType *) U1;
454 
 /* SA3 (slot 18) addresses the scalar factors one element at a time from
  * base U. ICNT2 = 2 * nTiles because SA3 is walked once per tile in each
  * of the two passes below without being reopened. */
455  __SA_TEMPLATE_v1 sa3Params;
456  sa3Params = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (18 * SE_PARAM_SIZE));
457  sa3Params.ICNT1 = Nrows;
458  sa3Params.ICNT2 = 2 * nTiles;
459 
460  __SE0_OPEN(U + 1, se0Params);
461  __SA2_OPEN(sa2Params);
462  __SA3_OPEN(sa3Params);
463 
464  /* if (se1ICNT1 > 0) */ /* if number of rows > 1 */
465  {
466  __SE1_OPEN(U + 1 + colUStride, se1Params);
467  }
 /* Pass 1: one set of eight accumulators per tile. */
468  for (int32_t tile = 0; tile < nTiles; tile++) {
469  vec acc1, acc2, acc3, acc4, acc5, acc6, acc7, acc8;
470  acc1 = acc2 = acc3 = acc4 = acc5 = acc6 = acc7 = acc8 = (vec) 0;
 /* Each iteration consumes a pair of rows: 8 vectors from SE0, 8 from
  * SE1, and the two matching scalars (broadcast with __vload_dup). */
471  for (int32_t vertical = 0; vertical < se1ICNT1; vertical++) {
472  vec v01 = c7x::strm_eng<0, vec>::get_adv();
473  vec v02 = c7x::strm_eng<0, vec>::get_adv();
474  vec v03 = c7x::strm_eng<0, vec>::get_adv();
475  vec v04 = c7x::strm_eng<0, vec>::get_adv();
476  vec v05 = c7x::strm_eng<0, vec>::get_adv();
477  vec v06 = c7x::strm_eng<0, vec>::get_adv();
478  vec v07 = c7x::strm_eng<0, vec>::get_adv();
479  vec v08 = c7x::strm_eng<0, vec>::get_adv();
480 
481  vec v11 = c7x::strm_eng<1, vec>::get_adv();
482  vec v12 = c7x::strm_eng<1, vec>::get_adv();
483  vec v13 = c7x::strm_eng<1, vec>::get_adv();
484  vec v14 = c7x::strm_eng<1, vec>::get_adv();
485  vec v15 = c7x::strm_eng<1, vec>::get_adv();
486  vec v16 = c7x::strm_eng<1, vec>::get_adv();
487  vec v17 = c7x::strm_eng<1, vec>::get_adv();
488  vec v18 = c7x::strm_eng<1, vec>::get_adv();
489 
490  dataType *pU1 = c7x::strm_agen<3, dataType>::get_adv(U);
491  vec u1 = __vload_dup(pU1);
492  dataType *pU2 = c7x::strm_agen<3, dataType>::get_adv(U);
493  vec u2 = __vload_dup(pU2);
494 
495  acc1 += v01 * u1 + v11 * u2;
496  acc2 += v02 * u1 + v12 * u2;
497  acc3 += v03 * u1 + v13 * u2;
498  acc4 += v04 * u1 + v14 * u2;
499  acc5 += v05 * u1 + v15 * u2;
500  acc6 += v06 * u1 + v16 * u2;
501  acc7 += v07 * u1 + v17 * u2;
502  acc8 += v08 * u1 + v18 * u2;
503  }
504 
505  if (se1ICNT1 != se0ICNT1) /* For last odd numbered row */
506  {
507  vec v01 = c7x::strm_eng<0, vec>::get_adv();
508  vec v02 = c7x::strm_eng<0, vec>::get_adv();
509  vec v03 = c7x::strm_eng<0, vec>::get_adv();
510  vec v04 = c7x::strm_eng<0, vec>::get_adv();
511  vec v05 = c7x::strm_eng<0, vec>::get_adv();
512  vec v06 = c7x::strm_eng<0, vec>::get_adv();
513  vec v07 = c7x::strm_eng<0, vec>::get_adv();
514  vec v08 = c7x::strm_eng<0, vec>::get_adv();
515 
516  dataType *pU1 = c7x::strm_agen<3, dataType>::get_adv(U);
517  vec u1 = __vload_dup(pU1);
518 
519  acc1 += v01 * u1;
520  acc2 += v02 * u1;
521  acc3 += v03 * u1;
522  acc4 += v04 * u1;
523  acc5 += v05 * u1;
524  acc6 += v06 * u1;
525  acc7 += v07 * u1;
526  acc8 += v08 * u1;
527  }
528 
 /* Scale the eight accumulators and park them in the scratch buffer
  * through SA2 (predicated stores). */
529  __vpred pred = c7x::strm_agen<2, vec>::get_vpred();
530  vec *pStoreVec = c7x::strm_agen<2, vec>::get_adv(siStore);
531  __vstore_pred(pred, pStoreVec, acc1 * siNormFactor);
532 
533  pred = c7x::strm_agen<2, vec>::get_vpred();
534  pStoreVec = c7x::strm_agen<2, vec>::get_adv(siStore);
535  __vstore_pred(pred, pStoreVec, acc2 * siNormFactor);
536 
537  pred = c7x::strm_agen<2, vec>::get_vpred();
538  pStoreVec = c7x::strm_agen<2, vec>::get_adv(siStore);
539  __vstore_pred(pred, pStoreVec, acc3 * siNormFactor);
540 
541  pred = c7x::strm_agen<2, vec>::get_vpred();
542  pStoreVec = c7x::strm_agen<2, vec>::get_adv(siStore);
543  __vstore_pred(pred, pStoreVec, acc4 * siNormFactor);
544 
545  pred = c7x::strm_agen<2, vec>::get_vpred();
546  pStoreVec = c7x::strm_agen<2, vec>::get_adv(siStore);
547  __vstore_pred(pred, pStoreVec, acc5 * siNormFactor);
548 
549  pred = c7x::strm_agen<2, vec>::get_vpred();
550  pStoreVec = c7x::strm_agen<2, vec>::get_adv(siStore);
551  __vstore_pred(pred, pStoreVec, acc6 * siNormFactor);
552 
553  pred = c7x::strm_agen<2, vec>::get_vpred();
554  pStoreVec = c7x::strm_agen<2, vec>::get_adv(siStore);
555  __vstore_pred(pred, pStoreVec, acc7 * siNormFactor);
556 
557  pred = c7x::strm_agen<2, vec>::get_vpred();
558  pStoreVec = c7x::strm_agen<2, vec>::get_adv(siStore);
559  __vstore_pred(pred, pStoreVec, acc8 * siNormFactor);
560  }
561 
562  /************************************ UPDATE COLUMNS ***************************************
563  ****************************************************************************************** */
564 
565  __SA_TEMPLATE_v1 sa0Params;
566  __SA_TEMPLATE_v1 sa1Params;
567 
 /* SA0 and SA1 share the slot-4 template.
  * NOTE(review): sa2Params is reloaded from slot 6 here, but no __SA2_OPEN
  * follows and the variable is not used again in this function; SE0, SE1,
  * SA2 and SA3 are not reopened either, so pass 2 continues on the streams
  * opened before pass 1. Presumably the stored templates carry an outer
  * repeat that makes the streams cover both passes -- confirm. */
568  sa0Params = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (4 * SE_PARAM_SIZE));
569  sa1Params = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (4 * SE_PARAM_SIZE));
570  sa2Params = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (6 * SE_PARAM_SIZE));
571 
572  sa0Params.ICNT1 = se0ICNT1;
573  sa0Params.ICNT2 = nTiles;
574  sa0Params.DECDIM1_WIDTH = Ncols;
575 
576  sa1Params.ICNT1 = se1ICNT1;
577  sa1Params.ICNT2 = nTiles;
578  sa1Params.DECDIM1_WIDTH = Ncols;
579 
580  __SA0_OPEN(sa0Params);
581  /* if (se1ICNT1 > 0) */ {
582  __SA1_OPEN(sa1Params);
583  }
 /* Pass 2: per tile, reload the eight si vectors from the scratch buffer,
  * then update every row of the tile. */
584  for (int32_t tile = 0; tile < nTiles; tile++) {
585  __vpred pred = c7x::strm_agen<2, vec>::get_vpred();
586  vec *pSi = c7x::strm_agen<2, vec>::get_adv(siStore);
587  vec si1 = __vload_pred(pred, pSi);
588 
589  pred = c7x::strm_agen<2, vec>::get_vpred();
590  pSi = c7x::strm_agen<2, vec>::get_adv(siStore);
591  vec si2 = __vload_pred(pred, pSi);
592 
593  pred = c7x::strm_agen<2, vec>::get_vpred();
594  pSi = c7x::strm_agen<2, vec>::get_adv(siStore);
595  vec si3 = __vload_pred(pred, pSi);
596 
597  pred = c7x::strm_agen<2, vec>::get_vpred();
598  pSi = c7x::strm_agen<2, vec>::get_adv(siStore);
599  vec si4 = __vload_pred(pred, pSi);
600 
601  pred = c7x::strm_agen<2, vec>::get_vpred();
602  pSi = c7x::strm_agen<2, vec>::get_adv(siStore);
603  vec si5 = __vload_pred(pred, pSi);
604 
605  pred = c7x::strm_agen<2, vec>::get_vpred();
606  pSi = c7x::strm_agen<2, vec>::get_adv(siStore);
607  vec si6 = __vload_pred(pred, pSi);
608 
609  pred = c7x::strm_agen<2, vec>::get_vpred();
610  pSi = c7x::strm_agen<2, vec>::get_adv(siStore);
611  vec si7 = __vload_pred(pred, pSi);
612 
613  pred = c7x::strm_agen<2, vec>::get_vpred();
614  pSi = c7x::strm_agen<2, vec>::get_adv(siStore);
615  vec si8 = __vload_pred(pred, pSi);
616 
 /* The scalars for the first row pair are fetched ahead of the loop; each
  * loop iteration then fetches the scalars for the NEXT pair. That is why
  * the loop runs se1ICNT1 - 1 times and the last pair is handled by the
  * separate block after it. */
617  /* if (Nrows >= 2) */ {
618  dataType *pU1 = c7x::strm_agen<3, dataType>::get_adv(U);
619  vec u1 = __vload_dup(pU1);
620  dataType *pU2 = c7x::strm_agen<3, dataType>::get_adv(U);
621  vec u2 = __vload_dup(pU2);
622  for (int32_t vertical = 0; vertical < se1ICNT1 - 1; vertical++) {
623  vec v01 = c7x::strm_eng<0, vec>::get_adv();
624  vec v02 = c7x::strm_eng<0, vec>::get_adv();
625  vec v03 = c7x::strm_eng<0, vec>::get_adv();
626  vec v04 = c7x::strm_eng<0, vec>::get_adv();
627  vec v05 = c7x::strm_eng<0, vec>::get_adv();
628  vec v06 = c7x::strm_eng<0, vec>::get_adv();
629  vec v07 = c7x::strm_eng<0, vec>::get_adv();
630  vec v08 = c7x::strm_eng<0, vec>::get_adv();
631 
632  vec v11 = c7x::strm_eng<1, vec>::get_adv();
633  vec v12 = c7x::strm_eng<1, vec>::get_adv();
634  vec v13 = c7x::strm_eng<1, vec>::get_adv();
635  vec v14 = c7x::strm_eng<1, vec>::get_adv();
636  vec v15 = c7x::strm_eng<1, vec>::get_adv();
637  vec v16 = c7x::strm_eng<1, vec>::get_adv();
638  vec v17 = c7x::strm_eng<1, vec>::get_adv();
639  vec v18 = c7x::strm_eng<1, vec>::get_adv();
640 
 /* Copy the current scalars before u1 / u2 are overwritten below. */
641  vec ele1 = u1;
642  vec ele2 = u2;
643 
644  v01 += ele1 * si1;
645  v02 += ele1 * si2;
646  v03 += ele1 * si3;
647  v04 += ele1 * si4;
648  v05 += ele1 * si5;
649  v06 += ele1 * si6;
650  v07 += ele1 * si7;
651  v08 += ele1 * si8;
652 
653  v11 += ele2 * si1;
654  v12 += ele2 * si2;
655  v13 += ele2 * si3;
656  v14 += ele2 * si4;
657  v15 += ele2 * si5;
658  v16 += ele2 * si6;
659  v17 += ele2 * si7;
660  v18 += ele2 * si8;
661 
 /* Fetch the scalars for the next row pair. */
662  pU1 = c7x::strm_agen<3, dataType>::get_adv(U);
663  u1 = __vload_dup(pU1);
664  pU2 = c7x::strm_agen<3, dataType>::get_adv(U);
665  u2 = __vload_dup(pU2);
666 
 /* Store the updated row of the pair read via SE0 through SA0 ... */
667  __vpred pred1 = c7x::strm_agen<0, vec>::get_vpred();
668  vec *p1 = c7x::strm_agen<0, vec>::get_adv(U + 1);
669  __vstore_pred(pred1, p1, v01);
670 
671  pred1 = c7x::strm_agen<0, vec>::get_vpred();
672  p1 = c7x::strm_agen<0, vec>::get_adv(U + 1);
673  __vstore_pred(pred1, p1, v02);
674 
675  pred1 = c7x::strm_agen<0, vec>::get_vpred();
676  p1 = c7x::strm_agen<0, vec>::get_adv(U + 1);
677  __vstore_pred(pred1, p1, v03);
678 
679  pred1 = c7x::strm_agen<0, vec>::get_vpred();
680  p1 = c7x::strm_agen<0, vec>::get_adv(U + 1);
681  __vstore_pred(pred1, p1, v04);
682 
683  pred1 = c7x::strm_agen<0, vec>::get_vpred();
684  p1 = c7x::strm_agen<0, vec>::get_adv(U + 1);
685  __vstore_pred(pred1, p1, v05);
686 
687  pred1 = c7x::strm_agen<0, vec>::get_vpred();
688  p1 = c7x::strm_agen<0, vec>::get_adv(U + 1);
689  __vstore_pred(pred1, p1, v06);
690 
691  pred1 = c7x::strm_agen<0, vec>::get_vpred();
692  p1 = c7x::strm_agen<0, vec>::get_adv(U + 1);
693  __vstore_pred(pred1, p1, v07);
694 
695  pred1 = c7x::strm_agen<0, vec>::get_vpred();
696  p1 = c7x::strm_agen<0, vec>::get_adv(U + 1);
697  __vstore_pred(pred1, p1, v08);
698 
 /* ... and the row read via SE1 through SA1 (base one row further). */
699  __vpred pred2 = c7x::strm_agen<1, vec>::get_vpred();
700  vec *p2 = c7x::strm_agen<1, vec>::get_adv(U + 1 + colUStride);
701  __vstore_pred(pred2, p2, v11);
702 
703  pred2 = c7x::strm_agen<1, vec>::get_vpred();
704  p2 = c7x::strm_agen<1, vec>::get_adv(U + 1 + colUStride);
705  __vstore_pred(pred2, p2, v12);
706 
707  pred2 = c7x::strm_agen<1, vec>::get_vpred();
708  p2 = c7x::strm_agen<1, vec>::get_adv(U + 1 + colUStride);
709  __vstore_pred(pred2, p2, v13);
710 
711  pred2 = c7x::strm_agen<1, vec>::get_vpred();
712  p2 = c7x::strm_agen<1, vec>::get_adv(U + 1 + colUStride);
713  __vstore_pred(pred2, p2, v14);
714 
715  pred2 = c7x::strm_agen<1, vec>::get_vpred();
716  p2 = c7x::strm_agen<1, vec>::get_adv(U + 1 + colUStride);
717  __vstore_pred(pred2, p2, v15);
718 
719  pred2 = c7x::strm_agen<1, vec>::get_vpred();
720  p2 = c7x::strm_agen<1, vec>::get_adv(U + 1 + colUStride);
721  __vstore_pred(pred2, p2, v16);
722 
723  pred2 = c7x::strm_agen<1, vec>::get_vpred();
724  p2 = c7x::strm_agen<1, vec>::get_adv(U + 1 + colUStride);
725  __vstore_pred(pred2, p2, v17);
726 
727  pred2 = c7x::strm_agen<1, vec>::get_vpred();
728  p2 = c7x::strm_agen<1, vec>::get_adv(U + 1 + colUStride);
729  __vstore_pred(pred2, p2, v18);
730  }
731 
 /* Last row pair: same update, using the scalars already held in
  * u1 / u2; no further scalars are fetched. */
732  {
733  vec v01 = c7x::strm_eng<0, vec>::get_adv();
734  vec v02 = c7x::strm_eng<0, vec>::get_adv();
735  vec v03 = c7x::strm_eng<0, vec>::get_adv();
736  vec v04 = c7x::strm_eng<0, vec>::get_adv();
737  vec v05 = c7x::strm_eng<0, vec>::get_adv();
738  vec v06 = c7x::strm_eng<0, vec>::get_adv();
739  vec v07 = c7x::strm_eng<0, vec>::get_adv();
740  vec v08 = c7x::strm_eng<0, vec>::get_adv();
741 
742  vec v11 = c7x::strm_eng<1, vec>::get_adv();
743  vec v12 = c7x::strm_eng<1, vec>::get_adv();
744  vec v13 = c7x::strm_eng<1, vec>::get_adv();
745  vec v14 = c7x::strm_eng<1, vec>::get_adv();
746  vec v15 = c7x::strm_eng<1, vec>::get_adv();
747  vec v16 = c7x::strm_eng<1, vec>::get_adv();
748  vec v17 = c7x::strm_eng<1, vec>::get_adv();
749  vec v18 = c7x::strm_eng<1, vec>::get_adv();
750 
751  vec ele1 = u1;
752  vec ele2 = u2;
753 
754  v01 += ele1 * si1;
755  v02 += ele1 * si2;
756  v03 += ele1 * si3;
757  v04 += ele1 * si4;
758  v05 += ele1 * si5;
759  v06 += ele1 * si6;
760  v07 += ele1 * si7;
761  v08 += ele1 * si8;
762 
763  v11 += ele2 * si1;
764  v12 += ele2 * si2;
765  v13 += ele2 * si3;
766  v14 += ele2 * si4;
767  v15 += ele2 * si5;
768  v16 += ele2 * si6;
769  v17 += ele2 * si7;
770  v18 += ele2 * si8;
771 
772  __vpred pred1 = c7x::strm_agen<0, vec>::get_vpred();
773  vec *p1 = c7x::strm_agen<0, vec>::get_adv(U + 1);
774  __vstore_pred(pred1, p1, v01);
775 
776  pred1 = c7x::strm_agen<0, vec>::get_vpred();
777  p1 = c7x::strm_agen<0, vec>::get_adv(U + 1);
778  __vstore_pred(pred1, p1, v02);
779 
780  pred1 = c7x::strm_agen<0, vec>::get_vpred();
781  p1 = c7x::strm_agen<0, vec>::get_adv(U + 1);
782  __vstore_pred(pred1, p1, v03);
783 
784  pred1 = c7x::strm_agen<0, vec>::get_vpred();
785  p1 = c7x::strm_agen<0, vec>::get_adv(U + 1);
786  __vstore_pred(pred1, p1, v04);
787 
788  pred1 = c7x::strm_agen<0, vec>::get_vpred();
789  p1 = c7x::strm_agen<0, vec>::get_adv(U + 1);
790  __vstore_pred(pred1, p1, v05);
791 
792  pred1 = c7x::strm_agen<0, vec>::get_vpred();
793  p1 = c7x::strm_agen<0, vec>::get_adv(U + 1);
794  __vstore_pred(pred1, p1, v06);
795 
796  pred1 = c7x::strm_agen<0, vec>::get_vpred();
797  p1 = c7x::strm_agen<0, vec>::get_adv(U + 1);
798  __vstore_pred(pred1, p1, v07);
799 
800  pred1 = c7x::strm_agen<0, vec>::get_vpred();
801  p1 = c7x::strm_agen<0, vec>::get_adv(U + 1);
802  __vstore_pred(pred1, p1, v08);
803 
804  __vpred pred2 = c7x::strm_agen<1, vec>::get_vpred();
805  vec *p2 = c7x::strm_agen<1, vec>::get_adv(U + 1 + colUStride);
806  __vstore_pred(pred2, p2, v11);
807 
808  pred2 = c7x::strm_agen<1, vec>::get_vpred();
809  p2 = c7x::strm_agen<1, vec>::get_adv(U + 1 + colUStride);
810  __vstore_pred(pred2, p2, v12);
811 
812  pred2 = c7x::strm_agen<1, vec>::get_vpred();
813  p2 = c7x::strm_agen<1, vec>::get_adv(U + 1 + colUStride);
814  __vstore_pred(pred2, p2, v13);
815 
816  pred2 = c7x::strm_agen<1, vec>::get_vpred();
817  p2 = c7x::strm_agen<1, vec>::get_adv(U + 1 + colUStride);
818  __vstore_pred(pred2, p2, v14);
819 
820  pred2 = c7x::strm_agen<1, vec>::get_vpred();
821  p2 = c7x::strm_agen<1, vec>::get_adv(U + 1 + colUStride);
822  __vstore_pred(pred2, p2, v15);
823 
824  pred2 = c7x::strm_agen<1, vec>::get_vpred();
825  p2 = c7x::strm_agen<1, vec>::get_adv(U + 1 + colUStride);
826  __vstore_pred(pred2, p2, v16);
827 
828  pred2 = c7x::strm_agen<1, vec>::get_vpred();
829  p2 = c7x::strm_agen<1, vec>::get_adv(U + 1 + colUStride);
830  __vstore_pred(pred2, p2, v17);
831 
832  pred2 = c7x::strm_agen<1, vec>::get_vpred();
833  p2 = c7x::strm_agen<1, vec>::get_adv(U + 1 + colUStride);
834  __vstore_pred(pred2, p2, v18);
835  }
836  }
837 
 /* Odd Nrows: the unpaired last row exists only on SE0 / SA0 and needs one
  * more scalar from SA3. */
838  if (se0ICNT1 != se1ICNT1) {
839  vec v01 = c7x::strm_eng<0, vec>::get_adv();
840  vec v02 = c7x::strm_eng<0, vec>::get_adv();
841  vec v03 = c7x::strm_eng<0, vec>::get_adv();
842  vec v04 = c7x::strm_eng<0, vec>::get_adv();
843  vec v05 = c7x::strm_eng<0, vec>::get_adv();
844  vec v06 = c7x::strm_eng<0, vec>::get_adv();
845  vec v07 = c7x::strm_eng<0, vec>::get_adv();
846  vec v08 = c7x::strm_eng<0, vec>::get_adv();
847 
848  dataType *pU1 = c7x::strm_agen<3, dataType>::get_adv(U);
849  vec u1 = __vload_dup(pU1);
850 
851  v01 += u1 * si1;
852  v02 += u1 * si2;
853  v03 += u1 * si3;
854  v04 += u1 * si4;
855  v05 += u1 * si5;
856  v06 += u1 * si6;
857  v07 += u1 * si7;
858  v08 += u1 * si8;
859 
860  __vpred pred1 = c7x::strm_agen<0, vec>::get_vpred();
861  vec *p1 = c7x::strm_agen<0, vec>::get_adv(U + 1);
862  __vstore_pred(pred1, p1, v01);
863 
864  pred1 = c7x::strm_agen<0, vec>::get_vpred();
865  p1 = c7x::strm_agen<0, vec>::get_adv(U + 1);
866  __vstore_pred(pred1, p1, v02);
867 
868  pred1 = c7x::strm_agen<0, vec>::get_vpred();
869  p1 = c7x::strm_agen<0, vec>::get_adv(U + 1);
870  __vstore_pred(pred1, p1, v03);
871 
872  pred1 = c7x::strm_agen<0, vec>::get_vpred();
873  p1 = c7x::strm_agen<0, vec>::get_adv(U + 1);
874  __vstore_pred(pred1, p1, v04);
875 
876  pred1 = c7x::strm_agen<0, vec>::get_vpred();
877  p1 = c7x::strm_agen<0, vec>::get_adv(U + 1);
878  __vstore_pred(pred1, p1, v05);
879 
880  pred1 = c7x::strm_agen<0, vec>::get_vpred();
881  p1 = c7x::strm_agen<0, vec>::get_adv(U + 1);
882  __vstore_pred(pred1, p1, v06);
883 
884  pred1 = c7x::strm_agen<0, vec>::get_vpred();
885  p1 = c7x::strm_agen<0, vec>::get_adv(U + 1);
886  __vstore_pred(pred1, p1, v07);
887 
888  pred1 = c7x::strm_agen<0, vec>::get_vpred();
889  p1 = c7x::strm_agen<0, vec>::get_adv(U + 1);
890  __vstore_pred(pred1, p1, v08);
891  }
892  }
893 
 /* Release every stream opened for the s != 0 path. */
894  __SE0_CLOSE();
895  __SA0_CLOSE();
896  __SA2_CLOSE();
897  __SA3_CLOSE();
898  /* if (se1ICNT1 > 0) */ {
899  __SE1_CLOSE();
900  __SA1_CLOSE();
901  }
902  }
903  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Exiting function");
904 }
/* Explicit instantiations for the two supported element types. */
905 template void DSPLIB_bidiag_uFinal_ci<float>(float *U,
906  int32_t Nrows,
907  int32_t Ncols,
908  int32_t colUStride,
909  float s,
910  float *U1,
911  uint8_t *pBlock);
912 template void DSPLIB_bidiag_uFinal_ci<double>(double *U,
913  int32_t Nrows,
914  int32_t Ncols,
915  int32_t colUStride,
916  double s,
917  double *U1,
918  uint8_t *pBlock);
919 
920 /* ======================================================================== */
921 /* End of file: DSPLIB_svd_bidiag_uFinal_ci.cpp */
922 /* ======================================================================== */
void DSPLIB_bidiag_uFinal_ci(dataType *U, int32_t Nrows, int32_t Ncols, int32_t colUStride, dataType s, dataType *U1, uint8_t *pBlock)
This function implements the process corresponding to the "update U" loop in the natural implementation.
void DSPLIB_bidiag_uFinal_initalize_ci(dataType *U, int32_t Nrows, int32_t Ncols, int32_t colUStride, dataType s, dataType *U1, uint8_t *pBlock)
This function implements the process corresponding to the "initial U" loop in the natural implementation.
void DSPLIB_bidiag_uFinal_init_ci(DSPLIB_kernelHandle handle)
template void DSPLIB_bidiag_uFinal_expand_ci< double >(double *U, int32_t Nrows, int32_t Ncols, int32_t colUStride, uint8_t *pBlock)
template void DSPLIB_bidiag_uFinal_expand_ci< float >(float *U, int32_t Nrows, int32_t Ncols, int32_t colUStride, uint8_t *pBlock)
void DSPLIB_bidiag_uFinal_normalize_ci(dataType *U, int32_t Nrows, dataType s, int32_t colUStride, uint8_t *pBlock)
This function normalizes a column of the input matrix U.
void DSPLIB_bidiag_uFinal_expand_ci(dataType *U, int32_t Nrows, int32_t Ncols, int32_t colUStride, uint8_t *pBlock)
This function expands the columns of the U matrix to obtain a square matrix and fills the added columns with '0' values...
template void DSPLIB_bidiag_uFinal_init_ci< float >(DSPLIB_kernelHandle handle)
template void DSPLIB_bidiag_uFinal_ci< double >(double *U, int32_t Nrows, int32_t Ncols, int32_t colUStride, double s, double *U1, uint8_t *pBlock)
template void DSPLIB_bidiag_uFinal_init_ci< double >(DSPLIB_kernelHandle handle)
template void DSPLIB_bidiag_uFinal_initalize_ci< float >(float *U, int32_t Nrows, int32_t Ncols, int32_t colUStride, float s, float *U1, uint8_t *pBlock)
template void DSPLIB_bidiag_uFinal_ci< float >(float *U, int32_t Nrows, int32_t Ncols, int32_t colUStride, float s, float *U1, uint8_t *pBlock)
template void DSPLIB_bidiag_uFinal_normalize_ci< double >(double *U, int32_t Nrows, double s, int32_t colUStride, uint8_t *pBlock)
template void DSPLIB_bidiag_uFinal_initalize_ci< double >(double *U, int32_t Nrows, int32_t Ncols, int32_t colUStride, double s, double *U1, uint8_t *pBlock)
template void DSPLIB_bidiag_uFinal_normalize_ci< float >(float *U, int32_t Nrows, float s, int32_t colUStride, uint8_t *pBlock)
Header file for kernel's internal use. For the kernel's interface, please see DSPLIB_svd.
dataType getRecip(dataType value)
#define DSPLIB_DEBUGPRINTFN(N, fmt,...)
Definition: DSPLIB_types.h:83
void * DSPLIB_kernelHandle
Handle type for DSPLIB operations.
Definition: DSPLIB_types.h:172
@ DSPLIB_SUCCESS
Definition: DSPLIB_types.h:152
Structure that is reserved for internal use by the kernel.
uint32_t strideU
Stride between rows of U matrix
uint8_t bufPblock[DSPLIB_SVD_IXX_IXX_OXX_PBLOCK_SIZE]
Buffer to save SE & SA configuration parameters