DSPLIB User Guide
DSPLIB_svd_ci.cpp
Go to the documentation of this file.
1 /******************************************************************************/
5 /* Copyright (C) 2017 Texas Instruments Incorporated - https://www.ti.com/
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * Redistributions of source code must retain the above copyright
12  * notice, this list of conditions and the following disclaimer.
13  *
14  * Redistributions in binary form must reproduce the above copyright
15  * notice, this list of conditions and the following disclaimer in the
16  * documentation and/or other materials provided with the
17  * distribution.
18  *
19  * Neither the name of Texas Instruments Incorporated nor the names of
20  * its contributors may be used to endorse or promote products derived
21  * from this software without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
29  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
30  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
33  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34  *
35  ******************************************************************************/
36 
37 /******************************************************************************
38  * Version 1.0 Date Aug 2023 Author: Asheesh Bhardwaj
39  *****************************************************************************/
40 
41 /*******************************************************************************
42  *
43  * INCLUDES
44  *
45  ******************************************************************************/
46 
47 #include "DSPLIB_svd_priv.h"
48 
49 /* *****************************************************************************
50  *
51  * DEFINES
52  *
53  ***************************************************************************** */
54 
55 #define MAX_ITERATION_COUNT 30
56 
57 /* *****************************************************************************
58  *
59  * INITIALIZATION
60  *
61  ***************************************************************************** */
62 
63 template <typename dataType>
65  const DSPLIB_bufParams2D_t *bufParamsIn,
66  const DSPLIB_bufParams2D_t *bufParamsU,
67  const DSPLIB_bufParams2D_t *bufParamsV,
68  const DSPLIB_bufParams1D_t *bufParamsDiag,
69  const DSPLIB_bufParams1D_t *bufParamsSuperDiag,
70  const DSPLIB_svdInitArgs *pKerInitArgs)
71 {
72  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Entering function");
73 
74  DSPLIB_bidiag_u_init_ci<dataType>(handle);
75  DSPLIB_bidiag_v_init_ci<dataType>(handle);
76 
77  DSPLIB_bidiag_uFinal_init_ci<dataType>(handle);
78 
79  DSPLIB_diag_proc_init_ci<dataType>(handle);
80 
81  DSPLIB_singularSort_swap_init_ci<dataType>(handle);
82 
83  DSPLIB_svd_matTrans_init_ci<dataType>(handle, pKerInitArgs);
84 
85  DSPLIB_DEBUGPRINTFN(0, "Exiting function with return status: %d\n", DSPLIB_SUCCESS);
86  return DSPLIB_SUCCESS;
87 }
88 
90  const DSPLIB_bufParams2D_t *bufParamsIn,
91  const DSPLIB_bufParams2D_t *bufParamsU,
92  const DSPLIB_bufParams2D_t *bufParamsV,
93  const DSPLIB_bufParams1D_t *bufParamsDiag,
94  const DSPLIB_bufParams1D_t *bufParamsSuperDiag,
95  const DSPLIB_svdInitArgs *pKerInitArgs);
96 
98  const DSPLIB_bufParams2D_t *bufParamsIn,
99  const DSPLIB_bufParams2D_t *bufParamsU,
100  const DSPLIB_bufParams2D_t *bufParamsV,
101  const DSPLIB_bufParams1D_t *bufParamsDiag,
102  const DSPLIB_bufParams1D_t *bufParamsSuperDiag,
103  const DSPLIB_svdInitArgs *pKerInitArgs);
104 
105 /* *****************************************************************************
106  *
107  * IMPLEMENTATION
108  *
109  ***************************************************************************** */
110 
111 template <typename dataType>
113  const int Ncols,
114  dataType *U,
115  dataType *V,
116  dataType *diag,
117  dataType *superdiag,
118  const int colUStride,
119  const int colVStride,
120  uint32_t enableReducedForm,
121  dataType *U1,
122  uint8_t *pBlock)
123 {
124  int i;
125  dataType s, scale, half_norm_squared;
126 
127  /* Householder processing */
128  s = 0;
129  scale = 0;
130  for (i = 0; i < Ncols; i++) {
131  superdiag[i] = scale * s;
132  /* process columns */
133  scale = DSPLIB_bidiag_uCol_halfnorm_ci<dataType>(&U[i + i * colUStride], Nrows - i, Ncols, colUStride,
134  &half_norm_squared, U1, &s, pBlock);
135  U[i + i * colUStride] = U[i + i * colUStride] - (s * scale);
136  if (Ncols - (i + 1) > 0 && scale > 0) {
137  DSPLIB_bidiag_uCol_ci<dataType>(&U[i + i * colUStride], Nrows - i, Ncols - (i + 1), colUStride,
138  half_norm_squared, U1, scale, pBlock);
139  }
140  diag[i] = s * scale;
141  /* process rows */
142  s = 0;
143  scale = 0;
144  if ((i != Ncols - 1)) {
145  scale = DSPLIB_bidiag_uRow_halfnorm_ci<dataType>(&U[(i + 1) + (i * colUStride)], Nrows - i, Ncols - (i + 1),
146  colUStride, &half_norm_squared, U1, &s, &superdiag[i + 1],
147  pBlock);
148  if ((scale > 0)) {
149  DSPLIB_bidiag_uRow_ci<dataType>(&U[(i + 1) + (i * colUStride)], Nrows - (i + 1), Ncols - (i + 1),
150  colUStride, &superdiag[i + 1], U1, scale, pBlock);
151  }
152  } /* if ((i<Nrows)&&(i!=Ncols-1)) */
153  } /* for (i=0;i<Ncols;i++) */
154 
155  /* update V */
156  V[(Ncols - 1) + (Ncols - 1) * colVStride] = 1;
157  s = superdiag[Ncols - 1];
158  for (i = Ncols - 2; i >= 0; i--) {
159  DSPLIB_bidiag_v_ci<dataType>(&V[i + (i) *colVStride], Ncols - i - 1, Ncols - i, colVStride, s, U1,
160  &U[(i) + i * colUStride], colUStride, pBlock);
161  s = superdiag[i];
162  } /* for (i=Ncols-2;i>=0;i--) */
163  if (enableReducedForm == 0u) {
164  /* expand U to from Nrows x Ncols to */
165  /* Nrows x Nrows */
166  if (Nrows > Ncols) {
167  DSPLIB_bidiag_uFinal_expand_ci<dataType>(U, Nrows, Ncols, colUStride, pBlock);
168  }
169  {
170  i = Ncols - 1;
171  s = diag[i];
172  if (Nrows - (i + 1) > 0) {
173 
174  DSPLIB_bidiag_uFinal_initalize_ci(&U[i + i * colUStride], Nrows - i, Nrows - (i + 1), colUStride, s, U1,
175  pBlock);
176  }
177  DSPLIB_bidiag_uFinal_normalize_ci(&U[i + i * colUStride], Nrows - i, s, colUStride, pBlock);
178  U[i + i * colUStride] += 1;
179  }
180 
181  /* update U */
182  for (i = Ncols - 2; i >= 0; i--) {
183  s = diag[i];
184  DSPLIB_bidiag_uFinal_ci(&U[i + i * colUStride], Nrows - i, Nrows - (i + 1), colUStride, s, U1, pBlock);
185  DSPLIB_bidiag_uFinal_normalize_ci(&U[i + i * colUStride], Nrows - i, s, colUStride, pBlock);
186  U[i + i * colUStride] += 1;
187  } /* for (i=Ncols-1;i>=0;i--) */
188  } /* if (!enableReducedForm) */
189 
190  else /* if (enableReducedForm) */
191  {
192  /* update U */
193  for (i = Ncols - 1; i >= 0; i--) {
194  s = diag[i];
195  if (i != Ncols - 1) {
196  DSPLIB_bidiag_uFinal_ci(&U[i + i * colUStride], Nrows - i, Ncols - (i + 1), colUStride, s, U1, pBlock);
197  }
198  DSPLIB_bidiag_uFinal_normalize_ci(&U[i + i * colUStride], Nrows - i, s, colUStride, pBlock);
199 
200  U[i + i * colUStride] += 1;
201  } /* for (i=Ncols-1;i>=0;i--) */
202  } /* if (enableReducedForm) */
203  return 0;
204 }
205 template int DSPLIB_svd_convert_to_bidiag_ci<float>(const int Nrows,
206  const int Ncols,
207  float *U,
208  float *V,
209  float *diag,
210  float *superdiag,
211  const int colUStride,
212  const int colVStride,
213  uint32_t enableReducedForm,
214  float *U1,
215  uint8_t *pBlock);
216 template int DSPLIB_svd_convert_to_bidiag_ci<double>(const int Nrows,
217  const int Ncols,
218  double *U,
219  double *V,
220  double *diag,
221  double *superdiag,
222  const int colUStride,
223  const int colVStride,
224  uint32_t enableReducedForm,
225  double *U1,
226  uint8_t *pBlock);
227 
228 template <typename dataType> static inline dataType getSqrt(dataType a)
229 {
230  const dataType Half = 0.5f;
231  const dataType OneP5 = 1.5f;
232  dataType x, y;
233  // int i;
234 
235  x = __recip_sqrt(a); /* compute square root reciprocal */
236 
237  // #pragma UNROLL(1) /* PRAGMA: do not unroll this loop */
238  // for (i = 0; i < 2; i++)
239  {
240  x = x * (OneP5 - (a * x * x * Half));
241  x = x * (OneP5 - (a * x * x * Half));
242  }
243  y = a * x;
244 
245 
246  return (y);
247 }
248 template float getSqrt<float>(float a);
249 template double getSqrt<double>(double a);
250 
251 template <typename dataType> static inline dataType getRecipSqrt(dataType a)
252 {
253  const dataType Half = 0.5f;
254  const dataType OneP5 = 1.5f;
255  dataType x;
256  // int i;
257 
258  x = __recip_sqrt(a); // compute square root reciprocal
259 
260  // PRAGMA: do not unroll this loop
261  // #pragma UNROLL(1)
262  // for (i = 0; i < 2; i++)
263  {
264  x = x * (OneP5 - (a * x * x * Half));
265  x = x * (OneP5 - (a * x * x * Half));
266  }
267 
268  return x;
269 }
270 template float getRecipSqrt<float>(float a);
271 template double getRecipSqrt<double>(double a);
272 
273 template <typename dataType>
274 int DSPLIB_svd_bidiag_to_diag_ci(const int Nrows,
275  const int Ncols,
276  dataType *U,
277  dataType *V,
278  dataType *diag,
279  dataType *superdiag,
280  dataType *pTemp,
281  const int colUStride,
282  const int rowUStride,
283  const int colVStride,
284  const int rowVStride,
285  uint32_t enableReducedForm,
286  uint8_t *pBlock)
287 {
288  int i, k, rotation_test, iter, total_iter;
289  int m = 0;
290  dataType x, y, z, epsilon;
291  dataType c, s, f, g, h;
292  dataType *cU = &pTemp[0 * Ncols];
293  dataType *sU = &pTemp[1 * Ncols];
294  dataType *cV = &pTemp[2 * Ncols];
295  dataType *sV = &pTemp[3 * Ncols];
296  iter = 0;
297  total_iter = 0;
298  /* ------------------------------------------------------------------- */
299  /* find max in col */
300  /* ------------------------------------------------------------------- */
301 
302  DSPLIB_diag_epsilon_ci(diag, superdiag, &epsilon, Ncols, pBlock);
303 
304  for (k = Ncols - 1; k >= 0; k--) {
305  total_iter += iter;
306  iter = 0;
307  while (true) {
308  rotation_test = 1;
309  DSPLIB_diag_rotation_check_ci(&diag[k - 1], &superdiag[k], epsilon, &m, &rotation_test, (k + 1), pBlock);
310 
311  if (rotation_test) {
312  int32_t loopCnt = 0;
313  c = 0;
314  s = 1;
315  for (i = m; i <= k; i++) {
316  f = s * superdiag[i];
317  superdiag[i] = c * superdiag[i];
318 #if !defined(ENABLE_LDRA_COVERAGE)
319 /* This part of code checks for "test f convergence" part condition
320  Ref. Singular Value Decomposition and Least Squares Solutions. G. H. Golub et al
321  We use these conditions in order to calculate correct results if and when they occur */
322  if (fabs(f) <= epsilon) {
323  break;
324  }
325 #endif
326  g = diag[i];
327  h = getSqrt(f * f + g * g);
328  diag[i] = h;
329  dataType recipH = getRecip(h);
330  c = g * recipH;
331  s = -f * recipH;
332  cU[loopCnt] = c;
333  sU[loopCnt] = s;
334  loopCnt++;
335  }
336 
337 #if ORIGINAL_IMPL
338  for (i = 0; i <= loopCnt - 1; i++) {
339  for (row = 0; row < Nrows; row++) {
340  y = U[(m - 1) * rowUStride + row];
341  z = U[(i + m) * rowUStride + row];
342  U[(m - 1) * rowUStride + row] = y * cU[i] + z * sU[i];
343  U[(i + m) * rowUStride + row] = -y * sU[i] + z * cU[i];
344  }
345  } /* for (i=m;i<=k;i++) */
346 #else
347  DSPLIB_diag_rotation_proc_ci<dataType>(&U[(m - 1) * rowUStride], m, loopCnt, Nrows, rowUStride, cU, sU,
348  pBlock);
349 #endif
350  } /* if (rotation_test) */
351 
352  z = diag[k];
353  if (m == k) { /* "test f convergence" part */
354  if (z < 0) {
355  diag[k] = -z;
356  DSPLIB_diag_negate_v_ci(&V[k * rowVStride], Ncols, rowVStride, pBlock);
357  } /* if (z>0) */
358  break;
359  } /* if (m==k) */
360  else {
361 #if !defined(ENABLE_LDRA_COVERAGE)
362 /* This part of code retricts the count of "test f splitting" part
363  Ref. Singular Value Decomposition and Least Squares Solutions. G. H. Golub et al
364  We use these conditions in order to break the while loop to avoid infinite loop */
365  if (iter >= MAX_ITERATION_COUNT) {
366  total_iter = -1;
367  break;
368  }
369 #endif
370  iter++;
371  x = diag[m];
372  y = diag[k - 1];
373  g = superdiag[k - 1];
374  h = superdiag[k];
375  f = ((y - z) * (y + z) + (g - h) * (g + h)) * getRecip((2 * h * y));
376  g = getSqrt(f * f + 1);
377  if (f < 0) {
378  g = -g;
379  }
380  f = ((x - z) * (x + z) + h * (y *getRecip((f + g)) - h)) * getRecip(x);
381 
382  /* next QR transformation */
383  c = 1;
384  s = 1;
385 
386  uint32_t loopCnt = (k - m);
387  i = m + 1;
388  uint32_t cnt = 0;
389  dataType c1, s1, c2, s2, c3, s3, recipz;
390  for (; cnt < loopCnt - 1; cnt = cnt + 2) {
391  h = s * superdiag[i];
392  g = superdiag[i] * c;
393  z = (f * f + h * h);
394  superdiag[i - 1] = z;
395 
396  recipz = getRecipSqrt(z);
397 
398  /* 21 cycles*/
399  c1 = f * recipz;
400  s1 = h * recipz;
401  f = x * c1 + g * s1;
402  g = -x * s1 + g * c1;
403  h = diag[i] * s1;
404  y = c1 * diag[i];
405  z = (f * f + h * h);
406  diag[i - 1] = z;
407 
408  c3 = c1;
409  s3 = s1;
410  cV[cnt] = c1;
411  sV[cnt] = s1;
412 #if !defined(ENABLE_LDRA_COVERAGE)
413 /* This part of code test value of z in order to avoid the "inf" result */
414  if (z != 0) {
415  recipz = getRecipSqrt(z);
416  c3 = f * recipz;
417  s3 = h * recipz;
418  }
419 #else
420  recipz = getRecipSqrt(z);
421  c3 = f * recipz;
422  s3 = h * recipz;
423 #endif
424  /* 22 cycle */
425  f = c3 * g + s3 * y;
426  x = -s3 * g + c3 * y;
427  h = s3 * superdiag[i + 1];
428  g = superdiag[i + 1] * c3;
429  z = (f * f + h * h);
430  superdiag[i] = z;
431  cU[cnt] = c3;
432  sU[cnt] = s3;
433  recipz = getRecipSqrt(z);
434 
435  /* 21 cycle*/
436  c2 = f * recipz;
437  s2 = h * recipz;
438  f = x * c2 + g * s2;
439  g = -x * s2 + g * c2;
440  h = diag[i + 1] * s2;
441  y = c2 * diag[i + 1];
442 
443  z = (f * f + h * h);
444  diag[i] = z;
445  c = c2;
446  s = s2;
447  cV[cnt + 1] = c2;
448  sV[cnt + 1] = s2;
449 #if !defined(ENABLE_LDRA_COVERAGE)
450 /* This part of code test value of z in order to avoid the "inf" result */
451  if (z != 0) {
452  /* 1 + 2 *19 + 19*/
453  recipz = getRecipSqrt(z);
454  c = f * recipz;
455  s = h * recipz;
456  }
457 
458 #else
459  recipz = getRecipSqrt(z);
460  c = f * recipz;
461  s = h * recipz;
462 #endif
463  f = c * g + s * y;
464  x = -s * g + c * y;
465  /* 11 cycle */
466  cU[cnt + 1] = c;
467  sU[cnt + 1] = s;
468  i = i + 2;
469  } /* loopCnt */
470  if (cnt < loopCnt) {
471  g = superdiag[i];
472  y = diag[i];
473  h = s * g;
474  g = g * c;
475  z = (f * f + h * h);
476  recipz = getRecipSqrt(z);
477  superdiag[i - 1] = z;
478  c = f * recipz;
479  s = h * recipz;
480  f = x * c + g * s;
481  g = -x * s + g * c;
482  h = y * s;
483  y = c * y;
484  cV[cnt] = c;
485  sV[cnt] = s;
486  z = (f * f + h * h);
487  recipz = getRecipSqrt(z);
488  diag[i - 1] = z;
489 #if !defined(ENABLE_LDRA_COVERAGE)
490 /* This part of code test value of z in order to avoid the "inf" result */
491  if (z != 0) {
492  c = f * recipz;
493  s = h * recipz;
494  }
495 #else
496  c = f * recipz;
497  s = h * recipz;
498 #endif
499  cU[cnt] = c;
500  sU[cnt] = s;
501  f = c * g + s * y;
502  x = -s * g + c * y;
503  }
504  DSPLIB_diag_sqrt_ci<dataType>(&superdiag[m], &diag[m], (k - m), pBlock);
505  DSPLIB_diag_proc_ci(V, m, loopCnt + 1, Ncols, rowVStride, cV, sV, pBlock);
506  DSPLIB_diag_proc_ci(U, m, loopCnt + 1, Nrows, rowUStride, cU, sU, pBlock);
507  superdiag[m] = 0;
508  superdiag[k] = f;
509  diag[k] = x;
510  } /* if (m==k) */
511  } /* while (1==1) */
512  } /* for (k=Ncols-1:k>=0;k--) */
513 
514  return total_iter;
515 }
516 template int DSPLIB_svd_bidiag_to_diag_ci<float>(const int Nrows,
517  const int Ncols,
518  float *U,
519  float *V,
520  float *diag,
521  float *superdiag,
522  float *pTemp,
523  const int colUStride,
524  const int rowUStride,
525  const int colVStride,
526  const int rowVStride,
527  uint32_t enableReducedForm,
528  uint8_t *pBlock);
529 template int DSPLIB_svd_bidiag_to_diag_ci<double>(const int Nrows,
530  const int Ncols,
531  double *U,
532  double *V,
533  double *diag,
534  double *superdiag,
535  double *pTemp,
536  const int colUStride,
537  const int rowUStride,
538  const int colVStride,
539  const int rowVStride,
540  uint32_t enableReducedForm,
541  uint8_t *pBlock);
542 
543 template <typename dataType>
545  const int Ncols,
546  dataType *U,
547  dataType *U1,
548  dataType *V,
549  dataType *V1,
550  dataType *singular_values,
551  dataType *pScratch,
552  const int colUStride,
553  const int rowUStride,
554  const int colVStride,
555  const int rowVStride,
556  uint32_t enableReducedForm,
557  uint8_t *pBlock)
558 {
559  int32_t *maxIndArr = (int32_t *) &pScratch[1 * Ncols];
560  dataType *sortedSingular = &pScratch[2 * Ncols];
561  /* -----------------------------------------------------------------------------------
562  Sort the singular values in descending order and change the order of column vectors
563  in U and V matrix. Here the input U and V matrix are in transposed for therefore
564  rows vectors are ordered and the after the function call matrices are transposed back
565  --------------------------------------------------------------------------------------*/
566 
567  DSPLIB_svd_blk_move_ci<dataType>(sortedSingular, singular_values, 1, Ncols, 0, 0, pBlock);
568  /* Sort singular values in descending order and store the indices in maxIndArr buffer */
569  DSPLIB_singularSort_index_ci(singular_values, sortedSingular, maxIndArr, Ncols, pBlock);
570 
571  /* Use the indices stored in maxIndArr buffer to reorder the rows of U' and V' and store in
572  U1 and V1 buffer respectively*/
573  DSPLIB_singularSort_swap_ci(U, Ncols, Nrows, rowUStride, maxIndArr, U1, pBlock);
574  DSPLIB_singularSort_swap_ci(V, Ncols, Ncols, rowVStride, maxIndArr, V1, pBlock);
575 
576  /* Copy back reordered matrices from U1 and V1 to U and V respectively */
577  DSPLIB_svd_blk_move_ci<dataType>(U, U1, Ncols, Nrows, colUStride, rowUStride, pBlock);
578  DSPLIB_svd_blk_move_ci<dataType>(V, V1, Ncols, Ncols, colVStride, rowVStride, pBlock);
579 
580  return 0;
581 }
582 template int DSPLIB_svd_sort_singular_values_ci<float>(const int Nrows,
583  const int Ncols,
584  float *U,
585  float *U1,
586  float *V,
587  float *V1,
588  float *singular_values,
589  float *pScratch,
590  const int colUStride,
591  const int rowUStride,
592  const int colVStride,
593  const int rowVStride,
594  uint32_t enableReducedForm,
595  uint8_t *pBlock);
596 template int DSPLIB_svd_sort_singular_values_ci<double>(const int Nrows,
597  const int Ncols,
598  double *U,
599  double *U1,
600  double *V,
601  double *V1,
602  double *singular_values,
603  double *pScratch,
604  const int colUStride,
605  const int rowUStride,
606  const int colVStride,
607  const int rowVStride,
608  uint32_t enableReducedForm,
609  uint8_t *pBlock);
610 
611 template <typename dataType>
613  void *restrict pA,
614  void *restrict pU,
615  void *restrict pV,
616  void *restrict pDiag,
617  void *restrict pSuperDiag,
618  void *restrict pU1,
619  void *restrict pV1,
620  void *restrict pScratch)
621 {
622  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Entering function");
623 
624  DSPLIB_STATUS status = DSPLIB_SUCCESS;
625 
626  DSPLIB_svd_PrivArgs *pKerPrivArgs = (DSPLIB_svd_PrivArgs *) handle;
627  DSPLIB_matTrans_PrivArgs *pMatTransKerPrivArgs1 = &pKerPrivArgs->pMatTransKerPrivArgs1;
628  DSPLIB_matTrans_PrivArgs *pMatTransKerPrivArgs2 = &pKerPrivArgs->pMatTransKerPrivArgs2;
629  DSPLIB_matTrans_PrivArgs *pMatTransKerPrivArgs3 = &pKerPrivArgs->pMatTransKerPrivArgs3;
630  DSPLIB_matTrans_PrivArgs *pMatTransKerPrivArgs4 = &pKerPrivArgs->pMatTransKerPrivArgs4;
631  DSPLIB_matTrans_PrivArgs *pMatTransKerPrivArgs5 = &pKerPrivArgs->pMatTransKerPrivArgs5;
632 
633  /* Typecast void pointers to respective data type */
634  dataType *pALocal = (dataType *) pA;
635  dataType *pULocal = (dataType *) pU;
636  dataType *pVLocal = (dataType *) pV;
637  dataType *pDiagLocal = (dataType *) pDiag;
638  dataType *pSuperDiagLocal = (dataType *) pSuperDiag;
639  dataType *pU1Local = (dataType *) pU1;
640  dataType *pV1Local = (dataType *) pV1;
641  dataType *pScratchLocal = (dataType *) pScratch;
642 
643  DSPLIB_DEBUGPRINTFN(0, "pALocal: %p pOutLocal: %p\n", pALocal, pULocal);
644 
645  uint8_t *pBlock = pKerPrivArgs->bufPblock;
646  uint32_t Nrows = pKerPrivArgs->heightIn;
647  uint32_t Ncols = pKerPrivArgs->widthIn;
648  int32_t strideIn = pKerPrivArgs->strideIn;
649  int32_t strideU = pKerPrivArgs->strideU;
650  int32_t strideURows = pKerPrivArgs->strideURows;
651  int32_t strideV = pKerPrivArgs->strideV;
652  int32_t strideVRows = pKerPrivArgs->strideVRows;
653  uint32_t enableReducedForm = pKerPrivArgs->enableReducedForm;
654 
655  int Nrows1 = 0;
656  int Ncols1 = 0;
657 
658  /* ------------------------------------------------------------------- */
659  /* copy A matrix to U */
660  /* ------------------------------------------------------------------- */
661 
662  if (Nrows >= Ncols) {
663  Nrows1 = Nrows;
664  Ncols1 = Ncols;
665  }
666  else {
667  Nrows1 = Ncols;
668  Ncols1 = Nrows;
669  }
670 
671  int32_t dataSize = sizeof(dataType);
672  int32_t colUStride = strideU / dataSize;
673  int32_t rowUStride = strideURows / dataSize;
674  int32_t colVStride = strideV / dataSize;
675  int32_t rowVStride = strideVRows / dataSize;
676  int32_t colAStride = strideIn / dataSize;
677  if (Nrows >= Ncols) {
678  /* Copy A to U */
679  DSPLIB_svd_blk_move_ci<dataType>(pULocal, pALocal, Nrows1, Ncols1, colUStride, colAStride, pBlock);
680  }
681  else {
682  /* Copy A' to U */
683  DSPLIB_matTrans_exec_ci<dataType>(pMatTransKerPrivArgs1, pALocal, pULocal);
684  }
685 
686  /* ------------------------------------------------------------------- */
687  /* convert A to bidiagonal matrix using Householder reflections */
688  /* ------------------------------------------------------------------- */
689  DSPLIB_svd_convert_to_bidiag_ci<dataType>(Nrows1, Ncols1, pULocal, pV1Local, pDiagLocal, pSuperDiagLocal, colUStride,
690  colVStride, enableReducedForm, pScratchLocal, pBlock);
691  /* ------------------------------------------------------------------- */
692  /* convert bidiagonal to diagonal using Givens rotations */
693  /* ------------------------------------------------------------------- */
694 
695  if (enableReducedForm == 0u) {
696  DSPLIB_matTrans_exec_ci<dataType>(pMatTransKerPrivArgs2, pULocal, pU1Local);
697  }
698  else {
699  DSPLIB_matTrans_exec_ci<dataType>(pMatTransKerPrivArgs3, pULocal, pU1Local);
700  }
701 
702 #if !defined(ENABLE_LDRA_COVERAGE)
703  int svd_status = DSPLIB_svd_bidiag_to_diag_ci<dataType>(Nrows1, Ncols1, pU1Local, pV1Local, pDiagLocal, pSuperDiagLocal,
704  pScratchLocal, colUStride, rowUStride, colVStride, rowVStride,
705  enableReducedForm, pBlock);
706 #else
707  DSPLIB_svd_bidiag_to_diag_ci<dataType>(Nrows1, Ncols1, pU1Local, pV1Local, pDiagLocal, pSuperDiagLocal,
708  pScratchLocal, colUStride, rowUStride, colVStride, rowVStride,
709  enableReducedForm, pBlock);
710 #endif
711 
712  /* ------------------------------------------------------------------- */
713  /* sort singular values in descending order */
714  /* ------------------------------------------------------------------- */
715  DSPLIB_svd_sort_singular_values_ci<dataType>(Nrows1, Ncols1, pU1Local, pULocal, pV1Local, pVLocal, pDiagLocal,
716  pScratchLocal, colUStride, rowUStride, colVStride, rowVStride,
717  enableReducedForm, pBlock);
718 
719  if (enableReducedForm == 0u) {
720  DSPLIB_matTrans_exec_ci<dataType>(pMatTransKerPrivArgs2, pU1Local, pULocal);
721  }
722  else {
723  DSPLIB_matTrans_exec_ci<dataType>(pMatTransKerPrivArgs4, pU1Local, pULocal);
724  }
725  DSPLIB_matTrans_exec_ci<dataType>(pMatTransKerPrivArgs5, pV1Local, pVLocal);
726 
727  /* ------------------------------------------------------------------- */
728  /* switch U and V */
729  /* ------------------------------------------------------------------- */
730  if (Ncols > Nrows) {
731  if (enableReducedForm == 0u) {
732  DSPLIB_svd_blk_move_ci<dataType>(pU1Local, pVLocal, Nrows, Nrows, rowUStride, colVStride, pBlock);
733  DSPLIB_svd_blk_move_ci<dataType>(pVLocal, pULocal, Ncols, Ncols, colVStride, colUStride, pBlock);
734  DSPLIB_svd_blk_move_ci<dataType>(pULocal, pU1Local, Nrows, Nrows, colUStride, rowUStride, pBlock);
735  }
736  else {
737  DSPLIB_svd_blk_move_ci<dataType>(pU1Local, pVLocal, Nrows, Nrows, rowUStride, colVStride, pBlock);
738  DSPLIB_svd_blk_move_ci<dataType>(pVLocal, pULocal, Ncols, Nrows, colVStride, colUStride, pBlock);
739  DSPLIB_svd_blk_move_ci<dataType>(pULocal, pU1Local, Nrows, Nrows, colUStride, rowUStride, pBlock);
740  }
741  }
742 
743 #if !defined(ENABLE_LDRA_COVERAGE)
744  if(svd_status < 0){
745  status = DSPLIB_ERR_FAILURE;
746  }
747 #endif
748  DSPLIB_DEBUGPRINTFN(0, "Exiting function with return status: %d\n", DSPLIB_SUCCESS);
749  return status;
750 }
751 
753  void *restrict pA,
754  void *restrict pU,
755  void *restrict pV,
756  void *restrict pDiag,
757  void *restrict pSuperDiag,
758  void *restrict pU1,
759  void *restrict pV1,
760  void *restrict pScratch);
761 
763  void *restrict pA,
764  void *restrict pU,
765  void *restrict pV,
766  void *restrict pDiag,
767  void *restrict pSuperDiag,
768  void *restrict pU1,
769  void *restrict pV1,
770  void *restrict pScratch);
771 /* ======================================================================== */
772 /* End of file: DSPLIB_svd_ci.cpp */
773 /* ======================================================================== */
void DSPLIB_bidiag_uFinal_ci(dataType *U, int32_t Nrows, int32_t Ncols, int32_t colUStride, dataType s, dataType *U1, uint8_t *pBlock)
This function implements the process corresponding to the "update U" loop in natural implementation.
void DSPLIB_bidiag_uFinal_initalize_ci(dataType *U, int32_t Nrows, int32_t Ncols, int32_t colUStride, dataType s, dataType *U1, uint8_t *pBlock)
This function implements the process corresponding to the "initial U" loop in natural implementation.
void DSPLIB_bidiag_uFinal_normalize_ci(dataType *U, int32_t Nrows, dataType s, int32_t colUStride, uint8_t *pBlock)
This function normalizes the column of input matrix U.
template int DSPLIB_svd_bidiag_to_diag_ci< float >(const int Nrows, const int Ncols, float *U, float *V, float *diag, float *superdiag, float *pTemp, const int colUStride, const int rowUStride, const int colVStride, const int rowVStride, uint32_t enableReducedForm, uint8_t *pBlock)
template DSPLIB_STATUS DSPLIB_svd_exec_ci< double >(DSPLIB_kernelHandle handle, void *restrict pA, void *restrict pU, void *restrict pV, void *restrict pDiag, void *restrict pSuperDiag, void *restrict pU1, void *restrict pV1, void *restrict pScratch)
DSPLIB_STATUS DSPLIB_svd_exec_ci(DSPLIB_kernelHandle handle, void *restrict pA, void *restrict pU, void *restrict pV, void *restrict pDiag, void *restrict pSuperDiag, void *restrict pU1, void *restrict pV1, void *restrict pScratch)
This function is the main execution function for the C7x implementation of the kernel....
template int DSPLIB_svd_bidiag_to_diag_ci< double >(const int Nrows, const int Ncols, double *U, double *V, double *diag, double *superdiag, double *pTemp, const int colUStride, const int rowUStride, const int colVStride, const int rowVStride, uint32_t enableReducedForm, uint8_t *pBlock)
template float getSqrt< float >(float a)
template int DSPLIB_svd_sort_singular_values_ci< double >(const int Nrows, const int Ncols, double *U, double *U1, double *V, double *V1, double *singular_values, double *pScratch, const int colUStride, const int rowUStride, const int colVStride, const int rowVStride, uint32_t enableReducedForm, uint8_t *pBlock)
template int DSPLIB_svd_convert_to_bidiag_ci< float >(const int Nrows, const int Ncols, float *U, float *V, float *diag, float *superdiag, const int colUStride, const int colVStride, uint32_t enableReducedForm, float *U1, uint8_t *pBlock)
template DSPLIB_STATUS DSPLIB_svd_init_ci< float >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams2D_t *bufParamsIn, const DSPLIB_bufParams2D_t *bufParamsU, const DSPLIB_bufParams2D_t *bufParamsV, const DSPLIB_bufParams1D_t *bufParamsDiag, const DSPLIB_bufParams1D_t *bufParamsSuperDiag, const DSPLIB_svdInitArgs *pKerInitArgs)
template DSPLIB_STATUS DSPLIB_svd_exec_ci< float >(DSPLIB_kernelHandle handle, void *restrict pA, void *restrict pU, void *restrict pV, void *restrict pDiag, void *restrict pSuperDiag, void *restrict pU1, void *restrict pV1, void *restrict pScratch)
static dataType getRecipSqrt(dataType a)
static dataType getSqrt(dataType a)
int DSPLIB_svd_sort_singular_values_ci(const int Nrows, const int Ncols, dataType *U, dataType *U1, dataType *V, dataType *V1, dataType *singular_values, dataType *pScratch, const int colUStride, const int rowUStride, const int colVStride, const int rowVStride, uint32_t enableReducedForm, uint8_t *pBlock)
template int DSPLIB_svd_sort_singular_values_ci< float >(const int Nrows, const int Ncols, float *U, float *U1, float *V, float *V1, float *singular_values, float *pScratch, const int colUStride, const int rowUStride, const int colVStride, const int rowVStride, uint32_t enableReducedForm, uint8_t *pBlock)
template double getRecipSqrt< double >(double a)
int DSPLIB_svd_bidiag_to_diag_ci(const int Nrows, const int Ncols, dataType *U, dataType *V, dataType *diag, dataType *superdiag, dataType *pTemp, const int colUStride, const int rowUStride, const int colVStride, const int rowVStride, uint32_t enableReducedForm, uint8_t *pBlock)
template int DSPLIB_svd_convert_to_bidiag_ci< double >(const int Nrows, const int Ncols, double *U, double *V, double *diag, double *superdiag, const int colUStride, const int colVStride, uint32_t enableReducedForm, double *U1, uint8_t *pBlock)
int DSPLIB_svd_convert_to_bidiag_ci(const int Nrows, const int Ncols, dataType *U, dataType *V, dataType *diag, dataType *superdiag, const int colUStride, const int colVStride, uint32_t enableReducedForm, dataType *U1, uint8_t *pBlock)
DSPLIB_STATUS DSPLIB_svd_init_ci(DSPLIB_kernelHandle handle, const DSPLIB_bufParams2D_t *bufParamsIn, const DSPLIB_bufParams2D_t *bufParamsU, const DSPLIB_bufParams2D_t *bufParamsV, const DSPLIB_bufParams1D_t *bufParamsDiag, const DSPLIB_bufParams1D_t *bufParamsSuperDiag, const DSPLIB_svdInitArgs *pKerInitArgs)
This function is the initialization function for the C7x implementation of the kernel....
template DSPLIB_STATUS DSPLIB_svd_init_ci< double >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams2D_t *bufParamsIn, const DSPLIB_bufParams2D_t *bufParamsU, const DSPLIB_bufParams2D_t *bufParamsV, const DSPLIB_bufParams1D_t *bufParamsDiag, const DSPLIB_bufParams1D_t *bufParamsSuperDiag, const DSPLIB_svdInitArgs *pKerInitArgs)
template float getRecipSqrt< float >(float a)
template double getSqrt< double >(double a)
#define MAX_ITERATION_COUNT
void DSPLIB_diag_negate_v_ci(dataType *V, int32_t Ncols, int32_t colVStride, uint8_t *pBlock)
Negates the values of a row in V.
void DSPLIB_diag_rotation_check_ci(dataType *diag, dataType *superdiag, dataType epsilon, int32_t *m, int32_t *rotation_test, int32_t Ncols, uint8_t *pBlock)
Updates values of "m" and "rotation_test" flag based on the values present in "diag",...
void DSPLIB_diag_epsilon_ci(dataType *diag, dataType *superdiag, dataType *epsilon, int32_t Ncols, uint8_t *pBlock)
Updates "epsilon" value based on absolute max values from "diag" and "superdiag" vectors.
void DSPLIB_diag_proc_ci(dataType *V, int32_t startRow, int32_t Nrows, int32_t Ncols, int32_t rowVStride, dataType *cV, dataType *sV, uint8_t *pBlock)
Updates rows of V' and U' based on the precalculated cV/cU and sV/sU vectors.
Header file for kernel's internal use. For the kernel's interface, please see DSPLIB_svd.
void DSPLIB_singularSort_index_ci(dataType *singular_values, dataType *singularBuffer, int32_t *maxIndArr, int32_t Ncols, uint8_t *pBlock)
This function sorts the singular values in descending order and also records the max index values for...
dataType getRecip(dataType value)
void DSPLIB_singularSort_swap_ci(dataType *V, int32_t Nrows, int32_t Ncols, int32_t rowVStride, int32_t *sortIndex, dataType *vBuff, uint8_t *pBlock)
This function uses the max index values calculated from DSPLIB_singularSort_index_ci to shuffle the r...
#define DSPLIB_DEBUGPRINTFN(N, fmt,...)
Definition: DSPLIB_types.h:83
DSPLIB_STATUS_NAME
The enumeration of all status codes.
Definition: DSPLIB_types.h:151
void * DSPLIB_kernelHandle
Handle type for DSPLIB operations.
Definition: DSPLIB_types.h:172
@ DSPLIB_SUCCESS
Definition: DSPLIB_types.h:152
@ DSPLIB_ERR_FAILURE
Definition: DSPLIB_types.h:153
A structure for a 1 dimensional buffer descriptor.
A structure for a 2 dimensional buffer descriptor.
Structure that is reserved for internal use by the kernel.
Structure containing the parameters to initialize the kernel.
Definition: DSPLIB_svd.h:129
Structure that is reserved for internal use by the kernel.
uint32_t widthIn
Size of input buffer for different batches DSPLIB_svd_init that will be retrieved and used by DSPLIB_...
DSPLIB_matTrans_PrivArgs pMatTransKerPrivArgs5
DSPLIB_matTrans_PrivArgs pMatTransKerPrivArgs2
uint32_t strideU
Stride between rows of U matrix
uint8_t bufPblock[DSPLIB_SVD_IXX_IXX_OXX_PBLOCK_SIZE]
Buffer to save SE & SA configuration parameters
DSPLIB_matTrans_PrivArgs pMatTransKerPrivArgs4
uint32_t enableReducedForm
Flag for enabling the calculation of reduced form enableReducedForm = 1 for reduced form SVD calc ena...
DSPLIB_matTrans_PrivArgs pMatTransKerPrivArgs1
Privargs for the matTrans kernel.
int32_t strideIn
Stride between rows of input data matrix
uint32_t strideV
Stride between rows of V matrix
DSPLIB_matTrans_PrivArgs pMatTransKerPrivArgs3
uint32_t heightIn
Height of input data matrix