DSPLIB User Guide
DSPLIB_svd_cn.cpp
Go to the documentation of this file.
1 /******************************************************************************/
5 /* Copyright (C) 2017 Texas Instruments Incorporated - https://www.ti.com/
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * Redistributions of source code must retain the above copyright
12  * notice, this list of conditions and the following disclaimer.
13  *
14  * Redistributions in binary form must reproduce the above copyright
15  * notice, this list of conditions and the following disclaimer in the
16  * documentation and/or other materials provided with the
17  * distribution.
18  *
19  * Neither the name of Texas Instruments Incorporated nor the names of
20  * its contributors may be used to endorse or promote products derived
21  * from this software without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
29  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
30  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
33  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34  *
35  ******************************************************************************/
36 
37 /******************************************************************************
38  * Version 1.0 Date Aug 2023 Author: Asheesh Bhardwaj
39  *****************************************************************************/
40 
41 /*******************************************************************************
42  *
43  * INCLUDES
44  *
45  ******************************************************************************/
46 
47 #include "DSPLIB_svd_priv.h"
48 
49 /*******************************************************************************
50  *
51  * DEFINES
52  *
53  ******************************************************************************/
54 #define MAX_ITERATION_COUNT 30
55 
56 /* *****************************************************************************
57  *
58  * INITIALIZATION
59  *
60  ***************************************************************************** */
61 
63  const DSPLIB_bufParams2D_t *bufParamsIn,
64  const DSPLIB_bufParams2D_t *bufParamsU,
65  const DSPLIB_bufParams2D_t *bufParamsV,
66  const DSPLIB_bufParams1D_t *bufParamsDiag,
67  const DSPLIB_bufParams1D_t *bufParamsSuperDiag,
68  const DSPLIB_svdInitArgs *pKerInitArgs)
69 {
70  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Entering function");
71 
72  DSPLIB_DEBUGPRINTFN(0, "Exiting function with return status: %d\n", DSPLIB_SUCCESS);
73  return DSPLIB_SUCCESS;
74 }
75 
76 /* *****************************************************************************
77  *
78  * IMPLEMENTATION
79  *
80  ***************************************************************************** */
/**
 * @brief Reduce a matrix to bidiagonal form with Householder reflections and
 *        accumulate the left and right transforms.
 *
 * Matrices are addressed row by row: element (row, col) of U lives at
 * U[col + row * colUStride], and likewise for V with colVStride.
 *
 * On entry U holds the Nrows x Ncols input matrix. On exit:
 *   - diag[0 .. Ncols-1]      diagonal of the bidiagonal matrix,
 *   - superdiag[1 .. Ncols-1] super-diagonal of the bidiagonal matrix
 *                             (superdiag[0] is always written as 0),
 *   - V                       accumulated right transform (Ncols x Ncols),
 *   - U                       accumulated left transform; Nrows x Nrows when
 *                             enableReducedForm == 0, Nrows x Ncols otherwise.
 *
 * The code indexes rows up to Ncols-1 of U, so callers are expected to pass
 * Nrows >= Ncols (DSPF_sp_svd_cn transposes the input to guarantee this).
 *
 * @param Nrows             Number of rows of the input held in U.
 * @param Ncols             Number of columns of the input held in U.
 * @param U                 In: input matrix. Out: left transform.
 * @param V                 Out: right transform.
 * @param diag              Out: diagonal entries (Ncols elements).
 * @param superdiag         Out: super-diagonal entries (Ncols elements); also
 *                          used as scratch while rows are being processed.
 * @param colUStride        Row pitch of U, in elements.
 * @param colVStride        Row pitch of V, in elements.
 * @param enableReducedForm 0 selects the full-size U, any other value the
 *                          reduced (Nrows x Ncols) U.
 *
 * @return Always 0.
 */
template <typename dataType>
int DSPF_sp_convert_to_bidiag_cn(const int Nrows,
                                 const int Ncols,
                                 dataType *U,
                                 dataType *V,
                                 dataType *diag,
                                 dataType *superdiag,
                                 const int colUStride,
                                 const int colVStride,
                                 uint32_t  enableReducedForm)
{
   int      i, j, k;
   dataType s, s2, si, scale, half_norm_squared;

   /* Nothing to decompose; also keeps the V[(Ncols - 1) + ...] and */
   /* superdiag[Ncols - 1] accesses below inside their buffers.       */
   if ((Nrows <= 0) || (Ncols <= 0)) {
      return 0;
   }

   /* Householder processing */
   s     = 0;
   scale = 0;

   for (i = 0; i < Ncols; i++) {
      superdiag[i] = scale * s;

      /* process columns */
      scale = 0;
      for (j = i; j < Nrows; j++) {
         scale += fabs(U[i + j * colUStride]);
      }

      if (scale > 0) {
         s2 = 0;
         for (j = i; j < Nrows; j++) {
            U[i + j * colUStride] = U[i + j * colUStride] / scale;
            s2 += U[i + j * colUStride] * U[i + j * colUStride];
         }
         /* s takes the sign opposite to the pivot so that the */
         /* subtraction below cannot cancel.                   */
         if (U[i + i * colUStride] < 0) {
            s = sqrt(s2);
         }
         else {
            s = -sqrt(s2);
         }
         half_norm_squared = U[i + i * colUStride] * s - s2;
         U[i + i * colUStride] -= s;

         for (j = i + 1; j < Ncols; j++) {
            si = 0;
            for (k = i; k < Nrows; k++) {
               si += U[i + k * colUStride] * U[j + k * colUStride];
            }
            si = si / half_norm_squared;
            for (k = i; k < Nrows; k++) {
               U[j + k * colUStride] += si * U[i + k * colUStride];
            }
         }
      } /* if (scale>0) */

      for (j = i; j < Nrows; j++) {
         U[i + j * colUStride] *= scale;
      }
      diag[i] = s * scale;

      /* process rows */
      s     = 0;
      scale = 0;

      if (i != Ncols - 1) {
         for (j = i + 1; j < Ncols; j++) {
            scale += fabs(U[j + i * colUStride]);
         }

         if (scale > 0) {
            s2 = 0;
            for (j = i + 1; j < Ncols; j++) {
               U[j + i * colUStride] = U[j + i * colUStride] / scale;
               s2 += U[j + i * colUStride] * U[j + i * colUStride];
            }
            /* The sign of s must oppose the LEADING element of the row     */
            /* (column i + 1), which is the element s is subtracted from.   */
            /* Testing the last element of the row instead lets             */
            /* half_norm_squared cancel to zero (e.g. row {-1, 0}) and      */
            /* turns the divisions below into NaN/Inf.                      */
            if (U[i + 1 + i * colUStride] < 0) {
               s = sqrt(s2);
            }
            else {
               s = -sqrt(s2);
            }
            half_norm_squared = U[i + 1 + i * colUStride] * s - s2;
            U[i + 1 + i * colUStride] -= s;

            /* superdiag[i+1 ..] is free at this point and is borrowed as */
            /* scratch for the scaled Householder vector.                 */
            for (k = i + 1; k < Ncols; k++) {
               superdiag[k] = U[k + i * colUStride] / half_norm_squared;
            }
            for (j = i + 1; j < Nrows; j++) {
               si = 0;
               for (k = i + 1; k < Ncols; k++) {
                  si += U[k + i * colUStride] * U[k + j * colUStride];
               }
               for (k = i + 1; k < Ncols; k++) {
                  U[k + j * colUStride] += si * superdiag[k];
               }
            }
         } /* if (scale>0) */

         for (k = i + 1; k < Ncols; k++) {
            U[k + i * colUStride] *= scale;
         }
      } /* if (i!=Ncols-1) */

   } /* for (i=0;i<Ncols;i++) */

   /* update V */
   V[(Ncols - 1) + (Ncols - 1) * colVStride] = 1;
   s                                         = superdiag[Ncols - 1];
   for (i = Ncols - 2; i >= 0; i--) {
      if (s != 0) {
         for (j = i + 1; j < Ncols; j++) {
            V[i + j * colVStride] = U[j + i * colUStride] / (U[i + 1 + i * colUStride] * s);
         }

         for (j = i + 1; j < Ncols; j++) {
            si = 0;
            for (k = i + 1; k < Ncols; k++) {
               si += U[k + i * colUStride] * V[j + k * colVStride];
            }
            for (k = i + 1; k < Ncols; k++) {
               V[j + k * colVStride] += si * V[i + k * colVStride];
            }
         }
      } /* if (s!=0) */

      for (j = i + 1; j < Ncols; j++) {
         V[j + i * colVStride] = 0;
         V[i + j * colVStride] = 0;
      }
      V[i + i * colVStride] = 1;
      s                     = superdiag[i];
   } /* for (i=Ncols-2;i>=0;i--) */

   if (enableReducedForm == 0u) {
      /* expand U from Nrows x Ncols to Nrows x Nrows: the existing columns */
      /* stay where they are, only the new columns have to be cleared       */
      if (Nrows > Ncols) {
         for (i = 0; i < Nrows; i++) {
            for (j = Ncols; j < Nrows; j++) {
               U[j + i * colUStride] = 0;
            }
         }
      }

      /* update U */
      for (i = Ncols - 1; i >= 0; i--) {
         s = diag[i];
         for (j = i + 1; j < Ncols; j++) {
            U[j + i * colUStride] = 0;
         }

         if (s != 0) {
            for (j = i + 1; j < Nrows; j++) {
               si = 0;
               for (k = i + 1; k < Nrows; k++) {
                  si += U[i + k * colUStride] * U[j + k * colUStride];
               }
               si = si / (U[i + i * colUStride] * s);
               for (k = i; k < Nrows; k++) {
                  U[j + k * colUStride] += si * U[i + k * colUStride];
               }
            }

            /* initial U: fill the columns beyond Ncols on the first pass */
            if (i == Ncols - 1) {
               for (j = i; j < Nrows; j++) {
                  for (k = Nrows - 1; k >= i + 1; k--) {
                     U[k + j * colUStride] =
                         U[i + j * colUStride] * U[i + k * colUStride] / (U[i + i * colUStride] * s);
                     if (j == k) {
                        U[k + j * colUStride] += 1;
                     }
                  }
               }
            }
            for (j = i; j < Nrows; j++) {
               U[i + j * colUStride] = U[i + j * colUStride] / s;
            }
         } /* if (s!=0) */
         else {
            if (i == Ncols - 1) {
               for (k = 1; k <= Nrows - Ncols; k++) {
                  U[i + k + (i + k) * colUStride] = 1;
               }
            }
            for (j = i; j < Nrows; j++) {
               U[i + j * colUStride] = 0;
            }
         } /* else (s==0) */
         U[i + i * colUStride] += 1;
      } /* for (i=Ncols-1;i>=0;i--) */
   }
   else {
      /* update U (reduced form: only the first Ncols columns exist) */
      for (i = Ncols - 1; i >= 0; i--) {
         s = diag[i];
         for (j = i + 1; j < Ncols; j++) {
            U[j + i * colUStride] = 0;
         }
         if (s != 0) {
            for (j = i + 1; j < Ncols; j++) {
               si = 0;
               for (k = i + 1; k < Nrows; k++) {
                  si += U[i + k * colUStride] * U[j + k * colUStride];
               }
               si = si / (U[i + i * colUStride] * s);
               for (k = i; k < Nrows; k++) {
                  U[j + k * colUStride] += si * U[i + k * colUStride];
               }
            }
            for (j = i; j < Nrows; j++) {
               U[i + j * colUStride] = U[i + j * colUStride] / s;
            }
         } /* if (s!=0) */
         else {
            for (j = i; j < Nrows; j++) {
               U[i + j * colUStride] = 0;
            }
         } /* else (s==0) */
         U[i + i * colUStride] += 1;
      } /* for (i=Ncols-1;i>=0;i--) */
   }

   return 0;
}
template int DSPF_sp_convert_to_bidiag_cn<float>(const int Nrows,
                                                 const int Ncols,
                                                 float    *U,
                                                 float    *V,
                                                 float    *diag,
                                                 float    *superdiag,
                                                 const int colUStride,
                                                 const int colVStride,
                                                 uint32_t  enableReducedForm);
template int DSPF_sp_convert_to_bidiag_cn<double>(const int Nrows,
                                                  const int Ncols,
                                                  double   *U,
                                                  double   *V,
                                                  double   *diag,
                                                  double   *superdiag,
                                                  const int colUStride,
                                                  const int colVStride,
                                                  uint32_t  enableReducedForm);
336 
337 template <typename dataType>
338 int DSPF_sp_bidiag_to_diag_cn(const int Nrows,
339  const int Ncols,
340  dataType *U,
341  dataType *V,
342  dataType *diag,
343  dataType *superdiag,
344  const int colUStride,
345  const int colVStride,
346  uint32_t enableReducedForm)
347 {
348 
349  int row, i, k, m, rotation_test, iter, total_iter;
350  dataType x, y, z, epsilon;
351  dataType c, s, f, g, h;
352 
353  iter = 0;
354  total_iter = 0;
355 
356  /* ------------------------------------------------------------------- */
357  /* find max in col */
358  /* ------------------------------------------------------------------- */
359  x = 0;
360  for (i = 0; i < Ncols; i++) {
361  y = fabs(diag[i]) + fabs(superdiag[i]);
362  if (x < y) {
363  x = y;
364  }
365  }
366  if (sizeof(dataType) == 4) {
367  epsilon = FLT_EPSILON * x;
368  }
369  else {
370  epsilon = DBL_EPSILON * x;
371  }
372 
373  for (k = Ncols - 1; k >= 0; k--) {
374  total_iter += iter;
375  iter = 0;
376  while (true) {
377  rotation_test = 1;
378 
379 
380  /* Coverage issue fixes: superdiag[0] is always 0. Hence loop modified to m > 0. */
381  // for (m = k; m >= 0; m--) {
382  // if (fabs(superdiag[m]) <= epsilon) {
383  // rotation_test = 0;
384  // break;
385  // }
386  // if (fabs(diag[m - 1]) <= epsilon) {
387  // break;
388  // }
389  // } /* for (m=k;m>=0;m--) */
390 
391  for (m = k; m > 0; m--) {
392  if (fabs(superdiag[m]) <= epsilon) {
393  rotation_test = 0;
394  break; /* goto "test f convergence" part */
395  }
396  if (fabs(diag[m - 1]) <= epsilon) {
397  break; /* goto "cancellation" part */
398  }
399  } /* for (m=k;m>=0;m--) */
400 
401  if (m == 0) {
402  rotation_test = 0;
403  }
404 
405 
406  if (rotation_test) { /* "cancellation" part */
407  c = 0;
408  s = 1;
409  for (i = m; i <= k; i++) {
410  f = s * superdiag[i];
411  superdiag[i] = c * superdiag[i];
412 #if !defined(ENABLE_LDRA_COVERAGE)
413 /* This part of code checks for "test f convergence" part condition
414  Ref. Singular Value Decomposition and Least Squares Solutions. G. H. Golub et al
415  We use these conditions in order to calculate correct results if and when they occur */
416  if (fabs(f) <= epsilon) {
417  break;
418  }
419 #endif
420  g = diag[i];
421  h = sqrt(f * f + g * g);
422  diag[i] = h;
423  c = g / h;
424  s = -f / h;
425 
426  if (enableReducedForm == 0u) {
427  for (row = 0; row < Nrows; row++) {
428  y = U[m - 1 + row * colUStride];
429  z = U[i + row * colUStride];
430  U[m - 1 + row * colUStride] = y * c + z * s;
431  U[i + row * colUStride] = -y * s + z * c;
432  }
433  }
434  else {
435  for (row = 0; row < Nrows; row++) {
436  y = U[m - 1 + row * colUStride];
437  z = U[i + row * colUStride];
438  U[m - 1 + row * colUStride] = y * c + z * s;
439  U[i + row * colUStride] = -y * s + z * c;
440  }
441  }
442 
443  } /* for (i=m;i<=k;i++) */
444  } /* if (rotation_test) */
445 
446  z = diag[k];
447  if (m == k) { /* "test f convergence" part */
448  if (z < 0) {
449  diag[k] = -z;
450  for (row = 0; row < Ncols; row++) {
451  V[k + row * colVStride] = -V[k + row * colVStride];
452  }
453  } /* if (z>0) */
454  break;
455  } /* if (m==k) */
456  else {
457 #if !defined(ENABLE_LDRA_COVERAGE)
458 /* This part of code retricts the count of "test f splitting" part
459  Ref. Singular Value Decomposition and Least Squares Solutions. G. H. Golub et al
460  We use these conditions in order to break the while loop to avoid infinite loop */
461  if (iter >= MAX_ITERATION_COUNT) {
462  total_iter = -1;
463  break;
464  }
465 #endif
466  iter++;
467  x = diag[m];
468  y = diag[k - 1];
469  g = superdiag[k - 1];
470  h = superdiag[k];
471  f = ((y - z) * (y + z) + (g - h) * (g + h)) / (2 * h * y);
472  g = sqrt(f * f + 1);
473  if (f < 0) {
474  g = -g;
475  }
476  f = ((x - z) * (x + z) + h * (y / (f + g) - h)) / x;
477  /* next QR transformation */
478  c = 1;
479  s = 1;
480  for (i = m + 1; i <= k; i++) {
481  g = superdiag[i];
482  y = diag[i];
483  h = s * g;
484  g = g * c;
485  z = sqrt(f * f + h * h);
486  superdiag[i - 1] = z;
487  c = f / z;
488  s = h / z;
489  f = x * c + g * s;
490  g = -x * s + g * c;
491  h = y * s;
492  y = c * y;
493 
494  for (row = 0; row < Ncols; row++) {
495  x = V[i - 1 + row * colVStride];
496  z = V[i + row * colVStride];
497  V[i - 1 + row * colVStride] = x * c + z * s;
498  V[i + row * colVStride] = -x * s + z * c;
499  }
500  z = sqrt(f * f + h * h);
501  diag[i - 1] = z;
502 #if !defined(ENABLE_LDRA_COVERAGE)
503 /* This part of code test value of z in order to avoid the "inf" result */
504  if (z != 0) {
505  c = f / z;
506  s = h / z;
507  }
508 #else
509  c = f / z;
510  s = h / z;
511 #endif
512  f = c * g + s * y;
513  x = -s * g + c * y;
514 
515  if (enableReducedForm == 0u) {
516  for (row = 0; row < Nrows; row++) {
517  y = U[i - 1 + row * colUStride];
518  z = U[i + row * colUStride];
519  U[i - 1 + row * colUStride] = c * y + s * z;
520  U[i + row * colUStride] = -s * y + c * z;
521  }
522  }
523  else {
524  for (row = 0; row < Nrows; row++) {
525  y = U[i - 1 + row * colUStride];
526  z = U[i + row * colUStride];
527  U[i - 1 + row * colUStride] = c * y + s * z;
528  U[i + row * colUStride] = -s * y + c * z;
529  }
530  }
531  } /* for (i=m+1;i<=k;i++) */
532  superdiag[m] = 0;
533  superdiag[k] = f;
534  diag[k] = x;
535  } /* if (m==k) */
536  } /* while (1==1) */
537  } /* for (k=Ncols-1:k>=0;k--) */
538 
539  return total_iter;
540 }
541 template int DSPF_sp_bidiag_to_diag_cn<float>(const int Nrows,
542  const int Ncols,
543  float *U,
544  float *V,
545  float *diag,
546  float *superdiag,
547  const int colUStride,
548  const int colVStride,
549  uint32_t enableReducedForm);
550 template int DSPF_sp_bidiag_to_diag_cn<double>(const int Nrows,
551  const int Ncols,
552  double *U,
553  double *V,
554  double *diag,
555  double *superdiag,
556  const int colUStride,
557  const int colVStride,
558  uint32_t enableReducedForm);
559 
/**
 * @brief Sort the singular values in descending order (selection sort) and
 *        apply the same column permutation to U and V.
 *
 * Matrices are addressed row by row: element (row, col) of U lives at
 * U[col + row * colUStride], and likewise for V with colVStride. Only the
 * first Ncols columns take part in the permutation; Nrows rows of U and
 * Ncols rows of V are touched.
 *
 * @param Nrows             Number of rows of U.
 * @param Ncols             Number of singular values / rows of V.
 * @param U                 In/out: left singular vectors (columns swapped).
 * @param V                 In/out: right singular vectors (columns swapped).
 * @param singular_values   In/out: values to sort, largest first on exit.
 * @param colUStride        Row pitch of U, in elements.
 * @param colVStride        Row pitch of V, in elements.
 * @param enableReducedForm Not used by this routine: the column swap in U
 *                          spans the same Nrows rows in both forms.
 *
 * @return Always 0.
 */
template <typename dataType>
int DSPF_sp_sort_singular_values_cn(const int Nrows,
                                    const int Ncols,
                                    dataType *U,
                                    dataType *V,
                                    dataType *singular_values,
                                    const int colUStride,
                                    const int colVStride,
                                    uint32_t  enableReducedForm)
{
   int      i, j, row, max_index;
   dataType temp;

   (void) enableReducedForm;

   for (i = 0; i < Ncols - 1; i++) {
      /* locate the largest remaining value */
      max_index = i;
      for (j = i + 1; j < Ncols; j++) {
         if (singular_values[j] > singular_values[max_index]) {
            max_index = j;
         }
      }
      if (max_index == i) {
         continue; /* already in place */
      }

      temp                       = singular_values[i];
      singular_values[i]         = singular_values[max_index];
      singular_values[max_index] = temp;

      /* keep the singular vectors paired with their value */
      for (row = 0; row < Nrows; row++) {
         temp                            = U[max_index + row * colUStride];
         U[max_index + row * colUStride] = U[i + row * colUStride];
         U[i + row * colUStride]         = temp;
      }
      for (row = 0; row < Ncols; row++) {
         temp                            = V[max_index + row * colVStride];
         V[max_index + row * colVStride] = V[i + row * colVStride];
         V[i + row * colVStride]         = temp;
      }
   }
   return 0;
}
template int DSPF_sp_sort_singular_values_cn<float>(const int Nrows,
                                                    const int Ncols,
                                                    float    *U,
                                                    float    *V,
                                                    float    *singular_values,
                                                    const int colUStride,
                                                    const int colVStride,
                                                    uint32_t  enableReducedForm);
template int DSPF_sp_sort_singular_values_cn<double>(const int Nrows,
                                                     const int Ncols,
                                                     double   *U,
                                                     double   *V,
                                                     double   *singular_values,
                                                     const int colUStride,
                                                     const int colVStride,
                                                     uint32_t  enableReducedForm);
624 
625 template <typename dataType>
627  const int Nrows,
628  const int Ncols,
629  dataType *A,
630  dataType *U,
631  dataType *V,
632  dataType *U1,
633  dataType *V1,
634  dataType *diag,
635  dataType *superdiag,
636  const int32_t strideIn,
637  const int32_t strideU,
638  const int32_t strideV,
639  uint32_t enableReducedForm)
640 {
641  DSPLIB_DEBUGPRINTFN(0, "Entering function pA: %p\n", A);
642 
643  int row, col, Nrows1, Ncols1, status;
644  /* ------------------------------------------------------------------- */
645  /* copy A matrix to U */
646  /* ------------------------------------------------------------------- */
647 
648  if (Nrows >= Ncols) {
649  Nrows1 = Nrows;
650  Ncols1 = Ncols;
651  }
652  else {
653  Nrows1 = Ncols;
654  Ncols1 = Nrows;
655  }
656  int32_t dataSize = sizeof(dataType);
657  int32_t colUStride = strideU / dataSize;
658  int32_t colVStride = strideV / dataSize;
659  int32_t colAStride = strideIn / dataSize;
660 
661  if (Nrows >= Ncols) {
662  /* Copy A to U */
663  for (row = 0; row < Nrows1; row++) {
664  for (col = 0; col < Ncols1; col++) {
665  U[col + row * colUStride] = A[col + row * colAStride];
666  }
667  }
668  }
669  else {
670  /* Copy A' to U */
671  for (row = 0; row < Nrows1; row++) {
672  for (col = 0; col < Ncols1; col++) {
673  U[col + row * colUStride] = A[row + col * colAStride];
674  }
675  }
676  }
677 
678  /* ------------------------------------------------------------------- */
679  /* convert A to bidiagonal matrix using Householder reflections */
680  /* ------------------------------------------------------------------- */
681  DSPF_sp_convert_to_bidiag_cn<dataType>(Nrows1, Ncols1, U, V, diag, superdiag, colUStride, colVStride,
682  enableReducedForm);
683 
684  /* ------------------------------------------------------------------- */
685  /* convert bidiagonal to diagonal using Givens rotations */
686  /* ------------------------------------------------------------------- */
687  status = DSPF_sp_bidiag_to_diag_cn<dataType>(Nrows1, Ncols1, U, V, diag, superdiag, colUStride, colVStride,
688  enableReducedForm);
689 
690  /* ------------------------------------------------------------------- */
691  /* sort singular values in descending order */
692  /* ------------------------------------------------------------------- */
693  DSPF_sp_sort_singular_values_cn<dataType>(Nrows1, Ncols1, U, V, diag, colUStride, colVStride, enableReducedForm);
694 
695  /* ------------------------------------------------------------------- */
696  /* switch U and V */
697  /* ------------------------------------------------------------------- */
698  if (Ncols > Nrows) {
699 
700  if (enableReducedForm == 0u) {
701  memcpy(U1, V, sizeof(dataType) * Nrows * colVStride);
702  memcpy(V, U, sizeof(dataType) * Ncols * colUStride);
703  memcpy(U, U1, sizeof(dataType) * Nrows * colUStride);
704  }
705  else {
706  memcpy(U1, V, sizeof(dataType) * Ncols * colVStride);
707  memcpy(V, U, sizeof(dataType) * Ncols * colUStride);
708  memcpy(U, U1, sizeof(dataType) * Nrows * colUStride);
709  }
710  }
711 
712  DSPLIB_DEBUGPRINTFN(0, "Exiting function with status: %d\n", status);
713 
714  return status;
715 }
716 template int DSPF_sp_svd_cn<float>(DSPLIB_svd_PrivArgs *pKerPrivArgs,
717  const int Nrows,
718  const int Ncols,
719  float *A,
720  float *U,
721  float *V,
722  float *U1,
723  float *V1,
724  float *diag,
725  float *superdiag,
726  const int32_t strideIn,
727  const int32_t strideU,
728  const int32_t strideV,
729  uint32_t enableReducedForm);
730 template int DSPF_sp_svd_cn<double>(DSPLIB_svd_PrivArgs *pKerPrivArgs,
731  const int Nrows,
732  const int Ncols,
733  double *A,
734  double *U,
735  double *V,
736  double *U1,
737  double *V1,
738  double *diag,
739  double *superdiag,
740  const int32_t strideIn,
741  const int32_t strideU,
742  const int32_t strideV,
743  uint32_t enableReducedForm);
744 
745 template <typename dataType>
747  void *restrict pA,
748  void *restrict pU,
749  void *restrict pV,
750  void *restrict pDiag,
751  void *restrict pSuperDiag,
752  void *restrict pU1,
753  void *restrict pV1,
754  void *restrict pScratch)
755 {
756  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Entering function");
757 
758  DSPLIB_STATUS status = DSPLIB_SUCCESS;
759 
760  DSPLIB_svd_PrivArgs *pKerPrivArgs = (DSPLIB_svd_PrivArgs *) handle;
761  uint32_t heightIn = pKerPrivArgs->heightIn;
762  uint32_t widthIn = pKerPrivArgs->widthIn;
763  int32_t strideIn = pKerPrivArgs->strideIn;
764  int32_t strideU = pKerPrivArgs->strideU;
765  int32_t strideV = pKerPrivArgs->strideV;
766  uint32_t enableReducedForm = pKerPrivArgs->enableReducedForm;
767 
768  /* Typecast void pointers to respective data type */
769  dataType *pALocal = (dataType *) pA;
770  dataType *pULocal = (dataType *) pU;
771  dataType *pVLocal = (dataType *) pV;
772  dataType *pDiagLocal = (dataType *) pDiag;
773  dataType *pSuperDiagLocal = (dataType *) pSuperDiag;
774  dataType *pU1Local = (dataType *) pU1;
775  dataType *pV1Local = (dataType *) pV1;
776 
777  DSPLIB_DEBUGPRINTFN(0, "pALocal: %p pOutLocal: %p widthIn: %d heightIn: %d\n", pALocal, pULocal, widthIn, heightIn);
778 
779 #if !defined(ENABLE_LDRA_COVERAGE)
780  int svd_status = DSPF_sp_svd_cn<dataType>(pKerPrivArgs, heightIn, widthIn, pALocal, pULocal, pVLocal, pU1Local, pV1Local, pDiagLocal,
781  pSuperDiagLocal, strideIn, strideU, strideV, enableReducedForm);
782  if(svd_status < 0){
783  status = DSPLIB_ERR_FAILURE;
784  }
785 #else
786  DSPF_sp_svd_cn<dataType>(pKerPrivArgs, heightIn, widthIn, pALocal, pULocal, pVLocal, pU1Local, pV1Local, pDiagLocal,
787  pSuperDiagLocal, strideIn, strideU, strideV, enableReducedForm);
788 #endif
789  DSPLIB_DEBUGPRINTFN(0, "Exiting function with return status: %d\n", status);
790  return status;
791 }
792 
794  void *restrict pA,
795  void *restrict pU,
796  void *restrict pV,
797  void *restrict pDiag,
798  void *restrict pSuperDiag,
799  void *restrict pU1,
800  void *restrict pV1,
801  void *restrict pScratch);
802 
804  void *restrict pA,
805  void *restrict pU,
806  void *restrict pV,
807  void *restrict pDiag,
808  void *restrict pSuperDiag,
809  void *restrict pU1,
810  void *restrict pV1,
811  void *restrict pScratch);
812 
813 /* ======================================================================== */
814 /* End of file: DSPLIB_svd_cn.cpp */
815 /* ======================================================================== */
DSPLIB_STATUS DSPLIB_svd_exec_cn(DSPLIB_kernelHandle handle, void *restrict pA, void *restrict pU, void *restrict pV, void *restrict pDiag, void *restrict pSuperDiag, void *restrict pU1, void *restrict pV1, void *restrict pScratch)
This function is the main execution function for the natural C implementation of the kernel....
template int DSPF_sp_sort_singular_values_cn< double >(const int Nrows, const int Ncols, double *U, double *V, double *singular_values, const int colUStride, const int colVStride, uint32_t enableReducedForm)
template int DSPF_sp_convert_to_bidiag_cn< float >(const int Nrows, const int Ncols, float *U, float *V, float *diag, float *superdiag, const int colUStride, const int colVStride, uint32_t enableReducedForm)
int DSPF_sp_bidiag_to_diag_cn(const int Nrows, const int Ncols, dataType *U, dataType *V, dataType *diag, dataType *superdiag, const int colUStride, const int colVStride, uint32_t enableReducedForm)
template int DSPF_sp_convert_to_bidiag_cn< double >(const int Nrows, const int Ncols, double *U, double *V, double *diag, double *superdiag, const int colUStride, const int colVStride, uint32_t enableReducedForm)
DSPLIB_STATUS DSPLIB_svd_init_cn(DSPLIB_kernelHandle handle, const DSPLIB_bufParams2D_t *bufParamsIn, const DSPLIB_bufParams2D_t *bufParamsU, const DSPLIB_bufParams2D_t *bufParamsV, const DSPLIB_bufParams1D_t *bufParamsDiag, const DSPLIB_bufParams1D_t *bufParamsSuperDiag, const DSPLIB_svdInitArgs *pKerInitArgs)
This function is the initialization function for the natural C implementation of the kernel....
template DSPLIB_STATUS DSPLIB_svd_exec_cn< double >(DSPLIB_kernelHandle handle, void *restrict pA, void *restrict pU, void *restrict pV, void *restrict pDiag, void *restrict pSuperDiag, void *restrict pU1, void *restrict pV1, void *restrict pScratch)
int DSPF_sp_sort_singular_values_cn(const int Nrows, const int Ncols, dataType *U, dataType *V, dataType *singular_values, const int colUStride, const int colVStride, uint32_t enableReducedForm)
template int DSPF_sp_svd_cn< float >(DSPLIB_svd_PrivArgs *pKerPrivArgs, const int Nrows, const int Ncols, float *A, float *U, float *V, float *U1, float *V1, float *diag, float *superdiag, const int32_t strideIn, const int32_t strideU, const int32_t strideV, uint32_t enableReducedForm)
template DSPLIB_STATUS DSPLIB_svd_exec_cn< float >(DSPLIB_kernelHandle handle, void *restrict pA, void *restrict pU, void *restrict pV, void *restrict pDiag, void *restrict pSuperDiag, void *restrict pU1, void *restrict pV1, void *restrict pScratch)
template int DSPF_sp_svd_cn< double >(DSPLIB_svd_PrivArgs *pKerPrivArgs, const int Nrows, const int Ncols, double *A, double *U, double *V, double *U1, double *V1, double *diag, double *superdiag, const int32_t strideIn, const int32_t strideU, const int32_t strideV, uint32_t enableReducedForm)
int DSPF_sp_convert_to_bidiag_cn(const int Nrows, const int Ncols, dataType *U, dataType *V, dataType *diag, dataType *superdiag, const int colUStride, const int colVStride, uint32_t enableReducedForm)
template int DSPF_sp_sort_singular_values_cn< float >(const int Nrows, const int Ncols, float *U, float *V, float *singular_values, const int colUStride, const int colVStride, uint32_t enableReducedForm)
int DSPF_sp_svd_cn(DSPLIB_svd_PrivArgs *pKerPrivArgs, const int Nrows, const int Ncols, dataType *A, dataType *U, dataType *V, dataType *U1, dataType *V1, dataType *diag, dataType *superdiag, const int32_t strideIn, const int32_t strideU, const int32_t strideV, uint32_t enableReducedForm)
template int DSPF_sp_bidiag_to_diag_cn< double >(const int Nrows, const int Ncols, double *U, double *V, double *diag, double *superdiag, const int colUStride, const int colVStride, uint32_t enableReducedForm)
template int DSPF_sp_bidiag_to_diag_cn< float >(const int Nrows, const int Ncols, float *U, float *V, float *diag, float *superdiag, const int colUStride, const int colVStride, uint32_t enableReducedForm)
#define MAX_ITERATION_COUNT
Header file for kernel's internal use. For the kernel's interface, please see DSPLIB_svd.
#define DSPLIB_DEBUGPRINTFN(N, fmt,...)
Definition: DSPLIB_types.h:83
DSPLIB_STATUS_NAME
The enumeration of all status codes.
Definition: DSPLIB_types.h:151
void * DSPLIB_kernelHandle
Handle type for DSPLIB operations.
Definition: DSPLIB_types.h:172
@ DSPLIB_SUCCESS
Definition: DSPLIB_types.h:152
@ DSPLIB_ERR_FAILURE
Definition: DSPLIB_types.h:153
A structure for a 1 dimensional buffer descriptor.
A structure for a 2 dimensional buffer descriptor.
Structure containing the parameters to initialize the kernel.
Definition: DSPLIB_svd.h:129
Structure that is reserved for internal use by the kernel.
uint32_t widthIn
Size of input buffer for different batches DSPLIB_svd_init that will be retrieved and used by DSPLIB_...
uint32_t strideU
Stride between rows of U matrix
uint32_t enableReducedForm
Flag for enabling the calculation of reduced form enableReducedForm = 1 for reduced form SVD calc ena...
int32_t strideIn
Stride between rows of input data matrix
uint32_t strideV
Stride between rows of V matrix
uint32_t heightIn
Height of input data matrix