DSPLIB User Guide
DSPLIB_svd_small_ci.cpp
Go to the documentation of this file.
1 
2 /******************************************************************************/
6 /* Copyright (C) 2017 Texas Instruments Incorporated - https://www.ti.com/
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * Redistributions of source code must retain the above copyright
13  * notice, this list of conditions and the following disclaimer.
14  *
15  * Redistributions in binary form must reproduce the above copyright
16  * notice, this list of conditions and the following disclaimer in the
17  * documentation and/or other materials provided with the
18  * distribution.
19  *
20  * Neither the name of Texas Instruments Incorporated nor the names of
21  * its contributors may be used to endorse or promote products derived
22  * from this software without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
27  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
28  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
29  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
30  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
31  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
32  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
33  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
34  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35  *
36  ******************************************************************************/
37 
38 /******************************************************************************
39  * Version 1.0 Date Aug 2023 Author: Asheesh Bhardwaj
40  *****************************************************************************/
41 
42 /*******************************************************************************
43  *
44  * INCLUDES
45  *
46  ******************************************************************************/
47 
48 #include "DSPLIB_svd_small_diag.h"
49 #include "DSPLIB_svd_small_priv.h"
53 
54 /*******************************************************************************
55  *
56  * DEFINES
57  *
58  ******************************************************************************/
59 #define MAX_ITERATION_COUNT 30
60 
61 /* *****************************************************************************
62  *
63  * INITIALIZATION
64  *
65  ***************************************************************************** */
66 
67 template <typename dataType>
69  const DSPLIB_bufParams2D_t *bufParamsIn,
70  const DSPLIB_bufParams2D_t *bufParamsU,
71  const DSPLIB_bufParams2D_t *bufParamsV,
72  const DSPLIB_bufParams1D_t *bufParamsDiag,
73  const DSPLIB_bufParams1D_t *bufParamsSuperDiag,
74  const DSPLIB_svd_small_InitArgs *pKerInitArgs)
75 {
76  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Entering function");
77 
78  DSPLIB_DEBUGPRINTFN(0, "Exiting function with return status: %d\n", DSPLIB_SUCCESS);
79  return DSPLIB_SUCCESS;
80 }
81 
82 // template DSPLIB_STATUS DSPLIB_svd_small_init_ci<float>(DSPLIB_kernelHandle handle,
83 // const DSPLIB_bufParams2D_t *bufParamsIn,
84 // const DSPLIB_bufParams2D_t *bufParamsU,
85 // const DSPLIB_bufParams2D_t *bufParamsV,
86 // const DSPLIB_bufParams1D_t *bufParamsDiag,
87 // const DSPLIB_bufParams1D_t *bufParamsSuperDiag,
88 // const DSPLIB_svd_small_InitArgs *pKerInitArgs);
89 
91  const DSPLIB_bufParams2D_t *bufParamsIn,
92  const DSPLIB_bufParams2D_t *bufParamsU,
93  const DSPLIB_bufParams2D_t *bufParamsV,
94  const DSPLIB_bufParams1D_t *bufParamsDiag,
95  const DSPLIB_bufParams1D_t *bufParamsSuperDiag,
96  const DSPLIB_svd_small_InitArgs *pKerInitArgs);
97 
98 /* *****************************************************************************
99  *
100  * IMPLEMENTATION
101  *
102  ***************************************************************************** */
103 #if (__C7X_VEC_SIZE_BITS__ == 512)
104 
105 template <typename dataType>
106 static inline int DSPF_sp_convert_to_bidiag_ci(const int Nrows,
107  const int Ncols,
108  dataType *U,
109  dataType *V,
110  dataType *diag,
111  dataType *superdiag,
112  const int colUStride,
113  const int colVStride,
114  uint32_t enableReducedForm)
115 {
116  // int i, j, k;
117  // dataType si;
118  dataType s, scale;
119 
120  /* Householder processing */
121  s = 0;
122  scale = 0;
123 
124  if (Nrows == 6) {
125  u_process_1st_iter(&U[0 + 0 * colUStride], colUStride, diag, superdiag, &scale, &s, 6, 0);
126  u_process_2nd_iter(&U[1 + 1 * colUStride], colUStride, diag, superdiag, &scale, &s, 5, 1);
127  u_process_3rd_iter(&U[2 + 2 * colUStride], colUStride, diag, superdiag, &scale, &s, 4, 2);
128  u_process_4th_iter(&U[3 + 3 * colUStride], colUStride, diag, superdiag, &scale, &s, 3, 3);
129  u_process_5th_iter(&U[4 + 4 * colUStride], colUStride, diag, superdiag, &scale, &s, 2, 4);
130  u_process_6th_iter(&U[5 + 5 * colUStride], colUStride, diag, superdiag, &scale, &s, 1, 5);
131  }
132  else {
133  u_process_0th_iter(&U[0 + 0 * colUStride], colUStride, diag, superdiag, &scale, &s, 6, 0);
134  u_process_1st_iter(&U[1 + 1 * colUStride], colUStride, diag, superdiag, &scale, &s, 5, 1);
135  u_process_2nd_iter(&U[2 + 2 * colUStride], colUStride, diag, superdiag, &scale, &s, 4, 2);
136  u_process_3rd_iter(&U[3 + 3 * colUStride], colUStride, diag, superdiag, &scale, &s, 3, 3);
137  u_process_4th_iter(&U[4 + 4 * colUStride], colUStride, diag, superdiag, &scale, &s, 2, 4);
138  u_process_5th_iter(&U[5 + 5 * colUStride], colUStride, diag, superdiag, &scale, &s, 1, 5);
139  }
140 
141  /* update V */
142  int diag_index_V = (Ncols - 1) + (Ncols - 1) * colVStride;
143  V[diag_index_V] = 1;
144  s = superdiag[Ncols - 1];
145  int diag_index_U = (Ncols - 2) + (Ncols - 2) * colUStride;
146  diag_index_V = (Ncols - 2) + (Ncols - 2) * colVStride;
147  v_process_1st_iter(&U[diag_index_U], &V[diag_index_V], colUStride, colVStride, &s);
148 
149  V[diag_index_V] = 1;
150  s = superdiag[Ncols - 2];
151  diag_index_U = (Ncols - 3) + (Ncols - 3) * colUStride;
152  diag_index_V = (Ncols - 3) + (Ncols - 3) * colVStride;
153  v_process_2nd_iter(&U[diag_index_U], &V[diag_index_V], colUStride, colVStride, &s);
154 
155  V[diag_index_V] = 1;
156  s = superdiag[Ncols - 3];
157  diag_index_U = (Ncols - 4) + (Ncols - 4) * colUStride;
158  diag_index_V = (Ncols - 4) + (Ncols - 4) * colVStride;
159  v_process_3rd_iter(&U[diag_index_U], &V[diag_index_V], colUStride, colVStride, &s);
160 
161  V[diag_index_V] = 1;
162  s = superdiag[Ncols - 4];
163  diag_index_U = (Ncols - 5) + (Ncols - 5) * colUStride;
164  diag_index_V = (Ncols - 5) + (Ncols - 5) * colVStride;
165  v_process_4th_iter(&U[diag_index_U], &V[diag_index_V], colUStride, colVStride, &s);
166 
167  V[diag_index_V] = 1;
168  s = superdiag[Ncols - 5];
169  diag_index_U = (Ncols - 6) + (Ncols - 6) * colUStride;
170  diag_index_V = (Ncols - 6) + (Ncols - 6) * colVStride;
171  v_process_5th_iter(&U[diag_index_U], &V[diag_index_V], colUStride, colVStride, &s);
172 
173  if (Nrows == 6) {
174  dataType *U_diag = &U[(Ncols - 1) + (Ncols - 1) * colUStride];
175  s = diag[Ncols - 1];
176  u_update_1st_iter(U_diag, colUStride, diag, s);
177 
178  U_diag = &U[(Ncols - 2) + (Ncols - 2) * colUStride];
179  s = diag[Ncols - 2];
180  u_update_2nd_iter(U_diag, colUStride, diag, s);
181 
182  U_diag = &U[(Ncols - 3) + (Ncols - 3) * colUStride];
183  s = diag[Ncols - 3];
184  u_update_3rd_iter(U_diag, colUStride, diag, s);
185 
186  U_diag = &U[(Ncols - 4) + (Ncols - 4) * colUStride];
187  s = diag[Ncols - 4];
188  u_update_4th_iter(U_diag, colUStride, diag, s);
189 
190  U_diag = &U[(Ncols - 5) + (Ncols - 5) * colUStride];
191  s = diag[Ncols - 5];
192  u_update_5th_iter(U_diag, colUStride, diag, s);
193 
194  U_diag = &U[(Ncols - 6) + (Ncols - 6) * colUStride];
195  s = diag[Ncols - 6];
196  u_update_6th_iter(U_diag, colUStride, diag, s);
197  }
198  else {
199  if (enableReducedForm == 0) {
200  dataType *U_diag = &U[(Ncols - 1) + (Ncols - 1) * colUStride];
201  s = diag[Ncols - 1];
202  u_update_6X7_R_1st_iter(U_diag, colUStride, diag, s, 2U);
203 
204  U_diag = &U[(Ncols - 2) + (Ncols - 2) * colUStride];
205  s = diag[Ncols - 2];
206  u_update_6X7_NR_2nd_iter(U_diag, colUStride, diag, s, 3U);
207 
208  U_diag = &U[(Ncols - 3) + (Ncols - 3) * colUStride];
209  s = diag[Ncols - 3];
210  u_update_6X7_NR_3rd_iter(U_diag, colUStride, diag, s, 4U);
211 
212  U_diag = &U[(Ncols - 4) + (Ncols - 4) * colUStride];
213  s = diag[Ncols - 4];
214  u_update_6X7_NR_4th_iter(U_diag, colUStride, diag, s, 5U);
215 
216  U_diag = &U[(Ncols - 5) + (Ncols - 5) * colUStride];
217  s = diag[Ncols - 5];
218  u_update_6X7_NR_5th_iter(U_diag, colUStride, diag, s, 6U);
219 
220  U_diag = &U[(Ncols - 6) + (Ncols - 6) * colUStride];
221  s = diag[Ncols - 6];
222  u_update_6X7_NR_6th_iter(U_diag, colUStride, diag, s, 7U);
223  }
224  else {
225  dataType *U_diag = &U[(Ncols - 1) + (Ncols - 1) * colUStride];
226  s = diag[Ncols - 1];
227  u_update_6X7_NR_1st_iter(U_diag, colUStride, diag, s, 1U);
228 
229  U_diag = &U[(Ncols - 2) + (Ncols - 2) * colUStride];
230  s = diag[Ncols - 2];
231  u_update_6X7_NR_2nd_iter(U_diag, colUStride, diag, s, 2U);
232 
233  U_diag = &U[(Ncols - 3) + (Ncols - 3) * colUStride];
234  s = diag[Ncols - 3];
235  u_update_6X7_NR_3rd_iter(U_diag, colUStride, diag, s, 3U);
236 
237  U_diag = &U[(Ncols - 4) + (Ncols - 4) * colUStride];
238  s = diag[Ncols - 4];
239  u_update_6X7_NR_4th_iter(U_diag, colUStride, diag, s, 4U);
240 
241  U_diag = &U[(Ncols - 5) + (Ncols - 5) * colUStride];
242  s = diag[Ncols - 5];
243  u_update_6X7_NR_5th_iter(U_diag, colUStride, diag, s, 5U);
244 
245  U_diag = &U[(Ncols - 6) + (Ncols - 6) * colUStride];
246  s = diag[Ncols - 6];
247  u_update_6X7_NR_6th_iter(U_diag, colUStride, diag, s, 6U);
248  }
249  }
250 
251  return 0;
252 }
253 // template int DSPF_sp_convert_to_bidiag_ci<float>(const int Nrows,
254 // const int Ncols,
255 // float *U,
256 // float *V,
257 // float *diag,
258 // float *superdiag,
259 // const int colUStride,
260 // const int colVStride,
261 // uint32_t enableReducedForm);
262 template int DSPF_sp_convert_to_bidiag_ci<double>(const int Nrows,
263  const int Ncols,
264  double *U,
265  double *V,
266  double *diag,
267  double *superdiag,
268  const int colUStride,
269  const int colVStride,
270  uint32_t enableReducedForm);
271 
272 template <typename dataType>
273 static inline int DSPF_sp_bidiag_to_diag_ci(const int Nrows,
274  const int Ncols,
275  dataType *U,
276  dataType *V,
277  dataType *diag,
278  dataType *superdiag,
279  const int colUStride,
280  const int colVStride,
281  uint32_t enableReducedForm)
282 {
283  // int row;
284  int m = 0;
285  int i, k, rotation_test, iter, total_iter;
286  dataType x, y, z, epsilon;
287  dataType c, s, f, g, h;
288 
289  iter = 0;
290  total_iter = 0;
291  /* ------------------------------------------------------------------- */
292  /* find max in col */
293  /* ------------------------------------------------------------------- */
294  typedef typename c7x::make_full_vector<double>::type vec;
295 
296  epsilon = set_epsilon(diag, superdiag);
297 
298  for (k = Ncols - 1; k >= 0; k--) {
299  total_iter += iter;
300  iter = 0;
301  while (true) {
302  rotation_test = 1;
303  rotation_test = rotation_test_check(diag, superdiag, &m, epsilon, (uint32_t) (k + 1));
304 
305  if (rotation_test) {
306  c = 0;
307  s = 1;
308  __vpred pred_Z = __mask_long((uint32_t) Nrows);
309  for (i = m; i <= k; i++) {
310  vec *ptr_Uy = (vec *) &U[(m - 1) * colVStride];
311  vec *ptr_Uz = (vec *) &U[i * colVStride];
312  // vec vec_Uy = *ptr_Uy;
313  // vec vec_Uz = *ptr_Uz;
314  vec vec_Uy = __vload_pred(pred_Z, ptr_Uy);
315  vec vec_Uz = __vload_pred(pred_Z, ptr_Uz);
316 
317  f = s * superdiag[i];
318  superdiag[i] = c * superdiag[i];
319 #if !defined(ENABLE_LDRA_COVERAGE)
320  /* This part of code checks for "test f convergence" part condition
321  Ref. Singular Value Decomposition and Least Squares Solutions. G. H. Golub et al
322  We use these conditions in order to calculate correct results if and when they occur */
323  if (__abs(f) <= epsilon) {
324  break;
325  }
326 #endif
327 
328  g = diag[i];
329  double f_sq = f * f;
330  double g_sq = g * g;
331  double f_g_sq = f_sq + g_sq;
332  double h_rsq = getRecipSqrt(f_g_sq);
333  h = f_g_sq * h_rsq; // sqrt(f * f + g * g);
334  diag[i] = h;
335  c = g * h_rsq; // g / h;
336  s = (-f) * h_rsq; //-f / h;
337 
338  vec temp_Uy = vec_Uy * c + vec_Uz * s;
339  vec temp_Uz = -vec_Uy * s + vec_Uz * c;
340  // *ptr_Uy = temp_Uy;
341  // *ptr_Uz = temp_Uz;
342  __vstore_pred(pred_Z, ptr_Uy, temp_Uy);
343  __vstore_pred(pred_Z, ptr_Uz, temp_Uz);
344  } /* for (i=m;i<=k;i++) */
345  } /* if (rotation_test) */
346 
347  z = diag[k];
348  if (m == k) {
349  if (z < 0) {
350  diag[k] = -z;
351  __vpred pred_Z = __mask_long(6u);
352  vec *p_vec_Z = (vec *) (&V[k * colVStride]);
353 
354  vec vec_Z = __vload_pred(pred_Z, p_vec_Z);
355  __vstore_pred(pred_Z, p_vec_Z, -vec_Z);
356  } /* if (z>0) */
357  break;
358  } /* if (m==k) */
359  else {
360 #if !defined(ENABLE_LDRA_COVERAGE)
361  /* This part of code retricts the count of "test f splitting" part
362  Ref. Singular Value Decomposition and Least Squares Solutions. G. H. Golub et al
363  We use these conditions in order to break the while loop to avoid infinite loop */
364  if (iter >= MAX_ITERATION_COUNT) {
365  total_iter = -1;
366  break;
367  }
368 #endif
369 
370  iter++;
371  dataType z_sq, x_sq, x_recip, y_sq, g_sq, h_sq, h_y, h_y_2, f_g_recip, f_sq, f_h_sq;
372 
373  z_sq = z * z;
374 
375  x = diag[m];
376  x_sq = x * x;
377  x_recip = getRecip(x);
378 
379  y = diag[k - 1];
380  y_sq = y * y;
381 
382  g = superdiag[k - 1];
383  g_sq = g * g;
384 
385  h = superdiag[k];
386  h_sq = h * h;
387 
388  h_y = h * y;
389  h_y_2 = h_y + h_y;
390 
391  // f = ((y - z) * (y + z) + (g - h) * (g + h)) * getRecip((2 * h * y));
392  f = ((y_sq - z_sq) + (g_sq - h_sq)) * getRecip((h_y_2));
393 
394  g = getSqrt(f * f + 1);
395  if (f < 0) {
396  g = -g;
397  }
398 
399  f_g_recip = getRecip((f + g));
400  f = ((x_sq - z_sq) + h * y * f_g_recip - h_sq) * x_recip;
401 
402  /* next QR transformation */
403  c = 1;
404  s = 1;
405  // uint32_t cnt = 0;
406  __vpred pred_Z = __mask_long((uint32_t) Nrows);
407 
408  for (i = m + 1; i <= k; i++) {
409  vec *ptr_Vx = (vec *) &V[(i - 1) * colVStride];
410  vec *ptr_Vz = (vec *) &V[i * colVStride];
411  // vec vec_Vx = *ptr_Vx;
412  // vec vec_Vz = *ptr_Vz;
413  vec vec_Vx = __vload_pred(pred_Z, ptr_Vx);
414  vec vec_Vz = __vload_pred(pred_Z, ptr_Vz);
415 
416  vec *ptr_Uy = (vec *) &U[(i - 1) * colUStride];
417  vec *ptr_Uz = (vec *) &U[i * colUStride];
418  // vec vec_Uy = *ptr_Uy;
419  // vec vec_Uz = *ptr_Uz;
420  vec vec_Uy = __vload_pred(pred_Z, ptr_Uy);
421  vec vec_Uz = __vload_pred(pred_Z, ptr_Uz);
422 
423  f_sq = f * f;
424  g = superdiag[i];
425  y = diag[i];
426  h = s * g;
427  h_sq = h * h;
428  f_h_sq = f_sq + h_sq;
429  g = g * c;
430  z = getRecipSqrt(f_h_sq);
431  superdiag[i - 1] = z * f_h_sq;
432 
433  c = f * z;
434  s = h * z;
435  // cV[cnt] = c;
436  // sV[cnt] = s;
437  vec temp_Vx = vec_Vx * c + vec_Vz * s;
438  vec temp_Vz = -vec_Vx * s + vec_Vz * c;
439  // *ptr_Vx = temp_Vx;
440  // *ptr_Vz = temp_Vz;
441  __vstore_pred(pred_Z, ptr_Vx, temp_Vx);
442  __vstore_pred(pred_Z, ptr_Vz, temp_Vz);
443 
444  f = x * c + g * s;
445  f_sq = f * f;
446  g = -x * s + g * c;
447  h = y * s;
448  h_sq = h * h;
449  f_h_sq = f_sq + h_sq;
450  y = c * y;
451 
452  z = getRecipSqrt(f_h_sq);
453  diag[i - 1] = z * f_h_sq;
454 
455  // if (f_h_sq != 0) {
456  c = f * z;
457  s = h * z;
458  // }
459  // cU[cnt] = c;
460  // sU[cnt] = s;
461  // cnt++;
462  vec temp_Uy = vec_Uy * c + vec_Uz * s;
463  vec temp_Uz = -vec_Uy * s + vec_Uz * c;
464  // *ptr_Uy = temp_Uy;
465  // *ptr_Uz = temp_Uz;
466  __vstore_pred(pred_Z, ptr_Uy, temp_Uy);
467  __vstore_pred(pred_Z, ptr_Uz, temp_Uz);
468 
469  f = c * g + s * y;
470  x = -s * g + c * y;
471 
472  } /* for (i=m+1;i<=k;i++) */
473  superdiag[m] = 0;
474  superdiag[k] = f;
475  diag[k] = x;
476  } /* if (m==k) */
477  } /* while (1==1) */
478  } /* for (k=Ncols-1:k>=0;k--) */
479 
480  return total_iter;
481 }
482 template int DSPF_sp_bidiag_to_diag_ci<float>(const int Nrows,
483  const int Ncols,
484  float *U,
485  float *V,
486  float *diag,
487  float *superdiag,
488  const int colUStride,
489  const int colVStride,
490  uint32_t enableReducedForm);
491 template int DSPF_sp_bidiag_to_diag_ci<double>(const int Nrows,
492  const int Ncols,
493  double *U,
494  double *V,
495  double *diag,
496  double *superdiag,
497  const int colUStride,
498  const int colVStride,
499  uint32_t enableReducedForm);
500 
501 template <typename dataType>
502 static inline int DSPF_sp_sort_singular_values_ci(const int Nrows,
503  const int Ncols,
504  dataType *U,
505  dataType *V,
506  dataType *singular_values,
507  const int colUStride,
508  const int colVStride,
509  uint32_t enableReducedForm)
510 {
511 
512  sort_singular_vals(singular_values, U, V, Nrows, Ncols, colUStride, colVStride);
513 
514  return 0;
515 }
516 // template int DSPF_sp_sort_singular_values_ci<float>(const int Nrows,
517 // const int Ncols,
518 // float *U,
519 // float *V,
520 // float *singular_values,
521 // const int colUStride,
522 // const int colVStride,
523 // uint32_t enableReducedForm);
524 template int DSPF_sp_sort_singular_values_ci<double>(const int Nrows,
525  const int Ncols,
526  double *U,
527  double *V,
528  double *singular_values,
529  const int colUStride,
530  const int colVStride,
531  uint32_t enableReducedForm);
532 
533 template <typename dataType>
534 static inline int DSPF_sp_svd_ci(DSPLIB_svd_small_PrivArgs *pKerPrivArgs,
535  const int Nrows,
536  const int Ncols,
537  dataType *A,
538  dataType *U,
539  dataType *V,
540  dataType *U1,
541  dataType *diag,
542  dataType *superdiag,
543  const int32_t strideIn,
544  const int32_t strideU,
545  const int32_t strideV,
546  uint32_t enableReducedForm)
547 {
548  DSPLIB_DEBUGPRINTFN(0, "Entering function pA: %p\n", A);
549 
550  int row, col, Nrows1, Ncols1, status;
551  /* ------------------------------------------------------------------- */
552  /* copy A matrix to U */
553  /* ------------------------------------------------------------------- */
554 
555  if (Nrows >= Ncols) {
556  Nrows1 = Nrows;
557  Ncols1 = Ncols;
558  }
559  else {
560  Nrows1 = Ncols;
561  Ncols1 = Nrows;
562  }
563  int32_t dataSize = sizeof(dataType);
564  int32_t colUStride = strideU / dataSize;
565  int32_t colVStride = strideV / dataSize;
566  int32_t colAStride = strideIn / dataSize;
567 
568  if (Nrows >= Ncols) {
569  /* Copy A to U */
570  for (row = 0; row < Nrows1; row++) {
571  for (col = 0; col < Ncols1; col++) {
572  U[col + row * colUStride] = A[col + row * colAStride];
573  }
574  }
575  }
576  else {
577  /* Copy A' to U */
578  for (row = 0; row < Nrows1; row++) {
579  for (col = 0; col < Ncols1; col++) {
580  U[col + row * colUStride] = A[row + col * colAStride];
581  }
582  }
583  }
584 
585  /* ------------------------------------------------------------------- */
586  /* convert A to bidiagonal matrix using Householder reflections */
587  /* ------------------------------------------------------------------- */
588  DSPF_sp_convert_to_bidiag_ci<dataType>(Nrows1, Ncols1, U, V, diag, superdiag, colUStride, colVStride,
589  enableReducedForm);
590 
591  /* ------------------------------------------------------------------- */
592  /* convert bidiagonal to diagonal using Givens rotations */
593  /* ------------------------------------------------------------------- */
594 
595  transpose_vec_mat(V, Ncols, Ncols, colVStride);
596  transpose_vec_mat(U, Ncols, Ncols, colUStride);
597  status = DSPF_sp_bidiag_to_diag_ci<dataType>(Nrows1, Ncols1, U, V, diag, superdiag, colUStride, colVStride,
598  enableReducedForm);
599 
600  transpose_vec_mat(V, Ncols, Ncols, colVStride);
601  transpose_vec_mat(U, Ncols, Ncols, colUStride);
602 
603  /* ------------------------------------------------------------------- */
604  /* sort singular values in descending order */
605  /* ------------------------------------------------------------------- */
606  DSPF_sp_sort_singular_values_ci<dataType>(Nrows1, Ncols1, U, V, diag, colUStride, colVStride, enableReducedForm);
607 
608  /* ------------------------------------------------------------------- */
609  /* switch U and V */
610  /* ------------------------------------------------------------------- */
611  if (Ncols > Nrows) {
612  if (enableReducedForm == 0u) {
613  memcpy(U1, V, sizeof(dataType) * Nrows * colVStride);
614  memcpy(V, U, sizeof(dataType) * Ncols * colUStride);
615  memcpy(U, U1, sizeof(dataType) * Nrows * colUStride);
616  }
617  else {
618  memcpy(U1, V, sizeof(dataType) * Ncols * colVStride);
619  memcpy(V, U, sizeof(dataType) * Ncols * colUStride);
620  memcpy(U, U1, sizeof(dataType) * Nrows * colUStride);
621  }
622  }
623  DSPLIB_DEBUGPRINTFN(0, "Exiting function with status: %d\n", status);
624 
625  return status;
626 }
627 // template int DSPF_sp_svd_ci<float>(DSPLIB_svd_small_PrivArgs *pKerPrivArgs,
628 // const int Nrows,
629 // const int Ncols,
630 // float *A,
631 // float *U,
632 // float *V,
633 // float *U1,
634 // float *diag,
635 // float *superdiag,
636 // const int32_t strideIn,
637 // const int32_t strideU,
638 // const int32_t strideV,
639 // uint32_t enableReducedForm);
640 template int DSPF_sp_svd_ci<double>(DSPLIB_svd_small_PrivArgs *pKerPrivArgs,
641  const int Nrows,
642  const int Ncols,
643  double *A,
644  double *U,
645  double *V,
646  double *U1,
647  double *diag,
648  double *superdiag,
649  const int32_t strideIn,
650  const int32_t strideU,
651  const int32_t strideV,
652  uint32_t enableReducedForm);
653 
654 #endif /* #if (__C7X_VEC_SIZE_BITS__ == 512) */
655 
656 template <typename dataType>
658  void *restrict pA,
659  void *restrict pU,
660  void *restrict pV,
661  void *restrict pDiag,
662  void *restrict pSuperDiag,
663  void *restrict pU1)
664 {
665  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Entering function");
666 
667  DSPLIB_STATUS status = DSPLIB_SUCCESS;
668 
669 #if (__C7X_VEC_SIZE_BITS__ == 512)
670  DSPLIB_svd_small_PrivArgs *pKerPrivArgs = (DSPLIB_svd_small_PrivArgs *) handle;
671  uint32_t heightIn = pKerPrivArgs->heightIn;
672  uint32_t widthIn = pKerPrivArgs->widthIn;
673  int32_t strideIn = pKerPrivArgs->strideIn;
674  int32_t strideU = pKerPrivArgs->strideU;
675  int32_t strideV = pKerPrivArgs->strideV;
676  uint32_t enableReducedForm = pKerPrivArgs->enableReducedForm;
677 
678  /* Typecast void pointers to respective data type */
679  dataType *pALocal = (dataType *) pA;
680  dataType *pULocal = (dataType *) pU;
681  dataType *pVLocal = (dataType *) pV;
682  dataType *pDiagLocal = (dataType *) pDiag;
683  dataType *pSuperDiagLocal = (dataType *) pSuperDiag;
684  dataType *pU1Local = (dataType *) pU1;
685 
686  DSPLIB_DEBUGPRINTFN(0, "pALocal: %p pOutLocal: %p widthIn: %d heightIn: %d\n", pALocal, pULocal, widthIn, heightIn);
687 
688 #if !defined(ENABLE_LDRA_COVERAGE)
689  int svd_status =
690  DSPF_sp_svd_ci<dataType>(pKerPrivArgs, heightIn, widthIn, pALocal, pULocal, pVLocal, pU1Local, pDiagLocal,
691  pSuperDiagLocal, strideIn, strideU, strideV, enableReducedForm);
692 
693  if (svd_status < 0) {
694  status = DSPLIB_ERR_FAILURE;
695  }
696 #else
697  DSPF_sp_svd_ci<dataType>(pKerPrivArgs, heightIn, widthIn, pALocal, pULocal, pVLocal, pU1Local, pDiagLocal,
698  pSuperDiagLocal, strideIn, strideU, strideV, enableReducedForm);
699 
700 #endif
701 #else
702  DSPLIB_DEBUGPRINTFN(0, "%s\n", "The code is only implemented for __C7X_VEC_SIZE_BITS__ == 512 ");
704 
705 #endif
706 
707  DSPLIB_DEBUGPRINTFN(0, "Exiting function with return status: %d\n", status);
708  return status;
709 }
710 
711 // template DSPLIB_STATUS DSPLIB_svd_small_exec_ci<float>(DSPLIB_kernelHandle handle,
712 // void *restrict pA,
713 // void *restrict pU,
714 // void *restrict pV,
715 // void *restrict pDiag,
716 // void *restrict pSuperDiag,
717 // void *restrict pU1);
718 
720  void *restrict pA,
721  void *restrict pU,
722  void *restrict pV,
723  void *restrict pDiag,
724  void *restrict pSuperDiag,
725  void *restrict pU1);
726 
727 /* ======================================================================== */
728 /* End of file: DSPLIB_svd_small_ci.cpp */
729 /* ======================================================================== */
dataType getRecipSqrt(dataType a)
dataType getRecip(dataType value)
DSPLIB_STATUS DSPLIB_svd_small_init_ci(DSPLIB_kernelHandle handle, const DSPLIB_bufParams2D_t *bufParamsIn, const DSPLIB_bufParams2D_t *bufParamsU, const DSPLIB_bufParams2D_t *bufParamsV, const DSPLIB_bufParams1D_t *bufParamsDiag, const DSPLIB_bufParams1D_t *bufParamsSuperDiag, const DSPLIB_svd_small_InitArgs *pKerInitArgs)
This function is the initialization function for the C7x implementation of the kernel....
template DSPLIB_STATUS DSPLIB_svd_small_init_ci< double >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams2D_t *bufParamsIn, const DSPLIB_bufParams2D_t *bufParamsU, const DSPLIB_bufParams2D_t *bufParamsV, const DSPLIB_bufParams1D_t *bufParamsDiag, const DSPLIB_bufParams1D_t *bufParamsSuperDiag, const DSPLIB_svd_small_InitArgs *pKerInitArgs)
DSPLIB_STATUS DSPLIB_svd_small_exec_ci(DSPLIB_kernelHandle handle, void *restrict pA, void *restrict pU, void *restrict pV, void *restrict pDiag, void *restrict pSuperDiag, void *restrict pU1)
This function is the main execution function for the C7x implementation of the kernel....
template DSPLIB_STATUS DSPLIB_svd_small_exec_ci< double >(DSPLIB_kernelHandle handle, void *restrict pA, void *restrict pU, void *restrict pV, void *restrict pDiag, void *restrict pSuperDiag, void *restrict pU1)
#define MAX_ITERATION_COUNT
Header file for kernel's internal use. For the kernel's interface, please see DSPLIB_svd.
dataType getSqrt(dataType a)
#define DSPLIB_DEBUGPRINTFN(N, fmt,...)
Definition: DSPLIB_types.h:83
DSPLIB_STATUS_NAME
The enumeration of all status codes.
Definition: DSPLIB_types.h:151
void * DSPLIB_kernelHandle
Handle type for DSPLIB operations.
Definition: DSPLIB_types.h:172
@ DSPLIB_SUCCESS
Definition: DSPLIB_types.h:152
@ DSPLIB_ERR_NOT_IMPLEMENTED
Definition: DSPLIB_types.h:158
@ DSPLIB_ERR_FAILURE
Definition: DSPLIB_types.h:153
A structure for a 1 dimensional buffer descriptor.
A structure for a 2 dimensional buffer descriptor.
Structure containing the parameters to initialize the kernel.
Structure that is reserved for internal use by the kernel.
uint32_t enableReducedForm
Flag for enabling the calculation of reduced form enableReducedForm = 1 for reduced form SVD calc ena...
uint32_t strideU
Stride between rows of U matrix
uint32_t strideV
Stride between rows of V matrix
int32_t strideIn
Stride between rows of input data matrix
uint32_t widthIn
Size of input buffer for different batches DSPLIB_svd_small_init that will be retrieved and used by D...
uint32_t heightIn
Height of input data matrix