DSPLIB User Guide
DSPLIB_svd_small_v_process.h
Go to the documentation of this file.
1 
2 /******************************************************************************/
6 /* Copyright (C) 2017 Texas Instruments Incorporated - https://www.ti.com/
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * Redistributions of source code must retain the above copyright
13  * notice, this list of conditions and the following disclaimer.
14  *
15  * Redistributions in binary form must reproduce the above copyright
16  * notice, this list of conditions and the following disclaimer in the
17  * documentation and/or other materials provided with the
18  * distribution.
19  *
20  * Neither the name of Texas Instruments Incorporated nor the names of
21  * its contributors may be used to endorse or promote products derived
22  * from this software without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
27  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
28  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
29  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
30  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
31  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
32  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
33  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
34  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35  *
36  ******************************************************************************/
37 
38 /******************************************************************************
39  * Version 1.0 Date Aug 2023 Author: Asheesh Bhardwaj
40  *****************************************************************************/
41 
42 /*******************************************************************************
43  *
44  * INCLUDES
45  *
46  ******************************************************************************/
47 
48 #include "DSPLIB_svd_small_priv.h"
49 
50 /* *****************************************************************************
51  *
52  * IMPLEMENTATION
53  *
54  ***************************************************************************** */
55 #if (__C7X_VEC_SIZE_BITS__ == 512)
56 
57 
58 template <typename dataType> static inline void v_process_1st_iter(dataType *U,
59  dataType *V,
60  const int colUStride,
61  const int colVStride,
62  dataType *s);
63 template <> inline void v_process_1st_iter<float>(float *U,
64  float *V,
65  const int colUStride,
66  const int colVStride,
67  float *s)
68 {
69  return;
70 }
71 template <> inline void v_process_1st_iter<double>(double *U,
72  double *V,
73  const int colUStride,
74  const int colVStride,
75  double *s)
76 {
77  /* -------------------------------------------------- */
78  /* For i = 1 */
79  /* -------------------------------------------------- */
80  {
81  typedef typename c7x::make_full_vector<double>::type vec;
82  __vpred pred_2_elem = __create_vpred(0x000000000000FFFFU);
83  __vpred pred_2_elem_u = __create_vpred(0x000000000000FF00U);
84 
85 
86  vec vec_top_row = (vec)0.0;
87  vec_top_row.s[0] = 1.0;
88 
89  vec vec_in0 = __vload_pred(pred_2_elem_u, (vec *)&V[0 + 1 * colVStride]);
90 
91  vec vec_u_row = __vload_pred(pred_2_elem_u, (vec *)U);
92 
93  double normFactor = vec_u_row.s[1] * (*s);
94  vec vec_invNorm = (vec)getRecip(normFactor);
95 
96  vec vec_v_col = vec_u_row * vec_invNorm;
97 
98  vec vec_si = (vec)0.0;
99  vec_si += vec_u_row * vec_in0;
100 
101  vec_in0 += vec_si * vec_v_col;
102 
103  __vstore_pred(pred_2_elem, (vec *)&V[0 + 0 * colVStride], vec_top_row);
104  __vstore_pred(pred_2_elem, (vec *)&V[0 + 1 * colVStride], vec_in0);
105  }
106  return;
107 }
108 
109 
110 
111 
112 
113 template <typename dataType> static inline void v_process_2nd_iter(dataType *U,
114  dataType *V,
115  const int colUStride,
116  const int colVStride,
117  dataType *s);
118 template <> inline void v_process_2nd_iter<float>(float *U,
119  float *V,
120  const int colUStride,
121  const int colVStride,
122  float *s)
123 {
124  return;
125 }
126 template <> inline void v_process_2nd_iter<double>(double *U,
127  double *V,
128  const int colUStride,
129  const int colVStride,
130  double *s)
131 {
132  /* -------------------------------------------------- */
133  /* For i = 2 */
134  /* -------------------------------------------------- */
135  {
136  typedef typename c7x::make_full_vector<double>::type vec;
137  __vpred pred_3_elem = __create_vpred(0x0000000000FFFFFFU);
138  __vpred pred_3_elem_u = __create_vpred(0x0000000000FFFF00U);
139 
140 
141  vec vec_top_row = (vec)0.0;
142  vec_top_row.s[0] = 1.0;
143 
144  vec vec_in0 = __vload_pred(pred_3_elem_u, (vec *)&V[0 + 1 * colVStride]);
145  vec vec_in1 = __vload_pred(pred_3_elem_u, (vec *)&V[0 + 2 * colVStride]);
146 
147  vec vec_u_row = __vload_pred(pred_3_elem_u, (vec *)U);
148 
149  double normFactor = vec_u_row.s[1] * (*s);
150  vec vec_invNorm = (vec)getRecip(normFactor);
151 
152  vec vec_v_col = vec_u_row * vec_invNorm;
153 
154  vec vec_si = (vec)0.0;
155  vec_si += vec_u_row.s[1] * vec_in0;
156  vec_si += vec_u_row.s[2] * vec_in1;
157 
158  vec_in0 += vec_si * vec_v_col.s[1];
159  vec_in1 += vec_si * vec_v_col.s[2];
160 
161  __vstore_pred(pred_3_elem, (vec *)&V[0 + 0 * colVStride], vec_top_row);
162  __vstore_pred(pred_3_elem, (vec *)&V[0 + 1 * colVStride], vec_in0);
163  __vstore_pred(pred_3_elem, (vec *)&V[0 + 2 * colVStride], vec_in1);
164 
165 
166  }
167  return;
168 }
169 
170 
171 
172 template <typename dataType> static inline void v_process_3rd_iter(dataType *U,
173  dataType *V,
174  const int colUStride,
175  const int colVStride,
176  dataType *s);
177 template <> inline void v_process_3rd_iter<float>(float *U,
178  float *V,
179  const int colUStride,
180  const int colVStride,
181  float *s)
182 {
183  return;
184 }
185 template <> inline void v_process_3rd_iter<double>(double *U,
186  double *V,
187  const int colUStride,
188  const int colVStride,
189  double *s)
190 {
191  /* -------------------------------------------------- */
192  /* For i = 3 */
193  /* -------------------------------------------------- */
194  {
195  typedef typename c7x::make_full_vector<double>::type vec;
196  __vpred pred_4_elem = __create_vpred(0x00000000FFFFFFFFU);
197  __vpred pred_4_elem_u = __create_vpred(0x00000000FFFFFF00U);
198 
199 
200  vec vec_top_row = (vec)0.0;
201  vec_top_row.s[0] = 1.0;
202 
203  vec vec_in0 = __vload_pred(pred_4_elem_u, (vec *)&V[0 + 1 * colVStride]);
204  vec vec_in1 = __vload_pred(pred_4_elem_u, (vec *)&V[0 + 2 * colVStride]);
205  vec vec_in2 = __vload_pred(pred_4_elem_u, (vec *)&V[0 + 3 * colVStride]);
206 
207  vec vec_u_row = __vload_pred(pred_4_elem_u, (vec *)U);
208 
209  double normFactor = vec_u_row.s[1] * (*s);
210  vec vec_invNorm = (vec)getRecip(normFactor);
211 
212  vec vec_v_col = vec_u_row * vec_invNorm;
213 
214  vec vec_si = (vec)0.0;
215  vec_si += vec_u_row.s[1] * vec_in0;
216  vec_si += vec_u_row.s[2] * vec_in1;
217  vec_si += vec_u_row.s[3] * vec_in2;
218 
219  vec_in0 += vec_si * vec_v_col.s[1];
220  vec_in1 += vec_si * vec_v_col.s[2];
221  vec_in2 += vec_si * vec_v_col.s[3];
222 
223 
224  __vstore_pred(pred_4_elem, (vec *)&V[0 + 0 * colVStride], vec_top_row);
225  __vstore_pred(pred_4_elem, (vec *)&V[0 + 1 * colVStride], vec_in0);
226  __vstore_pred(pred_4_elem, (vec *)&V[0 + 2 * colVStride], vec_in1);
227  __vstore_pred(pred_4_elem, (vec *)&V[0 + 3 * colVStride], vec_in2);
228 
229  }
230  return;
231 }
232 
233 
234 template <typename dataType> static inline void v_process_4th_iter(dataType *U,
235  dataType *V,
236  const int colUStride,
237  const int colVStride,
238  dataType *s);
239 template <> inline void v_process_4th_iter<float>(float *U,
240  float *V,
241  const int colUStride,
242  const int colVStride,
243  float *s)
244 {
245  return;
246 }
247 template <> inline void v_process_4th_iter<double>(double *U,
248  double *V,
249  const int colUStride,
250  const int colVStride,
251  double *s)
252 {
253  /* -------------------------------------------------- */
254  /* For i = 4 */
255  /* -------------------------------------------------- */
256  {
257  typedef typename c7x::make_full_vector<double>::type vec;
258  __vpred pred_5_elem = __create_vpred(0x000000FFFFFFFFFFU);
259  __vpred pred_5_elem_u = __create_vpred(0x000000FFFFFFFF00U);
260 
261 
262  vec vec_top_row = (vec)0.0;
263  vec_top_row.s[0] = 1.0;
264 
265  vec vec_in0 = __vload_pred(pred_5_elem_u, (vec *)&V[0 + 1 * colVStride]);
266  vec vec_in1 = __vload_pred(pred_5_elem_u, (vec *)&V[0 + 2 * colVStride]);
267  vec vec_in2 = __vload_pred(pred_5_elem_u, (vec *)&V[0 + 3 * colVStride]);
268  vec vec_in3 = __vload_pred(pred_5_elem_u, (vec *)&V[0 + 4 * colVStride]);
269 
270  vec vec_u_row = __vload_pred(pred_5_elem_u, (vec *)U);
271 
272  double normFactor = vec_u_row.s[1] * (*s);
273  vec vec_invNorm = (vec)getRecip(normFactor);
274 
275  vec vec_v_col = vec_u_row * vec_invNorm;
276 
277  vec vec_si = (vec)0.0;
278  vec_si += vec_u_row.s[1] * vec_in0;
279  vec_si += vec_u_row.s[2] * vec_in1;
280  vec_si += vec_u_row.s[3] * vec_in2;
281  vec_si += vec_u_row.s[4] * vec_in3;
282 
283  vec_in0 += vec_si * vec_v_col.s[1];
284  vec_in1 += vec_si * vec_v_col.s[2];
285  vec_in2 += vec_si * vec_v_col.s[3];
286  vec_in3 += vec_si * vec_v_col.s[4];
287 
288 
289  __vstore_pred(pred_5_elem, (vec *)&V[0 + 0 * colVStride], vec_top_row);
290  __vstore_pred(pred_5_elem, (vec *)&V[0 + 1 * colVStride], vec_in0);
291  __vstore_pred(pred_5_elem, (vec *)&V[0 + 2 * colVStride], vec_in1);
292  __vstore_pred(pred_5_elem, (vec *)&V[0 + 3 * colVStride], vec_in2);
293  __vstore_pred(pred_5_elem, (vec *)&V[0 + 4 * colVStride], vec_in3);
294 
295  }
296  return;
297 }
298 
299 
300 
301 
302 template <typename dataType> static inline void v_process_5th_iter(dataType *U,
303  dataType *V,
304  const int colUStride,
305  const int colVStride,
306  dataType *s);
307 template <> inline void v_process_5th_iter<float>(float *U,
308  float *V,
309  const int colUStride,
310  const int colVStride,
311  float *s)
312 {
313  return;
314 }
315 template <> inline void v_process_5th_iter<double>(double *U,
316  double *V,
317  const int colUStride,
318  const int colVStride,
319  double *s)
320 {
321  /* -------------------------------------------------- */
322  /* For i = 5 */
323  /* -------------------------------------------------- */
324  {
325  typedef typename c7x::make_full_vector<double>::type vec;
326  __vpred pred_6_elem = __create_vpred(0x0000FFFFFFFFFFFFU);
327  __vpred pred_6_elem_u = __create_vpred(0x0000FFFFFFFFFF00U);
328 
329 
330  vec vec_top_row = (vec)0.0;
331  vec_top_row.s[0] = 1.0;
332 
333  vec vec_in0 = __vload_pred(pred_6_elem_u, (vec *)&V[0 + 1 * colVStride]);
334  vec vec_in1 = __vload_pred(pred_6_elem_u, (vec *)&V[0 + 2 * colVStride]);
335  vec vec_in2 = __vload_pred(pred_6_elem_u, (vec *)&V[0 + 3 * colVStride]);
336  vec vec_in3 = __vload_pred(pred_6_elem_u, (vec *)&V[0 + 4 * colVStride]);
337  vec vec_in4 = __vload_pred(pred_6_elem_u, (vec *)&V[0 + 5 * colVStride]);
338 
339  vec vec_u_row = __vload_pred(pred_6_elem_u, (vec *)U);
340 
341  double normFactor = vec_u_row.s[1] * (*s);
342  vec vec_invNorm = (vec)getRecip(normFactor);
343 
344  vec vec_v_col = vec_u_row * vec_invNorm;
345 
346  vec vec_si = (vec)0.0;
347  vec_si += vec_u_row.s[1] * vec_in0;
348  vec_si += vec_u_row.s[2] * vec_in1;
349  vec_si += vec_u_row.s[3] * vec_in2;
350  vec_si += vec_u_row.s[4] * vec_in3;
351  vec_si += vec_u_row.s[5] * vec_in4;
352 
353  vec_in0 += vec_si * vec_v_col.s[1];
354  vec_in1 += vec_si * vec_v_col.s[2];
355  vec_in2 += vec_si * vec_v_col.s[3];
356  vec_in3 += vec_si * vec_v_col.s[4];
357  vec_in4 += vec_si * vec_v_col.s[5];
358 
359 
360  __vstore_pred(pred_6_elem, (vec *)&V[0 + 0 * colVStride], vec_top_row);
361  __vstore_pred(pred_6_elem, (vec *)&V[0 + 1 * colVStride], vec_in0);
362  __vstore_pred(pred_6_elem, (vec *)&V[0 + 2 * colVStride], vec_in1);
363  __vstore_pred(pred_6_elem, (vec *)&V[0 + 3 * colVStride], vec_in2);
364  __vstore_pred(pred_6_elem, (vec *)&V[0 + 4 * colVStride], vec_in3);
365  __vstore_pred(pred_6_elem, (vec *)&V[0 + 5 * colVStride], vec_in4);
366 
367  }
368  return;
369 }
370 
371 
372 
373 
374 #endif /* #if (__C7X_VEC_SIZE_BITS__ == 512) */
375 
376 
377 /* ======================================================================== */
378 /* End of file: DSPLIB_svd_small_v_process.h */
379 /* ======================================================================== */
380 
dataType getRecip(dataType value)
Header file for kernel's internal use. For the kernel's interface, please see DSPLIB_svd.