DSPLIB User Guide
DSPLIB_qrd_solver_ci.cpp
Go to the documentation of this file.
1 /*******************************************************************************
2 **+--------------------------------------------------------------------------+**
3 **| **** |**
4 **| **** |**
5 **| ******o*** |**
6 **| ********_///_**** |**
7 **| ***** /_//_/ **** |**
8 **| ** ** (__/ **** |**
9 **| ********* |**
10 **| **** |**
11 **| *** |**
12 **| |**
13 **| Copyright (c) 2016 Texas Instruments Incorporated |**
14 **| ALL RIGHTS RESERVED |**
15 **| |**
16 **| Permission to use, copy, modify, or distribute this software, |**
17 **| whether in part or in whole, for any purpose is forbidden without |**
18 **| a signed licensing agreement and NDA from Texas Instruments |**
19 **| Incorporated (TI). |**
20 **| |**
21 **| TI makes no representation or warranties with respect to the |**
22 **| performance of this computer program, and specifically disclaims |**
23 **| any responsibility for any damages, special or consequential, |**
24 **| connected with the use of this program. |**
25 **| |**
26 **+--------------------------------------------------------------------------+**
27 *******************************************************************************/
28 #include "DSPLIB_qrd_common.h"
29 #include "DSPLIB_qrd_solver_priv.h"
30 
31 #include <cstring>
32 
33 template <typename dataType> void DSPLIB_qrd_solver_y_init_ci(DSPLIB_kernelHandle handle)
34 {
35  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Entering function");
36  DSPLIB_qrd_solver_PrivArgs *pKerPrivArgs = (DSPLIB_qrd_solver_PrivArgs *) handle;
37  uint8_t *pBlock = pKerPrivArgs->bufPblock;
38  int32_t nCols = pKerPrivArgs->widthR;
39  int32_t nRows = pKerPrivArgs->widthR;
40  int32_t strideQ = pKerPrivArgs->strideQ;
41  int32_t colStrideQ = strideQ / sizeof(dataType);
42 
43  typedef typename c7x::make_full_vector<dataType>::type vec;
44 
45  int32_t lenTile8 = 8;
46  uint32_t eleCount = c7x::element_count_of<vec>::value;
47  int32_t nTiles_8 = DSPLIB_ceilingDiv(nCols, (eleCount * lenTile8));
48 
49  __SE_ELETYPE SE_ELETYPE = c7x::se_eletype<vec>::value;
50  __SE_VECLEN SE_VECLEN = c7x::se_veclen<vec>::value;
51  __SA_VECLEN SA_VECLEN = c7x::sa_veclen<vec>::value;
52  __SE_ELEDUP SE_ELEDUP = c7x::se_eledup<dataType>::value;
53 
54  __SE_TEMPLATE_v1 seScalarParams = __gen_SE_TEMPLATE_v1();
55  __SE_TEMPLATE_v1 seMatrixParams = __gen_SE_TEMPLATE_v1();
56  __SA_TEMPLATE_v1 saMatrixParams = __gen_SA_TEMPLATE_v1();
57  __SA_TEMPLATE_v1 saRefParams = __gen_SA_TEMPLATE_v1();
58 
59  seScalarParams.DIM1 = 0;
60  seScalarParams.ELEDUP = SE_ELEDUP;
61  seScalarParams.DIMFMT = __SE_DIMFMT_2D;
62  seScalarParams.VECLEN = SE_VECLEN;
63  seScalarParams.ELETYPE = SE_ELETYPE;
64  seScalarParams.ICNT1 = nTiles_8;
65  seScalarParams.ICNT0 = nRows;
66 
67  seMatrixParams.ICNT0 = (eleCount * lenTile8);
68  seMatrixParams.DIM1 = colStrideQ;
69  seMatrixParams.DIM2 = (eleCount * lenTile8);
70  seMatrixParams.DIMFMT = __SE_DIMFMT_3D;
71  seMatrixParams.ELETYPE = SE_ELETYPE;
72  seMatrixParams.VECLEN = SE_VECLEN;
73  seMatrixParams.DECDIM1 = __SE_DECDIM_DIM2;
74  seMatrixParams.ICNT2 = nTiles_8;
75  seMatrixParams.DECDIM1_WIDTH = nCols;
76  seMatrixParams.ICNT1 = nRows;
77 
78  saMatrixParams.ICNT0 = (eleCount * lenTile8);
79  saMatrixParams.DIM1 = colStrideQ;
80  saMatrixParams.DIM2 = (eleCount * lenTile8);
81  saMatrixParams.DIMFMT = __SA_DIMFMT_3D;
82  saMatrixParams.VECLEN = SA_VECLEN;
83  saMatrixParams.DECDIM1 = __SA_DECDIM_DIM2;
84  saMatrixParams.ICNT2 = nTiles_8;
85  saMatrixParams.DECDIM1_WIDTH = nCols;
86  saMatrixParams.ICNT1 = nRows;
87 
88  saRefParams.ICNT0 = nCols;
89  saRefParams.DIMFMT = __SA_DIMFMT_1D;
90  saRefParams.VECLEN = SA_VECLEN;
91 
92  *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (0 * SE_PARAM_SIZE)) = seScalarParams;
93  *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (1 * SE_PARAM_SIZE)) = seMatrixParams;
94 
95  *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (2 * SE_PARAM_SIZE)) = saMatrixParams;
96  *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (3 * SE_PARAM_SIZE)) = saRefParams;
97 
98  DSPLIB_DEBUGPRINTFN(0, "Exiting function with return status: %d\n", DSPLIB_SUCCESS);
99 }
102 
103 template <typename dataType>
104 void DSPLIB_qrd_solver_y_exec_ci(dataType *pLocalQ,
105  int32_t nCols,
106  int32_t nRows,
107  dataType *pLocalB,
108  dataType *pLocalY,
109  uint8_t *pBlock)
110 {
111  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Entering function");
112  typedef typename c7x::make_full_vector<dataType>::type vec;
113 
114  __SE_TEMPLATE_v1 seScalarParams = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (0 * SE_PARAM_SIZE));
115  __SE_TEMPLATE_v1 seMatrixParams = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (1 * SE_PARAM_SIZE));
116  __SA_TEMPLATE_v1 saMatrixParams = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (2 * SE_PARAM_SIZE));
117  __SA_TEMPLATE_v1 saRefParams = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (3 * SE_PARAM_SIZE));
118 
119  uint32_t eleCount = c7x::element_count_of<vec>::value;
120 
121  int32_t lenTile8 = 8;
122  int32_t nTiles_8 = DSPLIB_ceilingDiv(nCols, (eleCount * lenTile8));
123 
124  __SE0_OPEN(pLocalB, seScalarParams);
125  __SE1_OPEN(pLocalQ, seMatrixParams);
126  __SA1_OPEN(saMatrixParams);
127  __SA0_OPEN(saRefParams);
128  __SA2_OPEN(saRefParams);
129 
130  // __SA2_OPEN(saRefStoreParams);
131  for (int32_t tile = 0; tile < nTiles_8; tile++) {
132  vec sV1 = (vec) 0;
133  vec sV2 = (vec) 0;
134  vec sV3 = (vec) 0;
135  vec sV4 = (vec) 0;
136  vec sV5 = (vec) 0;
137  vec sV6 = (vec) 0;
138  vec sV7 = (vec) 0;
139  vec sV8 = (vec) 0;
140  for (int32_t vertical = 0; vertical < nRows; vertical++) {
141  vec scalarDup = c7x::strm_eng<0, vec>::get_adv();
142 
143  vec v1 = c7x::strm_eng<1, vec>::get_adv();
144  vec v2 = c7x::strm_eng<1, vec>::get_adv();
145  vec v3 = c7x::strm_eng<1, vec>::get_adv();
146  vec v4 = c7x::strm_eng<1, vec>::get_adv();
147  vec v5 = c7x::strm_eng<1, vec>::get_adv();
148  vec v6 = c7x::strm_eng<1, vec>::get_adv();
149  vec v7 = c7x::strm_eng<1, vec>::get_adv();
150  vec v8 = c7x::strm_eng<1, vec>::get_adv();
151 
152  sV1 += v1 * scalarDup;
153  sV2 += v2 * scalarDup;
154  sV3 += v3 * scalarDup;
155  sV4 += v4 * scalarDup;
156  sV5 += v5 * scalarDup;
157  sV6 += v6 * scalarDup;
158  sV7 += v7 * scalarDup;
159  sV8 += v8 * scalarDup;
160  }
161 
162  __vpred lPred = c7x::strm_agen<2, vec>::get_vpred();
163  vec *psV = c7x::strm_agen<2, vec>::get_adv(pLocalY);
164  __vstore_pred(lPred, psV, sV1);
165 
166  lPred = c7x::strm_agen<2, vec>::get_vpred();
167  psV = c7x::strm_agen<2, vec>::get_adv(pLocalY);
168  __vstore_pred(lPred, psV, sV2);
169 
170  lPred = c7x::strm_agen<2, vec>::get_vpred();
171  psV = c7x::strm_agen<2, vec>::get_adv(pLocalY);
172  __vstore_pred(lPred, psV, sV3);
173 
174  lPred = c7x::strm_agen<2, vec>::get_vpred();
175  psV = c7x::strm_agen<2, vec>::get_adv(pLocalY);
176  __vstore_pred(lPred, psV, sV4);
177 
178  lPred = c7x::strm_agen<2, vec>::get_vpred();
179  psV = c7x::strm_agen<2, vec>::get_adv(pLocalY);
180  __vstore_pred(lPred, psV, sV5);
181 
182  lPred = c7x::strm_agen<2, vec>::get_vpred();
183  psV = c7x::strm_agen<2, vec>::get_adv(pLocalY);
184  __vstore_pred(lPred, psV, sV6);
185 
186  lPred = c7x::strm_agen<2, vec>::get_vpred();
187  psV = c7x::strm_agen<2, vec>::get_adv(pLocalY);
188  __vstore_pred(lPred, psV, sV7);
189 
190  lPred = c7x::strm_agen<2, vec>::get_vpred();
191  psV = c7x::strm_agen<2, vec>::get_adv(pLocalY);
192  __vstore_pred(lPred, psV, sV8);
193  }
194  __SE0_CLOSE();
195  __SA0_CLOSE();
196  __SE1_CLOSE();
197  __SA1_CLOSE();
198  __SA2_CLOSE();
199  DSPLIB_DEBUGPRINTFN(0, "Exiting function with return status: %d\n", DSPLIB_SUCCESS);
200 }
201 template void DSPLIB_qrd_solver_y_exec_ci<float>(float *pLocalQ,
202  int32_t nCols,
203  int32_t nRows,
204  float *pLocalB,
205  float *pLocalY,
206  uint8_t *pBlock);
207 template void DSPLIB_qrd_solver_y_exec_ci<double>(double *pLocalQ,
208  int32_t nCols,
209  int32_t nRows,
210  double *pLocalB,
211  double *pLocalY,
212  uint8_t *pBlock);
213 
214 template <typename dataType> void DSPLIB_qrd_solver_backSubstitution_init_ci(DSPLIB_kernelHandle handle)
215 {
216  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Entering function");
217  DSPLIB_qrd_solver_PrivArgs *pKerPrivArgs = (DSPLIB_qrd_solver_PrivArgs *) handle;
218  uint8_t *pBlock = pKerPrivArgs->bufPblock;
219  int32_t nRows = pKerPrivArgs->widthR;
220  int32_t strideR = pKerPrivArgs->strideR;
221  int32_t colRstride = strideR / sizeof(dataType);
222 
223  typedef typename c7x::make_full_vector<dataType>::type vec;
224  int32_t eleCount = c7x::element_count_of<vec>::value;
225 
226  __SE_ELETYPE SE_ELETYPE = c7x::se_eletype<vec>::value;
227  __SE_VECLEN SE_VECLEN = c7x::se_veclen<vec>::value;
228  __SA_VECLEN SA_VECLEN = c7x::sa_veclen<vec>::value;
229 
230  __SE_TEMPLATE_v1 seDivReadParams = __gen_SE_TEMPLATE_v1();
231  seDivReadParams.ICNT0 = nRows;
232  seDivReadParams.DIMFMT = __SE_DIMFMT_1D;
233  seDivReadParams.ELETYPE = SE_ELETYPE;
234  seDivReadParams.VECLEN = SE_VECLEN;
235 
236  __SE_TEMPLATE_v1 seBlockParams = __gen_SE_TEMPLATE_v1();
237  __SA_TEMPLATE_v1 saWriteXParams = __gen_SA_TEMPLATE_v1();
238  __SA_TEMPLATE_v1 saReverseParams = __gen_SA_TEMPLATE_v1();
239 
240  seBlockParams.ICNT0 = eleCount;
241  seBlockParams.DIM1 = -colRstride;
242  seBlockParams.DIMFMT = __SE_DIMFMT_2D;
243  seBlockParams.ELETYPE = SE_ELETYPE;
244  seBlockParams.VECLEN = SE_VECLEN;
245 
246  saWriteXParams.ICNT0 = 1;
247  saWriteXParams.ICNT1 = nRows;
248  saWriteXParams.DIM1 = -1;
249  saWriteXParams.DIMFMT = __SA_DIMFMT_2D;
250  saWriteXParams.VECLEN = SA_VECLEN;
251 
252  saReverseParams.ICNT0 = eleCount;
253  saReverseParams.DIM1 = -eleCount;
254  saReverseParams.DIMFMT = __SA_DIMFMT_2D;
255  saReverseParams.VECLEN = SA_VECLEN;
256 
257  *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (4 * SE_PARAM_SIZE)) = seDivReadParams;
258  *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (5 * SE_PARAM_SIZE)) = seBlockParams;
259  *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (6 * SE_PARAM_SIZE)) = saWriteXParams;
260  *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (7 * SE_PARAM_SIZE)) = saReverseParams;
261 
262  __SE_TEMPLATE_v1 seDiagReadParams = __gen_SE_TEMPLATE_v1();
263  seDiagReadParams.ICNT0 = 1;
264  seDiagReadParams.ICNT1 = nRows;
265  seDiagReadParams.DIM1 = colRstride + 1;
266  seDiagReadParams.DIMFMT = __SE_DIMFMT_2D;
267  seDiagReadParams.ELETYPE = SE_ELETYPE;
268  seDiagReadParams.VECLEN = __SE_VECLEN_1ELEM;
269  *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (8 * SE_PARAM_SIZE)) = seDiagReadParams;
270 
271  DSPLIB_DEBUGPRINTFN(0, "Exiting function with return status: %d\n", DSPLIB_SUCCESS);
272 }
275 
276 template <typename dataType, typename V = typename c7x::make_full_vector<dataType>::type>
277 inline void getElement(V inVec, uint32_t index, dataType *element);
278 template <typename V> inline void getElement(V inVec, uint32_t index, float *element)
279 {
280  *element = __as_float(__vgetw_vrd(c7x::as_int_vec(inVec), index));
281 }
282 
283 template <typename V> inline void getElement(V inVec, uint32_t index, double *element)
284 {
285  *element = __as_double(__vgetd_vrd(c7x::as_long_vec(inVec), index));
286 }
287 
288 template <typename dataType>
290  dataType *pX,
291  dataType *pY,
292  dataType *pDiv,
293  int32_t nRows,
294  int32_t nCols,
295  int32_t colRstride,
296  uint8_t *pBlock)
297 {
298  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Entering function");
299 
300  __SE_TEMPLATE_v1 seDivReadParams;
301  __SA_TEMPLATE_v1 saDivStoreParams;
302  __SE_TEMPLATE_v1 seDiagReadParams;
303  __SA_TEMPLATE_v1 saDiagWriteParams;
304  saDivStoreParams = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (3 * SE_PARAM_SIZE));
305  seDivReadParams = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (4 * SE_PARAM_SIZE));
306  saDiagWriteParams = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (6 * SE_PARAM_SIZE));
307  seDiagReadParams = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (8 * SE_PARAM_SIZE));
308 
309  saDiagWriteParams.DIM1 = 1;
310 
311  typedef typename c7x::make_full_vector<dataType>::type vec;
312  int32_t eleCount = c7x::element_count_of<vec>::value;
313  int32_t nVec = DSPLIB_ceilingDiv(nRows, eleCount);
314 
315  /* Calculate reciprocals of Diagonal Elements */
316 
317  __SE1_OPEN(pR, seDiagReadParams);
318  __SA1_OPEN(saDiagWriteParams);
319  int32_t row = 0;
320 
321  for (row = 0; row < nRows; row++) {
322  vec vecDiag = c7x::strm_eng<1, vec>::get_adv();
323  __vpred predDiag = c7x::strm_agen<1, vec>::get_vpred();
324  vec *pStoreDiag = c7x::strm_agen<1, vec>::get_adv(pDiv);
325  __vstore_pred(predDiag, pStoreDiag, vecDiag);
326  }
327  __SE1_CLOSE();
328  __SA1_CLOSE();
329 
330  __SE0_OPEN(pDiv, seDivReadParams);
331  __SA0_OPEN(saDivStoreParams);
332  dataType TwoP0 = 2.0;
333  int32_t ii = 0;
334 
335  for (ii = 0; ii < nVec - 3; ii += 4) {
336  vec v1 = c7x::strm_eng<0, vec>::get_adv();
337  vec v2 = c7x::strm_eng<0, vec>::get_adv();
338  vec v3 = c7x::strm_eng<0, vec>::get_adv();
339  vec v4 = c7x::strm_eng<0, vec>::get_adv();
340 
341  vec yy1 = __recip(v1);
342  yy1 = yy1 * (TwoP0 - v1 * yy1);
343  yy1 = yy1 * (TwoP0 - v1 * yy1);
344 
345  vec yy2 = __recip(v2);
346  yy2 = yy2 * (TwoP0 - v2 * yy2);
347  yy2 = yy2 * (TwoP0 - v2 * yy2);
348 
349  vec yy3 = __recip(v3);
350  yy3 = yy3 * (TwoP0 - v3 * yy3);
351  yy3 = yy3 * (TwoP0 - v3 * yy3);
352 
353  vec yy4 = __recip(v4);
354  yy4 = yy4 * (TwoP0 - v4 * yy4);
355  yy4 = yy4 * (TwoP0 - v4 * yy4);
356 
357  __vpred predDiv1 = c7x::strm_agen<0, vec>::get_vpred();
358  vec *pStoreDiv1 = c7x::strm_agen<0, vec>::get_adv(pDiv);
359  __vstore_pred(predDiv1, pStoreDiv1, yy1);
360 
361  __vpred predDiv2 = c7x::strm_agen<0, vec>::get_vpred();
362  vec *pStoreDiv2 = c7x::strm_agen<0, vec>::get_adv(pDiv);
363  __vstore_pred(predDiv2, pStoreDiv2, yy2);
364 
365  __vpred predDiv3 = c7x::strm_agen<0, vec>::get_vpred();
366  vec *pStoreDiv3 = c7x::strm_agen<0, vec>::get_adv(pDiv);
367  __vstore_pred(predDiv3, pStoreDiv3, yy3);
368 
369  __vpred predDiv4 = c7x::strm_agen<0, vec>::get_vpred();
370  vec *pStoreDiv4 = c7x::strm_agen<0, vec>::get_adv(pDiv);
371  __vstore_pred(predDiv4, pStoreDiv4, yy4);
372  }
373 
374  for (; ii < nVec; ii++) {
375  vec v1 = c7x::strm_eng<0, vec>::get_adv();
376 
377  vec yy1 = __recip(v1);
378  yy1 = yy1 * (TwoP0 - v1 * yy1);
379  yy1 = yy1 * (TwoP0 - v1 * yy1);
380 
381  __vpred predDiv1 = c7x::strm_agen<0, vec>::get_vpred();
382  vec *pStoreDiv1 = c7x::strm_agen<0, vec>::get_adv(pDiv);
383  __vstore_pred(predDiv1, pStoreDiv1, yy1);
384  }
385 
386  __SE0_CLOSE();
387  __SA0_CLOSE();
388 
389  /* Back Substitution */
390  int32_t totalBlocks = nCols / eleCount;
391  int32_t remainingEle = nCols - (totalBlocks * eleCount);
392 
393  __SE_TEMPLATE_v1 seBlockParams;
394  __SE_TEMPLATE_v1 seReadXParams;
395  __SA_TEMPLATE_v1 saWriteXParams;
396  __SA_TEMPLATE_v1 saReverseParams;
397 
398  seReadXParams = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (0 * SE_PARAM_SIZE));
399  seBlockParams = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (5 * SE_PARAM_SIZE));
400  saWriteXParams = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (6 * SE_PARAM_SIZE));
401  saReverseParams = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (7 * SE_PARAM_SIZE));
402 
403  seReadXParams.ICNT0 = 1;
404  seReadXParams.DIM1 = -1;
405  saReverseParams.ICNT1 = totalBlocks;
406 
407  dataType *pRLastElem = &pR[(nRows - 1) + ((nRows - 1) * colRstride)];
408  dataType *pXLastElem = &pX[nRows - 1];
409  dataType *pSE0 = pRLastElem - (eleCount - 1);
410  dataType *pSA1 = pX + nRows - 1;
411  dataType *pSA2 = pY + nRows - eleCount;
412  dataType *pSA3 = pDiv + nRows - eleCount;
413 
414  __SA1_OPEN(saWriteXParams);
415  if (totalBlocks) {
416  __SA2_OPEN(saReverseParams);
417  __SA3_OPEN(saReverseParams);
418  }
419 
420  for (int32_t block = 0; block < totalBlocks; block++) {
421  __vpred predY = c7x::strm_agen<2, vec>::get_vpred();
422  vec *pLoadY = c7x::strm_agen<2, vec>::get_adv(pSA2);
423  vec vecY = __vload_pred(predY, pLoadY);
424 
425  __vpred predDiv = c7x::strm_agen<3, vec>::get_vpred();
426  vec *pLoadDiv = c7x::strm_agen<3, vec>::get_adv(pSA3);
427  vec vecDiv = __vload_pred(predDiv, pLoadDiv);
428 
429  int32_t sumRows = block * eleCount;
430  int32_t totalRows = sumRows + eleCount;
431 
432  seBlockParams.ICNT1 = totalRows;
433  seReadXParams.ICNT1 = sumRows;
434 
435  __SE0_OPEN(pSE0, seBlockParams);
436  if (sumRows > 0) {
437  __SE1_OPEN(pXLastElem, seReadXParams);
438  }
439 
440  /* Calculate sum */
441  vec vecSum = (vec) 0;
442  vec vecSum1 = (vec) 0;
443  vec vecSum2 = (vec) 0;
444  vec vecSum3 = (vec) 0;
445  vec vecSum4 = (vec) 0;
446  int32_t vertical = 0;
447 
448  for (vertical = 0; vertical < sumRows - 3; vertical += 4) {
449  vec v1 = c7x::strm_eng<0, vec>::get_adv();
450  vec x1 = c7x::strm_eng<1, vec>::get_adv();
451  vecSum1 += v1 * x1;
452 
453  vec v2 = c7x::strm_eng<0, vec>::get_adv();
454  vec x2 = c7x::strm_eng<1, vec>::get_adv();
455  vecSum2 += v2 * x2;
456 
457  vec v3 = c7x::strm_eng<0, vec>::get_adv();
458  vec x3 = c7x::strm_eng<1, vec>::get_adv();
459  vecSum3 += v3 * x3;
460 
461  vec v4 = c7x::strm_eng<0, vec>::get_adv();
462  vec x4 = c7x::strm_eng<1, vec>::get_adv();
463  vecSum4 += v4 * x4;
464  }
465 
466  vecSum = vecSum1 + vecSum2 + vecSum3 + vecSum4;
467  /* Calculate values */
468  uint32_t vecIndex = eleCount - 1;
469  dataType resultEle1, resultEle2, resultEle3, resultEle4;
470 
471  for (vertical = 0; vertical < eleCount - 3; vertical += 4) {
472  vec v1 = c7x::strm_eng<0, vec>::get_adv();
473  vec result1 = (vecY - vecSum) * vecDiv;
474  getElement(result1, vecIndex--, &resultEle1);
475  vecSum += v1 * (resultEle1);
476  dataType *pStoreX1 = c7x::strm_agen<1, dataType>::get_adv(pSA1);
477  *pStoreX1 = resultEle1;
478 
479  vec v2 = c7x::strm_eng<0, vec>::get_adv();
480  vec result2 = (vecY - vecSum) * vecDiv;
481  getElement(result2, vecIndex--, &resultEle2);
482  vecSum += v2 * (resultEle2);
483  dataType *pStoreX2 = c7x::strm_agen<1, dataType>::get_adv(pSA1);
484  *pStoreX2 = resultEle2;
485 
486  vec v3 = c7x::strm_eng<0, vec>::get_adv();
487  vec result3 = (vecY - vecSum) * vecDiv;
488  getElement(result3, vecIndex--, &resultEle3);
489  vecSum += v3 * (resultEle3);
490  dataType *pStoreX3 = c7x::strm_agen<1, dataType>::get_adv(pSA1);
491  *pStoreX3 = resultEle3;
492 
493  vec v4 = c7x::strm_eng<0, vec>::get_adv();
494  vec result4 = (vecY - vecSum) * vecDiv;
495  getElement(result4, vecIndex--, &resultEle4);
496  vecSum += v4 * (resultEle4);
497  dataType *pStoreX4 = c7x::strm_agen<1, dataType>::get_adv(pSA1);
498  *pStoreX4 = resultEle4;
499  }
500 
501  pSE0 -= eleCount;
502  __SE0_CLOSE();
503  __SE1_CLOSE();
504  }
505  if (totalBlocks) {
506  __SA2_CLOSE();
507  __SA3_CLOSE();
508  }
509 
510  /* Processing Remaining Elements */
511  if (remainingEle > 0) {
512  seBlockParams.ICNT0 = remainingEle;
513  seBlockParams.ICNT1 = nRows;
514 
515  seReadXParams.ICNT1 = nRows;
516 
517  saReverseParams.ICNT0 = remainingEle;
518  saReverseParams.ICNT1 = 1;
519  saReverseParams.DIM1 = 0;
520 
521  pSE0 = &pR[(nRows - 1) * colRstride];
522  __SE0_OPEN(pSE0, seBlockParams);
523  __SE1_OPEN(pXLastElem, seReadXParams);
524  __SA2_OPEN(saReverseParams);
525  __SA3_OPEN(saReverseParams);
526 
527  int32_t sumRows = totalBlocks * eleCount;
528 
529  __vpred predY = c7x::strm_agen<2, vec>::get_vpred();
530  vec *pLoadY = c7x::strm_agen<2, vec>::get_adv(pY);
531  vec vecY = __vload_pred(predY, pLoadY);
532 
533  __vpred predDiv = c7x::strm_agen<3, vec>::get_vpred();
534  vec *pLoadDiv = c7x::strm_agen<3, vec>::get_adv(pDiv);
535  vec vecDiv = __vload_pred(predDiv, pLoadDiv);
536 
537  vec vecSum = (vec) 0;
538  vec vecSum1 = (vec) 0;
539  vec vecSum2 = (vec) 0;
540  vec vecSum3 = (vec) 0;
541  vec vecSum4 = (vec) 0;
542  int32_t vertical = 0;
543 
544  for (vertical = 0; vertical < sumRows - 3; vertical += 4) {
545  vec v1 = c7x::strm_eng<0, vec>::get_adv();
546  vec x1 = c7x::strm_eng<1, vec>::get_adv();
547  vecSum1 += v1 * x1;
548 
549  vec v2 = c7x::strm_eng<0, vec>::get_adv();
550  vec x2 = c7x::strm_eng<1, vec>::get_adv();
551  vecSum2 += v2 * x2;
552 
553  vec v3 = c7x::strm_eng<0, vec>::get_adv();
554  vec x3 = c7x::strm_eng<1, vec>::get_adv();
555  vecSum3 += v3 * x3;
556 
557  vec v4 = c7x::strm_eng<0, vec>::get_adv();
558  vec x4 = c7x::strm_eng<1, vec>::get_adv();
559  vecSum4 += v4 * x4;
560  }
561 
562  vecSum = vecSum1 + vecSum2 + vecSum3 + vecSum4;
563 
564  int32_t vecIndex = remainingEle - 1;
565  dataType resultEle;
566 
567  for (vertical = 0; vertical < remainingEle; vertical++) {
568  vec v1 = c7x::strm_eng<0, vec>::get_adv();
569  vec result = (vecY - vecSum) * vecDiv;
570 
571  getElement(result, vecIndex--, &resultEle);
572  vecSum += v1 * (resultEle);
573 
574  dataType *pStoreX = c7x::strm_agen<1, dataType>::get_adv(pSA1);
575  *pStoreX = resultEle;
576  }
577 
578  __SE0_CLOSE();
579  __SE1_CLOSE();
580  __SA2_CLOSE();
581  __SA3_CLOSE();
582  }
583  __SA1_CLOSE();
584  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Exiting function");
585 }
587  float *pX,
588  float *pY,
589  float *pDiv,
590  int32_t nRows,
591  int32_t nCols,
592  int32_t colRstride,
593  uint8_t *pBlock);
595  double *pX,
596  double *pY,
597  double *pDiv,
598  int32_t nRows,
599  int32_t nCols,
600  int32_t colRstride,
601  uint8_t *pBlock);
602 
603 template <typename dataType>
605  DSPLIB_bufParams2D_t *bufParamsQ,
606  DSPLIB_bufParams2D_t *bufParamsR,
607  DSPLIB_bufParams1D_t *bufParamsB,
608  DSPLIB_bufParams1D_t *bufParamsY,
609  DSPLIB_bufParams1D_t *bufParamsX,
610  const DSPLIB_qrdSolverInitArgs *pKerInitArgs)
611 {
612  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Entering function");
613 
614  DSPLIB_qrd_solver_PrivArgs *pKerPrivArgs = (DSPLIB_qrd_solver_PrivArgs *) handle;
615  DSPLIB_matTrans_PrivArgs *pMatTransKerPrivArgs = &pKerPrivArgs->pMatTransKerPrivArgs;
616 
617  DSPLIB_matTransInitArgs kerInitArgsMatTrans;
618 
619  kerInitArgsMatTrans.funcStyle = pKerInitArgs->funcStyle;
620  kerInitArgsMatTrans.dimX = bufParamsR->dim_x;
621  kerInitArgsMatTrans.dimY = bufParamsR->dim_y;
622 
623  pMatTransKerPrivArgs->widthIn = bufParamsR->dim_x;
624  pMatTransKerPrivArgs->heightIn = bufParamsR->dim_y;
625  pMatTransKerPrivArgs->strideIn = bufParamsR->stride_y;
626  pMatTransKerPrivArgs->strideOut = bufParamsR->stride_y;
627 
628  DSPLIB_qrd_solver_y_init_ci<dataType>(handle);
629  DSPLIB_matTrans_init_ci<dataType>(pMatTransKerPrivArgs, bufParamsR, bufParamsR, &kerInitArgsMatTrans);
630  DSPLIB_qrd_solver_backSubstitution_init_ci<dataType>(handle);
631 
632  DSPLIB_DEBUGPRINTFN(0, "Exiting function with return status: %d\n", DSPLIB_SUCCESS);
633  return DSPLIB_SUCCESS;
634 }
636  DSPLIB_bufParams2D_t *bufParamsQ,
637  DSPLIB_bufParams2D_t *bufParamsR,
638  DSPLIB_bufParams1D_t *bufParamsB,
639  DSPLIB_bufParams1D_t *bufParamsY,
640  DSPLIB_bufParams1D_t *bufParamsX,
641  const DSPLIB_qrdSolverInitArgs *pKerInitArgs);
642 
644  DSPLIB_bufParams2D_t *bufParamsQ,
645  DSPLIB_bufParams2D_t *bufParamsR,
646  DSPLIB_bufParams1D_t *bufParamsB,
647  DSPLIB_bufParams1D_t *bufParamsY,
648  DSPLIB_bufParams1D_t *bufParamsX,
649  const DSPLIB_qrdSolverInitArgs *pKerInitArgs);
650 
651 template <typename dataType>
653  void *restrict pQ,
654  void *restrict pR,
655  void *restrict pB,
656  void *restrict pY,
657  void *restrict pX,
658  void *restrict pD,
659  void *restrict pR1)
660 {
661  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Entering function");
662 
663  DSPLIB_STATUS status = DSPLIB_SUCCESS;
664 
665  DSPLIB_qrd_solver_PrivArgs *pKerPrivArgs = (DSPLIB_qrd_solver_PrivArgs *) handle;
666  DSPLIB_matTrans_PrivArgs *pMatTransKerPrivArgs = &pKerPrivArgs->pMatTransKerPrivArgs;
667  int32_t nRows = pKerPrivArgs->heightR;
668  int32_t nCols = pKerPrivArgs->widthR;
669  int32_t strideR = pKerPrivArgs->strideR;
670  int32_t dataSize = sizeof(dataType);
671  uint8_t *pBlock = pKerPrivArgs->bufPblock;
672  int32_t loopCnt;
673 
674  /* Typecast void pointers to respective data type */
675  dataType *pLocalQ = (dataType *) pQ;
676  dataType *pLocalR = (dataType *) pR;
677  dataType *pLocalB = (dataType *) pB;
678  dataType *pLocalY = (dataType *) pY;
679  dataType *pLocalX = (dataType *) pX;
680  dataType *pLocalD = (dataType *) pD;
681  dataType *pLocalR1 = (dataType *) pR1;
682 
683  int32_t colRstride = strideR / dataSize;
684  DSPLIB_DEBUGPRINTFN(0, "pLocalQ: %p pLocalR: %p pLocalB: %p pLocalY: %p pLocalX: %p nCols: %d nRows: %d\n", pLocalQ,
685  pLocalR, pLocalB, pLocalY, pLocalX, nCols, nRows);
686 
687  /* ------------------------------------------------------------------- */
688  /* Write each column of 'pLocal' to a row of 'pLocalX'. */
689  /* ------------------------------------------------------------------- */
690 
691  DSPLIB_qrd_solver_y_exec_ci<dataType>(pLocalQ, nCols, nRows, pLocalB, pLocalY, pBlock);
692 
693  /* use backward substitution to solve x=inv(R)*y */
694  DSPLIB_matTrans_exec_ci<dataType>(pMatTransKerPrivArgs, pLocalR, pLocalR1);
695 
696  memset(pLocalX, 0, sizeof(dataType) * nCols);
697  loopCnt = nCols;
698  DSPLIB_qrd_solver_backSubstitution_ci<dataType>(pLocalR1, pLocalX, pLocalY, pLocalD, loopCnt, loopCnt, colRstride,
699  pBlock);
700 
701  DSPLIB_DEBUGPRINTFN(0, "Exiting function with return status: %d\n", status);
702 
703  return (status);
704 }
705 // explicit instantiation for the different data type versions
707  void *restrict pQ,
708  void *restrict pR,
709  void *restrict pB,
710  void *restrict pY,
711  void *restrict pX,
712  void *restrict pD,
713  void *restrict pR1);
714 
716  void *restrict pQ,
717  void *restrict pR,
718  void *restrict pB,
719  void *restrict pY,
720  void *restrict pX,
721  void *restrict pD,
722  void *restrict pR1);
723 /* ======================================================================== */
724 /* End of file: DSPLIB_qrd_solver_ci.cpp */
725 /* ======================================================================== */
template void DSPLIB_qrd_solver_y_exec_ci< double >(double *pLocalQ, int32_t nCols, int32_t nRows, double *pLocalB, double *pLocalY, uint8_t *pBlock)
template void DSPLIB_qrd_solver_y_init_ci< double >(DSPLIB_kernelHandle handle)
template DSPLIB_STATUS DSPLIB_qrd_solver_init_ci< float >(DSPLIB_kernelHandle handle, DSPLIB_bufParams2D_t *bufParamsQ, DSPLIB_bufParams2D_t *bufParamsR, DSPLIB_bufParams1D_t *bufParamsB, DSPLIB_bufParams1D_t *bufParamsY, DSPLIB_bufParams1D_t *bufParamsX, const DSPLIB_qrdSolverInitArgs *pKerInitArgs)
template void DSPLIB_qrd_solver_y_exec_ci< float >(float *pLocalQ, int32_t nCols, int32_t nRows, float *pLocalB, float *pLocalY, uint8_t *pBlock)
void getElement(V inVec, uint32_t index, dataType *element)
DSPLIB_STATUS DSPLIB_qrd_solver_exec_ci(DSPLIB_kernelHandle handle, void *restrict pQ, void *restrict pR, void *restrict pB, void *restrict pY, void *restrict pX, void *restrict pD, void *restrict pR1)
This function is the main execution function for the C7x implementation of the kernel....
template void DSPLIB_qrd_solver_backSubstitution_ci< float >(float *pR, float *pX, float *pY, float *pDiv, int32_t nRows, int32_t nCols, int32_t colRstride, uint8_t *pBlock)
void DSPLIB_qrd_solver_backSubstitution_init_ci(DSPLIB_kernelHandle handle)
template DSPLIB_STATUS DSPLIB_qrd_solver_init_ci< double >(DSPLIB_kernelHandle handle, DSPLIB_bufParams2D_t *bufParamsQ, DSPLIB_bufParams2D_t *bufParamsR, DSPLIB_bufParams1D_t *bufParamsB, DSPLIB_bufParams1D_t *bufParamsY, DSPLIB_bufParams1D_t *bufParamsX, const DSPLIB_qrdSolverInitArgs *pKerInitArgs)
template void DSPLIB_qrd_solver_y_init_ci< float >(DSPLIB_kernelHandle handle)
void DSPLIB_qrd_solver_y_init_ci(DSPLIB_kernelHandle handle)
DSPLIB_STATUS DSPLIB_qrd_solver_init_ci(DSPLIB_kernelHandle handle, DSPLIB_bufParams2D_t *bufParamsQ, DSPLIB_bufParams2D_t *bufParamsR, DSPLIB_bufParams1D_t *bufParamsB, DSPLIB_bufParams1D_t *bufParamsY, DSPLIB_bufParams1D_t *bufParamsX, const DSPLIB_qrdSolverInitArgs *pKerInitArgs)
This function is the initialization function for the C7x implementation of the kernel....
template DSPLIB_STATUS DSPLIB_qrd_solver_exec_ci< float >(DSPLIB_kernelHandle handle, void *restrict pQ, void *restrict pR, void *restrict pB, void *restrict pY, void *restrict pX, void *restrict pD, void *restrict pR1)
template void DSPLIB_qrd_solver_backSubstitution_ci< double >(double *pR, double *pX, double *pY, double *pDiv, int32_t nRows, int32_t nCols, int32_t colRstride, uint8_t *pBlock)
template DSPLIB_STATUS DSPLIB_qrd_solver_exec_ci< double >(DSPLIB_kernelHandle handle, void *restrict pQ, void *restrict pR, void *restrict pB, void *restrict pY, void *restrict pX, void *restrict pD, void *restrict pR1)
void DSPLIB_qrd_solver_backSubstitution_ci(dataType *pR, dataType *pX, dataType *pY, dataType *pDiv, int32_t nRows, int32_t nCols, int32_t colRstride, uint8_t *pBlock)
void DSPLIB_qrd_solver_y_exec_ci(dataType *pLocalQ, int32_t nCols, int32_t nRows, dataType *pLocalB, dataType *pLocalY, uint8_t *pBlock)
template void DSPLIB_qrd_solver_backSubstitution_init_ci< float >(DSPLIB_kernelHandle handle)
template void DSPLIB_qrd_solver_backSubstitution_init_ci< double >(DSPLIB_kernelHandle handle)
Header file for kernel's internal use. For the kernel's interface, please see DSPLIB_qrd_solver.
#define DSPLIB_DEBUGPRINTFN(N, fmt,...)
Definition: DSPLIB_types.h:83
DSPLIB_STATUS_NAME
The enumeration of all status codes.
Definition: DSPLIB_types.h:151
void * DSPLIB_kernelHandle
Handle type for DSPLIB operations.
Definition: DSPLIB_types.h:172
@ DSPLIB_SUCCESS
Definition: DSPLIB_types.h:152
A structure for a 1 dimensional buffer descriptor.
A structure for a 2 dimensional buffer descriptor.
int32_t stride_y
Stride in Y dimension in bytes.
uint32_t dim_x
Width of buffer in X dimension in elements.
uint32_t dim_y
Height of buffer in Y dimension in elements.
Structure containing the parameters to initialize the kernel.
uint32_t dimX
Size of input data.
int8_t funcStyle
Variant of the function refer to DSPLIB_FUNCTION_STYLE
Structure that is reserved for internal use by the kernel.
int32_t strideOut
Stride between rows of output data matrix
uint32_t heightIn
Height of input data matrix
int32_t strideIn
Stride between rows of input data matrix
uint32_t widthIn
Size of input buffer for different batches DSPLIB_matTrans_init that will be retrieved and used by DS...
Structure containing the parameters to initialize the kernel.
int8_t funcStyle
Variant of the function refer to DSPLIB_FUNCTION_STYLE
Structure that is reserved for internal use by the kernel.
uint8_t bufPblock[DSPLIB_QRD_SOLVER_IXX_IXX_OXX_PBLOCK_SIZE]
Buffer to save SE & SA configuration parameters
int32_t strideR
Stride between rows of R output data matrix
int32_t strideQ
Stride between rows of Q output data matrix
uint32_t heightR
Height of input data matrix
DSPLIB_matTrans_PrivArgs pMatTransKerPrivArgs
Privargs for the matTrans kernel.
uint32_t widthR
Size of input buffer for different batches DSPLIB_qrd_solver_init that will be retrieved and used by ...