DSPLIB User Guide
DSPLIB_cholesky_solver_ci.cpp
Go to the documentation of this file.
1 /******************************************************************************/
5 /* Copyright (C) 2017 Texas Instruments Incorporated - https://www.ti.com/
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * Redistributions of source code must retain the above copyright
12  * notice, this list of conditions and the following disclaimer.
13  *
14  * Redistributions in binary form must reproduce the above copyright
15  * notice, this list of conditions and the following disclaimer in the
16  * documentation and/or other materials provided with the
17  * distribution.
18  *
19  * Neither the name of Texas Instruments Incorporated nor the names of
20  * its contributors may be used to endorse or promote products derived
21  * from this software without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
29  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
30  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
33  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34  *
35  ******************************************************************************/
36 
37 /******************************************************************************
38  * Version 1.0 Date 10/2/22 Author: Asheesh Bhardwaj
39  *****************************************************************************/
40 
41 /*******************************************************************************
42  *
43  * INCLUDES
44  *
45  ******************************************************************************/
46 
47 #include "../common/c71/DSPLIB_inlines.h"
49 
50 /*******************************************************************************
51  *
52  * INITIALIZATION
53  *
54  ******************************************************************************/
55 
57 {
 /*
  * Pre-builds the stream templates used by the forward/back substitution
  * kernels and stores them in pKerPrivArgs->bufPblock, one template per
  * SE_PARAM_SIZE-byte slot:
  *   slot 0  seDiagReadParams  (SE)  slot 4  seReadXParams   (SE)
  *   slot 1  saWriteXParams    (SA)  slot 5  seBlockParams   (SE)
  *   slot 2  seDivReadParams   (SE)  slot 6  saReverseParams (SA)
  *   slot 3  saDivStoreParams  (SA)
  * Slots 4, 5 and 6 are stored without ICNT1; the substitution kernels in
  * this file assign ICNT1 on their local copies before opening a stream.
  */
58  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Entering function");
60  uint8_t *pBlock = pKerPrivArgs->bufPblock;
61  int32_t order = pKerPrivArgs->order;
62  int32_t strideL = pKerPrivArgs->stride;
 /* Row pitch expressed in elements (stride is presumably in bytes, since it
    is divided by sizeof(dataType) - TODO confirm). */
63  int32_t colLstride = strideL / sizeof(dataType);
64 
 /* Full-width vector type for dataType and the number of lanes it holds. */
65  typedef typename c7x::make_full_vector<dataType>::type vec;
66  uint32_t eleCount = c7x::element_count_of<vec>::value;
67 
 /* Template field values derived from the vector / element type. */
68  __SE_ELETYPE SE_ELETYPE = c7x::se_eletype<vec>::value;
69  __SE_VECLEN SE_VECLEN = c7x::se_veclen<vec>::value;
70  __SA_VECLEN SA_VECLEN = c7x::sa_veclen<vec>::value;
71  __SE_ELEDUP SE_ELEDUP = c7x::se_eledup<dataType>::value;
72 
 /* Slot 0: one element per step, `order` steps, each step advancing by
    colLstride + 1 elements - i.e. one row down and one column right, which
    walks the main diagonal of a matrix with row pitch colLstride. */
73  __SE_TEMPLATE_v1 seDiagReadParams = __gen_SE_TEMPLATE_v1();
74  seDiagReadParams.ICNT0 = 1;
75  seDiagReadParams.ICNT1 = order;
76  seDiagReadParams.DIM1 = colLstride + 1;
77  seDiagReadParams.DIMFMT = __SE_DIMFMT_2D;
78  seDiagReadParams.ELETYPE = SE_ELETYPE;
79  seDiagReadParams.VECLEN = __SE_VECLEN_1ELEM;
80  *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (0 * SE_PARAM_SIZE)) = seDiagReadParams;
81 
 /* Slot 1: one element per step, `order` steps, stepping back by one element
    (DIM1 = -1). The forward-substitution kernel overrides DIM1 to +1 on its
    local copy. */
82  __SA_TEMPLATE_v1 saWriteXParams = __gen_SA_TEMPLATE_v1();
83  saWriteXParams.ICNT0 = 1;
84  saWriteXParams.ICNT1 = order;
85  saWriteXParams.DIM1 = -1;
86  saWriteXParams.DIMFMT = __SA_DIMFMT_2D;
87  saWriteXParams.VECLEN = SA_VECLEN;
88  *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (1 * SE_PARAM_SIZE)) = saWriteXParams;
89 
 /* Slot 2: linear (1D) full-vector read of `order` elements. */
90  __SE_TEMPLATE_v1 seDivReadParams = __gen_SE_TEMPLATE_v1();
91  seDivReadParams.ICNT0 = order;
92  seDivReadParams.DIMFMT = __SE_DIMFMT_1D;
93  seDivReadParams.ELETYPE = SE_ELETYPE;
94  seDivReadParams.VECLEN = SE_VECLEN;
95  *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (2 * SE_PARAM_SIZE)) = seDivReadParams;
96 
 /* Slot 3: linear (1D) address generator over `order` elements. */
97  __SA_TEMPLATE_v1 saDivStoreParams = __gen_SA_TEMPLATE_v1();
98  saDivStoreParams.ICNT0 = order;
99  saDivStoreParams.DIMFMT = __SA_DIMFMT_1D;
100  saDivStoreParams.VECLEN = SA_VECLEN;
101  *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (3 * SE_PARAM_SIZE)) = saDivStoreParams;
102 
 /* Slot 4: one element per step, stepping back by one element, with ELEDUP
    set (presumably so each fetched element is replicated across the whole
    vector - TODO confirm). ICNT1 is not set here. */
103  __SE_TEMPLATE_v1 seReadXParams = __gen_SE_TEMPLATE_v1();
104  seReadXParams.ICNT0 = 1;
105  seReadXParams.DIM1 = -1;
106  seReadXParams.DIMFMT = __SE_DIMFMT_2D;
107  seReadXParams.VECLEN = SE_VECLEN;
108  seReadXParams.ELETYPE = SE_ELETYPE;
109  seReadXParams.ELEDUP = SE_ELEDUP;
110  *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (4 * SE_PARAM_SIZE)) = seReadXParams;
111 
 /* Slot 5: eleCount contiguous elements per step, each step moving back by one
    row pitch (DIM1 = -colLstride). ICNT1 is not set here. */
112  __SE_TEMPLATE_v1 seBlockParams = __gen_SE_TEMPLATE_v1();
113  seBlockParams.ICNT0 = eleCount;
114  seBlockParams.DIM1 = -colLstride;
115  seBlockParams.DIMFMT = __SE_DIMFMT_2D;
116  seBlockParams.ELETYPE = SE_ELETYPE;
117  seBlockParams.VECLEN = SE_VECLEN;
118  *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (5 * SE_PARAM_SIZE)) = seBlockParams;
119 
 /* Slot 6: eleCount elements per step, each step moving back by eleCount
    elements (one vector at a time, last to first). ICNT1 is not set here. */
120  __SA_TEMPLATE_v1 saReverseParams = __gen_SA_TEMPLATE_v1();
121  saReverseParams.ICNT0 = eleCount;
122  saReverseParams.DIM1 = -((int32_t)eleCount);
123  saReverseParams.DIMFMT = __SA_DIMFMT_2D;
124  saReverseParams.VECLEN = SA_VECLEN;
125  *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (6 * SE_PARAM_SIZE)) = saReverseParams;
126 
127  DSPLIB_DEBUGPRINTFN(0, "Exiting function with return status: %d\n", DSPLIB_SUCCESS);
128 }
131 
/*
 * Initialization for the C7x implementation of the Cholesky solver kernel.
 *
 * Runs DSPLIB_cholesky_solver_backSubstitution_init_ci<dataType>(handle) and
 * then configures the matrix-transpose sub-kernel: its init args use
 * pKerPrivArgs->order for both dimensions, and its private args take
 * width/height/input stride from bufParamsU and the output stride from
 * bufParamsScratch.
 *
 * bufParamsY, bufParamsB, bufParamsX, bufParamsDiv and pKerInitArgs are not
 * used by the statements below.
 *
 * Returns `status`, which is never modified after being set to DSPLIB_SUCCESS.
 */
132 template <typename dataType>
134  DSPLIB_bufParams2D_t *bufParamsU,
135  DSPLIB_bufParams2D_t *bufParamsScratch,
136  DSPLIB_bufParams1D_t *bufParamsY,
137  DSPLIB_bufParams1D_t *bufParamsB,
138  DSPLIB_bufParams1D_t *bufParamsX,
139  DSPLIB_bufParams1D_t *bufParamsDiv,
140  const DSPLIB_cholesky_solver_InitArgs *pKerInitArgs)
141 {
142  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Entering function");
143  DSPLIB_STATUS status = DSPLIB_SUCCESS;
145  DSPLIB_matTrans_PrivArgs *matTransPrivArgs = &pKerPrivArgs->matTransPrivArgs;
146  DSPLIB_matTransInitArgs matTransInitArgs;
147 
148 
 /* Build the SE/SA templates used by the substitution kernels. */
149  DSPLIB_cholesky_solver_backSubstitution_init_ci<dataType>(handle);
150 
 /* The transpose operates on an order x order matrix, optimized variant. */
151  matTransInitArgs.dimX = pKerPrivArgs->order;
152  matTransInitArgs.dimY = pKerPrivArgs->order;
153  matTransInitArgs.funcStyle = DSPLIB_FUNCTION_OPTIMIZED;
154 
 /* Geometry of the transpose: input described by U, output by the scratch
    buffer. */
155  matTransPrivArgs->widthIn = bufParamsU->dim_x;
156  matTransPrivArgs->heightIn = bufParamsU->dim_y;
157  matTransPrivArgs->strideIn = bufParamsU->stride_y;
158  matTransPrivArgs->strideOut = bufParamsScratch->stride_y;
159 
 /* NOTE(review): the result of DSPLIB_matTrans_init_ci is not captured; if it
    returns a DSPLIB_STATUS, a failure would not reach `status` - confirm. */
160  DSPLIB_matTrans_init_ci<dataType>(matTransPrivArgs, bufParamsU, bufParamsScratch, &matTransInitArgs);
161 
162  DSPLIB_DEBUGPRINTFN(0, "Exiting function with return status: %d\n", status);
163 
164  return status;
165 }
166 
168  DSPLIB_bufParams2D_t *bufParamsU,
169  DSPLIB_bufParams2D_t *bufParamsScratch,
170  DSPLIB_bufParams1D_t *bufParamsY,
171  DSPLIB_bufParams1D_t *bufParamsB,
172  DSPLIB_bufParams1D_t *bufParamsX,
173  DSPLIB_bufParams1D_t *bufParamsDiv,
174  const DSPLIB_cholesky_solver_InitArgs *pKerInitArgs);
175 
177  DSPLIB_bufParams2D_t *bufParamsU,
178  DSPLIB_bufParams2D_t *bufParamsScratch,
179  DSPLIB_bufParams1D_t *bufParamsY,
180  DSPLIB_bufParams1D_t *bufParamsB,
181  DSPLIB_bufParams1D_t *bufParamsX,
182  DSPLIB_bufParams1D_t *bufParamsDiv,
183  const DSPLIB_cholesky_solver_InitArgs *pKerInitArgs);
184 
185 /*******************************************************************************
186  *
187  * IMPLEMENTATION
188  *
189  ******************************************************************************/
190 
/*
 * getElement: copies lane `index` of vector `inVec` into *element.
 * The generic form is only declared; the float and double overloads below
 * supply the implementations, selected by the type of `element`.
 */
191 template <typename dataType, typename V = typename c7x::make_full_vector<dataType>::type>
192 inline void getElement(V inVec, uint32_t index, dataType *element);
/* float: view the vector as an int vector, fetch the entry selected by `index`
   with __vgetw_vrd (presumably a 32-bit word extract - TODO confirm) and
   reinterpret the result as float. */
193 template <typename V> inline void getElement(V inVec, uint32_t index, float *element)
194 {
195  *element = __as_float(__vgetw_vrd(c7x::as_int_vec(inVec), index));
196 }
197 
/* double: same approach using a long vector view and __vgetd_vrd (presumably
   the 64-bit counterpart - TODO confirm). */
198 template <typename V> inline void getElement(V inVec, uint32_t index, double *element)
199 {
200  *element = __as_double(__vgetd_vrd(c7x::as_long_vec(inVec), index));
201 }
202 
/*
 * Forward-substitution stage of the Cholesky solver (the caller in this file
 * describes it as solving L*y = b for y).
 *
 * The routine runs three passes:
 *   1. stream values out of pL with the slot-0 template and store them, one
 *      per row, into pDiv;
 *   2. overwrite pDiv in place with reciprocals of those values (__recip
 *      followed by two refinement steps);
 *   3. solve for y in blocks of eleCount entries, writing the results to pY.
 *
 * pL         matrix data read through SE0/SE1; successive streamed vectors
 *            are colLstride elements apart
 * pY         output vector y; also read back (SE1) while later blocks are solved
 * pB         right-hand side, read one vector per block
 * pDiv       work buffer: receives the streamed values, then their
 *            reciprocals; left holding the reciprocals on return
 * order      number of unknowns
 * colLstride distance between consecutive streamed vectors of pL, in elements
 * pBlock     buffer of pre-built SE/SA templates, one per SE_PARAM_SIZE bytes
 */
203 template <typename dataType>
204 static inline void DSPLIB_cholesky_solver_forwardSubstitution_ci(dataType *pL,
205  dataType *pY,
206  dataType *pB,
207  dataType *pDiv,
208  int32_t order,
209  int32_t colLstride,
210  uint8_t *pBlock)
211 {
212  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Entering function");
213 
 /* Local copies of the templates stored in pBlock slots 0..3. */
214  __SE_TEMPLATE_v1 seDivReadParams;
215  __SA_TEMPLATE_v1 saDivStoreParams;
216  __SE_TEMPLATE_v1 seDiagReadParams;
217  __SA_TEMPLATE_v1 saDiagWriteParams;
218  seDiagReadParams = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (0 * SE_PARAM_SIZE));
219  saDiagWriteParams = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (1 * SE_PARAM_SIZE));
220  seDivReadParams = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (2 * SE_PARAM_SIZE));
221  saDivStoreParams = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (3 * SE_PARAM_SIZE));
222 
 /* This pass writes pDiv in ascending order, so step forward by one element. */
223  saDiagWriteParams.DIM1 = 1;
224 
225  typedef typename c7x::make_full_vector<dataType>::type vec;
226  int32_t eleCount = c7x::element_count_of<vec>::value;
 /* Number of vectors needed to cover `order` elements (DSPLIB_ceilingDiv
    presumably rounds up - TODO confirm). */
227  int32_t nVec = DSPLIB_ceilingDiv(order, eleCount);
228 
229  /* Calculate reciprocals of Diagonal Elements */
 /* Pass 1: SE1 streams from pL using the slot-0 template; each iteration
    stores the streamed value through SA1 into the next position of pDiv. */
230  __SE1_OPEN(pL, seDiagReadParams);
231  __SA1_OPEN(saDiagWriteParams);
232  int32_t row = 0;
233 
234  for (row = 0; row < order; row++) {
235  vec vecDiag = c7x::strm_eng<1, vec>::get_adv();
236 
237  __vpred predDiag = c7x::strm_agen<1, vec>::get_vpred();
238  vec *pStoreDiag = c7x::strm_agen<1, vec>::get_adv(pDiv);
239  __vstore_pred(predDiag, pStoreDiag, vecDiag);
240  }
241  __SE1_CLOSE();
242  __SA1_CLOSE();
243 
 /* Pass 2: read pDiv a vector at a time (SE0) and store the reciprocal back to
    the same buffer (SA0). Each reciprocal starts from __recip(v) and is
    refined twice with yy = yy * (2 - v * yy). */
244  __SE0_OPEN(pDiv, seDivReadParams);
245  __SA0_OPEN(saDivStoreParams);
246  dataType TwoP0 = 2.0;
247  int32_t ii = 0;
248 
 /* Main loop, unrolled by four vectors; runs only while four remain. */
249  for (ii = 0; ii < nVec - 3; ii += 4) {
250  vec v1 = c7x::strm_eng<0, vec>::get_adv();
251  vec v2 = c7x::strm_eng<0, vec>::get_adv();
252  vec v3 = c7x::strm_eng<0, vec>::get_adv();
253  vec v4 = c7x::strm_eng<0, vec>::get_adv();
254 
255  vec yy1 = __recip(v1);
256  yy1 = yy1 * (TwoP0 - v1 * yy1);
257  yy1 = yy1 * (TwoP0 - v1 * yy1);
258 
259  vec yy2 = __recip(v2);
260  yy2 = yy2 * (TwoP0 - v2 * yy2);
261  yy2 = yy2 * (TwoP0 - v2 * yy2);
262 
263  vec yy3 = __recip(v3);
264  yy3 = yy3 * (TwoP0 - v3 * yy3);
265  yy3 = yy3 * (TwoP0 - v3 * yy3);
266 
267  vec yy4 = __recip(v4);
268  yy4 = yy4 * (TwoP0 - v4 * yy4);
269  yy4 = yy4 * (TwoP0 - v4 * yy4);
270 
271  __vpred predDiv1 = c7x::strm_agen<0, vec>::get_vpred();
272  vec *pStoreDiv1 = c7x::strm_agen<0, vec>::get_adv(pDiv);
273  __vstore_pred(predDiv1, pStoreDiv1, yy1);
274 
275  __vpred predDiv2 = c7x::strm_agen<0, vec>::get_vpred();
276  vec *pStoreDiv2 = c7x::strm_agen<0, vec>::get_adv(pDiv);
277  __vstore_pred(predDiv2, pStoreDiv2, yy2);
278 
279  __vpred predDiv3 = c7x::strm_agen<0, vec>::get_vpred();
280  vec *pStoreDiv3 = c7x::strm_agen<0, vec>::get_adv(pDiv);
281  __vstore_pred(predDiv3, pStoreDiv3, yy3);
282 
283  __vpred predDiv4 = c7x::strm_agen<0, vec>::get_vpred();
284  vec *pStoreDiv4 = c7x::strm_agen<0, vec>::get_adv(pDiv);
285  __vstore_pred(predDiv4, pStoreDiv4, yy4);
286  }
287 
288 
 /* Remainder loop: the vectors left over by the unrolled loop, one at a time. */
289  for (; ii < nVec; ii++) {
290  vec v1 = c7x::strm_eng<0, vec>::get_adv();
291 
292  vec yy1 = __recip(v1);
293  yy1 = yy1 * (TwoP0 - v1 * yy1);
294  yy1 = yy1 * (TwoP0 - v1 * yy1);
295 
296  __vpred predDiv1 = c7x::strm_agen<0, vec>::get_vpred();
297  vec *pStoreDiv1 = c7x::strm_agen<0, vec>::get_adv(pDiv);
298  __vstore_pred(predDiv1, pStoreDiv1, yy1);
299  }
300 
301  __SE0_CLOSE();
302  __SA0_CLOSE();
303 
304  /*----------------------------------------------------------------------
305  Forward Substitution
306  -----------------------------------------------------------------------*/
307 
308  __SE_TEMPLATE_v1 seBlockParams;
309  __SE_TEMPLATE_v1 seReadYParams;
310  __SA_TEMPLATE_v1 saWriteYParams;
311  __SA_TEMPLATE_v1 sa1DReadParams;
312 
 /* NOTE(review): slot 2 is filled with an __SE_TEMPLATE_v1 (seDivReadParams)
    by DSPLIB_cholesky_solver_backSubstitution_init_ci but is read here as an
    __SA_TEMPLATE_v1. Slot 3 holds an SA template with the same ICNT0 and a 1D
    format - confirm that reinterpreting the SE template is intended. */
313  saWriteYParams = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (1 * SE_PARAM_SIZE));
314  seReadYParams = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (4 * SE_PARAM_SIZE));
315  seBlockParams = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (5 * SE_PARAM_SIZE));
316  sa1DReadParams = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (2 * SE_PARAM_SIZE));
317 
 /* The stored templates step backwards; this pass runs front to back, so the
    strides are flipped to positive on the local copies. */
318  saWriteYParams.DIM1 = 1;
319  seReadYParams.DIM1 = 1;
320  seBlockParams.DIM1 = colLstride;
321 
322  dataType *pSE0 = pL;
323  dataType *pSA1 = pY;
324  dataType *pSA2 = pB;
325  dataType *pSA3 = pDiv;
326 
 /* SA1 writes y; SA2 and SA3 share one template and walk pB and pDiv. */
327  __SA1_OPEN(saWriteYParams);
328  __SA2_OPEN(sa1DReadParams);
329  __SA3_OPEN(sa1DReadParams);
330 
331 
 /* Pass 3: one iteration per block of eleCount unknowns. */
332  for (int32_t block = 0; block < nVec; block++) { /* excl - FL: 20 DBL: 20*/
333 
 /* Right-hand side and reciprocals for the entries solved in this block. */
334  __vpred predB = c7x::strm_agen<2, vec>::get_vpred();
335  vec *pLoadB = c7x::strm_agen<2, vec>::get_adv(pSA2);
336  vec vecB = __vload_pred(predB, pLoadB);
337 
338  __vpred predDiv = c7x::strm_agen<3, vec>::get_vpred();
339  vec *pLoadDiv = c7x::strm_agen<3, vec>::get_adv(pSA3);
340  vec vecDiv = __vload_pred(predDiv, pLoadDiv);
341 
 /* sumRows: entries of y already solved by earlier blocks.
    totalRows: vectors SE0 delivers for this block (solved + current). */
342  int32_t sumRows = block * eleCount;
343  int32_t totalRows = sumRows + eleCount;
344 
345  seBlockParams.ICNT1 = totalRows;
346  seReadYParams.ICNT1 = sumRows;
347 
 /* SE0 streams the matrix vectors for this block; SE1 re-reads the y values
    solved so far (not opened for the first block, where there are none). */
348  __SE0_OPEN(pSE0, seBlockParams);
349  if (sumRows > 0) {
350  __SE1_OPEN(pY, seReadYParams);
351  }
352 
353  /* Calculate sum */
354  vec vecSum = (vec) 0;
355  vec vecSum1 = (vec) 0;
356  vec vecSum2 = (vec) 0;
357  vec vecSum3 = (vec) 0;
358  vec vecSum4 = (vec) 0;
359  int32_t vertical = 0;
360  /* FL - 7 + trip_cnt * 4, DBL - 7 + trip_cnt * 4*/
 /* Accumulate matrix-vector * y products over the solved entries, four per
    iteration into separate partial sums. There is no remainder loop: this
    relies on sumRows (a multiple of eleCount) being a multiple of 4 -
    TODO confirm for every supported dataType. */
361  for (vertical = 0; vertical < sumRows - 3; vertical += 4) {
362  vec v1 = c7x::strm_eng<0, vec>::get_adv();
363  vec y1 = c7x::strm_eng<1, vec>::get_adv();
364  vecSum1 += v1 * y1;
365 
366  vec v2 = c7x::strm_eng<0, vec>::get_adv();
367  vec y2 = c7x::strm_eng<1, vec>::get_adv();
368  vecSum2 += v2 * y2;
369 
370  vec v3 = c7x::strm_eng<0, vec>::get_adv();
371  vec y3 = c7x::strm_eng<1, vec>::get_adv();
372  vecSum3 += v3 * y3;
373 
374  vec v4 = c7x::strm_eng<0, vec>::get_adv();
375  vec y4 = c7x::strm_eng<1, vec>::get_adv();
376  vecSum4 += v4 * y4;
377  }
378 
379  /* excl - FL: 11 DBL: 11*/
380  vecSum = vecSum1 + vecSum2 + vecSum3 + vecSum4;
381 
382  /* Calculate values */
 /* Solve the block one lane at a time: lane `vertical` of
    (vecB - vecSum) * vecDiv is the next y value; it is folded into vecSum
    (scaled by the next streamed vector) so that later lanes see it, then
    stored through SA1 into pY. */
383  dataType resultEle1; /* FL - 377, DBL - 169 */
384  for (vertical = 0; vertical < eleCount; vertical++) {
385  vec v1 = c7x::strm_eng<0, vec>::get_adv();
386  vec result1 = (vecB - vecSum) * vecDiv;
387  getElement(result1, vertical, &resultEle1);
388  vecSum += v1 * (resultEle1);
389  __vpred predYCalc = c7x::strm_agen<1, vec>::get_vpred();
390  vec *pStoreY = c7x::strm_agen<1, vec>::get_adv(pSA1);
391  __vstore_pred(predYCalc, pStoreY, (vec) resultEle1);
392 
393  }
394  /* excl - FL: 22 DBL: 22*/
 /* Next block starts eleCount elements further along the first row. */
395  pSE0 += eleCount;
396  __SE0_CLOSE();
 /* NOTE(review): for block 0 SE1 was never opened but is closed here -
    confirm that closing an unopened stream is benign. */
397  __SE1_CLOSE();
398  }
399  __SA2_CLOSE();
400  __SA3_CLOSE();
401  __SA1_CLOSE();
402  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Exiting function");
403 }
405  float *pX,
406  float *pY,
407  float *pDiv,
408  int32_t order,
409  int32_t colLstride,
410  uint8_t *pBlock);
412  double *pX,
413  double *pY,
414  double *pDiv,
415  int32_t order,
416  int32_t colLstride,
417  uint8_t *pBlock);
418 
/*
 * Back-substitution stage of the Cholesky solver (the caller in this file
 * describes it as solving U*x = y for x).
 *
 * Works from the last unknown towards the first: full blocks of eleCount
 * unknowns are solved first, starting at the bottom-right of the matrix, and
 * any remaining (order % eleCount) unknowns are solved afterwards.
 *
 * pL         matrix data read through SE0 (the caller passes the transposed
 *            matrix produced by the transpose step)
 * pX         output vector x, written from pX[order - 1] downwards and read
 *            back (SE1) while later blocks are solved
 * pY         right-hand side for this stage
 * pDiv       multipliers applied to (y - sum); the forward-substitution
 *            routine in this file leaves reciprocals in this buffer
 * order      number of unknowns
 * colLstride distance between consecutive streamed vectors of pL, in elements
 * pBlock     buffer of pre-built SE/SA templates, one per SE_PARAM_SIZE bytes
 */
419 template <typename dataType>
420 static inline void DSPLIB_cholesky_solver_backSubstitution_ci(dataType *pL,
421  dataType *pX,
422  dataType *pY,
423  dataType *pDiv,
424  int32_t order,
425  int32_t colLstride,
426  uint8_t *pBlock)
427 {
428  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Entering function");
429 
430  typedef typename c7x::make_full_vector<dataType>::type vec;
431  int32_t eleCount = c7x::element_count_of<vec>::value;
432 
433  /* Split the unknowns into full vector blocks plus a remainder; pDiv is used as supplied */
434 
435  int32_t totalBlocks = order / eleCount;
436  int32_t remainingEle = order - (totalBlocks * eleCount);
437 
438  __SE_TEMPLATE_v1 seBlockParams;
439  __SE_TEMPLATE_v1 seReadXParams;
440  __SA_TEMPLATE_v1 saWriteXParams;
441  __SA_TEMPLATE_v1 saReverseParams;
442 
 /* Local copies of the templates stored in pBlock slots 1, 4, 5 and 6. */
443  saWriteXParams = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (1 * SE_PARAM_SIZE));
444  seReadXParams = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (4 * SE_PARAM_SIZE));
445  seBlockParams = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (5 * SE_PARAM_SIZE));
446  saReverseParams = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (6 * SE_PARAM_SIZE));
447 
 /* x is re-read one element at a time, stepping backwards; y and pDiv are
    read one vector per full block. */
448  seReadXParams.ICNT0 = 1;
449  seReadXParams.DIM1 = -1;
450  saReverseParams.ICNT1 = totalBlocks;
451 
 /* Start positions at the tail of each buffer:
    pLLastElem - element [order-1][order-1] of the matrix;
    pSE0       - first element of the last eleCount-wide vector in that row;
    pSA1       - last element of x;
    pSA2/pSA3  - last full vector of y and of pDiv. */
452  dataType *pLLastElem = &pL[(order - 1) + ((order - 1) * colLstride)];
453  dataType *pXLastElem = &pX[order - 1];
454  dataType *pSE0 = pLLastElem - (eleCount - 1);
455  dataType *pSA1 = pX + order - 1;
456  dataType *pSA2 = pY + order - eleCount;
457  dataType *pSA3 = pDiv + order - eleCount;
458 
 /* SA1 stays open across both the block loop and the remainder section. */
459  __SA1_OPEN(saWriteXParams);
460 
461  if (totalBlocks > 0) {
462  __SA2_OPEN(saReverseParams);
463  __SA3_OPEN(saReverseParams);
464 
465 
466  for (int32_t block = 0; block < totalBlocks; block++) { /* Excl cyc FL: 20 DBL- 20*/
467  __vpred predY = c7x::strm_agen<2, vec>::get_vpred();
468  vec *pLoadY = c7x::strm_agen<2, vec>::get_adv(pSA2);
469  vec vecY = __vload_pred(predY, pLoadY);
470 
471  __vpred predDiv = c7x::strm_agen<3, vec>::get_vpred();
472  vec *pLoadDiv = c7x::strm_agen<3, vec>::get_adv(pSA3);
473  vec vecDiv = __vload_pred(predDiv, pLoadDiv);
474 
 /* sumRows: entries of x already solved by earlier blocks.
    totalRows: vectors SE0 delivers for this block (solved + current). */
475  int32_t sumRows = block * eleCount;
476  int32_t totalRows = sumRows + eleCount;
477 
478  seBlockParams.ICNT1 = totalRows;
479  seReadXParams.ICNT1 = sumRows;
480 
 /* SE0 streams matrix vectors starting at pSE0; SE1 re-reads the x values
    solved so far, starting from the last element of x (not opened for the
    first block, where there are none). */
481  __SE0_OPEN(pSE0, seBlockParams);
482  if (sumRows > 0) {
483  __SE1_OPEN(pXLastElem, seReadXParams);
484  }
485 
486  /* Calculate sum */
487  vec vecSum = (vec) 0;
488  vec vecSum1 = (vec) 0;
489  vec vecSum2 = (vec) 0;
490  vec vecSum3 = (vec) 0;
491  vec vecSum4 = (vec) 0;
492  int32_t vertical = 0;
493 
494  /* FL: 7 + trip_cnt * 4 DBL: 7 + trip_cnt * 4*/
 /* Four products per iteration into separate partial sums. There is no
    remainder loop: this relies on sumRows being a multiple of 4 -
    TODO confirm for every supported dataType. */
495  for (vertical = 0; vertical < sumRows - 3; vertical += 4) {
496  vec v1 = c7x::strm_eng<0, vec>::get_adv();
497  vec x1 = c7x::strm_eng<1, vec>::get_adv();
498  vecSum1 += v1 * x1;
499 
500  vec v2 = c7x::strm_eng<0, vec>::get_adv();
501  vec x2 = c7x::strm_eng<1, vec>::get_adv();
502  vecSum2 += v2 * x2;
503 
504  vec v3 = c7x::strm_eng<0, vec>::get_adv();
505  vec x3 = c7x::strm_eng<1, vec>::get_adv();
506  vecSum3 += v3 * x3;
507 
508  vec v4 = c7x::strm_eng<0, vec>::get_adv();
509  vec x4 = c7x::strm_eng<1, vec>::get_adv();
510  vecSum4 += v4 * x4;
511  }
512 
513  /* Excl cyc FL: 11 DBL: 11 */
514  vecSum = vecSum1 + vecSum2 + vecSum3 + vecSum4;
515 
516  /* Calculate values */
 /* Solve the block from its highest lane down: lane vecIndex of
    (vecY - vecSum) * vecDiv is the next x value; it is folded into vecSum
    for the lanes still to be solved and written as a scalar through SA1. */
517  uint32_t vecIndex = eleCount - 1;
518  dataType resultEle1;
519  /* FL: 337 DBL: 169*/
520  for (vertical = 0; vertical < eleCount; vertical++) {
521  vec v1 = c7x::strm_eng<0, vec>::get_adv();
522  vec result1 = (vecY - vecSum) * vecDiv;
523  getElement(result1, vecIndex--, &resultEle1);
524  vecSum += v1 * (resultEle1);
525  dataType *pStoreX1 = c7x::strm_agen<1, dataType>::get_adv(pSA1);
526  *pStoreX1 = resultEle1;
527  }
528  /* 22 */
 /* Next block starts eleCount elements earlier in the last row. */
529  pSE0 -= eleCount;
530  __SE0_CLOSE();
 /* NOTE(review): for block 0 SE1 was never opened but is closed here -
    confirm that closing an unopened stream is benign. */
531  __SE1_CLOSE();
532  }
533  __SA2_CLOSE();
534  __SA3_CLOSE();
535 
536 
537  }
538  /* Processing Remaining Elements */
539  if (remainingEle > 0) {
 /* Narrow the streams to remainingEle lanes. SE0 starts at the beginning of
    the last row; y and pDiv are read from the start of their buffers as a
    single partial vector. */
540  seBlockParams.ICNT0 = remainingEle;
541  seBlockParams.ICNT1 = order;
542 
543  seReadXParams.ICNT1 = order;
544 
545  saReverseParams.ICNT0 = remainingEle;
546  saReverseParams.ICNT1 = 1;
547  saReverseParams.DIM1 = 0;
548 
549  pSE0 = &pL[(order - 1) * colLstride];
550  __SE0_OPEN(pSE0, seBlockParams);
551  __SE1_OPEN(pXLastElem, seReadXParams);
552  __SA2_OPEN(saReverseParams);
553  __SA3_OPEN(saReverseParams);
554 
 /* Everything handled by the full-block loop counts as already solved. */
555  int32_t sumRows = totalBlocks * eleCount;
556 
557  __vpred predY = c7x::strm_agen<2, vec>::get_vpred();
558  vec *pLoadY = c7x::strm_agen<2, vec>::get_adv(pY);
559  vec vecY = __vload_pred(predY, pLoadY);
560 
561  __vpred predDiv = c7x::strm_agen<3, vec>::get_vpred();
562  vec *pLoadDiv = c7x::strm_agen<3, vec>::get_adv(pDiv);
563  vec vecDiv = __vload_pred(predDiv, pLoadDiv);
564 
565  vec vecSum = (vec) 0;
566  vec vecSum1 = (vec) 0;
567  vec vecSum2 = (vec) 0;
568  vec vecSum3 = (vec) 0;
569  vec vecSum4 = (vec) 0;
570  int32_t vertical = 0;
571 
 /* Same four-way accumulation as in the block loop, over all solved x. */
572  for (vertical = 0; vertical < sumRows - 3; vertical += 4) {
573  vec v1 = c7x::strm_eng<0, vec>::get_adv();
574  vec x1 = c7x::strm_eng<1, vec>::get_adv();
575  vecSum1 += v1 * x1;
576 
577  vec v2 = c7x::strm_eng<0, vec>::get_adv();
578  vec x2 = c7x::strm_eng<1, vec>::get_adv();
579  vecSum2 += v2 * x2;
580 
581  vec v3 = c7x::strm_eng<0, vec>::get_adv();
582  vec x3 = c7x::strm_eng<1, vec>::get_adv();
583  vecSum3 += v3 * x3;
584 
585  vec v4 = c7x::strm_eng<0, vec>::get_adv();
586  vec x4 = c7x::strm_eng<1, vec>::get_adv();
587  vecSum4 += v4 * x4;
588  }
589 
590  vecSum = vecSum1 + vecSum2 + vecSum3 + vecSum4;
591 
 /* Solve the leftover unknowns from lane remainingEle - 1 down to lane 0. */
592  int32_t vecIndex = remainingEle - 1;
593  dataType resultEle;
594 
595  for (vertical = 0; vertical < remainingEle; vertical++) {
596  vec v1 = c7x::strm_eng<0, vec>::get_adv();
597  vec result = (vecY - vecSum) * vecDiv;
598 
599  getElement(result, vecIndex--, &resultEle);
600  vecSum += v1 * (resultEle);
601 
602  dataType *pStoreX = c7x::strm_agen<1, dataType>::get_adv(pSA1);
603  *pStoreX = resultEle;
604  }
605 
606  __SE0_CLOSE();
607  __SE1_CLOSE();
608  __SA2_CLOSE();
609  __SA3_CLOSE();
610  }
611  __SA1_CLOSE();
612  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Exiting function");
613 }
615  float *pX,
616  float *pY,
617  float *pDiv,
618  int32_t order,
619  int32_t colLstride,
620  uint8_t *pBlock);
622  double *pX,
623  double *pY,
624  double *pDiv,
625  int32_t order,
626  int32_t colLstride,
627  uint8_t *pBlock);
628 
/*
 * Main execution routine for the C7x implementation of the Cholesky solver.
 *
 * Sequence: forward substitution on pU (writes pY and pDiv), transpose of pU
 * into pScratch, then back substitution on the transposed matrix (reads pY and
 * pDiv, writes pX). Order, stride and the template buffer come from
 * pKerPrivArgs.
 *
 * pU        input matrix
 * pScratch  receives the transpose of pU
 * pY        intermediate vector: written by the forward stage, read by the
 *           back stage
 * pB        right-hand side
 * pX        solution vector
 * pDiv      work vector shared by both stages
 *
 * Always returns DSPLIB_SUCCESS.
 */
629 template <typename dataType>
631  void *restrict pU,
632  void *restrict pScratch,
633  void *restrict pY,
634  void *restrict pB,
635  void *restrict pX,
636  void *restrict pDiv)
637 {
638 
639  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Entering function");
640 
642  DSPLIB_matTrans_PrivArgs *matTransPrivArgs = &pKerPrivArgs->matTransPrivArgs;
643 
644  uint8_t *pBlock = pKerPrivArgs->bufPblock;
645  int32_t order = pKerPrivArgs->order;
646  int32_t strideL = pKerPrivArgs->stride;
 /* Row pitch in elements (stride is presumably in bytes - TODO confirm). */
647  int32_t colLStride = strideL / sizeof(dataType);
648 
 /* Typed views of the untyped buffer arguments. */
649  dataType *pLocalU = (dataType *) pU;
650  dataType *pLocaltransU = (dataType *) pScratch;
651  dataType *pLocalY = (dataType *) pY;
652  dataType *pLocalB = (dataType *) pB;
653  dataType *pLocalX = (dataType *) pX;
654  dataType *pLocalDiv = (dataType *) pDiv;
655 
656  DSPLIB_DEBUGPRINTFN(0, "pLocalU: %p pLocalX: %p\n", pLocalU, pLocalX);
657 
658  /*-----------------------------------------------------------------------
659  solve L*y=b for y using forward substitution
660  -------------------------------------------------------------------------*/
661  DSPLIB_cholesky_solver_forwardSubstitution_ci<dataType>(pLocalU, pLocalY, pLocalB, pLocalDiv, order, colLStride,
662  pBlock);
663 
664  /*-----------------------------------------------------------------------
665  solve U*x=y for x using backward substitution
666  ------------------------------------------------------------------------ */
 /* The back-substitution kernel is given the transpose of pU, built here in
    the scratch buffer. */
667  DSPLIB_matTrans_exec_ci<dataType>(matTransPrivArgs, pLocalU, pLocaltransU);
668 
669  DSPLIB_cholesky_solver_backSubstitution_ci<dataType>(pLocaltransU, pLocalX, pLocalY, pLocalDiv, order, colLStride,
670  pBlock);
671 
672  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Exiting function");
673  return DSPLIB_SUCCESS;
674 }
675 
677  void *restrict pU,
678  void *restrict pScratch,
679  void *restrict pY,
680  void *restrict pB,
681  void *restrict pX,
682  void *restrict pDiv);
683 
685  void *restrict pU,
686  void *restrict pScratch,
687  void *restrict pY,
688  void *restrict pB,
689  void *restrict pX,
690  void *restrict pDiv);
691 /* ======================================================================== */
692 /* End of file: DSPLIB_cholesky_solver_ci.cpp */
693 /* ======================================================================== */
template void DSPLIB_cholesky_solver_forwardSubstitution_ci< double >(double *pL, double *pX, double *pY, double *pDiv, int32_t order, int32_t colLstride, uint8_t *pBlock)
static void DSPLIB_cholesky_solver_backSubstitution_ci(dataType *pL, dataType *pX, dataType *pY, dataType *pDiv, int32_t order, int32_t colLstride, uint8_t *pBlock)
template void DSPLIB_cholesky_solver_backSubstitution_init_ci< float >(DSPLIB_kernelHandle handle)
void getElement(V inVec, uint32_t index, dataType *element)
template DSPLIB_STATUS DSPLIB_cholesky_solver_exec_ci< float >(DSPLIB_kernelHandle handle, void *restrict pU, void *restrict pScratch, void *restrict pY, void *restrict pB, void *restrict pX, void *restrict pDiv)
void DSPLIB_cholesky_solver_backSubstitution_init_ci(DSPLIB_kernelHandle handle)
template DSPLIB_STATUS DSPLIB_cholesky_solver_init_ci< float >(DSPLIB_kernelHandle handle, DSPLIB_bufParams2D_t *bufParamsU, DSPLIB_bufParams2D_t *bufParamsScratch, DSPLIB_bufParams1D_t *bufParamsY, DSPLIB_bufParams1D_t *bufParamsB, DSPLIB_bufParams1D_t *bufParamsX, DSPLIB_bufParams1D_t *bufParamsDiv, const DSPLIB_cholesky_solver_InitArgs *pKerInitArgs)
static void DSPLIB_cholesky_solver_forwardSubstitution_ci(dataType *pL, dataType *pY, dataType *pB, dataType *pDiv, int32_t order, int32_t colLstride, uint8_t *pBlock)
template void DSPLIB_cholesky_solver_forwardSubstitution_ci< float >(float *pL, float *pX, float *pY, float *pDiv, int32_t order, int32_t colLstride, uint8_t *pBlock)
template void DSPLIB_cholesky_solver_backSubstitution_ci< double >(double *pL, double *pX, double *pY, double *pDiv, int32_t order, int32_t colLstride, uint8_t *pBlock)
template void DSPLIB_cholesky_solver_backSubstitution_init_ci< double >(DSPLIB_kernelHandle handle)
template DSPLIB_STATUS DSPLIB_cholesky_solver_init_ci< double >(DSPLIB_kernelHandle handle, DSPLIB_bufParams2D_t *bufParamsU, DSPLIB_bufParams2D_t *bufParamsScratch, DSPLIB_bufParams1D_t *bufParamsY, DSPLIB_bufParams1D_t *bufParamsB, DSPLIB_bufParams1D_t *bufParamsX, DSPLIB_bufParams1D_t *bufParamsDiv, const DSPLIB_cholesky_solver_InitArgs *pKerInitArgs)
template DSPLIB_STATUS DSPLIB_cholesky_solver_exec_ci< double >(DSPLIB_kernelHandle handle, void *restrict pU, void *restrict pScratch, void *restrict pY, void *restrict pB, void *restrict pX, void *restrict pDiv)
DSPLIB_STATUS DSPLIB_cholesky_solver_exec_ci(DSPLIB_kernelHandle handle, void *restrict pU, void *restrict pScratch, void *restrict pY, void *restrict pB, void *restrict pX, void *restrict pDiv)
This function is the main execution function for the C7x implementation of the kernel....
template void DSPLIB_cholesky_solver_backSubstitution_ci< float >(float *pL, float *pX, float *pY, float *pDiv, int32_t order, int32_t colLstride, uint8_t *pBlock)
DSPLIB_STATUS DSPLIB_cholesky_solver_init_ci(DSPLIB_kernelHandle handle, DSPLIB_bufParams2D_t *bufParamsU, DSPLIB_bufParams2D_t *bufParamsScratch, DSPLIB_bufParams1D_t *bufParamsY, DSPLIB_bufParams1D_t *bufParamsB, DSPLIB_bufParams1D_t *bufParamsX, DSPLIB_bufParams1D_t *bufParamsDiv, const DSPLIB_cholesky_solver_InitArgs *pKerInitArgs)
This function is the initialization function for the C7x implementation of the kernel....
Header file for kernel's internal use. For the kernel's interface, please see DSPLIB_cholesky_solver.
#define DSPLIB_DEBUGPRINTFN(N, fmt,...)
Definition: DSPLIB_types.h:83
DSPLIB_STATUS_NAME
The enumeration of all status codes.
Definition: DSPLIB_types.h:151
void * DSPLIB_kernelHandle
Handle type for DSPLIB operations.
Definition: DSPLIB_types.h:172
@ DSPLIB_FUNCTION_OPTIMIZED
Definition: DSPLIB_types.h:177
@ DSPLIB_SUCCESS
Definition: DSPLIB_types.h:152
A structure for a 1 dimensional buffer descriptor.
A structure for a 2 dimensional buffer descriptor.
int32_t stride_y
Stride in Y dimension in bytes.
uint32_t dim_x
Width of buffer in X dimension in elements.
uint32_t dim_y
Height of buffer in Y dimension in elements.
Structure containing the parameters to initialize the kernel.
Structure that is reserved for internal use by the kernel.
int32_t order
Order of input buffer for different batches DSPLIB_cholesky_solver_init that will be retrieved and us...
DSPLIB_matTrans_PrivArgs matTransPrivArgs
Structure to store privArgs for matTrans kernel.
uint8_t bufPblock[DSPLIB_CHOLESKY_SOLVER_IXX_IXX_OXX_PBLOCK_SIZE]
Structure containing the parameters to initialize the kernel.
uint32_t dimX
Size of input data.
int8_t funcStyle
Variant of the function refer to DSPLIB_FUNCTION_STYLE
Structure that is reserved for internal use by the kernel.
int32_t strideOut
Stride between rows of output data matrix
uint32_t heightIn
Height of input data matrix
int32_t strideIn
Stride between rows of input data matrix
uint32_t widthIn
Size of input buffer for different batches DSPLIB_matTrans_init that will be retrieved and used by DS...