DSPLIB User Guide
DSPLIB_lud_sol_ci.cpp
Go to the documentation of this file.
1 /******************************************************************************/
5 /* Copyright (C) 2017 Texas Instruments Incorporated - https://www.ti.com/
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * Redistributions of source code must retain the above copyright
12  * notice, this list of conditions and the following disclaimer.
13  *
14  * Redistributions in binary form must reproduce the above copyright
15  * notice, this list of conditions and the following disclaimer in the
16  * documentation and/or other materials provided with the
17  * distribution.
18  *
19  * Neither the name of Texas Instruments Incorporated nor the names of
20  * its contributors may be used to endorse or promote products derived
21  * from this software without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
29  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
30  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
33  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34  *
35  ******************************************************************************/
36 
37 /******************************************************************************
38  * Version 1.0 Date Aug 2023 Author: Asheesh Bhardwaj
39  *****************************************************************************/
40 
41 /*******************************************************************************
42  *
43  * INCLUDES
44  *
45  ******************************************************************************/
46 
47 #include "DSPLIB_lud_sol_priv.h"
48 
49 /*********************************************************************
50  *
51  * INITIALIZATION
52  *
53  *********************************************************************/
54 
55 template <typename dataType> void DSPLIB_lud_sol_substitution_init_ci(DSPLIB_kernelHandle handle)
56 {
57  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Entering function");
58  DSPLIB_lud_sol_PrivArgs *pKerPrivArgs = (DSPLIB_lud_sol_PrivArgs *) handle;
59  uint8_t * pBlock = pKerPrivArgs->bufPblock;
60  int32_t order = pKerPrivArgs->order;
61  int32_t strideMat = pKerPrivArgs->strideOrder;
62  int32_t colMatstride = strideMat / sizeof(dataType);
63 
64  typedef typename c7x::make_full_vector<dataType>::type vec;
65  uint32_t eleCount = c7x::element_count_of<vec>::value;
66 
67  __SE_ELETYPE SE_ELETYPE = c7x::se_eletype<vec>::value;
68  __SE_VECLEN SE_VECLEN = c7x::se_veclen<vec>::value;
69  __SA_VECLEN SA_VECLEN = c7x::sa_veclen<vec>::value;
70  __SE_ELEDUP SE_ELEDUP = c7x::se_eledup<dataType>::value;
71 
72  __SE_TEMPLATE_v1 seDiagReadParams = __gen_SE_TEMPLATE_v1();
73  seDiagReadParams.ICNT0 = 1;
74  seDiagReadParams.ICNT1 = order;
75  seDiagReadParams.DIM1 = colMatstride + 1;
76  seDiagReadParams.DIMFMT = __SE_DIMFMT_2D;
77  seDiagReadParams.ELETYPE = SE_ELETYPE;
78  seDiagReadParams.VECLEN = __SE_VECLEN_1ELEM;
79  *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (0 * SE_PARAM_SIZE)) = seDiagReadParams;
80 
81  __SA_TEMPLATE_v1 saWriteXParams = __gen_SA_TEMPLATE_v1();
82  saWriteXParams.ICNT0 = 1;
83  saWriteXParams.ICNT1 = order;
84  saWriteXParams.DIM1 = -1;
85  saWriteXParams.DIMFMT = __SA_DIMFMT_2D;
86  saWriteXParams.VECLEN = SA_VECLEN;
87  *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (1 * SE_PARAM_SIZE)) = saWriteXParams;
88 
89  __SE_TEMPLATE_v1 seDivReadParams = __gen_SE_TEMPLATE_v1();
90  seDivReadParams.ICNT0 = order;
91  seDivReadParams.DIMFMT = __SE_DIMFMT_1D;
92  seDivReadParams.ELETYPE = SE_ELETYPE;
93  seDivReadParams.VECLEN = SE_VECLEN;
94  *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (2 * SE_PARAM_SIZE)) = seDivReadParams;
95 
96  __SA_TEMPLATE_v1 saDivStoreParams = __gen_SA_TEMPLATE_v1();
97  saDivStoreParams.ICNT0 = order;
98  saDivStoreParams.DIMFMT = __SA_DIMFMT_1D;
99  saDivStoreParams.VECLEN = SA_VECLEN;
100  *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (3 * SE_PARAM_SIZE)) = saDivStoreParams;
101 
102  __SE_TEMPLATE_v1 seReadXParams = __gen_SE_TEMPLATE_v1();
103  seReadXParams.ICNT0 = 1;
104  seReadXParams.DIM1 = -1;
105  seReadXParams.DIMFMT = __SE_DIMFMT_2D;
106  seReadXParams.VECLEN = SE_VECLEN;
107  seReadXParams.ELETYPE = SE_ELETYPE;
108  seReadXParams.ELEDUP = SE_ELEDUP;
109  *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (4 * SE_PARAM_SIZE)) = seReadXParams;
110 
111  __SE_TEMPLATE_v1 seBlockParams = __gen_SE_TEMPLATE_v1();
112  seBlockParams.ICNT0 = eleCount;
113  seBlockParams.DIM1 = -colMatstride;
114  seBlockParams.DIMFMT = __SE_DIMFMT_2D;
115  seBlockParams.ELETYPE = SE_ELETYPE;
116  seBlockParams.VECLEN = SE_VECLEN;
117  *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (5 * SE_PARAM_SIZE)) = seBlockParams;
118 
119  __SA_TEMPLATE_v1 saReverseParams = __gen_SA_TEMPLATE_v1();
120  saReverseParams.ICNT0 = eleCount;
121  saReverseParams.DIM1 = -((int32_t)eleCount);
122  saReverseParams.DIMFMT = __SA_DIMFMT_2D;
123  saReverseParams.VECLEN = SA_VECLEN;
124  *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (6 * SE_PARAM_SIZE)) = saReverseParams;
125 
126  DSPLIB_DEBUGPRINTFN(0, "Exiting function with return status: %d\n", DSPLIB_SUCCESS);
127 }
130 
131 template <typename dataType> void DSPLIB_lud_sol_permuteB_init_ci(DSPLIB_kernelHandle handle)
132 {
133  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Entering function");
134 
135  DSPLIB_lud_sol_PrivArgs *pKerPrivArgs = (DSPLIB_lud_sol_PrivArgs *) handle;
136  uint8_t * pBlock = pKerPrivArgs->bufPblock;
137  int32_t order = pKerPrivArgs->order;
138 
139  typedef typename c7x::make_full_vector<uint16_t>::type vecUINT16;
140  __SE_ELETYPE SE_ELETYPE = c7x::se_eletype<vecUINT16>::value;
141  __SE_VECLEN SE_VECLEN = c7x::se_veclen<vecUINT16>::value;
142  // __SA_VECLEN SA_VECLEN = c7x::sa_veclen<vecUINT16>::value;
143  int32_t pStride = pKerPrivArgs->strideP;
144  int32_t colPStride = pStride / sizeof(uint16_t);
145 
146  __SE_TEMPLATE_v1 seMatReadParams = __gen_SE_TEMPLATE_v1();
147  seMatReadParams.ICNT0 = order;
148  seMatReadParams.DIM1 = colPStride * 2;
149  seMatReadParams.DIMFMT = __SE_DIMFMT_2D;
150  seMatReadParams.ELETYPE = SE_ELETYPE;
151  seMatReadParams.VECLEN = SE_VECLEN;
152 
153  *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (7 * SE_PARAM_SIZE)) = seMatReadParams;
154 
155  DSPLIB_DEBUGPRINTFN(0, "Exiting function with return status: %d\n", DSPLIB_SUCCESS);
156 }
159 
160 template <typename dataType>
162  const DSPLIB_bufParams2D_t * bufParamsP,
163  const DSPLIB_bufParams2D_t * bufParamsL,
164  const DSPLIB_bufParams2D_t * bufParamsU,
165  const DSPLIB_bufParams1D_t * bufParamsB,
166  const DSPLIB_bufParams1D_t * bufParamsX,
167  const DSPLIB_bufParams2D_t * bufParamsVecScratch,
168  const DSPLIB_bufParams2D_t * bufParamsScratchTrans,
169  const DSPLIB_lud_solInitArgs *pKerInitArgs)
170 {
171  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Entering function");
172  DSPLIB_lud_sol_PrivArgs *pKerPrivArgs = (DSPLIB_lud_sol_PrivArgs *) handle;
173  DSPLIB_matTrans_PrivArgs *pMatTransKerPrivArgs = &pKerPrivArgs->pMatTransKerPrivArgs;
174 
175  DSPLIB_matTransInitArgs kerInitArgsMatTrans;
176 
177  kerInitArgsMatTrans.funcStyle = pKerInitArgs->funcStyle;
178  kerInitArgsMatTrans.dimX = bufParamsU->dim_x;
179  kerInitArgsMatTrans.dimY = bufParamsU->dim_y;
180 
181  pMatTransKerPrivArgs->widthIn = bufParamsU->dim_x;
182  pMatTransKerPrivArgs->heightIn = bufParamsU->dim_y;
183  pMatTransKerPrivArgs->strideIn = bufParamsU->stride_y;
184  pMatTransKerPrivArgs->strideOut = bufParamsScratchTrans->stride_y;
185 
186  DSPLIB_matTrans_init_ci<dataType>(pMatTransKerPrivArgs, bufParamsU, bufParamsScratchTrans, &kerInitArgsMatTrans);
187  DSPLIB_lud_sol_substitution_init_ci<dataType>(handle);
188  DSPLIB_lud_sol_permuteB_init_ci<dataType>(handle);
189  DSPLIB_DEBUGPRINTFN(0, "Exiting function with return status: %d\n", DSPLIB_SUCCESS);
190  return DSPLIB_SUCCESS;
191 }
192 
194  const DSPLIB_bufParams2D_t * bufParamsP,
195  const DSPLIB_bufParams2D_t * bufParamsL,
196  const DSPLIB_bufParams2D_t * bufParamsU,
197  const DSPLIB_bufParams1D_t * bufParamsB,
198  const DSPLIB_bufParams1D_t * bufParamsX,
199  const DSPLIB_bufParams2D_t * bufParamsVecScratch,
200  const DSPLIB_bufParams2D_t * bufParamsScratchTrans,
201  const DSPLIB_lud_solInitArgs *pKerInitArgs);
202 
204  const DSPLIB_bufParams2D_t * bufParamsP,
205  const DSPLIB_bufParams2D_t * bufParamsL,
206  const DSPLIB_bufParams2D_t * bufParamsU,
207  const DSPLIB_bufParams1D_t * bufParamsB,
208  const DSPLIB_bufParams1D_t * bufParamsX,
209  const DSPLIB_bufParams2D_t * bufParamsVecScratch,
210  const DSPLIB_bufParams2D_t * bufParamsScratchTrans,
211  const DSPLIB_lud_solInitArgs *pKerInitArgs);
212 
213 /*********************************************************************
214  *
215  * IMPLEMENTATION
216  *
217  *********************************************************************/
218 template <typename dataType>
219 void DSPLIB_lud_sol_permuteB_ci(unsigned short *pIn,
220  dataType * B,
221  dataType * B_Mod,
222  int32_t order,
223  int32_t colPStride,
224  uint32_t * permuteOrder,
225  uint8_t * pBlock)
226 {
227  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Entering function");
228 
229  typedef typename c7x::make_full_vector<uint16_t>::type vec;
230  int32_t eleCount = c7x::element_count_of<vec>::value;
231 
232  __SE_TEMPLATE_v1 se0Params, se1Params;
233  se0Params = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (7 * SE_PARAM_SIZE));
234  se1Params = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (7 * SE_PARAM_SIZE));
235 
236  int32_t nVec = DSPLIB_ceilingDiv(order, eleCount);
237  int32_t se1ICNT1 = order / 2;
238  int32_t se0ICNT1 = order - se1ICNT1;
239 
240  se0Params.ICNT1 = se0ICNT1;
241  se1Params.ICNT1 = se1ICNT1;
242 
243  __SE0_OPEN(pIn, se0Params);
244 
245  vec vecZero = (vec) 0;
246  vec vecOne = (vec) 1;
247 
248  vec idx_0_to_eleCount;
249 
250 #if (__C7X_VEC_SIZE_BITS__ == 256)
251  idx_0_to_eleCount = c7x::ushort_vec(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
252 #elif (__C7X_VEC_SIZE_BITS__ == 512)
253  idx_0_to_eleCount = c7x::ushort_vec(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
254  23, 24, 25, 26, 27, 28, 29, 30, 31);
255 #endif
256 
257  int32_t vertical = 0;
258  if (se1ICNT1 > 0) {
259  __SE1_OPEN(pIn + colPStride, se1Params);
260 
261  for (vertical = 0; vertical < order - 1; vertical += 2) {
262 
263  vec maxValVec1 = (vec) 0;
264  vec maxValVec2 = (vec) 0;
265  vec vMaxIdx1; // = idx_0_to_eleCount;
266  vec vMaxIdx2; // = idx_0_to_eleCount;
267  vec vCurrIdx1 = idx_0_to_eleCount;
268  vec vCurrIdx2 = idx_0_to_eleCount;
269 
270  for (int32_t horizontal = 0; horizontal < nVec; horizontal++) {
271  vec v1 = c7x::strm_eng<0, vec>::get_adv();
272  vec v2 = c7x::strm_eng<1, vec>::get_adv();
273 
274  __vpred cmpPred1 = __cmp_eq_pred(vecZero, v1);
275  __vpred cmpPred2 = __cmp_eq_pred(vecZero, v2);
276 
277  maxValVec1 = __select(cmpPred1, maxValVec1, v1);
278  maxValVec2 = __select(cmpPred2, maxValVec2, v2);
279 
280  vMaxIdx1 = __select(cmpPred1, vMaxIdx1, vCurrIdx1);
281  vMaxIdx2 = __select(cmpPred2, vMaxIdx2, vCurrIdx2);
282 
283  vCurrIdx1 = vCurrIdx1 + (uint16_t) eleCount;
284  vCurrIdx2 = vCurrIdx2 + (uint16_t) eleCount;
285  }
286 
287  __vpred cmpPredFinal1 = __cmp_eq_pred(vecOne, maxValVec1);
288  uint32_t tempIdx1 = __rightmost_bit_detect_short(cmpPredFinal1) >> 1;
289  uint32_t finalIdx1 = __vgetuh_vrd(vMaxIdx1, tempIdx1);
290 
291  __vpred cmpPredFinal2 = __cmp_eq_pred(vecOne, maxValVec2);
292  uint32_t tempIdx2 = __rightmost_bit_detect_short(cmpPredFinal2) >> 1;
293  uint32_t finalIdx2 = __vgetuh_vrd(vMaxIdx2, tempIdx2);
294 
295  B_Mod[vertical + 0] = B[finalIdx1];
296  B_Mod[vertical + 1] = B[finalIdx2];
297  }
298  }
299 
300  if (se0ICNT1 != se1ICNT1) {
301 
302  vec maxValVec1 = (vec) 0;
303  vec vMaxIdx1 = idx_0_to_eleCount;
304  vec vCurrIdx1 = idx_0_to_eleCount;
305 
306  for (int32_t horizontal = 0; horizontal < nVec; horizontal++) {
307  vec v1 = c7x::strm_eng<0, vec>::get_adv();
308 
309  __vpred cmpPred1 = __cmp_eq_pred(vecZero, v1);
310 
311  maxValVec1 = __select(cmpPred1, maxValVec1, v1);
312 
313  vMaxIdx1 = __select(cmpPred1, vMaxIdx1, vCurrIdx1);
314 
315  vCurrIdx1 = vCurrIdx1 + (uint16_t) eleCount;
316  }
317 
318  __vpred cmpPredFinal1 = __cmp_eq_pred(vecOne, maxValVec1);
319  uint32_t tempIdx1 = __rightmost_bit_detect_short(cmpPredFinal1) >> 1;
320  uint32_t finalIdx1 = __vgetuh_vrd(vMaxIdx1, tempIdx1);
321 
322  B_Mod[vertical + 0] = B[finalIdx1];
323  }
324 
325  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Exiting function");
326 }
327 
328 template void DSPLIB_lud_sol_permuteB_ci<float>(unsigned short *pIn,
329  float * B,
330  float * B_Mod,
331  int32_t order,
332  int32_t colPStride,
333  uint32_t * permuteOrder,
334  uint8_t * pBlock);
335 template void DSPLIB_lud_sol_permuteB_ci<double>(unsigned short *pIn,
336  double * B,
337  double * B_Mod,
338  int32_t order,
339  int32_t colPStride,
340  uint32_t * permuteOrder,
341  uint8_t * pBlock);
342 
343 template <typename dataType, typename V = typename c7x::make_full_vector<dataType>::type>
344 inline void getElement(V inVec, uint32_t index, dataType *element);
345 template <typename V> inline void getElement(V inVec, uint32_t index, float *element)
346 {
347  *element = __as_float(__vgetw_vrd(c7x::as_int_vec(inVec), index));
348 }
349 
350 template <typename V> inline void getElement(V inVec, uint32_t index, double *element)
351 {
352  *element = __as_double(__vgetd_vrd(c7x::as_long_vec(inVec), index));
353 }
354 
355 template <typename dataType>
356 static inline void DSPLIB_lud_sol_forwardSubstitution_ci(dataType *pL,
357  dataType *pY,
358  dataType *pB,
359  dataType *pDiv,
360  int32_t order,
361  int32_t colLstride,
362  uint8_t * pBlock)
363 {
364  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Entering function");
365 
366  __SE_TEMPLATE_v1 seDivReadParams;
367  __SA_TEMPLATE_v1 saDivStoreParams;
368  __SE_TEMPLATE_v1 seDiagReadParams;
369  __SA_TEMPLATE_v1 saDiagWriteParams;
370  seDiagReadParams = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (0 * SE_PARAM_SIZE));
371  saDiagWriteParams = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (1 * SE_PARAM_SIZE));
372  seDivReadParams = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (2 * SE_PARAM_SIZE));
373  saDivStoreParams = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (3 * SE_PARAM_SIZE));
374 
375  saDiagWriteParams.DIM1 = 1;
376 
377  typedef typename c7x::make_full_vector<dataType>::type vec;
378  int32_t eleCount = c7x::element_count_of<vec>::value;
379  int32_t nVec = DSPLIB_ceilingDiv(order, eleCount);
380 
381  /* Calculate reciprocals of Diagonal Elements */
382 
383  __SE1_OPEN(pL, seDiagReadParams);
384  __SA1_OPEN(saDiagWriteParams);
385  int32_t row = 0;
386 
387  for (row = 0; row < order; row++) {
388  vec vecDiag = c7x::strm_eng<1, vec>::get_adv();
389 
390  __vpred predDiag = c7x::strm_agen<1, vec>::get_vpred();
391  vec * pStoreDiag = c7x::strm_agen<1, vec>::get_adv(pDiv);
392  __vstore_pred(predDiag, pStoreDiag, vecDiag);
393  }
394  __SE1_CLOSE();
395  __SA1_CLOSE();
396 
397  __SE0_OPEN(pDiv, seDivReadParams);
398  __SA0_OPEN(saDivStoreParams);
399  dataType TwoP0 = 2.0;
400  int32_t ii = 0;
401 
402  for (ii = 0; ii < nVec - 3; ii += 4) {
403  vec v1 = c7x::strm_eng<0, vec>::get_adv();
404  vec v2 = c7x::strm_eng<0, vec>::get_adv();
405  vec v3 = c7x::strm_eng<0, vec>::get_adv();
406  vec v4 = c7x::strm_eng<0, vec>::get_adv();
407 
408  vec yy1 = __recip(v1);
409  yy1 = yy1 * (TwoP0 - v1 * yy1);
410  yy1 = yy1 * (TwoP0 - v1 * yy1);
411 
412  vec yy2 = __recip(v2);
413  yy2 = yy2 * (TwoP0 - v2 * yy2);
414  yy2 = yy2 * (TwoP0 - v2 * yy2);
415 
416  vec yy3 = __recip(v3);
417  yy3 = yy3 * (TwoP0 - v3 * yy3);
418  yy3 = yy3 * (TwoP0 - v3 * yy3);
419 
420  vec yy4 = __recip(v4);
421  yy4 = yy4 * (TwoP0 - v4 * yy4);
422  yy4 = yy4 * (TwoP0 - v4 * yy4);
423 
424  __vpred predDiv1 = c7x::strm_agen<0, vec>::get_vpred();
425  vec * pStoreDiv1 = c7x::strm_agen<0, vec>::get_adv(pDiv);
426  __vstore_pred(predDiv1, pStoreDiv1, yy1);
427 
428  __vpred predDiv2 = c7x::strm_agen<0, vec>::get_vpred();
429  vec * pStoreDiv2 = c7x::strm_agen<0, vec>::get_adv(pDiv);
430  __vstore_pred(predDiv2, pStoreDiv2, yy2);
431 
432  __vpred predDiv3 = c7x::strm_agen<0, vec>::get_vpred();
433  vec * pStoreDiv3 = c7x::strm_agen<0, vec>::get_adv(pDiv);
434  __vstore_pred(predDiv3, pStoreDiv3, yy3);
435 
436  __vpred predDiv4 = c7x::strm_agen<0, vec>::get_vpred();
437  vec * pStoreDiv4 = c7x::strm_agen<0, vec>::get_adv(pDiv);
438  __vstore_pred(predDiv4, pStoreDiv4, yy4);
439  }
440 
441  for (; ii < nVec; ii++) {
442  vec v1 = c7x::strm_eng<0, vec>::get_adv();
443 
444  vec yy1 = __recip(v1);
445  yy1 = yy1 * (TwoP0 - v1 * yy1);
446  yy1 = yy1 * (TwoP0 - v1 * yy1);
447 
448  __vpred predDiv1 = c7x::strm_agen<0, vec>::get_vpred();
449  vec * pStoreDiv1 = c7x::strm_agen<0, vec>::get_adv(pDiv);
450  __vstore_pred(predDiv1, pStoreDiv1, yy1);
451  }
452 
453  __SE0_CLOSE();
454  __SA0_CLOSE();
455 
456  /*----------------------------------------------------------------------
457  Forward Substitution
458  -----------------------------------------------------------------------*/
459 
460  __SE_TEMPLATE_v1 seBlockParams;
461  __SE_TEMPLATE_v1 seReadYParams;
462  __SA_TEMPLATE_v1 saWriteYParams;
463  __SA_TEMPLATE_v1 sa1DReadParams;
464 
465  saWriteYParams = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (1 * SE_PARAM_SIZE));
466  seReadYParams = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (4 * SE_PARAM_SIZE));
467  seBlockParams = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (5 * SE_PARAM_SIZE));
468  sa1DReadParams = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (2 * SE_PARAM_SIZE));
469 
470  saWriteYParams.DIM1 = 1;
471  seReadYParams.DIM1 = 1;
472  seBlockParams.DIM1 = colLstride;
473 
474  dataType *pSE0 = pL;
475  dataType *pSA1 = pY;
476  dataType *pSA2 = pB;
477  dataType *pSA3 = pDiv;
478 
479  __SA1_OPEN(saWriteYParams);
480  __SA2_OPEN(sa1DReadParams);
481  __SA3_OPEN(sa1DReadParams);
482 
483  for (int32_t block = 0; block < nVec; block++) {
484  __vpred predB = c7x::strm_agen<2, vec>::get_vpred();
485  vec * pLoadB = c7x::strm_agen<2, vec>::get_adv(pSA2);
486  vec vecB = __vload_pred(predB, pLoadB);
487 
488  __vpred predDiv = c7x::strm_agen<3, vec>::get_vpred();
489  vec * pLoadDiv = c7x::strm_agen<3, vec>::get_adv(pSA3);
490  vec vecDiv = __vload_pred(predDiv, pLoadDiv);
491 
492  int32_t sumRows = block * eleCount;
493  int32_t totalRows = sumRows + eleCount;
494 
495  seBlockParams.ICNT1 = totalRows;
496  seReadYParams.ICNT1 = sumRows;
497 
498  __SE0_OPEN(pSE0, seBlockParams);
499  if (sumRows > 0) {
500  __SE1_OPEN(pY, seReadYParams);
501  }
502 
503  /* Calculate sum */
504  vec vecSum = (vec) 0;
505  vec vecSum1 = (vec) 0;
506  vec vecSum2 = (vec) 0;
507  vec vecSum3 = (vec) 0;
508  vec vecSum4 = (vec) 0;
509  int32_t vertical = 0;
510  /* Square Part- Accumulate sum */ /* 7 + trip_cnt * 4 */
511  for (vertical = 0; vertical < sumRows - 3; vertical += 4) {
512  vec v1 = c7x::strm_eng<0, vec>::get_adv();
513  vec y1 = c7x::strm_eng<1, vec>::get_adv();
514  vecSum1 += v1 * y1;
515 
516  vec v2 = c7x::strm_eng<0, vec>::get_adv();
517  vec y2 = c7x::strm_eng<1, vec>::get_adv();
518  vecSum2 += v2 * y2;
519 
520  vec v3 = c7x::strm_eng<0, vec>::get_adv();
521  vec y3 = c7x::strm_eng<1, vec>::get_adv();
522  vecSum3 += v3 * y3;
523 
524  vec v4 = c7x::strm_eng<0, vec>::get_adv();
525  vec y4 = c7x::strm_eng<1, vec>::get_adv();
526  vecSum4 += v4 * y4;
527  }
528 
529  vecSum = vecSum1 + vecSum2 + vecSum3 + vecSum4;
530 
531  /* Diagonal Part- Calculate values */ /* 1 + tri_cnt *21 = 337 */
532  dataType resultEle1;
533  for (vertical = 0; vertical < eleCount; vertical++) {
534  vec v1 = c7x::strm_eng<0, vec>::get_adv();
535  vec result1 = (vecB - vecSum) * vecDiv;
536 
537  getElement(result1, vertical, &resultEle1);
538 
539  vecSum += v1 * (resultEle1);
540 
541  __vpred predYCalc = c7x::strm_agen<1, vec>::get_vpred();
542  vec * pStoreY = c7x::strm_agen<1, vec>::get_adv(pSA1);
543  __vstore_pred(predYCalc, pStoreY, (vec) resultEle1);
544  }
545 
546  pSE0 += eleCount;
547  __SE0_CLOSE();
548  __SE1_CLOSE();
549  }
550  __SA2_CLOSE();
551  __SA3_CLOSE();
552  __SA1_CLOSE();
553  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Exiting function");
554 }
556  float * pX,
557  float * pY,
558  float * pDiv,
559  int32_t order,
560  int32_t colLstride,
561  uint8_t *pBlock);
563  double * pX,
564  double * pY,
565  double * pDiv,
566  int32_t order,
567  int32_t colLstride,
568  uint8_t *pBlock);
569 
/**
 * Back substitution: computes pX from the matrix pL and the vector pY,
 * starting from the last unknown and moving towards the first.
 *
 * Steps performed:
 *   1. The `order` diagonal elements of pL are copied into pDiv.
 *   2. pDiv is replaced in place by the reciprocals of its elements
 *      (__recip seed followed by two refinement steps y = y * (2 - v * y)).
 *   3. Full blocks of eleCount unknowns are solved from the end of the
 *      vector backwards; a final partial block handles order % eleCount
 *      leftover unknowns at the start of the vector.
 *
 * Parameters:
 *   pL          matrix data (the caller visible in this file passes the
 *               transpose scratch buffer)
 *   pX          output vector X
 *   pY          input vector Y
 *   pDiv        scratch vector receiving the diagonal reciprocals
 *   order       matrix order
 *   colLstride  row stride of pL in elements
 *   pBlock      parameter block holding the pre-built SE/SA templates
 */
570 template <typename dataType>
571 static inline void DSPLIB_lud_sol_backSubstitution_ci(dataType *pL,
572  dataType *pX,
573  dataType *pY,
574  dataType *pDiv,
575  int32_t order,
576  int32_t colLstride,
577  uint8_t * pBlock)
578 {
579  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Entering function");
580 
581  typedef typename c7x::make_full_vector<dataType>::type vec;
582  int32_t eleCount = c7x::element_count_of<vec>::value;
583 
584  /* Calculate reciprocals of Diagonal Elements */
585 
586  __SE_TEMPLATE_v1 seDivReadParams;
587  __SA_TEMPLATE_v1 saDivStoreParams;
588  __SE_TEMPLATE_v1 seDiagReadParams;
589  __SA_TEMPLATE_v1 saDiagWriteParams;
 /* Slots 0..3 of the parameter block: diagonal read, element write,
    1-D read and 1-D store templates. */
590  seDiagReadParams = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (0 * SE_PARAM_SIZE));
591  saDiagWriteParams = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (1 * SE_PARAM_SIZE));
592  seDivReadParams = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (2 * SE_PARAM_SIZE));
593  saDivStoreParams = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (3 * SE_PARAM_SIZE));
594 
 /* The local copy of the slot-1 template is switched to a forward step
    for writing the diagonal into pDiv. */
595  saDiagWriteParams.DIM1 = 1;
596 
597  int32_t nVec = DSPLIB_ceilingDiv(order, eleCount);
598 
599  /* Calculate reciprocals of Diagonal Elements */
 /* Step 1: copy the diagonal of pL into pDiv, one element per iteration. */
600  __SE1_OPEN(pL, seDiagReadParams);
601  __SA1_OPEN(saDiagWriteParams);
602  int32_t row = 0;
603 
604  for (row = 0; row < order; row++) {
605  vec vecDiag = c7x::strm_eng<1, vec>::get_adv();
606 
607  __vpred predDiag = c7x::strm_agen<1, vec>::get_vpred();
608  vec * pStoreDiag = c7x::strm_agen<1, vec>::get_adv(pDiv);
609  __vstore_pred(predDiag, pStoreDiag, vecDiag);
610  }
611  __SE1_CLOSE();
612  __SA1_CLOSE();
613 
 /* Step 2: in-place reciprocal of pDiv, four vectors per iteration. */
614  __SE0_OPEN(pDiv, seDivReadParams);
615  __SA0_OPEN(saDivStoreParams);
616  dataType TwoP0 = 2.0;
617  int32_t ii = 0;
618 
619  for (ii = 0; ii < nVec - 3; ii += 4) {
620  vec v1 = c7x::strm_eng<0, vec>::get_adv();
621  vec v2 = c7x::strm_eng<0, vec>::get_adv();
622  vec v3 = c7x::strm_eng<0, vec>::get_adv();
623  vec v4 = c7x::strm_eng<0, vec>::get_adv();
624 
 /* Reciprocal seed refined twice: y <- y * (2 - v * y). */
625  vec yy1 = __recip(v1);
626  yy1 = yy1 * (TwoP0 - v1 * yy1);
627  yy1 = yy1 * (TwoP0 - v1 * yy1);
628 
629  vec yy2 = __recip(v2);
630  yy2 = yy2 * (TwoP0 - v2 * yy2);
631  yy2 = yy2 * (TwoP0 - v2 * yy2);
632 
633  vec yy3 = __recip(v3);
634  yy3 = yy3 * (TwoP0 - v3 * yy3);
635  yy3 = yy3 * (TwoP0 - v3 * yy3);
636 
637  vec yy4 = __recip(v4);
638  yy4 = yy4 * (TwoP0 - v4 * yy4);
639  yy4 = yy4 * (TwoP0 - v4 * yy4);
640 
641  __vpred predDiv1 = c7x::strm_agen<0, vec>::get_vpred();
642  vec * pStoreDiv1 = c7x::strm_agen<0, vec>::get_adv(pDiv);
643  __vstore_pred(predDiv1, pStoreDiv1, yy1);
644 
645  __vpred predDiv2 = c7x::strm_agen<0, vec>::get_vpred();
646  vec * pStoreDiv2 = c7x::strm_agen<0, vec>::get_adv(pDiv);
647  __vstore_pred(predDiv2, pStoreDiv2, yy2);
648 
649  __vpred predDiv3 = c7x::strm_agen<0, vec>::get_vpred();
650  vec * pStoreDiv3 = c7x::strm_agen<0, vec>::get_adv(pDiv);
651  __vstore_pred(predDiv3, pStoreDiv3, yy3);
652 
653  __vpred predDiv4 = c7x::strm_agen<0, vec>::get_vpred();
654  vec * pStoreDiv4 = c7x::strm_agen<0, vec>::get_adv(pDiv);
655  __vstore_pred(predDiv4, pStoreDiv4, yy4);
656  }
657 
 /* Remaining vectors (fewer than four). */
658  for (; ii < nVec; ii++) {
659  vec v1 = c7x::strm_eng<0, vec>::get_adv();
660 
661  vec yy1 = __recip(v1);
662  yy1 = yy1 * (TwoP0 - v1 * yy1);
663  yy1 = yy1 * (TwoP0 - v1 * yy1);
664 
665  __vpred predDiv1 = c7x::strm_agen<0, vec>::get_vpred();
666  vec * pStoreDiv1 = c7x::strm_agen<0, vec>::get_adv(pDiv);
667  __vstore_pred(predDiv1, pStoreDiv1, yy1);
668  }
669 
670  __SE0_CLOSE();
671  __SA0_CLOSE();
672 
 /* Step 3: back substitution. totalBlocks full vectors are solved first;
    remainingEle unknowns (order % eleCount) are solved afterwards. */
673  int32_t totalBlocks = order / eleCount;
674  int32_t remainingEle = order - (totalBlocks * eleCount);
675 
676  __SE_TEMPLATE_v1 seBlockParams;
677  __SE_TEMPLATE_v1 seReadXParams;
678  __SA_TEMPLATE_v1 saWriteXParams;
679  __SA_TEMPLATE_v1 saReverseParams;
680 
681  saWriteXParams = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (1 * SE_PARAM_SIZE));
682  seReadXParams = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (4 * SE_PARAM_SIZE));
683  seBlockParams = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (5 * SE_PARAM_SIZE));
684  saReverseParams = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (6 * SE_PARAM_SIZE));
685 
686  seReadXParams.ICNT0 = 1;
687  seReadXParams.DIM1 = -1;
688  saReverseParams.ICNT1 = totalBlocks;
689 
 /* Start positions: last diagonal element of pL, last element of pX, and
    the last full vector of pY / pDiv. */
690  dataType *pLLastElem = &pL[(order - 1) + ((order - 1) * colLstride)];
691  dataType *pXLastElem = &pX[order - 1];
692  dataType *pSE0 = pLLastElem - (eleCount - 1);
693  dataType *pSA1 = pX + order - 1;
694  dataType *pSA2 = pY + order - eleCount;
695  dataType *pSA3 = pDiv + order - eleCount;
696 
 /* SA1 stays open across both the full-block and the remainder phase. */
697  __SA1_OPEN(saWriteXParams);
698 
699  if (totalBlocks > 0) {
700  __SA2_OPEN(saReverseParams);
701  __SA3_OPEN(saReverseParams);
702 
703  for (int32_t block = 0; block < totalBlocks; block++) {
 /* Y values and diagonal reciprocals for this block. */
704  __vpred predY = c7x::strm_agen<2, vec>::get_vpred();
705  vec * pLoadY = c7x::strm_agen<2, vec>::get_adv(pSA2);
706  vec vecY = __vload_pred(predY, pLoadY);
707 
708  __vpred predDiv = c7x::strm_agen<3, vec>::get_vpred();
709  vec * pLoadDiv = c7x::strm_agen<3, vec>::get_adv(pSA3);
710  vec vecDiv = __vload_pred(predDiv, pLoadDiv);
711 
 /* sumRows: X values already known; totalRows: matrix rows streamed. */
712  int32_t sumRows = block * eleCount;
713  int32_t totalRows = sumRows + eleCount;
714 
715  seBlockParams.ICNT1 = totalRows;
716  seReadXParams.ICNT1 = sumRows;
717 
718  __SE0_OPEN(pSE0, seBlockParams);
719  if (sumRows > 0) {
720  __SE1_OPEN(pXLastElem, seReadXParams);
721  }
722 
723  /* Calculate sum */
724  vec vecSum = (vec) 0;
725  vec vecSum1 = (vec) 0;
726  vec vecSum2 = (vec) 0;
727  vec vecSum3 = (vec) 0;
728  vec vecSum4 = (vec) 0;
729  int32_t vertical = 0;
730 
 /* Accumulate products of matrix rows with already-solved X values,
    four rows per iteration into independent accumulators. */
731  for (vertical = 0; vertical < sumRows - 3; vertical += 4) {
732  vec v1 = c7x::strm_eng<0, vec>::get_adv();
733  vec x1 = c7x::strm_eng<1, vec>::get_adv();
734  vecSum1 += v1 * x1;
735 
736  vec v2 = c7x::strm_eng<0, vec>::get_adv();
737  vec x2 = c7x::strm_eng<1, vec>::get_adv();
738  vecSum2 += v2 * x2;
739 
740  vec v3 = c7x::strm_eng<0, vec>::get_adv();
741  vec x3 = c7x::strm_eng<1, vec>::get_adv();
742  vecSum3 += v3 * x3;
743 
744  vec v4 = c7x::strm_eng<0, vec>::get_adv();
745  vec x4 = c7x::strm_eng<1, vec>::get_adv();
746  vecSum4 += v4 * x4;
747  }
748 
749  vecSum = vecSum1 + vecSum2 + vecSum3 + vecSum4;
750 
751  /* Calculate values */
 /* Lanes are resolved from the highest lane down to lane 0. */
752  uint32_t vecIndex = eleCount - 1;
753  dataType resultEle1;
754 
755  for (vertical = 0; vertical < eleCount; vertical++) {
756  vec v1 = c7x::strm_eng<0, vec>::get_adv();
757  vec result1 = (vecY - vecSum) * vecDiv;
758  getElement(result1, vecIndex--, &resultEle1);
 /* Fold the new value into the running sums of the remaining lanes. */
759  vecSum += v1 * (resultEle1);
760  dataType *pStoreX1 = c7x::strm_agen<1, dataType>::get_adv(pSA1);
761  *pStoreX1 = resultEle1;
762  }
763 
 /* Next block starts eleCount columns further left. */
764  pSE0 -= eleCount;
765  __SE0_CLOSE();
766  __SE1_CLOSE();
767  }
768  __SA2_CLOSE();
769  __SA3_CLOSE();
770  }
771  /* Processing Remaining Elements */
772  if (remainingEle > 0) {
 /* Partial block: only remainingEle columns per row, all `order` rows. */
773  seBlockParams.ICNT0 = remainingEle;
774  seBlockParams.ICNT1 = order;
775 
776  seReadXParams.ICNT1 = order;
777 
 /* One access of remainingEle elements from the start of pY / pDiv. */
778  saReverseParams.ICNT0 = remainingEle;
779  saReverseParams.ICNT1 = 1;
780  saReverseParams.DIM1 = 0;
781 
 /* First column of the last row of pL. */
782  pSE0 = &pL[(order - 1) * colLstride];
783  __SE0_OPEN(pSE0, seBlockParams);
784  __SE1_OPEN(pXLastElem, seReadXParams);
785  __SA2_OPEN(saReverseParams);
786  __SA3_OPEN(saReverseParams);
787 
788  int32_t sumRows = totalBlocks * eleCount;
789 
790  __vpred predY = c7x::strm_agen<2, vec>::get_vpred();
791  vec * pLoadY = c7x::strm_agen<2, vec>::get_adv(pY);
792  vec vecY = __vload_pred(predY, pLoadY);
793 
794  __vpred predDiv = c7x::strm_agen<3, vec>::get_vpred();
795  vec * pLoadDiv = c7x::strm_agen<3, vec>::get_adv(pDiv);
796  vec vecDiv = __vload_pred(predDiv, pLoadDiv);
797 
798  vec vecSum = (vec) 0;
799  vec vecSum1 = (vec) 0;
800  vec vecSum2 = (vec) 0;
801  vec vecSum3 = (vec) 0;
802  vec vecSum4 = (vec) 0;
803  int32_t vertical = 0;
804 
 /* Accumulate contributions of all X values solved by the full blocks. */
805  for (vertical = 0; vertical < sumRows - 3; vertical += 4) {
806  vec v1 = c7x::strm_eng<0, vec>::get_adv();
807  vec x1 = c7x::strm_eng<1, vec>::get_adv();
808  vecSum1 += v1 * x1;
809 
810  vec v2 = c7x::strm_eng<0, vec>::get_adv();
811  vec x2 = c7x::strm_eng<1, vec>::get_adv();
812  vecSum2 += v2 * x2;
813 
814  vec v3 = c7x::strm_eng<0, vec>::get_adv();
815  vec x3 = c7x::strm_eng<1, vec>::get_adv();
816  vecSum3 += v3 * x3;
817 
818  vec v4 = c7x::strm_eng<0, vec>::get_adv();
819  vec x4 = c7x::strm_eng<1, vec>::get_adv();
820  vecSum4 += v4 * x4;
821  }
822 
823  vecSum = vecSum1 + vecSum2 + vecSum3 + vecSum4;
824 
 /* Resolve the leftover unknowns from lane remainingEle-1 down to lane 0. */
825  int32_t vecIndex = remainingEle - 1;
826  dataType resultEle;
827 
828  for (vertical = 0; vertical < remainingEle; vertical++) {
829  vec v1 = c7x::strm_eng<0, vec>::get_adv();
830  vec result = (vecY - vecSum) * vecDiv;
831 
832  getElement(result, vecIndex--, &resultEle);
833  vecSum += v1 * (resultEle);
834 
 /* SA1 continues from where the full-block phase left off. */
835  dataType *pStoreX = c7x::strm_agen<1, dataType>::get_adv(pSA1);
836  *pStoreX = resultEle;
837  }
838 
839  __SE0_CLOSE();
840  __SE1_CLOSE();
841  __SA2_CLOSE();
842  __SA3_CLOSE();
843  }
844  __SA1_CLOSE();
845  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Exiting function");
846 }
848  float * pX,
849  float * pY,
850  float * pDiv,
851  int32_t order,
852  int32_t colLstride,
853  uint8_t *pBlock);
855  double * pX,
856  double * pY,
857  double * pDiv,
858  int32_t order,
859  int32_t colLstride,
860  uint8_t *pBlock);
861 
862 template <typename dataType>
864  unsigned short *P,
865  dataType *L,
866  dataType *U,
867  dataType *B,
868  dataType *B_Mod,
869  dataType *Y,
870  dataType *X,
871  dataType *pDiv,
872  dataType *pScratchTrans)
873 {
874  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Entering function");
875 
876  DSPLIB_matTrans_PrivArgs *pMatTransKerPrivArgs = &pKerPrivArgs->pMatTransKerPrivArgs;
877 
878  int32_t order = pKerPrivArgs->order;
879  int32_t strideOrder = pKerPrivArgs->strideOrder;
880  int32_t strideP = pKerPrivArgs->strideP;
881  uint8_t *pBlock = pKerPrivArgs->bufPblock;
882 
883  int32_t dataSize = sizeof(dataType);
884  int32_t dataSizeP = sizeof(unsigned short);
885 
886  int32_t orderStride = strideOrder / dataSize;
887  int32_t orderPStride = strideP / dataSizeP;
888 
889  /* -----------------------------------------------------------------------
890  modify b based on permutation matrix P
891  ------------------------------------------------------------------------- */
892  DSPLIB_lud_sol_permuteB_ci<dataType>(P, B, B_Mod, order, orderPStride, NULL, pBlock);
893 
894  /*-----------------------------------------------------------------------
895  solve L*y=b for y using forward substitution
896  -------------------------------------------------------------------------*/
897 
898  DSPLIB_matTrans_exec_ci<dataType>(pMatTransKerPrivArgs, L, pScratchTrans);
899 
900  DSPLIB_lud_sol_forwardSubstitution_ci<dataType>(pScratchTrans, Y, B_Mod, pDiv, order, orderStride, pBlock);
901 
902  /* -----------------------------------------------------------------------------
903  solve U*X=y for x using backward substitution
904  ------------------------------------------------------------------------------ */
905 
906  DSPLIB_matTrans_exec_ci<dataType>(pMatTransKerPrivArgs, U, pScratchTrans);
907  DSPLIB_lud_sol_backSubstitution_ci<dataType>(pScratchTrans, X, Y, pDiv, order, orderStride, pBlock);
908 
909  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Exiting function");
910  return 0;
911 }
912 
914  unsigned short *P,
915  float *L,
916  float *U,
917  float *B,
918  float *B_Mod,
919  float *Y,
920  float *X,
921  float *pDiv,
922  float *pScratchTrans);
924  unsigned short *P,
925  double *L,
926  double *U,
927  double *B,
928  double *B_Mod,
929  double *Y,
930  double *X,
931  double *pDiv,
932  double *pScratchTrans);
933 
934 template <typename dataType>
936  void *restrict pP,
937  void *restrict pL,
938  void *restrict pU,
939  void *restrict pB,
940  void *restrict pX,
941  void *restrict pVecScratch,
942  void *restrict pScratchTrans)
943 {
944  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Entering function");
945  DSPLIB_STATUS status = DSPLIB_SUCCESS;
946  DSPLIB_lud_sol_PrivArgs *pKerPrivArgs = (DSPLIB_lud_sol_PrivArgs *) handle;
947 
948  int32_t strideVec = pKerPrivArgs->strideVec;
949 
950  /* Typecast void pointers to respective data type */
951  unsigned short *pPLocal = (unsigned short *) pP;
952  dataType * pLLocal = (dataType *) pL;
953  dataType * pULocal = (dataType *) pU;
954  dataType * pBLocal = (dataType *) pB;
955  dataType * pB_ModLocal = (dataType *) (pVecScratch) + (0 * strideVec / sizeof(dataType));
956  dataType * pYLocal = (dataType *) (pVecScratch) + (1 * strideVec / sizeof(dataType));
957  dataType * pXLocal = (dataType *) pX;
958  dataType * pDivLocal = (dataType *) (pVecScratch) + (2 * strideVec / sizeof(dataType));
959  dataType * pTransLocal = (dataType *) pScratchTrans;
960 
962  0, "pPLocal: %p pLLocal: %p pULocal: %p pBLocal: %p pB_ModLocal: %p pYLocal: %p pXLocal: %p order: %d\n",
963  pPLocal, pLLocal, pULocal, pBLocal, pB_ModLocal, pYLocal, pXLocal, pKerPrivArgs->order);
964 
965  DSPLIB_lud_sol_ci<dataType>(pKerPrivArgs, pPLocal, pLLocal, pULocal, pBLocal, pB_ModLocal, pYLocal, pXLocal,
966  pDivLocal, pTransLocal);
967 
968  DSPLIB_DEBUGPRINTFN(0, "Exiting function with return status: %d\n", status);
969  return status;
970 }
971 
972 // explicit instantiation for the different data type versions
974  void *restrict pP,
975  void *restrict pL,
976  void *restrict pU,
977  void *restrict pB,
978  void *restrict pX,
979  void *restrict pVecScratch,
980  void *restrict pScratchTrans);
981 
983  void *restrict pP,
984  void *restrict pL,
985  void *restrict pU,
986  void *restrict pB,
987  void *restrict pX,
988  void *restrict pVecScratch,
989  void *restrict pScratchTrans);
990 
991 /* ======================================================================== */
992 /* End of file: DSPLIB_lud_sol_ci.cpp */
993 /* ======================================================================== */
static void DSPLIB_lud_sol_backSubstitution_ci(dataType *pL, dataType *pX, dataType *pY, dataType *pDiv, int32_t order, int32_t colLstride, uint8_t *pBlock)
template void DSPLIB_lud_sol_permuteB_init_ci< float >(DSPLIB_kernelHandle handle)
int DSPLIB_lud_sol_ci(DSPLIB_lud_sol_PrivArgs *pKerPrivArgs, unsigned short *P, dataType *L, dataType *U, dataType *B, dataType *B_Mod, dataType *Y, dataType *X, dataType *pDiv, dataType *pScratchTrans)
static void DSPLIB_lud_sol_forwardSubstitution_ci(dataType *pL, dataType *pY, dataType *pB, dataType *pDiv, int32_t order, int32_t colLstride, uint8_t *pBlock)
void DSPLIB_lud_sol_permuteB_ci(unsigned short *pIn, dataType *B, dataType *B_Mod, int32_t order, int32_t colPStride, uint32_t *permuteOrder, uint8_t *pBlock)
template DSPLIB_STATUS DSPLIB_lud_sol_exec_ci< float >(DSPLIB_kernelHandle handle, void *restrict pP, void *restrict pL, void *restrict pU, void *restrict pB, void *restrict pX, void *restrict pVecScratch, void *restrict pScratchTrans)
void getElement(V inVec, uint32_t index, dataType *element)
template int DSPLIB_lud_sol_ci< float >(DSPLIB_lud_sol_PrivArgs *pKerPrivArgs, unsigned short *P, float *L, float *U, float *B, float *B_Mod, float *Y, float *X, float *pDiv, float *pScratchTrans)
template void DSPLIB_lud_sol_forwardSubstitution_ci< float >(float *pL, float *pX, float *pY, float *pDiv, int32_t order, int32_t colLstride, uint8_t *pBlock)
template DSPLIB_STATUS DSPLIB_lud_sol_init_ci< float >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams2D_t *bufParamsP, const DSPLIB_bufParams2D_t *bufParamsL, const DSPLIB_bufParams2D_t *bufParamsU, const DSPLIB_bufParams1D_t *bufParamsB, const DSPLIB_bufParams1D_t *bufParamsX, const DSPLIB_bufParams2D_t *bufParamsVecScratch, const DSPLIB_bufParams2D_t *bufParamsScratchTrans, const DSPLIB_lud_solInitArgs *pKerInitArgs)
template DSPLIB_STATUS DSPLIB_lud_sol_exec_ci< double >(DSPLIB_kernelHandle handle, void *restrict pP, void *restrict pL, void *restrict pU, void *restrict pB, void *restrict pX, void *restrict pVecScratch, void *restrict pScratchTrans)
DSPLIB_STATUS DSPLIB_lud_sol_exec_ci(DSPLIB_kernelHandle handle, void *restrict pP, void *restrict pL, void *restrict pU, void *restrict pB, void *restrict pX, void *restrict pVecScratch, void *restrict pScratchTrans)
This function is the main execution function for the C7x implementation of the kernel....
template void DSPLIB_lud_sol_substitution_init_ci< double >(DSPLIB_kernelHandle handle)
template void DSPLIB_lud_sol_substitution_init_ci< float >(DSPLIB_kernelHandle handle)
template void DSPLIB_lud_sol_forwardSubstitution_ci< double >(double *pL, double *pX, double *pY, double *pDiv, int32_t order, int32_t colLstride, uint8_t *pBlock)
void DSPLIB_lud_sol_permuteB_init_ci(DSPLIB_kernelHandle handle)
template void DSPLIB_lud_sol_backSubstitution_ci< double >(double *pL, double *pX, double *pY, double *pDiv, int32_t order, int32_t colLstride, uint8_t *pBlock)
template void DSPLIB_lud_sol_permuteB_ci< float >(unsigned short *pIn, float *B, float *B_Mod, int32_t order, int32_t colPStride, uint32_t *permuteOrder, uint8_t *pBlock)
void DSPLIB_lud_sol_substitution_init_ci(DSPLIB_kernelHandle handle)
template DSPLIB_STATUS DSPLIB_lud_sol_init_ci< double >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams2D_t *bufParamsP, const DSPLIB_bufParams2D_t *bufParamsL, const DSPLIB_bufParams2D_t *bufParamsU, const DSPLIB_bufParams1D_t *bufParamsB, const DSPLIB_bufParams1D_t *bufParamsX, const DSPLIB_bufParams2D_t *bufParamsVecScratch, const DSPLIB_bufParams2D_t *bufParamsScratchTrans, const DSPLIB_lud_solInitArgs *pKerInitArgs)
template void DSPLIB_lud_sol_permuteB_ci< double >(unsigned short *pIn, double *B, double *B_Mod, int32_t order, int32_t colPStride, uint32_t *permuteOrder, uint8_t *pBlock)
DSPLIB_STATUS DSPLIB_lud_sol_init_ci(DSPLIB_kernelHandle handle, const DSPLIB_bufParams2D_t *bufParamsP, const DSPLIB_bufParams2D_t *bufParamsL, const DSPLIB_bufParams2D_t *bufParamsU, const DSPLIB_bufParams1D_t *bufParamsB, const DSPLIB_bufParams1D_t *bufParamsX, const DSPLIB_bufParams2D_t *bufParamsVecScratch, const DSPLIB_bufParams2D_t *bufParamsScratchTrans, const DSPLIB_lud_solInitArgs *pKerInitArgs)
This function is the initialization function for the C7x implementation of the kernel....
template void DSPLIB_lud_sol_permuteB_init_ci< double >(DSPLIB_kernelHandle handle)
template void DSPLIB_lud_sol_backSubstitution_ci< float >(float *pL, float *pX, float *pY, float *pDiv, int32_t order, int32_t colLstride, uint8_t *pBlock)
template int DSPLIB_lud_sol_ci< double >(DSPLIB_lud_sol_PrivArgs *pKerPrivArgs, unsigned short *P, double *L, double *U, double *B, double *B_Mod, double *Y, double *X, double *pDiv, double *pScratchTrans)
Header file for kernel's internal use. For the kernel's interface, please see DSPLIB_lud_sol.
#define DSPLIB_DEBUGPRINTFN(N, fmt,...)
Definition: DSPLIB_types.h:83
DSPLIB_STATUS_NAME
The enumeration of all status codes.
Definition: DSPLIB_types.h:151
void * DSPLIB_kernelHandle
Handle type for DSPLIB operations.
Definition: DSPLIB_types.h:172
@ DSPLIB_SUCCESS
Definition: DSPLIB_types.h:152
A structure for a 1 dimensional buffer descriptor.
A structure for a 2 dimensional buffer descriptor.
int32_t stride_y
Stride in Y dimension in bytes.
uint32_t dim_x
Width of buffer in X dimension in elements.
uint32_t dim_y
Height of buffer in Y dimension in elements.
Structure containing the parameters to initialize the kernel.
int8_t funcStyle
Variant of the function; refer to DSPLIB_FUNCTION_STYLE.
Structure that is reserved for internal use by the kernel.
int32_t strideOrder
Stride between rows of input and output data matrix
DSPLIB_matTrans_PrivArgs pMatTransKerPrivArgs
Privargs for the matTrans kernel.
uint8_t bufPblock[DSPLIB_LUD_SOL_IXX_IXX_OXX_PBLOCK_SIZE]
Buffer to save SE & SA configuration parameters
int32_t order
Size of input buffer for different batches DSPLIB_lud_sol_init that will be retrieved and used by DSP...
int32_t strideP
Stride between rows of input data matrix P
int32_t strideVec
Stride between rows of scratch data matrix
Structure containing the parameters to initialize the kernel.
uint32_t dimX
Size of input data.
int8_t funcStyle
Variant of the function; refer to DSPLIB_FUNCTION_STYLE.
Structure that is reserved for internal use by the kernel.
int32_t strideOut
Stride between rows of output data matrix
uint32_t heightIn
Height of input data matrix
int32_t strideIn
Stride between rows of input data matrix
uint32_t widthIn
Size of input buffer for different batches DSPLIB_matTrans_init that will be retrieved and used by DS...