54 template <
typename dataType>
60 const int32_t strideOrder,
61 const int32_t strideP)
67 dataType *inv_L, *inv_U, *inv_U_x_inv_L;
69 int32_t dataSize =
sizeof(dataType);
70 int32_t dataSizeP =
sizeof(
unsigned short);
72 int32_t orderStride = strideOrder / dataSize;
73 int32_t orderPStride = strideP / dataSizeP;
77 for (row = 0; row < order; row++) {
78 for (col = 0; col < order; col++) {
80 inv_L[col + row * orderStride] = 1.0;
83 inv_L[col + row * orderStride] = 0.0;
89 for (col = 0; col < order - 1; col++) {
90 for (row = col + 1; row < order; row++) {
91 dataType mulFact = 1 / L[col + col * orderStride];
93 factor = L[col + row * orderStride] * mulFact;
95 for (k = 0; k < order; k++) {
96 inv_L[k + row * orderStride] -= factor * inv_L[k + col * orderStride];
97 L[k + row * orderStride] -= factor * L[k + col * orderStride];
104 for (row = 0; row < order; row++) {
105 for (col = 0; col < order; col++) {
107 inv_U[col + row * orderStride] = 1.0;
110 inv_U[col + row * orderStride] = 0.0;
116 for (col = order - 1; col >= 1; col--) {
117 factor = U[col + col * orderStride];
118 dataType mulFact = 1 / factor;
119 for (row = col - 1; row >= 0; row--) {
120 factor = U[col + row * orderStride] * mulFact;
121 for (k = 0; k < order; k++) {
122 inv_U[k + row * orderStride] -= factor * inv_U[k + col * orderStride];
123 U[k + row * orderStride] -= factor * U[k + col * orderStride];
129 for (row = order - 1; row >= 0; row--) {
130 factor = U[row + row * orderStride];
131 dataType mulFact = 1 / factor;
132 for (col = 0; col < order; col++) {
133 L[col + row * orderStride] *= mulFact;
134 U[col + row * orderStride] *= mulFact;
139 inv_U_x_inv_L = &L[0];
140 for (row = 0; row < order; row++) {
141 for (col = 0; col < order; col++) {
143 for (k = 0; k < order; k++) {
144 sum += inv_U[k + row * orderStride] * inv_L[col + k * orderStride];
146 inv_U_x_inv_L[col + row * orderStride] = sum;
150 for (row = 0; row < order; row++) {
151 for (col = 0; col < order; col++) {
153 for (k = 0; k < order; k++) {
154 sum += inv_U_x_inv_L[k + row * orderStride] * P[col + k * orderPStride];
156 invA[col + row * orderStride] = sum;
169 const int32_t strideOrder,
170 const int32_t strideP);
176 const int32_t strideOrder,
177 const int32_t strideP);
179 template <
typename dataType>
184 void *restrict pinvA,
185 void *restrict pStratch)
193 int32_t order = pKerPrivArgs->
order;
195 int32_t strideP = pKerPrivArgs->
strideP;
198 unsigned short *pPLocal = (
unsigned short *) pP;
199 dataType * pLLocal = (dataType *) pL;
200 dataType * pULocal = (dataType *) pU;
201 dataType * pinvALocal = (dataType *) pinvA;
203 DSPLIB_DEBUGPRINTFN(0,
"pPLocal: %p pLLocal: %p pULocal: %p pinvALocal: %p order: %d\n", pPLocal, pLLocal, pULocal,
206 DSPLIB_lud_inv_cn<dataType>(order, pPLocal, pLLocal, pULocal, pinvALocal, strideOrder, strideP);
217 void *restrict pinv_A,
218 void *restrict pStratch);
224 void *restrict pinv_A,
225 void *restrict pStratch);
int DSPLIB_lud_inv_cn(const int order, unsigned short *P, dataType *L, dataType *U, dataType *invA, const int32_t strideOrder, const int32_t strideP)
template int DSPLIB_lud_inv_cn< double >(const int order, unsigned short *P, double *L, double *U, double *invA, const int32_t strideOrder, const int32_t strideP)
template DSPLIB_STATUS DSPLIB_lud_inv_exec_cn< float >(DSPLIB_kernelHandle handle, void *restrict pP, void *restrict pL, void *restrict pU, void *restrict pinv_A, void *restrict pStratch)
DSPLIB_STATUS DSPLIB_lud_inv_exec_cn(DSPLIB_kernelHandle handle, void *restrict pP, void *restrict pL, void *restrict pU, void *restrict pinvA, void *restrict pStratch)
This function is the main execution function for the natural C implementation of the kernel.
DSPLIB_STATUS DSPLIB_lud_inv_init_cn(DSPLIB_kernelHandle handle, const DSPLIB_bufParams2D_t *bufParamsP, const DSPLIB_bufParams2D_t *bufParamsL, const DSPLIB_bufParams2D_t *bufParamsU, const DSPLIB_bufParams2D_t *bufParamsinvA, const DSPLIB_lud_invInitArgs *pKerInitArgs)
This function is the initialization function for the natural C implementation of the kernel.
template int DSPLIB_lud_inv_cn< float >(const int order, unsigned short *P, float *L, float *U, float *invA, const int32_t strideOrder, const int32_t strideP)
template DSPLIB_STATUS DSPLIB_lud_inv_exec_cn< double >(DSPLIB_kernelHandle handle, void *restrict pP, void *restrict pL, void *restrict pU, void *restrict pinv_A, void *restrict pStratch)
Header file for kernel's internal use. For the kernel's interface, please see DSPLIB_lud_inv.
#define DSPLIB_DEBUGPRINTFN(N, fmt,...)
DSPLIB_STATUS_NAME
The enumeration of all status codes.
void * DSPLIB_kernelHandle
Handle type for DSPLIB operations.
A structure for a 2 dimensional buffer descriptor.
Structure containing the parameters to initialize the kernel.
Structure that is reserved for internal use by the kernel.
int32_t strideOrder
Stride between rows of input and output data matrix
int32_t order
Size of input buffer for different batches DSPLIB_lud_inv_init that will be retrieved and used by DSP...
int32_t strideP
Stride, in bytes, between rows of output data matrix P