56 template <
typename dataType>
62 const int32_t strideOrder,
63 const int32_t strideP)
67 int min_row, max_row, k, temp;
68 dataType min, max, tmp;
72 int32_t dataSize =
sizeof(dataType);
73 int32_t dataSizeP =
sizeof(
unsigned short);
75 int32_t orderStride = strideOrder / dataSize;
76 int32_t orderPStride = strideP / dataSizeP;
81 for (row = 0; row < order; row++) {
82 for (col = 0; col < order; col++) {
84 P[col + row * orderPStride] = 1;
87 P[col + row * orderPStride] = 0;
95 memcpy(U, A,
sizeof(dataType) * order * orderStride);
97 for (k = 0; k < order - 1; k++) {
102 for (row = k; row < order; row++) {
103 if (fabs(U[k + row * orderStride]) > max) {
104 max = fabs(U[k + row * orderStride]);
108 if (fabs(U[k + row * orderStride]) < min) {
109 min = fabs(U[k + row * orderStride]);
116 for (col = 0; col < order; col++) {
117 tmp = U[col + min_row * orderStride];
118 U[col + min_row * orderStride] = U[col + max_row * orderStride];
119 U[col + max_row * orderStride] = tmp;
120 temp = P[col + min_row * orderPStride];
121 P[col + min_row * orderPStride] = P[col + max_row * orderPStride];
122 P[col + max_row * orderPStride] = temp;
128 dataType mulFactor = 1.0 / U[k + k * orderStride];
129 for (row = k + 1; row < order; row++) {
130 U[k + row * orderStride] *= mulFactor;
134 for (row = k + 1; row < order; row++) {
135 for (col = k + 1; col < order; col++) {
136 U[col + row * orderStride] -= U[k + row * orderStride] * U[col + k * orderStride];
142 for (row = 0; row < order; row++) {
143 for (col = 0; col < order; col++) {
145 L[col + row * orderStride] = 0;
149 L[col + row * orderStride] = 1;
152 L[col + row * orderStride] = U[col + row * orderStride];
153 U[col + row * orderStride] = 0;
168 const int32_t strideOrder,
169 const int32_t strideP);
175 const int32_t strideOrder,
176 const int32_t strideP);
178 template <
typename dataType>
190 int32_t order = pKerPrivArgs->
order;
192 int32_t strideP = pKerPrivArgs->
strideP;
195 dataType * pALocal = (dataType *) pA;
196 dataType * pLLocal = (dataType *) pL;
197 dataType * pULocal = (dataType *) pU;
198 unsigned short *pPLocal = (
unsigned short *) pP;
200 DSPLIB_DEBUGPRINTFN(0,
"pALocal: %p pLLocal: %p pULocal: %p pPLocal: %p\n", pALocal, pLLocal, pULocal, pPLocal);
202 DSPLIB_lud_cn<dataType>(order, pALocal, pLLocal, pULocal, pPLocal, strideOrder, strideP);
int DSPLIB_lud_cn(int order, dataType *A, dataType *L, dataType *U, unsigned short *P, const int32_t strideOrder, const int32_t strideP)
template int DSPLIB_lud_cn< float >(int order, float *A, float *L, float *U, unsigned short *P, const int32_t strideOrder, const int32_t strideP)
template DSPLIB_STATUS DSPLIB_lud_exec_cn< float >(DSPLIB_kernelHandle handle, void *restrict pA, void *restrict pL, void *restrict pU, void *restrict pP)
DSPLIB_STATUS DSPLIB_lud_init_cn(DSPLIB_kernelHandle handle, const DSPLIB_bufParams2D_t *bufParamsA, const DSPLIB_bufParams2D_t *bufParamsL, const DSPLIB_bufParams2D_t *bufParamsU, const DSPLIB_bufParams2D_t *bufParamsP, const DSPLIB_ludInitArgs *pKerInitArgs)
This function is the initialization function for the natural C implementation of the kernel....
template DSPLIB_STATUS DSPLIB_lud_exec_cn< double >(DSPLIB_kernelHandle handle, void *restrict pA, void *restrict pL, void *restrict pU, void *restrict pP)
DSPLIB_STATUS DSPLIB_lud_exec_cn(DSPLIB_kernelHandle handle, void *restrict pA, void *restrict pL, void *restrict pU, void *restrict pP)
This function is the main execution function for the natural C implementation of the kernel....
template int DSPLIB_lud_cn< double >(int order, double *A, double *L, double *U, unsigned short *P, const int32_t strideOrder, const int32_t strideP)
Header file for kernel's internal use. For the kernel's interface, please see DSPLIB_lud.
#define DSPLIB_DEBUGPRINTFN(N, fmt,...)
DSPLIB_STATUS_NAME
The enumeration of all status codes.
void * DSPLIB_kernelHandle
Handle type for DSPLIB operations.
A structure for a 2 dimensional buffer descriptor.
Structure containing the parameters to initialize the kernel.
Structure that is reserved for internal use by the kernel.
int32_t order
Matrix order (number of rows and columns), which determines the size of the input buffer; it is set in DSPLIB_lud_init and later retrieved and used by DSPLIB_...
int32_t strideOrder
Stride, in bytes, between rows of the input and output data matrices (divided by the element size to obtain the per-row element stride).
int32_t strideP
Stride, in bytes, between rows of the output data matrix P (divided by sizeof(unsigned short) to obtain the per-row element stride).