43 template <
typename dataType>
74 template <
typename dataType>
79 void *restrict pLocalInvAScracth,
80 void *restrict pScratch)
87 int32_t strideQ = pKerPrivArgs->
strideQ;
88 int32_t strideR = pKerPrivArgs->
strideR;
89 int32_t heightR = pKerPrivArgs->
heightR;
90 int32_t widthR = pKerPrivArgs->
widthR;
92 int32_t dataSize =
sizeof(dataType);
95 dataType *pLocalQ = (dataType *) pQ;
96 dataType *pLocalR = (dataType *) pR;
97 dataType *pLocalInvA = (dataType *) pInvA;
99 int32_t colRstride = strideR / dataSize;
100 int32_t colQstride = strideQ / dataSize;
101 int32_t colInvAStride = strideInvA / dataSize;
110 DSPLIB_DEBUGPRINTFN(0,
"pLocalQ: %p pLocalR: %p pLocalInvA: %p widthR: %d heightR: %d\n", pLocalQ, pLocalR,
111 pLocalInvA, widthR, heightR);
118 for (row = 0; row < heightR; row++) {
119 for (col = 0; col < widthR; col++) {
121 pLocalInvA[col + row * colInvAStride] = 1.0;
124 pLocalInvA[col + row * colInvAStride] = 0.0;
130 for (col = widthR - 1; col >= 1; col--) {
131 for (row = col - 1; row >= 0; row--) {
132 factor = pLocalR[col + row * colRstride] / pLocalR[col + col * colRstride];
133 for (k = 0; k < widthR; k++) {
134 pLocalInvA[k + row * colInvAStride] -= factor * pLocalInvA[k + col * colInvAStride];
135 pLocalR[k + row * colRstride] -= factor * pLocalR[k + col * colRstride];
141 for (row = heightR - 1; row >= 0; row--) {
142 factor = pLocalR[row + row * colRstride];
143 for (col = 0; col < widthR; col++) {
144 pLocalInvA[col + row * colInvAStride] /= factor;
145 pLocalR[col + row * colRstride] /= factor;
150 for (row = 0; row < heightR; row++) {
151 for (col = 0; col < widthR; col++) {
153 for (k = 0; k < widthR; k++) {
154 sum += pLocalInvA[k + row * colInvAStride] * pLocalQ[k + col * colQstride];
158 for (col = 0; col < widthR; col++) {
159 pLocalInvA[col + row * colInvAStride] = pLocalR[col];
172 void *restrict pInvA,
173 void *restrict pLocalInvAScracth,
174 void *restrict pScratch);
179 void *restrict pInvA,
180 void *restrict pLocalInvAScracth,
181 void *restrict pScratch);
template DSPLIB_STATUS DSPLIB_qrd_inverse_exec_cn< float >(DSPLIB_kernelHandle handle, void *restrict pQ, void *restrict pR, void *restrict pInvA, void *restrict pLocalInvAScracth, void *restrict pScratch)
DSPLIB_STATUS DSPLIB_qrd_inverse_init_cn(DSPLIB_kernelHandle handle, DSPLIB_bufParams2D_t *bufParamsQ, DSPLIB_bufParams2D_t *bufParamsR, DSPLIB_bufParams2D_t *bufParamsInvA, DSPLIB_bufParams2D_t *bufParamsInvAFinal, const DSPLIB_qrdInvInitArgs *pKerInitArgs)
This function is the initialization function for the natural C implementation of the kernel....
template DSPLIB_STATUS DSPLIB_qrd_inverse_exec_cn< double >(DSPLIB_kernelHandle handle, void *restrict pQ, void *restrict pR, void *restrict pInvA, void *restrict pLocalInvAScracth, void *restrict pScratch)
DSPLIB_STATUS DSPLIB_qrd_inverse_exec_cn(DSPLIB_kernelHandle handle, void *restrict pQ, void *restrict pR, void *restrict pInvA, void *restrict pLocalInvAScracth, void *restrict pScratch)
This function is the main execution function for the natural C implementation of the kernel....
template DSPLIB_STATUS DSPLIB_qrd_inverse_init_cn< float >(DSPLIB_kernelHandle handle, DSPLIB_bufParams2D_t *bufParamsQ, DSPLIB_bufParams2D_t *bufParamsR, DSPLIB_bufParams2D_t *bufParamsInvA, DSPLIB_bufParams2D_t *bufParamsInvAFinal, const DSPLIB_qrdInvInitArgs *pKerInitArgs)
template DSPLIB_STATUS DSPLIB_qrd_inverse_init_cn< double >(DSPLIB_kernelHandle handle, DSPLIB_bufParams2D_t *bufParamsQ, DSPLIB_bufParams2D_t *bufParamsR, DSPLIB_bufParams2D_t *bufParamsInvA, DSPLIB_bufParams2D_t *bufParamsInvAFinal, const DSPLIB_qrdInvInitArgs *pKerInitArgs)
Header file for kernel's internal use. For the kernel's interface, please see DSPLIB_qrd_inverse.
#define DSPLIB_DEBUGPRINTFN(N, fmt,...)
DSPLIB_STATUS_NAME
The enumeration of all status codes.
void * DSPLIB_kernelHandle
Handle type for DSPLIB operations.
A structure for a 2-dimensional buffer descriptor.
Structure containing the parameters to initialize the kernel.
Structure that is reserved for internal use by the kernel.
uint32_t heightR
Height of input data matrix
int32_t strideR
Stride between rows of R output data matrix
uint32_t widthR
Width of input data matrix; size of input buffer for different batches (see DSPLIB_qrd_inverse_init) that will be retrieved and used by...
int32_t strideInvA
Stride between rows of input data matrix
int32_t strideQ
Stride between rows of Q output data matrix