43 template <
typename dataType>
// Natural-C execution routine of the QR decomposition kernel, templated on the
// element type (dataType). NOTE: this listing omits several original source
// lines (the function-name line, local declarations, some assignments and all
// closing braces), so the comments below describe only what is visible here.
//
// Parameters visible in this listing:
//   pA       - input matrix A; copied into R and not otherwise accessed here.
//   pQ       - output matrix Q; initialised to identity, then updated in place.
//   pR       - output matrix R; starts as a copy of A and is reduced in place.
//   pU       - work vector; holds the reflection vector for the current column.
//   pScratch - not referenced in any line visible in this listing.
74 template <
typename dataType>
76 const void *restrict pA,
77 const void *restrict pQ,
78 const void *restrict pR,
79 const void *restrict pU,
80 const void *restrict pScratch)
// Matrix dimensions and row strides come from the kernel's private arguments.
87 int32_t nRows = pKerPrivArgs->
heightA;
88 int32_t nCols = pKerPrivArgs->
widthA;
89 int32_t strideA = pKerPrivArgs->
strideA;
90 int32_t strideQ = pKerPrivArgs->
strideQ;
91 int32_t strideR = pKerPrivArgs->
strideR;
92 int32_t dataSize =
sizeof(dataType);
// Convert the strides into element counts by dividing by the element size
// (so the stored strides are presumably expressed in bytes - confirm).
94 int32_t colStrideQ = strideQ / dataSize;
95 int32_t colStrideR = strideR / dataSize;
96 int32_t colStrideA = strideA / dataSize;
// Typed views of the opaque buffers. The casts also drop the const qualifier:
// Q, R and U are written through these pointers below.
110 dataType *pLocalA = (dataType *) pA;
111 dataType *pLocalQ = (dataType *) pQ;
112 dataType *pLocalR = (dataType *) pR;
113 dataType *pLocalU = (dataType *) pU;
115 DSPLIB_DEBUGPRINTFN(0,
"pALocal: %p pLocalQ: %p pLocalR: %p pLocalU: %p nCols: %d nRows: %d\n", pLocalA, pLocalQ,
116 pLocalR, pLocalU, nCols, nRows);
// R starts out as a copy of A: nRows rows of colStrideA elements each.
// NOTE(review): the copy length uses A's stride, while R is indexed with
// colStrideR below; this looks like it assumes strideA == strideR - confirm.
122 memcpy(pLocalR, pLocalA,
sizeof(dataType) * nRows * colStrideA);
// Clear Q (memset takes an int fill value, so 0.0 is converted to 0), then
// write ones on the diagonal so that Q starts as the identity matrix.
126 memset(pLocalQ, 0.0,
sizeof(dataType) * nRows * colStrideQ);
127 for (row = 0; row < nRows; row++) {
128 pLocalQ[row + row * colStrideQ] = 1.0;
// Index of the last column to process (the column loop below is inclusive):
// nRows - 2 when nRows <= nCols; the second assignment is presumably the
// `else` branch, whose keyword sits on lines omitted from this listing.
131 if (nRows <= nCols) {
132 loopCount = nRows - 2;
135 loopCount = nCols - 1;
// Main loop: one pass per column `col`. The update pattern below appears to be
// a Householder reflection per column - TODO confirm against the full source.
138 for (col = 0; col <= loopCount; col++) {
// Accumulate the sum of squares of column `col` from the diagonal downwards
// (the reset of `sum` is not visible in this listing).
140 for (row = col; row < nRows; row++) {
141 sum += pLocalR[col + (row * colStrideR)] * pLocalR[col + (row * colStrideR)];
// Branch on the sign of the diagonal element; `alpha` is presumably assigned
// in the omitted lines that follow this test - confirm.
145 if (pLocalR[col + (col * colStrideR)] >= 0) {
// Build the vector U for this column: the first entry is the diagonal element
// plus alpha, and the diagonal of R is replaced by -alpha.
148 pLocalU[col] = pLocalR[col + (col * colStrideR)] + alpha;
149 pLocalR[col + (col * colStrideR)] = -alpha;
150 norm_sqr = pLocalU[col] * pLocalU[col];
// Move the sub-diagonal entries of the column into U, zero them in R, and keep
// accumulating the squared norm of U.
151 for (row = col + 1; row < nRows; row++) {
152 pLocalU[row] = pLocalR[col + (row * colStrideR)];
153 pLocalR[col + (row * colStrideR)] = 0;
154 norm_sqr += pLocalU[row] * pLocalU[row];
// Guard against division by zero before computing the reciprocal scale factor.
// (Where `scale` is applied is not visible in this listing.)
156 if (alpha * pLocalU[col] != 0.00) {
157 scale = 1 / (alpha * pLocalU[col]);
// Update every column of R to the right of `col`: form the dot product of U
// with rows col..nRows-1 of column i, then subtract U[k] * sum from each entry.
159 for (i = col + 1; i < nCols; i++) {
161 for (k = col; k < nRows; k++) {
162 sum += pLocalU[k] * pLocalR[i + (k * colStrideR)];
165 for (k = col; k < nRows; k++) {
166 pLocalR[i + (k * colStrideR)] -= pLocalU[k] * sum;
// Apply the same update to Q, one row i at a time: the dot product runs over
// columns col..nRows-1 of row i, and U[k] * sum is subtracted from each entry.
170 for (i = 0; i < nRows; i++) {
172 for (k = col; k < nRows; k++) {
173 sum += pLocalU[k] * pLocalQ[k + (i * colStrideQ)];
176 for (k = col; k < nRows; k++) {
177 pLocalQ[k + i * colStrideQ] -= pLocalU[k] * sum;
191 const void *restrict pA,
192 const void *restrict pQ,
193 const void *restrict pR,
194 const void *restrict pU,
195 const void *restrict pScratch);
198 const void *restrict pA,
199 const void *restrict pQ,
200 const void *restrict pR,
201 const void *restrict pU,
202 const void *restrict pScratch);
DSPLIB_STATUS DSPLIB_qrd_init_cn(DSPLIB_kernelHandle handle, DSPLIB_bufParams2D_t *bufParamsA, DSPLIB_bufParams2D_t *bufParamsQ, DSPLIB_bufParams2D_t *bufParamsR, DSPLIB_bufParams1D_t *bufParamsU, const DSPLIB_qrdInitArgs *pKerInitArgs)
This function is the initialization function for the natural C implementation of the kernel....
template DSPLIB_STATUS DSPLIB_qrd_exec_cn< double >(DSPLIB_kernelHandle handle, const void *restrict pA, const void *restrict pQ, const void *restrict pR, const void *restrict pU, const void *restrict pScratch)
template DSPLIB_STATUS DSPLIB_qrd_init_cn< double >(DSPLIB_kernelHandle handle, DSPLIB_bufParams2D_t *bufParamsA, DSPLIB_bufParams2D_t *bufParamsQ, DSPLIB_bufParams2D_t *bufParamsR, DSPLIB_bufParams1D_t *bufParamsU, const DSPLIB_qrdInitArgs *pKerInitArgs)
template DSPLIB_STATUS DSPLIB_qrd_exec_cn< float >(DSPLIB_kernelHandle handle, const void *restrict pA, const void *restrict pQ, const void *restrict pR, const void *restrict pU, const void *restrict pScratch)
template DSPLIB_STATUS DSPLIB_qrd_init_cn< float >(DSPLIB_kernelHandle handle, DSPLIB_bufParams2D_t *bufParamsA, DSPLIB_bufParams2D_t *bufParamsQ, DSPLIB_bufParams2D_t *bufParamsR, DSPLIB_bufParams1D_t *bufParamsU, const DSPLIB_qrdInitArgs *pKerInitArgs)
DSPLIB_STATUS DSPLIB_qrd_exec_cn(DSPLIB_kernelHandle handle, const void *restrict pA, const void *restrict pQ, const void *restrict pR, const void *restrict pU, const void *restrict pScratch)
This function is the main execution function for the natural C implementation of the kernel....
Header file for kernel's internal use. For the kernel's interface, please see DSPLIB_qrd.
#define DSPLIB_DEBUGPRINTFN(N, fmt,...)
DSPLIB_STATUS_NAME
The enumeration of all status codes.
void * DSPLIB_kernelHandle
Handle type for DSPLIB operations.
A structure for a 1 dimensional buffer descriptor.
A structure for a 2 dimensional buffer descriptor.
Structure containing the parameters to initialize the kernel.
Structure that is reserved for internal use by the kernel.
int32_t strideR
Stride between rows of R output data matrix
uint32_t heightA
Height of input data matrix
uint32_t widthA
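Width (number of columns) of input data matrix. [Extracted description, apparently misattached to this field and truncated: "Size of input buffer for different batches DSPLIB_qrd_init that will be retrieved and used by DSPLIB_..."]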
int32_t strideQ
Stride between rows of Q output data matrix
int32_t strideA
Stride between rows of input data matrix