55 #define MAX_ITERATION_COUNT 30
63 template <
typename dataType>
74 DSPLIB_bidiag_u_init_ci<dataType>(handle);
75 DSPLIB_bidiag_v_init_ci<dataType>(handle);
77 DSPLIB_bidiag_uFinal_init_ci<dataType>(handle);
79 DSPLIB_diag_proc_init_ci<dataType>(handle);
81 DSPLIB_singularSort_swap_init_ci<dataType>(handle);
83 DSPLIB_svd_matTrans_init_ci<dataType>(handle, pKerInitArgs);
111 template <
typename dataType>
118 const int colUStride,
119 const int colVStride,
120 uint32_t enableReducedForm,
125 dataType s, scale, half_norm_squared;
130 for (i = 0; i < Ncols; i++) {
131 superdiag[i] = scale * s;
133 scale = DSPLIB_bidiag_uCol_halfnorm_ci<dataType>(&U[i + i * colUStride], Nrows - i, Ncols, colUStride,
134 &half_norm_squared, U1, &s, pBlock);
135 U[i + i * colUStride] = U[i + i * colUStride] - (s * scale);
136 if (Ncols - (i + 1) > 0 && scale > 0) {
137 DSPLIB_bidiag_uCol_ci<dataType>(&U[i + i * colUStride], Nrows - i, Ncols - (i + 1), colUStride,
138 half_norm_squared, U1, scale, pBlock);
144 if ((i != Ncols - 1)) {
145 scale = DSPLIB_bidiag_uRow_halfnorm_ci<dataType>(&U[(i + 1) + (i * colUStride)], Nrows - i, Ncols - (i + 1),
146 colUStride, &half_norm_squared, U1, &s, &superdiag[i + 1],
149 DSPLIB_bidiag_uRow_ci<dataType>(&U[(i + 1) + (i * colUStride)], Nrows - (i + 1), Ncols - (i + 1),
150 colUStride, &superdiag[i + 1], U1, scale, pBlock);
156 V[(Ncols - 1) + (Ncols - 1) * colVStride] = 1;
157 s = superdiag[Ncols - 1];
158 for (i = Ncols - 2; i >= 0; i--) {
159 DSPLIB_bidiag_v_ci<dataType>(&V[i + (i) *colVStride], Ncols - i - 1, Ncols - i, colVStride, s, U1,
160 &U[(i) + i * colUStride], colUStride, pBlock);
163 if (enableReducedForm == 0u) {
167 DSPLIB_bidiag_uFinal_expand_ci<dataType>(U, Nrows, Ncols, colUStride, pBlock);
172 if (Nrows - (i + 1) > 0) {
178 U[i + i * colUStride] += 1;
182 for (i = Ncols - 2; i >= 0; i--) {
186 U[i + i * colUStride] += 1;
193 for (i = Ncols - 1; i >= 0; i--) {
195 if (i != Ncols - 1) {
200 U[i + i * colUStride] += 1;
211 const int colUStride,
212 const int colVStride,
213 uint32_t enableReducedForm,
222 const int colUStride,
223 const int colVStride,
224 uint32_t enableReducedForm,
228 template <
typename dataType>
static inline dataType
getSqrt(dataType a)
230 const dataType Half = 0.5f;
231 const dataType OneP5 = 1.5f;
240 x = x * (OneP5 - (a * x * x * Half));
241 x = x * (OneP5 - (a * x * x * Half));
251 template <
typename dataType>
static inline dataType
getRecipSqrt(dataType a)
253 const dataType Half = 0.5f;
254 const dataType OneP5 = 1.5f;
264 x = x * (OneP5 - (a * x * x * Half));
265 x = x * (OneP5 - (a * x * x * Half));
273 template <
typename dataType>
281 const int colUStride,
282 const int rowUStride,
283 const int colVStride,
284 const int rowVStride,
285 uint32_t enableReducedForm,
288 int i, k, rotation_test, iter, total_iter;
290 dataType x, y, z, epsilon;
291 dataType c, s, f, g, h;
292 dataType *cU = &pTemp[0 * Ncols];
293 dataType *sU = &pTemp[1 * Ncols];
294 dataType *cV = &pTemp[2 * Ncols];
295 dataType *sV = &pTemp[3 * Ncols];
304 for (k = Ncols - 1; k >= 0; k--) {
315 for (i = m; i <= k; i++) {
316 f = s * superdiag[i];
317 superdiag[i] = c * superdiag[i];
318 #if !defined(ENABLE_LDRA_COVERAGE)
322 if (fabs(f) <= epsilon) {
338 for (i = 0; i <= loopCnt - 1; i++) {
339 for (row = 0; row < Nrows; row++) {
340 y = U[(m - 1) * rowUStride + row];
341 z = U[(i + m) * rowUStride + row];
342 U[(m - 1) * rowUStride + row] = y * cU[i] + z * sU[i];
343 U[(i + m) * rowUStride + row] = -y * sU[i] + z * cU[i];
347 DSPLIB_diag_rotation_proc_ci<dataType>(&U[(m - 1) * rowUStride], m, loopCnt, Nrows, rowUStride, cU, sU,
361 #if !defined(ENABLE_LDRA_COVERAGE)
373 g = superdiag[k - 1];
375 f = ((y - z) * (y + z) + (g - h) * (g + h)) *
getRecip((2 * h * y));
386 uint32_t loopCnt = (k - m);
389 dataType c1, s1, c2, s2, c3, s3, recipz;
390 for (; cnt < loopCnt - 1; cnt = cnt + 2) {
391 h = s * superdiag[i];
392 g = superdiag[i] * c;
394 superdiag[i - 1] = z;
402 g = -x * s1 + g * c1;
412 #if !defined(ENABLE_LDRA_COVERAGE)
426 x = -s3 * g + c3 * y;
427 h = s3 * superdiag[i + 1];
428 g = superdiag[i + 1] * c3;
439 g = -x * s2 + g * c2;
440 h = diag[i + 1] * s2;
441 y = c2 * diag[i + 1];
449 #if !defined(ENABLE_LDRA_COVERAGE)
477 superdiag[i - 1] = z;
489 #if !defined(ENABLE_LDRA_COVERAGE)
504 DSPLIB_diag_sqrt_ci<dataType>(&superdiag[m], &diag[m], (k - m), pBlock);
523 const int colUStride,
524 const int rowUStride,
525 const int colVStride,
526 const int rowVStride,
527 uint32_t enableReducedForm,
536 const int colUStride,
537 const int rowUStride,
538 const int colVStride,
539 const int rowVStride,
540 uint32_t enableReducedForm,
543 template <
typename dataType>
550 dataType *singular_values,
552 const int colUStride,
553 const int rowUStride,
554 const int colVStride,
555 const int rowVStride,
556 uint32_t enableReducedForm,
559 int32_t *maxIndArr = (int32_t *) &pScratch[1 * Ncols];
560 dataType *sortedSingular = &pScratch[2 * Ncols];
567 DSPLIB_svd_blk_move_ci<dataType>(sortedSingular, singular_values, 1, Ncols, 0, 0, pBlock);
577 DSPLIB_svd_blk_move_ci<dataType>(U, U1, Ncols, Nrows, colUStride, rowUStride, pBlock);
578 DSPLIB_svd_blk_move_ci<dataType>(V, V1, Ncols, Ncols, colVStride, rowVStride, pBlock);
588 float *singular_values,
590 const int colUStride,
591 const int rowUStride,
592 const int colVStride,
593 const int rowVStride,
594 uint32_t enableReducedForm,
602 double *singular_values,
604 const int colUStride,
605 const int rowUStride,
606 const int colVStride,
607 const int rowVStride,
608 uint32_t enableReducedForm,
611 template <
typename dataType>
616 void *restrict pDiag,
617 void *restrict pSuperDiag,
620 void *restrict pScratch)
634 dataType *pALocal = (dataType *) pA;
635 dataType *pULocal = (dataType *) pU;
636 dataType *pVLocal = (dataType *) pV;
637 dataType *pDiagLocal = (dataType *) pDiag;
638 dataType *pSuperDiagLocal = (dataType *) pSuperDiag;
639 dataType *pU1Local = (dataType *) pU1;
640 dataType *pV1Local = (dataType *) pV1;
641 dataType *pScratchLocal = (dataType *) pScratch;
645 uint8_t *pBlock = pKerPrivArgs->
bufPblock;
646 uint32_t Nrows = pKerPrivArgs->
heightIn;
647 uint32_t Ncols = pKerPrivArgs->
widthIn;
648 int32_t strideIn = pKerPrivArgs->
strideIn;
649 int32_t strideU = pKerPrivArgs->
strideU;
651 int32_t strideV = pKerPrivArgs->
strideV;
662 if (Nrows >= Ncols) {
671 int32_t dataSize =
sizeof(dataType);
672 int32_t colUStride = strideU / dataSize;
673 int32_t rowUStride = strideURows / dataSize;
674 int32_t colVStride = strideV / dataSize;
675 int32_t rowVStride = strideVRows / dataSize;
676 int32_t colAStride = strideIn / dataSize;
677 if (Nrows >= Ncols) {
679 DSPLIB_svd_blk_move_ci<dataType>(pULocal, pALocal, Nrows1, Ncols1, colUStride, colAStride, pBlock);
683 DSPLIB_matTrans_exec_ci<dataType>(pMatTransKerPrivArgs1, pALocal, pULocal);
689 DSPLIB_svd_convert_to_bidiag_ci<dataType>(Nrows1, Ncols1, pULocal, pV1Local, pDiagLocal, pSuperDiagLocal, colUStride,
690 colVStride, enableReducedForm, pScratchLocal, pBlock);
695 if (enableReducedForm == 0u) {
696 DSPLIB_matTrans_exec_ci<dataType>(pMatTransKerPrivArgs2, pULocal, pU1Local);
699 DSPLIB_matTrans_exec_ci<dataType>(pMatTransKerPrivArgs3, pULocal, pU1Local);
702 #if !defined(ENABLE_LDRA_COVERAGE)
703 int svd_status = DSPLIB_svd_bidiag_to_diag_ci<dataType>(Nrows1, Ncols1, pU1Local, pV1Local, pDiagLocal, pSuperDiagLocal,
704 pScratchLocal, colUStride, rowUStride, colVStride, rowVStride,
705 enableReducedForm, pBlock);
707 DSPLIB_svd_bidiag_to_diag_ci<dataType>(Nrows1, Ncols1, pU1Local, pV1Local, pDiagLocal, pSuperDiagLocal,
708 pScratchLocal, colUStride, rowUStride, colVStride, rowVStride,
709 enableReducedForm, pBlock);
715 DSPLIB_svd_sort_singular_values_ci<dataType>(Nrows1, Ncols1, pU1Local, pULocal, pV1Local, pVLocal, pDiagLocal,
716 pScratchLocal, colUStride, rowUStride, colVStride, rowVStride,
717 enableReducedForm, pBlock);
719 if (enableReducedForm == 0u) {
720 DSPLIB_matTrans_exec_ci<dataType>(pMatTransKerPrivArgs2, pU1Local, pULocal);
723 DSPLIB_matTrans_exec_ci<dataType>(pMatTransKerPrivArgs4, pU1Local, pULocal);
725 DSPLIB_matTrans_exec_ci<dataType>(pMatTransKerPrivArgs5, pV1Local, pVLocal);
731 if (enableReducedForm == 0u) {
732 DSPLIB_svd_blk_move_ci<dataType>(pU1Local, pVLocal, Nrows, Nrows, rowUStride, colVStride, pBlock);
733 DSPLIB_svd_blk_move_ci<dataType>(pVLocal, pULocal, Ncols, Ncols, colVStride, colUStride, pBlock);
734 DSPLIB_svd_blk_move_ci<dataType>(pULocal, pU1Local, Nrows, Nrows, colUStride, rowUStride, pBlock);
737 DSPLIB_svd_blk_move_ci<dataType>(pU1Local, pVLocal, Nrows, Nrows, rowUStride, colVStride, pBlock);
738 DSPLIB_svd_blk_move_ci<dataType>(pVLocal, pULocal, Ncols, Nrows, colVStride, colUStride, pBlock);
739 DSPLIB_svd_blk_move_ci<dataType>(pULocal, pU1Local, Nrows, Nrows, colUStride, rowUStride, pBlock);
743 #if !defined(ENABLE_LDRA_COVERAGE)
756 void *restrict pDiag,
757 void *restrict pSuperDiag,
760 void *restrict pScratch);
766 void *restrict pDiag,
767 void *restrict pSuperDiag,
770 void *restrict pScratch);
void DSPLIB_bidiag_uFinal_ci(dataType *U, int32_t Nrows, int32_t Ncols, int32_t colUStride, dataType s, dataType *U1, uint8_t *pBlock)
This function implements the process corresponding to the "update U" loop in the natural implementation.
void DSPLIB_bidiag_uFinal_initalize_ci(dataType *U, int32_t Nrows, int32_t Ncols, int32_t colUStride, dataType s, dataType *U1, uint8_t *pBlock)
This function implements the process corresponding to the "initial U" loop in the natural implementation.
void DSPLIB_bidiag_uFinal_normalize_ci(dataType *U, int32_t Nrows, dataType s, int32_t colUStride, uint8_t *pBlock)
This function normalizes the column of input matrix U.
template int DSPLIB_svd_bidiag_to_diag_ci< float >(const int Nrows, const int Ncols, float *U, float *V, float *diag, float *superdiag, float *pTemp, const int colUStride, const int rowUStride, const int colVStride, const int rowVStride, uint32_t enableReducedForm, uint8_t *pBlock)
template DSPLIB_STATUS DSPLIB_svd_exec_ci< double >(DSPLIB_kernelHandle handle, void *restrict pA, void *restrict pU, void *restrict pV, void *restrict pDiag, void *restrict pSuperDiag, void *restrict pU1, void *restrict pV1, void *restrict pScratch)
DSPLIB_STATUS DSPLIB_svd_exec_ci(DSPLIB_kernelHandle handle, void *restrict pA, void *restrict pU, void *restrict pV, void *restrict pDiag, void *restrict pSuperDiag, void *restrict pU1, void *restrict pV1, void *restrict pScratch)
This function is the main execution function for the C7x implementation of the kernel....
template int DSPLIB_svd_bidiag_to_diag_ci< double >(const int Nrows, const int Ncols, double *U, double *V, double *diag, double *superdiag, double *pTemp, const int colUStride, const int rowUStride, const int colVStride, const int rowVStride, uint32_t enableReducedForm, uint8_t *pBlock)
template float getSqrt< float >(float a)
template int DSPLIB_svd_sort_singular_values_ci< double >(const int Nrows, const int Ncols, double *U, double *U1, double *V, double *V1, double *singular_values, double *pScratch, const int colUStride, const int rowUStride, const int colVStride, const int rowVStride, uint32_t enableReducedForm, uint8_t *pBlock)
template int DSPLIB_svd_convert_to_bidiag_ci< float >(const int Nrows, const int Ncols, float *U, float *V, float *diag, float *superdiag, const int colUStride, const int colVStride, uint32_t enableReducedForm, float *U1, uint8_t *pBlock)
template DSPLIB_STATUS DSPLIB_svd_init_ci< float >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams2D_t *bufParamsIn, const DSPLIB_bufParams2D_t *bufParamsU, const DSPLIB_bufParams2D_t *bufParamsV, const DSPLIB_bufParams1D_t *bufParamsDiag, const DSPLIB_bufParams1D_t *bufParamsSuperDiag, const DSPLIB_svdInitArgs *pKerInitArgs)
template DSPLIB_STATUS DSPLIB_svd_exec_ci< float >(DSPLIB_kernelHandle handle, void *restrict pA, void *restrict pU, void *restrict pV, void *restrict pDiag, void *restrict pSuperDiag, void *restrict pU1, void *restrict pV1, void *restrict pScratch)
static dataType getRecipSqrt(dataType a)
static dataType getSqrt(dataType a)
int DSPLIB_svd_sort_singular_values_ci(const int Nrows, const int Ncols, dataType *U, dataType *U1, dataType *V, dataType *V1, dataType *singular_values, dataType *pScratch, const int colUStride, const int rowUStride, const int colVStride, const int rowVStride, uint32_t enableReducedForm, uint8_t *pBlock)
template int DSPLIB_svd_sort_singular_values_ci< float >(const int Nrows, const int Ncols, float *U, float *U1, float *V, float *V1, float *singular_values, float *pScratch, const int colUStride, const int rowUStride, const int colVStride, const int rowVStride, uint32_t enableReducedForm, uint8_t *pBlock)
template double getRecipSqrt< double >(double a)
int DSPLIB_svd_bidiag_to_diag_ci(const int Nrows, const int Ncols, dataType *U, dataType *V, dataType *diag, dataType *superdiag, dataType *pTemp, const int colUStride, const int rowUStride, const int colVStride, const int rowVStride, uint32_t enableReducedForm, uint8_t *pBlock)
template int DSPLIB_svd_convert_to_bidiag_ci< double >(const int Nrows, const int Ncols, double *U, double *V, double *diag, double *superdiag, const int colUStride, const int colVStride, uint32_t enableReducedForm, double *U1, uint8_t *pBlock)
int DSPLIB_svd_convert_to_bidiag_ci(const int Nrows, const int Ncols, dataType *U, dataType *V, dataType *diag, dataType *superdiag, const int colUStride, const int colVStride, uint32_t enableReducedForm, dataType *U1, uint8_t *pBlock)
DSPLIB_STATUS DSPLIB_svd_init_ci(DSPLIB_kernelHandle handle, const DSPLIB_bufParams2D_t *bufParamsIn, const DSPLIB_bufParams2D_t *bufParamsU, const DSPLIB_bufParams2D_t *bufParamsV, const DSPLIB_bufParams1D_t *bufParamsDiag, const DSPLIB_bufParams1D_t *bufParamsSuperDiag, const DSPLIB_svdInitArgs *pKerInitArgs)
This function is the initialization function for the C7x implementation of the kernel....
template DSPLIB_STATUS DSPLIB_svd_init_ci< double >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams2D_t *bufParamsIn, const DSPLIB_bufParams2D_t *bufParamsU, const DSPLIB_bufParams2D_t *bufParamsV, const DSPLIB_bufParams1D_t *bufParamsDiag, const DSPLIB_bufParams1D_t *bufParamsSuperDiag, const DSPLIB_svdInitArgs *pKerInitArgs)
template float getRecipSqrt< float >(float a)
template double getSqrt< double >(double a)
#define MAX_ITERATION_COUNT
void DSPLIB_diag_negate_v_ci(dataType *V, int32_t Ncols, int32_t colVStride, uint8_t *pBlock)
Negates the values of a row in V.
void DSPLIB_diag_rotation_check_ci(dataType *diag, dataType *superdiag, dataType epsilon, int32_t *m, int32_t *rotation_test, int32_t Ncols, uint8_t *pBlock)
Updates the values of "m" and the "rotation_test" flag based on the values present in "diag",...
void DSPLIB_diag_epsilon_ci(dataType *diag, dataType *superdiag, dataType *epsilon, int32_t Ncols, uint8_t *pBlock)
Updates "epsilon" value based on absolute max values from "diag" and "superdiag" vectors.
void DSPLIB_diag_proc_ci(dataType *V, int32_t startRow, int32_t Nrows, int32_t Ncols, int32_t rowVStride, dataType *cV, dataType *sV, uint8_t *pBlock)
Updates rows of V' and U' based on the precalculated cV/cU and sV/sU vectors.
Header file for kernel's internal use. For the kernel's interface, please see DSPLIB_svd.
void DSPLIB_singularSort_index_ci(dataType *singular_values, dataType *singularBuffer, int32_t *maxIndArr, int32_t Ncols, uint8_t *pBlock)
This function sorts the singular values in descending order and also records the max index values for...
dataType getRecip(dataType value)
void DSPLIB_singularSort_swap_ci(dataType *V, int32_t Nrows, int32_t Ncols, int32_t rowVStride, int32_t *sortIndex, dataType *vBuff, uint8_t *pBlock)
This function uses the max index values calculated from DSPLIB_singularSort_index_ci to shuffle the r...
#define DSPLIB_DEBUGPRINTFN(N, fmt,...)
DSPLIB_STATUS_NAME
The enumeration of all status codes.
void * DSPLIB_kernelHandle
Handle type for DSPLIB operations.
A structure for a 1 dimensional buffer descriptor.
A structure for a 2 dimensional buffer descriptor.
Structure that is reserved for internal use by the kernel.
Structure containing the parameters to initialize the kernel.
Structure that is reserved for internal use by the kernel.
uint32_t widthIn
Size of input buffer for different batches DSPLIB_svd_init that will be retrieved and used by DSPLIB_...
DSPLIB_matTrans_PrivArgs pMatTransKerPrivArgs5
DSPLIB_matTrans_PrivArgs pMatTransKerPrivArgs2
uint32_t strideU
Stride between rows of U matrix
uint8_t bufPblock[DSPLIB_SVD_IXX_IXX_OXX_PBLOCK_SIZE]
Buffer to save SE & SA configuration parameters
DSPLIB_matTrans_PrivArgs pMatTransKerPrivArgs4
uint32_t enableReducedForm
Flag for enabling the calculation of reduced form: enableReducedForm = 1 for reduced form SVD calc ena...
DSPLIB_matTrans_PrivArgs pMatTransKerPrivArgs1
Privargs for the matTrans kernel.
int32_t strideIn
Stride between rows of input data matrix
uint32_t strideV
Stride between rows of V matrix
DSPLIB_matTrans_PrivArgs pMatTransKerPrivArgs3
uint32_t heightIn
Height of input data matrix