// Fragment of a stream-parameter setup routine (presumably
// DSPLIB_singularSort_swap_init_ci; its signature is not part of this listing).
// It prepares two streaming-engine (SE) templates and one streaming-address
// (SA) template and saves them into the kernel's parameter block.
// pBlock points at the parameter buffer held in the kernel's private arguments.
44 uint8_t *pBlock = pKerPrivArgs->
bufPblock;
// Start every template from the defaults returned by the __gen_* helpers.
45 __SE_TEMPLATE_v1 se0Params = __gen_SE_TEMPLATE_v1();
46 __SA_TEMPLATE_v1 sa2Params = __gen_SA_TEMPLATE_v1();
47 __SE_TEMPLATE_v1 se1Params = __gen_SE_TEMPLATE_v1();
// Derive the SE element type, SE vector length and lane count from the
// full-width vector type of dataType.
49 typedef typename c7x::make_full_vector<dataType>::type vec;
50 __SE_ELETYPE SE_ELETYPE = c7x::se_eletype<vec>::value;
51 __SE_VECLEN SE_VECLEN = c7x::se_veclen<vec>::value;
52 int32_t eleCount = c7x::element_count_of<vec>::value;
// se0Params: 2D stream. Only format, element type and vector length are set in
// the lines shown here; counts and strides are not set in this fragment.
54 se0Params.DIMFMT = __SE_DIMFMT_2D;
55 se0Params.ELETYPE = SE_ELETYPE;
56 se0Params.VECLEN = SE_VECLEN;
// se1Params: 3D stream whose innermost count and DIM1 stride are both one
// vector's worth of elements (eleCount), with dimension-1 decrement enabled.
// ICNT1, ICNT2 and DECDIM1_WIDTH are not set in the lines shown here.
58 se1Params.ICNT0 = eleCount;
59 se1Params.DIM1 = eleCount;
61 se1Params.DIMFMT = __SE_DIMFMT_3D;
62 se1Params.ELETYPE = SE_ELETYPE;
63 se1Params.VECLEN = SE_VECLEN;
64 se1Params.DECDIM1 = __SE_DECDIM_DIM1;
// sa2Params: 1D address generator configured for a single element per access.
66 sa2Params.DIMFMT = __SA_DIMFMT_1D;
67 sa2Params.VECLEN = __SA_VECLEN_1ELEM;
// Store the templates at fixed slots of the parameter block (slot index times
// SE_PARAM_SIZE bytes): slot 22 <- se0Params, slot 23 <- sa2Params,
// slot 24 <- se1Params.
69 *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (22 * SE_PARAM_SIZE)) = se0Params;
70 *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (23 * SE_PARAM_SIZE)) = sa2Params;
71 *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (24 * SE_PARAM_SIZE)) = se1Params;
// Fragment of a templated selection routine (presumably
// DSPLIB_singularSort_index_ci; the name line and several declarations,
// including maxVal and maxInd, are not part of this listing).
// For each i in [0, Ncols) the visible code scans singularBuffer through
// streaming engine 0, writes the selected value to singular_values[i], records
// i in maxIndArr at the selected position, and overwrites that position in
// singularBuffer with -max so that it is not selected again.
88 template <
typename dataType>
90 dataType *singularBuffer,
// Load the SE template stored at slot 24 of the parameter block.
97 __SE_TEMPLATE_v1 se0Params = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (24 * SE_PARAM_SIZE));
99 typedef typename c7x::make_full_vector<dataType>::type vec;
100 int32_t eleCount = c7x::element_count_of<vec>::value;
// Number of full-width vectors used to cover Ncols elements (presumably a
// rounded-up division, going by the helper's name).
102 int32_t nVec = DSPLIB_ceilingDiv(Ncols, eleCount);
// Fill in the run-time counts of the loaded template.
104 se0Params.ICNT1 = nVec;
105 se0Params.ICNT2 = Ncols;
106 se0Params.DECDIM1_WIDTH = Ncols;
// NOTE(review): SE0 is opened here and opened again at the top of every
// iteration of the loop below (listing line 118). The lines in between are not
// shown; confirm whether this first open is still required.
108 __SE0_OPEN(singularBuffer, se0Params);
// Build a vector holding the lane numbers 0 .. eleCount-1.
112 vec idx_0_to_eleCount;
113 for (int32_t i = 0; i < eleCount; i++) {
114 idx_0_to_eleCount.s[i] = i;
// One selection pass per output element.
117 for (int32_t i = 0; i < Ncols; i++) {
118 __SE0_OPEN(singularBuffer, se0Params);
// vCurrIdx tracks the element index of each lane of the vector being read;
// vIdx tracks, per lane, the index belonging to the running maximum.
119 vec vCurrIdx = idx_0_to_eleCount;
120 vec vIdx = idx_0_to_eleCount;
// Seed the running maximum with -max of dataType.
121 vec maxValVec = (vec) (-std::numeric_limits<dataType>::max());
123 for (int32_t k = 0; k < nVec; k++) {
124 vec v1 = c7x::strm_eng<0, vec>::get_adv();
// Presumably: lanes where v1 <= running max keep the existing value and index;
// the remaining lanes take v1 and its current index (verify __select operand
// order against the compiler documentation).
126 __vpred vpMask = __cmp_le_pred(v1, maxValVec);
127 maxValVec = __select(vpMask, maxValVec, v1);
128 vIdx = __select(vpMask, vIdx, vCurrIdx);
// Advance the per-lane element indices to the next vector.
130 vCurrIdx = vCurrIdx + (eleCount);
// Reduce across lanes; the helper writes its results through &maxVal and &maxInd.
132 c7x_horizontal_max_with_index(maxValVec, vIdx, &maxVal, &maxInd);
// Recompute maxInd from the lanes whose value equals maxVal: non-matching lanes
// are replaced by the largest dataType value before the horizontal-min helper
// is applied to the indices. Indices are carried as dataType lanes, hence the
// cast back to int32_t.
134 __vpred minIdxPred = __cmp_eq_pred((vec)maxVal, maxValVec);
135 vec equalValIdx = __select(minIdxPred, vIdx, (vec)std::numeric_limits<dataType>::max());
136 maxInd = (int32_t)c7x_horizontal_min_fp<dataType, vec>(equalValIdx);
// Knock the selected element out of the buffer, record that the element at
// position maxInd has rank i, and emit the value in rank order.
138 singularBuffer[maxInd] = -std::numeric_limits<dataType>::max();
139 maxIndArr[maxInd] = i;
140 singular_values[i] = maxVal;
148 float *singularBuffer,
153 double *singularBuffer,
// Fragment of a templated row-shuffle routine (presumably
// DSPLIB_singularSort_swap_ci; its signature is not part of this listing).
// Rows of V are read through streaming engines 0 and 1 and stored into vBuff at
// the row offset given by the matching sortIndex entry times rowVStride.
// The first Nrows/2 rows use SE0/SA0/SA2; the remaining rows use SE1/SA1/SA3.
163 template <
typename dataType>
174 typedef typename c7x::make_full_vector<dataType>::type vec;
175 int32_t eleCount = c7x::element_count_of<vec>::value;
177 __SE_TEMPLATE_v1 se0Params, se1Params;
178 __SA_TEMPLATE_v1 sa0Params, sa1Params, sa2Params, sa3Params;
// Load templates from the parameter block: both SE templates from slot 22,
// SA0/SA1 from slot 1, SA2/SA3 from slot 23.
// NOTE(review): slot 1 is not written by any line visible in this listing;
// presumably it is initialised elsewhere - confirm.
179 se0Params = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (22 * SE_PARAM_SIZE));
180 se1Params = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + (22 * SE_PARAM_SIZE));
181 sa0Params = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (1 * SE_PARAM_SIZE));
182 sa1Params = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (1 * SE_PARAM_SIZE));
183 sa2Params = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (23 * SE_PARAM_SIZE));
184 sa3Params = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + (23 * SE_PARAM_SIZE));
// Vectors per row, and the split of rows between the two streams. When Nrows
// is odd the second stream gets the extra row.
186 int32_t nVec = DSPLIB_ceilingDiv(Ncols, eleCount);
187 int32_t se0ICNT1 = Nrows / 2;
188 int32_t se1ICNT1 = Nrows - se0ICNT1;
// Each read stream and its paired store address generator cover Ncols elements
// per row over that stream's share of the rows.
190 se0Params.ICNT0 = sa0Params.ICNT0 = Ncols;
191 se0Params.ICNT1 = sa0Params.ICNT1 = se0ICNT1;
193 se1Params.ICNT0 = sa1Params.ICNT0 = Ncols;
194 se1Params.ICNT1 = sa1Params.ICNT1 = se1ICNT1;
// Row-to-row stride of the read streams.
196 se0Params.DIM1 = se1Params.DIM1 = rowVStride;
// SA2/SA3 step through one sortIndex entry per row of their half.
198 sa2Params.ICNT0 = se0ICNT1;
199 sa3Params.ICNT0 = se1ICNT1;
// Second half: SE1 starts reading at row se0ICNT1 of V.
201 __SE1_OPEN(V + (rowVStride * se0ICNT1), se1Params);
202 __SA1_OPEN(sa1Params);
203 __SA3_OPEN(sa3Params);
// First half: SE0 starts reading at the beginning of V.
206 __SE0_OPEN(V, se0Params);
207 __SA0_OPEN(sa0Params);
208 __SA2_OPEN(sa2Params);
// Process one row from each half per iteration.
210 for (int32_t vertical = 0; vertical < se0ICNT1; vertical++) {
// Fetch the destination row index for each of the two rows and convert it to
// an element offset into vBuff.
211 int32_t *pIndex1 = c7x::strm_agen<2, int32_t>::get_adv(sortIndex);
212 int32_t *pIndex2 = c7x::strm_agen<3, int32_t>::get_adv(sortIndex + se0ICNT1);
213 int32_t offset1 = *pIndex1 * rowVStride;
214 int32_t offset2 = *pIndex2 * rowVStride;
216 for (int32_t horizontal = 0; horizontal < nVec; horizontal++) {
217 vec v1 = c7x::strm_eng<0, vec>::get_adv();
218 vec v2 = c7x::strm_eng<1, vec>::get_adv();
// Predicated stores: the predicate is taken from the address generator before
// it is advanced, then used to store the vector at the generated address.
220 __vpred pred1 = c7x::strm_agen<0, vec>::get_vpred();
221 vec *pStore1 = c7x::strm_agen<0, vec>::get_adv(vBuff + offset1);
222 __vstore_pred(pred1, pStore1, v1);
224 __vpred pred2 = c7x::strm_agen<1, vec>::get_vpred();
225 vec *pStore2 = c7x::strm_agen<1, vec>::get_adv(vBuff + offset2);
226 __vstore_pred(pred2, pStore2, v2);
// The two halves differ in size only when Nrows is odd; handle the one
// remaining row of the second half using SE1/SA1/SA3 alone.
231 if (se0ICNT1 != se1ICNT1) {
232 int32_t *pIndex2 = c7x::strm_agen<3, int32_t>::get_adv(sortIndex + se0ICNT1);
233 int32_t offset2 = *pIndex2 * rowVStride;
234 for (int32_t horizontal = 0; horizontal < nVec; horizontal++) {
235 vec v2 = c7x::strm_eng<1, vec>::get_adv();
237 __vpred pred2 = c7x::strm_agen<1, vec>::get_vpred();
238 vec *pStore2 = c7x::strm_agen<1, vec>::get_adv(vBuff + offset2);
239 __vstore_pred(pred2, pStore2, v2);
Header file for kernel's internal use. For the kernel's interface, please see DSPLIB_svd.
template void DSPLIB_singularSort_index_ci< double >(double *singular_values, double *singularBuffer, int32_t *maxIndArr, int32_t Ncols, uint8_t *pBlock)
void DSPLIB_singularSort_swap_init_ci(DSPLIB_kernelHandle handle)
void DSPLIB_singularSort_index_ci(dataType *singular_values, dataType *singularBuffer, int32_t *maxIndArr, int32_t Ncols, uint8_t *pBlock)
This function sorts the singular values in descending order and also records the max index values for later use by DSPLIB_singularSort_swap_ci.
template void DSPLIB_singularSort_swap_ci< double >(double *V, int32_t Nrows, int32_t Ncols, int32_t rowVStride, int32_t *sortIndex, double *vBuff, uint8_t *pBlock)
template void DSPLIB_singularSort_swap_init_ci< float >(DSPLIB_kernelHandle handle)
template void DSPLIB_singularSort_swap_ci< float >(float *V, int32_t Nrows, int32_t Ncols, int32_t rowVStride, int32_t *sortIndex, float *vBuff, uint8_t *pBlock)
template void DSPLIB_singularSort_swap_init_ci< double >(DSPLIB_kernelHandle handle)
template void DSPLIB_singularSort_index_ci< float >(float *singular_values, float *singularBuffer, int32_t *maxIndArr, int32_t Ncols, uint8_t *pBlock)
void DSPLIB_singularSort_swap_ci(dataType *V, int32_t Nrows, int32_t Ncols, int32_t rowVStride, int32_t *sortIndex, dataType *vBuff, uint8_t *pBlock)
This function uses the max index values calculated from DSPLIB_singularSort_index_ci to shuffle the rows of V into vBuff.
#define DSPLIB_DEBUGPRINTFN(N, fmt,...)
void * DSPLIB_kernelHandle
Handle type for DSPLIB operations.
Structure that is reserved for internal use by the kernel.
uint8_t bufPblock[DSPLIB_SVD_IXX_IXX_OXX_PBLOCK_SIZE]
Buffer to save SE & SA configuration parameters