56 #if (__C7X_VEC_SIZE_BITS__ == 512)
// Primary template declaration of sort_singular_vals, parameterized on the element type.
// Only a declaration appears here (it ends in ';'); the per-type behaviour comes from the
// explicit specializations for float and double.
// The first parameter is a pointer to the values of type dataType and the last is the
// integer stride colVStride. NOTE(review): the units of the stride (elements vs. bytes)
// are not stated here -- confirm against the specializations and the caller.
58 template <
typename dataType>
59 inline void sort_singular_vals(dataType *diag,
65 const int colVStride);
68 inline void sort_singular_vals<float>(
float *diag,
// Double-precision specialization of sort_singular_vals.
// Sorts the values at singular_values into descending order with a single vector sort,
// then applies the same reordering to rows of V and U so their entries follow the
// reordered values. Everything is done in place through predicated vector loads/stores.
// NOTE(review): the row counts are hard-coded below (six rows of V, seven rows of U) and a
// single vector load covers all values, so this presumably targets one fixed, small
// problem size -- confirm against the caller.
80 inline void sort_singular_vals<double>(
double *singular_values,
// Full-width vector types for double and float elements.
89 typedef typename c7x::make_full_vector<double>::type vec_dp;
90 typedef typename c7x::make_full_vector<float>::type vec_sp;
// Predicate derived from Ncols; it gates every predicated double-vector load and store
// in this function. Presumably it enables the first Ncols 64-bit lanes -- TODO confirm.
92 __vpred pred_diag_elems = __mask_long((uint32_t) Ncols);
// Load the values to be sorted under the predicate.
94 vec_dp vec_diag = __vload_pred(pred_diag_elems, (vec_dp *) singular_values);
// Narrow to single precision so that the float sort intrinsics can be used.
// NOTE(review): the values written back below are derived from this float data, so the
// stored results appear to carry only single precision -- confirm this is acceptable.
96 vec_sp vec_diag_temp = __double_to_float(vec_diag);
// View the floats as 64-bit lanes, shift each lane left by 32 bits, and add the result to
// the unshifted vector. Presumably this replicates every converted value into both 32-bit
// halves of its 64-bit lane, which relies on the other half being zero after
// __double_to_float -- TODO confirm against the intrinsic's documentation.
98 vec_sp test_vec_1 = c7x::as_float_vec(__shift_left_full(c7x::as_ulong_vec(vec_diag_temp), 32u));
100 vec_sp test_vec_2 = vec_diag_temp + test_vec_1;
// Byte-level permutation control corresponding to a descending sort of test_vec_2.
102 c7x::uchar_vec vec_rearrange = __sort_desc_perm(test_vec_2);
// Apply __swap to the control viewed as 32-bit words. Presumably this fixes up the order
// of the two 32-bit halves so the control moves whole 64-bit doubles -- TODO confirm.
104 vec_rearrange = c7x::as_uchar_vec(__swap(c7x::as_uint_vec(vec_rearrange)));
// Sort the values themselves in descending order, widen the low floats back to double,
// and overwrite the input array under the same predicate.
106 vec_sp vec_sorted_temp = __sort_desc(test_vec_2);
107 vec_dp vec_sorted_sv = __low_float_to_double(vec_sorted_temp);
108 __vstore_pred(pred_diag_elems, (vec_dp *)singular_values, vec_sorted_sv);
// Load rows 0..5 of V; row r starts at V + r * colVStride.
114 vec_dp vec_v_in0 = __vload_pred(pred_diag_elems, (vec_dp *) (V + 0 * colVStride));
115 vec_dp vec_v_in1 = __vload_pred(pred_diag_elems, (vec_dp *) (V + 1 * colVStride));
116 vec_dp vec_v_in2 = __vload_pred(pred_diag_elems, (vec_dp *) (V + 2 * colVStride));
117 vec_dp vec_v_in3 = __vload_pred(pred_diag_elems, (vec_dp *) (V + 3 * colVStride));
118 vec_dp vec_v_in4 = __vload_pred(pred_diag_elems, (vec_dp *) (V + 4 * colVStride));
119 vec_dp vec_v_in5 = __vload_pred(pred_diag_elems, (vec_dp *) (V + 5 * colVStride));
// Reorder the entries of each V row with the permutation control obtained from the sort.
121 vec_v_in0 = c7x::as_double_vec(__permute(vec_rearrange, c7x::as_uchar_vec(vec_v_in0)));
122 vec_v_in1 = c7x::as_double_vec(__permute(vec_rearrange, c7x::as_uchar_vec(vec_v_in1)));
123 vec_v_in2 = c7x::as_double_vec(__permute(vec_rearrange, c7x::as_uchar_vec(vec_v_in2)));
124 vec_v_in3 = c7x::as_double_vec(__permute(vec_rearrange, c7x::as_uchar_vec(vec_v_in3)));
125 vec_v_in4 = c7x::as_double_vec(__permute(vec_rearrange, c7x::as_uchar_vec(vec_v_in4)));
126 vec_v_in5 = c7x::as_double_vec(__permute(vec_rearrange, c7x::as_uchar_vec(vec_v_in5)));
// Write the reordered V rows back to the locations they were loaded from.
128 __vstore_pred(pred_diag_elems, (vec_dp *)(V + 0 * colVStride), vec_v_in0);
129 __vstore_pred(pred_diag_elems, (vec_dp *)(V + 1 * colVStride), vec_v_in1);
130 __vstore_pred(pred_diag_elems, (vec_dp *)(V + 2 * colVStride), vec_v_in2);
131 __vstore_pred(pred_diag_elems, (vec_dp *)(V + 3 * colVStride), vec_v_in3);
132 __vstore_pred(pred_diag_elems, (vec_dp *)(V + 4 * colVStride), vec_v_in4);
133 __vstore_pred(pred_diag_elems, (vec_dp *)(V + 5 * colVStride), vec_v_in5);
// Same load / permute / store sequence for rows 0..5 of U, using colUStride as row stride.
140 vec_dp vec_u_in0 = __vload_pred(pred_diag_elems, (vec_dp *) (U + 0 * colUStride));
141 vec_dp vec_u_in1 = __vload_pred(pred_diag_elems, (vec_dp *) (U + 1 * colUStride));
142 vec_dp vec_u_in2 = __vload_pred(pred_diag_elems, (vec_dp *) (U + 2 * colUStride));
143 vec_dp vec_u_in3 = __vload_pred(pred_diag_elems, (vec_dp *) (U + 3 * colUStride));
144 vec_dp vec_u_in4 = __vload_pred(pred_diag_elems, (vec_dp *) (U + 4 * colUStride));
145 vec_dp vec_u_in5 = __vload_pred(pred_diag_elems, (vec_dp *) (U + 5 * colUStride));
147 vec_u_in0 = c7x::as_double_vec(__permute(vec_rearrange, c7x::as_uchar_vec(vec_u_in0)));
148 vec_u_in1 = c7x::as_double_vec(__permute(vec_rearrange, c7x::as_uchar_vec(vec_u_in1)));
149 vec_u_in2 = c7x::as_double_vec(__permute(vec_rearrange, c7x::as_uchar_vec(vec_u_in2)));
150 vec_u_in3 = c7x::as_double_vec(__permute(vec_rearrange, c7x::as_uchar_vec(vec_u_in3)));
151 vec_u_in4 = c7x::as_double_vec(__permute(vec_rearrange, c7x::as_uchar_vec(vec_u_in4)));
152 vec_u_in5 = c7x::as_double_vec(__permute(vec_rearrange, c7x::as_uchar_vec(vec_u_in5)));
154 __vstore_pred(pred_diag_elems, (vec_dp *)(U + 0 * colUStride), vec_u_in0);
155 __vstore_pred(pred_diag_elems, (vec_dp *)(U + 1 * colUStride), vec_u_in1);
156 __vstore_pred(pred_diag_elems, (vec_dp *)(U + 2 * colUStride), vec_u_in2);
157 __vstore_pred(pred_diag_elems, (vec_dp *)(U + 3 * colUStride), vec_u_in3);
158 __vstore_pred(pred_diag_elems, (vec_dp *)(U + 4 * colUStride), vec_u_in4);
159 __vstore_pred(pred_diag_elems, (vec_dp *)(U + 5 * colUStride), vec_u_in5);
// A seventh row of U (index 6) receives the same treatment.
// NOTE(review): confirm whether this row is guarded by a condition (e.g. on the number of
// rows) that is not visible at this point.
163 vec_dp vec_u_in6 = __vload_pred(pred_diag_elems, (vec_dp *) (U + 6 * colUStride));
164 vec_u_in6 = c7x::as_double_vec(__permute(vec_rearrange, c7x::as_uchar_vec(vec_u_in6)));
165 __vstore_pred(pred_diag_elems, (vec_dp *)(U + 6 * colUStride), vec_u_in6);
Header file for kernel's internal use. For the kernel's interface, please see DSPLIB_svd.