29 #include "../../../common/c71/FFTLIB_debug.h"
30 #include "../../FFTLIB_fft1dBatched_i32fc_c32fc_o32fc/FFTLIB_fft1dBatched_i32fc_c32fc_o32fc.h"
31 #include "../FFTLIB_fft1dBatched_i32f_c32fc_o32fc.h"
32 #include "FFTLIB_types.h"
/* Byte offsets into the caller-supplied pBlock buffer at which the streaming
 * engine (SE) and streaming address generator (SA) parameter templates are
 * stored by the init code below (e.g. the se1_param, sa0_param..sa3_param
 * templates are written to pBlock + <offset>).
 *
 * Each slot is SE_PARAM_SIZE bytes wide; note that the SA slots are also
 * spaced by SE_PARAM_SIZE rather than by a separate SA-specific size.
 * SE_PARAM_SIZE itself is defined outside this excerpt.
 *
 * The nested complex-FFT init/kernel calls are handed
 * &pBlock[7 * SE_PARAM_SIZE], i.e. a region past the six slots defined here.
 */
35 #define SE_PARAM_BASE (0x0000)
36 #define SE0_PARAM_OFFSET (SE_PARAM_BASE)
37 #define SE1_PARAM_OFFSET (SE0_PARAM_OFFSET + SE_PARAM_SIZE)
38 #define SA0_PARAM_OFFSET (SE1_PARAM_OFFSET + SE_PARAM_SIZE)
39 #define SA1_PARAM_OFFSET (SA0_PARAM_OFFSET + SE_PARAM_SIZE)
40 #define SA2_PARAM_OFFSET (SA1_PARAM_OFFSET + SE_PARAM_SIZE)
41 #define SA3_PARAM_OFFSET (SA2_PARAM_OFFSET + SE_PARAM_SIZE)
/* CV: alias for the c7x complex-float vector type. It is the element type
 * passed to c7x::element_count_of<>, c7x::se_veclen<>, c7x::sa_veclen<>,
 * c7x::strm_eng<> and c7x::strm_agen<> throughout this file. */
43 typedef typename c7x::cfloat_vec
CV;
/* V: alias for the c7x float vector type (no use is visible in this excerpt). */
44 typedef typename c7x::float_vec
V;
64 __SE_TEMPLATE_v1 se0_param = __gen_SE_TEMPLATE_v1();
65 __SE_TEMPLATE_v1 se1_param = __gen_SE_TEMPLATE_v1();
66 __SA_TEMPLATE_v1 sa0_param = __gen_SA_TEMPLATE_v1();
67 __SA_TEMPLATE_v1 sa1_param = __gen_SA_TEMPLATE_v1();
68 __SA_TEMPLATE_v1 sa2_param = __gen_SA_TEMPLATE_v1();
69 __SA_TEMPLATE_v1 sa3_param = __gen_SA_TEMPLATE_v1();
73 bufParamsX_FFT.
dim_x = numChannels * numPoints;
77 (
FFTLIB_F32 *) pXFFT, &bufParamsX_FFT, (numPoints >> 1), numChannels,
78 &((uint8_t *) pBlock)[7 * SE_PARAM_SIZE]);
80 uint32_t numPointsOut = (outmode == 1) ? numPoints : (numPoints >> 1) + 1;
81 uint32_t elementCount = c7x::element_count_of<CV>::value;
82 uint32_t SEBlocks = (numPoints >> 1) / elementCount;
85 se0_param.ICNT0 = elementCount;
86 se0_param.DIM1 = elementCount;
87 se0_param.ICNT1 = SEBlocks;
88 se0_param.DIM2 = (numPoints >> 1);
89 se0_param.ICNT2 = numChannels;
91 se0_param.ELETYPE = __SE_ELETYPE_32BIT_CMPLX_SWAP;
92 se0_param.VECLEN = c7x::se_veclen<CV>::value;
93 se0_param.DIMFMT = __SE_DIMFMT_3D;
96 se1_param.ICNT0 = elementCount;
97 se1_param.DIM1 = -elementCount;
98 se1_param.ICNT1 = SEBlocks;
99 se1_param.DIM2 = (numPoints >> 1);
100 se1_param.ICNT2 = numChannels;
102 se1_param.DIR = __SE_DIR_DEC;
103 se1_param.ELETYPE = __SE_ELETYPE_32BIT_CMPLX_SWAP;
104 se1_param.VECLEN = c7x::se_veclen<CV>::value;
105 se1_param.DIMFMT = __SE_DIMFMT_3D;
106 *((__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock +
SE1_PARAM_OFFSET)) = se1_param;
109 sa2_param.ICNT0 = elementCount;
110 sa2_param.DIM1 = elementCount;
111 sa2_param.ICNT1 = SEBlocks;
113 sa2_param.ICNT2 = numChannels;
115 sa2_param.VECLEN = c7x::sa_veclen<CV>::value;
116 sa2_param.DIMFMT = __SA_DIMFMT_3D;
117 *((__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock +
SA2_PARAM_OFFSET)) = sa2_param;
120 sa3_param.ICNT0 = elementCount;
121 sa3_param.DIM1 = -elementCount;
122 sa3_param.ICNT1 = SEBlocks;
124 sa3_param.ICNT2 = numChannels;
126 sa3_param.VECLEN = c7x::sa_veclen<CV>::value;
127 sa3_param.DIMFMT = __SA_DIMFMT_3D;
128 *((__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock +
SA3_PARAM_OFFSET)) = sa3_param;
130 sa0_param.ICNT0 = elementCount;
131 sa0_param.DIM1 = elementCount;
132 sa0_param.ICNT1 = SEBlocks;
133 sa0_param.DECDIM1 = __SA_DECDIM_DIM1;
134 sa0_param.DECDIM1_WIDTH = numPointsOut - 1;
135 sa0_param.DIM2 = (numPointsOut);
136 sa0_param.ICNT2 = numChannels;
137 sa0_param.VECLEN = c7x::sa_veclen<CV>::value;
138 sa0_param.DIMFMT = __SA_DIMFMT_3D;
139 *((__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock +
SA0_PARAM_OFFSET)) = sa0_param;
143 sa1_param.ICNT0 = elementCount;
144 sa1_param.DIM1 = -elementCount;
145 sa1_param.ICNT1 = SEBlocks;
146 sa1_param.DIM2 = (numPoints);
147 sa1_param.ICNT2 = numChannels;
148 sa1_param.VECLEN = c7x::sa_veclen<CV>::value;
149 sa1_param.DIMFMT = __SA_DIMFMT_3D;
150 *((__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock +
SA1_PARAM_OFFSET)) = sa1_param;
172 uint32_t numChannels,
181 bufParamsX_FFT.
dim_x = numPoints * numChannels;
185 (
FFTLIB_F32 *) pXFFT, &bufParamsX_FFT, (numPoints >> 1), numChannels,
186 &((uint8_t *) pBlock)[7 * SE_PARAM_SIZE]);
188 FFTLIB_asm(
" MARK 6");
190 __SE_TEMPLATE_v1 se0Params = __gen_SE_TEMPLATE_v1();
191 __SE_TEMPLATE_v1 se1Params = __gen_SE_TEMPLATE_v1();
194 __SA_TEMPLATE_v1 sa0Params = __gen_SA_TEMPLATE_v1();
196 __SA_TEMPLATE_v1 sa2Params = __gen_SA_TEMPLATE_v1();
197 __SA_TEMPLATE_v1 sa3Params = __gen_SA_TEMPLATE_v1();
205 uint32_t elementCount = c7x::element_count_of<CV>::value;
206 uint32_t SEBlocks = (numPoints >> 1) / elementCount;
207 uint32_t numPointsOut = (outmode == 1) ? numPoints : (numPoints >> 1) + 1;
212 __SE0_OPEN(pXFFT + 2, se0Params);
213 __SE1_OPEN(pXFFT + numPoints, se1Params);
214 __SA0_OPEN(sa0Params);
215 __SA2_OPEN(sa2Params);
216 __SA3_OPEN(sa3Params);
221 CV regStore0, regStore1;
226 #if defined(_HOST_BUILD)
227 c7x::ulong_vec xorVec = (c7x::ulong_vec)(0x0000000080000000);
230 c7x::ulong_vec xorVec = (0x0000000080000000);
240 __SA_TEMPLATE_v1 sa1Params = __gen_SA_TEMPLATE_v1();
242 __SA1_OPEN(sa1Params);
244 FFTLIB_F32 *restrict pYLocalReverse = pYLocal + (2 * numPoints - (elementCount << 1));
247 #pragma MUST_ITERATE(8, , 8)
249 for (ch = 0; ch < numChannels; ch++) {
250 for (i = 0; i < SEBlocks; i++) {
251 var0 = c7x::strm_eng<0, CV>::get_adv();
252 var1 = c7x::strm_eng<1, CV>::get_adv();
254 tmpSf = c7x::strm_agen<2, CV>::get_vpred();
255 addrSf = c7x::strm_agen<2, CV>::get_adv(pSfLocal + 2);
256 sFA = __vload_pred(tmpSf, addrSf);
258 tmpSf = c7x::strm_agen<3, CV>::get_vpred();
259 addrSf = c7x::strm_agen<3, CV>::get_adv(pSf + (numPoints * 2) - (elementCount * 2) - 2);
260 sFB = __reverse(__vload_pred(tmpSf, addrSf));
262 temp0 = (__complex_multiply(var0, c7x::as_cfloat_vec(sFA)));
263 temp1 = (__complex_conjugate_multiply(var1, c7x::as_cfloat_vec(sFB)));
265 regStore0 = temp0 + temp1;
266 tmp = c7x::strm_agen<0, CV>::get_vpred();
267 addr = c7x::strm_agen<0, CV>::get_adv(pYLocal + 2);
269 __vstore_pred(tmp, addr, regStore0);
271 regStore1 = c7x::as_cfloat_vec((c7x::as_ulong_vec(regStore0)) ^ (xorVec));
273 tmp = c7x::strm_agen<1, CV>::get_vpred();
274 addr = c7x::strm_agen<1, CV>::get_adv(pYLocalReverse);
276 __vstore_pred(tmp, addr, (__reverse(regStore1)));
283 cfloat var2, temp2, temp3;
285 uint32_t c = numPoints * ch;
288 var2 = ((cfloat *) pXFFT)[(c >> 1) + 0];
290 temp2 = __complex_multiply(var2, (((cfloat *) pSf)[0]));
291 temp3 = __complex_conjugate_multiply(var2, (((cfloat *) pSf)[numPoints - 1]));
293 ((cfloat *) pY)[c + 0] = temp2 + temp3;
296 pY[(c << 1) + numPoints] = pXFFT[c + 0] - pXFFT[c + 1];
297 pY[(c << 1) + numPoints + 1] = 0;
308 #pragma MUST_ITERATE(8, , 8)
310 for (ch = 0; ch < numChannels; ch++) {
311 for (i = 0; i < SEBlocks; i++) {
312 var0 = c7x::strm_eng<0, CV>::get_adv();
313 var1 = c7x::strm_eng<1, CV>::get_adv();
315 tmpSf = c7x::strm_agen<2, CV>::get_vpred();
316 addrSf = c7x::strm_agen<2, CV>::get_adv(pSfLocal + 2);
317 sFA = __vload_pred(tmpSf, addrSf);
319 tmpSf = c7x::strm_agen<3, CV>::get_vpred();
320 addrSf = c7x::strm_agen<3, CV>::get_adv(pSf + (numPoints * 2) - (elementCount * 2) - 2);
321 sFB = __reverse(__vload_pred(tmpSf, addrSf));
323 temp0 = (__complex_multiply(var0, c7x::as_cfloat_vec(sFA)));
324 temp1 = (__complex_conjugate_multiply(var1, c7x::as_cfloat_vec(sFB)));
326 regStore0 = temp0 + temp1;
327 tmp = c7x::strm_agen<0, CV>::get_vpred();
328 addr = c7x::strm_agen<0, CV>::get_adv(pYLocal + 2);
330 __vstore_pred(tmp, addr, regStore0);
337 cfloat var2, temp2, temp3;
339 uint32_t c = numPoints * ch;
340 uint32_t cout = numPointsOut * ch;
343 var2 = ((cfloat *) pXFFT)[(c >> 1) + 0];
345 temp2 = __complex_multiply(var2, (((cfloat *) pSf)[0]));
346 temp3 = __complex_conjugate_multiply(var2, (((cfloat *) pSf)[numPoints - 1]));
348 ((cfloat *) pY)[cout + 0] = temp2 + temp3;
354 pY[(cout << 1) + numPoints] = pXFFT[c + 0] - pXFFT[c + 1];
355 pY[(cout << 1) + numPoints + 1] = 0;
377 uint32_t numChannels,
383 if ((pX == NULL) || (pW == NULL) || (pY == NULL)) {
398 else if (((uint64_t) pX) & 0xFu) {
404 else if ((outmode != 0) && (outmode != 1)) {
410 else if (__C7X_VEC_SIZE_BITS__ == 512) {
419 if (bufParamsX->
dim_x & (1u << k)) {
424 if ((1u << k) != bufParamsX->
dim_x) {
FFTLIB_STATUS_NAME
The enumeration of all status codes.
@ FFTLIB_ERR_NOT_IMPLEMENTED
@ FFTLIB_ERR_INVALID_TYPE
@ FFTLIB_ERR_NULL_POINTER
@ FFTLIB_ERR_INVALID_DIMENSION
@ FFTLIB_ERR_INVALID_OUTMODE
@ FFTLIB_ERR_NOT_ALIGNED_PTRS_STRIDES
float FFTLIB_F32
Single precision floating point.
FFTLIB_STATUS FFTLIB_fft1dBatched_i32f_c32fc_o32fc_kernel(FFTLIB_F32 *restrict pX, FFTLIB_bufParams1D_t *bufParamsX, FFTLIB_F32 *restrict pW, FFTLIB_bufParams1D_t *bufParamsW, FFTLIB_F32 *restrict pXFFT, FFTLIB_bufParams1D_t *bufParamsXFFT, FFTLIB_F32 *restrict pSf, FFTLIB_bufParams1D_t *bufParamsSf, FFTLIB_F32 *restrict pY, FFTLIB_bufParams1D_t *bufParamsY, uint32_t numPoints, uint32_t numChannels, uint8_t outmode, void *pBlock)
This function is the main kernel compute function.
FFTLIB_STATUS FFTLIB_fft1dBatched_i32f_c32fc_o32fc_init(FFTLIB_F32 *pX, FFTLIB_bufParams1D_t *bufParamsX, FFTLIB_F32 *pW, FFTLIB_bufParams1D_t *bufParamsW, FFTLIB_F32 *pXFFT, FFTLIB_bufParams1D_t *bufParamsXFFT, FFTLIB_F32 *pSf, FFTLIB_bufParams1D_t *bufParamsSf, FFTLIB_F32 *pY, FFTLIB_bufParams1D_t *bufParamsY, uint32_t numPoints, uint32_t numChannels, uint8_t outmode, void *pBlock)
This function should be called before the FFTLIB_fft1dBatched_i32f_c32fc_o32fc_kernel function is called.
FFTLIB_STATUS FFTLIB_fft1dBatched_i32f_c32fc_o32fc_checkParams(FFTLIB_F32 *pX, FFTLIB_bufParams1D_t *bufParamsX, FFTLIB_F32 *pW, FFTLIB_bufParams1D_t *bufParamsW, FFTLIB_F32 *pY, FFTLIB_bufParams1D_t *bufParamsY, uint32_t numPoints, uint32_t numChannels, uint8_t outmode, void *pBlock)
This function checks the validity of the parameters passed to FFTLIB_fft1dBatched_i32f_c32fc_o32fc_init and FFTLIB_fft1dBatched_i32f_c32fc_o32fc_kernel functions.
FFTLIB_STATUS FFTLIB_fft1dBatched_i32fc_c32fc_o32fc_init(FFTLIB_F32 *pX, FFTLIB_bufParams1D_t *bufParamsX, FFTLIB_F32 *pW, FFTLIB_bufParams1D_t *bufParamsW, FFTLIB_F32 *pY, FFTLIB_bufParams1D_t *bufParamsY, uint32_t numPoints, uint32_t numChannels, void *pBlock)
This function should be called before the FFTLIB_fft1dBatched_i32fc_c32fc_o32fc_kernel function is called.
FFTLIB_STATUS FFTLIB_fft1dBatched_i32fc_c32fc_o32fc_kernel(FFTLIB_F32 *pX, FFTLIB_bufParams1D_t *bufParamsX, FFTLIB_F32 *pW, FFTLIB_bufParams1D_t *bufParamsW, FFTLIB_F32 *pY, FFTLIB_bufParams1D_t *bufParamsY, uint32_t numPoints, uint32_t numChannels, void *pBlock)
This function is the main kernel compute function.
A structure for a 1 dimensional buffer descriptor.
uint32_t data_type
Values are of type FFTLIB_data_type_e.
uint32_t dim_x
Width of buffer in X dimension in elements.