FFTLIB User Guide
c71/FFTLIB_ifft1dBatched_i32fc_c32fc_o32fc_ci.cpp
Go to the documentation of this file.
1 /*******************************************************************************
2 **+--------------------------------------------------------------------------+**
3 **| **** |**
4 **| **** |**
5 **| ******o*** |**
6 **| ********_///_**** |**
7 **| ***** /_//_/ **** |**
8 **| ** ** (__/ **** |**
9 **| ********* |**
10 **| **** |**
11 **| *** |**
12 **| |**
13 **| Copyright (c) 2017 Texas Instruments Incorporated |**
14 **| ALL RIGHTS RESERVED |**
15 **| |**
16 **| Permission to use, copy, modify, or distribute this software, |**
17 **| whether in part or in whole, for any purpose is forbidden without |**
18 **| a signed licensing agreement and NDA from Texas Instruments |**
19 **| Incorporated (TI). |**
20 **| |**
21 **| TI makes no representation or warranties with respect to the |**
22 **| performance of this computer program, and specifically disclaims |**
23 **| any responsibility for any damages, special or consequential, |**
24 **| connected with the use of this program. |**
25 **| |**
26 **+--------------------------------------------------------------------------+**
27 *******************************************************************************/
28 
29 #include "../FFTLIB_ifft1dBatched_i32fc_c32fc_o32fc.h"
30 
31 #define TRACE_ON (0)
32 
33 #if TRACE_ON
34 #include "../../../common/printv.h"
35 #include <stdio.h>
36 #endif
37 
38 // CODE_SECTION(FFTLIB_ifft1dBatched_i32fc_c32fc_o32fc, ".text:optimized")
39 // CODE_SECTION(FFTLIB_ifft1dBatched_i32fc_c32fc_o32fc_core, ".text:optimized")
40 // CODE_SECTION(FFTLIB_ifft1dBatched_i32fc_c32fc_o32fc_checkParams,
41 // ".text:optimized")
42 
43 #define SE_PARAM_BASE (0x0000)
44 #define SE_LOOP1_PARAM_OFFSET (SE_PARAM_BASE)
45 #define SE_LOOP2_PARAM_OFFSET (SE_LOOP1_PARAM_OFFSET + SE_PARAM_SIZE)
46 #define SE_LOOP3_PARAM_OFFSET (SE_LOOP2_PARAM_OFFSET + SE_PARAM_SIZE)
47 #define SE_LOOP4_PARAM_OFFSET (SE_LOOP3_PARAM_OFFSET + SE_PARAM_SIZE)
48 #define SE_LOOP5_PARAM_OFFSET (SE_LOOP4_PARAM_OFFSET + SE_PARAM_SIZE)
49 #define SE_LOOP6_PARAM_OFFSET (SE_LOOP5_PARAM_OFFSET + SE_PARAM_SIZE)
50 #define SE_LOOP7_PARAM_OFFSET (SE_LOOP6_PARAM_OFFSET + SE_PARAM_SIZE)
51 #define SE_TWID_PARAM_OFFSET (SE_LOOP7_PARAM_OFFSET + SE_PARAM_SIZE)
52 #define SA_LOOP1_PARAM_OFFSET (SE_TWID_PARAM_OFFSET + SE_PARAM_SIZE)
53 #define SA_LOOP2_PARAM_OFFSET (SA_LOOP1_PARAM_OFFSET + SA_PARAM_SIZE)
54 #define SA_LOOP3_PARAM_OFFSET (SA_LOOP2_PARAM_OFFSET + SA_PARAM_SIZE)
55 #define SA_LOOP4_PARAM_OFFSET (SA_LOOP3_PARAM_OFFSET + SA_PARAM_SIZE)
56 #define SA_LOOP6_PARAM_OFFSET (SA_LOOP4_PARAM_OFFSET + SA_PARAM_SIZE)
57 #define SE_CONJ_LOOP_PARAM_OFFSET (SA_LOOP6_PARAM_OFFSET + SE_PARAM_SIZE)
58 #define SA_CONJ_LOOP_PARAM_OFFSET (SE_CONJ_LOOP_PARAM_OFFSET + SE_PARAM_SIZE)
59 
60 void ifft_i32fc_o32fc_conjugate_init_ci(void *pX, uint32_t size, void *pBlock)
61 {
62  __SE_TEMPLATE_v1 se0_param = __gen_SE_TEMPLATE_v1();
63  __SA_TEMPLATE_v1 sa0_param = __gen_SA_TEMPLATE_v1();
64  /* cfloat *restrict pXLocal = (cfloat *) pX; */
65 
66  se0_param.ELETYPE = __SE_ELETYPE_32BIT_CMPLX_SWAP;
67  se0_param.VECLEN = c7x::se_veclen<c7x::cfloat_vec>::value;
68  se0_param.DIMFMT = __SE_DIMFMT_1D;
69  se0_param.ICNT0 = size;
70 
71  *((__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_CONJ_LOOP_PARAM_OFFSET)) = se0_param;
72 
73  sa0_param.ICNT0 = size;
74  sa0_param.VECLEN = c7x::sa_veclen<c7x::cfloat_vec>::value;
75  sa0_param.DIMFMT = __SA_DIMFMT_1D;
76 
77  *((__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + SA_CONJ_LOOP_PARAM_OFFSET)) = sa0_param;
78 }
79 
82  FFTLIB_bufParams1D_t *bufParamsX,
83  FFTLIB_F32 *pW,
84  FFTLIB_bufParams1D_t *bufParamsW,
85  FFTLIB_F32 *pY,
86  FFTLIB_bufParams1D_t *bufParamsY,
87  uint32_t numPoints,
88  uint32_t numChannels,
89  void *pBlock)
90 {
92 
93 #if defined(FFTLIB_CHECK_PARAMS) || defined(FFTLIB_IFFT1DBATCHED_I32FC_C32FC_O32FC_CHECK_PARAMS)
94  /* status = FFTLIB_ifft1dBatched_i32fc_c32fc_o32fc_checkParams ( */
95  /* pX, bufParamsX, pW, bufParamsW, pY, bufParamsY, numPoints,
96  * numChannels, */
97  /* pBlock); */
98  if (status == FFTLIB_SUCCESS)
99 #endif
100  {
101  uint32_t numPointsPerDft;
102  uint32_t seCnt1, seCnt2, seCnt3, seCnt4;
103  uint32_t seCnt6, seCnt7, seCnt8, seCnt9, seCnt10;
104  uint32_t seCnt11;
105  __SE_TEMPLATE_v1 se0_param = __gen_SE_TEMPLATE_v1();
106  __SE_TEMPLATE_v1 se1_param = __gen_SE_TEMPLATE_v1();
107  __SA_TEMPLATE_v1 sa0_param = __gen_SA_TEMPLATE_v1();
108 
109  numPointsPerDft = numPoints;
110  seCnt1 = numPoints >> 2;
111  seCnt2 = numPoints >> 5;
112  seCnt3 = 1;
113  seCnt4 = numPoints >> 3;
114  seCnt6 = seCnt3 * numChannels;
115  seCnt7 = (numPoints * numChannels >> 5) > 1 ? numPoints * numChannels >> 5 : 1;
116  seCnt8 = numPoints * numChannels;
117  seCnt9 = (numPoints * numChannels > 32) ? numPoints * numChannels : 32;
118  seCnt10 = (numPoints * numChannels >> 6) > 1 ? numPoints * numChannels >> 6 : 1;
119  seCnt11 = (numPoints * numChannels > 64) ? numPoints * numChannels : 64;
120 
121  /* Init conjugate for IFFT */
122  ifft_i32fc_o32fc_conjugate_init_ci(pX, (numPointsPerDft * numChannels), pBlock);
123 
124  se0_param = __gen_SE_TEMPLATE_v1();
125  se0_param.ICNT0 = 8; /* 8-point vectors processed in one shot */
126  se0_param.ICNT1 = 4;
127  se0_param.DIM1 = seCnt1; /* 4 quarters(Offsets: 0, N/4, N/2, 3N/4) */
128  se0_param.ICNT2 = seCnt2; /* Number of 8-point fetches within each */
129  se0_param.DIM2 = 8; /* quarter */
130  se0_param.ICNT3 = seCnt6; /* Number of DFT's for all channels */
131  se0_param.DIM3 = numPointsPerDft;
132 
133  se0_param.ELETYPE = __SE_ELETYPE_32BIT_CMPLX_SWAP;
134  se0_param.VECLEN = c7x::se_veclen<c7x::cfloat_vec>::value;
135  se0_param.DIMFMT = __SE_DIMFMT_4D;
136  *((__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_LOOP1_PARAM_OFFSET)) = se0_param;
137 
138  se1_param = __gen_SE_TEMPLATE_v1();
139  se1_param.ICNT0 = 8; /* 8-point vectors processed in one shot */
140  se1_param.ICNT1 = 3;
141  se1_param.DIM1 = seCnt1; /* Twiddle factors for x1, x2 and x3 */
142  se1_param.ICNT2 = seCnt2; /* Number of 8-point fetches within each */
143  se1_param.DIM2 = 8; /* quarter */
144  se1_param.ICNT3 = seCnt6; /* Number of DFT's for all channels */
145  se1_param.DIM3 = 0;
146 
147  se1_param.ELETYPE = __SE_ELETYPE_32BIT_CMPLX_SWAP;
148  se1_param.VECLEN = c7x::se_veclen<c7x::cfloat_vec>::value;
149  se1_param.DIMFMT = __SE_DIMFMT_4D;
150  *((__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_TWID_PARAM_OFFSET)) = se1_param;
151 
152  sa0_param = __gen_SA_TEMPLATE_v1();
153  sa0_param.ICNT0 = 8;
154  sa0_param.ICNT1 = 4;
155  sa0_param.DIM1 = seCnt1; /* Save to each of the 4 quarters */
156  sa0_param.ICNT2 = seCnt2; /* Number of 8-point stores within each */
157  sa0_param.DIM2 = 8; /* quarter */
158  sa0_param.ICNT3 = seCnt6;
159  sa0_param.DIM3 = numPointsPerDft; /* Number of DFT's for all channels */
160 
161  sa0_param.VECLEN = c7x::sa_veclen<c7x::cfloat_vec>::value;
162  sa0_param.DIMFMT = __SA_DIMFMT_4D;
163  *((__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + SA_LOOP1_PARAM_OFFSET)) = sa0_param;
164 
165  se0_param = __gen_SE_TEMPLATE_v1();
166  se0_param.ICNT0 = 8; /* Fetch first two quarters */
167  se0_param.ICNT1 = 2;
168  se0_param.DIM1 = 16; /* Process two 16-point DFTs in one shot */
169  se0_param.ICNT2 = seCnt7;
170  se0_param.DIM2 = 32;
171 
172  se0_param.ELETYPE = __SE_ELETYPE_32BIT_CMPLX_SWAP;
173  se0_param.TRANSPOSE = __SE_TRANSPOSE_256BIT; /* Using 256BIT transpose required */
174  /* 16-byte alignment on pX */
175  se0_param.VECLEN = c7x::se_veclen<c7x::cfloat_vec>::value;
176  se0_param.DIMFMT = __SE_DIMFMT_3D;
177  *((__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_LOOP2_PARAM_OFFSET)) = se0_param;
178 
179  sa0_param = __gen_SA_TEMPLATE_v1();
180  sa0_param.ICNT0 = seCnt8; /* Input buffer must be at least 32
181  * elements long even though
182  * numPoints*numChannels = 16 */
183 
184  sa0_param.VECLEN = c7x::sa_veclen<c7x::cfloat_vec>::value;
185  sa0_param.DIMFMT = __SA_DIMFMT_1D;
186  *((__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + SA_LOOP2_PARAM_OFFSET)) = sa0_param;
187 
188  se0_param = __gen_SE_TEMPLATE_v1();
189  se0_param.ICNT0 = seCnt8;
190 
191  se0_param.ELETYPE = __SE_ELETYPE_32BIT_CMPLX_SWAP;
192  se0_param.VECLEN = c7x::se_veclen<c7x::cfloat_vec>::value;
193  se0_param.DIMFMT = __SE_DIMFMT_1D;
194  *((__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_LOOP3_PARAM_OFFSET)) = se0_param;
195 
196  sa0_param = __gen_SA_TEMPLATE_v1();
197  sa0_param.ICNT0 = seCnt8;
198 
199  sa0_param.VECLEN = c7x::sa_veclen<c7x::cfloat_vec>::value;
200  sa0_param.DIMFMT = __SA_DIMFMT_1D;
201  *((__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + SA_LOOP3_PARAM_OFFSET)) = sa0_param;
202 
203  /* The following SE configuration may cause sub-optimal
204  * tile in SE because second row of tile starts in the
205  * middle of first row */
206  se0_param = __gen_SE_TEMPLATE_v1();
207  se0_param.ICNT0 = 4;
208  se0_param.ICNT1 = 8;
209  se0_param.DIM1 = 4;
210  se0_param.ICNT2 = seCnt7;
211  se0_param.DIM2 = 32;
212 
213  se0_param.ELETYPE = __SE_ELETYPE_32BIT_CMPLX_SWAP;
214  se0_param.TRANSPOSE = __SE_TRANSPOSE_64BIT;
215  se0_param.VECLEN = c7x::se_veclen<c7x::cfloat_vec>::value;
216  se0_param.DIMFMT = __SE_DIMFMT_3D;
217  *((__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_LOOP4_PARAM_OFFSET)) = se0_param;
218 
219  sa0_param = __gen_SA_TEMPLATE_v1();
220  sa0_param.ICNT0 = seCnt9; /* Input buffer must be at least 32
221  * elements long even though
222  * numPoints*numChannels = 16 */
223 
224  sa0_param.VECLEN = c7x::sa_veclen<c7x::cfloat_vec>::value;
225  sa0_param.DIMFMT = __SA_DIMFMT_1D;
226  *((__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + SA_LOOP4_PARAM_OFFSET)) = sa0_param;
227 
228  se0_param = __gen_SE_TEMPLATE_v1();
229  se0_param.ICNT0 = seCnt4; /* Fetch consecutive four points for DFT */
230  se0_param.ICNT1 = 8;
231  se0_param.DIM1 =
232  /* Fetch 8 points separated by */ seCnt4; /* (numPoints >>
233  3). This fetch
234  pattern */
235  /* can be used for bit reversal */
236  se0_param.ICNT2 = numChannels;
237  se0_param.DIM2 = numPoints;
238 
239  se0_param.ELETYPE = __SE_ELETYPE_32BIT_CMPLX_SWAP;
240  se0_param.TRANSPOSE = __SE_TRANSPOSE_64BIT;
241  se0_param.VECLEN = c7x::se_veclen<c7x::cfloat_vec>::value;
242  se0_param.DIMFMT = __SE_DIMFMT_3D;
243  *((__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_LOOP5_PARAM_OFFSET)) = se0_param;
244 
245  se0_param = __gen_SE_TEMPLATE_v1();
246  se0_param.ICNT0 = 8;
247  se0_param.ICNT1 = 8;
248  se0_param.DIM1 = 8;
249  se0_param.ICNT2 = seCnt10;
250  se0_param.DIM2 = 64;
251 
252  se0_param.ELETYPE = __SE_ELETYPE_32BIT_CMPLX_SWAP;
253  se0_param.TRANSPOSE = __SE_TRANSPOSE_64BIT;
254  se0_param.VECLEN = c7x::se_veclen<c7x::cfloat_vec>::value;
255  se0_param.DIMFMT = __SE_DIMFMT_3D;
256  *((__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_LOOP6_PARAM_OFFSET)) = se0_param;
257 
258  sa0_param = __gen_SA_TEMPLATE_v1();
259  sa0_param.ICNT0 = seCnt11; /* Input buffer must be at least 64
260  * elements long even though
261  * numPoints*numChannels = 32 */
262 
263  sa0_param.VECLEN = c7x::sa_veclen<c7x::cfloat_vec>::value;
264  sa0_param.DIMFMT = __SA_DIMFMT_1D;
265  *((__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + SA_LOOP6_PARAM_OFFSET)) = sa0_param;
266 
267  se0_param = __gen_SE_TEMPLATE_v1();
268  se0_param.ICNT0 = seCnt4;
269  se0_param.ICNT1 = 8;
270  se0_param.DIM1 =
271  /* Fetch 8 points separated by */ seCnt4; /* (numPoints >>
272  3). This fetch
273  pattern */
274  /* can be used for bit reversal */
275  se0_param.ICNT2 = numChannels;
276  se0_param.DIM2 = numPoints;
277 
278  se0_param.ELETYPE = __SE_ELETYPE_32BIT_CMPLX_SWAP;
279  se0_param.TRANSPOSE = __SE_TRANSPOSE_64BIT;
280  se0_param.VECLEN = c7x::se_veclen<c7x::cfloat_vec>::value;
281  se0_param.DIMFMT = __SE_DIMFMT_3D;
282  *((__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_LOOP7_PARAM_OFFSET)) = se0_param;
283  }
284  return (status);
285 }
286 
287 /* Scale and Conjugate input buffer in-place */
288 static inline c7x::cfloat_vec
289 ifft_i32fc_o32fc_scaleAndConjugate(c7x::cfloat_vec in, c7x::float_vec scaleVec, c7x::ulong_vec xorVec)
290 {
291  return (c7x::as_cfloat_vec(scaleVec * c7x::as_float_vec(c7x::as_ulong_vec(in) ^ xorVec)));
292 }
293 
295  c7x::ulong_vec xorVec,
296  uint32_t size,
297  uint32_t numPoints,
298  void *pBlock)
299 {
300 
301  typedef typename c7x::cfloat_vec CV;
302  typedef CV *CVP;
303 
304  __SE_TEMPLATE_v1 se0_param = __gen_SE_TEMPLATE_v1();
305  __SA_TEMPLATE_v1 sa0_param = __gen_SA_TEMPLATE_v1();
306  cfloat *restrict pXLocal = (cfloat *) pX;
307 
308  se0_param = *((__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_CONJ_LOOP_PARAM_OFFSET));
309 
310  sa0_param = *((__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + SA_CONJ_LOOP_PARAM_OFFSET));
311 
312  __SE0_OPEN(pX, se0_param);
313  __SA0_OPEN(sa0_param);
314 
315  uint32_t i = 0;
316  uint32_t loopCount = (size) / c7x::element_count_of<c7x::cfloat_vec>::value;
317  c7x::cfloat_vec regIn, regStore;
318  float scale = 1.0f / numPoints;
319  c7x::float_vec scaleVec = __vload_dup(&scale);
320 
321  __vpred tmp;
322  CV *addr;
323 
324  for (i = 0; i < loopCount; i++) {
325  regIn = c7x::strm_eng<0, c7x::cfloat_vec>::get_adv();
326  /* FFTLIB_debugPrintFloatVector (c7x::as_float_vec (regIn)); */
327 
328  regStore = ifft_i32fc_o32fc_scaleAndConjugate(regIn, scaleVec, xorVec);
329  /* FFTLIB_debugPrintFloatVector (c7x::as_float_vec (regStore)); */
330 
331  tmp = c7x::strm_agen<0, CV>::get_vpred();
332  addr = c7x::strm_agen<0, CV>::get_adv(&pXLocal[0]);
333  __vstore_pred(tmp, addr, regStore);
334  }
335 
336  __SE0_CLOSE();
337  __SA0_CLOSE();
338 }
339 
342  FFTLIB_bufParams1D_t *bufParamsX,
343  FFTLIB_F32 *pW,
344  FFTLIB_bufParams1D_t *bufParamsW,
345  FFTLIB_F32 *pY,
346  FFTLIB_bufParams1D_t *bufParamsY,
347  uint32_t numPoints,
348  uint32_t numChannels,
349  void *pBlock)
350 {
351  uint32_t k, l;
352  FFTLIB_STATUS status = FFTLIB_SUCCESS;
353  uint32_t numPointsPerDft;
354  uint32_t numLeadingZeros;
355  uint32_t offsetBitReverse;
356  uint32_t seCnt1, seCnt2, seCnt3, seCnt6;
357 
358  __SE_TEMPLATE_v1 se0_param;
359  __SE_TEMPLATE_v1 se1_param;
360  __SA_TEMPLATE_v1 sa0_param;
361 
362  cfloat *restrict pXLocal;
363  cfloat *restrict pYLocal;
364  cfloat *restrict pWLocal;
365  cfloat *restrict pY0;
366  cfloat *restrict pY1;
367  cfloat *restrict pY2;
368  cfloat *restrict pY3;
369  cfloat *restrict pY4;
370  cfloat *restrict pY5;
371  cfloat *restrict pY6;
372  cfloat *restrict pY7;
373 
374  typedef typename c7x::cfloat_vec CV;
375  typedef CV *CVP;
376 
377  /* typedef typename c7x::float_vec V; */
378  /* typedef V* VP; */
379 
380  CV vX_0, vX_N_4, vX_N_2, vX_3N_4;
381  CV vSum1, vSum2, vDiff1, vDiff2;
382  CV vTwX1, vTwX2, vTwX3;
383  CV vX0Temp, vX1Temp, vX2Temp, vX3Temp;
384  CV vX0, vX1, vX2, vX3;
385  CV vX_0_1, vX_N_4_1, vX_N_2_1, vX_3N_4_1;
386  CV vSum1_1, vSum2_1, vDiff1_1, vDiff2_1;
387  CV vX0_1, vX1_1, vX2_1, vX3_1;
388  CV vX0_2PtDft_1, vX0_2PtDft_2;
389  CV vX1_2PtDft_1, vX1_2PtDft_2;
390  CV vX2_2PtDft_1, vX2_2PtDft_2;
391  CV vX3_2PtDft_1, vX3_2PtDft_2;
392  CV vX01_lo, vX23_lo, vX01_hi, vX23_hi;
393  cfloat twTemp;
394 
395 #ifdef FFTLIB_CHECK_PARAMS
396  /* status = FFTLIB_ifft1dBatched_i32fc_c32fc_o32fc_checkParams ( */
397  /* pX, bufParamsX, pW, bufParamsW, pY, bufParamsY, numPoints,
398  * numChannels, */
399  /* pBlock); */
400  if (status == FFTLIB_SUCCESS)
401 #endif
402  {
403  numPointsPerDft = numPoints;
404 
405  float scale = 1.0;
406  c7x::float_vec scaleVec = __vload_dup(&scale);
407 
408  /* Set xor vector to flip sign bit of imaginary component of cfloat pair */
409 #if defined(_HOST_BUILD)
410  c7x::ulong_vec xorVec = (c7x::ulong_vec)(0x0000000080000000);
411 
412 #else
413  c7x::ulong_vec xorVec = (0x0000000080000000);
414 #endif
415 
416  /* Scale by 1/N and conjugate for batched IFFT, then follow batched FFT implementation */
417  ifft_i32fc_o32fc_conjugate_exec_ci((void *) pX, xorVec, (numPointsPerDft * numChannels), numPointsPerDft, pBlock);
418 
419  se0_param = *((__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_LOOP1_PARAM_OFFSET));
420  se1_param = *((__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_TWID_PARAM_OFFSET));
421  sa0_param = *((__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + SA_LOOP1_PARAM_OFFSET));
422  seCnt1 = numPointsPerDft >> 2;
423  seCnt2 = numPointsPerDft >> 5;
424  seCnt3 = 1;
425 
426  pXLocal = (cfloat *) pX;
427  pWLocal = (cfloat *) pW;
428  pYLocal = (cfloat *) pY;
429 
430  while (numPointsPerDft >= 64) {
431 
432  seCnt6 = seCnt3 * numChannels;
433  se0_param.ICNT1 = 4;
434  se0_param.DIM1 = seCnt1; /* 4 quarters(Offsets: 0, N/4, N/2, 3N/4) */
435  se0_param.ICNT2 = seCnt2;
436  se0_param.DIM2 = 8; /* Number of 8-point fetches within each quarter */
437  se0_param.ICNT3 = seCnt6;
438  se0_param.DIM3 = numPointsPerDft; /* Number of DFT's for all channels */
439  __SE0_OPEN((void *) pXLocal, se0_param);
440 
441  se1_param.ICNT1 = 3;
442  se1_param.DIM1 = seCnt1; /* Twiddle factors for x1, x2 and x3 */
443  se1_param.ICNT2 = seCnt2; /* Number of 8-point fetches within each quarter*/
444  se1_param.DIM2 = 8;
445  se1_param.ICNT3 = seCnt6; /* Number of DFT's for all channels */
446  se1_param.DIM3 = 0;
447  __SE1_OPEN((void *) pWLocal, se1_param);
448 
449  sa0_param.ICNT1 = 4;
450  sa0_param.DIM1 = seCnt1; /* Save to each of the 4 quarters */
451  sa0_param.ICNT2 = seCnt2;
452  sa0_param.DIM2 = 8;
453  /* Number of 8-point stores within each quarter */
454  sa0_param.ICNT3 = seCnt6;
455  sa0_param.DIM3 = numPointsPerDft; /* Number of DFT's */
456  __SA0_OPEN(sa0_param);
457 
458  /* Loop is unrolled twice for better optimization */
459  for (k = 0; k < numPoints * numChannels; k += 64) {
460 
461  /* First iteration of loop unroll */
462  vX_0 = c7x::strm_eng<0, CV>::get_adv();
463  vX_N_4 = c7x::strm_eng<0, CV>::get_adv();
464  vX_N_2 = c7x::strm_eng<0, CV>::get_adv();
465  vX_3N_4 = c7x::strm_eng<0, CV>::get_adv();
466 
467  vSum1 = vX_0 + vX_N_2;
468  vSum2 = vX_N_4 + vX_3N_4;
469  vDiff1 = vX_0 - vX_N_2;
470  vDiff2 = vX_N_4 - vX_3N_4;
471 
472  vTwX1 = c7x::strm_eng<1, CV>::get_adv();
473  vTwX2 = c7x::strm_eng<1, CV>::get_adv();
474  vTwX3 = c7x::strm_eng<1, CV>::get_adv();
475 
476  vX0Temp = vSum1 + vSum2;
477  vX1Temp = vDiff1 - __vcrot90sp_vv(vDiff2);
478  vX2Temp = vSum1 - vSum2;
479  vX3Temp = vDiff1 + __vcrot90sp_vv(vDiff2);
480 
481  vX0 = vX0Temp;
482  vX1 = __complex_multiply(vX1Temp, vTwX1);
483  vX2 = __complex_multiply(vX2Temp, vTwX2);
484  vX3 = __complex_multiply(vX3Temp, vTwX3);
485 
486  __vpred tmp;
487  CVP addr;
488 
489  tmp = c7x::strm_agen<0, CV>::get_vpred();
490  addr = c7x::strm_agen<0, CV>::get_adv(pXLocal);
491  __vstore_pred(tmp, addr, vX0);
492 
493  tmp = c7x::strm_agen<0, CV>::get_vpred();
494  addr = c7x::strm_agen<0, CV>::get_adv(pXLocal);
495  __vstore_pred(tmp, addr, vX2);
496 
497  tmp = c7x::strm_agen<0, CV>::get_vpred();
498  addr = c7x::strm_agen<0, CV>::get_adv(pXLocal);
499  __vstore_pred(tmp, addr, vX1);
500 
501  tmp = c7x::strm_agen<0, CV>::get_vpred();
502  addr = c7x::strm_agen<0, CV>::get_adv(pXLocal);
503  __vstore_pred(tmp, addr, vX3);
504 
505  /* Second iteration of loop unroll */
506  vX_0 = c7x::strm_eng<0, CV>::get_adv();
507  vX_N_4 = c7x::strm_eng<0, CV>::get_adv();
508  vX_N_2 = c7x::strm_eng<0, CV>::get_adv();
509  vX_3N_4 = c7x::strm_eng<0, CV>::get_adv();
510 
511  vSum1 = vX_0 + vX_N_2;
512  vSum2 = vX_N_4 + vX_3N_4;
513  vDiff1 = vX_0 - vX_N_2;
514  vDiff2 = vX_N_4 - vX_3N_4;
515 
516  vTwX1 = c7x::strm_eng<1, CV>::get_adv();
517  vTwX2 = c7x::strm_eng<1, CV>::get_adv();
518  vTwX3 = c7x::strm_eng<1, CV>::get_adv();
519 
520  vX0Temp = vSum1 + vSum2;
521  vX1Temp = vDiff1 - __vcrot90sp_vv(vDiff2);
522  vX2Temp = vSum1 - vSum2;
523  vX3Temp = vDiff1 + __vcrot90sp_vv(vDiff2);
524 
525  vX0 = vX0Temp;
526  vX1 = __complex_multiply(vX1Temp, vTwX1);
527  vX2 = __complex_multiply(vX2Temp, vTwX2);
528  vX3 = __complex_multiply(vX3Temp, vTwX3);
529 
530  tmp = c7x::strm_agen<0, CV>::get_vpred();
531  addr = c7x::strm_agen<0, CV>::get_adv(pXLocal);
532  __vstore_pred(tmp, addr, vX0);
533 
534  tmp = c7x::strm_agen<0, CV>::get_vpred();
535  addr = c7x::strm_agen<0, CV>::get_adv(pXLocal);
536  __vstore_pred(tmp, addr, vX2);
537 
538  tmp = c7x::strm_agen<0, CV>::get_vpred();
539  addr = c7x::strm_agen<0, CV>::get_adv(pXLocal);
540  __vstore_pred(tmp, addr, vX1);
541 
542  tmp = c7x::strm_agen<0, CV>::get_vpred();
543  addr = c7x::strm_agen<0, CV>::get_adv(pXLocal);
544  __vstore_pred(tmp, addr, vX3);
545  }
546  __SA0_CLOSE();
547  __SE0_CLOSE();
548  __SE1_CLOSE();
549 
550  numPointsPerDft >>= 2;
551  pWLocal += numPointsPerDft * 3;
552  seCnt1 >>= 2;
553  seCnt2 >>= 2;
554  seCnt3 <<= 2;
555  }
556 
557  if (numPointsPerDft == 16) {
558  /* 16-point stage */
559  se0_param = *((__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_LOOP2_PARAM_OFFSET));
560  __SE0_OPEN((void *) pXLocal, se0_param);
561  __SE1_OPEN((void *) (pXLocal + 8), se0_param);
562 
563  sa0_param = *((__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + SA_LOOP2_PARAM_OFFSET));
564  __SA0_OPEN(sa0_param);
565 
566  vTwX1 = *((CVP) pWLocal);
567  vTwX2 = *((CVP) (pWLocal + 4));
568  vTwX3 = *((CVP) (pWLocal + 8));
569 
570 #if __C7X_HOSTEM__
571  vTwX1 = CV(vTwX1.lo(), vTwX1.lo());
572  vTwX2 = CV(vTwX2.lo(), vTwX2.lo());
573  vTwX3 = CV(vTwX3.lo(), vTwX3.lo());
574 #else
575  vTwX1 = (CV) (vTwX1.lo(), vTwX1.lo());
576  vTwX2 = (CV) (vTwX2.lo(), vTwX2.lo());
577  vTwX3 = (CV) (vTwX3.lo(), vTwX3.lo());
578 #endif
579 
580  for (k = 0; k < numPoints * numChannels; k += 32) {
581  vX_0 = c7x::strm_eng<0, CV>::get_adv();
582  vX_N_4 = c7x::strm_eng<0, CV>::get_adv();
583  vX_N_2 = c7x::strm_eng<1, CV>::get_adv();
584  vX_3N_4 = c7x::strm_eng<1, CV>::get_adv();
585 
586  vSum1 = vX_0 + vX_N_2;
587  vSum2 = vX_N_4 + vX_3N_4;
588  vDiff1 = vX_0 - vX_N_2;
589  vDiff2 = vX_N_4 - vX_3N_4;
590 
591  vX0Temp = vSum1 + vSum2;
592  vX1Temp = vDiff1 - __vcrot90sp_vv(vDiff2);
593  vX2Temp = vSum1 - vSum2;
594  vX3Temp = vDiff1 + __vcrot90sp_vv(vDiff2);
595 
596  vX0 = vX0Temp;
597  vX1 = __complex_multiply(vX1Temp, vTwX1);
598  vX2 = __complex_multiply(vX2Temp, vTwX2);
599  vX3 = __complex_multiply(vX3Temp, vTwX3);
600 
601 #if __C7X_HOSTEM__
602  __vpred tmp = c7x::strm_agen<0, CV>::get_vpred();
603  CVP addr;
604  addr = c7x::strm_agen<0, CV>::get_adv(pXLocal);
605  __vstore_pred(tmp, addr, CV(vX0.lo(), vX2.lo()));
606 
607  tmp = c7x::strm_agen<0, CV>::get_vpred();
608  addr = c7x::strm_agen<0, CV>::get_adv(pXLocal);
609  __vstore_pred(tmp, addr, CV(vX1.lo(), vX3.lo()));
610 
611  tmp = c7x::strm_agen<0, CV>::get_vpred();
612  addr = c7x::strm_agen<0, CV>::get_adv(pXLocal);
613  __vstore_pred(tmp, addr, CV(vX0.hi(), vX2.hi()));
614 
615  tmp = c7x::strm_agen<0, CV>::get_vpred();
616  addr = c7x::strm_agen<0, CV>::get_adv(pXLocal);
617  __vstore_pred(tmp, addr, CV(vX1.hi(), vX3.hi()));
618 #else
619  __vpred tmp = c7x::strm_agen<0, CV>::get_vpred();
620  CVP addr;
621  addr = c7x::strm_agen<0, CV>::get_adv(pXLocal);
622  __vstore_pred(tmp, addr, (CV) (vX0.lo(), vX2.lo()));
623 
624  tmp = c7x::strm_agen<0, CV>::get_vpred();
625  addr = c7x::strm_agen<0, CV>::get_adv(pXLocal);
626  __vstore_pred(tmp, addr, (CV) (vX1.lo(), vX3.lo()));
627 
628  tmp = c7x::strm_agen<0, CV>::get_vpred();
629  addr = c7x::strm_agen<0, CV>::get_adv(pXLocal);
630  __vstore_pred(tmp, addr, (CV) (vX0.hi(), vX2.hi()));
631 
632  tmp = c7x::strm_agen<0, CV>::get_vpred();
633  addr = c7x::strm_agen<0, CV>::get_adv(pXLocal);
634  __vstore_pred(tmp, addr, (CV) (vX1.hi(), vX3.hi()));
635 #endif
636  }
637  __SA0_CLOSE();
638  __SE0_CLOSE();
639  __SE1_CLOSE();
640  }
641  else {
642  /* 32-point stage */
643  se0_param = *((__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_LOOP3_PARAM_OFFSET));
644  __SE0_OPEN((void *) pXLocal, se0_param);
645 
646  sa0_param = *((__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + SA_LOOP3_PARAM_OFFSET));
647  __SA0_OPEN(sa0_param);
648 
649  vTwX1 = *((CVP) pWLocal);
650  vTwX2 = *((CVP) (pWLocal + 8));
651  vTwX3 = *((CVP) (pWLocal + 16));
652 
653  for (k = 0; k < numPoints * numChannels; k += 32) {
654  vX_0 = c7x::strm_eng<0, CV>::get_adv();
655  vX_N_4 = c7x::strm_eng<0, CV>::get_adv();
656  vX_N_2 = c7x::strm_eng<0, CV>::get_adv();
657  vX_3N_4 = c7x::strm_eng<0, CV>::get_adv();
658 
659  vSum1 = vX_0 + vX_N_2;
660  vSum2 = vX_N_4 + vX_3N_4;
661  vDiff1 = vX_0 - vX_N_2;
662  vDiff2 = vX_N_4 - vX_3N_4;
663 
664  vX0Temp = vSum1 + vSum2;
665  vX1Temp = vDiff1 - __vcrot90sp_vv(vDiff2);
666  vX2Temp = vSum1 - vSum2;
667  vX3Temp = vDiff1 + __vcrot90sp_vv(vDiff2);
668 
669  vX0 = vX0Temp;
670  vX1 = __complex_multiply(vX1Temp, vTwX1);
671  vX2 = __complex_multiply(vX2Temp, vTwX2);
672  vX3 = __complex_multiply(vX3Temp, vTwX3);
673 
674  __vpred tmp = c7x::strm_agen<0, CV>::get_vpred();
675  CVP addr;
676  addr = c7x::strm_agen<0, CV>::get_adv(pXLocal);
677  __vstore_pred(tmp, addr, vX0);
678 
679  tmp = c7x::strm_agen<0, CV>::get_vpred();
680  addr = c7x::strm_agen<0, CV>::get_adv(pXLocal);
681  __vstore_pred(tmp, addr, vX2);
682 
683  tmp = c7x::strm_agen<0, CV>::get_vpred();
684  addr = c7x::strm_agen<0, CV>::get_adv(pXLocal);
685  __vstore_pred(tmp, addr, vX1);
686 
687  tmp = c7x::strm_agen<0, CV>::get_vpred();
688  addr = c7x::strm_agen<0, CV>::get_adv(pXLocal);
689  __vstore_pred(tmp, addr, vX3);
690  }
691  __SE0_CLOSE();
692  __SA0_CLOSE();
693  }
694 
695  numPointsPerDft >>= 2;
696  pWLocal += numPointsPerDft * 3;
697 
698  if (numPointsPerDft == 4) {
699  /* 4-point stage with bit-reversal */
700 
701  if (numPoints == 16) {
702 // clang-format off
703 #if __C7X_HOSTEM__
704  c7x::uchar_vec vXPermCtrl = c7x::uchar_vec(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
705  0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
706  0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
707  0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
708  0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
709  0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
710  0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
711  0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F);
712 #else
713  c7x::uchar_vec vXPermCtrl = (c7x::uchar_vec)(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
714  0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
715  0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
716  0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
717  0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
718  0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
719  0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
720  0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F);
721 #endif
722  // clang-format on
723 
724  se0_param = *((__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_LOOP4_PARAM_OFFSET));
725  __SE0_OPEN((void *) pXLocal, se0_param);
726 
727  sa0_param = *((__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + SA_LOOP4_PARAM_OFFSET));
728  __SA0_OPEN(sa0_param);
729 
730  for (k = 0; k < numChannels << 4; k += 32) {
731  vX_0 = c7x::strm_eng<0, CV>::get_adv();
732  vX_N_4 = c7x::strm_eng<0, CV>::get_adv();
733  vX_N_2 = c7x::strm_eng<0, CV>::get_adv();
734  vX_3N_4 = c7x::strm_eng<0, CV>::get_adv();
735 
736  vSum1 = vX_0 + vX_N_2;
737  vSum2 = vX_N_4 + vX_3N_4;
738  vDiff1 = vX_0 - vX_N_2;
739  vDiff2 = vX_N_4 - vX_3N_4;
740 
741  vX0 = vSum1 + vSum2;
742  vX1 = vDiff1 - __vcrot90sp_vv(vDiff2);
743  vX2 = vSum1 - vSum2;
744  vX3 = vDiff1 + __vcrot90sp_vv(vDiff2);
745 
746  /* permute + store = vstore reverse bit */
747  vX0 = ifft_i32fc_o32fc_scaleAndConjugate(vX0, scaleVec, xorVec);
748  vX1 = ifft_i32fc_o32fc_scaleAndConjugate(vX1, scaleVec, xorVec);
749  vX2 = ifft_i32fc_o32fc_scaleAndConjugate(vX2, scaleVec, xorVec);
750  vX3 = ifft_i32fc_o32fc_scaleAndConjugate(vX3, scaleVec, xorVec);
751 
752  vX01_lo = c7x::as_cfloat_vec(__vpermll_yvvv(vXPermCtrl, c7x::as_uchar_vec(vX1), c7x::as_uchar_vec(vX0)));
753  vX23_lo = c7x::as_cfloat_vec(__vpermll_yvvv(vXPermCtrl, c7x::as_uchar_vec(vX3), c7x::as_uchar_vec(vX2)));
754  vX01_hi = c7x::as_cfloat_vec(__vpermhh_yvvv(vXPermCtrl, c7x::as_uchar_vec(vX1), c7x::as_uchar_vec(vX0)));
755  vX23_hi = c7x::as_cfloat_vec(__vpermhh_yvvv(vXPermCtrl, c7x::as_uchar_vec(vX3), c7x::as_uchar_vec(vX2)));
756 
757  __vpred tmp = c7x::strm_agen<0, CV>::get_vpred();
758  CVP addr;
759  addr = c7x::strm_agen<0, CV>::get_adv(pYLocal);
760  __vstore_pred(tmp, addr, vX01_lo);
761 
762  tmp = c7x::strm_agen<0, CV>::get_vpred();
763  addr = c7x::strm_agen<0, CV>::get_adv(pYLocal);
764  __vstore_pred(tmp, addr, vX23_lo);
765 
766  tmp = c7x::strm_agen<0, CV>::get_vpred();
767  addr = c7x::strm_agen<0, CV>::get_adv(pYLocal);
768  __vstore_pred(tmp, addr, vX01_hi);
769 
770  tmp = c7x::strm_agen<0, CV>::get_vpred();
771  addr = c7x::strm_agen<0, CV>::get_adv(pYLocal);
772  __vstore_pred(tmp, addr, vX23_hi);
773  }
774  __SE0_CLOSE();
775  __SA0_CLOSE();
776  }
777  else {
778  se0_param = *((__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_LOOP5_PARAM_OFFSET));
779  __SE0_OPEN((void *) pXLocal, se0_param);
780 
781  numLeadingZeros = __norm((int32_t) (numPoints - 1)) + 1;
782 
783  /* pY0 = &pYLocal[0x00000000u]; */
784  /* pY1 = &pYLocal[0x40000000u >> numLeadingZeros]; */
785  /* pY2 = &pYLocal[0x80000000u >> numLeadingZeros]; */
786  /* pY3 = &pYLocal[0xC0000000u >> numLeadingZeros]; */
787 
788  pY0 = (cfloat *) (pY + 0);
789  pY1 = (cfloat *) (pY + ((0x40000000u >> numLeadingZeros) << 1));
790  pY2 = (cfloat *) (pY + ((0x80000000u >> numLeadingZeros) << 1));
791  pY3 = (cfloat *) (pY + ((0xC0000000u >> numLeadingZeros) << 1));
792 
793 #ifdef CL7X_HE_CFLOAT_PTR_BUG
794  float *myPY0 = (float *) pY0;
795  float *myPY1 = (float *) pY1;
796  float *myPY2 = (float *) pY2;
797  float *myPY3 = (float *) pY3;
798 #endif
799 
800  for (l = 0; l < numChannels; l++) {
801  for (k = 0; k < numPoints >> 3; k += 4) {
802  offsetBitReverse = __bit_reverse(k) >> numLeadingZeros;
803 
804  vX_0 = c7x::strm_eng<0, CV>::get_adv();
805  vX_N_4 = c7x::strm_eng<0, CV>::get_adv();
806  vX_N_2 = c7x::strm_eng<0, CV>::get_adv();
807  vX_3N_4 = c7x::strm_eng<0, CV>::get_adv();
808 
809  vSum1 = vX_0 + vX_N_2;
810  vSum2 = vX_N_4 + vX_3N_4;
811  vDiff1 = vX_0 - vX_N_2;
812  vDiff2 = vX_N_4 - vX_3N_4;
813 
814  vX0 = vSum1 + vSum2;
815  vX1 = vDiff1 - __vcrot90sp_vv(vDiff2);
816  vX2 = vSum1 - vSum2;
817  vX3 = vDiff1 + __vcrot90sp_vv(vDiff2);
818 
819  /* __vstore_reverse_bit ((CVP) &pY0[offsetBitReverse],
820  */
821  /* vX0); */
822  /* __vstore_reverse_bit ((CVP) &pY1[offsetBitReverse],
823  */
824  /* vX1); */
825  /* __vstore_reverse_bit ((CVP) &pY2[offsetBitReverse],
826  */
827  /* vX2); */
828  /* __vstore_reverse_bit ((CVP) &pY3[offsetBitReverse],
829  */
830  /* vX3); */
831 
832  /* permute + store = vstore reverse bit */
833  vX0 = ifft_i32fc_o32fc_scaleAndConjugate(vX0, scaleVec, xorVec);
834  vX1 = ifft_i32fc_o32fc_scaleAndConjugate(vX1, scaleVec, xorVec);
835  vX2 = ifft_i32fc_o32fc_scaleAndConjugate(vX2, scaleVec, xorVec);
836  vX3 = ifft_i32fc_o32fc_scaleAndConjugate(vX3, scaleVec, xorVec);
837 
838  __vstore_reverse_bit((CVP) (pY0 + offsetBitReverse), vX0);
839  __vstore_reverse_bit((CVP) (pY1 + offsetBitReverse), vX1);
840  __vstore_reverse_bit((CVP) (pY2 + offsetBitReverse), vX2);
841  __vstore_reverse_bit((CVP) (pY3 + offsetBitReverse), vX3);
842  }
843 
844 #ifdef CL7X_HE_CFLOAT_PTR_BUG
845  myPY0 += (numPoints << 1);
846  myPY1 += (numPoints << 1);
847  myPY2 += (numPoints << 1);
848  myPY3 += (numPoints << 1);
849 
850  pY0 = (cfloat *) myPY0;
851  pY1 = (cfloat *) myPY1;
852  pY2 = (cfloat *) myPY2;
853  pY3 = (cfloat *) myPY3;
854 
855 #else
856  pY0 += numPoints;
857  pY1 += numPoints;
858  pY2 += numPoints;
859  pY3 += numPoints;
860 #endif
861  }
862  __SE0_CLOSE();
863  }
864  }
865  else {
866  /* 4-point stage followed by 2-point stage with bit-reversal */
867 
868 #if __C7X_HOSTEM__
869  pWLocal += 1;
870  twTemp = *pWLocal;
871  vTwX1 = CV(twTemp, twTemp, twTemp, twTemp, twTemp, twTemp, twTemp, twTemp);
872  pWLocal += 2;
873  twTemp = *pWLocal;
874  vTwX2 = CV(twTemp, twTemp, twTemp, twTemp, twTemp, twTemp, twTemp, twTemp);
875  pWLocal += 2;
876  twTemp = *pWLocal;
877  vTwX3 = CV(twTemp, twTemp, twTemp, twTemp, twTemp, twTemp, twTemp, twTemp);
878 #else
879  pWLocal += 1;
880  twTemp = *pWLocal;
881  vTwX1 = (CV) (twTemp, twTemp, twTemp, twTemp, twTemp, twTemp, twTemp, twTemp);
882  pWLocal += 2;
883  twTemp = *pWLocal;
884  vTwX2 = (CV) (twTemp, twTemp, twTemp, twTemp, twTemp, twTemp, twTemp, twTemp);
885  pWLocal += 2;
886  twTemp = *pWLocal;
887  vTwX3 = (CV) (twTemp, twTemp, twTemp, twTemp, twTemp, twTemp, twTemp, twTemp);
888 #endif
889 
890  if (numPoints == 32) {
891  // clang-format off
892  #if __C7X_HOSTEM__
893  c7x::uchar_vec vXPermCtrl = c7x::uchar_vec(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
894  0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
895  0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
896  0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
897  0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
898  0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
899  0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
900  0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F);
901  #else
902  c7x::uchar_vec vXPermCtrl = (c7x::uchar_vec)(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
903  0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
904  0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
905  0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
906  0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
907  0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
908  0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
909  0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F);
910  #endif
911  // clang-format on
912  CV vX01_2PtDft_1_lo, vX23_2PtDft_1_lo, vX01_2PtDft_2_lo, vX23_2PtDft_2_lo;
913  CV vX01_2PtDft_1_hi, vX23_2PtDft_1_hi, vX01_2PtDft_2_hi, vX23_2PtDft_2_hi;
914 
915  se0_param = *((__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_LOOP6_PARAM_OFFSET));
916  __SE0_OPEN((void *) pXLocal, se0_param);
917 
918  sa0_param = *((__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + SA_LOOP6_PARAM_OFFSET));
919  __SA0_OPEN(sa0_param);
920 
921  for (k = 0; k < numChannels << 5; k += 64) {
922  vX_0 = c7x::strm_eng<0, CV>::get_adv();
923  vX_0_1 = c7x::strm_eng<0, CV>::get_adv();
924  vX_N_4 = c7x::strm_eng<0, CV>::get_adv();
925  vX_N_4_1 = c7x::strm_eng<0, CV>::get_adv();
926  vX_N_2 = c7x::strm_eng<0, CV>::get_adv();
927  vX_N_2_1 = c7x::strm_eng<0, CV>::get_adv();
928  vX_3N_4 = c7x::strm_eng<0, CV>::get_adv();
929  vX_3N_4_1 = c7x::strm_eng<0, CV>::get_adv();
930 
931  vSum1 = vX_0 + vX_N_2;
932  vSum2 = vX_N_4 + vX_3N_4;
933  vDiff1 = vX_0 - vX_N_2;
934  vDiff2 = vX_N_4 - vX_3N_4;
935 
936  vX0 = vSum1 + vSum2;
937  vX1 = vDiff1 - __vcrot90sp_vv(vDiff2);
938  vX2 = vSum1 - vSum2;
939  vX3 = vDiff1 + __vcrot90sp_vv(vDiff2);
940 
941  vSum1_1 = vX_0_1 + vX_N_2_1;
942  vSum2_1 = vX_N_4_1 + vX_3N_4_1;
943  vDiff1_1 = vX_0_1 - vX_N_2_1;
944  vDiff2_1 = vX_N_4_1 - vX_3N_4_1;
945 
946  vX0Temp = vSum1_1 + vSum2_1;
947  vX1Temp = vDiff1_1 - __vcrot90sp_vv(vDiff2_1);
948  vX2Temp = vSum1_1 - vSum2_1;
949  vX3Temp = vDiff1_1 + __vcrot90sp_vv(vDiff2_1);
950 
951  vX0_1 = vX0Temp;
952  vX1_1 = __complex_multiply(vX1Temp, vTwX1);
953  vX2_1 = __complex_multiply(vX2Temp, vTwX2);
954  vX3_1 = __complex_multiply(vX3Temp, vTwX3);
955 
956  vX0_2PtDft_1 = vX0 + vX0_1;
957  vX0_2PtDft_2 = vX0 - vX0_1;
958  vX1_2PtDft_1 = vX1 + vX1_1;
959  vX1_2PtDft_2 = vX1 - vX1_1;
960  vX2_2PtDft_1 = vX2 + vX2_1;
961  vX2_2PtDft_2 = vX2 - vX2_1;
962  vX3_2PtDft_1 = vX3 + vX3_1;
963  vX3_2PtDft_2 = vX3 - vX3_1;
964 
965  vX0_2PtDft_1 = ifft_i32fc_o32fc_scaleAndConjugate(vX0_2PtDft_1, scaleVec, xorVec);
966  vX0_2PtDft_2 = ifft_i32fc_o32fc_scaleAndConjugate(vX0_2PtDft_2, scaleVec, xorVec);
967  vX1_2PtDft_1 = ifft_i32fc_o32fc_scaleAndConjugate(vX1_2PtDft_1, scaleVec, xorVec);
968  vX1_2PtDft_2 = ifft_i32fc_o32fc_scaleAndConjugate(vX1_2PtDft_2, scaleVec, xorVec);
969  vX2_2PtDft_1 = ifft_i32fc_o32fc_scaleAndConjugate(vX2_2PtDft_1, scaleVec, xorVec);
970  vX2_2PtDft_2 = ifft_i32fc_o32fc_scaleAndConjugate(vX2_2PtDft_2, scaleVec, xorVec);
971  vX3_2PtDft_1 = ifft_i32fc_o32fc_scaleAndConjugate(vX3_2PtDft_1, scaleVec, xorVec);
972  vX3_2PtDft_2 = ifft_i32fc_o32fc_scaleAndConjugate(vX3_2PtDft_2, scaleVec, xorVec);
973 
974  /* Permute to obtain bit-reversal order */
975  vX01_2PtDft_1_lo = c7x::as_cfloat_vec(
976  __vpermll_yvvv(vXPermCtrl, c7x::as_uchar_vec(vX1_2PtDft_1), c7x::as_uchar_vec(vX0_2PtDft_1)));
977  vX23_2PtDft_1_lo = c7x::as_cfloat_vec(
978  __vpermll_yvvv(vXPermCtrl, c7x::as_uchar_vec(vX3_2PtDft_1), c7x::as_uchar_vec(vX2_2PtDft_1)));
979  vX01_2PtDft_2_lo = c7x::as_cfloat_vec(
980  __vpermll_yvvv(vXPermCtrl, c7x::as_uchar_vec(vX1_2PtDft_2), c7x::as_uchar_vec(vX0_2PtDft_2)));
981  vX23_2PtDft_2_lo = c7x::as_cfloat_vec(
982  __vpermll_yvvv(vXPermCtrl, c7x::as_uchar_vec(vX3_2PtDft_2), c7x::as_uchar_vec(vX2_2PtDft_2)));
983  vX01_2PtDft_1_hi = c7x::as_cfloat_vec(
984  __vpermhh_yvvv(vXPermCtrl, c7x::as_uchar_vec(vX1_2PtDft_1), c7x::as_uchar_vec(vX0_2PtDft_1)));
985  vX23_2PtDft_1_hi = c7x::as_cfloat_vec(
986  __vpermhh_yvvv(vXPermCtrl, c7x::as_uchar_vec(vX3_2PtDft_1), c7x::as_uchar_vec(vX2_2PtDft_1)));
987  vX01_2PtDft_2_hi = c7x::as_cfloat_vec(
988  __vpermhh_yvvv(vXPermCtrl, c7x::as_uchar_vec(vX1_2PtDft_2), c7x::as_uchar_vec(vX0_2PtDft_2)));
989  vX23_2PtDft_2_hi = c7x::as_cfloat_vec(
990  __vpermhh_yvvv(vXPermCtrl, c7x::as_uchar_vec(vX3_2PtDft_2), c7x::as_uchar_vec(vX2_2PtDft_2)));
991 
992  __vpred tmp = c7x::strm_agen<0, CV>::get_vpred();
993  CVP addr;
994  addr = c7x::strm_agen<0, CV>::get_adv(pYLocal);
995  __vstore_pred(tmp, addr, vX01_2PtDft_1_lo);
996 
997  tmp = c7x::strm_agen<0, CV>::get_vpred();
998  addr = c7x::strm_agen<0, CV>::get_adv(pYLocal);
999  __vstore_pred(tmp, addr, vX23_2PtDft_1_lo);
1000 
1001  tmp = c7x::strm_agen<0, CV>::get_vpred();
1002  addr = c7x::strm_agen<0, CV>::get_adv(pYLocal);
1003  __vstore_pred(tmp, addr, vX01_2PtDft_2_lo);
1004 
1005  tmp = c7x::strm_agen<0, CV>::get_vpred();
1006  addr = c7x::strm_agen<0, CV>::get_adv(pYLocal);
1007  __vstore_pred(tmp, addr, vX23_2PtDft_2_lo);
1008 
1009  tmp = c7x::strm_agen<0, CV>::get_vpred();
1010  addr = c7x::strm_agen<0, CV>::get_adv(pYLocal);
1011  __vstore_pred(tmp, addr, vX01_2PtDft_1_hi);
1012 
1013  tmp = c7x::strm_agen<0, CV>::get_vpred();
1014  addr = c7x::strm_agen<0, CV>::get_adv(pYLocal);
1015  __vstore_pred(tmp, addr, vX23_2PtDft_1_hi);
1016 
1017  tmp = c7x::strm_agen<0, CV>::get_vpred();
1018  addr = c7x::strm_agen<0, CV>::get_adv(pYLocal);
1019  __vstore_pred(tmp, addr, vX01_2PtDft_2_hi);
1020 
1021  tmp = c7x::strm_agen<0, CV>::get_vpred();
1022  addr = c7x::strm_agen<0, CV>::get_adv(pYLocal);
1023  __vstore_pred(tmp, addr, vX23_2PtDft_2_hi);
1024  }
1025  __SE0_CLOSE();
1026  __SA0_CLOSE();
1027  }
1028  else {
1029  se0_param = *((__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_LOOP7_PARAM_OFFSET));
1030  __SE0_OPEN((void *) pXLocal, se0_param);
1031 
1032  numLeadingZeros = __norm((int32_t) (numPoints - 1)) + 1;
1033 
1034  /* pY0 = &pYLocal[0x00000000u]; */
1035  /* pY1 = &pYLocal[0x80000000u >> numLeadingZeros]; */
1036  /* pY2 = &pYLocal[0x20000000u >> numLeadingZeros]; */
1037  /* pY3 = &pYLocal[0xA0000000u >> numLeadingZeros]; */
1038  /* pY4 = &pYLocal[0x40000000u >> numLeadingZeros]; */
1039  /* pY5 = &pYLocal[0xC0000000u >> numLeadingZeros]; */
1040  /* pY6 = &pYLocal[0x60000000u >> numLeadingZeros]; */
1041  /* pY7 = &pYLocal[0xE0000000u >> numLeadingZeros]; */
1042 
1043  pY0 = (cfloat *) (pY + (0x00000000u));
1044  pY1 = (cfloat *) (pY + ((0x80000000u >> numLeadingZeros) << 1));
1045  pY2 = (cfloat *) (pY + ((0x20000000u >> numLeadingZeros) << 1));
1046  pY3 = (cfloat *) (pY + ((0xA0000000u >> numLeadingZeros) << 1));
1047  pY4 = (cfloat *) (pY + ((0x40000000u >> numLeadingZeros) << 1));
1048  pY5 = (cfloat *) (pY + ((0xC0000000u >> numLeadingZeros) << 1));
1049  pY6 = (cfloat *) (pY + ((0x60000000u >> numLeadingZeros) << 1));
1050  pY7 = (cfloat *) (pY + ((0xE0000000u >> numLeadingZeros) << 1));
1051 
1052 #ifdef CL7X_HE_CFLOAT_PTR_BUG
1053  float *myPY0 = (float *) pY0;
1054  float *myPY1 = (float *) pY1;
1055  float *myPY2 = (float *) pY2;
1056  float *myPY3 = (float *) pY3;
1057  float *myPY4 = (float *) pY4;
1058  float *myPY5 = (float *) pY5;
1059  float *myPY6 = (float *) pY6;
1060  float *myPY7 = (float *) pY7;
1061 #endif
1062 
1063  for (l = 0; l < numChannels; l++) {
1064  for (k = 0; k < numPoints >> 3; k += 8) {
1065  offsetBitReverse = __bit_reverse(k) >> numLeadingZeros;
1066 
1067  vX_0 = c7x::strm_eng<0, CV>::get_adv();
1068  vX_0_1 = c7x::strm_eng<0, CV>::get_adv();
1069  vX_N_4 = c7x::strm_eng<0, CV>::get_adv();
1070  vX_N_4_1 = c7x::strm_eng<0, CV>::get_adv();
1071  vX_N_2 = c7x::strm_eng<0, CV>::get_adv();
1072  vX_N_2_1 = c7x::strm_eng<0, CV>::get_adv();
1073  vX_3N_4 = c7x::strm_eng<0, CV>::get_adv();
1074  vX_3N_4_1 = c7x::strm_eng<0, CV>::get_adv();
1075 
1076  vSum1 = vX_0 + vX_N_2;
1077  vSum2 = vX_N_4 + vX_3N_4;
1078  vDiff1 = vX_0 - vX_N_2;
1079  vDiff2 = vX_N_4 - vX_3N_4;
1080 
1081  vX0 = vSum1 + vSum2;
1082  vX1 = vDiff1 - __vcrot90sp_vv(vDiff2);
1083  vX2 = vSum1 - vSum2;
1084  vX3 = vDiff1 + __vcrot90sp_vv(vDiff2);
1085 
1086  vSum1_1 = vX_0_1 + vX_N_2_1;
1087  vSum2_1 = vX_N_4_1 + vX_3N_4_1;
1088  vDiff1_1 = vX_0_1 - vX_N_2_1;
1089  vDiff2_1 = vX_N_4_1 - vX_3N_4_1;
1090 
1091  vX0Temp = vSum1_1 + vSum2_1;
1092  vX1Temp = vDiff1_1 - __vcrot90sp_vv(vDiff2_1);
1093  vX2Temp = vSum1_1 - vSum2_1;
1094  vX3Temp = vDiff1_1 + __vcrot90sp_vv(vDiff2_1);
1095 
1096  vX0_1 = vX0Temp;
1097  vX1_1 = __complex_multiply(vX1Temp, vTwX1);
1098  vX2_1 = __complex_multiply(vX2Temp, vTwX2);
1099  vX3_1 = __complex_multiply(vX3Temp, vTwX3);
1100 
1101  vX0_2PtDft_1 = vX0 + vX0_1;
1102  vX0_2PtDft_2 = vX0 - vX0_1;
1103  vX1_2PtDft_1 = vX1 + vX1_1;
1104  vX1_2PtDft_2 = vX1 - vX1_1;
1105  vX2_2PtDft_1 = vX2 + vX2_1;
1106  vX2_2PtDft_2 = vX2 - vX2_1;
1107  vX3_2PtDft_1 = vX3 + vX3_1;
1108  vX3_2PtDft_2 = vX3 - vX3_1;
1109 
1110  vX0_2PtDft_1 = ifft_i32fc_o32fc_scaleAndConjugate(vX0_2PtDft_1, scaleVec, xorVec);
1111  vX0_2PtDft_2 = ifft_i32fc_o32fc_scaleAndConjugate(vX0_2PtDft_2, scaleVec, xorVec);
1112  vX1_2PtDft_1 = ifft_i32fc_o32fc_scaleAndConjugate(vX1_2PtDft_1, scaleVec, xorVec);
1113  vX1_2PtDft_2 = ifft_i32fc_o32fc_scaleAndConjugate(vX1_2PtDft_2, scaleVec, xorVec);
1114  vX2_2PtDft_1 = ifft_i32fc_o32fc_scaleAndConjugate(vX2_2PtDft_1, scaleVec, xorVec);
1115  vX2_2PtDft_2 = ifft_i32fc_o32fc_scaleAndConjugate(vX2_2PtDft_2, scaleVec, xorVec);
1116  vX3_2PtDft_1 = ifft_i32fc_o32fc_scaleAndConjugate(vX3_2PtDft_1, scaleVec, xorVec);
1117  vX3_2PtDft_2 = ifft_i32fc_o32fc_scaleAndConjugate(vX3_2PtDft_2, scaleVec, xorVec);
1118 
1119  __vstore_reverse_bit((CVP) (pY0 + offsetBitReverse), vX0_2PtDft_1);
1120  __vstore_reverse_bit((CVP) (pY1 + offsetBitReverse), vX0_2PtDft_2);
1121  __vstore_reverse_bit((CVP) (pY2 + offsetBitReverse), vX1_2PtDft_1);
1122  __vstore_reverse_bit((CVP) (pY3 + offsetBitReverse), vX1_2PtDft_2);
1123  __vstore_reverse_bit((CVP) (pY4 + offsetBitReverse), vX2_2PtDft_1);
1124  __vstore_reverse_bit((CVP) (pY5 + offsetBitReverse), vX2_2PtDft_2);
1125  __vstore_reverse_bit((CVP) (pY6 + offsetBitReverse), vX3_2PtDft_1);
1126  __vstore_reverse_bit((CVP) (pY7 + offsetBitReverse), vX3_2PtDft_2);
1127  }
1128 
1129 #ifdef CL7X_HE_CFLOAT_PTR_BUG
1130  myPY0 += (numPoints << 1);
1131  myPY1 += (numPoints << 1);
1132  myPY2 += (numPoints << 1);
1133  myPY3 += (numPoints << 1);
1134  myPY4 += (numPoints << 1);
1135  myPY5 += (numPoints << 1);
1136  myPY6 += (numPoints << 1);
1137  myPY7 += (numPoints << 1);
1138 
1139  pY0 = (cfloat *) myPY0;
1140  pY1 = (cfloat *) myPY1;
1141  pY2 = (cfloat *) myPY2;
1142  pY3 = (cfloat *) myPY3;
1143  pY4 = (cfloat *) myPY4;
1144  pY5 = (cfloat *) myPY5;
1145  pY6 = (cfloat *) myPY6;
1146  pY7 = (cfloat *) myPY7;
1147 
1148 #else
1149  pY0 += numPoints;
1150  pY1 += numPoints;
1151  pY2 += numPoints;
1152  pY3 += numPoints;
1153  pY4 += numPoints;
1154  pY5 += numPoints;
1155  pY6 += numPoints;
1156  pY7 += numPoints;
1157 #endif
1158  }
1159  __SE0_CLOSE();
1160  }
1161  }
1162  }
1163 
1164  return (status);
1165 }
1166 
1167 #if (!defined(FFTLIB_REMOVE_CHECK_PARAMS) && !defined(FFTLIB_IFFT1DBATCHED_I32FC_C32FC_O32FC_REMOVE_CHECK_PARAMS)) || \
1168  (defined(FFTLIB_CHECK_PARAMS)) || (defined(FFTLIB_IFFT1DBATCHED_I32FC_C32FC_O32FC_CHECK_PARAMS))
1169 
1171  FFTLIB_bufParams1D_t *bufParamsX,
1172  FFTLIB_F32 *pW,
1173  FFTLIB_bufParams1D_t *bufParamsW,
1174  FFTLIB_F32 *pY,
1175  FFTLIB_bufParams1D_t *bufParamsY,
1176  uint32_t numPoints,
1177  uint32_t numChannels,
1178  void *pBlock)
1179 {
1180  FFTLIB_STATUS status = FFTLIB_SUCCESS;
1181 
1182  if ((pX == NULL) || (pW == NULL) || (pY == NULL) || (pBlock == NULL)) {
1183  status = FFTLIB_ERR_NULL_POINTER;
1184  }
1185  else if (bufParamsX->dim_x != bufParamsY->dim_x) {
1187  }
1188  else if (bufParamsX->dim_x < numPoints * numChannels * 2) {
1189  /* In general, dim_x == numPoints*numChannels*2. However,
1190  * optimized kernel requires dim_x to be atleast 64*2. Hence, for
1191  * small values of numPoints*numChannels, dim_x could be greater
1192  * than numPoints*numChannels*2 */
1194  }
1195  else if (bufParamsX->dim_x < 64 * 2) {
1197  }
1198  else if (bufParamsW->dim_x != numPoints * 2) {
1200  }
1201  else if ((bufParamsX->data_type != FFTLIB_FLOAT32) || (bufParamsW->data_type != FFTLIB_FLOAT32) ||
1202  (bufParamsY->data_type != FFTLIB_FLOAT32)) {
1203  status = FFTLIB_ERR_INVALID_TYPE;
1204  }
1205  else if (((uint64_t) pX) & 0xFu) { /* pX must be 16-byte aligned for a */
1206  status = FFTLIB_ERR_NOT_ALIGNED_PTRS_STRIDES; /* streaming engine
1207  configuration */
1208  }
1209  else {
1210  /* Check if number of pts is a power of 2 */
1211  uint32_t k = 0;
1212  while (k < 32) {
1213  if (numPoints & (1u << k)) {
1214  break;
1215  }
1216  k++;
1217  }
1218  if ((1u << k) != numPoints) {
1220  }
1221 
1222  if ((numChannels != 1) && (numChannels != 2) && (numChannels != 4) && (numChannels != 8) && (numChannels != 16)) {
1224  }
1225  }
1226  return (status);
1227 }
1228 
1229 #endif
@ FFTLIB_FLOAT32
c7x::cfloat_vec CV
FFTLIB_STATUS_NAME
The enumeration of all status codes.
Definition: FFTLIB_types.h:172
@ FFTLIB_ERR_INVALID_TYPE
Definition: FFTLIB_types.h:176
@ FFTLIB_ERR_NULL_POINTER
Definition: FFTLIB_types.h:178
@ FFTLIB_ERR_INVALID_DIMENSION
Definition: FFTLIB_types.h:177
@ FFTLIB_SUCCESS
Definition: FFTLIB_types.h:173
@ FFTLIB_ERR_NOT_ALIGNED_PTRS_STRIDES
Definition: FFTLIB_types.h:181
float FFTLIB_F32
Single precision floating point.
Definition: FFTLIB_types.h:169
void ifft_i32fc_o32fc_conjugate_init_ci(void *pX, uint32_t size, void *pBlock)
static c7x::cfloat_vec ifft_i32fc_o32fc_scaleAndConjugate(c7x::cfloat_vec in, c7x::float_vec scaleVec, c7x::ulong_vec xorVec)
void ifft_i32fc_o32fc_conjugate_exec_ci(void *pX, c7x::ulong_vec xorVec, uint32_t size, uint32_t numPoints, void *pBlock)
FFTLIB_STATUS FFTLIB_ifft1dBatched_i32fc_c32fc_o32fc_kernel(FFTLIB_F32 *pX, FFTLIB_bufParams1D_t *bufParamsX, FFTLIB_F32 *pW, FFTLIB_bufParams1D_t *bufParamsW, FFTLIB_F32 *pY, FFTLIB_bufParams1D_t *bufParamsY, uint32_t numPoints, uint32_t numChannels, void *pBlock)
This function is the main kernel compute function.
FFTLIB_STATUS FFTLIB_ifft1dBatched_i32fc_c32fc_o32fc_init(FFTLIB_F32 *pX, FFTLIB_bufParams1D_t *bufParamsX, FFTLIB_F32 *pW, FFTLIB_bufParams1D_t *bufParamsW, FFTLIB_F32 *pY, FFTLIB_bufParams1D_t *bufParamsY, uint32_t numPoints, uint32_t numChannels, void *pBlock)
This function should be called before the FFTLIB_ifft1dBatched_i32fc_c32fc_o32fc_kernel function is called.
FFTLIB_STATUS FFTLIB_ifft1dBatched_i32fc_c32fc_o32fc_checkParams(FFTLIB_F32 *pX, FFTLIB_bufParams1D_t *bufParamsX, FFTLIB_F32 *pW, FFTLIB_bufParams1D_t *bufParamsW, FFTLIB_F32 *pY, FFTLIB_bufParams1D_t *bufParamsY, uint32_t numPoints, uint32_t numChannels, void *pBlock)
This function checks the validity of the parameters passed to FFTLIB_ifft1dBatched_i32fc_c32fc_o32fc_kernel.
A structure for a 1 dimensional buffer descriptor.
uint32_t data_type
Values are of type FFTLIB_data_type_e.
uint32_t dim_x
Width of buffer in X dimension in elements.