29 #include "../FFTLIB_ifft1dBatched_i32fc_c32fc_o32fc.h"
34 #include "../../../common/printv.h"
/*
 * Byte offsets of the saved streaming-engine (SE) and streaming-address
 * generator (SA) parameter templates.
 * NOTE(review): the offsets are presumably applied to pBlock -- confirm
 * against the init/kernel code that stores and loads the templates.
 *
 * Each entry is the previous entry's offset plus the size of the previous
 * entry, so an SE slot is followed by "+ SE_PARAM_SIZE" and an SA slot by
 * "+ SA_PARAM_SIZE".  SE_PARAM_SIZE and SA_PARAM_SIZE are not defined in
 * this table; they must be provided by an included header.
 */
#define SE_PARAM_BASE (0x0000)

/* SE templates. */
#define SE_LOOP1_PARAM_OFFSET (SE_PARAM_BASE)
#define SE_LOOP2_PARAM_OFFSET (SE_LOOP1_PARAM_OFFSET + SE_PARAM_SIZE)
#define SE_LOOP3_PARAM_OFFSET (SE_LOOP2_PARAM_OFFSET + SE_PARAM_SIZE)
#define SE_LOOP4_PARAM_OFFSET (SE_LOOP3_PARAM_OFFSET + SE_PARAM_SIZE)
#define SE_LOOP5_PARAM_OFFSET (SE_LOOP4_PARAM_OFFSET + SE_PARAM_SIZE)
#define SE_LOOP6_PARAM_OFFSET (SE_LOOP5_PARAM_OFFSET + SE_PARAM_SIZE)
#define SE_LOOP7_PARAM_OFFSET (SE_LOOP6_PARAM_OFFSET + SE_PARAM_SIZE)
#define SE_TWID_PARAM_OFFSET (SE_LOOP7_PARAM_OFFSET + SE_PARAM_SIZE)

/* SA templates (there is no SA_LOOP5 slot in this table). */
#define SA_LOOP1_PARAM_OFFSET (SE_TWID_PARAM_OFFSET + SE_PARAM_SIZE)
#define SA_LOOP2_PARAM_OFFSET (SA_LOOP1_PARAM_OFFSET + SA_PARAM_SIZE)
#define SA_LOOP3_PARAM_OFFSET (SA_LOOP2_PARAM_OFFSET + SA_PARAM_SIZE)
#define SA_LOOP4_PARAM_OFFSET (SA_LOOP3_PARAM_OFFSET + SA_PARAM_SIZE)
#define SA_LOOP6_PARAM_OFFSET (SA_LOOP4_PARAM_OFFSET + SA_PARAM_SIZE)

/*
 * Templates for the conjugate loop.  The slot in front of the SE conjugate
 * template is an SA template (SA_LOOP6), so it is skipped with
 * SA_PARAM_SIZE; using SE_PARAM_SIZE here is only correct when both sizes
 * happen to be equal.
 */
#define SE_CONJ_LOOP_PARAM_OFFSET (SA_LOOP6_PARAM_OFFSET + SA_PARAM_SIZE)
#define SA_CONJ_LOOP_PARAM_OFFSET (SE_CONJ_LOOP_PARAM_OFFSET + SE_PARAM_SIZE)
62 __SE_TEMPLATE_v1 se0_param = __gen_SE_TEMPLATE_v1();
63 __SA_TEMPLATE_v1 sa0_param = __gen_SA_TEMPLATE_v1();
66 se0_param.ELETYPE = __SE_ELETYPE_32BIT_CMPLX_SWAP;
67 se0_param.VECLEN = c7x::se_veclen<c7x::cfloat_vec>::value;
68 se0_param.DIMFMT = __SE_DIMFMT_1D;
69 se0_param.ICNT0 = size;
73 sa0_param.ICNT0 = size;
74 sa0_param.VECLEN = c7x::sa_veclen<c7x::cfloat_vec>::value;
75 sa0_param.DIMFMT = __SA_DIMFMT_1D;
93 #if defined(FFTLIB_CHECK_PARAMS) || defined(FFTLIB_IFFT1DBATCHED_I32FC_C32FC_O32FC_CHECK_PARAMS)
101 uint32_t numPointsPerDft;
102 uint32_t seCnt1, seCnt2, seCnt3, seCnt4;
103 uint32_t seCnt6, seCnt7, seCnt8, seCnt9, seCnt10;
105 __SE_TEMPLATE_v1 se0_param = __gen_SE_TEMPLATE_v1();
106 __SE_TEMPLATE_v1 se1_param = __gen_SE_TEMPLATE_v1();
107 __SA_TEMPLATE_v1 sa0_param = __gen_SA_TEMPLATE_v1();
109 numPointsPerDft = numPoints;
110 seCnt1 = numPoints >> 2;
111 seCnt2 = numPoints >> 4;
113 seCnt4 = numPoints >> 2;
114 seCnt6 = seCnt3 * numChannels;
115 seCnt7 = (numPoints * numChannels >> 4) > 1 ? numPoints * numChannels >> 4 : 1;
116 seCnt8 = numPoints * numChannels;
117 seCnt9 = (numPoints * numChannels > 16) ? numPoints * numChannels : 16;
118 seCnt10 = (numPoints * numChannels >> 5) > 1 ? numPoints * numChannels >> 5 : 1;
119 seCnt11 = (numPoints * numChannels > 32) ? numPoints * numChannels : 32;
124 uint32_t elementSize = c7x::element_count_of<c7x::cfloat_vec>::value;
126 se0_param = __gen_SE_TEMPLATE_v1();
127 se0_param.ICNT0 = elementSize;
129 se0_param.DIM1 = seCnt1;
130 se0_param.ICNT2 = seCnt2;
131 se0_param.DIM2 = elementSize;
132 se0_param.ICNT3 = seCnt6;
133 se0_param.DIM3 = numPointsPerDft;
135 se0_param.ELETYPE = __SE_ELETYPE_32BIT_CMPLX_SWAP;
136 se0_param.VECLEN = c7x::se_veclen<c7x::cfloat_vec>::value;
137 se0_param.DIMFMT = __SE_DIMFMT_4D;
140 se1_param = __gen_SE_TEMPLATE_v1();
141 se1_param.ICNT0 = elementSize;
143 se1_param.DIM1 = seCnt1;
144 se1_param.ICNT2 = seCnt2;
145 se1_param.DIM2 = elementSize;
146 se1_param.ICNT3 = seCnt6;
149 se1_param.ELETYPE = __SE_ELETYPE_32BIT_CMPLX_SWAP;
150 se1_param.VECLEN = c7x::se_veclen<c7x::cfloat_vec>::value;
151 se1_param.DIMFMT = __SE_DIMFMT_4D;
154 sa0_param = __gen_SA_TEMPLATE_v1();
155 sa0_param.ICNT0 = elementSize;
157 sa0_param.DIM1 = seCnt1;
158 sa0_param.ICNT2 = seCnt2;
159 sa0_param.DIM2 = elementSize;
160 sa0_param.ICNT3 = seCnt6;
161 sa0_param.DIM3 = numPointsPerDft;
163 sa0_param.VECLEN = c7x::sa_veclen<c7x::cfloat_vec>::value;
164 sa0_param.DIMFMT = __SA_DIMFMT_4D;
167 se0_param = __gen_SE_TEMPLATE_v1();
168 se0_param.ICNT0 = elementSize;
171 se0_param.ICNT2 = seCnt7;
174 se0_param.ELETYPE = __SE_ELETYPE_32BIT_CMPLX_SWAP;
178 se0_param.VECLEN = c7x::se_veclen<c7x::cfloat_vec>::value;
179 se0_param.DIMFMT = __SE_DIMFMT_3D;
182 sa0_param = __gen_SA_TEMPLATE_v1();
183 sa0_param.ICNT0 = seCnt8;
187 sa0_param.VECLEN = c7x::sa_veclen<c7x::cfloat_vec>::value;
188 sa0_param.DIMFMT = __SA_DIMFMT_1D;
191 se0_param = __gen_SE_TEMPLATE_v1();
192 se0_param.ICNT0 = seCnt8;
194 se0_param.ELETYPE = __SE_ELETYPE_32BIT_CMPLX_SWAP;
195 se0_param.VECLEN = c7x::se_veclen<c7x::cfloat_vec>::value;
196 se0_param.DIMFMT = __SE_DIMFMT_1D;
199 sa0_param = __gen_SA_TEMPLATE_v1();
200 sa0_param.ICNT0 = seCnt8;
202 sa0_param.VECLEN = c7x::sa_veclen<c7x::cfloat_vec>::value;
203 sa0_param.DIMFMT = __SA_DIMFMT_1D;
209 se0_param = __gen_SE_TEMPLATE_v1();
213 se0_param.ICNT2 = seCnt7;
216 se0_param.ELETYPE = __SE_ELETYPE_32BIT_CMPLX_SWAP;
217 se0_param.TRANSPOSE = __SE_TRANSPOSE_64BIT;
218 se0_param.VECLEN = c7x::se_veclen<c7x::cfloat_vec>::value;
219 se0_param.DIMFMT = __SE_DIMFMT_3D;
222 sa0_param = __gen_SA_TEMPLATE_v1();
223 sa0_param.ICNT0 = seCnt9;
227 sa0_param.VECLEN = c7x::sa_veclen<c7x::cfloat_vec>::value;
228 sa0_param.DIMFMT = __SA_DIMFMT_1D;
231 se0_param = __gen_SE_TEMPLATE_v1();
232 se0_param.ICNT0 = seCnt4;
233 se0_param.ICNT1 = elementSize;
234 se0_param.DIM1 = seCnt4;
235 se0_param.ICNT2 = numChannels;
236 se0_param.DIM2 = numPoints;
238 se0_param.ELETYPE = __SE_ELETYPE_32BIT_CMPLX_SWAP;
239 se0_param.TRANSPOSE = __SE_TRANSPOSE_64BIT;
240 se0_param.VECLEN = c7x::se_veclen<c7x::cfloat_vec>::value;
241 se0_param.DIMFMT = __SE_DIMFMT_3D;
244 se0_param = __gen_SE_TEMPLATE_v1();
248 se0_param.ICNT2 = seCnt10;
251 se0_param.ELETYPE = __SE_ELETYPE_32BIT_CMPLX_SWAP;
252 se0_param.TRANSPOSE = __SE_TRANSPOSE_64BIT;
253 se0_param.VECLEN = c7x::se_veclen<c7x::cfloat_vec>::value;
254 se0_param.DIMFMT = __SE_DIMFMT_3D;
257 sa0_param = __gen_SA_TEMPLATE_v1();
258 sa0_param.ICNT0 = seCnt11;
262 sa0_param.VECLEN = c7x::sa_veclen<c7x::cfloat_vec>::value;
263 sa0_param.DIMFMT = __SA_DIMFMT_1D;
266 se0_param = __gen_SE_TEMPLATE_v1();
267 se0_param.ICNT0 = seCnt4;
268 se0_param.ICNT1 = elementSize;
269 se0_param.DIM1 = seCnt4;
270 se0_param.ICNT2 = numChannels;
271 se0_param.DIM2 = numPoints;
273 se0_param.ELETYPE = __SE_ELETYPE_32BIT_CMPLX_SWAP;
274 se0_param.TRANSPOSE = __SE_TRANSPOSE_64BIT;
275 se0_param.VECLEN = c7x::se_veclen<c7x::cfloat_vec>::value;
276 se0_param.DIMFMT = __SE_DIMFMT_3D;
283 static inline c7x::cfloat_vec
286 return (c7x::as_cfloat_vec(scaleVec * c7x::as_float_vec(c7x::as_ulong_vec(in) ^ xorVec)));
293 typedef typename c7x::cfloat_vec
CV;
296 __SE_TEMPLATE_v1 se0_param = __gen_SE_TEMPLATE_v1();
297 __SA_TEMPLATE_v1 sa0_param = __gen_SA_TEMPLATE_v1();
298 cfloat *restrict pXLocal = (cfloat *) pX;
304 __SE0_OPEN(pX, se0_param);
305 __SA0_OPEN(sa0_param);
308 uint32_t loopCount = (size) / c7x::element_count_of<c7x::cfloat_vec>::value;
309 c7x::cfloat_vec regIn, regStore;
310 float scale = 1.0f / numPoints;
311 c7x::float_vec scaleVec = __vload_dup(&scale);
316 for (i = 0; i < loopCount; i++) {
317 regIn = c7x::strm_eng<0, c7x::cfloat_vec>::get_adv();
323 tmp = c7x::strm_agen<0, CV>::get_vpred();
324 addr = c7x::strm_agen<0, CV>::get_adv(&pXLocal[0]);
325 __vstore_pred(tmp, addr, regStore);
340 uint32_t numChannels,
345 uint32_t numPointsPerDft;
346 uint32_t numLeadingZeros;
347 uint32_t offsetBitReverse;
348 uint32_t seCnt1, seCnt2, seCnt3, seCnt6;
350 __SE_TEMPLATE_v1 se0_param;
351 __SE_TEMPLATE_v1 se1_param;
352 __SA_TEMPLATE_v1 sa0_param;
354 cfloat *restrict pXLocal;
355 cfloat *restrict pYLocal;
356 cfloat *restrict pWLocal;
357 cfloat *restrict pY0;
358 cfloat *restrict pY1;
359 cfloat *restrict pY2;
360 cfloat *restrict pY3;
361 cfloat *restrict pY4;
362 cfloat *restrict pY5;
363 cfloat *restrict pY6;
364 cfloat *restrict pY7;
366 typedef typename c7x::cfloat_vec
CV;
372 CV vX_0, vX_N_4, vX_N_2, vX_3N_4;
373 CV vSum1, vSum2, vDiff1, vDiff2;
374 CV vTwX1, vTwX2, vTwX3;
375 CV vX0Temp, vX1Temp, vX2Temp, vX3Temp;
376 CV vX0, vX1, vX2, vX3;
377 CV vX_0_1, vX_N_4_1, vX_N_2_1, vX_3N_4_1;
378 CV vSum1_1, vSum2_1, vDiff1_1, vDiff2_1;
379 CV vX0_1, vX1_1, vX2_1, vX3_1;
380 CV vX0_2PtDft_1, vX0_2PtDft_2;
381 CV vX1_2PtDft_1, vX1_2PtDft_2;
382 CV vX2_2PtDft_1, vX2_2PtDft_2;
383 CV vX3_2PtDft_1, vX3_2PtDft_2;
384 CV vX01_lo, vX23_lo, vX01_hi, vX23_hi;
387 #ifdef FFTLIB_CHECK_PARAMS
395 numPointsPerDft = numPoints;
398 c7x::float_vec scaleVec = __vload_dup(&scale);
401 #if defined(_HOST_BUILD)
402 c7x::ulong_vec xorVec = (c7x::ulong_vec)(0x0000000080000000);
405 c7x::ulong_vec xorVec = (0x0000000080000000);
414 seCnt1 = numPointsPerDft >> 2;
415 seCnt2 = numPointsPerDft >> 4;
418 pXLocal = (cfloat *) pX;
419 pWLocal = (cfloat *) pW;
420 pYLocal = (cfloat *) pY;
422 while (numPointsPerDft >= 16) {
424 seCnt6 = seCnt3 * numChannels;
426 se0_param.DIM1 = seCnt1;
427 se0_param.ICNT2 = seCnt2;
429 se0_param.ICNT3 = seCnt6;
430 se0_param.DIM3 = numPointsPerDft;
431 __SE0_OPEN((
void *) pXLocal, se0_param);
434 se1_param.DIM1 = seCnt1;
435 se1_param.ICNT2 = seCnt2;
437 se1_param.ICNT3 = seCnt6;
439 __SE1_OPEN((
void *) pWLocal, se1_param);
442 sa0_param.DIM1 = seCnt1;
443 sa0_param.ICNT2 = seCnt2;
446 sa0_param.ICNT3 = seCnt6;
447 sa0_param.DIM3 = numPointsPerDft;
448 __SA0_OPEN(sa0_param);
451 for (k = 0; k < numPoints * numChannels; k += 32) {
454 vX_0 = c7x::strm_eng<0, CV>::get_adv();
455 vX_N_4 = c7x::strm_eng<0, CV>::get_adv();
456 vX_N_2 = c7x::strm_eng<0, CV>::get_adv();
457 vX_3N_4 = c7x::strm_eng<0, CV>::get_adv();
459 vSum1 = vX_0 + vX_N_2;
460 vSum2 = vX_N_4 + vX_3N_4;
461 vDiff1 = vX_0 - vX_N_2;
462 vDiff2 = vX_N_4 - vX_3N_4;
464 vTwX1 = c7x::strm_eng<1, CV>::get_adv();
465 vTwX2 = c7x::strm_eng<1, CV>::get_adv();
466 vTwX3 = c7x::strm_eng<1, CV>::get_adv();
468 vX0Temp = vSum1 + vSum2;
469 vX1Temp = vDiff1 - __vcrot90sp_vv(vDiff2);
470 vX2Temp = vSum1 - vSum2;
471 vX3Temp = vDiff1 + __vcrot90sp_vv(vDiff2);
474 vX1 = __complex_multiply(vX1Temp, vTwX1);
475 vX2 = __complex_multiply(vX2Temp, vTwX2);
476 vX3 = __complex_multiply(vX3Temp, vTwX3);
481 tmp = c7x::strm_agen<0, CV>::get_vpred();
482 addr = c7x::strm_agen<0, CV>::get_adv(pXLocal);
483 __vstore_pred(tmp, addr, vX0);
485 tmp = c7x::strm_agen<0, CV>::get_vpred();
486 addr = c7x::strm_agen<0, CV>::get_adv(pXLocal);
487 __vstore_pred(tmp, addr, vX2);
489 tmp = c7x::strm_agen<0, CV>::get_vpred();
490 addr = c7x::strm_agen<0, CV>::get_adv(pXLocal);
491 __vstore_pred(tmp, addr, vX1);
493 tmp = c7x::strm_agen<0, CV>::get_vpred();
494 addr = c7x::strm_agen<0, CV>::get_adv(pXLocal);
495 __vstore_pred(tmp, addr, vX3);
498 vX_0 = c7x::strm_eng<0, CV>::get_adv();
499 vX_N_4 = c7x::strm_eng<0, CV>::get_adv();
500 vX_N_2 = c7x::strm_eng<0, CV>::get_adv();
501 vX_3N_4 = c7x::strm_eng<0, CV>::get_adv();
503 vSum1 = vX_0 + vX_N_2;
504 vSum2 = vX_N_4 + vX_3N_4;
505 vDiff1 = vX_0 - vX_N_2;
506 vDiff2 = vX_N_4 - vX_3N_4;
508 vTwX1 = c7x::strm_eng<1, CV>::get_adv();
509 vTwX2 = c7x::strm_eng<1, CV>::get_adv();
510 vTwX3 = c7x::strm_eng<1, CV>::get_adv();
512 vX0Temp = vSum1 + vSum2;
513 vX1Temp = vDiff1 - __vcrot90sp_vv(vDiff2);
514 vX2Temp = vSum1 - vSum2;
515 vX3Temp = vDiff1 + __vcrot90sp_vv(vDiff2);
518 vX1 = __complex_multiply(vX1Temp, vTwX1);
519 vX2 = __complex_multiply(vX2Temp, vTwX2);
520 vX3 = __complex_multiply(vX3Temp, vTwX3);
522 tmp = c7x::strm_agen<0, CV>::get_vpred();
523 addr = c7x::strm_agen<0, CV>::get_adv(pXLocal);
524 __vstore_pred(tmp, addr, vX0);
526 tmp = c7x::strm_agen<0, CV>::get_vpred();
527 addr = c7x::strm_agen<0, CV>::get_adv(pXLocal);
528 __vstore_pred(tmp, addr, vX2);
530 tmp = c7x::strm_agen<0, CV>::get_vpred();
531 addr = c7x::strm_agen<0, CV>::get_adv(pXLocal);
532 __vstore_pred(tmp, addr, vX1);
534 tmp = c7x::strm_agen<0, CV>::get_vpred();
535 addr = c7x::strm_agen<0, CV>::get_adv(pXLocal);
536 __vstore_pred(tmp, addr, vX3);
542 numPointsPerDft >>= 2;
543 pWLocal += numPointsPerDft * 3;
549 if (numPointsPerDft == 16) {
552 __SE0_OPEN((
void *) pXLocal, se0_param);
553 __SE1_OPEN((
void *) (pXLocal + 8), se0_param);
556 __SA0_OPEN(sa0_param);
558 vTwX1 = *((
CVP) pWLocal);
559 vTwX2 = *((
CVP) (pWLocal + 4));
560 vTwX3 = *((
CVP) (pWLocal + 8));
563 vTwX1 =
CV(vTwX1.lo(), vTwX1.lo());
564 vTwX2 =
CV(vTwX2.lo(), vTwX2.lo());
565 vTwX3 =
CV(vTwX3.lo(), vTwX3.lo());
567 vTwX1 = (
CV) (vTwX1.lo(), vTwX1.lo());
568 vTwX2 = (
CV) (vTwX2.lo(), vTwX2.lo());
569 vTwX3 = (
CV) (vTwX3.lo(), vTwX3.lo());
572 for (k = 0; k < numPoints * numChannels; k += 32) {
573 vX_0 = c7x::strm_eng<0, CV>::get_adv();
574 vX_N_4 = c7x::strm_eng<0, CV>::get_adv();
575 vX_N_2 = c7x::strm_eng<1, CV>::get_adv();
576 vX_3N_4 = c7x::strm_eng<1, CV>::get_adv();
578 vSum1 = vX_0 + vX_N_2;
579 vSum2 = vX_N_4 + vX_3N_4;
580 vDiff1 = vX_0 - vX_N_2;
581 vDiff2 = vX_N_4 - vX_3N_4;
583 vX0Temp = vSum1 + vSum2;
584 vX1Temp = vDiff1 - __vcrot90sp_vv(vDiff2);
585 vX2Temp = vSum1 - vSum2;
586 vX3Temp = vDiff1 + __vcrot90sp_vv(vDiff2);
589 vX1 = __complex_multiply(vX1Temp, vTwX1);
590 vX2 = __complex_multiply(vX2Temp, vTwX2);
591 vX3 = __complex_multiply(vX3Temp, vTwX3);
594 __vpred tmp = c7x::strm_agen<0, CV>::get_vpred();
596 addr = c7x::strm_agen<0, CV>::get_adv(pXLocal);
597 __vstore_pred(tmp, addr,
CV(vX0.lo(), vX2.lo()));
599 tmp = c7x::strm_agen<0, CV>::get_vpred();
600 addr = c7x::strm_agen<0, CV>::get_adv(pXLocal);
601 __vstore_pred(tmp, addr,
CV(vX1.lo(), vX3.lo()));
603 tmp = c7x::strm_agen<0, CV>::get_vpred();
604 addr = c7x::strm_agen<0, CV>::get_adv(pXLocal);
605 __vstore_pred(tmp, addr,
CV(vX0.hi(), vX2.hi()));
607 tmp = c7x::strm_agen<0, CV>::get_vpred();
608 addr = c7x::strm_agen<0, CV>::get_adv(pXLocal);
609 __vstore_pred(tmp, addr,
CV(vX1.hi(), vX3.hi()));
611 __vpred tmp = c7x::strm_agen<0, CV>::get_vpred();
613 addr = c7x::strm_agen<0, CV>::get_adv(pXLocal);
614 __vstore_pred(tmp, addr, (
CV) (vX0.lo(), vX2.lo()));
616 tmp = c7x::strm_agen<0, CV>::get_vpred();
617 addr = c7x::strm_agen<0, CV>::get_adv(pXLocal);
618 __vstore_pred(tmp, addr, (
CV) (vX1.lo(), vX3.lo()));
620 tmp = c7x::strm_agen<0, CV>::get_vpred();
621 addr = c7x::strm_agen<0, CV>::get_adv(pXLocal);
622 __vstore_pred(tmp, addr, (
CV) (vX0.hi(), vX2.hi()));
624 tmp = c7x::strm_agen<0, CV>::get_vpred();
625 addr = c7x::strm_agen<0, CV>::get_adv(pXLocal);
626 __vstore_pred(tmp, addr, (
CV) (vX1.hi(), vX3.hi()));
638 __SE0_OPEN ((
void *) pXLocal, se0_param);
642 __SA0_OPEN (sa0_param);
644 vTwX1 = *((
CVP) pWLocal);
645 vTwX2 = *((
CVP) (pWLocal + 8));
646 vTwX3 = *((
CVP) (pWLocal + 16));
648 for (k = 0; k < numPoints * numChannels; k += 32) {
649 vX_0 = c7x::strm_eng<0, CV>::get_adv ();
650 vX_N_4 = c7x::strm_eng<0, CV>::get_adv ();
651 vX_N_2 = c7x::strm_eng<0, CV>::get_adv ();
652 vX_3N_4 = c7x::strm_eng<0, CV>::get_adv ();
654 vSum1 = vX_0 + vX_N_2;
655 vSum2 = vX_N_4 + vX_3N_4;
656 vDiff1 = vX_0 - vX_N_2;
657 vDiff2 = vX_N_4 - vX_3N_4;
659 vX0Temp = vSum1 + vSum2;
660 vX1Temp = vDiff1 - __vcrot90sp_vv (vDiff2);
661 vX2Temp = vSum1 - vSum2;
662 vX3Temp = vDiff1 + __vcrot90sp_vv (vDiff2);
665 vX1 = __complex_multiply (vX1Temp, vTwX1);
666 vX2 = __complex_multiply (vX2Temp, vTwX2);
667 vX3 = __complex_multiply (vX3Temp, vTwX3);
669 __vpred tmp = c7x::strm_agen<0, CV>::get_vpred ();
671 addr = c7x::strm_agen<0, CV>::get_adv (pXLocal);
672 __vstore_pred (tmp, addr, vX0);
674 tmp = c7x::strm_agen<0, CV>::get_vpred ();
675 addr = c7x::strm_agen<0, CV>::get_adv (pXLocal);
676 __vstore_pred (tmp, addr, vX2);
678 tmp = c7x::strm_agen<0, CV>::get_vpred ();
679 addr = c7x::strm_agen<0, CV>::get_adv (pXLocal);
680 __vstore_pred (tmp, addr, vX1);
682 tmp = c7x::strm_agen<0, CV>::get_vpred ();
683 addr = c7x::strm_agen<0, CV>::get_adv (pXLocal);
684 __vstore_pred (tmp, addr, vX3);
694 if (numPointsPerDft == 4) {
697 if (numPoints == 16) {
701 c7x::uchar_vec vXPermCtrl = c7x::uchar_vec(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
702 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
703 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
704 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
705 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
706 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
707 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
708 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F);
710 c7x::uchar_vec vXPermCtrl = (c7x::uchar_vec)(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
711 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
712 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
713 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
714 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
715 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
716 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
717 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F);
721 se0_param = *((__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock +
723 __SE0_OPEN ((
void *) pXLocal, se0_param);
725 sa0_param = *((__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock +
727 __SA0_OPEN (sa0_param);
729 for (k = 0; k < numChannels << 4; k += 32) {
730 vX_0 = c7x::strm_eng<0, CV>::get_adv ();
731 vX_N_4 = c7x::strm_eng<0, CV>::get_adv ();
732 vX_N_2 = c7x::strm_eng<0, CV>::get_adv ();
733 vX_3N_4 = c7x::strm_eng<0, CV>::get_adv ();
735 vSum1 = vX_0 + vX_N_2;
736 vSum2 = vX_N_4 + vX_3N_4;
737 vDiff1 = vX_0 - vX_N_2;
738 vDiff2 = vX_N_4 - vX_3N_4;
741 vX1 = vDiff1 - __vcrot90sp_vv (vDiff2);
743 vX3 = vDiff1 + __vcrot90sp_vv (vDiff2);
745 vX01_lo = c7x::as_cfloat_vec (
746 __vpermll_yvvv (vXPermCtrl, c7x::as_uchar_vec (vX1),
747 c7x::as_uchar_vec (vX0)));
748 vX23_lo = c7x::as_cfloat_vec (
749 __vpermll_yvvv (vXPermCtrl, c7x::as_uchar_vec (vX3),
750 c7x::as_uchar_vec (vX2)));
751 vX01_hi = c7x::as_cfloat_vec (
752 __vpermhh_yvvv (vXPermCtrl, c7x::as_uchar_vec (vX1),
753 c7x::as_uchar_vec (vX0)));
754 vX23_hi = c7x::as_cfloat_vec (
755 __vpermhh_yvvv (vXPermCtrl, c7x::as_uchar_vec (vX3),
756 c7x::as_uchar_vec (vX2)));
758 __vpred tmp = c7x::strm_agen<0, CV>::get_vpred ();
760 addr = c7x::strm_agen<0, CV>::get_adv (pYLocal);
761 __vstore_pred (tmp, addr, vX01_lo);
763 tmp = c7x::strm_agen<0, CV>::get_vpred ();
764 addr = c7x::strm_agen<0, CV>::get_adv (pYLocal);
765 __vstore_pred (tmp, addr, vX23_lo);
767 tmp = c7x::strm_agen<0, CV>::get_vpred ();
768 addr = c7x::strm_agen<0, CV>::get_adv (pYLocal);
769 __vstore_pred (tmp, addr, vX01_hi);
771 tmp = c7x::strm_agen<0, CV>::get_vpred ();
772 addr = c7x::strm_agen<0, CV>::get_adv (pYLocal);
773 __vstore_pred (tmp, addr, vX23_hi);
781 __SE0_OPEN((
void *) pXLocal, se0_param);
783 numLeadingZeros = __norm((int32_t) (numPoints - 1)) + 1;
790 pY0 = (cfloat *) (pY + 0);
791 pY1 = (cfloat *) (pY + ((0x40000000u >> numLeadingZeros) << 1));
792 pY2 = (cfloat *) (pY + ((0x80000000u >> numLeadingZeros) << 1));
793 pY3 = (cfloat *) (pY + ((0xC0000000u >> numLeadingZeros) << 1));
795 #ifdef CL7X_HE_CFLOAT_PTR_BUG
796 float *myPY0 = (
float *) pY0;
797 float *myPY1 = (
float *) pY1;
798 float *myPY2 = (
float *) pY2;
799 float *myPY3 = (
float *) pY3;
802 for (l = 0; l < numChannels; l++) {
803 for (k = 0; k < numPoints >> 2; k += 4) {
804 offsetBitReverse = __bit_reverse(k) >> numLeadingZeros;
806 vX_0 = c7x::strm_eng<0, CV>::get_adv();
807 vX_N_4 = c7x::strm_eng<0, CV>::get_adv();
808 vX_N_2 = c7x::strm_eng<0, CV>::get_adv();
809 vX_3N_4 = c7x::strm_eng<0, CV>::get_adv();
811 vSum1 = vX_0 + vX_N_2;
812 vSum2 = vX_N_4 + vX_3N_4;
813 vDiff1 = vX_0 - vX_N_2;
814 vDiff2 = vX_N_4 - vX_3N_4;
817 vX1 = vDiff1 - __vcrot90sp_vv(vDiff2);
819 vX3 = vDiff1 + __vcrot90sp_vv(vDiff2);
826 __vstore_reverse_bit((
CVP) (pY0 + offsetBitReverse), vX0);
827 __vstore_reverse_bit((
CVP) (pY1 + offsetBitReverse), vX1);
828 __vstore_reverse_bit((
CVP) (pY2 + offsetBitReverse), vX2);
829 __vstore_reverse_bit((
CVP) (pY3 + offsetBitReverse), vX3);
832 #ifdef CL7X_HE_CFLOAT_PTR_BUG
833 myPY0 += (numPoints << 1);
834 myPY1 += (numPoints << 1);
835 myPY2 += (numPoints << 1);
836 myPY3 += (numPoints << 1);
838 pY0 = (cfloat *) myPY0;
839 pY1 = (cfloat *) myPY1;
840 pY2 = (cfloat *) myPY2;
841 pY3 = (cfloat *) myPY3;
859 vTwX1 =
CV(twTemp, twTemp, twTemp, twTemp);
862 vTwX2 =
CV(twTemp, twTemp, twTemp, twTemp);
865 vTwX3 =
CV(twTemp, twTemp, twTemp, twTemp);
869 vTwX1 = (
CV) (twTemp, twTemp, twTemp, twTemp);
872 vTwX2 = (
CV) (twTemp, twTemp, twTemp, twTemp);
875 vTwX3 = (
CV) (twTemp, twTemp, twTemp, twTemp);
880 if (numPoints == 32) {
884 c7x::uchar_vec vXPermCtrl = c7x::uchar_vec(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
885 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
886 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
887 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
888 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
889 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
890 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
891 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F);
893 c7x::uchar_vec vXPermCtrl = (c7x::uchar_vec)(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
894 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
895 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
896 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
897 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
898 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
899 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
900 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F);
903 CV vX01_2PtDft_1_lo, vX23_2PtDft_1_lo, vX01_2PtDft_2_lo,
905 CV vX01_2PtDft_1_hi, vX23_2PtDft_1_hi, vX01_2PtDft_2_hi,
908 se0_param = *((__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock +
910 __SE0_OPEN ((
void *) pXLocal, se0_param);
912 sa0_param = *((__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock +
914 __SA0_OPEN (sa0_param);
916 for (k = 0; k < numChannels << 5; k += 64) {
917 vX_0 = c7x::strm_eng<0, CV>::get_adv ();
918 vX_0_1 = c7x::strm_eng<0, CV>::get_adv ();
919 vX_N_4 = c7x::strm_eng<0, CV>::get_adv ();
920 vX_N_4_1 = c7x::strm_eng<0, CV>::get_adv ();
921 vX_N_2 = c7x::strm_eng<0, CV>::get_adv ();
922 vX_N_2_1 = c7x::strm_eng<0, CV>::get_adv ();
923 vX_3N_4 = c7x::strm_eng<0, CV>::get_adv ();
924 vX_3N_4_1 = c7x::strm_eng<0, CV>::get_adv ();
926 vSum1 = vX_0 + vX_N_2;
927 vSum2 = vX_N_4 + vX_3N_4;
928 vDiff1 = vX_0 - vX_N_2;
929 vDiff2 = vX_N_4 - vX_3N_4;
932 vX1 = vDiff1 - __vcrot90sp_vv (vDiff2);
934 vX3 = vDiff1 + __vcrot90sp_vv (vDiff2);
936 vSum1_1 = vX_0_1 + vX_N_2_1;
937 vSum2_1 = vX_N_4_1 + vX_3N_4_1;
938 vDiff1_1 = vX_0_1 - vX_N_2_1;
939 vDiff2_1 = vX_N_4_1 - vX_3N_4_1;
941 vX0Temp = vSum1_1 + vSum2_1;
942 vX1Temp = vDiff1_1 - __vcrot90sp_vv (vDiff2_1);
943 vX2Temp = vSum1_1 - vSum2_1;
944 vX3Temp = vDiff1_1 + __vcrot90sp_vv (vDiff2_1);
947 vX1_1 = __complex_multiply (vX1Temp, vTwX1);
948 vX2_1 = __complex_multiply (vX2Temp, vTwX2);
949 vX3_1 = __complex_multiply (vX3Temp, vTwX3);
951 vX0_2PtDft_1 = vX0 + vX0_1;
952 vX0_2PtDft_2 = vX0 - vX0_1;
953 vX1_2PtDft_1 = vX1 + vX1_1;
954 vX1_2PtDft_2 = vX1 - vX1_1;
955 vX2_2PtDft_1 = vX2 + vX2_1;
956 vX2_2PtDft_2 = vX2 - vX2_1;
957 vX3_2PtDft_1 = vX3 + vX3_1;
958 vX3_2PtDft_2 = vX3 - vX3_1;
971 vX01_2PtDft_1_lo = c7x::as_cfloat_vec (
972 __vpermll_yvvv (vXPermCtrl, c7x::as_uchar_vec (vX1_2PtDft_1),
973 c7x::as_uchar_vec (vX0_2PtDft_1)));
974 vX23_2PtDft_1_lo = c7x::as_cfloat_vec (
975 __vpermll_yvvv (vXPermCtrl, c7x::as_uchar_vec (vX3_2PtDft_1),
976 c7x::as_uchar_vec (vX2_2PtDft_1)));
977 vX01_2PtDft_2_lo = c7x::as_cfloat_vec (
978 __vpermll_yvvv (vXPermCtrl, c7x::as_uchar_vec (vX1_2PtDft_2),
979 c7x::as_uchar_vec (vX0_2PtDft_2)));
980 vX23_2PtDft_2_lo = c7x::as_cfloat_vec (
981 __vpermll_yvvv (vXPermCtrl, c7x::as_uchar_vec (vX3_2PtDft_2),
982 c7x::as_uchar_vec (vX2_2PtDft_2)));
983 vX01_2PtDft_1_hi = c7x::as_cfloat_vec (
984 __vpermhh_yvvv (vXPermCtrl, c7x::as_uchar_vec (vX1_2PtDft_1),
985 c7x::as_uchar_vec (vX0_2PtDft_1)));
986 vX23_2PtDft_1_hi = c7x::as_cfloat_vec (
987 __vpermhh_yvvv (vXPermCtrl, c7x::as_uchar_vec (vX3_2PtDft_1),
988 c7x::as_uchar_vec (vX2_2PtDft_1)));
989 vX01_2PtDft_2_hi = c7x::as_cfloat_vec (
990 __vpermhh_yvvv (vXPermCtrl, c7x::as_uchar_vec (vX1_2PtDft_2),
991 c7x::as_uchar_vec (vX0_2PtDft_2)));
992 vX23_2PtDft_2_hi = c7x::as_cfloat_vec (
993 __vpermhh_yvvv (vXPermCtrl, c7x::as_uchar_vec (vX3_2PtDft_2),
994 c7x::as_uchar_vec (vX2_2PtDft_2)));
997 __vpred tmp = c7x::strm_agen<0, CV>::get_vpred ();
999 addr = c7x::strm_agen<0, CV>::get_adv (pYLocal);
1000 __vstore_pred (tmp, addr, vX01_2PtDft_1_lo);
1002 tmp = c7x::strm_agen<0, CV>::get_vpred ();
1003 addr = c7x::strm_agen<0, CV>::get_adv (pYLocal);
1004 __vstore_pred (tmp, addr, vX23_2PtDft_1_lo);
1006 tmp = c7x::strm_agen<0, CV>::get_vpred ();
1007 addr = c7x::strm_agen<0, CV>::get_adv (pYLocal);
1008 __vstore_pred (tmp, addr, vX01_2PtDft_2_lo);
1010 tmp = c7x::strm_agen<0, CV>::get_vpred ();
1011 addr = c7x::strm_agen<0, CV>::get_adv (pYLocal);
1012 __vstore_pred (tmp, addr, vX23_2PtDft_2_lo);
1014 tmp = c7x::strm_agen<0, CV>::get_vpred ();
1015 addr = c7x::strm_agen<0, CV>::get_adv (pYLocal);
1016 __vstore_pred (tmp, addr, vX01_2PtDft_1_hi);
1018 tmp = c7x::strm_agen<0, CV>::get_vpred ();
1019 addr = c7x::strm_agen<0, CV>::get_adv (pYLocal);
1020 __vstore_pred (tmp, addr, vX23_2PtDft_1_hi);
1022 tmp = c7x::strm_agen<0, CV>::get_vpred ();
1023 addr = c7x::strm_agen<0, CV>::get_adv (pYLocal);
1024 __vstore_pred (tmp, addr, vX01_2PtDft_2_hi);
1026 tmp = c7x::strm_agen<0, CV>::get_vpred ();
1027 addr = c7x::strm_agen<0, CV>::get_adv (pYLocal);
1028 __vstore_pred (tmp, addr, vX23_2PtDft_2_hi);
1038 __SE0_OPEN((
void *) pXLocal, se0_param);
1040 numLeadingZeros = __norm((int32_t) (numPoints - 1)) + 1;
1042 pY0 = (cfloat *) (pY + (0x00000000u));
1043 pY1 = (cfloat *) (pY + ((0x80000000u >> numLeadingZeros) << 1));
1044 pY2 = (cfloat *) (pY + ((0x20000000u >> numLeadingZeros) << 1));
1045 pY3 = (cfloat *) (pY + ((0xA0000000u >> numLeadingZeros) << 1));
1046 pY4 = (cfloat *) (pY + ((0x40000000u >> numLeadingZeros) << 1));
1047 pY5 = (cfloat *) (pY + ((0xC0000000u >> numLeadingZeros) << 1));
1048 pY6 = (cfloat *) (pY + ((0x60000000u >> numLeadingZeros) << 1));
1049 pY7 = (cfloat *) (pY + ((0xE0000000u >> numLeadingZeros) << 1));
1051 #ifdef CL7X_HE_CFLOAT_PTR_BUG
1052 float *myPY0 = (
float *) pY0;
1053 float *myPY1 = (
float *) pY1;
1054 float *myPY2 = (
float *) pY2;
1055 float *myPY3 = (
float *) pY3;
1056 float *myPY4 = (
float *) pY4;
1057 float *myPY5 = (
float *) pY5;
1058 float *myPY6 = (
float *) pY6;
1059 float *myPY7 = (
float *) pY7;
1062 for (l = 0; l < numChannels; l++) {
1063 for (k = 0; k < numPoints >> 2; k += 8) {
1064 offsetBitReverse = __bit_reverse(k) >> numLeadingZeros;
1066 vX_0 = c7x::strm_eng<0, CV>::get_adv();
1067 vX_0_1 = c7x::strm_eng<0, CV>::get_adv();
1068 vX_N_4 = c7x::strm_eng<0, CV>::get_adv();
1069 vX_N_4_1 = c7x::strm_eng<0, CV>::get_adv();
1070 vX_N_2 = c7x::strm_eng<0, CV>::get_adv();
1071 vX_N_2_1 = c7x::strm_eng<0, CV>::get_adv();
1072 vX_3N_4 = c7x::strm_eng<0, CV>::get_adv();
1073 vX_3N_4_1 = c7x::strm_eng<0, CV>::get_adv();
1075 vSum1 = vX_0 + vX_N_2;
1076 vSum2 = vX_N_4 + vX_3N_4;
1077 vDiff1 = vX_0 - vX_N_2;
1078 vDiff2 = vX_N_4 - vX_3N_4;
1080 vX0 = vSum1 + vSum2;
1081 vX1 = vDiff1 - __vcrot90sp_vv(vDiff2);
1082 vX2 = vSum1 - vSum2;
1083 vX3 = vDiff1 + __vcrot90sp_vv(vDiff2);
1085 vSum1_1 = vX_0_1 + vX_N_2_1;
1086 vSum2_1 = vX_N_4_1 + vX_3N_4_1;
1087 vDiff1_1 = vX_0_1 - vX_N_2_1;
1088 vDiff2_1 = vX_N_4_1 - vX_3N_4_1;
1090 vX0Temp = vSum1_1 + vSum2_1;
1091 vX1Temp = vDiff1_1 - __vcrot90sp_vv(vDiff2_1);
1092 vX2Temp = vSum1_1 - vSum2_1;
1093 vX3Temp = vDiff1_1 + __vcrot90sp_vv(vDiff2_1);
1096 vX1_1 = __complex_multiply(vX1Temp, vTwX1);
1097 vX2_1 = __complex_multiply(vX2Temp, vTwX2);
1098 vX3_1 = __complex_multiply(vX3Temp, vTwX3);
1100 vX0_2PtDft_1 = vX0 + vX0_1;
1101 vX0_2PtDft_2 = vX0 - vX0_1;
1102 vX1_2PtDft_1 = vX1 + vX1_1;
1103 vX1_2PtDft_2 = vX1 - vX1_1;
1104 vX2_2PtDft_1 = vX2 + vX2_1;
1105 vX2_2PtDft_2 = vX2 - vX2_1;
1106 vX3_2PtDft_1 = vX3 + vX3_1;
1107 vX3_2PtDft_2 = vX3 - vX3_1;
1118 __vstore_reverse_bit((
CVP) (pY0 + offsetBitReverse), vX0_2PtDft_1);
1119 __vstore_reverse_bit((
CVP) (pY1 + offsetBitReverse), vX0_2PtDft_2);
1120 __vstore_reverse_bit((
CVP) (pY2 + offsetBitReverse), vX1_2PtDft_1);
1121 __vstore_reverse_bit((
CVP) (pY3 + offsetBitReverse), vX1_2PtDft_2);
1122 __vstore_reverse_bit((
CVP) (pY4 + offsetBitReverse), vX2_2PtDft_1);
1123 __vstore_reverse_bit((
CVP) (pY5 + offsetBitReverse), vX2_2PtDft_2);
1124 __vstore_reverse_bit((
CVP) (pY6 + offsetBitReverse), vX3_2PtDft_1);
1125 __vstore_reverse_bit((
CVP) (pY7 + offsetBitReverse), vX3_2PtDft_2);
1128 #ifdef CL7X_HE_CFLOAT_PTR_BUG
1129 myPY0 += (numPoints << 1);
1130 myPY1 += (numPoints << 1);
1131 myPY2 += (numPoints << 1);
1132 myPY3 += (numPoints << 1);
1133 myPY4 += (numPoints << 1);
1134 myPY5 += (numPoints << 1);
1135 myPY6 += (numPoints << 1);
1136 myPY7 += (numPoints << 1);
1138 pY0 = (cfloat *) myPY0;
1139 pY1 = (cfloat *) myPY1;
1140 pY2 = (cfloat *) myPY2;
1141 pY3 = (cfloat *) myPY3;
1142 pY4 = (cfloat *) myPY4;
1143 pY5 = (cfloat *) myPY5;
1144 pY6 = (cfloat *) myPY6;
1145 pY7 = (cfloat *) myPY7;
1166 #if (!defined(FFTLIB_REMOVE_CHECK_PARAMS) && !defined(FFTLIB_IFFT1DBATCHED_I32FC_C32FC_O32FC_REMOVE_CHECK_PARAMS)) || \
1167 (defined(FFTLIB_CHECK_PARAMS)) || (defined(FFTLIB_IFFT1DBATCHED_I32FC_C32FC_O32FC_CHECK_PARAMS))
1176 uint32_t numChannels,
1181 if ((pX == NULL) || (pW == NULL) || (pY == NULL) || (pBlock == NULL)) {
1184 else if (bufParamsX->
dim_x != bufParamsY->
dim_x) {
1187 else if (bufParamsX->
dim_x < numPoints * numChannels * 2) {
1194 else if (bufParamsX->
dim_x < 64 * 2) {
1197 else if (bufParamsW->
dim_x != numPoints * 2) {
1204 else if (((uint64_t) pX) & 0xFu) {
1212 if (numPoints & (1u << k)) {
1217 if ((1u << k) != numPoints) {
1221 if ((numChannels != 1) && (numChannels != 2) && (numChannels != 4) && (numChannels != 8) && (numChannels != 16)) {
FFTLIB_STATUS_NAME
The enumeration of all status codes.
@ FFTLIB_ERR_INVALID_TYPE
@ FFTLIB_ERR_NULL_POINTER
@ FFTLIB_ERR_INVALID_DIMENSION
@ FFTLIB_ERR_NOT_ALIGNED_PTRS_STRIDES
float FFTLIB_F32
Single precision floating point.
#define SA_LOOP4_PARAM_OFFSET
static void ifft_i32fc_o32fc_conjugate_exec_ci(void *pX, c7x::ulong_vec xorVec, uint32_t size, uint32_t numPoints, void *pBlock)
#define SE_LOOP6_PARAM_OFFSET
#define SE_LOOP4_PARAM_OFFSET
static c7x::cfloat_vec ifft_i32fc_o32fc_scaleAndConjugate(c7x::cfloat_vec in, c7x::float_vec scaleVec, c7x::ulong_vec xorVec)
#define SA_LOOP6_PARAM_OFFSET
#define SE_CONJ_LOOP_PARAM_OFFSET
#define SA_LOOP2_PARAM_OFFSET
#define SA_CONJ_LOOP_PARAM_OFFSET
static void ifft_i32fc_o32fc_conjugate_init_ci(void *pX, uint32_t size, void *pBlock)
#define SE_LOOP7_PARAM_OFFSET
#define SE_LOOP1_PARAM_OFFSET
#define SA_LOOP1_PARAM_OFFSET
#define SE_TWID_PARAM_OFFSET
#define SE_LOOP2_PARAM_OFFSET
#define SE_LOOP3_PARAM_OFFSET
#define SA_LOOP3_PARAM_OFFSET
#define SE_LOOP5_PARAM_OFFSET
FFTLIB_STATUS FFTLIB_ifft1dBatched_i32fc_c32fc_o32fc_kernel(FFTLIB_F32 *pX, FFTLIB_bufParams1D_t *bufParamsX, FFTLIB_F32 *pW, FFTLIB_bufParams1D_t *bufParamsW, FFTLIB_F32 *pY, FFTLIB_bufParams1D_t *bufParamsY, uint32_t numPoints, uint32_t numChannels, void *pBlock)
This function is the main kernel compute function.
FFTLIB_STATUS FFTLIB_ifft1dBatched_i32fc_c32fc_o32fc_init(FFTLIB_F32 *pX, FFTLIB_bufParams1D_t *bufParamsX, FFTLIB_F32 *pW, FFTLIB_bufParams1D_t *bufParamsW, FFTLIB_F32 *pY, FFTLIB_bufParams1D_t *bufParamsY, uint32_t numPoints, uint32_t numChannels, void *pBlock)
This function should be called before the FFTLIB_ifft1dBatched_i32fc_c32fc_o32fc_kernel function is called.
FFTLIB_STATUS FFTLIB_ifft1dBatched_i32fc_c32fc_o32fc_checkParams(FFTLIB_F32 *pX, FFTLIB_bufParams1D_t *bufParamsX, FFTLIB_F32 *pW, FFTLIB_bufParams1D_t *bufParamsW, FFTLIB_F32 *pY, FFTLIB_bufParams1D_t *bufParamsY, uint32_t numPoints, uint32_t numChannels, void *pBlock)
This function checks the validity of the parameters passed to the FFTLIB_ifft1dBatched_i32fc_c32fc_o32fc_init and FFTLIB_ifft1dBatched_i32fc_c32fc_o32fc_kernel functions.
A structure for a 1 dimensional buffer descriptor.
uint32_t data_type
Values are of type FFTLIB_data_type_e.
uint32_t dim_x
Width of buffer in X dimension in elements.