29 #include "../FFTLIB_ifft1dBatched_i32fc_c32fc_o32fc.h"
34 #include "../../../common/printv.h"
/*
 * Offset table for the streaming-engine (SE) and streaming-address-generator
 * (SA) parameter templates used by this kernel.
 *
 * NOTE(review): these look like byte offsets into the caller-provided pBlock
 * buffer; the statements that consume them are not visible here, so confirm
 * against the init/exec code.
 *
 * Every entry is defined as "previous entry + size of the previous entry", so
 * an entry that follows an SE slot advances by SE_PARAM_SIZE and an entry that
 * follows an SA slot advances by SA_PARAM_SIZE.  SE_PARAM_SIZE and
 * SA_PARAM_SIZE are expected to be provided by a header included above.
 */
#define SE_PARAM_BASE (0x0000)

/* SE templates, one slot per loop plus one for the twiddle-factor stream. */
#define SE_LOOP1_PARAM_OFFSET (SE_PARAM_BASE)
#define SE_LOOP2_PARAM_OFFSET (SE_LOOP1_PARAM_OFFSET + SE_PARAM_SIZE)
#define SE_LOOP3_PARAM_OFFSET (SE_LOOP2_PARAM_OFFSET + SE_PARAM_SIZE)
#define SE_LOOP4_PARAM_OFFSET (SE_LOOP3_PARAM_OFFSET + SE_PARAM_SIZE)
#define SE_LOOP5_PARAM_OFFSET (SE_LOOP4_PARAM_OFFSET + SE_PARAM_SIZE)
#define SE_LOOP6_PARAM_OFFSET (SE_LOOP5_PARAM_OFFSET + SE_PARAM_SIZE)
#define SE_LOOP7_PARAM_OFFSET (SE_LOOP6_PARAM_OFFSET + SE_PARAM_SIZE)
#define SE_TWID_PARAM_OFFSET (SE_LOOP7_PARAM_OFFSET + SE_PARAM_SIZE)

/* SA templates; the first one follows the last SE slot (the twiddle slot). */
#define SA_LOOP1_PARAM_OFFSET (SE_TWID_PARAM_OFFSET + SE_PARAM_SIZE)
#define SA_LOOP2_PARAM_OFFSET (SA_LOOP1_PARAM_OFFSET + SA_PARAM_SIZE)
#define SA_LOOP3_PARAM_OFFSET (SA_LOOP2_PARAM_OFFSET + SA_PARAM_SIZE)
#define SA_LOOP4_PARAM_OFFSET (SA_LOOP3_PARAM_OFFSET + SA_PARAM_SIZE)
#define SA_LOOP6_PARAM_OFFSET (SA_LOOP4_PARAM_OFFSET + SA_PARAM_SIZE)

/*
 * Templates for the conjugate/scale pass.  The SE slot follows the SA slot of
 * loop 6, so it must advance by SA_PARAM_SIZE (the original used
 * SE_PARAM_SIZE here, which only works while both sizes are equal).
 */
#define SE_CONJ_LOOP_PARAM_OFFSET (SA_LOOP6_PARAM_OFFSET + SA_PARAM_SIZE)
#define SA_CONJ_LOOP_PARAM_OFFSET (SE_CONJ_LOOP_PARAM_OFFSET + SE_PARAM_SIZE)
62 __SE_TEMPLATE_v1 se0_param = __gen_SE_TEMPLATE_v1();
63 __SA_TEMPLATE_v1 sa0_param = __gen_SA_TEMPLATE_v1();
66 se0_param.ELETYPE = __SE_ELETYPE_32BIT_CMPLX_SWAP;
67 se0_param.VECLEN = c7x::se_veclen<c7x::cfloat_vec>::value;
68 se0_param.DIMFMT = __SE_DIMFMT_1D;
69 se0_param.ICNT0 = size;
73 sa0_param.ICNT0 = size;
74 sa0_param.VECLEN = c7x::sa_veclen<c7x::cfloat_vec>::value;
75 sa0_param.DIMFMT = __SA_DIMFMT_1D;
93 #if defined(FFTLIB_CHECK_PARAMS) || defined(FFTLIB_IFFT1DBATCHED_I32FC_C32FC_O32FC_CHECK_PARAMS)
101 uint32_t numPointsPerDft;
102 uint32_t seCnt1, seCnt2, seCnt3, seCnt4;
103 uint32_t seCnt6, seCnt7, seCnt8, seCnt9, seCnt10;
105 __SE_TEMPLATE_v1 se0_param = __gen_SE_TEMPLATE_v1();
106 __SE_TEMPLATE_v1 se1_param = __gen_SE_TEMPLATE_v1();
107 __SA_TEMPLATE_v1 sa0_param = __gen_SA_TEMPLATE_v1();
109 numPointsPerDft = numPoints;
110 seCnt1 = numPoints >> 2;
111 seCnt2 = numPoints >> 5;
113 seCnt4 = numPoints >> 3;
114 seCnt6 = seCnt3 * numChannels;
115 seCnt7 = (numPoints * numChannels >> 5) > 1 ? numPoints * numChannels >> 5 : 1;
116 seCnt8 = numPoints * numChannels;
117 seCnt9 = (numPoints * numChannels > 32) ? numPoints * numChannels : 32;
118 seCnt10 = (numPoints * numChannels >> 6) > 1 ? numPoints * numChannels >> 6 : 1;
119 seCnt11 = (numPoints * numChannels > 64) ? numPoints * numChannels : 64;
124 se0_param = __gen_SE_TEMPLATE_v1();
127 se0_param.DIM1 = seCnt1;
128 se0_param.ICNT2 = seCnt2;
130 se0_param.ICNT3 = seCnt6;
131 se0_param.DIM3 = numPointsPerDft;
133 se0_param.ELETYPE = __SE_ELETYPE_32BIT_CMPLX_SWAP;
134 se0_param.VECLEN = c7x::se_veclen<c7x::cfloat_vec>::value;
135 se0_param.DIMFMT = __SE_DIMFMT_4D;
138 se1_param = __gen_SE_TEMPLATE_v1();
141 se1_param.DIM1 = seCnt1;
142 se1_param.ICNT2 = seCnt2;
144 se1_param.ICNT3 = seCnt6;
147 se1_param.ELETYPE = __SE_ELETYPE_32BIT_CMPLX_SWAP;
148 se1_param.VECLEN = c7x::se_veclen<c7x::cfloat_vec>::value;
149 se1_param.DIMFMT = __SE_DIMFMT_4D;
152 sa0_param = __gen_SA_TEMPLATE_v1();
155 sa0_param.DIM1 = seCnt1;
156 sa0_param.ICNT2 = seCnt2;
158 sa0_param.ICNT3 = seCnt6;
159 sa0_param.DIM3 = numPointsPerDft;
161 sa0_param.VECLEN = c7x::sa_veclen<c7x::cfloat_vec>::value;
162 sa0_param.DIMFMT = __SA_DIMFMT_4D;
165 se0_param = __gen_SE_TEMPLATE_v1();
169 se0_param.ICNT2 = seCnt7;
172 se0_param.ELETYPE = __SE_ELETYPE_32BIT_CMPLX_SWAP;
173 se0_param.TRANSPOSE = __SE_TRANSPOSE_256BIT;
175 se0_param.VECLEN = c7x::se_veclen<c7x::cfloat_vec>::value;
176 se0_param.DIMFMT = __SE_DIMFMT_3D;
179 sa0_param = __gen_SA_TEMPLATE_v1();
180 sa0_param.ICNT0 = seCnt8;
184 sa0_param.VECLEN = c7x::sa_veclen<c7x::cfloat_vec>::value;
185 sa0_param.DIMFMT = __SA_DIMFMT_1D;
188 se0_param = __gen_SE_TEMPLATE_v1();
189 se0_param.ICNT0 = seCnt8;
191 se0_param.ELETYPE = __SE_ELETYPE_32BIT_CMPLX_SWAP;
192 se0_param.VECLEN = c7x::se_veclen<c7x::cfloat_vec>::value;
193 se0_param.DIMFMT = __SE_DIMFMT_1D;
196 sa0_param = __gen_SA_TEMPLATE_v1();
197 sa0_param.ICNT0 = seCnt8;
199 sa0_param.VECLEN = c7x::sa_veclen<c7x::cfloat_vec>::value;
200 sa0_param.DIMFMT = __SA_DIMFMT_1D;
206 se0_param = __gen_SE_TEMPLATE_v1();
210 se0_param.ICNT2 = seCnt7;
213 se0_param.ELETYPE = __SE_ELETYPE_32BIT_CMPLX_SWAP;
214 se0_param.TRANSPOSE = __SE_TRANSPOSE_64BIT;
215 se0_param.VECLEN = c7x::se_veclen<c7x::cfloat_vec>::value;
216 se0_param.DIMFMT = __SE_DIMFMT_3D;
219 sa0_param = __gen_SA_TEMPLATE_v1();
220 sa0_param.ICNT0 = seCnt9;
224 sa0_param.VECLEN = c7x::sa_veclen<c7x::cfloat_vec>::value;
225 sa0_param.DIMFMT = __SA_DIMFMT_1D;
228 se0_param = __gen_SE_TEMPLATE_v1();
229 se0_param.ICNT0 = seCnt4;
236 se0_param.ICNT2 = numChannels;
237 se0_param.DIM2 = numPoints;
239 se0_param.ELETYPE = __SE_ELETYPE_32BIT_CMPLX_SWAP;
240 se0_param.TRANSPOSE = __SE_TRANSPOSE_64BIT;
241 se0_param.VECLEN = c7x::se_veclen<c7x::cfloat_vec>::value;
242 se0_param.DIMFMT = __SE_DIMFMT_3D;
245 se0_param = __gen_SE_TEMPLATE_v1();
249 se0_param.ICNT2 = seCnt10;
252 se0_param.ELETYPE = __SE_ELETYPE_32BIT_CMPLX_SWAP;
253 se0_param.TRANSPOSE = __SE_TRANSPOSE_64BIT;
254 se0_param.VECLEN = c7x::se_veclen<c7x::cfloat_vec>::value;
255 se0_param.DIMFMT = __SE_DIMFMT_3D;
258 sa0_param = __gen_SA_TEMPLATE_v1();
259 sa0_param.ICNT0 = seCnt11;
263 sa0_param.VECLEN = c7x::sa_veclen<c7x::cfloat_vec>::value;
264 sa0_param.DIMFMT = __SA_DIMFMT_1D;
267 se0_param = __gen_SE_TEMPLATE_v1();
268 se0_param.ICNT0 = seCnt4;
275 se0_param.ICNT2 = numChannels;
276 se0_param.DIM2 = numPoints;
278 se0_param.ELETYPE = __SE_ELETYPE_32BIT_CMPLX_SWAP;
279 se0_param.TRANSPOSE = __SE_TRANSPOSE_64BIT;
280 se0_param.VECLEN = c7x::se_veclen<c7x::cfloat_vec>::value;
281 se0_param.DIMFMT = __SE_DIMFMT_3D;
288 static inline c7x::cfloat_vec
291 return (c7x::as_cfloat_vec(scaleVec * c7x::as_float_vec(c7x::as_ulong_vec(in) ^ xorVec)));
295 c7x::ulong_vec xorVec,
301 typedef typename c7x::cfloat_vec
CV;
304 __SE_TEMPLATE_v1 se0_param = __gen_SE_TEMPLATE_v1();
305 __SA_TEMPLATE_v1 sa0_param = __gen_SA_TEMPLATE_v1();
306 cfloat *restrict pXLocal = (cfloat *) pX;
312 __SE0_OPEN(pX, se0_param);
313 __SA0_OPEN(sa0_param);
316 uint32_t loopCount = (size) / c7x::element_count_of<c7x::cfloat_vec>::value;
317 c7x::cfloat_vec regIn, regStore;
318 float scale = 1.0f / numPoints;
319 c7x::float_vec scaleVec = __vload_dup(&scale);
324 for (i = 0; i < loopCount; i++) {
325 regIn = c7x::strm_eng<0, c7x::cfloat_vec>::get_adv();
331 tmp = c7x::strm_agen<0, CV>::get_vpred();
332 addr = c7x::strm_agen<0, CV>::get_adv(&pXLocal[0]);
333 __vstore_pred(tmp, addr, regStore);
348 uint32_t numChannels,
353 uint32_t numPointsPerDft;
354 uint32_t numLeadingZeros;
355 uint32_t offsetBitReverse;
356 uint32_t seCnt1, seCnt2, seCnt3, seCnt6;
358 __SE_TEMPLATE_v1 se0_param;
359 __SE_TEMPLATE_v1 se1_param;
360 __SA_TEMPLATE_v1 sa0_param;
362 cfloat *restrict pXLocal;
363 cfloat *restrict pYLocal;
364 cfloat *restrict pWLocal;
365 cfloat *restrict pY0;
366 cfloat *restrict pY1;
367 cfloat *restrict pY2;
368 cfloat *restrict pY3;
369 cfloat *restrict pY4;
370 cfloat *restrict pY5;
371 cfloat *restrict pY6;
372 cfloat *restrict pY7;
374 typedef typename c7x::cfloat_vec
CV;
380 CV vX_0, vX_N_4, vX_N_2, vX_3N_4;
381 CV vSum1, vSum2, vDiff1, vDiff2;
382 CV vTwX1, vTwX2, vTwX3;
383 CV vX0Temp, vX1Temp, vX2Temp, vX3Temp;
384 CV vX0, vX1, vX2, vX3;
385 CV vX_0_1, vX_N_4_1, vX_N_2_1, vX_3N_4_1;
386 CV vSum1_1, vSum2_1, vDiff1_1, vDiff2_1;
387 CV vX0_1, vX1_1, vX2_1, vX3_1;
388 CV vX0_2PtDft_1, vX0_2PtDft_2;
389 CV vX1_2PtDft_1, vX1_2PtDft_2;
390 CV vX2_2PtDft_1, vX2_2PtDft_2;
391 CV vX3_2PtDft_1, vX3_2PtDft_2;
392 CV vX01_lo, vX23_lo, vX01_hi, vX23_hi;
395 #ifdef FFTLIB_CHECK_PARAMS
403 numPointsPerDft = numPoints;
406 c7x::float_vec scaleVec = __vload_dup(&scale);
409 #if defined(_HOST_BUILD)
410 c7x::ulong_vec xorVec = (c7x::ulong_vec)(0x0000000080000000);
413 c7x::ulong_vec xorVec = (0x0000000080000000);
422 seCnt1 = numPointsPerDft >> 2;
423 seCnt2 = numPointsPerDft >> 5;
426 pXLocal = (cfloat *) pX;
427 pWLocal = (cfloat *) pW;
428 pYLocal = (cfloat *) pY;
430 while (numPointsPerDft >= 64) {
432 seCnt6 = seCnt3 * numChannels;
434 se0_param.DIM1 = seCnt1;
435 se0_param.ICNT2 = seCnt2;
437 se0_param.ICNT3 = seCnt6;
438 se0_param.DIM3 = numPointsPerDft;
439 __SE0_OPEN((
void *) pXLocal, se0_param);
442 se1_param.DIM1 = seCnt1;
443 se1_param.ICNT2 = seCnt2;
445 se1_param.ICNT3 = seCnt6;
447 __SE1_OPEN((
void *) pWLocal, se1_param);
450 sa0_param.DIM1 = seCnt1;
451 sa0_param.ICNT2 = seCnt2;
454 sa0_param.ICNT3 = seCnt6;
455 sa0_param.DIM3 = numPointsPerDft;
456 __SA0_OPEN(sa0_param);
459 for (k = 0; k < numPoints * numChannels; k += 64) {
462 vX_0 = c7x::strm_eng<0, CV>::get_adv();
463 vX_N_4 = c7x::strm_eng<0, CV>::get_adv();
464 vX_N_2 = c7x::strm_eng<0, CV>::get_adv();
465 vX_3N_4 = c7x::strm_eng<0, CV>::get_adv();
467 vSum1 = vX_0 + vX_N_2;
468 vSum2 = vX_N_4 + vX_3N_4;
469 vDiff1 = vX_0 - vX_N_2;
470 vDiff2 = vX_N_4 - vX_3N_4;
472 vTwX1 = c7x::strm_eng<1, CV>::get_adv();
473 vTwX2 = c7x::strm_eng<1, CV>::get_adv();
474 vTwX3 = c7x::strm_eng<1, CV>::get_adv();
476 vX0Temp = vSum1 + vSum2;
477 vX1Temp = vDiff1 - __vcrot90sp_vv(vDiff2);
478 vX2Temp = vSum1 - vSum2;
479 vX3Temp = vDiff1 + __vcrot90sp_vv(vDiff2);
482 vX1 = __complex_multiply(vX1Temp, vTwX1);
483 vX2 = __complex_multiply(vX2Temp, vTwX2);
484 vX3 = __complex_multiply(vX3Temp, vTwX3);
489 tmp = c7x::strm_agen<0, CV>::get_vpred();
490 addr = c7x::strm_agen<0, CV>::get_adv(pXLocal);
491 __vstore_pred(tmp, addr, vX0);
493 tmp = c7x::strm_agen<0, CV>::get_vpred();
494 addr = c7x::strm_agen<0, CV>::get_adv(pXLocal);
495 __vstore_pred(tmp, addr, vX2);
497 tmp = c7x::strm_agen<0, CV>::get_vpred();
498 addr = c7x::strm_agen<0, CV>::get_adv(pXLocal);
499 __vstore_pred(tmp, addr, vX1);
501 tmp = c7x::strm_agen<0, CV>::get_vpred();
502 addr = c7x::strm_agen<0, CV>::get_adv(pXLocal);
503 __vstore_pred(tmp, addr, vX3);
506 vX_0 = c7x::strm_eng<0, CV>::get_adv();
507 vX_N_4 = c7x::strm_eng<0, CV>::get_adv();
508 vX_N_2 = c7x::strm_eng<0, CV>::get_adv();
509 vX_3N_4 = c7x::strm_eng<0, CV>::get_adv();
511 vSum1 = vX_0 + vX_N_2;
512 vSum2 = vX_N_4 + vX_3N_4;
513 vDiff1 = vX_0 - vX_N_2;
514 vDiff2 = vX_N_4 - vX_3N_4;
516 vTwX1 = c7x::strm_eng<1, CV>::get_adv();
517 vTwX2 = c7x::strm_eng<1, CV>::get_adv();
518 vTwX3 = c7x::strm_eng<1, CV>::get_adv();
520 vX0Temp = vSum1 + vSum2;
521 vX1Temp = vDiff1 - __vcrot90sp_vv(vDiff2);
522 vX2Temp = vSum1 - vSum2;
523 vX3Temp = vDiff1 + __vcrot90sp_vv(vDiff2);
526 vX1 = __complex_multiply(vX1Temp, vTwX1);
527 vX2 = __complex_multiply(vX2Temp, vTwX2);
528 vX3 = __complex_multiply(vX3Temp, vTwX3);
530 tmp = c7x::strm_agen<0, CV>::get_vpred();
531 addr = c7x::strm_agen<0, CV>::get_adv(pXLocal);
532 __vstore_pred(tmp, addr, vX0);
534 tmp = c7x::strm_agen<0, CV>::get_vpred();
535 addr = c7x::strm_agen<0, CV>::get_adv(pXLocal);
536 __vstore_pred(tmp, addr, vX2);
538 tmp = c7x::strm_agen<0, CV>::get_vpred();
539 addr = c7x::strm_agen<0, CV>::get_adv(pXLocal);
540 __vstore_pred(tmp, addr, vX1);
542 tmp = c7x::strm_agen<0, CV>::get_vpred();
543 addr = c7x::strm_agen<0, CV>::get_adv(pXLocal);
544 __vstore_pred(tmp, addr, vX3);
550 numPointsPerDft >>= 2;
551 pWLocal += numPointsPerDft * 3;
557 if (numPointsPerDft == 16) {
560 __SE0_OPEN((
void *) pXLocal, se0_param);
561 __SE1_OPEN((
void *) (pXLocal + 8), se0_param);
564 __SA0_OPEN(sa0_param);
566 vTwX1 = *((
CVP) pWLocal);
567 vTwX2 = *((
CVP) (pWLocal + 4));
568 vTwX3 = *((
CVP) (pWLocal + 8));
571 vTwX1 =
CV(vTwX1.lo(), vTwX1.lo());
572 vTwX2 =
CV(vTwX2.lo(), vTwX2.lo());
573 vTwX3 =
CV(vTwX3.lo(), vTwX3.lo());
575 vTwX1 = (
CV) (vTwX1.lo(), vTwX1.lo());
576 vTwX2 = (
CV) (vTwX2.lo(), vTwX2.lo());
577 vTwX3 = (
CV) (vTwX3.lo(), vTwX3.lo());
580 for (k = 0; k < numPoints * numChannels; k += 32) {
581 vX_0 = c7x::strm_eng<0, CV>::get_adv();
582 vX_N_4 = c7x::strm_eng<0, CV>::get_adv();
583 vX_N_2 = c7x::strm_eng<1, CV>::get_adv();
584 vX_3N_4 = c7x::strm_eng<1, CV>::get_adv();
586 vSum1 = vX_0 + vX_N_2;
587 vSum2 = vX_N_4 + vX_3N_4;
588 vDiff1 = vX_0 - vX_N_2;
589 vDiff2 = vX_N_4 - vX_3N_4;
591 vX0Temp = vSum1 + vSum2;
592 vX1Temp = vDiff1 - __vcrot90sp_vv(vDiff2);
593 vX2Temp = vSum1 - vSum2;
594 vX3Temp = vDiff1 + __vcrot90sp_vv(vDiff2);
597 vX1 = __complex_multiply(vX1Temp, vTwX1);
598 vX2 = __complex_multiply(vX2Temp, vTwX2);
599 vX3 = __complex_multiply(vX3Temp, vTwX3);
602 __vpred tmp = c7x::strm_agen<0, CV>::get_vpred();
604 addr = c7x::strm_agen<0, CV>::get_adv(pXLocal);
605 __vstore_pred(tmp, addr,
CV(vX0.lo(), vX2.lo()));
607 tmp = c7x::strm_agen<0, CV>::get_vpred();
608 addr = c7x::strm_agen<0, CV>::get_adv(pXLocal);
609 __vstore_pred(tmp, addr,
CV(vX1.lo(), vX3.lo()));
611 tmp = c7x::strm_agen<0, CV>::get_vpred();
612 addr = c7x::strm_agen<0, CV>::get_adv(pXLocal);
613 __vstore_pred(tmp, addr,
CV(vX0.hi(), vX2.hi()));
615 tmp = c7x::strm_agen<0, CV>::get_vpred();
616 addr = c7x::strm_agen<0, CV>::get_adv(pXLocal);
617 __vstore_pred(tmp, addr,
CV(vX1.hi(), vX3.hi()));
619 __vpred tmp = c7x::strm_agen<0, CV>::get_vpred();
621 addr = c7x::strm_agen<0, CV>::get_adv(pXLocal);
622 __vstore_pred(tmp, addr, (
CV) (vX0.lo(), vX2.lo()));
624 tmp = c7x::strm_agen<0, CV>::get_vpred();
625 addr = c7x::strm_agen<0, CV>::get_adv(pXLocal);
626 __vstore_pred(tmp, addr, (
CV) (vX1.lo(), vX3.lo()));
628 tmp = c7x::strm_agen<0, CV>::get_vpred();
629 addr = c7x::strm_agen<0, CV>::get_adv(pXLocal);
630 __vstore_pred(tmp, addr, (
CV) (vX0.hi(), vX2.hi()));
632 tmp = c7x::strm_agen<0, CV>::get_vpred();
633 addr = c7x::strm_agen<0, CV>::get_adv(pXLocal);
634 __vstore_pred(tmp, addr, (
CV) (vX1.hi(), vX3.hi()));
644 __SE0_OPEN((
void *) pXLocal, se0_param);
647 __SA0_OPEN(sa0_param);
649 vTwX1 = *((
CVP) pWLocal);
650 vTwX2 = *((
CVP) (pWLocal + 8));
651 vTwX3 = *((
CVP) (pWLocal + 16));
653 for (k = 0; k < numPoints * numChannels; k += 32) {
654 vX_0 = c7x::strm_eng<0, CV>::get_adv();
655 vX_N_4 = c7x::strm_eng<0, CV>::get_adv();
656 vX_N_2 = c7x::strm_eng<0, CV>::get_adv();
657 vX_3N_4 = c7x::strm_eng<0, CV>::get_adv();
659 vSum1 = vX_0 + vX_N_2;
660 vSum2 = vX_N_4 + vX_3N_4;
661 vDiff1 = vX_0 - vX_N_2;
662 vDiff2 = vX_N_4 - vX_3N_4;
664 vX0Temp = vSum1 + vSum2;
665 vX1Temp = vDiff1 - __vcrot90sp_vv(vDiff2);
666 vX2Temp = vSum1 - vSum2;
667 vX3Temp = vDiff1 + __vcrot90sp_vv(vDiff2);
670 vX1 = __complex_multiply(vX1Temp, vTwX1);
671 vX2 = __complex_multiply(vX2Temp, vTwX2);
672 vX3 = __complex_multiply(vX3Temp, vTwX3);
674 __vpred tmp = c7x::strm_agen<0, CV>::get_vpred();
676 addr = c7x::strm_agen<0, CV>::get_adv(pXLocal);
677 __vstore_pred(tmp, addr, vX0);
679 tmp = c7x::strm_agen<0, CV>::get_vpred();
680 addr = c7x::strm_agen<0, CV>::get_adv(pXLocal);
681 __vstore_pred(tmp, addr, vX2);
683 tmp = c7x::strm_agen<0, CV>::get_vpred();
684 addr = c7x::strm_agen<0, CV>::get_adv(pXLocal);
685 __vstore_pred(tmp, addr, vX1);
687 tmp = c7x::strm_agen<0, CV>::get_vpred();
688 addr = c7x::strm_agen<0, CV>::get_adv(pXLocal);
689 __vstore_pred(tmp, addr, vX3);
695 numPointsPerDft >>= 2;
696 pWLocal += numPointsPerDft * 3;
698 if (numPointsPerDft == 4) {
701 if (numPoints == 16) {
704 c7x::uchar_vec vXPermCtrl = c7x::uchar_vec(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
705 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
706 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
707 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
708 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
709 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
710 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
711 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F);
713 c7x::uchar_vec vXPermCtrl = (c7x::uchar_vec)(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
714 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
715 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
716 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
717 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
718 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
719 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
720 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F);
725 __SE0_OPEN((
void *) pXLocal, se0_param);
728 __SA0_OPEN(sa0_param);
730 for (k = 0; k < numChannels << 4; k += 32) {
731 vX_0 = c7x::strm_eng<0, CV>::get_adv();
732 vX_N_4 = c7x::strm_eng<0, CV>::get_adv();
733 vX_N_2 = c7x::strm_eng<0, CV>::get_adv();
734 vX_3N_4 = c7x::strm_eng<0, CV>::get_adv();
736 vSum1 = vX_0 + vX_N_2;
737 vSum2 = vX_N_4 + vX_3N_4;
738 vDiff1 = vX_0 - vX_N_2;
739 vDiff2 = vX_N_4 - vX_3N_4;
742 vX1 = vDiff1 - __vcrot90sp_vv(vDiff2);
744 vX3 = vDiff1 + __vcrot90sp_vv(vDiff2);
752 vX01_lo = c7x::as_cfloat_vec(__vpermll_yvvv(vXPermCtrl, c7x::as_uchar_vec(vX1), c7x::as_uchar_vec(vX0)));
753 vX23_lo = c7x::as_cfloat_vec(__vpermll_yvvv(vXPermCtrl, c7x::as_uchar_vec(vX3), c7x::as_uchar_vec(vX2)));
754 vX01_hi = c7x::as_cfloat_vec(__vpermhh_yvvv(vXPermCtrl, c7x::as_uchar_vec(vX1), c7x::as_uchar_vec(vX0)));
755 vX23_hi = c7x::as_cfloat_vec(__vpermhh_yvvv(vXPermCtrl, c7x::as_uchar_vec(vX3), c7x::as_uchar_vec(vX2)));
757 __vpred tmp = c7x::strm_agen<0, CV>::get_vpred();
759 addr = c7x::strm_agen<0, CV>::get_adv(pYLocal);
760 __vstore_pred(tmp, addr, vX01_lo);
762 tmp = c7x::strm_agen<0, CV>::get_vpred();
763 addr = c7x::strm_agen<0, CV>::get_adv(pYLocal);
764 __vstore_pred(tmp, addr, vX23_lo);
766 tmp = c7x::strm_agen<0, CV>::get_vpred();
767 addr = c7x::strm_agen<0, CV>::get_adv(pYLocal);
768 __vstore_pred(tmp, addr, vX01_hi);
770 tmp = c7x::strm_agen<0, CV>::get_vpred();
771 addr = c7x::strm_agen<0, CV>::get_adv(pYLocal);
772 __vstore_pred(tmp, addr, vX23_hi);
779 __SE0_OPEN((
void *) pXLocal, se0_param);
781 numLeadingZeros = __norm((int32_t) (numPoints - 1)) + 1;
788 pY0 = (cfloat *) (pY + 0);
789 pY1 = (cfloat *) (pY + ((0x40000000u >> numLeadingZeros) << 1));
790 pY2 = (cfloat *) (pY + ((0x80000000u >> numLeadingZeros) << 1));
791 pY3 = (cfloat *) (pY + ((0xC0000000u >> numLeadingZeros) << 1));
793 #ifdef CL7X_HE_CFLOAT_PTR_BUG
794 float *myPY0 = (
float *) pY0;
795 float *myPY1 = (
float *) pY1;
796 float *myPY2 = (
float *) pY2;
797 float *myPY3 = (
float *) pY3;
800 for (l = 0; l < numChannels; l++) {
801 for (k = 0; k < numPoints >> 3; k += 4) {
802 offsetBitReverse = __bit_reverse(k) >> numLeadingZeros;
804 vX_0 = c7x::strm_eng<0, CV>::get_adv();
805 vX_N_4 = c7x::strm_eng<0, CV>::get_adv();
806 vX_N_2 = c7x::strm_eng<0, CV>::get_adv();
807 vX_3N_4 = c7x::strm_eng<0, CV>::get_adv();
809 vSum1 = vX_0 + vX_N_2;
810 vSum2 = vX_N_4 + vX_3N_4;
811 vDiff1 = vX_0 - vX_N_2;
812 vDiff2 = vX_N_4 - vX_3N_4;
815 vX1 = vDiff1 - __vcrot90sp_vv(vDiff2);
817 vX3 = vDiff1 + __vcrot90sp_vv(vDiff2);
838 __vstore_reverse_bit((
CVP) (pY0 + offsetBitReverse), vX0);
839 __vstore_reverse_bit((
CVP) (pY1 + offsetBitReverse), vX1);
840 __vstore_reverse_bit((
CVP) (pY2 + offsetBitReverse), vX2);
841 __vstore_reverse_bit((
CVP) (pY3 + offsetBitReverse), vX3);
844 #ifdef CL7X_HE_CFLOAT_PTR_BUG
845 myPY0 += (numPoints << 1);
846 myPY1 += (numPoints << 1);
847 myPY2 += (numPoints << 1);
848 myPY3 += (numPoints << 1);
850 pY0 = (cfloat *) myPY0;
851 pY1 = (cfloat *) myPY1;
852 pY2 = (cfloat *) myPY2;
853 pY3 = (cfloat *) myPY3;
871 vTwX1 =
CV(twTemp, twTemp, twTemp, twTemp, twTemp, twTemp, twTemp, twTemp);
874 vTwX2 =
CV(twTemp, twTemp, twTemp, twTemp, twTemp, twTemp, twTemp, twTemp);
877 vTwX3 =
CV(twTemp, twTemp, twTemp, twTemp, twTemp, twTemp, twTemp, twTemp);
881 vTwX1 = (
CV) (twTemp, twTemp, twTemp, twTemp, twTemp, twTemp, twTemp, twTemp);
884 vTwX2 = (
CV) (twTemp, twTemp, twTemp, twTemp, twTemp, twTemp, twTemp, twTemp);
887 vTwX3 = (
CV) (twTemp, twTemp, twTemp, twTemp, twTemp, twTemp, twTemp, twTemp);
890 if (numPoints == 32) {
893 c7x::uchar_vec vXPermCtrl = c7x::uchar_vec(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
894 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
895 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
896 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
897 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
898 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
899 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
900 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F);
902 c7x::uchar_vec vXPermCtrl = (c7x::uchar_vec)(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
903 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
904 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
905 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
906 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
907 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
908 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
909 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F);
912 CV vX01_2PtDft_1_lo, vX23_2PtDft_1_lo, vX01_2PtDft_2_lo, vX23_2PtDft_2_lo;
913 CV vX01_2PtDft_1_hi, vX23_2PtDft_1_hi, vX01_2PtDft_2_hi, vX23_2PtDft_2_hi;
916 __SE0_OPEN((
void *) pXLocal, se0_param);
919 __SA0_OPEN(sa0_param);
921 for (k = 0; k < numChannels << 5; k += 64) {
922 vX_0 = c7x::strm_eng<0, CV>::get_adv();
923 vX_0_1 = c7x::strm_eng<0, CV>::get_adv();
924 vX_N_4 = c7x::strm_eng<0, CV>::get_adv();
925 vX_N_4_1 = c7x::strm_eng<0, CV>::get_adv();
926 vX_N_2 = c7x::strm_eng<0, CV>::get_adv();
927 vX_N_2_1 = c7x::strm_eng<0, CV>::get_adv();
928 vX_3N_4 = c7x::strm_eng<0, CV>::get_adv();
929 vX_3N_4_1 = c7x::strm_eng<0, CV>::get_adv();
931 vSum1 = vX_0 + vX_N_2;
932 vSum2 = vX_N_4 + vX_3N_4;
933 vDiff1 = vX_0 - vX_N_2;
934 vDiff2 = vX_N_4 - vX_3N_4;
937 vX1 = vDiff1 - __vcrot90sp_vv(vDiff2);
939 vX3 = vDiff1 + __vcrot90sp_vv(vDiff2);
941 vSum1_1 = vX_0_1 + vX_N_2_1;
942 vSum2_1 = vX_N_4_1 + vX_3N_4_1;
943 vDiff1_1 = vX_0_1 - vX_N_2_1;
944 vDiff2_1 = vX_N_4_1 - vX_3N_4_1;
946 vX0Temp = vSum1_1 + vSum2_1;
947 vX1Temp = vDiff1_1 - __vcrot90sp_vv(vDiff2_1);
948 vX2Temp = vSum1_1 - vSum2_1;
949 vX3Temp = vDiff1_1 + __vcrot90sp_vv(vDiff2_1);
952 vX1_1 = __complex_multiply(vX1Temp, vTwX1);
953 vX2_1 = __complex_multiply(vX2Temp, vTwX2);
954 vX3_1 = __complex_multiply(vX3Temp, vTwX3);
956 vX0_2PtDft_1 = vX0 + vX0_1;
957 vX0_2PtDft_2 = vX0 - vX0_1;
958 vX1_2PtDft_1 = vX1 + vX1_1;
959 vX1_2PtDft_2 = vX1 - vX1_1;
960 vX2_2PtDft_1 = vX2 + vX2_1;
961 vX2_2PtDft_2 = vX2 - vX2_1;
962 vX3_2PtDft_1 = vX3 + vX3_1;
963 vX3_2PtDft_2 = vX3 - vX3_1;
975 vX01_2PtDft_1_lo = c7x::as_cfloat_vec(
976 __vpermll_yvvv(vXPermCtrl, c7x::as_uchar_vec(vX1_2PtDft_1), c7x::as_uchar_vec(vX0_2PtDft_1)));
977 vX23_2PtDft_1_lo = c7x::as_cfloat_vec(
978 __vpermll_yvvv(vXPermCtrl, c7x::as_uchar_vec(vX3_2PtDft_1), c7x::as_uchar_vec(vX2_2PtDft_1)));
979 vX01_2PtDft_2_lo = c7x::as_cfloat_vec(
980 __vpermll_yvvv(vXPermCtrl, c7x::as_uchar_vec(vX1_2PtDft_2), c7x::as_uchar_vec(vX0_2PtDft_2)));
981 vX23_2PtDft_2_lo = c7x::as_cfloat_vec(
982 __vpermll_yvvv(vXPermCtrl, c7x::as_uchar_vec(vX3_2PtDft_2), c7x::as_uchar_vec(vX2_2PtDft_2)));
983 vX01_2PtDft_1_hi = c7x::as_cfloat_vec(
984 __vpermhh_yvvv(vXPermCtrl, c7x::as_uchar_vec(vX1_2PtDft_1), c7x::as_uchar_vec(vX0_2PtDft_1)));
985 vX23_2PtDft_1_hi = c7x::as_cfloat_vec(
986 __vpermhh_yvvv(vXPermCtrl, c7x::as_uchar_vec(vX3_2PtDft_1), c7x::as_uchar_vec(vX2_2PtDft_1)));
987 vX01_2PtDft_2_hi = c7x::as_cfloat_vec(
988 __vpermhh_yvvv(vXPermCtrl, c7x::as_uchar_vec(vX1_2PtDft_2), c7x::as_uchar_vec(vX0_2PtDft_2)));
989 vX23_2PtDft_2_hi = c7x::as_cfloat_vec(
990 __vpermhh_yvvv(vXPermCtrl, c7x::as_uchar_vec(vX3_2PtDft_2), c7x::as_uchar_vec(vX2_2PtDft_2)));
992 __vpred tmp = c7x::strm_agen<0, CV>::get_vpred();
994 addr = c7x::strm_agen<0, CV>::get_adv(pYLocal);
995 __vstore_pred(tmp, addr, vX01_2PtDft_1_lo);
997 tmp = c7x::strm_agen<0, CV>::get_vpred();
998 addr = c7x::strm_agen<0, CV>::get_adv(pYLocal);
999 __vstore_pred(tmp, addr, vX23_2PtDft_1_lo);
1001 tmp = c7x::strm_agen<0, CV>::get_vpred();
1002 addr = c7x::strm_agen<0, CV>::get_adv(pYLocal);
1003 __vstore_pred(tmp, addr, vX01_2PtDft_2_lo);
1005 tmp = c7x::strm_agen<0, CV>::get_vpred();
1006 addr = c7x::strm_agen<0, CV>::get_adv(pYLocal);
1007 __vstore_pred(tmp, addr, vX23_2PtDft_2_lo);
1009 tmp = c7x::strm_agen<0, CV>::get_vpred();
1010 addr = c7x::strm_agen<0, CV>::get_adv(pYLocal);
1011 __vstore_pred(tmp, addr, vX01_2PtDft_1_hi);
1013 tmp = c7x::strm_agen<0, CV>::get_vpred();
1014 addr = c7x::strm_agen<0, CV>::get_adv(pYLocal);
1015 __vstore_pred(tmp, addr, vX23_2PtDft_1_hi);
1017 tmp = c7x::strm_agen<0, CV>::get_vpred();
1018 addr = c7x::strm_agen<0, CV>::get_adv(pYLocal);
1019 __vstore_pred(tmp, addr, vX01_2PtDft_2_hi);
1021 tmp = c7x::strm_agen<0, CV>::get_vpred();
1022 addr = c7x::strm_agen<0, CV>::get_adv(pYLocal);
1023 __vstore_pred(tmp, addr, vX23_2PtDft_2_hi);
1030 __SE0_OPEN((
void *) pXLocal, se0_param);
1032 numLeadingZeros = __norm((int32_t) (numPoints - 1)) + 1;
1043 pY0 = (cfloat *) (pY + (0x00000000u));
1044 pY1 = (cfloat *) (pY + ((0x80000000u >> numLeadingZeros) << 1));
1045 pY2 = (cfloat *) (pY + ((0x20000000u >> numLeadingZeros) << 1));
1046 pY3 = (cfloat *) (pY + ((0xA0000000u >> numLeadingZeros) << 1));
1047 pY4 = (cfloat *) (pY + ((0x40000000u >> numLeadingZeros) << 1));
1048 pY5 = (cfloat *) (pY + ((0xC0000000u >> numLeadingZeros) << 1));
1049 pY6 = (cfloat *) (pY + ((0x60000000u >> numLeadingZeros) << 1));
1050 pY7 = (cfloat *) (pY + ((0xE0000000u >> numLeadingZeros) << 1));
1052 #ifdef CL7X_HE_CFLOAT_PTR_BUG
1053 float *myPY0 = (
float *) pY0;
1054 float *myPY1 = (
float *) pY1;
1055 float *myPY2 = (
float *) pY2;
1056 float *myPY3 = (
float *) pY3;
1057 float *myPY4 = (
float *) pY4;
1058 float *myPY5 = (
float *) pY5;
1059 float *myPY6 = (
float *) pY6;
1060 float *myPY7 = (
float *) pY7;
1063 for (l = 0; l < numChannels; l++) {
1064 for (k = 0; k < numPoints >> 3; k += 8) {
1065 offsetBitReverse = __bit_reverse(k) >> numLeadingZeros;
1067 vX_0 = c7x::strm_eng<0, CV>::get_adv();
1068 vX_0_1 = c7x::strm_eng<0, CV>::get_adv();
1069 vX_N_4 = c7x::strm_eng<0, CV>::get_adv();
1070 vX_N_4_1 = c7x::strm_eng<0, CV>::get_adv();
1071 vX_N_2 = c7x::strm_eng<0, CV>::get_adv();
1072 vX_N_2_1 = c7x::strm_eng<0, CV>::get_adv();
1073 vX_3N_4 = c7x::strm_eng<0, CV>::get_adv();
1074 vX_3N_4_1 = c7x::strm_eng<0, CV>::get_adv();
1076 vSum1 = vX_0 + vX_N_2;
1077 vSum2 = vX_N_4 + vX_3N_4;
1078 vDiff1 = vX_0 - vX_N_2;
1079 vDiff2 = vX_N_4 - vX_3N_4;
1081 vX0 = vSum1 + vSum2;
1082 vX1 = vDiff1 - __vcrot90sp_vv(vDiff2);
1083 vX2 = vSum1 - vSum2;
1084 vX3 = vDiff1 + __vcrot90sp_vv(vDiff2);
1086 vSum1_1 = vX_0_1 + vX_N_2_1;
1087 vSum2_1 = vX_N_4_1 + vX_3N_4_1;
1088 vDiff1_1 = vX_0_1 - vX_N_2_1;
1089 vDiff2_1 = vX_N_4_1 - vX_3N_4_1;
1091 vX0Temp = vSum1_1 + vSum2_1;
1092 vX1Temp = vDiff1_1 - __vcrot90sp_vv(vDiff2_1);
1093 vX2Temp = vSum1_1 - vSum2_1;
1094 vX3Temp = vDiff1_1 + __vcrot90sp_vv(vDiff2_1);
1097 vX1_1 = __complex_multiply(vX1Temp, vTwX1);
1098 vX2_1 = __complex_multiply(vX2Temp, vTwX2);
1099 vX3_1 = __complex_multiply(vX3Temp, vTwX3);
1101 vX0_2PtDft_1 = vX0 + vX0_1;
1102 vX0_2PtDft_2 = vX0 - vX0_1;
1103 vX1_2PtDft_1 = vX1 + vX1_1;
1104 vX1_2PtDft_2 = vX1 - vX1_1;
1105 vX2_2PtDft_1 = vX2 + vX2_1;
1106 vX2_2PtDft_2 = vX2 - vX2_1;
1107 vX3_2PtDft_1 = vX3 + vX3_1;
1108 vX3_2PtDft_2 = vX3 - vX3_1;
1119 __vstore_reverse_bit((
CVP) (pY0 + offsetBitReverse), vX0_2PtDft_1);
1120 __vstore_reverse_bit((
CVP) (pY1 + offsetBitReverse), vX0_2PtDft_2);
1121 __vstore_reverse_bit((
CVP) (pY2 + offsetBitReverse), vX1_2PtDft_1);
1122 __vstore_reverse_bit((
CVP) (pY3 + offsetBitReverse), vX1_2PtDft_2);
1123 __vstore_reverse_bit((
CVP) (pY4 + offsetBitReverse), vX2_2PtDft_1);
1124 __vstore_reverse_bit((
CVP) (pY5 + offsetBitReverse), vX2_2PtDft_2);
1125 __vstore_reverse_bit((
CVP) (pY6 + offsetBitReverse), vX3_2PtDft_1);
1126 __vstore_reverse_bit((
CVP) (pY7 + offsetBitReverse), vX3_2PtDft_2);
1129 #ifdef CL7X_HE_CFLOAT_PTR_BUG
1130 myPY0 += (numPoints << 1);
1131 myPY1 += (numPoints << 1);
1132 myPY2 += (numPoints << 1);
1133 myPY3 += (numPoints << 1);
1134 myPY4 += (numPoints << 1);
1135 myPY5 += (numPoints << 1);
1136 myPY6 += (numPoints << 1);
1137 myPY7 += (numPoints << 1);
1139 pY0 = (cfloat *) myPY0;
1140 pY1 = (cfloat *) myPY1;
1141 pY2 = (cfloat *) myPY2;
1142 pY3 = (cfloat *) myPY3;
1143 pY4 = (cfloat *) myPY4;
1144 pY5 = (cfloat *) myPY5;
1145 pY6 = (cfloat *) myPY6;
1146 pY7 = (cfloat *) myPY7;
1167 #if (!defined(FFTLIB_REMOVE_CHECK_PARAMS) && !defined(FFTLIB_IFFT1DBATCHED_I32FC_C32FC_O32FC_REMOVE_CHECK_PARAMS)) || \
1168 (defined(FFTLIB_CHECK_PARAMS)) || (defined(FFTLIB_IFFT1DBATCHED_I32FC_C32FC_O32FC_CHECK_PARAMS))
1177 uint32_t numChannels,
1182 if ((pX == NULL) || (pW == NULL) || (pY == NULL) || (pBlock == NULL)) {
1185 else if (bufParamsX->
dim_x != bufParamsY->
dim_x) {
1188 else if (bufParamsX->
dim_x < numPoints * numChannels * 2) {
1195 else if (bufParamsX->
dim_x < 64 * 2) {
1198 else if (bufParamsW->
dim_x != numPoints * 2) {
1205 else if (((uint64_t) pX) & 0xFu) {
1213 if (numPoints & (1u << k)) {
1218 if ((1u << k) != numPoints) {
1222 if ((numChannels != 1) && (numChannels != 2) && (numChannels != 4) && (numChannels != 8) && (numChannels != 16)) {
FFTLIB_STATUS_NAME
The enumeration of all status codes.
@ FFTLIB_ERR_INVALID_TYPE
@ FFTLIB_ERR_NULL_POINTER
@ FFTLIB_ERR_INVALID_DIMENSION
@ FFTLIB_ERR_NOT_ALIGNED_PTRS_STRIDES
float FFTLIB_F32
Single precision floating point.
#define SA_LOOP4_PARAM_OFFSET
#define SE_LOOP6_PARAM_OFFSET
#define SE_LOOP4_PARAM_OFFSET
void ifft_i32fc_o32fc_conjugate_init_ci(void *pX, uint32_t size, void *pBlock)
static c7x::cfloat_vec ifft_i32fc_o32fc_scaleAndConjugate(c7x::cfloat_vec in, c7x::float_vec scaleVec, c7x::ulong_vec xorVec)
#define SA_LOOP6_PARAM_OFFSET
void ifft_i32fc_o32fc_conjugate_exec_ci(void *pX, c7x::ulong_vec xorVec, uint32_t size, uint32_t numPoints, void *pBlock)
#define SE_CONJ_LOOP_PARAM_OFFSET
#define SA_LOOP2_PARAM_OFFSET
#define SA_CONJ_LOOP_PARAM_OFFSET
#define SE_LOOP7_PARAM_OFFSET
#define SE_LOOP1_PARAM_OFFSET
#define SA_LOOP1_PARAM_OFFSET
#define SE_TWID_PARAM_OFFSET
#define SE_LOOP2_PARAM_OFFSET
#define SE_LOOP3_PARAM_OFFSET
#define SA_LOOP3_PARAM_OFFSET
#define SE_LOOP5_PARAM_OFFSET
FFTLIB_STATUS FFTLIB_ifft1dBatched_i32fc_c32fc_o32fc_kernel(FFTLIB_F32 *pX, FFTLIB_bufParams1D_t *bufParamsX, FFTLIB_F32 *pW, FFTLIB_bufParams1D_t *bufParamsW, FFTLIB_F32 *pY, FFTLIB_bufParams1D_t *bufParamsY, uint32_t numPoints, uint32_t numChannels, void *pBlock)
This function is the main kernel compute function.
FFTLIB_STATUS FFTLIB_ifft1dBatched_i32fc_c32fc_o32fc_init(FFTLIB_F32 *pX, FFTLIB_bufParams1D_t *bufParamsX, FFTLIB_F32 *pW, FFTLIB_bufParams1D_t *bufParamsW, FFTLIB_F32 *pY, FFTLIB_bufParams1D_t *bufParamsY, uint32_t numPoints, uint32_t numChannels, void *pBlock)
This function should be called before the FFTLIB_ifft1dBatched_i32fc_c32fc_o32fc_kernel function is called.
FFTLIB_STATUS FFTLIB_ifft1dBatched_i32fc_c32fc_o32fc_checkParams(FFTLIB_F32 *pX, FFTLIB_bufParams1D_t *bufParamsX, FFTLIB_F32 *pW, FFTLIB_bufParams1D_t *bufParamsW, FFTLIB_F32 *pY, FFTLIB_bufParams1D_t *bufParamsY, uint32_t numPoints, uint32_t numChannels, void *pBlock)
This function checks the validity of the parameters passed to the FFTLIB_ifft1dBatched_i32fc_c32fc_o32fc_init and FFTLIB_ifft1dBatched_i32fc_c32fc_o32fc_kernel functions.
A structure for a 1 dimensional buffer descriptor.
uint32_t data_type
Values are of type FFTLIB_data_type_e.
uint32_t dim_x
Width of buffer in X dimension in elements.