29 #include "../../../common/c71/FFTLIB_debug.h"
30 #include "../FFTLIB_ifft1d_i32fc_c32fc_o32fc.h"
33 #define LAST_LOOP_UNROLL 0
36 #include "../../../common/printv.h"
/*
 * Offsets of the individual SE / SA parameter templates, laid out back to
 * back starting at SE_PARAM_BASE. Each offset is the previous slot's offset
 * plus the size of the template stored in that previous slot
 * (SE_PARAM_SIZE / SA_PARAM_SIZE are defined outside this excerpt).
 *
 * Slot order: SE loop 1..5, SE twiddle, SA loop 1..3, SE conjugate loop,
 * SA conjugate loop.
 */
#define SE_PARAM_BASE (0x0000)
#define SE_LOOP1_PARAM_OFFSET (SE_PARAM_BASE)
#define SE_LOOP2_PARAM_OFFSET (SE_LOOP1_PARAM_OFFSET + SE_PARAM_SIZE)
#define SE_LOOP3_PARAM_OFFSET (SE_LOOP2_PARAM_OFFSET + SE_PARAM_SIZE)
#define SE_LOOP4_PARAM_OFFSET (SE_LOOP3_PARAM_OFFSET + SE_PARAM_SIZE)
#define SE_LOOP5_PARAM_OFFSET (SE_LOOP4_PARAM_OFFSET + SE_PARAM_SIZE)
#define SE_TWID_PARAM_OFFSET (SE_LOOP5_PARAM_OFFSET + SE_PARAM_SIZE)
#define SA_LOOP1_PARAM_OFFSET (SE_TWID_PARAM_OFFSET + SE_PARAM_SIZE)
#define SA_LOOP2_PARAM_OFFSET (SA_LOOP1_PARAM_OFFSET + SA_PARAM_SIZE)
#define SA_LOOP3_PARAM_OFFSET (SA_LOOP2_PARAM_OFFSET + SA_PARAM_SIZE)
/* The slot before the conjugate-loop SE template holds an SA template
 * (SA loop 3), so it must be stepped over with SA_PARAM_SIZE. The earlier
 * form added SE_PARAM_SIZE, which is only right when both sizes are equal. */
#define SE_CONJ_LOOP_PARAM_OFFSET (SA_LOOP3_PARAM_OFFSET + SA_PARAM_SIZE)
#define SA_CONJ_LOOP_PARAM_OFFSET (SE_CONJ_LOOP_PARAM_OFFSET + SE_PARAM_SIZE)
// CV: short alias for the c7x complex-float vector type; used for the
// butterfly working registers and as the element type of the strm_eng /
// strm_agen accessors further down.
57 typedef typename c7x::cfloat_vec
CV;
// V: short alias for the c7x (real) float vector type.
60 typedef typename c7x::float_vec
V;
// Excerpt of a setup routine whose header is not visible here -- presumably
// ifft_i32fc_o32fc_conjugate_init_ci(pX, size, pBlock), the only listed
// function with a `size` parameter besides the exec routine (TODO confirm).
// It prepares one SE template and one SA template, both one-dimensional
// over `size` elements. Where the finished templates are stored is not
// visible in this excerpt.

// Start both templates from the generator defaults.
65 __SE_TEMPLATE_v1 se0_param = __gen_SE_TEMPLATE_v1 ();
66 __SA_TEMPLATE_v1 sa0_param = __gen_SA_TEMPLATE_v1 ();
// SE side: 32-bit complex elements with the "swap" element type, full
// cfloat_vec vector length, single dimension of `size` elements.
69 se0_param.ELETYPE = __SE_ELETYPE_32BIT_CMPLX_SWAP;
70 se0_param.VECLEN = c7x::se_veclen<c7x::cfloat_vec>::value;
71 se0_param.DIMFMT = __SE_DIMFMT_1D;
72 se0_param.ICNT0 = size;
// SA side: same single dimension of `size` elements, cfloat_vec vector
// length, so address generation covers the same element count as the SE.
77 sa0_param.ICNT0 = size;
78 sa0_param.VECLEN = c7x::sa_veclen<c7x::cfloat_vec>::value;
79 sa0_param.DIMFMT = __SA_DIMFMT_1D;
// Excerpt of an initialisation routine whose header is not visible here --
// presumably FFTLIB_ifft1d_i32fc_c32fc_o32fc_init (TODO confirm). The
// visible statements fill SE (se0_param / se1_param) and SA (sa0_param)
// templates several times in a row with different shapes. The statements
// that save each finished template (presumably into pBlock at the
// *_PARAM_OFFSET positions) are not visible in this excerpt.

// Optional argument validation: compiled in when either the library-wide or
// the per-function CHECK_PARAMS macro is defined. The name of the function
// being called is on a line not visible here.
96 #if defined(FFTLIB_CHECK_PARAMS) || \
97 defined(FFTLIB_IFFT1D_I32FC_C32FC_O32FC_CHECK_PARAMS)
99 pX, bufParamsX, pW, bufParamsW, pY, bufParamsY, pBlock);
104 uint32_t numPointsPerDft;
105 uint32_t seCnt1, seCnt2, seCnt3, seCnt4;
106 __SE_TEMPLATE_v1 se0_param = __gen_SE_TEMPLATE_v1 ();
107 __SE_TEMPLATE_v1 se1_param = __gen_SE_TEMPLATE_v1 ();
108 __SA_TEMPLATE_v1 sa0_param = __gen_SA_TEMPLATE_v1 ();
// numPoints is half of dim_x -- presumably dim_x counts interleaved
// real/imag floats, making numPoints the complex point count (TODO confirm).
110 numPoints = bufParamsX->
dim_x >> 1;
111 numPointsPerDft = numPoints;
// Derived counts: numPoints/4, numPoints/16 and numPoints/4 again.
// The assignment of seCnt3 is not visible in this excerpt.
112 seCnt1 = numPoints >> 2;
113 seCnt2 = numPoints >> 4;
115 seCnt4 = numPoints >> 2;
// Number of cfloat elements held by one cfloat_vec.
119 uint32_t elementSize = c7x::element_count_of<c7x::cfloat_vec>::value;
// --- SE0, 4-D shape: one vector (elementSize) in dim 0, dim 1 strided by
// seCnt1, dim 2 stepping by one vector for seCnt2 iterations, dim 3
// stepping by numPointsPerDft for seCnt3 iterations. ICNT1 is set on a
// line not visible here.
122 se0_param.ICNT0 = elementSize;
124 se0_param.DIM1 = seCnt1;
125 se0_param.ICNT2 = seCnt2;
126 se0_param.DIM2 = elementSize;
127 se0_param.ICNT3 = seCnt3;
128 se0_param.DIM3 = numPointsPerDft;
130 se0_param.ELETYPE = __SE_ELETYPE_32BIT_CMPLX_SWAP;
131 se0_param.VECLEN = c7x::se_veclen<c7x::cfloat_vec>::value;
132 se0_param.DIMFMT = __SE_DIMFMT_4D;
// --- SE1, 4-D shape with the same counts as SE0 in dims 0-2. No DIM3
// assignment is visible for this template (presumably the twiddle stream,
// given it is opened on pWLocal in the kernel -- TODO confirm).
137 se1_param.ICNT0 = elementSize;
139 se1_param.DIM1 = seCnt1;
140 se1_param.ICNT2 = seCnt2;
141 se1_param.DIM2 = elementSize;
142 se1_param.ICNT3 = seCnt3;
145 se1_param.ELETYPE = __SE_ELETYPE_32BIT_CMPLX_SWAP;
146 se1_param.VECLEN = c7x::se_veclen<c7x::cfloat_vec>::value;
147 se1_param.DIMFMT = __SE_DIMFMT_4D;
// --- SA0, 4-D shape mirroring SE0's counts and strides.
152 sa0_param.ICNT0 = elementSize;
154 sa0_param.DIM1 = seCnt1;
155 sa0_param.ICNT2 = seCnt2;
156 sa0_param.DIM2 = elementSize;
157 sa0_param.ICNT3 = seCnt3;
158 sa0_param.DIM3 = numPointsPerDft;
160 sa0_param.VECLEN = c7x::sa_veclen<c7x::cfloat_vec>::value;
161 sa0_param.DIMFMT = __SA_DIMFMT_4D;
// --- se0_param reused for a 3-D shape. Fields not reassigned here keep
// their earlier values unless reset on lines not visible in this excerpt.
166 se0_param.ICNT0 = elementSize;
169 se0_param.ICNT2 = seCnt2;
172 se0_param.ELETYPE = __SE_ELETYPE_32BIT_CMPLX_SWAP;
176 se0_param.VECLEN = c7x::se_veclen<c7x::cfloat_vec>::value;
177 se0_param.DIMFMT = __SE_DIMFMT_3D;
// --- sa0_param reused for a flat 1-D pass over numPoints elements.
182 sa0_param.ICNT0 = numPoints;
184 sa0_param.VECLEN = c7x::sa_veclen<c7x::cfloat_vec>::value;
185 sa0_param.DIMFMT = __SA_DIMFMT_1D;
// --- se0_param reset to generator defaults, then set to a flat 1-D pass
// over numPoints elements.
190 se0_param = __gen_SE_TEMPLATE_v1 ();
191 se0_param.ICNT0 = numPoints;
193 se0_param.ELETYPE = __SE_ELETYPE_32BIT_CMPLX_SWAP;
194 se0_param.VECLEN = c7x::se_veclen<c7x::cfloat_vec>::value;
195 se0_param.DIMFMT = __SE_DIMFMT_1D;
// --- matching flat 1-D SA shape.
200 sa0_param.ICNT0 = numPoints;
202 sa0_param.VECLEN = c7x::sa_veclen<c7x::cfloat_vec>::value;
203 sa0_param.DIMFMT = __SA_DIMFMT_1D;
// --- se0_param as a 2-D shape with 64-bit transpose: seCnt4 elements per
// row, elementSize rows, row stride seCnt4.
208 se0_param.ICNT0 = seCnt4;
209 se0_param.ICNT1 = elementSize;
210 se0_param.DIM1 = seCnt4;
216 se0_param.ELETYPE = __SE_ELETYPE_32BIT_CMPLX_SWAP;
217 se0_param.TRANSPOSE = __SE_TRANSPOSE_64BIT;
218 se0_param.VECLEN = c7x::se_veclen<c7x::cfloat_vec>::value;
219 se0_param.DIMFMT = __SE_DIMFMT_2D;
// --- the same 2-D transposed shape assigned a second time. The visible
// fields are identical to the previous group; any differing fields would be
// on lines not visible in this excerpt.
224 se0_param.ICNT0 = seCnt4;
225 se0_param.ICNT1 = elementSize;
226 se0_param.DIM1 = seCnt4;
232 se0_param.ELETYPE = __SE_ELETYPE_32BIT_CMPLX_SWAP;
233 se0_param.TRANSPOSE = __SE_TRANSPOSE_64BIT;
234 se0_param.VECLEN = c7x::se_veclen<c7x::cfloat_vec>::value;
235 se0_param.DIMFMT = __SE_DIMFMT_2D;
// Helper (name and first parameter `in` are on a line not visible here;
// presumably ifft_i32fc_o32fc_scaleAndConjugate). Reinterprets `in` as
// unsigned-long lanes, XORs them with `xorVec`, reinterprets the result as
// float lanes, multiplies lane-wise by `scaleVec`, and returns the product
// reinterpreted as a complex-float vector.
// With the kernel's xorVec of 0x0000000080000000 per lane this flips bit 31
// of every 64-bit lane, i.e. the sign of one float of each complex pair --
// presumably the imaginary part, giving a scaled conjugate (TODO confirm
// lane ordering).
242 static inline c7x::cfloat_vec
244 c7x::float_vec scaleVec,
245 c7x::ulong_vec xorVec)
247 return (c7x::as_cfloat_vec (
248 scaleVec * c7x::as_float_vec (c7x::as_ulong_vec (in) ^ xorVec)));
// Excerpt of a routine whose name line is not visible here -- presumably
// ifft_i32fc_o32fc_conjugate_exec_ci(pX, xorVec, size, pBlock) (TODO
// confirm). It streams the pX buffer through SE0 one vector at a time and
// stores one vector per iteration back into the same buffer through SA0.
// The statement computing regStore from regIn, and the definitions of
// `scale`, `i`, `tmp` and `addr`, are on lines not visible in this excerpt.
252 c7x::ulong_vec xorVec,
257 __SE_TEMPLATE_v1 se0_param = __gen_SE_TEMPLATE_v1 ();
258 __SA_TEMPLATE_v1 sa0_param = __gen_SA_TEMPLATE_v1 ();
259 cfloat *restrict pXLocal = (cfloat *) pX;
// Open the read stream on pX and the address generator used for the stores.
// The templates are presumably reloaded from pBlock on lines not visible
// here before being used.
267 __SE0_OPEN (pX, se0_param);
268 __SA0_OPEN (sa0_param);
// One iteration per full vector. A remainder of `size` that does not fill a
// whole vector is not covered by this count.
271 uint32_t loopCount = (size) / c7x::element_count_of<c7x::cfloat_vec>::value;
272 c7x::cfloat_vec regIn, regStore;
// Broadcast the scalar scale factor to every lane.
274 c7x::float_vec scaleVec = __vload_dup (&scale);
279 for (i = 0; i < loopCount; i++) {
// Fetch the next input vector and advance the stream.
280 regIn = c7x::strm_eng<0, c7x::cfloat_vec>::get_adv ();
// Predicated store: take the SA0 predicate and next address (relative to
// pXLocal), then write regStore under that predicate.
286 tmp = c7x::strm_agen<0, CV>::get_vpred ();
287 addr = c7x::strm_agen<0, CV>::get_adv (&pXLocal[0]);
288 __vstore_pred (tmp, addr, regStore);
// Local declarations of a routine whose header is not visible here --
// presumably FFTLIB_ifft1d_i32fc_c32fc_o32fc_kernel (TODO confirm).
307 uint32_t numPointsPerDft;
308 uint32_t numLeadingZeros;
309 uint32_t offsetBitReverse;
310 uint32_t seCnt1, seCnt2, seCnt3;
// Working copies of the SE / SA templates.
311 __SE_TEMPLATE_v1 se0_param = __gen_SE_TEMPLATE_v1 ();
312 __SE_TEMPLATE_v1 se1_param = __gen_SE_TEMPLATE_v1 ();
313 __SA_TEMPLATE_v1 sa0_param = __gen_SA_TEMPLATE_v1 ();
// Complex-typed views of the data (pX) and twiddle (pW) buffers.
315 cfloat *restrict pXLocal;
317 cfloat *restrict pWLocal;
// Eight output base pointers into pY, used by the final-stage bit-reversed
// stores.
318 cfloat *restrict pY0;
319 cfloat *restrict pY1;
320 cfloat *restrict pY2;
321 cfloat *restrict pY3;
322 cfloat *restrict pY4;
323 cfloat *restrict pY5;
324 cfloat *restrict pY6;
325 cfloat *restrict pY7;
// Butterfly registers: four inputs, their pairwise sums/differences, the
// three twiddle vectors, pre-twiddle temporaries and the four outputs.
327 CV vX_0, vX_N_4, vX_N_2, vX_3N_4;
328 CV vSum1, vSum2, vDiff1, vDiff2;
329 CV vTwX1, vTwX2, vTwX3;
330 CV vX0Temp, vX1Temp, vX2Temp, vX3Temp;
331 CV vX0, vX1, vX2, vX3;
// "_1" variants: the same roles for a second butterfly evaluated in the
// same loop iteration in the final stages.
332 CV vX_0_1, vX_N_4_1, vX_N_2_1, vX_3N_4_1;
333 CV vSum1_1, vSum2_1, vDiff1_1, vDiff2_1;
334 CV vX0_1, vX1_1, vX2_1, vX3_1;
// Sum / difference pairs formed from the two butterflies' outputs in the
// last stage.
335 CV vX0_2PtDft_1, vX0_2PtDft_2;
336 CV vX1_2PtDft_1, vX1_2PtDft_2;
337 CV vX2_2PtDft_1, vX2_2PtDft_2;
338 CV vX3_2PtDft_1, vX3_2PtDft_2;
// Optional argument validation; the called function's name is on a line not
// visible here.
// NOTE(review): this guard tests only FFTLIB_CHECK_PARAMS, whereas the
// equivalent guard earlier in the file also accepts
// FFTLIB_IFFT1D_I32FC_C32FC_O32FC_CHECK_PARAMS -- confirm whether the
// per-function macro should enable the check here as well.
341 #ifdef FFTLIB_CHECK_PARAMS
343 pX, bufParamsX, pW, bufParamsW, pY, bufParamsY, pBlock);
// numPoints is half of dim_x (presumably dim_x counts interleaved real/imag
// floats -- TODO confirm). The first stage operates on the whole transform.
347 numPoints = bufParamsX->
dim_x >> 1;
348 numPointsPerDft = numPoints;
// 1/N factor, broadcast to every lane for later use with the scale-and-
// conjugate helper. NOTE(review): the division is evaluated in double
// (1.0 literal) and then narrowed to float.
350 float scale = 1.0 / (numPoints);
351 c7x::float_vec scaleVec = __vload_dup (&scale);
// Per-64-bit-lane mask with only bit 31 set. The two spellings are for host
// and (presumably) target builds; the #else / #endif lines are not visible
// in this excerpt.
353 #if defined(_HOST_BUILD)
354 c7x::ulong_vec xorVec = (c7x::ulong_vec) (0x0000000080000000);
357 c7x::ulong_vec xorVec = (0x0000000080000000);
// Initial stream counts for the first stage: N/4 and N/16. The assignment
// of seCnt3 is not visible in this excerpt.
369 seCnt1 = numPointsPerDft >> 2;
370 seCnt2 = numPointsPerDft >> 4;
// Complex-typed views of the caller's data and twiddle buffers.
373 pXLocal = (cfloat *) pX;
374 pWLocal = (cfloat *) pW;
// Stage loop: runs while each sub-transform still has at least 16 points,
// shrinking numPointsPerDft by a factor of 4 per pass. Every pass reads
// data from pXLocal through SE0 and twiddles from pWLocal through SE1, and
// writes results back into pXLocal through SA0. Loading of the base
// templates and the updates of seCnt1..3 between passes are on lines not
// visible in this excerpt.
377 while (numPointsPerDft >= 16) {
// Patch the stage-dependent counts / strides into the data stream template
// and open it on the data buffer.
381 se0_param.DIM1 = seCnt1;
382 se0_param.ICNT2 = seCnt2;
385 se0_param.ICNT3 = seCnt3;
386 se0_param.DIM3 = numPointsPerDft;
387 __SE0_OPEN ((
void *) pXLocal, se0_param);
// Same for the twiddle stream (no DIM3 update is visible for it).
390 se1_param.DIM1 = seCnt1;
391 se1_param.ICNT2 = seCnt2;
393 se1_param.ICNT3 = seCnt3;
395 __SE1_OPEN ((
void *) pWLocal, se1_param);
// Store-address generator uses the same shape as the data stream.
398 sa0_param.DIM1 = seCnt1;
399 sa0_param.ICNT2 = seCnt2;
402 sa0_param.ICNT3 = seCnt3;
403 sa0_param.DIM3 = numPointsPerDft;
405 __SA0_OPEN (sa0_param);
// Each iteration evaluates two 4-input butterflies (eight vector loads and
// eight vector stores); k advances by 32 per iteration.
408 for (k = 0; k < numPoints; k += 32) {
// ---- first butterfly: fetch the four inputs.
411 vX_0 = c7x::strm_eng<0, CV>::get_adv ();
412 vX_N_4 = c7x::strm_eng<0, CV>::get_adv ();
413 vX_N_2 = c7x::strm_eng<0, CV>::get_adv ();
414 vX_3N_4 = c7x::strm_eng<0, CV>::get_adv ();
// Pairwise sums and differences of inputs (0, N/2) and (N/4, 3N/4).
416 vSum1 = vX_0 + vX_N_2;
417 vSum2 = vX_N_4 + vX_3N_4;
418 vDiff1 = vX_0 - vX_N_2;
419 vDiff2 = vX_N_4 - vX_3N_4;
// Three twiddle vectors for outputs 1..3.
421 vTwX1 = c7x::strm_eng<1, CV>::get_adv ();
422 vTwX2 = c7x::strm_eng<1, CV>::get_adv ();
423 vTwX3 = c7x::strm_eng<1, CV>::get_adv ();
// Combine into the four pre-twiddle outputs; __vcrot90sp_vv is applied to
// vDiff2 (by its name, a 90-degree complex rotation -- TODO confirm sign
// convention).
425 vX0Temp = vSum1 + vSum2;
426 vX1Temp = vDiff1 - __vcrot90sp_vv (vDiff2);
427 vX2Temp = vSum1 - vSum2;
428 vX3Temp = vDiff1 + __vcrot90sp_vv (vDiff2);
// Apply twiddles to outputs 1..3. The assignment of vX0 is on a line not
// visible in this excerpt.
431 vX1 = __complex_multiply (vX1Temp, vTwX1);
432 vX2 = __complex_multiply (vX2Temp, vTwX2);
433 vX3 = __complex_multiply (vX3Temp, vTwX3);
// Predicated stores back into pXLocal, in the order X0, X2, X1, X3.
442 tmp = c7x::strm_agen<0, CV>::get_vpred ();
443 addr = c7x::strm_agen<0, CV>::get_adv (pXLocal);
444 __vstore_pred (tmp, addr, vX0);
446 tmp = c7x::strm_agen<0, CV>::get_vpred ();
447 addr = c7x::strm_agen<0, CV>::get_adv (pXLocal);
448 __vstore_pred (tmp, addr, vX2);
450 tmp = c7x::strm_agen<0, CV>::get_vpred ();
451 addr = c7x::strm_agen<0, CV>::get_adv (pXLocal);
452 __vstore_pred (tmp, addr, vX1);
454 tmp = c7x::strm_agen<0, CV>::get_vpred ();
455 addr = c7x::strm_agen<0, CV>::get_adv (pXLocal);
456 __vstore_pred (tmp, addr, vX3);
// ---- second butterfly: identical sequence on the next four input vectors
// and the next three twiddle vectors.
460 vX_0 = c7x::strm_eng<0, CV>::get_adv ();
461 vX_N_4 = c7x::strm_eng<0, CV>::get_adv ();
462 vX_N_2 = c7x::strm_eng<0, CV>::get_adv ();
463 vX_3N_4 = c7x::strm_eng<0, CV>::get_adv ();
465 vSum1 = vX_0 + vX_N_2;
466 vSum2 = vX_N_4 + vX_3N_4;
467 vDiff1 = vX_0 - vX_N_2;
468 vDiff2 = vX_N_4 - vX_3N_4;
470 vTwX1 = c7x::strm_eng<1, CV>::get_adv ();
471 vTwX2 = c7x::strm_eng<1, CV>::get_adv ();
472 vTwX3 = c7x::strm_eng<1, CV>::get_adv ();
474 vX0Temp = vSum1 + vSum2;
475 vX1Temp = vDiff1 - __vcrot90sp_vv (vDiff2);
476 vX2Temp = vSum1 - vSum2;
477 vX3Temp = vDiff1 + __vcrot90sp_vv (vDiff2);
480 vX1 = __complex_multiply (vX1Temp, vTwX1);
481 vX2 = __complex_multiply (vX2Temp, vTwX2);
482 vX3 = __complex_multiply (vX3Temp, vTwX3);
// Stores for the second butterfly, again in the order X0, X2, X1, X3.
489 tmp = c7x::strm_agen<0, CV>::get_vpred ();
490 addr = c7x::strm_agen<0, CV>::get_adv (pXLocal);
491 __vstore_pred (tmp, addr, vX0);
493 tmp = c7x::strm_agen<0, CV>::get_vpred ();
494 addr = c7x::strm_agen<0, CV>::get_adv (pXLocal);
495 __vstore_pred (tmp, addr, vX2);
497 tmp = c7x::strm_agen<0, CV>::get_vpred ();
498 addr = c7x::strm_agen<0, CV>::get_adv (pXLocal);
499 __vstore_pred (tmp, addr, vX1);
501 tmp = c7x::strm_agen<0, CV>::get_vpred ();
502 addr = c7x::strm_agen<0, CV>::get_adv (pXLocal);
503 __vstore_pred (tmp, addr, vX3);
// Next stage: sub-transforms are a quarter of the size, and the twiddle
// pointer skips past three twiddles per point of the new sub-transform
// size (uses the already-reduced numPointsPerDft).
509 numPointsPerDft >>= 2;
510 pWLocal += numPointsPerDft * 3;
// Final stage for the case where 4 points per sub-transform remain: 4-input
// butterflies without twiddle multiplies, written to pY with bit-reversed
// stores. The template load for se0_param is on lines not visible in this
// excerpt.
519 if (numPointsPerDft == 4) {
523 __SE0_OPEN ((
void *) pXLocal, se0_param);
// Shift amount used to scale both the fixed 32-bit patterns below and the
// bit-reversed loop index down to the range of this transform size.
525 numLeadingZeros = __norm ((int32_t) (numPoints - 1)) + 1;
// Output base pointers: each is pY plus a fixed 32-bit pattern shifted right
// by numLeadingZeros, then doubled. The offset is applied before the cast to
// cfloat*, so it is in units of pY's own element type (FFTLIB_F32 per the
// listed kernel signature -- TODO confirm), hence presumably the "<< 1" for
// two floats per complex value.
532 pY0 = (cfloat *) (pY + 0);
533 pY1 = (cfloat *) (pY + ((0x40000000u >> numLeadingZeros) << 1));
534 pY2 = (cfloat *) (pY + ((0x80000000u >> numLeadingZeros) << 1));
535 pY3 = (cfloat *) (pY + ((0xC0000000u >> numLeadingZeros) << 1));
// NOTE(review): this is an #ifdef, and the file defines LAST_LOOP_UNROLL as
// 0 near the top, so the condition is true even though the value is 0.
// Confirm whether `#if LAST_LOOP_UNROLL` was intended.
537 #ifdef LAST_LOOP_UNROLL
// Four further base pointers used by the second butterfly of each iteration.
543 pY4 = (cfloat *) (pY + ((0x20000000u >> numLeadingZeros) << 1));
544 pY5 = (cfloat *) (pY + ((0x60000000u >> numLeadingZeros) << 1));
545 pY6 = (cfloat *) (pY + ((0xA0000000u >> numLeadingZeros) << 1));
546 pY7 = (cfloat *) (pY + ((0xE0000000u >> numLeadingZeros) << 1));
// Two alternative loop headers (step 8 or step 4); the preprocessor
// conditionals selecting between them are on lines not visible here.
550 for (k = 0; k<numPoints>> 2; k += 8)
552 for (k = 0; k<numPoints>> 2; k += 4)
// Bit-reverse the loop index and scale it down to this transform size.
555 offsetBitReverse = __bit_reverse (k) >> numLeadingZeros;
// ---- first butterfly: four inputs, sums/differences, outputs.
557 vX_0 = c7x::strm_eng<0, CV>::get_adv ();
558 vX_N_4 = c7x::strm_eng<0, CV>::get_adv ();
559 vX_N_2 = c7x::strm_eng<0, CV>::get_adv ();
560 vX_3N_4 = c7x::strm_eng<0, CV>::get_adv ();
562 vSum1 = vX_0 + vX_N_2;
563 vSum2 = vX_N_4 + vX_3N_4;
564 vDiff1 = vX_0 - vX_N_2;
565 vDiff2 = vX_N_4 - vX_3N_4;
// vX0 and vX2 are assigned on lines not visible in this excerpt, as is any
// scaling / conjugation applied before the stores.
568 vX1 = vDiff1 - __vcrot90sp_vv (vDiff2);
570 vX3 = vDiff1 + __vcrot90sp_vv (vDiff2);
// Bit-reversed stores of the four outputs relative to pY0..pY3.
577 __vstore_reverse_bit ((
CVP) (pY0 + offsetBitReverse), vX0);
578 __vstore_reverse_bit ((
CVP) (pY1 + offsetBitReverse), vX1);
579 __vstore_reverse_bit ((
CVP) (pY2 + offsetBitReverse), vX2);
580 __vstore_reverse_bit ((
CVP) (pY3 + offsetBitReverse), vX3);
// ---- second butterfly of the iteration, using the "_1" registers.
583 vX_0_1 = c7x::strm_eng<0, CV>::get_adv ();
584 vX_N_4_1 = c7x::strm_eng<0, CV>::get_adv ();
585 vX_N_2_1 = c7x::strm_eng<0, CV>::get_adv ();
586 vX_3N_4_1 = c7x::strm_eng<0, CV>::get_adv ();
588 vSum1_1 = vX_0_1 + vX_N_2_1;
589 vSum2_1 = vX_N_4_1 + vX_3N_4_1;
590 vDiff1_1 = vX_0_1 - vX_N_2_1;
591 vDiff2_1 = vX_N_4_1 - vX_3N_4_1;
593 vX0_1 = vSum1_1 + vSum2_1;
594 vX1_1 = vDiff1_1 - __vcrot90sp_vv (vDiff2_1);
595 vX2_1 = vSum1_1 - vSum2_1;
596 vX3_1 = vDiff1_1 + __vcrot90sp_vv (vDiff2_1);
// Bit-reversed stores of the second butterfly relative to pY4..pY7.
607 __vstore_reverse_bit ((
CVP) (pY4 + offsetBitReverse), vX0_1);
608 __vstore_reverse_bit ((
CVP) (pY5 + offsetBitReverse), vX1_1);
609 __vstore_reverse_bit ((
CVP) (pY6 + offsetBitReverse), vX2_1);
610 __vstore_reverse_bit ((
CVP) (pY7 + offsetBitReverse), vX3_1);
// Alternative final stage -- presumably the else-branch of the
// `numPointsPerDft == 4` test, whose `else` line is not visible here (TODO
// confirm). Each iteration evaluates two 4-input butterflies, multiplies
// the second one's outputs 1..3 by constant twiddle vectors, combines the
// two butterflies with a sum/difference step, and writes eight result
// vectors to pY with bit-reversed stores.
625 __SE0_OPEN ((
void *) pXLocal, se0_param);
// Shift amount used to scale the fixed patterns and the bit-reversed index.
627 numLeadingZeros = __norm ((int32_t) (numPoints - 1)) + 1;
// Each twiddle vector is built by replicating one scalar `twTemp` four
// times. The loads of twTemp are on lines not visible here. Each pair of
// spellings (constructor form / cast form) is presumably selected by a
// host-vs-target conditional that is also not visible.
631 vTwX1 =
CV (twTemp, twTemp, twTemp, twTemp);
635 vTwX1 = (
CV) (twTemp, twTemp, twTemp, twTemp);
641 vTwX2 =
CV (twTemp, twTemp, twTemp, twTemp);
645 vTwX2 = (
CV) (twTemp, twTemp, twTemp, twTemp);
650 vTwX3 =
CV (twTemp, twTemp, twTemp, twTemp);
654 vTwX3 = (
CV) (twTemp, twTemp, twTemp, twTemp);
// Eight output base pointers: pY plus a fixed 32-bit pattern shifted right
// by numLeadingZeros, then doubled. Note the pattern order differs from the
// 4-point final stage.
666 pY0 = (cfloat *) (pY + (0x00000000u));
667 pY1 = (cfloat *) (pY + ((0x80000000u >> numLeadingZeros) << 1));
668 pY2 = (cfloat *) (pY + ((0x20000000u >> numLeadingZeros) << 1));
669 pY3 = (cfloat *) (pY + ((0xA0000000u >> numLeadingZeros) << 1));
670 pY4 = (cfloat *) (pY + ((0x40000000u >> numLeadingZeros) << 1));
671 pY5 = (cfloat *) (pY + ((0xC0000000u >> numLeadingZeros) << 1));
672 pY6 = (cfloat *) (pY + ((0x60000000u >> numLeadingZeros) << 1));
673 pY7 = (cfloat *) (pY + ((0xE0000000u >> numLeadingZeros) << 1));
675 for (k = 0; k<numPoints>> 2; k += 8) {
// Bit-reverse the loop index and scale it down to this transform size.
676 offsetBitReverse = __bit_reverse (k) >> numLeadingZeros;
// Eight consecutive stream reads, alternating between the first butterfly's
// inputs and the second ("_1") butterfly's inputs.
678 vX_0 = c7x::strm_eng<0, CV>::get_adv ();
679 vX_0_1 = c7x::strm_eng<0, CV>::get_adv ();
680 vX_N_4 = c7x::strm_eng<0, CV>::get_adv ();
681 vX_N_4_1 = c7x::strm_eng<0, CV>::get_adv ();
682 vX_N_2 = c7x::strm_eng<0, CV>::get_adv ();
683 vX_N_2_1 = c7x::strm_eng<0, CV>::get_adv ();
684 vX_3N_4 = c7x::strm_eng<0, CV>::get_adv ();
685 vX_3N_4_1 = c7x::strm_eng<0, CV>::get_adv ();
// ---- first butterfly (no twiddles). vX0 and vX2 are assigned on lines not
// visible in this excerpt.
687 vSum1 = vX_0 + vX_N_2;
688 vSum2 = vX_N_4 + vX_3N_4;
689 vDiff1 = vX_0 - vX_N_2;
690 vDiff2 = vX_N_4 - vX_3N_4;
693 vX1 = vDiff1 - __vcrot90sp_vv (vDiff2);
695 vX3 = vDiff1 + __vcrot90sp_vv (vDiff2);
// ---- second butterfly, into temporaries so outputs 1..3 can be twiddled.
697 vSum1_1 = vX_0_1 + vX_N_2_1;
698 vSum2_1 = vX_N_4_1 + vX_3N_4_1;
699 vDiff1_1 = vX_0_1 - vX_N_2_1;
700 vDiff2_1 = vX_N_4_1 - vX_3N_4_1;
702 vX0Temp = vSum1_1 + vSum2_1;
703 vX1Temp = vDiff1_1 - __vcrot90sp_vv (vDiff2_1);
704 vX2Temp = vSum1_1 - vSum2_1;
705 vX3Temp = vDiff1_1 + __vcrot90sp_vv (vDiff2_1);
// Twiddle outputs 1..3 of the second butterfly. The assignment of vX0_1 is
// on a line not visible in this excerpt.
708 vX1_1 = __complex_multiply (vX1Temp, vTwX1);
709 vX2_1 = __complex_multiply (vX2Temp, vTwX2);
710 vX3_1 = __complex_multiply (vX3Temp, vTwX3);
// Sum / difference of corresponding outputs of the two butterflies.
712 vX0_2PtDft_1 = vX0 + vX0_1;
713 vX0_2PtDft_2 = vX0 - vX0_1;
714 vX1_2PtDft_1 = vX1 + vX1_1;
715 vX1_2PtDft_2 = vX1 - vX1_1;
716 vX2_2PtDft_1 = vX2 + vX2_1;
717 vX2_2PtDft_2 = vX2 - vX2_1;
718 vX3_2PtDft_1 = vX3 + vX3_1;
719 vX3_2PtDft_2 = vX3 - vX3_1;
// Argument lines of eight calls taking (value, scaleVec, xorVec). The callee
// and assignment target are on lines not visible here; presumably each
// result is passed through the scale-and-conjugate helper and written back
// to the same variable (TODO confirm).
722 vX0_2PtDft_1, scaleVec, xorVec);
724 vX0_2PtDft_2, scaleVec, xorVec);
726 vX1_2PtDft_1, scaleVec, xorVec);
728 vX1_2PtDft_2, scaleVec, xorVec);
730 vX2_2PtDft_1, scaleVec, xorVec);
732 vX2_2PtDft_2, scaleVec, xorVec);
734 vX3_2PtDft_1, scaleVec, xorVec);
736 vX3_2PtDft_2, scaleVec, xorVec);
// Bit-reversed stores of the eight results relative to pY0..pY7.
738 __vstore_reverse_bit ((
CVP) (pY0 + offsetBitReverse), vX0_2PtDft_1);
739 __vstore_reverse_bit ((
CVP) (pY1 + offsetBitReverse), vX0_2PtDft_2);
740 __vstore_reverse_bit ((
CVP) (pY2 + offsetBitReverse), vX1_2PtDft_1);
741 __vstore_reverse_bit ((
CVP) (pY3 + offsetBitReverse), vX1_2PtDft_2);
742 __vstore_reverse_bit ((
CVP) (pY4 + offsetBitReverse), vX2_2PtDft_1);
743 __vstore_reverse_bit ((
CVP) (pY5 + offsetBitReverse), vX2_2PtDft_2);
744 __vstore_reverse_bit ((
CVP) (pY6 + offsetBitReverse), vX3_2PtDft_1);
745 __vstore_reverse_bit ((
CVP) (pY7 + offsetBitReverse), vX3_2PtDft_2);
// Excerpt of the parameter-validation routine (header not visible here;
// presumably FFTLIB_ifft1d_i32fc_c32fc_o32fc_checkParams). It is compiled
// unless both REMOVE_CHECK_PARAMS macros are defined, and is always compiled
// when either CHECK_PARAMS macro is defined. The status value assigned in
// each branch is on lines not visible in this excerpt.
754 #if (!defined(FFTLIB_REMOVE_CHECK_PARAMS) && \
755 !defined(FFTLIB_IFFT1D_I32FC_C32FC_O32FC_REMOVE_CHECK_PARAMS)) || \
756 (defined(FFTLIB_CHECK_PARAMS)) || \
757 (defined(FFTLIB_IFFT1D_I32FC_C32FC_O32FC_CHECK_PARAMS))
// Reject null data, twiddle or output pointers.
770 if ((pX == NULL) || (pW == NULL) || (pY == NULL)) {
// Reject mismatched buffer widths; the remainder of this condition is on
// lines not visible here.
773 else if (bufParamsX->
dim_x != bufParamsW->
dim_x ||
// Reject inputs with dim_x below 128 (64 * 2).
777 else if (bufParamsX->
dim_x < 64 * 2) {
// Reject a pX address that is not a multiple of 16 bytes.
785 else if (((uint64_t) pX) & 0xFu) {
// Tests bit k of dim_x, then checks that dim_x equals exactly (1u << k);
// presumably part of a power-of-two check whose enclosing loop over k is on
// lines not visible in this excerpt.
793 if (bufParamsX->
dim_x & (1u << k)) {
798 if ((1u << k) != bufParamsX->
dim_x) {
FFTLIB_STATUS_NAME
The enumeration of all status codes.
@ FFTLIB_ERR_INVALID_TYPE
@ FFTLIB_ERR_NULL_POINTER
@ FFTLIB_ERR_INVALID_DIMENSION
@ FFTLIB_ERR_NOT_ALIGNED_PTRS_STRIDES
float FFTLIB_F32
Single precision floating point.
#define SE_LOOP4_PARAM_OFFSET
void ifft_i32fc_o32fc_conjugate_init_ci(void *pX, uint32_t size, void *pBlock)
static c7x::cfloat_vec ifft_i32fc_o32fc_scaleAndConjugate(c7x::cfloat_vec in, c7x::float_vec scaleVec, c7x::ulong_vec xorVec)
void ifft_i32fc_o32fc_conjugate_exec_ci(void *pX, c7x::ulong_vec xorVec, uint32_t size, void *pBlock)
#define SE_CONJ_LOOP_PARAM_OFFSET
#define SA_LOOP2_PARAM_OFFSET
#define SA_CONJ_LOOP_PARAM_OFFSET
#define SE_LOOP1_PARAM_OFFSET
#define SA_LOOP1_PARAM_OFFSET
#define SE_TWID_PARAM_OFFSET
#define SE_LOOP2_PARAM_OFFSET
#define SE_LOOP3_PARAM_OFFSET
#define SA_LOOP3_PARAM_OFFSET
#define SE_LOOP5_PARAM_OFFSET
FFTLIB_STATUS FFTLIB_ifft1d_i32fc_c32fc_o32fc_checkParams(FFTLIB_F32 *pX, FFTLIB_bufParams1D_t *bufParamsX, FFTLIB_F32 *pW, FFTLIB_bufParams1D_t *bufParamsW, FFTLIB_F32 *pY, FFTLIB_bufParams1D_t *bufParamsY, void *pBlock)
This function checks the validity of the parameters passed to FFTLIB_ifft1d_i32fc_c32fc_o32fc_init and FFTLIB_ifft1d_i32fc_c32fc_o32fc_kernel.
FFTLIB_STATUS FFTLIB_ifft1d_i32fc_c32fc_o32fc_init(FFTLIB_F32 *pX, FFTLIB_bufParams1D_t *bufParamsX, FFTLIB_F32 *pW, FFTLIB_bufParams1D_t *bufParamsW, FFTLIB_F32 *pY, FFTLIB_bufParams1D_t *bufParamsY, void *pBlock)
This function should be called before the FFTLIB_ifft1d_i32fc_c32fc_o32fc_kernel function is called....
FFTLIB_STATUS FFTLIB_ifft1d_i32fc_c32fc_o32fc_kernel(FFTLIB_F32 *pX, FFTLIB_bufParams1D_t *bufParamsX, FFTLIB_F32 *pW, FFTLIB_bufParams1D_t *bufParamsW, FFTLIB_F32 *pY, FFTLIB_bufParams1D_t *bufParamsY, void *pBlock)
This function is the main kernel compute function.
A structure for a 1 dimensional buffer descriptor.
uint32_t data_type
Values are of type FFTLIB_data_type_e.
uint32_t dim_x
Width of buffer in X dimension in elements.