29 #include "../FFTLIB_ifft1d_i32fc_c32fc_o32fc.h"
34 #include "../../../common/printv.h"
// Byte offsets into the user-supplied scratch block (pBlock) where the
// pre-computed Streaming Engine (SE) and Streaming Address generator (SA)
// templates are stored by the _init function and reloaded by the _kernel.
// Each slot is SE_PARAM_SIZE / SA_PARAM_SIZE bytes; offsets are cumulative.
42 #define SE_PARAM_BASE (0x0000)
43 #define SE_LOOP1_PARAM_OFFSET (SE_PARAM_BASE)
44 #define SE_LOOP2_PARAM_OFFSET (SE_LOOP1_PARAM_OFFSET + SE_PARAM_SIZE)
45 #define SE_LOOP3_PARAM_OFFSET (SE_LOOP2_PARAM_OFFSET + SE_PARAM_SIZE)
46 #define SE_LOOP4_PARAM_OFFSET (SE_LOOP3_PARAM_OFFSET + SE_PARAM_SIZE)
47 #define SE_LOOP5_PARAM_OFFSET (SE_LOOP4_PARAM_OFFSET + SE_PARAM_SIZE)
48 #define SE_TWID_PARAM_OFFSET (SE_LOOP5_PARAM_OFFSET + SE_PARAM_SIZE)
49 #define SA_LOOP1_PARAM_OFFSET (SE_TWID_PARAM_OFFSET + SE_PARAM_SIZE)
50 #define SA_LOOP2_PARAM_OFFSET (SA_LOOP1_PARAM_OFFSET + SA_PARAM_SIZE)
51 #define SA_LOOP3_PARAM_OFFSET (SA_LOOP2_PARAM_OFFSET + SA_PARAM_SIZE)
// NOTE(review): the slot before SE_CONJ_LOOP_PARAM_OFFSET holds an SA
// template, so stepping by SE_PARAM_SIZE here looks inconsistent with the
// SA_PARAM_SIZE steps above. Harmless only if the two sizes are equal —
// confirm against the SE_PARAM_SIZE/SA_PARAM_SIZE definitions.
52 #define SE_CONJ_LOOP_PARAM_OFFSET (SA_LOOP3_PARAM_OFFSET + SE_PARAM_SIZE)
53 #define SA_CONJ_LOOP_PARAM_OFFSET (SE_CONJ_LOOP_PARAM_OFFSET + SE_PARAM_SIZE)
// Shorthand aliases for the c7x complex-float and float vector types used
// throughout the butterflies below.
55 typedef typename c7x::cfloat_vec
CV;
58 typedef typename c7x::float_vec
V;
// Fragment of ifft_i32fc_o32fc_conjugate_init_ci(): builds the 1-D SE/SA
// templates used by the conjugate pass over `size` complex elements.
// (Several original lines are missing from this extract.)
63 __SE_TEMPLATE_v1 se0_param = __gen_SE_TEMPLATE_v1 ();
64 __SA_TEMPLATE_v1 sa0_param = __gen_SA_TEMPLATE_v1 ();
// SE fetches 32-bit complex pairs with real/imag swap, one flat 1-D stream.
67 se0_param.ELETYPE = __SE_ELETYPE_32BIT_CMPLX_SWAP;
68 se0_param.VECLEN = c7x::se_veclen<c7x::cfloat_vec>::value;
69 se0_param.DIMFMT = __SE_DIMFMT_1D;
70 se0_param.ICNT0 = size;
// Matching 1-D SA template for the predicated stores of the same length.
75 sa0_param.ICNT0 = size;
76 sa0_param.VECLEN = c7x::sa_veclen<c7x::cfloat_vec>::value;
77 sa0_param.DIMFMT = __SA_DIMFMT_1D;
// Optional parameter validation, compiled in only when the check-params
// macros are defined (call target line is missing from this extract).
94 #if defined(FFTLIB_CHECK_PARAMS) || \
95 defined(FFTLIB_IFFT1D_I32FC_C32FC_O32FC_CHECK_PARAMS)
97 pX, bufParamsX, pW, bufParamsW, pY, bufParamsY, pBlock);
// Fragment of the _init function: pre-computes the SE/SA templates for every
// stage of the radix-4 IFFT and (presumably — the store lines are missing
// from this extract) saves them into pBlock at the *_PARAM_OFFSET slots.
102 uint32_t numPointsPerDft;
103 uint32_t seCnt1, seCnt2, seCnt3, seCnt4;
104 __SE_TEMPLATE_v1 se0_param = __gen_SE_TEMPLATE_v1 ();
105 __SE_TEMPLATE_v1 se1_param = __gen_SE_TEMPLATE_v1 ();
106 __SA_TEMPLATE_v1 sa0_param = __gen_SA_TEMPLATE_v1 ();
// dim_x counts 32-bit floats (interleaved re/im), so >>1 gives the number
// of complex points.
108 numPoints = bufParamsX->
dim_x >> 1;
109 numPointsPerDft = numPoints;
110 seCnt1 = numPoints >> 2;
111 seCnt2 = numPoints >> 5;
113 seCnt4 = numPoints >> 3;
// 4-D SE template for the first-stage data stream: walks the four N/4
// sub-blocks of each DFT (seCnt3 is set on a line missing from this view).
120 se0_param.DIM1 = seCnt1;
121 se0_param.ICNT2 = seCnt2;
123 se0_param.ICNT3 = seCnt3;
124 se0_param.DIM3 = numPointsPerDft;
126 se0_param.ELETYPE = __SE_ELETYPE_32BIT_CMPLX_SWAP;
127 se0_param.VECLEN = c7x::se_veclen<c7x::cfloat_vec>::value;
128 se0_param.DIMFMT = __SE_DIMFMT_4D;
// 4-D SE template for the twiddle-factor stream (SE1), same loop structure.
135 se1_param.DIM1 = seCnt1;
136 se1_param.ICNT2 = seCnt2;
138 se1_param.ICNT3 = seCnt3;
141 se1_param.ELETYPE = __SE_ELETYPE_32BIT_CMPLX_SWAP;
142 se1_param.VECLEN = c7x::se_veclen<c7x::cfloat_vec>::value;
143 se1_param.DIMFMT = __SE_DIMFMT_4D;
// Matching 4-D SA template for the in-place stores of stage-1 results.
150 sa0_param.DIM1 = seCnt1;
151 sa0_param.ICNT2 = seCnt2;
153 sa0_param.ICNT3 = seCnt3;
154 sa0_param.DIM3 = numPointsPerDft;
156 sa0_param.VECLEN = c7x::sa_veclen<c7x::cfloat_vec>::value;
157 sa0_param.DIMFMT = __SA_DIMFMT_4D;
// 3-D transposed SE template (256-bit granules) — presumably for the
// 16-point stage's strided fetch; enclosing context is missing here.
165 se0_param.ICNT2 = seCnt2;
168 se0_param.ELETYPE = __SE_ELETYPE_32BIT_CMPLX_SWAP;
169 se0_param.TRANSPOSE =
170 __SE_TRANSPOSE_256BIT;
172 se0_param.VECLEN = c7x::se_veclen<c7x::cfloat_vec>::value;
173 se0_param.DIMFMT = __SE_DIMFMT_3D;
// Flat 1-D SA over all points for that stage's stores.
178 sa0_param.ICNT0 = numPoints;
180 sa0_param.VECLEN = c7x::sa_veclen<c7x::cfloat_vec>::value;
181 sa0_param.DIMFMT = __SA_DIMFMT_1D;
// Re-initialized 1-D SE/SA pair: simple linear pass over all points.
186 se0_param = __gen_SE_TEMPLATE_v1 ();
187 se0_param.ICNT0 = numPoints;
189 se0_param.ELETYPE = __SE_ELETYPE_32BIT_CMPLX_SWAP;
190 se0_param.VECLEN = c7x::se_veclen<c7x::cfloat_vec>::value;
191 se0_param.DIMFMT = __SE_DIMFMT_1D;
196 sa0_param.ICNT0 = numPoints;
198 sa0_param.VECLEN = c7x::sa_veclen<c7x::cfloat_vec>::value;
199 sa0_param.DIMFMT = __SA_DIMFMT_1D;
// Two 2-D transposed SE templates (64-bit granules, seCnt4 = N/8) —
// presumably the two last-stage fetch patterns; context lines are missing.
204 se0_param.ICNT0 = seCnt4;
206 se0_param.DIM1 = seCnt4;
212 se0_param.ELETYPE = __SE_ELETYPE_32BIT_CMPLX_SWAP;
213 se0_param.TRANSPOSE = __SE_TRANSPOSE_64BIT;
214 se0_param.VECLEN = c7x::se_veclen<c7x::cfloat_vec>::value;
215 se0_param.DIMFMT = __SE_DIMFMT_2D;
220 se0_param.ICNT0 = seCnt4;
222 se0_param.DIM1 = seCnt4;
228 se0_param.ELETYPE = __SE_ELETYPE_32BIT_CMPLX_SWAP;
229 se0_param.TRANSPOSE = __SE_TRANSPOSE_64BIT;
230 se0_param.VECLEN = c7x::se_veclen<c7x::cfloat_vec>::value;
231 se0_param.DIMFMT = __SE_DIMFMT_2D;
// ifft_i32fc_o32fc_scaleAndConjugate(in, scaleVec, xorVec):
// Scales each complex lane by scaleVec (the 1/N IFFT factor) and flips the
// sign bit selected by xorVec (0x0000000080000000 — one sign bit per 64-bit
// complex lane, i.e. conjugation; which half it hits depends on the
// CMPLX_SWAP lane order — confirm against the SE element type).
// The function-name/parameter and opening-brace lines are missing from this
// extract; only the return expression is visible.
238 static inline c7x::cfloat_vec
240 c7x::float_vec scaleVec,
241 c7x::ulong_vec xorVec)
// Reinterpret as ulong to XOR the sign bit, as float to multiply, then back
// to cfloat — all bit-cast style conversions, no data movement.
243 return (c7x::as_cfloat_vec (
244 scaleVec * c7x::as_float_vec (c7x::as_ulong_vec (in) ^ xorVec)));
// Fragment of ifft_i32fc_o32fc_conjugate_exec_ci(): streams `size` complex
// elements through SE0, applies the scale-and-conjugate transform
// (the line computing regStore from regIn is missing from this extract),
// and stores the result back in place via predicated SA0 stores.
248 c7x::ulong_vec xorVec,
253 __SE_TEMPLATE_v1 se0_param = __gen_SE_TEMPLATE_v1 ();
254 __SA_TEMPLATE_v1 sa0_param = __gen_SA_TEMPLATE_v1 ();
255 cfloat *restrict pXLocal = (cfloat *) pX;
263 __SE0_OPEN (pX, se0_param);
264 __SA0_OPEN (sa0_param);
// One iteration per full vector of complex elements.
267 uint32_t loopCount = (size) / c7x::element_count_of<c7x::cfloat_vec>::value;
268 c7x::cfloat_vec regIn, regStore;
// Broadcast the scalar scale into every float lane.
270 c7x::float_vec scaleVec = __vload_dup (&scale);
275 for (i = 0; i < loopCount; i++) {
276 regIn = c7x::strm_eng<0, c7x::cfloat_vec>::get_adv ();
// Predicated store handles any partial final vector.
282 tmp = c7x::strm_agen<0, CV>::get_vpred ();
283 addr = c7x::strm_agen<0, CV>::get_adv (&pXLocal[0]);
284 __vstore_pred (tmp, addr, regStore);
// Prologue fragment of the _kernel function: declares per-stage state,
// derives the point count and the 1/N scale, and sets up the conjugation
// mask and stage counters before the radix-4 stage loop.
303 uint32_t numPointsPerDft;
304 uint32_t numLeadingZeros;
305 uint32_t offsetBitReverse;
306 uint32_t seCnt1, seCnt2, seCnt3;
307 __SE_TEMPLATE_v1 se0_param = __gen_SE_TEMPLATE_v1 ();
308 __SE_TEMPLATE_v1 se1_param = __gen_SE_TEMPLATE_v1 ();
309 __SA_TEMPLATE_v1 sa0_param = __gen_SA_TEMPLATE_v1 ();
// Working pointers (restrict: caller guarantees X/W/Y don't alias) and the
// eight bit-reversed output cursors used by the final stage.
311 cfloat *restrict pXLocal;
312 cfloat *restrict pYLocal;
313 cfloat *restrict pWLocal;
314 cfloat *restrict pY0;
315 cfloat *restrict pY1;
316 cfloat *restrict pY2;
317 cfloat *restrict pY3;
318 cfloat *restrict pY4;
319 cfloat *restrict pY5;
320 cfloat *restrict pY6;
321 cfloat *restrict pY7;
// Vector registers for one radix-4 butterfly (and an unrolled second set,
// suffix _1/_2, used by the last stage's 2-point DFT combine).
323 CV vX_0, vX_N_4, vX_N_2, vX_3N_4;
324 CV vSum1, vSum2, vDiff1, vDiff2;
325 CV vTwX1, vTwX2, vTwX3;
326 CV vX0Temp, vX1Temp, vX2Temp, vX3Temp;
327 CV vX0, vX1, vX2, vX3;
328 CV vX_0_1, vX_N_4_1, vX_N_2_1, vX_3N_4_1;
329 CV vSum1_1, vSum2_1, vDiff1_1, vDiff2_1;
330 CV vX0_1, vX1_1, vX2_1, vX3_1;
331 CV vX0_2PtDft_1, vX0_2PtDft_2;
332 CV vX1_2PtDft_1, vX1_2PtDft_2;
333 CV vX2_2PtDft_1, vX2_2PtDft_2;
334 CV vX3_2PtDft_1, vX3_2PtDft_2;
337 #ifdef FFTLIB_CHECK_PARAMS
339 pX, bufParamsX, pW, bufParamsW, pY, bufParamsY, pBlock);
// dim_x counts floats; >>1 converts to complex points.
343 numPoints = bufParamsX->
dim_x >> 1;
344 numPointsPerDft = numPoints;
// IFFT scaling factor 1/N, broadcast to all lanes.
346 float scale = 1.0 / (numPoints);
347 c7x::float_vec scaleVec = __vload_dup (&scale);
// Conjugation mask: one sign bit per 64-bit complex lane. Host and target
// builds spell the vector-splat construction differently.
349 #if defined(_HOST_BUILD)
350 c7x::ulong_vec xorVec = (c7x::ulong_vec) (0x0000000080000000);
353 c7x::ulong_vec xorVec = (0x0000000080000000);
// Stage loop counters: quarter and 1/32 of the current DFT length.
365 seCnt1 = numPointsPerDft >> 2;
366 seCnt2 = numPointsPerDft >> 5;
369 pXLocal = (cfloat *) pX;
370 pWLocal = (cfloat *) pW;
371 pYLocal = (cfloat *) pY;
// Main radix-4 DIF stage loop: runs while each sub-DFT still has >= 64
// points, processing the four quarter-blocks of every DFT per iteration and
// writing results back in place. The loop body is manually unrolled 2x
// (two butterfly groups per `k` iteration of 64 points).
373 while (numPointsPerDft >= 64) {
// SE0: 4-D fetch of the four N/4-spaced input blocks for this stage.
377 se0_param.DIM1 = seCnt1;
378 se0_param.ICNT2 = seCnt2;
381 se0_param.ICNT3 = seCnt3;
382 se0_param.DIM3 = numPointsPerDft;
383 __SE0_OPEN ((
void *) pXLocal, se0_param);
// SE1: matching twiddle-factor stream from pWLocal.
386 se1_param.DIM1 = seCnt1;
387 se1_param.ICNT2 = seCnt2;
389 se1_param.ICNT3 = seCnt3;
391 __SE1_OPEN ((
void *) pWLocal, se1_param);
// SA0: in-place predicated stores with the same 4-D walk.
394 sa0_param.DIM1 = seCnt1;
395 sa0_param.ICNT2 = seCnt2;
398 sa0_param.ICNT3 = seCnt3;
399 sa0_param.DIM3 = numPointsPerDft;
401 __SA0_OPEN (sa0_param);
404 for (k = 0; k < numPoints; k += 64) {
// Fetch x[0], x[N/4], x[N/2], x[3N/4] of the current butterfly group.
407 vX_0 = c7x::strm_eng<0, CV>::get_adv ();
408 vX_N_4 = c7x::strm_eng<0, CV>::get_adv ();
409 vX_N_2 = c7x::strm_eng<0, CV>::get_adv ();
410 vX_3N_4 = c7x::strm_eng<0, CV>::get_adv ();
// Radix-4 butterfly: sums/differences of opposite quarters...
412 vSum1 = vX_0 + vX_N_2;
413 vSum2 = vX_N_4 + vX_3N_4;
414 vDiff1 = vX_0 - vX_N_2;
415 vDiff2 = vX_N_4 - vX_3N_4;
417 vTwX1 = c7x::strm_eng<1, CV>::get_adv ();
418 vTwX2 = c7x::strm_eng<1, CV>::get_adv ();
419 vTwX3 = c7x::strm_eng<1, CV>::get_adv ();
// ...then combine, with __vcrot90sp_vv providing the +/-j rotation.
421 vX0Temp = vSum1 + vSum2;
422 vX1Temp = vDiff1 - __vcrot90sp_vv (vDiff2);
423 vX2Temp = vSum1 - vSum2;
424 vX3Temp = vDiff1 + __vcrot90sp_vv (vDiff2);
// Apply twiddles W^k, W^2k, W^3k to the three non-trivial outputs
// (vX0 assignment line is missing from this extract).
427 vX1 = __complex_multiply (vX1Temp, vTwX1);
428 vX2 = __complex_multiply (vX2Temp, vTwX2);
429 vX3 = __complex_multiply (vX3Temp, vTwX3);
// Store in X0, X2, X1, X3 order — the digit-reversed order the next
// stage's stream expects.
438 tmp = c7x::strm_agen<0, CV>::get_vpred ();
439 addr = c7x::strm_agen<0, CV>::get_adv (pXLocal);
440 __vstore_pred (tmp, addr, vX0);
442 tmp = c7x::strm_agen<0, CV>::get_vpred ();
443 addr = c7x::strm_agen<0, CV>::get_adv (pXLocal);
444 __vstore_pred (tmp, addr, vX2);
446 tmp = c7x::strm_agen<0, CV>::get_vpred ();
447 addr = c7x::strm_agen<0, CV>::get_adv (pXLocal);
448 __vstore_pred (tmp, addr, vX1);
450 tmp = c7x::strm_agen<0, CV>::get_vpred ();
451 addr = c7x::strm_agen<0, CV>::get_adv (pXLocal);
452 __vstore_pred (tmp, addr, vX3);
// Second unrolled copy of the same butterfly + twiddle + store sequence.
455 vX_0 = c7x::strm_eng<0, CV>::get_adv ();
456 vX_N_4 = c7x::strm_eng<0, CV>::get_adv ();
457 vX_N_2 = c7x::strm_eng<0, CV>::get_adv ();
458 vX_3N_4 = c7x::strm_eng<0, CV>::get_adv ();
460 vSum1 = vX_0 + vX_N_2;
461 vSum2 = vX_N_4 + vX_3N_4;
462 vDiff1 = vX_0 - vX_N_2;
463 vDiff2 = vX_N_4 - vX_3N_4;
465 vTwX1 = c7x::strm_eng<1, CV>::get_adv ();
466 vTwX2 = c7x::strm_eng<1, CV>::get_adv ();
467 vTwX3 = c7x::strm_eng<1, CV>::get_adv ();
469 vX0Temp = vSum1 + vSum2;
470 vX1Temp = vDiff1 - __vcrot90sp_vv (vDiff2);
471 vX2Temp = vSum1 - vSum2;
472 vX3Temp = vDiff1 + __vcrot90sp_vv (vDiff2);
475 vX1 = __complex_multiply (vX1Temp, vTwX1);
476 vX2 = __complex_multiply (vX2Temp, vTwX2);
477 vX3 = __complex_multiply (vX3Temp, vTwX3);
484 tmp = c7x::strm_agen<0, CV>::get_vpred ();
485 addr = c7x::strm_agen<0, CV>::get_adv (pXLocal);
486 __vstore_pred (tmp, addr, vX0);
488 tmp = c7x::strm_agen<0, CV>::get_vpred ();
489 addr = c7x::strm_agen<0, CV>::get_adv (pXLocal);
490 __vstore_pred (tmp, addr, vX2);
492 tmp = c7x::strm_agen<0, CV>::get_vpred ();
493 addr = c7x::strm_agen<0, CV>::get_adv (pXLocal);
494 __vstore_pred (tmp, addr, vX1);
496 tmp = c7x::strm_agen<0, CV>::get_vpred ();
497 addr = c7x::strm_agen<0, CV>::get_adv (pXLocal);
498 __vstore_pred (tmp, addr, vX3);
// Next stage: DFT length shrinks by 4; twiddle table advances past the
// 3*(new length) factors this stage consumed.
504 numPointsPerDft >>= 2;
505 pWLocal += numPointsPerDft * 3;
// Special-cased stage for 16-point sub-DFTs: SE0/SE1 fetch two halves
// (pXLocal and pXLocal+8) so each vector holds data from two DFTs, and the
// twiddles are loaded once outside the loop and duplicated into both vector
// halves. Host-build and target-build branches differ only in the vector
// construction syntax (CV(...) vs (CV)(...)).
511 if (numPointsPerDft == 16) {
518 __SE0_OPEN ((
void *) pXLocal, se0_param);
519 __SE1_OPEN ((
void *) (pXLocal + 8), se0_param);
523 __SA0_OPEN (sa0_param);
// Load the three twiddle vectors and replicate their low halves.
526 vTwX1 = *((
CVP) pWLocal);
527 vTwX1 =
CV (vTwX1.lo (), vTwX1.lo ());
528 vTwX2 = *((
CVP) (pWLocal + 4));
529 vTwX2 =
CV (vTwX2.lo (), vTwX2.lo ());
530 vTwX3 = *((
CVP) (pWLocal + 8));
531 vTwX3 =
CV (vTwX3.lo (), vTwX3.lo ());
// Target-build equivalent of the twiddle setup above.
533 vTwX1 = *((
CVP) pWLocal);
534 vTwX1 = (
CV) (vTwX1.lo (), vTwX1.lo ());
535 vTwX2 = *((
CVP) (pWLocal + 4));
536 vTwX2 = (
CV) (vTwX2.lo (), vTwX2.lo ());
537 vTwX3 = *((
CVP) (pWLocal + 8));
538 vTwX3 = (
CV) (vTwX3.lo (), vTwX3.lo ());
// 32 complex points per iteration: two 16-point DFTs per butterfly.
541 for (k = 0; k < numPoints; k += 32) {
542 vX_0 = c7x::strm_eng<0, CV>::get_adv ();
543 vX_N_4 = c7x::strm_eng<0, CV>::get_adv ();
544 vX_N_2 = c7x::strm_eng<1, CV>::get_adv ();
545 vX_3N_4 = c7x::strm_eng<1, CV>::get_adv ();
// Standard radix-4 butterfly on the paired data.
547 vSum1 = vX_0 + vX_N_2;
548 vSum2 = vX_N_4 + vX_3N_4;
549 vDiff1 = vX_0 - vX_N_2;
550 vDiff2 = vX_N_4 - vX_3N_4;
552 vX0Temp = vSum1 + vSum2;
553 vX1Temp = vDiff1 - __vcrot90sp_vv (vDiff2);
554 vX2Temp = vSum1 - vSum2;
555 vX3Temp = vDiff1 + __vcrot90sp_vv (vDiff2);
// Twiddle multiply (vX0 assignment line missing from this extract).
558 vX1 = __complex_multiply (vX1Temp, vTwX1);
559 vX2 = __complex_multiply (vX2Temp, vTwX2);
560 vX3 = __complex_multiply (vX3Temp, vTwX3);
// Stores interleave the lo/hi halves of X0..X3 so the two DFTs land in
// their correct in-place positions (host-build branch).
574 tmp = c7x::strm_agen<0, CV>::get_vpred ();
575 addr = c7x::strm_agen<0, CV>::get_adv (pXLocal);
576 __vstore_pred (tmp, addr,
CV (vX0.lo (), vX2.lo ()));
577 tmp = c7x::strm_agen<0, CV>::get_vpred ();
578 addr = c7x::strm_agen<0, CV>::get_adv (pXLocal);
579 __vstore_pred (tmp, addr,
CV (vX1.lo (), vX3.lo ()));
580 tmp = c7x::strm_agen<0, CV>::get_vpred ();
581 addr = c7x::strm_agen<0, CV>::get_adv (pXLocal);
582 __vstore_pred (tmp, addr,
CV (vX0.hi (), vX2.hi ()));
583 tmp = c7x::strm_agen<0, CV>::get_vpred ();
584 addr = c7x::strm_agen<0, CV>::get_adv (pXLocal);
585 __vstore_pred (tmp, addr,
CV (vX1.hi (), vX3.hi ()));
// Target-build branch of the same four stores.
589 tmp = c7x::strm_agen<0, CV>::get_vpred ();
590 addr = c7x::strm_agen<0, CV>::get_adv (pXLocal);
591 __vstore_pred (tmp, addr, (
CV) (vX0.lo (), vX2.lo ()));
592 tmp = c7x::strm_agen<0, CV>::get_vpred ();
593 addr = c7x::strm_agen<0, CV>::get_adv (pXLocal);
594 __vstore_pred (tmp, addr, (
CV) (vX1.lo (), vX3.lo ()));
595 tmp = c7x::strm_agen<0, CV>::get_vpred ();
596 addr = c7x::strm_agen<0, CV>::get_adv (pXLocal);
597 __vstore_pred (tmp, addr, (
CV) (vX0.hi (), vX2.hi ()));
598 tmp = c7x::strm_agen<0, CV>::get_vpred ();
599 addr = c7x::strm_agen<0, CV>::get_adv (pXLocal);
600 __vstore_pred (tmp, addr, (
CV) (vX1.hi (), vX3.hi ()));
// Alternate stage (presumably the 32-point sub-DFT case — the enclosing
// else/if line is missing from this extract): single SE0 data stream, fixed
// twiddles loaded once from pWLocal at strides of 8, loop unrolled 2x.
611 __SE0_OPEN ((
void *) pXLocal, se0_param);
615 __SA0_OPEN (sa0_param);
617 vTwX1 = *((
CVP) pWLocal);
618 vTwX2 = *((
CVP) (pWLocal + 8));
619 vTwX3 = *((
CVP) (pWLocal + 16));
621 for (k = 0; k < numPoints; k += 64) {
622 vX_0 = c7x::strm_eng<0, CV>::get_adv ();
623 vX_N_4 = c7x::strm_eng<0, CV>::get_adv ();
624 vX_N_2 = c7x::strm_eng<0, CV>::get_adv ();
625 vX_3N_4 = c7x::strm_eng<0, CV>::get_adv ();
// Radix-4 butterfly, same structure as the main stage loop.
627 vSum1 = vX_0 + vX_N_2;
628 vSum2 = vX_N_4 + vX_3N_4;
629 vDiff1 = vX_0 - vX_N_2;
630 vDiff2 = vX_N_4 - vX_3N_4;
632 vX0Temp = vSum1 + vSum2;
633 vX1Temp = vDiff1 - __vcrot90sp_vv (vDiff2);
634 vX2Temp = vSum1 - vSum2;
635 vX3Temp = vDiff1 + __vcrot90sp_vv (vDiff2);
// Twiddle multiply (vX0 assignment line missing from this extract).
638 vX1 = __complex_multiply (vX1Temp, vTwX1);
639 vX2 = __complex_multiply (vX2Temp, vTwX2);
640 vX3 = __complex_multiply (vX3Temp, vTwX3);
// Predicated in-place stores in X0, X2, X1, X3 order.
649 tmp = c7x::strm_agen<0, CV>::get_vpred ();
650 addr = c7x::strm_agen<0, CV>::get_adv (pXLocal);
651 __vstore_pred (tmp, addr, vX0);
653 tmp = c7x::strm_agen<0, CV>::get_vpred ();
654 addr = c7x::strm_agen<0, CV>::get_adv (pXLocal);
655 __vstore_pred (tmp, addr, vX2);
657 tmp = c7x::strm_agen<0, CV>::get_vpred ();
658 addr = c7x::strm_agen<0, CV>::get_adv (pXLocal);
659 __vstore_pred (tmp, addr, vX1);
661 tmp = c7x::strm_agen<0, CV>::get_vpred ();
662 addr = c7x::strm_agen<0, CV>::get_adv (pXLocal);
663 __vstore_pred (tmp, addr, vX3);
// Second unrolled butterfly + store group per iteration.
665 vX_0 = c7x::strm_eng<0, CV>::get_adv ();
666 vX_N_4 = c7x::strm_eng<0, CV>::get_adv ();
667 vX_N_2 = c7x::strm_eng<0, CV>::get_adv ();
668 vX_3N_4 = c7x::strm_eng<0, CV>::get_adv ();
670 vSum1 = vX_0 + vX_N_2;
671 vSum2 = vX_N_4 + vX_3N_4;
672 vDiff1 = vX_0 - vX_N_2;
673 vDiff2 = vX_N_4 - vX_3N_4;
675 vX0Temp = vSum1 + vSum2;
676 vX1Temp = vDiff1 - __vcrot90sp_vv (vDiff2);
677 vX2Temp = vSum1 - vSum2;
678 vX3Temp = vDiff1 + __vcrot90sp_vv (vDiff2);
681 vX1 = __complex_multiply (vX1Temp, vTwX1);
682 vX2 = __complex_multiply (vX2Temp, vTwX2);
683 vX3 = __complex_multiply (vX3Temp, vTwX3);
690 tmp = c7x::strm_agen<0, CV>::get_vpred ();
691 addr = c7x::strm_agen<0, CV>::get_adv (pXLocal);
692 __vstore_pred (tmp, addr, vX0);
694 tmp = c7x::strm_agen<0, CV>::get_vpred ();
695 addr = c7x::strm_agen<0, CV>::get_adv (pXLocal);
696 __vstore_pred (tmp, addr, vX2);
698 tmp = c7x::strm_agen<0, CV>::get_vpred ();
699 addr = c7x::strm_agen<0, CV>::get_adv (pXLocal);
700 __vstore_pred (tmp, addr, vX1);
702 tmp = c7x::strm_agen<0, CV>::get_vpred ();
703 addr = c7x::strm_agen<0, CV>::get_adv (pXLocal);
704 __vstore_pred (tmp, addr, vX3);
// Advance to the final stage and past this stage's twiddles.
710 numPointsPerDft >>= 2;
711 pWLocal += numPointsPerDft * 3;
// Final stage when the remaining sub-DFTs are 4-point: no twiddles needed,
// results go to the output buffer pY with bit-reversed addressing via
// __vstore_reverse_bit. With LAST_LOOP_UNROLL, eight output cursors cover
// two butterfly groups per iteration.
713 if (numPointsPerDft == 4) {
717 __SE0_OPEN ((
void *) pXLocal, se0_param);
// numLeadingZeros positions the bit-reversed index within 32 bits for this
// numPoints (numPoints is a power of two).
719 numLeadingZeros = __norm ((int32_t) (numPoints - 1)) + 1;
// Output cursors at bit-reversed base offsets 0, 1/4, 2/4, 3/4 of the
// output (the <<1 converts complex index to float index).
726 pY0 = (cfloat *) (pY + 0);
727 pY1 = (cfloat *) (pY + ((0x40000000u >> numLeadingZeros) << 1));
728 pY2 = (cfloat *) (pY + ((0x80000000u >> numLeadingZeros) << 1));
729 pY3 = (cfloat *) (pY + ((0xC0000000u >> numLeadingZeros) << 1));
731 #ifdef LAST_LOOP_UNROLL
// Additional cursors at offsets 1/8, 3/8, 5/8, 7/8 for the unrolled pass.
737 pY4 = (cfloat *) (pY + ((0x20000000u >> numLeadingZeros) << 1));
738 pY5 = (cfloat *) (pY + ((0x60000000u >> numLeadingZeros) << 1));
739 pY6 = (cfloat *) (pY + ((0xA0000000u >> numLeadingZeros) << 1));
740 pY7 = (cfloat *) (pY + ((0xE0000000u >> numLeadingZeros) << 1));
// NOTE(review): "k<numPoints>> 3" is extraction-mangled spacing of
// "k < numPoints >> 3" — loop over one-eighth of the points.
743 #ifdef LAST_LOOP_UNROLL
744 for (k = 0; k<numPoints>> 3; k += 8)
746 for (k = 0; k<numPoints>> 3; k += 4)
// Bit-reverse the loop index to get this group's output offset.
749 offsetBitReverse = __bit_reverse (k) >> numLeadingZeros;
751 vX_0 = c7x::strm_eng<0, CV>::get_adv ();
752 vX_N_4 = c7x::strm_eng<0, CV>::get_adv ();
753 vX_N_2 = c7x::strm_eng<0, CV>::get_adv ();
754 vX_3N_4 = c7x::strm_eng<0, CV>::get_adv ();
// Twiddle-free radix-4 butterfly (vX0/vX2 assignment lines are missing
// from this extract).
756 vSum1 = vX_0 + vX_N_2;
757 vSum2 = vX_N_4 + vX_3N_4;
758 vDiff1 = vX_0 - vX_N_2;
759 vDiff2 = vX_N_4 - vX_3N_4;
762 vX1 = vDiff1 - __vcrot90sp_vv (vDiff2);
764 vX3 = vDiff1 + __vcrot90sp_vv (vDiff2);
// Bit-reversed scatter of the four results into the output buffer.
771 __vstore_reverse_bit ((
CVP) (pY0 + offsetBitReverse), vX0);
772 __vstore_reverse_bit ((
CVP) (pY1 + offsetBitReverse), vX1);
773 __vstore_reverse_bit ((
CVP) (pY2 + offsetBitReverse), vX2);
774 __vstore_reverse_bit ((
CVP) (pY3 + offsetBitReverse), vX3);
// Unrolled second butterfly, writing through pY4..pY7.
776 #ifdef LAST_LOOP_UNROLL
777 vX_0_1 = c7x::strm_eng<0, CV>::get_adv ();
778 vX_N_4_1 = c7x::strm_eng<0, CV>::get_adv ();
779 vX_N_2_1 = c7x::strm_eng<0, CV>::get_adv ();
780 vX_3N_4_1 = c7x::strm_eng<0, CV>::get_adv ();
782 vSum1_1 = vX_0_1 + vX_N_2_1;
783 vSum2_1 = vX_N_4_1 + vX_3N_4_1;
784 vDiff1_1 = vX_0_1 - vX_N_2_1;
785 vDiff2_1 = vX_N_4_1 - vX_3N_4_1;
787 vX0_1 = vSum1_1 + vSum2_1;
788 vX1_1 = vDiff1_1 - __vcrot90sp_vv (vDiff2_1);
789 vX2_1 = vSum1_1 - vSum2_1;
790 vX3_1 = vDiff1_1 + __vcrot90sp_vv (vDiff2_1);
801 __vstore_reverse_bit ((
CVP) (pY4 + offsetBitReverse), vX0_1);
802 __vstore_reverse_bit ((
CVP) (pY5 + offsetBitReverse), vX1_1);
803 __vstore_reverse_bit ((
CVP) (pY6 + offsetBitReverse), vX2_1);
804 __vstore_reverse_bit ((
CVP) (pY7 + offsetBitReverse), vX3_1);
// Final stage when the remaining sub-DFTs are 8-point (presumably the else
// branch of the ==4 test — the branch line is missing from this extract):
// a radix-4 butterfly pair followed by a 2-point DFT combine, then the
// scale-and-conjugate transform is applied and results are stored to pY
// with bit-reversed addressing through eight cursors.
813 __SE0_OPEN ((
void *) pXLocal, se0_param);
815 numLeadingZeros = __norm ((int32_t) (numPoints - 1)) + 1;
// Splat a single twiddle value (twTemp, loaded on lines missing from this
// extract) across all lanes; CV(...) form is the host build, (CV)(...) the
// target build.
819 vTwX1 =
CV (twTemp, twTemp, twTemp, twTemp, twTemp, twTemp, twTemp,
824 vTwX1 = (
CV) (twTemp, twTemp, twTemp, twTemp, twTemp, twTemp, twTemp,
831 vTwX2 =
CV (twTemp, twTemp, twTemp, twTemp, twTemp, twTemp, twTemp,
836 vTwX2 = (
CV) (twTemp, twTemp, twTemp, twTemp, twTemp, twTemp, twTemp,
842 vTwX3 =
CV (twTemp, twTemp, twTemp, twTemp, twTemp, twTemp, twTemp,
847 vTwX3 = (
CV) (twTemp, twTemp, twTemp, twTemp, twTemp, twTemp, twTemp,
// Eight bit-reversed output cursors: even eighths for the 2-pt DFT sums,
// odd eighths for the differences.
860 pY0 = (cfloat *) (pY + (0x00000000u));
861 pY1 = (cfloat *) (pY + ((0x80000000u >> numLeadingZeros) << 1));
862 pY2 = (cfloat *) (pY + ((0x20000000u >> numLeadingZeros) << 1));
863 pY3 = (cfloat *) (pY + ((0xA0000000u >> numLeadingZeros) << 1));
864 pY4 = (cfloat *) (pY + ((0x40000000u >> numLeadingZeros) << 1));
865 pY5 = (cfloat *) (pY + ((0xC0000000u >> numLeadingZeros) << 1));
866 pY6 = (cfloat *) (pY + ((0x60000000u >> numLeadingZeros) << 1));
867 pY7 = (cfloat *) (pY + ((0xE0000000u >> numLeadingZeros) << 1));
// NOTE(review): "k<numPoints>> 3" is mangled "k < numPoints >> 3".
869 for (k = 0; k<numPoints>> 3; k += 8) {
870 offsetBitReverse = __bit_reverse (k) >> numLeadingZeros;
// Fetch two interleaved butterfly groups (suffix _1 = second group).
872 vX_0 = c7x::strm_eng<0, CV>::get_adv ();
873 vX_0_1 = c7x::strm_eng<0, CV>::get_adv ();
874 vX_N_4 = c7x::strm_eng<0, CV>::get_adv ();
875 vX_N_4_1 = c7x::strm_eng<0, CV>::get_adv ();
876 vX_N_2 = c7x::strm_eng<0, CV>::get_adv ();
877 vX_N_2_1 = c7x::strm_eng<0, CV>::get_adv ();
878 vX_3N_4 = c7x::strm_eng<0, CV>::get_adv ();
879 vX_3N_4_1 = c7x::strm_eng<0, CV>::get_adv ();
// Group 0: twiddle-free radix-4 butterfly (vX0/vX2 lines missing here).
881 vSum1 = vX_0 + vX_N_2;
882 vSum2 = vX_N_4 + vX_3N_4;
883 vDiff1 = vX_0 - vX_N_2;
884 vDiff2 = vX_N_4 - vX_3N_4;
887 vX1 = vDiff1 - __vcrot90sp_vv (vDiff2);
889 vX3 = vDiff1 + __vcrot90sp_vv (vDiff2);
// Group 1: butterfly plus twiddle multiply by the splatted factors.
891 vSum1_1 = vX_0_1 + vX_N_2_1;
892 vSum2_1 = vX_N_4_1 + vX_3N_4_1;
893 vDiff1_1 = vX_0_1 - vX_N_2_1;
894 vDiff2_1 = vX_N_4_1 - vX_3N_4_1;
896 vX0Temp = vSum1_1 + vSum2_1;
897 vX1Temp = vDiff1_1 - __vcrot90sp_vv (vDiff2_1);
898 vX2Temp = vSum1_1 - vSum2_1;
899 vX3Temp = vDiff1_1 + __vcrot90sp_vv (vDiff2_1);
902 vX1_1 = __complex_multiply (vX1Temp, vTwX1);
903 vX2_1 = __complex_multiply (vX2Temp, vTwX2);
904 vX3_1 = __complex_multiply (vX3Temp, vTwX3);
// Final 2-point DFT across the two groups: sum and difference pairs.
906 vX0_2PtDft_1 = vX0 + vX0_1;
907 vX0_2PtDft_2 = vX0 - vX0_1;
908 vX1_2PtDft_1 = vX1 + vX1_1;
909 vX1_2PtDft_2 = vX1 - vX1_1;
910 vX2_2PtDft_1 = vX2 + vX2_1;
911 vX2_2PtDft_2 = vX2 - vX2_1;
912 vX3_2PtDft_1 = vX3 + vX3_1;
913 vX3_2PtDft_2 = vX3 - vX3_1;
// Apply the 1/N scale and conjugation to each result (assignment targets
// are on lines missing from this extract; these are the argument lists).
916 vX0_2PtDft_1, scaleVec, xorVec);
918 vX0_2PtDft_2, scaleVec, xorVec);
920 vX1_2PtDft_1, scaleVec, xorVec);
922 vX1_2PtDft_2, scaleVec, xorVec);
924 vX2_2PtDft_1, scaleVec, xorVec);
926 vX2_2PtDft_2, scaleVec, xorVec);
928 vX3_2PtDft_1, scaleVec, xorVec);
930 vX3_2PtDft_2, scaleVec, xorVec);
// Bit-reversed scatter of the eight final vectors into the output.
932 __vstore_reverse_bit ((
CVP) (pY0 + offsetBitReverse), vX0_2PtDft_1);
933 __vstore_reverse_bit ((
CVP) (pY1 + offsetBitReverse), vX0_2PtDft_2);
934 __vstore_reverse_bit ((
CVP) (pY2 + offsetBitReverse), vX1_2PtDft_1);
935 __vstore_reverse_bit ((
CVP) (pY3 + offsetBitReverse), vX1_2PtDft_2);
936 __vstore_reverse_bit ((
CVP) (pY4 + offsetBitReverse), vX2_2PtDft_1);
937 __vstore_reverse_bit ((
CVP) (pY5 + offsetBitReverse), vX2_2PtDft_2);
938 __vstore_reverse_bit ((
CVP) (pY6 + offsetBitReverse), vX3_2PtDft_1);
939 __vstore_reverse_bit ((
CVP) (pY7 + offsetBitReverse), vX3_2PtDft_2);
// Guard selecting whether the checkParams implementation is compiled in.
947 #if (!defined(FFTLIB_REMOVE_CHECK_PARAMS) && \
948 !defined(FFTLIB_IFFT1D_I32FC_C32FC_O32FC_REMOVE_CHECK_PARAMS)) || \
949 (defined(FFTLIB_CHECK_PARAMS)) || \
950 (defined(FFTLIB_IFFT1D_I32FC_C32FC_O32FC_CHECK_PARAMS))
FFTLIB_STATUS_NAME
The enumeration of all status codes.
float FFTLIB_F32
Single precision floating point.
#define SE_LOOP4_PARAM_OFFSET
void ifft_i32fc_o32fc_conjugate_init_ci(void *pX, uint32_t size, void *pBlock)
static c7x::cfloat_vec ifft_i32fc_o32fc_scaleAndConjugate(c7x::cfloat_vec in, c7x::float_vec scaleVec, c7x::ulong_vec xorVec)
void ifft_i32fc_o32fc_conjugate_exec_ci(void *pX, c7x::ulong_vec xorVec, uint32_t size, void *pBlock)
#define SE_CONJ_LOOP_PARAM_OFFSET
#define SA_LOOP2_PARAM_OFFSET
#define SA_CONJ_LOOP_PARAM_OFFSET
#define SE_LOOP1_PARAM_OFFSET
#define SA_LOOP1_PARAM_OFFSET
#define SE_TWID_PARAM_OFFSET
#define SE_LOOP2_PARAM_OFFSET
#define SE_LOOP3_PARAM_OFFSET
#define SA_LOOP3_PARAM_OFFSET
#define SE_LOOP5_PARAM_OFFSET
FFTLIB_STATUS FFTLIB_ifft1d_i32fc_c32fc_o32fc_checkParams(FFTLIB_F32 *pX, FFTLIB_bufParams1D_t *bufParamsX, FFTLIB_F32 *pW, FFTLIB_bufParams1D_t *bufParamsW, FFTLIB_F32 *pY, FFTLIB_bufParams1D_t *bufParamsY, void *pBlock)
This function checks the validity of the parameters passed to FFTLIB_ifft1d_i32fc_c32fc_o32fc_init and FFTLIB_ifft1d_i32fc_c32fc_o32fc_kernel.
FFTLIB_STATUS FFTLIB_ifft1d_i32fc_c32fc_o32fc_init(FFTLIB_F32 *pX, FFTLIB_bufParams1D_t *bufParamsX, FFTLIB_F32 *pW, FFTLIB_bufParams1D_t *bufParamsW, FFTLIB_F32 *pY, FFTLIB_bufParams1D_t *bufParamsY, void *pBlock)
This function should be called before the FFTLIB_ifft1d_i32fc_c32fc_o32fc_kernel function is called....
FFTLIB_STATUS FFTLIB_ifft1d_i32fc_c32fc_o32fc_kernel(FFTLIB_F32 *pX, FFTLIB_bufParams1D_t *bufParamsX, FFTLIB_F32 *pW, FFTLIB_bufParams1D_t *bufParamsW, FFTLIB_F32 *pY, FFTLIB_bufParams1D_t *bufParamsY, void *pBlock)
This function is the main kernel compute function.
A structure for a 1 dimensional buffer descriptor.
uint32_t dim_x
Width of buffer in X dimension in elements.