#include "../FFTLIB_fft1d_i32fc_c32fc_o32fc.h"
#include "../../../common/printv.h"
#define SE_PARAM_BASE         (0x0000)
#define SE_LOOP1_PARAM_OFFSET (SE_PARAM_BASE)
#define SE_LOOP2_PARAM_OFFSET (SE_LOOP1_PARAM_OFFSET + SE_PARAM_SIZE)
#define SE_LOOP3_PARAM_OFFSET (SE_LOOP2_PARAM_OFFSET + SE_PARAM_SIZE)
#define SE_LOOP4_PARAM_OFFSET (SE_LOOP3_PARAM_OFFSET + SE_PARAM_SIZE)
#define SE_LOOP5_PARAM_OFFSET (SE_LOOP4_PARAM_OFFSET + SE_PARAM_SIZE)
#define SE_TWID_PARAM_OFFSET  (SE_LOOP5_PARAM_OFFSET + SE_PARAM_SIZE)
#define SA_LOOP1_PARAM_OFFSET (SE_TWID_PARAM_OFFSET + SE_PARAM_SIZE)
#define SA_LOOP2_PARAM_OFFSET (SA_LOOP1_PARAM_OFFSET + SA_PARAM_SIZE)
#define SA_LOOP3_PARAM_OFFSET (SA_LOOP2_PARAM_OFFSET + SA_PARAM_SIZE)
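
/*
 * pBlock layout (a sketch): init() precomputes one streaming-engine (SE)
 * template per loop variant plus one for the twiddle stream, followed by
 * three stream-address-generator (SA) templates, spaced SE_PARAM_SIZE /
 * SA_PARAM_SIZE bytes apart (both sizes come from the common FFTLIB
 * headers). kernel() reloads the templates from these offsets instead of
 * rebuilding them, keeping template setup off the compute path.
 */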
FFTLIB_STATUS FFTLIB_fft1d_i32fc_c32fc_o32fc_init (FFTLIB_F32 *pX, FFTLIB_bufParams1D_t *bufParamsX,
                                                   FFTLIB_F32 *pW, FFTLIB_bufParams1D_t *bufParamsW,
                                                   FFTLIB_F32 *pY, FFTLIB_bufParams1D_t *bufParamsY,
                                                   void *pBlock)
{
    FFTLIB_STATUS status = FFTLIB_SUCCESS;

#if defined(FFTLIB_CHECK_PARAMS) || \
    defined(FFTLIB_FFT1D_I32FC_C32FC_O32FC_CHECK_PARAMS)
    status = FFTLIB_fft1d_i32fc_c32fc_o32fc_checkParams (
        pX, bufParamsX, pW, bufParamsW, pY, bufParamsY, pBlock);
#endif
    uint32_t numPoints;
    uint32_t numPointsPerDft;
    uint32_t seCnt1, seCnt2, seCnt3, seCnt4;
    __SE_TEMPLATE_v1 se0_param = __gen_SE_TEMPLATE_v1 ();
    __SE_TEMPLATE_v1 se1_param = __gen_SE_TEMPLATE_v1 ();
    __SA_TEMPLATE_v1 sa0_param = __gen_SA_TEMPLATE_v1 ();
    numPoints       = bufParamsX->dim_x >> 1;
    numPointsPerDft = numPoints;
    seCnt1          = numPoints >> 2;
    seCnt2          = numPoints >> 5;
    seCnt4          = numPoints >> 3;
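
    /*
     * dim_x counts FFTLIB_F32 elements, so the complex point count is
     * dim_x / 2. Illustrative, for a 1024-point FFT on a 512-bit C7x
     * (8 cfloat lanes per vector): seCnt1 = 256 butterfly groups per
     * radix-4 stage, seCnt2 = 32 vector iterations per quarter, and
     * seCnt4 = 128 feeds the 2-D transpose templates of the final stages.
     */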
    se0_param.DIM1    = seCnt1;
    se0_param.ICNT2   = seCnt2;
    se0_param.ICNT3   = seCnt3;
    se0_param.DIM3    = numPointsPerDft;
    se0_param.ELETYPE = __SE_ELETYPE_32BIT_CMPLX_SWAP;
    se0_param.VECLEN  = c7x::se_veclen<c7x::cfloat_vec>::value;
    se0_param.DIMFMT  = __SE_DIMFMT_4D;
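
    /*
     * Loop-1 data stream (sketch): a 4-D pattern whose inner dimensions
     * walk the four quarters x[i], x[i + N/4], x[i + N/2], x[i + 3N/4]
     * consumed by one radix-4 butterfly, with DIM3 = numPointsPerDft
     * stepping to the next DFT of the stage. __SE_ELETYPE_32BIT_CMPLX_SWAP
     * tags the elements as 32-bit complex pairs.
     */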
    se1_param.DIM1    = seCnt1;
    se1_param.ICNT2   = seCnt2;
    se1_param.ICNT3   = seCnt3;
    se1_param.ELETYPE = __SE_ELETYPE_32BIT_CMPLX_SWAP;
    se1_param.VECLEN  = c7x::se_veclen<c7x::cfloat_vec>::value;
    se1_param.DIMFMT  = __SE_DIMFMT_4D;
    sa0_param.DIM1   = seCnt1;
    sa0_param.ICNT2  = seCnt2;
    sa0_param.ICNT3  = seCnt3;
    sa0_param.DIM3   = numPointsPerDft;
    sa0_param.VECLEN = c7x::sa_veclen<c7x::cfloat_vec>::value;
    sa0_param.DIMFMT = __SA_DIMFMT_4D;
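
    /*
     * The SA template mirrors the SE data stream so results can be stored
     * in place: strm_agen supplies both the next store address and a
     * vector predicate, so any partial final vector is handled by a
     * predicated store (__vstore_pred) instead of scalar cleanup code.
     */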
    se0_param.ICNT2     = seCnt2;
    se0_param.ELETYPE   = __SE_ELETYPE_32BIT_CMPLX_SWAP;
    se0_param.TRANSPOSE = __SE_TRANSPOSE_256BIT;
    se0_param.VECLEN    = c7x::se_veclen<c7x::cfloat_vec>::value;
    se0_param.DIMFMT    = __SE_DIMFMT_3D;
    sa0_param.ICNT0  = numPoints;
    sa0_param.VECLEN = c7x::sa_veclen<c7x::cfloat_vec>::value;
    sa0_param.DIMFMT = __SA_DIMFMT_1D;
    se0_param         = __gen_SE_TEMPLATE_v1 ();
    se0_param.ICNT0   = numPoints;
    se0_param.ELETYPE = __SE_ELETYPE_32BIT_CMPLX_SWAP;
    se0_param.VECLEN  = c7x::se_veclen<c7x::cfloat_vec>::value;
    se0_param.DIMFMT  = __SE_DIMFMT_1D;
    sa0_param.ICNT0  = numPoints;
    sa0_param.VECLEN = c7x::sa_veclen<c7x::cfloat_vec>::value;
    sa0_param.DIMFMT = __SA_DIMFMT_1D;
    se0_param.ICNT0     = seCnt4;
    se0_param.DIM1      = seCnt4;
    se0_param.ELETYPE   = __SE_ELETYPE_32BIT_CMPLX_SWAP;
    se0_param.TRANSPOSE = __SE_TRANSPOSE_64BIT;
    se0_param.VECLEN    = c7x::se_veclen<c7x::cfloat_vec>::value;
    se0_param.DIMFMT    = __SE_DIMFMT_2D;
    se0_param.ICNT0     = seCnt4;
    se0_param.DIM1      = seCnt4;
    se0_param.ELETYPE   = __SE_ELETYPE_32BIT_CMPLX_SWAP;
    se0_param.TRANSPOSE = __SE_TRANSPOSE_64BIT;
    se0_param.VECLEN    = c7x::se_veclen<c7x::cfloat_vec>::value;
    se0_param.DIMFMT    = __SE_DIMFMT_2D;
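
    /*
     * The two 2-D templates above use a 64-bit transpose, so each fetched
     * vector gathers one complex point (64 bits) from each of eight small
     * DFTs; this is what keeps the final radix-4 / radix-2 stages fully
     * vectorized. The intent is that init() caches these templates in
     * pBlock at the offsets defined at the top of the file.
     */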
    return status;
}

FFTLIB_STATUS FFTLIB_fft1d_i32fc_c32fc_o32fc_kernel (FFTLIB_F32 *pX, FFTLIB_bufParams1D_t *bufParamsX,
                                                     FFTLIB_F32 *pW, FFTLIB_bufParams1D_t *bufParamsW,
                                                     FFTLIB_F32 *pY, FFTLIB_bufParams1D_t *bufParamsY,
                                                     void *pBlock)
{
    FFTLIB_STATUS status = FFTLIB_SUCCESS;
    uint32_t      numPoints;
    uint32_t      numPointsPerDft;
    uint32_t      numLeadingZeros;
    uint32_t      offsetBitReverse;
    uint32_t      seCnt1, seCnt2, seCnt3;
    uint32_t      k;
    __SE_TEMPLATE_v1 se0_param = __gen_SE_TEMPLATE_v1 ();
    __SE_TEMPLATE_v1 se1_param = __gen_SE_TEMPLATE_v1 ();
    __SA_TEMPLATE_v1 sa0_param = __gen_SA_TEMPLATE_v1 ();

    cfloat *restrict pXLocal;
    cfloat *restrict pYLocal;
    cfloat *restrict pWLocal;
    cfloat *restrict pY0;
    cfloat *restrict pY1;
    cfloat *restrict pY2;
    cfloat *restrict pY3;
    cfloat *restrict pY4;
    cfloat *restrict pY5;
    cfloat *restrict pY6;
    cfloat *restrict pY7;
    typedef c7x::cfloat_vec  CV;  /* full-width vector of complex float      */
    typedef c7x::cfloat_vec *CVP; /* vector pointer type used in casts below */
    typedef c7x::float_vec   V;
    CV vX_0, vX_N_4, vX_N_2, vX_3N_4;
    CV vSum1, vSum2, vDiff1, vDiff2;
    CV vTwX1, vTwX2, vTwX3;
    CV vX0Temp, vX1Temp, vX2Temp, vX3Temp;
    CV vX0, vX1, vX2, vX3;
    CV vX_0_1, vX_N_4_1, vX_N_2_1, vX_3N_4_1;
    CV vSum1_1, vSum2_1, vDiff1_1, vDiff2_1;
    CV vX0_1, vX1_1, vX2_1, vX3_1;
    CV vX0_2PtDft_1, vX0_2PtDft_2;
    CV vX1_2PtDft_1, vX1_2PtDft_2;
    CV vX2_2PtDft_1, vX2_2PtDft_2;
    CV vX3_2PtDft_1, vX3_2PtDft_2;

    __vpred tmp;    /* store predicate supplied by the stream address generator */
    CV *    addr;   /* store address supplied by the stream address generator   */
    cfloat  twTemp; /* scalar twiddle, splatted across lanes in the last stage  */
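
    /*
     * Radix-4 decimation-in-frequency butterfly used throughout (sketch of
     * the arithmetic below; W1..W3 are the stage twiddles streamed by SE1):
     *   X0 = (x0 + x2) + (x1 + x3)
     *   X1 = ((x0 - x2) - j*(x1 - x3)) * W1
     *   X2 = ((x0 + x2) - (x1 + x3)) * W2
     *   X3 = ((x0 - x2) + j*(x1 - x3)) * W3
     * where x0..x3 are x[i], x[i + N/4], x[i + N/2], x[i + 3N/4].
     * __vcrot90sp_vv multiplies each complex lane by j (a 90 degree
     * rotation), so no separate real/imaginary shuffle is needed.
     */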
#ifdef FFTLIB_CHECK_PARAMS
    status = FFTLIB_fft1d_i32fc_c32fc_o32fc_checkParams (
        pX, bufParamsX, pW, bufParamsW, pY, bufParamsY, pBlock);
#endif
    numPoints       = bufParamsX->dim_x >> 1;
    numPointsPerDft = numPoints;
    seCnt1          = numPointsPerDft >> 2;
    seCnt2          = numPointsPerDft >> 5;

    pXLocal = (cfloat *) pX;
    pWLocal = (cfloat *) pW;
    pYLocal = (cfloat *) pY;
    while (numPointsPerDft >= 64) {
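        /*
         * One trip per radix-4 stage: each iteration runs all numPoints
         * points through one stage of numPoints / numPointsPerDft
         * independent DFTs, after which numPointsPerDft shrinks by 4x
         * until the remaining 16- or 32-point DFTs are handled by the
         * specialized branches below.
         */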
        se0_param.DIM1  = seCnt1;
        se0_param.ICNT2 = seCnt2;
        se0_param.ICNT3 = seCnt3;
        se0_param.DIM3  = numPointsPerDft;
        __SE0_OPEN ((void *) pXLocal, se0_param);

        se1_param.DIM1  = seCnt1;
        se1_param.ICNT2 = seCnt2;
        se1_param.ICNT3 = seCnt3;
        __SE1_OPEN ((void *) pWLocal, se1_param);

        sa0_param.DIM1  = seCnt1;
        sa0_param.ICNT2 = seCnt2;
        sa0_param.ICNT3 = seCnt3;
        sa0_param.DIM3  = numPointsPerDft;
        __SA0_OPEN (sa0_param);
        for (k = 0; k < numPoints; k += 64) {
            /* First half of the 2x-unrolled butterfly. */
            vX_0    = c7x::strm_eng<0, CV>::get_adv ();
            vX_N_4  = c7x::strm_eng<0, CV>::get_adv ();
            vX_N_2  = c7x::strm_eng<0, CV>::get_adv ();
            vX_3N_4 = c7x::strm_eng<0, CV>::get_adv ();

            vSum1  = vX_0 + vX_N_2;
            vSum2  = vX_N_4 + vX_3N_4;
            vDiff1 = vX_0 - vX_N_2;
            vDiff2 = vX_N_4 - vX_3N_4;

            vTwX1 = c7x::strm_eng<1, CV>::get_adv ();
            vTwX2 = c7x::strm_eng<1, CV>::get_adv ();
            vTwX3 = c7x::strm_eng<1, CV>::get_adv ();

            vX0Temp = vSum1 + vSum2;
            vX1Temp = vDiff1 - __vcrot90sp_vv (vDiff2);
            vX2Temp = vSum1 - vSum2;
            vX3Temp = vDiff1 + __vcrot90sp_vv (vDiff2);

            vX0 = vX0Temp; /* W0 = 1, so X0 needs no twiddle multiply */
            vX1 = __complex_multiply (vX1Temp, vTwX1);
            vX2 = __complex_multiply (vX2Temp, vTwX2);
            vX3 = __complex_multiply (vX3Temp, vTwX3);

            tmp  = c7x::strm_agen<0, CV>::get_vpred ();
            addr = c7x::strm_agen<0, CV>::get_adv (pXLocal);
            __vstore_pred (tmp, addr, vX0);

            tmp  = c7x::strm_agen<0, CV>::get_vpred ();
            addr = c7x::strm_agen<0, CV>::get_adv (pXLocal);
            __vstore_pred (tmp, addr, vX2);

            tmp  = c7x::strm_agen<0, CV>::get_vpred ();
            addr = c7x::strm_agen<0, CV>::get_adv (pXLocal);
            __vstore_pred (tmp, addr, vX1);

            tmp  = c7x::strm_agen<0, CV>::get_vpred ();
            addr = c7x::strm_agen<0, CV>::get_adv (pXLocal);
            __vstore_pred (tmp, addr, vX3);
            /* Second half of the 2x-unrolled butterfly. */
            vX_0    = c7x::strm_eng<0, CV>::get_adv ();
            vX_N_4  = c7x::strm_eng<0, CV>::get_adv ();
            vX_N_2  = c7x::strm_eng<0, CV>::get_adv ();
            vX_3N_4 = c7x::strm_eng<0, CV>::get_adv ();

            vSum1  = vX_0 + vX_N_2;
            vSum2  = vX_N_4 + vX_3N_4;
            vDiff1 = vX_0 - vX_N_2;
            vDiff2 = vX_N_4 - vX_3N_4;

            vTwX1 = c7x::strm_eng<1, CV>::get_adv ();
            vTwX2 = c7x::strm_eng<1, CV>::get_adv ();
            vTwX3 = c7x::strm_eng<1, CV>::get_adv ();

            vX0Temp = vSum1 + vSum2;
            vX1Temp = vDiff1 - __vcrot90sp_vv (vDiff2);
            vX2Temp = vSum1 - vSum2;
            vX3Temp = vDiff1 + __vcrot90sp_vv (vDiff2);

            vX0 = vX0Temp;
            vX1 = __complex_multiply (vX1Temp, vTwX1);
            vX2 = __complex_multiply (vX2Temp, vTwX2);
            vX3 = __complex_multiply (vX3Temp, vTwX3);

            tmp  = c7x::strm_agen<0, CV>::get_vpred ();
            addr = c7x::strm_agen<0, CV>::get_adv (pXLocal);
            __vstore_pred (tmp, addr, vX0);

            tmp  = c7x::strm_agen<0, CV>::get_vpred ();
            addr = c7x::strm_agen<0, CV>::get_adv (pXLocal);
            __vstore_pred (tmp, addr, vX2);

            tmp  = c7x::strm_agen<0, CV>::get_vpred ();
            addr = c7x::strm_agen<0, CV>::get_adv (pXLocal);
            __vstore_pred (tmp, addr, vX1);

            tmp  = c7x::strm_agen<0, CV>::get_vpred ();
            addr = c7x::strm_agen<0, CV>::get_adv (pXLocal);
            __vstore_pred (tmp, addr, vX3);
        }

        numPointsPerDft >>= 2;
        pWLocal += numPointsPerDft * 3;
    }
    if (numPointsPerDft == 16) {
        __SE0_OPEN ((void *) pXLocal, se0_param);
        __SE1_OPEN ((void *) (pXLocal + 8), se0_param);
        __SA0_OPEN (sa0_param);
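
        /*
         * 16-point stage (sketch): SE1 reads the same buffer as SE0 but
         * offset by 8 complex points, so the x[N/2] and x[3N/4] quarters
         * come from SE1 while x[0] and x[N/4] come from SE0. A 16-point
         * DFT needs only 4 distinct twiddles per output, hence the
         * lo()-half duplication of the twiddle vectors that follows.
         */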
        vTwX1 = *((CVP) pWLocal);
        vTwX1 = CV (vTwX1.lo (), vTwX1.lo ());
        vTwX2 = *((CVP) (pWLocal + 4));
        vTwX2 = CV (vTwX2.lo (), vTwX2.lo ());
        vTwX3 = *((CVP) (pWLocal + 8));
        vTwX3 = CV (vTwX3.lo (), vTwX3.lo ());
        for (k = 0; k < numPoints; k += 32) {
            vX_0    = c7x::strm_eng<0, CV>::get_adv ();
            vX_N_4  = c7x::strm_eng<0, CV>::get_adv ();
            vX_N_2  = c7x::strm_eng<1, CV>::get_adv ();
            vX_3N_4 = c7x::strm_eng<1, CV>::get_adv ();

            vSum1  = vX_0 + vX_N_2;
            vSum2  = vX_N_4 + vX_3N_4;
            vDiff1 = vX_0 - vX_N_2;
            vDiff2 = vX_N_4 - vX_3N_4;

            vX0Temp = vSum1 + vSum2;
            vX1Temp = vDiff1 - __vcrot90sp_vv (vDiff2);
            vX2Temp = vSum1 - vSum2;
            vX3Temp = vDiff1 + __vcrot90sp_vv (vDiff2);

            vX0 = vX0Temp;
            vX1 = __complex_multiply (vX1Temp, vTwX1);
            vX2 = __complex_multiply (vX2Temp, vTwX2);
            vX3 = __complex_multiply (vX3Temp, vTwX3);
            tmp  = c7x::strm_agen<0, CV>::get_vpred ();
            addr = c7x::strm_agen<0, CV>::get_adv (pXLocal);
            __vstore_pred (tmp, addr, CV (vX0.lo (), vX2.lo ()));

            tmp  = c7x::strm_agen<0, CV>::get_vpred ();
            addr = c7x::strm_agen<0, CV>::get_adv (pXLocal);
            __vstore_pred (tmp, addr, CV (vX1.lo (), vX3.lo ()));

            tmp  = c7x::strm_agen<0, CV>::get_vpred ();
            addr = c7x::strm_agen<0, CV>::get_adv (pXLocal);
            __vstore_pred (tmp, addr, CV (vX0.hi (), vX2.hi ()));

            tmp  = c7x::strm_agen<0, CV>::get_vpred ();
            addr = c7x::strm_agen<0, CV>::get_adv (pXLocal);
            __vstore_pred (tmp, addr, CV (vX1.hi (), vX3.hi ()));
        }
    }
    else {
        __SE0_OPEN ((void *) pXLocal, se0_param);
        __SA0_OPEN (sa0_param);
        vTwX1 = *((CVP) pWLocal);
        vTwX2 = *((CVP) (pWLocal + 8));
        vTwX3 = *((CVP) (pWLocal + 16));
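
        /*
         * 32-point stage: each 8-lane vector covers one quarter of a
         * single 32-point DFT, so the three twiddle vectors are plain
         * full-width loads at pW offsets 0, 8, and 16 complex elements,
         * and the loop consumes 64 points (two DFTs) per iteration.
         */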
        for (k = 0; k < numPoints; k += 64) {
            vX_0    = c7x::strm_eng<0, CV>::get_adv ();
            vX_N_4  = c7x::strm_eng<0, CV>::get_adv ();
            vX_N_2  = c7x::strm_eng<0, CV>::get_adv ();
            vX_3N_4 = c7x::strm_eng<0, CV>::get_adv ();

            vSum1  = vX_0 + vX_N_2;
            vSum2  = vX_N_4 + vX_3N_4;
            vDiff1 = vX_0 - vX_N_2;
            vDiff2 = vX_N_4 - vX_3N_4;

            vX0Temp = vSum1 + vSum2;
            vX1Temp = vDiff1 - __vcrot90sp_vv (vDiff2);
            vX2Temp = vSum1 - vSum2;
            vX3Temp = vDiff1 + __vcrot90sp_vv (vDiff2);

            vX0 = vX0Temp;
            vX1 = __complex_multiply (vX1Temp, vTwX1);
            vX2 = __complex_multiply (vX2Temp, vTwX2);
            vX3 = __complex_multiply (vX3Temp, vTwX3);
            tmp  = c7x::strm_agen<0, CV>::get_vpred ();
            addr = c7x::strm_agen<0, CV>::get_adv (pXLocal);
            __vstore_pred (tmp, addr, vX0);

            tmp  = c7x::strm_agen<0, CV>::get_vpred ();
            addr = c7x::strm_agen<0, CV>::get_adv (pXLocal);
            __vstore_pred (tmp, addr, vX2);

            tmp  = c7x::strm_agen<0, CV>::get_vpred ();
            addr = c7x::strm_agen<0, CV>::get_adv (pXLocal);
            __vstore_pred (tmp, addr, vX1);

            tmp  = c7x::strm_agen<0, CV>::get_vpred ();
            addr = c7x::strm_agen<0, CV>::get_adv (pXLocal);
            __vstore_pred (tmp, addr, vX3);
            vX_0    = c7x::strm_eng<0, CV>::get_adv ();
            vX_N_4  = c7x::strm_eng<0, CV>::get_adv ();
            vX_N_2  = c7x::strm_eng<0, CV>::get_adv ();
            vX_3N_4 = c7x::strm_eng<0, CV>::get_adv ();

            vSum1  = vX_0 + vX_N_2;
            vSum2  = vX_N_4 + vX_3N_4;
            vDiff1 = vX_0 - vX_N_2;
            vDiff2 = vX_N_4 - vX_3N_4;

            vX0Temp = vSum1 + vSum2;
            vX1Temp = vDiff1 - __vcrot90sp_vv (vDiff2);
            vX2Temp = vSum1 - vSum2;
            vX3Temp = vDiff1 + __vcrot90sp_vv (vDiff2);

            vX0 = vX0Temp;
            vX1 = __complex_multiply (vX1Temp, vTwX1);
            vX2 = __complex_multiply (vX2Temp, vTwX2);
            vX3 = __complex_multiply (vX3Temp, vTwX3);
            tmp  = c7x::strm_agen<0, CV>::get_vpred ();
            addr = c7x::strm_agen<0, CV>::get_adv (pXLocal);
            __vstore_pred (tmp, addr, vX0);

            tmp  = c7x::strm_agen<0, CV>::get_vpred ();
            addr = c7x::strm_agen<0, CV>::get_adv (pXLocal);
            __vstore_pred (tmp, addr, vX2);

            tmp  = c7x::strm_agen<0, CV>::get_vpred ();
            addr = c7x::strm_agen<0, CV>::get_adv (pXLocal);
            __vstore_pred (tmp, addr, vX1);

            tmp  = c7x::strm_agen<0, CV>::get_vpred ();
            addr = c7x::strm_agen<0, CV>::get_adv (pXLocal);
            __vstore_pred (tmp, addr, vX3);
        }
    }

    numPointsPerDft >>= 2;
    pWLocal += numPointsPerDft * 3;
    if (numPointsPerDft == 4) {
        __SE0_OPEN ((void *) pXLocal, se0_param);

        numLeadingZeros = __norm ((int32_t) (numPoints - 1)) + 1;
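
        /*
         * Bit-reversed output addressing (illustrative, for numPoints =
         * 1024): __norm() returns the redundant sign bits of 1023, so
         * numLeadingZeros = 22 and, e.g., 0x40000000u >> 22 = 256 = N/4.
         * The extra << 1 converts complex-element offsets into the
         * FFTLIB_F32 units of pY, and __bit_reverse(k) >> numLeadingZeros
         * reverses k within log2(N) bits, giving each group its slot in
         * the naturally ordered output.
         */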
        pY0 = (cfloat *) (pY + 0);
        pY1 = (cfloat *) (pY + ((0x40000000u >> numLeadingZeros) << 1));
        pY2 = (cfloat *) (pY + ((0x80000000u >> numLeadingZeros) << 1));
        pY3 = (cfloat *) (pY + ((0xC0000000u >> numLeadingZeros) << 1));

#ifdef LAST_LOOP_UNROLL
        pY4 = (cfloat *) (pY + ((0x20000000u >> numLeadingZeros) << 1));
        pY5 = (cfloat *) (pY + ((0x60000000u >> numLeadingZeros) << 1));
        pY6 = (cfloat *) (pY + ((0xA0000000u >> numLeadingZeros) << 1));
        pY7 = (cfloat *) (pY + ((0xE0000000u >> numLeadingZeros) << 1));
#endif

#ifdef LAST_LOOP_UNROLL
        for (k = 0; k < numPoints >> 3; k += 8)
#else
        for (k = 0; k < numPoints >> 3; k += 4)
#endif
        {
            offsetBitReverse = __bit_reverse (k) >> numLeadingZeros;

            vX_0    = c7x::strm_eng<0, CV>::get_adv ();
            vX_N_4  = c7x::strm_eng<0, CV>::get_adv ();
            vX_N_2  = c7x::strm_eng<0, CV>::get_adv ();
            vX_3N_4 = c7x::strm_eng<0, CV>::get_adv ();

            vSum1  = vX_0 + vX_N_2;
            vSum2  = vX_N_4 + vX_3N_4;
            vDiff1 = vX_0 - vX_N_2;
            vDiff2 = vX_N_4 - vX_3N_4;

            /* Last radix-4 stage: W = 1, so no twiddle multiplies. */
            vX0 = vSum1 + vSum2;
            vX1 = vDiff1 - __vcrot90sp_vv (vDiff2);
            vX2 = vSum1 - vSum2;
            vX3 = vDiff1 + __vcrot90sp_vv (vDiff2);

            __vstore_reverse_bit ((CVP) (pY0 + offsetBitReverse), vX0);
            __vstore_reverse_bit ((CVP) (pY1 + offsetBitReverse), vX1);
            __vstore_reverse_bit ((CVP) (pY2 + offsetBitReverse), vX2);
            __vstore_reverse_bit ((CVP) (pY3 + offsetBitReverse), vX3);
#ifdef LAST_LOOP_UNROLL
            vX_0_1    = c7x::strm_eng<0, CV>::get_adv ();
            vX_N_4_1  = c7x::strm_eng<0, CV>::get_adv ();
            vX_N_2_1  = c7x::strm_eng<0, CV>::get_adv ();
            vX_3N_4_1 = c7x::strm_eng<0, CV>::get_adv ();

            vSum1_1  = vX_0_1 + vX_N_2_1;
            vSum2_1  = vX_N_4_1 + vX_3N_4_1;
            vDiff1_1 = vX_0_1 - vX_N_2_1;
            vDiff2_1 = vX_N_4_1 - vX_3N_4_1;

            vX0_1 = vSum1_1 + vSum2_1;
            vX1_1 = vDiff1_1 - __vcrot90sp_vv (vDiff2_1);
            vX2_1 = vSum1_1 - vSum2_1;
            vX3_1 = vDiff1_1 + __vcrot90sp_vv (vDiff2_1);

            __vstore_reverse_bit ((CVP) (pY4 + offsetBitReverse), vX0_1);
            __vstore_reverse_bit ((CVP) (pY5 + offsetBitReverse), vX1_1);
            __vstore_reverse_bit ((CVP) (pY6 + offsetBitReverse), vX2_1);
            __vstore_reverse_bit ((CVP) (pY7 + offsetBitReverse), vX3_1);
#endif
        }
    }
    else {
        __SE0_OPEN ((void *) pXLocal, se0_param);

        numLeadingZeros = __norm ((int32_t) (numPoints - 1)) + 1;
        /* Broadcast each stage twiddle (loaded into twTemp from the
         * pWLocal table) across all eight lanes. */
        vTwX1 = CV (twTemp, twTemp, twTemp, twTemp, twTemp, twTemp, twTemp,
                    twTemp);
        vTwX2 = CV (twTemp, twTemp, twTemp, twTemp, twTemp, twTemp, twTemp,
                    twTemp);
        vTwX3 = CV (twTemp, twTemp, twTemp, twTemp, twTemp, twTemp, twTemp,
                    twTemp);
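
        /*
         * With the 64-bit-transpose stream, lane i of each vector holds
         * the same sample position of the i-th remaining 8-point DFT, so
         * every lane shares one twiddle value; splatting that scalar
         * across the lanes lets the fused radix-4 + radix-2 pass below
         * process eight DFTs at once.
         */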
        pY0 = (cfloat *) (pY + (0x00000000u));
        pY1 = (cfloat *) (pY + ((0x80000000u >> numLeadingZeros) << 1));
        pY2 = (cfloat *) (pY + ((0x20000000u >> numLeadingZeros) << 1));
        pY3 = (cfloat *) (pY + ((0xA0000000u >> numLeadingZeros) << 1));
        pY4 = (cfloat *) (pY + ((0x40000000u >> numLeadingZeros) << 1));
        pY5 = (cfloat *) (pY + ((0xC0000000u >> numLeadingZeros) << 1));
        pY6 = (cfloat *) (pY + ((0x60000000u >> numLeadingZeros) << 1));
        pY7 = (cfloat *) (pY + ((0xE0000000u >> numLeadingZeros) << 1));
        for (k = 0; k < numPoints >> 3; k += 8) {
            offsetBitReverse = __bit_reverse (k) >> numLeadingZeros;
            vX_0      = c7x::strm_eng<0, CV>::get_adv ();
            vX_0_1    = c7x::strm_eng<0, CV>::get_adv ();
            vX_N_4    = c7x::strm_eng<0, CV>::get_adv ();
            vX_N_4_1  = c7x::strm_eng<0, CV>::get_adv ();
            vX_N_2    = c7x::strm_eng<0, CV>::get_adv ();
            vX_N_2_1  = c7x::strm_eng<0, CV>::get_adv ();
            vX_3N_4   = c7x::strm_eng<0, CV>::get_adv ();
            vX_3N_4_1 = c7x::strm_eng<0, CV>::get_adv ();
            vSum1  = vX_0 + vX_N_2;
            vSum2  = vX_N_4 + vX_3N_4;
            vDiff1 = vX_0 - vX_N_2;
            vDiff2 = vX_N_4 - vX_3N_4;

            /* Even half: last radix-4 butterfly, W = 1. */
            vX0 = vSum1 + vSum2;
            vX1 = vDiff1 - __vcrot90sp_vv (vDiff2);
            vX2 = vSum1 - vSum2;
            vX3 = vDiff1 + __vcrot90sp_vv (vDiff2);

            vSum1_1  = vX_0_1 + vX_N_2_1;
            vSum2_1  = vX_N_4_1 + vX_3N_4_1;
            vDiff1_1 = vX_0_1 - vX_N_2_1;
            vDiff2_1 = vX_N_4_1 - vX_3N_4_1;

            vX0Temp = vSum1_1 + vSum2_1;
            vX1Temp = vDiff1_1 - __vcrot90sp_vv (vDiff2_1);
            vX2Temp = vSum1_1 - vSum2_1;
            vX3Temp = vDiff1_1 + __vcrot90sp_vv (vDiff2_1);

            /* Odd half: twiddled before the radix-2 combine. */
            vX0_1 = vX0Temp;
            vX1_1 = __complex_multiply (vX1Temp, vTwX1);
            vX2_1 = __complex_multiply (vX2Temp, vTwX2);
            vX3_1 = __complex_multiply (vX3Temp, vTwX3);
            vX0_2PtDft_1 = vX0 + vX0_1;
            vX0_2PtDft_2 = vX0 - vX0_1;
            vX1_2PtDft_1 = vX1 + vX1_1;
            vX1_2PtDft_2 = vX1 - vX1_1;
            vX2_2PtDft_1 = vX2 + vX2_1;
            vX2_2PtDft_2 = vX2 - vX2_1;
            vX3_2PtDft_1 = vX3 + vX3_1;
            vX3_2PtDft_2 = vX3 - vX3_1;
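
            /*
             * Radix-2 combine: the even half (vX0..vX3) and the twiddled
             * odd half (vX0_1..vX3_1) of each 8-point DFT are summed and
             * differenced, producing the eight final outputs that are
             * scattered to bit-reversed eighths of the output buffer.
             */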
            __vstore_reverse_bit ((CVP) (pY0 + offsetBitReverse), vX0_2PtDft_1);
            __vstore_reverse_bit ((CVP) (pY1 + offsetBitReverse), vX0_2PtDft_2);
            __vstore_reverse_bit ((CVP) (pY2 + offsetBitReverse), vX1_2PtDft_1);
            __vstore_reverse_bit ((CVP) (pY3 + offsetBitReverse), vX1_2PtDft_2);
            __vstore_reverse_bit ((CVP) (pY4 + offsetBitReverse), vX2_2PtDft_1);
            __vstore_reverse_bit ((CVP) (pY5 + offsetBitReverse), vX2_2PtDft_2);
            __vstore_reverse_bit ((CVP) (pY6 + offsetBitReverse), vX3_2PtDft_1);
            __vstore_reverse_bit ((CVP) (pY7 + offsetBitReverse), vX3_2PtDft_2);
        }
    }

    return status;
}
#if (!defined(FFTLIB_REMOVE_CHECK_PARAMS) && \
     !defined(FFTLIB_FFT1D_I32FC_C32FC_O32FC_REMOVE_CHECK_PARAMS)) || \
    (defined(FFTLIB_CHECK_PARAMS)) || \
    (defined(FFTLIB_FFT1D_I32FC_C32FC_O32FC_CHECK_PARAMS))

FFTLIB_STATUS FFTLIB_fft1d_i32fc_c32fc_o32fc_checkParams (FFTLIB_F32 *pX, FFTLIB_bufParams1D_t *bufParamsX,
                                                          FFTLIB_F32 *pW, FFTLIB_bufParams1D_t *bufParamsW,
                                                          FFTLIB_F32 *pY, FFTLIB_bufParams1D_t *bufParamsY,
                                                          void *pBlock)
{
    FFTLIB_STATUS status = FFTLIB_SUCCESS;
    uint32_t      k;
    if ((pX == NULL) || (pW == NULL) || (pY == NULL)) {
        status = FFTLIB_ERR_NULL_POINTER;
    }
    else if ((bufParamsX->dim_x != bufParamsW->dim_x) ||
             (bufParamsX->dim_x != bufParamsY->dim_x)) {
        status = FFTLIB_ERR_INVALID_DIMENSION;
    }
    else if (bufParamsX->dim_x < 64 * 2) { /* at least 64 complex points */
        status = FFTLIB_ERR_INVALID_DIMENSION;
    }
    else if (((uint64_t) pX) & 0xFu) {
        status = FFTLIB_ERR_NOT_ALIGNED_PTRS_STRIDES;
    }
    else {
        /* dim_x must be a power of two. */
        for (k = 0; k < 32; k++) {
            if (bufParamsX->dim_x & (1u << k)) {
                break;
            }
        }
        if ((1u << k) != bufParamsX->dim_x) {
            status = FFTLIB_ERR_INVALID_DIMENSION;
        }
    }
    return status;
}

#endif