29 #include "../FFTLIB_fft1dBatched_i32fc_c32fc_o32fc.h"
34 #include "../../../common/printv.h"
/*
 * Offsets of the individual SE (__SE_TEMPLATE_v1) and SA (__SA_TEMPLATE_v1)
 * parameter slots, expressed relative to SE_PARAM_BASE.
 *
 * Layout: eight SE slots (LOOP1..LOOP7 followed by TWID), each SE_PARAM_SIZE
 * apart, then five SA slots (LOOP1..LOOP4 and LOOP6), each SA_PARAM_SIZE
 * apart. SE_PARAM_SIZE and SA_PARAM_SIZE are not defined in this listing.
 *
 * NOTE(review): these appear to be byte offsets added to `(uint8_t *) pBlock`
 * when templates are loaded in the kernel -- confirm against the full source.
 */
43 #define SE_PARAM_BASE (0x0000)
/* SE slots: each one starts SE_PARAM_SIZE after the previous slot. */
44 #define SE_LOOP1_PARAM_OFFSET (SE_PARAM_BASE)
45 #define SE_LOOP2_PARAM_OFFSET (SE_LOOP1_PARAM_OFFSET + SE_PARAM_SIZE)
46 #define SE_LOOP3_PARAM_OFFSET (SE_LOOP2_PARAM_OFFSET + SE_PARAM_SIZE)
47 #define SE_LOOP4_PARAM_OFFSET (SE_LOOP3_PARAM_OFFSET + SE_PARAM_SIZE)
48 #define SE_LOOP5_PARAM_OFFSET (SE_LOOP4_PARAM_OFFSET + SE_PARAM_SIZE)
49 #define SE_LOOP6_PARAM_OFFSET (SE_LOOP5_PARAM_OFFSET + SE_PARAM_SIZE)
50 #define SE_LOOP7_PARAM_OFFSET (SE_LOOP6_PARAM_OFFSET + SE_PARAM_SIZE)
51 #define SE_TWID_PARAM_OFFSET (SE_LOOP7_PARAM_OFFSET + SE_PARAM_SIZE)
/* SA slots: the first one follows the last SE slot (TWID); later ones are SA_PARAM_SIZE apart. */
52 #define SA_LOOP1_PARAM_OFFSET (SE_TWID_PARAM_OFFSET + SE_PARAM_SIZE)
53 #define SA_LOOP2_PARAM_OFFSET (SA_LOOP1_PARAM_OFFSET + SA_PARAM_SIZE)
54 #define SA_LOOP3_PARAM_OFFSET (SA_LOOP2_PARAM_OFFSET + SA_PARAM_SIZE)
55 #define SA_LOOP4_PARAM_OFFSET (SA_LOOP3_PARAM_OFFSET + SA_PARAM_SIZE)
/* No SA_LOOP5 slot is defined here: SA_LOOP6 directly follows SA_LOOP4. */
56 #define SA_LOOP6_PARAM_OFFSET (SA_LOOP4_PARAM_OFFSET + SA_PARAM_SIZE)
71 #if defined(FFTLIB_CHECK_PARAMS) || \
72 defined(FFTLIB_FFT1DBATCHED_I32FC_C32FC_O32FC_CHECK_PARAMS)
80 uint32_t numPointsPerDft;
81 uint32_t seCnt1, seCnt2, seCnt3, seCnt4;
82 uint32_t seCnt6, seCnt7, seCnt8, seCnt9, seCnt10;
84 __SE_TEMPLATE_v1 se0_param = __gen_SE_TEMPLATE_v1 ();
85 __SE_TEMPLATE_v1 se1_param = __gen_SE_TEMPLATE_v1 ();
86 __SA_TEMPLATE_v1 sa0_param = __gen_SA_TEMPLATE_v1 ();
88 numPointsPerDft = numPoints;
89 seCnt1 = numPoints >> 2;
90 seCnt2 = numPoints >> 4;
92 seCnt4 = numPoints >> 2;
93 seCnt6 = seCnt3 * numChannels;
95 (numPoints * numChannels >> 4) > 1 ? numPoints * numChannels >> 4 : 1;
96 seCnt8 = numPoints * numChannels;
97 seCnt9 = (numPoints * numChannels > 16) ? numPoints * numChannels : 16;
99 (numPoints * numChannels >> 5) > 1 ? numPoints * numChannels >> 5 : 1;
100 seCnt11 = (numPoints * numChannels > 32) ? numPoints * numChannels : 32;
102 uint32_t elementSize = c7x::element_count_of<c7x::cfloat_vec>::value;
104 se0_param = __gen_SE_TEMPLATE_v1 ();
105 se0_param.ICNT0 = elementSize;
107 se0_param.DIM1 = seCnt1;
108 se0_param.ICNT2 = seCnt2;
109 se0_param.DIM2 = elementSize;
110 se0_param.ICNT3 = seCnt6;
111 se0_param.DIM3 = numPointsPerDft;
113 se0_param.ELETYPE = __SE_ELETYPE_32BIT_CMPLX_SWAP;
114 se0_param.VECLEN = c7x::se_veclen<c7x::cfloat_vec>::value;
115 se0_param.DIMFMT = __SE_DIMFMT_4D;
119 se1_param = __gen_SE_TEMPLATE_v1 ();
120 se1_param.ICNT0 = elementSize;
122 se1_param.DIM1 = seCnt1;
123 se1_param.ICNT2 = seCnt2;
124 se1_param.DIM2 = elementSize;
125 se1_param.ICNT3 = seCnt6;
128 se1_param.ELETYPE = __SE_ELETYPE_32BIT_CMPLX_SWAP;
129 se1_param.VECLEN = c7x::se_veclen<c7x::cfloat_vec>::value;
130 se1_param.DIMFMT = __SE_DIMFMT_4D;
134 sa0_param = __gen_SA_TEMPLATE_v1 ();
135 sa0_param.ICNT0 = elementSize;
137 sa0_param.DIM1 = seCnt1;
138 sa0_param.ICNT2 = seCnt2;
139 sa0_param.DIM2 = elementSize;
140 sa0_param.ICNT3 = seCnt6;
141 sa0_param.DIM3 = numPointsPerDft;
143 sa0_param.VECLEN = c7x::sa_veclen<c7x::cfloat_vec>::value;
144 sa0_param.DIMFMT = __SA_DIMFMT_4D;
148 se0_param = __gen_SE_TEMPLATE_v1 ();
149 se0_param.ICNT0 = elementSize;
152 se0_param.ICNT2 = seCnt7;
155 se0_param.ELETYPE = __SE_ELETYPE_32BIT_CMPLX_SWAP;
159 se0_param.VECLEN = c7x::se_veclen<c7x::cfloat_vec>::value;
160 se0_param.DIMFMT = __SE_DIMFMT_3D;
164 sa0_param = __gen_SA_TEMPLATE_v1 ();
165 sa0_param.ICNT0 = seCnt8;
169 sa0_param.VECLEN = c7x::sa_veclen<c7x::cfloat_vec>::value;
170 sa0_param.DIMFMT = __SA_DIMFMT_1D;
174 se0_param = __gen_SE_TEMPLATE_v1 ();
175 se0_param.ICNT0 = seCnt8;
177 se0_param.ELETYPE = __SE_ELETYPE_32BIT_CMPLX_SWAP;
178 se0_param.VECLEN = c7x::se_veclen<c7x::cfloat_vec>::value;
179 se0_param.DIMFMT = __SE_DIMFMT_1D;
183 sa0_param = __gen_SA_TEMPLATE_v1 ();
184 sa0_param.ICNT0 = seCnt8;
186 sa0_param.VECLEN = c7x::sa_veclen<c7x::cfloat_vec>::value;
187 sa0_param.DIMFMT = __SA_DIMFMT_1D;
194 se0_param = __gen_SE_TEMPLATE_v1 ();
198 se0_param.ICNT2 = seCnt7;
201 se0_param.ELETYPE = __SE_ELETYPE_32BIT_CMPLX_SWAP;
202 se0_param.TRANSPOSE = __SE_TRANSPOSE_64BIT;
203 se0_param.VECLEN = c7x::se_veclen<c7x::cfloat_vec>::value;
204 se0_param.DIMFMT = __SE_DIMFMT_3D;
208 sa0_param = __gen_SA_TEMPLATE_v1 ();
209 sa0_param.ICNT0 = seCnt9;
213 sa0_param.VECLEN = c7x::sa_veclen<c7x::cfloat_vec>::value;
214 sa0_param.DIMFMT = __SA_DIMFMT_1D;
218 se0_param = __gen_SE_TEMPLATE_v1 ();
219 se0_param.ICNT0 = seCnt4;
220 se0_param.ICNT1 = elementSize;
221 se0_param.DIM1 = seCnt4;
222 se0_param.ICNT2 = numChannels;
223 se0_param.DIM2 = numPoints;
225 se0_param.ELETYPE = __SE_ELETYPE_32BIT_CMPLX_SWAP;
226 se0_param.TRANSPOSE = __SE_TRANSPOSE_64BIT;
227 se0_param.VECLEN = c7x::se_veclen<c7x::cfloat_vec>::value;
228 se0_param.DIMFMT = __SE_DIMFMT_3D;
232 se0_param = __gen_SE_TEMPLATE_v1 ();
236 se0_param.ICNT2 = seCnt10;
239 se0_param.ELETYPE = __SE_ELETYPE_32BIT_CMPLX_SWAP;
240 se0_param.TRANSPOSE = __SE_TRANSPOSE_64BIT;
241 se0_param.VECLEN = c7x::se_veclen<c7x::cfloat_vec>::value;
242 se0_param.DIMFMT = __SE_DIMFMT_3D;
246 sa0_param = __gen_SA_TEMPLATE_v1 ();
247 sa0_param.ICNT0 = seCnt11;
251 sa0_param.VECLEN = c7x::sa_veclen<c7x::cfloat_vec>::value;
252 sa0_param.DIMFMT = __SA_DIMFMT_1D;
256 se0_param = __gen_SE_TEMPLATE_v1 ();
257 se0_param.ICNT0 = seCnt4;
258 se0_param.ICNT1 = elementSize;
259 se0_param.DIM1 = seCnt4;
260 se0_param.ICNT2 = numChannels;
261 se0_param.DIM2 = numPoints;
263 se0_param.ELETYPE = __SE_ELETYPE_32BIT_CMPLX_SWAP;
264 se0_param.TRANSPOSE = __SE_TRANSPOSE_64BIT;
265 se0_param.VECLEN = c7x::se_veclen<c7x::cfloat_vec>::value;
266 se0_param.DIMFMT = __SE_DIMFMT_3D;
281 uint32_t numChannels,
286 uint32_t numPointsPerDft;
287 uint32_t numLeadingZeros;
288 uint32_t offsetBitReverse;
289 uint32_t seCnt1, seCnt2, seCnt3, seCnt6;
291 __SE_TEMPLATE_v1 se0_param;
292 __SE_TEMPLATE_v1 se1_param;
293 __SA_TEMPLATE_v1 sa0_param;
295 cfloat* restrict pXLocal;
296 cfloat* restrict pYLocal;
297 cfloat* restrict pWLocal;
298 cfloat* restrict pY0;
299 cfloat* restrict pY1;
300 cfloat* restrict pY2;
301 cfloat* restrict pY3;
302 cfloat* restrict pY4;
303 cfloat* restrict pY5;
304 cfloat* restrict pY6;
305 cfloat* restrict pY7;
307 typedef typename c7x::cfloat_vec
CV;
313 CV vX_0, vX_N_4, vX_N_2, vX_3N_4;
314 CV vSum1, vSum2, vDiff1, vDiff2;
315 CV vTwX1, vTwX2, vTwX3;
316 CV vX0Temp, vX1Temp, vX2Temp, vX3Temp;
317 CV vX0, vX1, vX2, vX3;
318 CV vX_0_1, vX_N_4_1, vX_N_2_1, vX_3N_4_1;
319 CV vSum1_1, vSum2_1, vDiff1_1, vDiff2_1;
320 CV vX0_1, vX1_1, vX2_1, vX3_1;
321 CV vX0_2PtDft_1, vX0_2PtDft_2;
322 CV vX1_2PtDft_1, vX1_2PtDft_2;
323 CV vX2_2PtDft_1, vX2_2PtDft_2;
324 CV vX3_2PtDft_1, vX3_2PtDft_2;
325 CV vX01_lo, vX23_lo, vX01_hi, vX23_hi;
328 #ifdef FFTLIB_CHECK_PARAMS
336 numPointsPerDft = numPoints;
344 seCnt1 = numPointsPerDft >> 2;
345 seCnt2 = numPointsPerDft >> 4;
348 pXLocal = (cfloat*) pX;
349 pWLocal = (cfloat*) pW;
350 pYLocal = (cfloat*) pY;
352 while (numPointsPerDft >= 16) {
354 seCnt6 = seCnt3 * numChannels;
356 se0_param.DIM1 = seCnt1;
357 se0_param.ICNT2 = seCnt2;
359 se0_param.ICNT3 = seCnt6;
362 __SE0_OPEN ((
void *) pXLocal, se0_param);
365 se1_param.DIM1 = seCnt1;
369 se1_param.ICNT3 = seCnt6;
371 __SE1_OPEN ((
void *) pWLocal, se1_param);
374 sa0_param.DIM1 = seCnt1;
375 sa0_param.ICNT2 = seCnt2;
378 sa0_param.ICNT3 = seCnt6;
379 sa0_param.DIM3 = numPointsPerDft;
380 __SA0_OPEN (sa0_param);
383 for (k = 0; k < numPoints * numChannels; k += 32) {
386 vX_0 = c7x::strm_eng<0, CV>::get_adv ();
387 vX_N_4 = c7x::strm_eng<0, CV>::get_adv ();
388 vX_N_2 = c7x::strm_eng<0, CV>::get_adv ();
389 vX_3N_4 = c7x::strm_eng<0, CV>::get_adv ();
391 vSum1 = vX_0 + vX_N_2;
392 vSum2 = vX_N_4 + vX_3N_4;
393 vDiff1 = vX_0 - vX_N_2;
394 vDiff2 = vX_N_4 - vX_3N_4;
396 vTwX1 = c7x::strm_eng<1, CV>::get_adv ();
397 vTwX2 = c7x::strm_eng<1, CV>::get_adv ();
398 vTwX3 = c7x::strm_eng<1, CV>::get_adv ();
400 vX0Temp = vSum1 + vSum2;
401 vX1Temp = vDiff1 - __vcrot90sp_vv (vDiff2);
402 vX2Temp = vSum1 - vSum2;
403 vX3Temp = vDiff1 + __vcrot90sp_vv (vDiff2);
406 vX1 = __complex_multiply (vX1Temp, vTwX1);
407 vX2 = __complex_multiply (vX2Temp, vTwX2);
408 vX3 = __complex_multiply (vX3Temp, vTwX3);
413 tmp = c7x::strm_agen<0, CV>::get_vpred ();
414 addr = c7x::strm_agen<0, CV>::get_adv (pXLocal);
415 __vstore_pred (tmp, addr, vX0);
417 tmp = c7x::strm_agen<0, CV>::get_vpred ();
418 addr = c7x::strm_agen<0, CV>::get_adv (pXLocal);
419 __vstore_pred (tmp, addr, vX2);
421 tmp = c7x::strm_agen<0, CV>::get_vpred ();
422 addr = c7x::strm_agen<0, CV>::get_adv (pXLocal);
423 __vstore_pred (tmp, addr, vX1);
425 tmp = c7x::strm_agen<0, CV>::get_vpred ();
426 addr = c7x::strm_agen<0, CV>::get_adv (pXLocal);
427 __vstore_pred (tmp, addr, vX3);
430 vX_0 = c7x::strm_eng<0, CV>::get_adv ();
431 vX_N_4 = c7x::strm_eng<0, CV>::get_adv ();
432 vX_N_2 = c7x::strm_eng<0, CV>::get_adv ();
433 vX_3N_4 = c7x::strm_eng<0, CV>::get_adv ();
435 vSum1 = vX_0 + vX_N_2;
436 vSum2 = vX_N_4 + vX_3N_4;
437 vDiff1 = vX_0 - vX_N_2;
438 vDiff2 = vX_N_4 - vX_3N_4;
440 vTwX1 = c7x::strm_eng<1, CV>::get_adv ();
441 vTwX2 = c7x::strm_eng<1, CV>::get_adv ();
442 vTwX3 = c7x::strm_eng<1, CV>::get_adv ();
444 vX0Temp = vSum1 + vSum2;
445 vX1Temp = vDiff1 - __vcrot90sp_vv (vDiff2);
446 vX2Temp = vSum1 - vSum2;
447 vX3Temp = vDiff1 + __vcrot90sp_vv (vDiff2);
450 vX1 = __complex_multiply (vX1Temp, vTwX1);
451 vX2 = __complex_multiply (vX2Temp, vTwX2);
452 vX3 = __complex_multiply (vX3Temp, vTwX3);
454 tmp = c7x::strm_agen<0, CV>::get_vpred ();
455 addr = c7x::strm_agen<0, CV>::get_adv (pXLocal);
456 __vstore_pred (tmp, addr, vX0);
458 tmp = c7x::strm_agen<0, CV>::get_vpred ();
459 addr = c7x::strm_agen<0, CV>::get_adv (pXLocal);
460 __vstore_pred (tmp, addr, vX2);
462 tmp = c7x::strm_agen<0, CV>::get_vpred ();
463 addr = c7x::strm_agen<0, CV>::get_adv (pXLocal);
464 __vstore_pred (tmp, addr, vX1);
466 tmp = c7x::strm_agen<0, CV>::get_vpred ();
467 addr = c7x::strm_agen<0, CV>::get_adv (pXLocal);
468 __vstore_pred (tmp, addr, vX3);
474 numPointsPerDft >>= 2;
475 pWLocal += numPointsPerDft * 3;
481 if (numPointsPerDft == 16) {
485 __SE0_OPEN ((
void *) pXLocal, se0_param);
486 __SE1_OPEN ((
void *) (pXLocal + 8), se0_param);
490 __SA0_OPEN (sa0_param);
492 vTwX1 = *((
CVP) pWLocal);
493 vTwX2 = *((
CVP) (pWLocal + 4));
494 vTwX3 = *((
CVP) (pWLocal + 8));
497 vTwX1 =
CV (vTwX1.lo(), vTwX1.lo());
498 vTwX2 =
CV (vTwX2.lo(), vTwX2.lo());
499 vTwX3 =
CV (vTwX3.lo(), vTwX3.lo());
501 vTwX1 = (
CV) (vTwX1.lo(), vTwX1.lo());
502 vTwX2 = (
CV) (vTwX2.lo(), vTwX2.lo());
503 vTwX3 = (
CV) (vTwX3.lo(), vTwX3.lo());
506 for (k = 0; k < numPoints * numChannels; k += 32) {
507 vX_0 = c7x::strm_eng<0, CV>::get_adv ();
508 vX_N_4 = c7x::strm_eng<0, CV>::get_adv ();
509 vX_N_2 = c7x::strm_eng<1, CV>::get_adv ();
510 vX_3N_4 = c7x::strm_eng<1, CV>::get_adv ();
512 vSum1 = vX_0 + vX_N_2;
513 vSum2 = vX_N_4 + vX_3N_4;
514 vDiff1 = vX_0 - vX_N_2;
515 vDiff2 = vX_N_4 - vX_3N_4;
517 vX0Temp = vSum1 + vSum2;
518 vX1Temp = vDiff1 - __vcrot90sp_vv (vDiff2);
519 vX2Temp = vSum1 - vSum2;
520 vX3Temp = vDiff1 + __vcrot90sp_vv (vDiff2);
523 vX1 = __complex_multiply (vX1Temp, vTwX1);
524 vX2 = __complex_multiply (vX2Temp, vTwX2);
525 vX3 = __complex_multiply (vX3Temp, vTwX3);
528 __vpred tmp = c7x::strm_agen<0, CV>::get_vpred ();
530 addr = c7x::strm_agen<0, CV>::get_adv (pXLocal);
531 __vstore_pred (tmp, addr,
CV (vX0.lo(), vX2.lo()));
533 tmp = c7x::strm_agen<0, CV>::get_vpred ();
534 addr = c7x::strm_agen<0, CV>::get_adv (pXLocal);
535 __vstore_pred (tmp, addr,
CV (vX1.lo(), vX3.lo()));
537 tmp = c7x::strm_agen<0, CV>::get_vpred ();
538 addr = c7x::strm_agen<0, CV>::get_adv (pXLocal);
539 __vstore_pred (tmp, addr,
CV (vX0.hi(), vX2.hi()));
541 tmp = c7x::strm_agen<0, CV>::get_vpred ();
542 addr = c7x::strm_agen<0, CV>::get_adv (pXLocal);
543 __vstore_pred (tmp, addr,
CV (vX1.hi(), vX3.hi()));
545 __vpred tmp = c7x::strm_agen<0, CV>::get_vpred ();
547 addr = c7x::strm_agen<0, CV>::get_adv (pXLocal);
548 __vstore_pred (tmp, addr, (
CV) (vX0.lo(), vX2.lo()));
550 tmp = c7x::strm_agen<0, CV>::get_vpred ();
551 addr = c7x::strm_agen<0, CV>::get_adv (pXLocal);
552 __vstore_pred (tmp, addr, (
CV) (vX1.lo(), vX3.lo()));
554 tmp = c7x::strm_agen<0, CV>::get_vpred ();
555 addr = c7x::strm_agen<0, CV>::get_adv (pXLocal);
556 __vstore_pred (tmp, addr, (
CV) (vX0.hi(), vX2.hi()));
558 tmp = c7x::strm_agen<0, CV>::get_vpred ();
559 addr = c7x::strm_agen<0, CV>::get_adv (pXLocal);
560 __vstore_pred (tmp, addr, (
CV) (vX1.hi(), vX3.hi()));
572 __SE0_OPEN ((
void *) pXLocal, se0_param);
576 __SA0_OPEN (sa0_param);
578 vTwX1 = *((
CVP) pWLocal);
579 vTwX2 = *((
CVP) (pWLocal + 8));
580 vTwX3 = *((
CVP) (pWLocal + 16));
582 for (k = 0; k < numPoints * numChannels; k += 32) {
583 vX_0 = c7x::strm_eng<0, CV>::get_adv ();
584 vX_N_4 = c7x::strm_eng<0, CV>::get_adv ();
585 vX_N_2 = c7x::strm_eng<0, CV>::get_adv ();
586 vX_3N_4 = c7x::strm_eng<0, CV>::get_adv ();
588 vSum1 = vX_0 + vX_N_2;
589 vSum2 = vX_N_4 + vX_3N_4;
590 vDiff1 = vX_0 - vX_N_2;
591 vDiff2 = vX_N_4 - vX_3N_4;
593 vX0Temp = vSum1 + vSum2;
594 vX1Temp = vDiff1 - __vcrot90sp_vv (vDiff2);
595 vX2Temp = vSum1 - vSum2;
596 vX3Temp = vDiff1 + __vcrot90sp_vv (vDiff2);
599 vX1 = __complex_multiply (vX1Temp, vTwX1);
600 vX2 = __complex_multiply (vX2Temp, vTwX2);
601 vX3 = __complex_multiply (vX3Temp, vTwX3);
603 __vpred tmp = c7x::strm_agen<0, CV>::get_vpred ();
605 addr = c7x::strm_agen<0, CV>::get_adv (pXLocal);
606 __vstore_pred (tmp, addr, vX0);
608 tmp = c7x::strm_agen<0, CV>::get_vpred ();
609 addr = c7x::strm_agen<0, CV>::get_adv (pXLocal);
610 __vstore_pred (tmp, addr, vX2);
612 tmp = c7x::strm_agen<0, CV>::get_vpred ();
613 addr = c7x::strm_agen<0, CV>::get_adv (pXLocal);
614 __vstore_pred (tmp, addr, vX1);
616 tmp = c7x::strm_agen<0, CV>::get_vpred ();
617 addr = c7x::strm_agen<0, CV>::get_adv (pXLocal);
618 __vstore_pred (tmp, addr, vX3);
628 if (numPointsPerDft == 4) {
631 if (numPoints == 16) {
635 c7x::uchar_vec vXPermCtrl = c7x::uchar_vec(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
636 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
637 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
638 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
639 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
640 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
641 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
642 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F);
644 c7x::uchar_vec vXPermCtrl = (c7x::uchar_vec)(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
645 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
646 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
647 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
648 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
649 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
650 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
651 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F);
655 se0_param = *((__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock +
657 __SE0_OPEN ((
void *) pXLocal, se0_param);
659 sa0_param = *((__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock +
661 __SA0_OPEN (sa0_param);
663 for (k = 0; k < numChannels << 4; k += 32) {
664 vX_0 = c7x::strm_eng<0, CV>::get_adv ();
665 vX_N_4 = c7x::strm_eng<0, CV>::get_adv ();
666 vX_N_2 = c7x::strm_eng<0, CV>::get_adv ();
667 vX_3N_4 = c7x::strm_eng<0, CV>::get_adv ();
669 vSum1 = vX_0 + vX_N_2;
670 vSum2 = vX_N_4 + vX_3N_4;
671 vDiff1 = vX_0 - vX_N_2;
672 vDiff2 = vX_N_4 - vX_3N_4;
675 vX1 = vDiff1 - __vcrot90sp_vv (vDiff2);
677 vX3 = vDiff1 + __vcrot90sp_vv (vDiff2);
679 vX01_lo = c7x::as_cfloat_vec (
680 __vpermll_yvvv (vXPermCtrl, c7x::as_uchar_vec (vX1),
681 c7x::as_uchar_vec (vX0)));
682 vX23_lo = c7x::as_cfloat_vec (
683 __vpermll_yvvv (vXPermCtrl, c7x::as_uchar_vec (vX3),
684 c7x::as_uchar_vec (vX2)));
685 vX01_hi = c7x::as_cfloat_vec (
686 __vpermhh_yvvv (vXPermCtrl, c7x::as_uchar_vec (vX1),
687 c7x::as_uchar_vec (vX0)));
688 vX23_hi = c7x::as_cfloat_vec (
689 __vpermhh_yvvv (vXPermCtrl, c7x::as_uchar_vec (vX3),
690 c7x::as_uchar_vec (vX2)));
692 __vpred tmp = c7x::strm_agen<0, CV>::get_vpred ();
694 addr = c7x::strm_agen<0, CV>::get_adv (pYLocal);
695 __vstore_pred (tmp, addr, vX01_lo);
697 tmp = c7x::strm_agen<0, CV>::get_vpred ();
698 addr = c7x::strm_agen<0, CV>::get_adv (pYLocal);
699 __vstore_pred (tmp, addr, vX23_lo);
701 tmp = c7x::strm_agen<0, CV>::get_vpred ();
702 addr = c7x::strm_agen<0, CV>::get_adv (pYLocal);
703 __vstore_pred (tmp, addr, vX01_hi);
705 tmp = c7x::strm_agen<0, CV>::get_vpred ();
706 addr = c7x::strm_agen<0, CV>::get_adv (pYLocal);
707 __vstore_pred (tmp, addr, vX23_hi);
714 se0_param = *((__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock +
716 __SE0_OPEN ((
void *) pXLocal, se0_param);
718 numLeadingZeros = __norm ((int32_t) (numPoints - 1)) + 1;
725 pY0 = (cfloat*) (pY + 0);
726 pY1 = (cfloat*) (pY + ((0x40000000u >> numLeadingZeros) << 1));
727 pY2 = (cfloat*) (pY + ((0x80000000u >> numLeadingZeros) << 1));
728 pY3 = (cfloat*) (pY + ((0xC0000000u >> numLeadingZeros) << 1));
730 #ifdef CL7X_HE_CFLOAT_PTR_BUG
731 float *myPY0 = (
float *) pY0;
732 float *myPY1 = (
float *) pY1;
733 float *myPY2 = (
float *) pY2;
734 float *myPY3 = (
float *) pY3;
737 for (l = 0; l < numChannels; l++) {
738 for (k = 0; k < numPoints >> 2; k += 4) {
739 offsetBitReverse = __bit_reverse (k) >> numLeadingZeros;
741 vX_0 = c7x::strm_eng<0, CV>::get_adv ();
742 vX_N_4 = c7x::strm_eng<0, CV>::get_adv ();
743 vX_N_2 = c7x::strm_eng<0, CV>::get_adv ();
744 vX_3N_4 = c7x::strm_eng<0, CV>::get_adv ();
746 vSum1 = vX_0 + vX_N_2;
747 vSum2 = vX_N_4 + vX_3N_4;
748 vDiff1 = vX_0 - vX_N_2;
749 vDiff2 = vX_N_4 - vX_3N_4;
752 vX1 = vDiff1 - __vcrot90sp_vv (vDiff2);
754 vX3 = vDiff1 + __vcrot90sp_vv (vDiff2);
769 __vstore_reverse_bit ((
CVP) (pY0 + offsetBitReverse), vX0);
770 __vstore_reverse_bit ((
CVP) (pY1 + offsetBitReverse), vX1);
771 __vstore_reverse_bit ((
CVP) (pY2 + offsetBitReverse), vX2);
772 __vstore_reverse_bit ((
CVP) (pY3 + offsetBitReverse), vX3);
775 #ifdef CL7X_HE_CFLOAT_PTR_BUG
776 myPY0 += (numPoints << 1);
777 myPY1 += (numPoints << 1);
778 myPY2 += (numPoints << 1);
779 myPY3 += (numPoints << 1);
781 pY0 = (cfloat*) myPY0;
782 pY1 = (cfloat*) myPY1;
783 pY2 = (cfloat*) myPY2;
784 pY3 = (cfloat*) myPY3;
802 vTwX1 =
CV (twTemp, twTemp, twTemp, twTemp);
805 vTwX2 =
CV (twTemp, twTemp, twTemp, twTemp);
808 vTwX3 =
CV (twTemp, twTemp, twTemp, twTemp);
812 vTwX1 = (
CV) (twTemp, twTemp, twTemp, twTemp);
815 vTwX2 = (
CV) (twTemp, twTemp, twTemp, twTemp);
818 vTwX3 = (
CV) (twTemp, twTemp, twTemp, twTemp);
823 if (numPoints == 32) {
827 c7x::uchar_vec vXPermCtrl = c7x::uchar_vec(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
828 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
829 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
830 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
831 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
832 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
833 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
834 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F);
836 c7x::uchar_vec vXPermCtrl = (c7x::uchar_vec)(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
837 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
838 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
839 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
840 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
841 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
842 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
843 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F);
846 CV vX01_2PtDft_1_lo, vX23_2PtDft_1_lo, vX01_2PtDft_2_lo,
848 CV vX01_2PtDft_1_hi, vX23_2PtDft_1_hi, vX01_2PtDft_2_hi,
851 se0_param = *((__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock +
853 __SE0_OPEN ((
void *) pXLocal, se0_param);
855 sa0_param = *((__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock +
857 __SA0_OPEN (sa0_param);
859 for (k = 0; k < numChannels << 5; k += 64) {
860 vX_0 = c7x::strm_eng<0, CV>::get_adv ();
861 vX_0_1 = c7x::strm_eng<0, CV>::get_adv ();
862 vX_N_4 = c7x::strm_eng<0, CV>::get_adv ();
863 vX_N_4_1 = c7x::strm_eng<0, CV>::get_adv ();
864 vX_N_2 = c7x::strm_eng<0, CV>::get_adv ();
865 vX_N_2_1 = c7x::strm_eng<0, CV>::get_adv ();
866 vX_3N_4 = c7x::strm_eng<0, CV>::get_adv ();
867 vX_3N_4_1 = c7x::strm_eng<0, CV>::get_adv ();
869 vSum1 = vX_0 + vX_N_2;
870 vSum2 = vX_N_4 + vX_3N_4;
871 vDiff1 = vX_0 - vX_N_2;
872 vDiff2 = vX_N_4 - vX_3N_4;
875 vX1 = vDiff1 - __vcrot90sp_vv (vDiff2);
877 vX3 = vDiff1 + __vcrot90sp_vv (vDiff2);
879 vSum1_1 = vX_0_1 + vX_N_2_1;
880 vSum2_1 = vX_N_4_1 + vX_3N_4_1;
881 vDiff1_1 = vX_0_1 - vX_N_2_1;
882 vDiff2_1 = vX_N_4_1 - vX_3N_4_1;
884 vX0Temp = vSum1_1 + vSum2_1;
885 vX1Temp = vDiff1_1 - __vcrot90sp_vv (vDiff2_1);
886 vX2Temp = vSum1_1 - vSum2_1;
887 vX3Temp = vDiff1_1 + __vcrot90sp_vv (vDiff2_1);
890 vX1_1 = __complex_multiply (vX1Temp, vTwX1);
891 vX2_1 = __complex_multiply (vX2Temp, vTwX2);
892 vX3_1 = __complex_multiply (vX3Temp, vTwX3);
894 vX0_2PtDft_1 = vX0 + vX0_1;
895 vX0_2PtDft_2 = vX0 - vX0_1;
896 vX1_2PtDft_1 = vX1 + vX1_1;
897 vX1_2PtDft_2 = vX1 - vX1_1;
898 vX2_2PtDft_1 = vX2 + vX2_1;
899 vX2_2PtDft_2 = vX2 - vX2_1;
900 vX3_2PtDft_1 = vX3 + vX3_1;
901 vX3_2PtDft_2 = vX3 - vX3_1;
904 vX01_2PtDft_1_lo = c7x::as_cfloat_vec (
905 __vpermll_yvvv (vXPermCtrl, c7x::as_uchar_vec (vX1_2PtDft_1),
906 c7x::as_uchar_vec (vX0_2PtDft_1)));
907 vX23_2PtDft_1_lo = c7x::as_cfloat_vec (
908 __vpermll_yvvv (vXPermCtrl, c7x::as_uchar_vec (vX3_2PtDft_1),
909 c7x::as_uchar_vec (vX2_2PtDft_1)));
910 vX01_2PtDft_2_lo = c7x::as_cfloat_vec (
911 __vpermll_yvvv (vXPermCtrl, c7x::as_uchar_vec (vX1_2PtDft_2),
912 c7x::as_uchar_vec (vX0_2PtDft_2)));
913 vX23_2PtDft_2_lo = c7x::as_cfloat_vec (
914 __vpermll_yvvv (vXPermCtrl, c7x::as_uchar_vec (vX3_2PtDft_2),
915 c7x::as_uchar_vec (vX2_2PtDft_2)));
916 vX01_2PtDft_1_hi = c7x::as_cfloat_vec (
917 __vpermhh_yvvv (vXPermCtrl, c7x::as_uchar_vec (vX1_2PtDft_1),
918 c7x::as_uchar_vec (vX0_2PtDft_1)));
919 vX23_2PtDft_1_hi = c7x::as_cfloat_vec (
920 __vpermhh_yvvv (vXPermCtrl, c7x::as_uchar_vec (vX3_2PtDft_1),
921 c7x::as_uchar_vec (vX2_2PtDft_1)));
922 vX01_2PtDft_2_hi = c7x::as_cfloat_vec (
923 __vpermhh_yvvv (vXPermCtrl, c7x::as_uchar_vec (vX1_2PtDft_2),
924 c7x::as_uchar_vec (vX0_2PtDft_2)));
925 vX23_2PtDft_2_hi = c7x::as_cfloat_vec (
926 __vpermhh_yvvv (vXPermCtrl, c7x::as_uchar_vec (vX3_2PtDft_2),
927 c7x::as_uchar_vec (vX2_2PtDft_2)));
929 __vpred tmp = c7x::strm_agen<0, CV>::get_vpred ();
931 addr = c7x::strm_agen<0, CV>::get_adv (pYLocal);
932 __vstore_pred (tmp, addr, vX01_2PtDft_1_lo);
934 tmp = c7x::strm_agen<0, CV>::get_vpred ();
935 addr = c7x::strm_agen<0, CV>::get_adv (pYLocal);
936 __vstore_pred (tmp, addr, vX23_2PtDft_1_lo);
938 tmp = c7x::strm_agen<0, CV>::get_vpred ();
939 addr = c7x::strm_agen<0, CV>::get_adv (pYLocal);
940 __vstore_pred (tmp, addr, vX01_2PtDft_2_lo);
942 tmp = c7x::strm_agen<0, CV>::get_vpred ();
943 addr = c7x::strm_agen<0, CV>::get_adv (pYLocal);
944 __vstore_pred (tmp, addr, vX23_2PtDft_2_lo);
946 tmp = c7x::strm_agen<0, CV>::get_vpred ();
947 addr = c7x::strm_agen<0, CV>::get_adv (pYLocal);
948 __vstore_pred (tmp, addr, vX01_2PtDft_1_hi);
950 tmp = c7x::strm_agen<0, CV>::get_vpred ();
951 addr = c7x::strm_agen<0, CV>::get_adv (pYLocal);
952 __vstore_pred (tmp, addr, vX23_2PtDft_1_hi);
954 tmp = c7x::strm_agen<0, CV>::get_vpred ();
955 addr = c7x::strm_agen<0, CV>::get_adv (pYLocal);
956 __vstore_pred (tmp, addr, vX01_2PtDft_2_hi);
958 tmp = c7x::strm_agen<0, CV>::get_vpred ();
959 addr = c7x::strm_agen<0, CV>::get_adv (pYLocal);
960 __vstore_pred (tmp, addr, vX23_2PtDft_2_hi);
969 se0_param = *((__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock +
971 __SE0_OPEN ((
void *) pXLocal, se0_param);
973 numLeadingZeros = __norm ((int32_t) (numPoints - 1)) + 1;
984 pY0 = (cfloat*) (pY + (0x00000000u));
985 pY1 = (cfloat*) (pY + ((0x80000000u >> numLeadingZeros) << 1));
986 pY2 = (cfloat*) (pY + ((0x20000000u >> numLeadingZeros) << 1));
987 pY3 = (cfloat*) (pY + ((0xA0000000u >> numLeadingZeros) << 1));
988 pY4 = (cfloat*) (pY + ((0x40000000u >> numLeadingZeros) << 1));
989 pY5 = (cfloat*) (pY + ((0xC0000000u >> numLeadingZeros) << 1));
990 pY6 = (cfloat*) (pY + ((0x60000000u >> numLeadingZeros) << 1));
991 pY7 = (cfloat*) (pY + ((0xE0000000u >> numLeadingZeros) << 1));
993 #ifdef CL7X_HE_CFLOAT_PTR_BUG
994 float *myPY0 = (
float *) pY0;
995 float *myPY1 = (
float *) pY1;
996 float *myPY2 = (
float *) pY2;
997 float *myPY3 = (
float *) pY3;
998 float *myPY4 = (
float *) pY4;
999 float *myPY5 = (
float *) pY5;
1000 float *myPY6 = (
float *) pY6;
1001 float *myPY7 = (
float *) pY7;
1004 for (l = 0; l < numChannels; l++) {
1005 for (k = 0; k < numPoints >> 2; k += 8) {
1006 offsetBitReverse = __bit_reverse (k) >> numLeadingZeros;
1008 vX_0 = c7x::strm_eng<0, CV>::get_adv ();
1009 vX_0_1 = c7x::strm_eng<0, CV>::get_adv ();
1010 vX_N_4 = c7x::strm_eng<0, CV>::get_adv ();
1011 vX_N_4_1 = c7x::strm_eng<0, CV>::get_adv ();
1012 vX_N_2 = c7x::strm_eng<0, CV>::get_adv ();
1013 vX_N_2_1 = c7x::strm_eng<0, CV>::get_adv ();
1014 vX_3N_4 = c7x::strm_eng<0, CV>::get_adv ();
1015 vX_3N_4_1 = c7x::strm_eng<0, CV>::get_adv ();
1017 vSum1 = vX_0 + vX_N_2;
1018 vSum2 = vX_N_4 + vX_3N_4;
1019 vDiff1 = vX_0 - vX_N_2;
1020 vDiff2 = vX_N_4 - vX_3N_4;
1022 vX0 = vSum1 + vSum2;
1023 vX1 = vDiff1 - __vcrot90sp_vv (vDiff2);
1024 vX2 = vSum1 - vSum2;
1025 vX3 = vDiff1 + __vcrot90sp_vv (vDiff2);
1027 vSum1_1 = vX_0_1 + vX_N_2_1;
1028 vSum2_1 = vX_N_4_1 + vX_3N_4_1;
1029 vDiff1_1 = vX_0_1 - vX_N_2_1;
1030 vDiff2_1 = vX_N_4_1 - vX_3N_4_1;
1032 vX0Temp = vSum1_1 + vSum2_1;
1033 vX1Temp = vDiff1_1 - __vcrot90sp_vv (vDiff2_1);
1034 vX2Temp = vSum1_1 - vSum2_1;
1035 vX3Temp = vDiff1_1 + __vcrot90sp_vv (vDiff2_1);
1038 vX1_1 = __complex_multiply (vX1Temp, vTwX1);
1039 vX2_1 = __complex_multiply (vX2Temp, vTwX2);
1040 vX3_1 = __complex_multiply (vX3Temp, vTwX3);
1042 vX0_2PtDft_1 = vX0 + vX0_1;
1043 vX0_2PtDft_2 = vX0 - vX0_1;
1044 vX1_2PtDft_1 = vX1 + vX1_1;
1045 vX1_2PtDft_2 = vX1 - vX1_1;
1046 vX2_2PtDft_1 = vX2 + vX2_1;
1047 vX2_2PtDft_2 = vX2 - vX2_1;
1048 vX3_2PtDft_1 = vX3 + vX3_1;
1049 vX3_2PtDft_2 = vX3 - vX3_1;
1051 __vstore_reverse_bit ((
CVP) (pY0 + offsetBitReverse),
1053 __vstore_reverse_bit ((
CVP) (pY1 + offsetBitReverse),
1055 __vstore_reverse_bit ((
CVP) (pY2 + offsetBitReverse),
1057 __vstore_reverse_bit ((
CVP) (pY3 + offsetBitReverse),
1059 __vstore_reverse_bit ((
CVP) (pY4 + offsetBitReverse),
1061 __vstore_reverse_bit ((
CVP) (pY5 + offsetBitReverse),
1063 __vstore_reverse_bit ((
CVP) (pY6 + offsetBitReverse),
1065 __vstore_reverse_bit ((
CVP) (pY7 + offsetBitReverse),
1069 #ifdef CL7X_HE_CFLOAT_PTR_BUG
1070 myPY0 += (numPoints << 1);
1071 myPY1 += (numPoints << 1);
1072 myPY2 += (numPoints << 1);
1073 myPY3 += (numPoints << 1);
1074 myPY4 += (numPoints << 1);
1075 myPY5 += (numPoints << 1);
1076 myPY6 += (numPoints << 1);
1077 myPY7 += (numPoints << 1);
1079 pY0 = (cfloat*) myPY0;
1080 pY1 = (cfloat*) myPY1;
1081 pY2 = (cfloat*) myPY2;
1082 pY3 = (cfloat*) myPY3;
1083 pY4 = (cfloat*) myPY4;
1084 pY5 = (cfloat*) myPY5;
1085 pY6 = (cfloat*) myPY6;
1086 pY7 = (cfloat*) myPY7;
1107 #if (!defined(FFTLIB_REMOVE_CHECK_PARAMS) && \
1108 !defined(FFTLIB_FFT1DBATCHED_I32FC_C32FC_O32FC_REMOVE_CHECK_PARAMS)) || \
1109 (defined(FFTLIB_CHECK_PARAMS)) || \
1110 (defined(FFTLIB_FFT1DBATCHED_I32FC_C32FC_O32FC_CHECK_PARAMS))
1120 uint32_t numChannels,
1125 if ((pX == NULL) || (pW == NULL) || (pY == NULL) || (pBlock == NULL)) {
1128 else if (bufParamsX->
dim_x != bufParamsY->
dim_x) {
1131 else if (bufParamsX->
dim_x < numPoints * numChannels * 2) {
1138 else if (bufParamsX->
dim_x < 64 * 2) {
1141 else if (bufParamsW->
dim_x != numPoints * 2) {
1149 else if (((uint64_t) pX) & 0xFu) {
1157 if (numPoints & (1u << k)) {
1162 if ((1u << k) != numPoints) {
1166 if ((numChannels != 1) && (numChannels != 2) && (numChannels != 4) &&
1167 (numChannels != 8) && (numChannels != 16)) {
FFTLIB_STATUS_NAME
The enumeration of all status codes.
@ FFTLIB_ERR_INVALID_TYPE
@ FFTLIB_ERR_NULL_POINTER
@ FFTLIB_ERR_INVALID_DIMENSION
@ FFTLIB_ERR_NOT_ALIGNED_PTRS_STRIDES
float FFTLIB_F32
Single precision floating point.
#define SA_LOOP4_PARAM_OFFSET
#define SE_LOOP6_PARAM_OFFSET
#define SE_LOOP4_PARAM_OFFSET
#define SA_LOOP6_PARAM_OFFSET
#define SA_LOOP2_PARAM_OFFSET
#define SE_LOOP7_PARAM_OFFSET
#define SE_LOOP1_PARAM_OFFSET
#define SA_LOOP1_PARAM_OFFSET
#define SE_TWID_PARAM_OFFSET
#define SE_LOOP2_PARAM_OFFSET
#define SE_LOOP3_PARAM_OFFSET
#define SA_LOOP3_PARAM_OFFSET
#define SE_LOOP5_PARAM_OFFSET
FFTLIB_STATUS FFTLIB_fft1dBatched_i32fc_c32fc_o32fc_init(FFTLIB_F32 *pX, FFTLIB_bufParams1D_t *bufParamsX, FFTLIB_F32 *pW, FFTLIB_bufParams1D_t *bufParamsW, FFTLIB_F32 *pY, FFTLIB_bufParams1D_t *bufParamsY, uint32_t numPoints, uint32_t numChannels, void *pBlock)
This function should be called before the FFTLIB_fft1dBatched_i32fc_c32fc_o32fc_kernel function is called.
FFTLIB_STATUS FFTLIB_fft1dBatched_i32fc_c32fc_o32fc_kernel(FFTLIB_F32 *pX, FFTLIB_bufParams1D_t *bufParamsX, FFTLIB_F32 *pW, FFTLIB_bufParams1D_t *bufParamsW, FFTLIB_F32 *pY, FFTLIB_bufParams1D_t *bufParamsY, uint32_t numPoints, uint32_t numChannels, void *pBlock)
This function is the main kernel compute function.
FFTLIB_STATUS FFTLIB_fft1dBatched_i32fc_c32fc_o32fc_checkParams(FFTLIB_F32 *pX, FFTLIB_bufParams1D_t *bufParamsX, FFTLIB_F32 *pW, FFTLIB_bufParams1D_t *bufParamsW, FFTLIB_F32 *pY, FFTLIB_bufParams1D_t *bufParamsY, uint32_t numPoints, uint32_t numChannels, void *pBlock)
This function checks the validity of the parameters passed to the FFTLIB_fft1dBatched_i32fc_c32fc_o32fc_init and FFTLIB_fft1dBatched_i32fc_c32fc_o32fc_kernel functions.
A structure for a 1 dimensional buffer descriptor.
uint32_t data_type
Values are of type FFTLIB_data_type_e.
uint32_t dim_x
Width of buffer in X dimension in elements.