FFTLIB User Guide
FFTLIB_FFT_dftSmall_ixX_cxX_oxX_ci.cpp
Go to the documentation of this file.
1 /******************************************************************************/
5 /* Copyright (C) 2017 Texas Instruments Incorporated - https://www.ti.com/
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * Redistributions of source code must retain the above copyright
12  * notice, this list of conditions and the following disclaimer.
13  *
14  * Redistributions in binary form must reproduce the above copyright
15  * notice, this list of conditions and the following disclaimer in the
16  * documentation and/or other materials provided with the
17  * distribution.
18  *
19  * Neither the name of Texas Instruments Incorporated nor the names of
20  * its contributors may be used to endorse or promote products derived
21  * from this software without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
29  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
30  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
33  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34  *
35  ******************************************************************************/
36 
37 
38 /*******************************************************************************
39  *
40  * INCLUDES
41  *
42  ******************************************************************************/
43 
44 #include "../FFTLIB_FFT_dftSmall_ixX_cxX_oxX_priv.h"
45 #include "../../../common/c71/FFTLIB_inlines.h"
46 
47 
48 /*******************************************************************************
49  *
50  * DEFINES
51  *
52  ******************************************************************************/
/* Byte offsets of the parameters that the init routine saves into the pBlock
 * buffer and the exec routine reads back. Slots follow one another in this
 * order: SE0 template, SE1 template, SA0 template (each SE_PARAM_SIZE bytes
 * apart), then numBlocks (int32_t). */
53 #define SE_PARAM_BASE (0x0000)
54 #define SE_SE0_PARAM_OFFSET (SE_PARAM_BASE)
55 #define SE_SE1_PARAM_OFFSET (SE_SE0_PARAM_OFFSET + SE_PARAM_SIZE)
56 #define SE_SA0_PARAM_OFFSET (SE_SE1_PARAM_OFFSET + SE_PARAM_SIZE)
57 #define NUMBLOCKS_OFFSET (SE_SA0_PARAM_OFFSET + SE_PARAM_SIZE)
58 
/* The MMA configuration register starts right after the numBlocks word and is
 * followed immediately by the MMA offset register. */
59 #define MMA_PARAM_BASE (NUMBLOCKS_OFFSET + sizeof(int32_t))
60 #define MMA_PARAM_OFFSET (MMA_PARAM_BASE)
61 #define MMA_CONFIGREG_OFFSET (MMA_PARAM_OFFSET)
62 #define MMA_OFFSETREG_OFFSET (MMA_CONFIGREG_OFFSET + sizeof(FFTLIB_MMA_CONFIG_REG))
63 
/* NOTE(review): the line carrying this function's name and first parameter is
 * not present in this listing; the parameter list continues below.
 *
 * Estimates the cycle count of one kernel execution for batchSize transforms
 * of fftSize complex points.
 *   bufParamsX             - only data_type is read (selects the MMA size)
 *   bufParamsY, bufParamsW - not read by the visible body
 *   fftSize, batchSize     - transform length and number of transforms
 *   archCycles             - out: startup + operation + teardown cycles
 *   estCycles              - out: archCycles plus the numBlocks branch overhead
 */
65  const FFTLIB_bufParams1D_t *bufParamsX,
66  const FFTLIB_bufParams1D_t *bufParamsY,
67  const FFTLIB_bufParams1D_t *bufParamsW,
68  uint32_t fftSize,
69  uint32_t batchSize,
70  uint64_t *archCycles,
71  uint64_t *estCycles)
72 {
73  // determine the MMA size from the data type of the input buffer (bufParamsX)
74  int32_t MMA_SIZE;
75  uint32_t batchSizePerRow, numRows, numBlocks;
76  uint64_t startupCycles, teardownCycles, computeCycles, operationCycles, overheadCycles;
77  uint64_t storeLatency = 24;
78 
79  if (bufParamsX->data_type == FFTLIB_INT8) {
80  MMA_SIZE = FFTLIB_MMA_SIZE_8_BIT;
81  } else if (bufParamsX->data_type == FFTLIB_INT16) {
82  MMA_SIZE = FFTLIB_MMA_SIZE_16_BIT;
83  } else {
 // any other data_type value is treated as 32-bit
84  MMA_SIZE = FFTLIB_MMA_SIZE_32_BIT;
85  }
86 
 // transforms that fit in one MMA row; each one occupies fftSize*2 elements
 // NOTE(review): this is 0 when fftSize*2 > MMA_SIZE, and the next statement
 // then divides by zero - confirm callers validate fftSize beforehand
87  batchSizePerRow = MMA_SIZE/(fftSize*2);
 // numRows = ceil(batchSize / batchSizePerRow)
88  numRows = batchSize/batchSizePerRow;
89  numRows = numRows*batchSizePerRow == batchSize ?
90  numRows : numRows + 1;
91  /* number of rows processed per block is aligned with MMA block size
92  * (rather than smaller size) because of better assembly code accounting
93  * for MMA transfer-receive latency */
 // numBlocks = ceil(numRows / MMA_SIZE)
94  numBlocks = numRows/MMA_SIZE;
95  numBlocks = numBlocks*MMA_SIZE == numRows ?
96  numBlocks : numBlocks + 1;
97 
98  startupCycles =
99  9 + // kernel function call
100  7 + // load numBlocks, interleaving check
101  5 + // load SE1 params
102  17; // SEOPEN latency on SE1 (the first SE needed)
103 
104  teardownCycles =
105  2; // close SE and return
106 
 // one cycle is counted per row of every block
107  computeCycles = numBlocks*MMA_SIZE;
108 
109  operationCycles =
110  MMA_SIZE + // Load DFT matrix into B
111  computeCycles +
112  MMA_SIZE + // store last block output
113  storeLatency;
114 
115  overheadCycles =
116  2; // if check for numBlocks > 1
117 
 // the two estimates differ only by overheadCycles
118  *archCycles = startupCycles + operationCycles + teardownCycles;
119  *estCycles = startupCycles + operationCycles + overheadCycles + teardownCycles;
120 
121  return;
122 }
123 
/* Size query: for the fftSize and batchSize in pKerInitArgs, writes the element
 * counts (dim_x) of the X, Y and W buffer descriptors and returns a status.
 * The template argument dataType selects the MMA size.
 *
 * NOTE(review): the function-name line and the statements inside the final
 * `else` and inside the `fftSize > (MMA_SIZE >> 1)` branch are not present in
 * this listing, so what those branches do cannot be stated here.
 */
124 template <uint32_t dataType>
126  const FFTLIB_FFT_dftSmall_ixX_cxX_oxX_InitArgs * pKerInitArgs,
127  FFTLIB_bufParams1D_t * bufParamsX,
128  FFTLIB_bufParams1D_t * bufParamsW,
129  FFTLIB_bufParams1D_t * bufParamsY)
130 {
131  uint32_t fftSize = pKerInitArgs->fftSize;
132  uint32_t batchSize = pKerInitArgs->batchSize;
133  uint32_t MMA_SIZE;
134  FFTLIB_STATUS status = FFTLIB_SUCCESS;
135 
136  if (dataType == FFTLIB_INT32) {
137  MMA_SIZE = FFTLIB_MMA_SIZE_32_BIT;
138  } else if (dataType == FFTLIB_INT16) {
139  MMA_SIZE = FFTLIB_MMA_SIZE_16_BIT;
140  } else {
 // NOTE(review): no visible assignment to MMA_SIZE on this path, yet it is
 // read below - confirm against the original source
142  }
143 
 // case where one transform (fftSize*2 elements) does not fit in an MMA row
144  if (fftSize > (MMA_SIZE >> 1)) {
146  }
147 
148  if (status == FFTLIB_SUCCESS) {
 // X and Y: batchSize transforms of fftSize complex points, 2 elements each
149  bufParamsX->dim_x = batchSize*fftSize*2;
150  bufParamsY->dim_x = batchSize*fftSize*2;
 // W: a full MMA_SIZE x MMA_SIZE matrix
151  bufParamsW->dim_x = MMA_SIZE*MMA_SIZE;
152  }
153 
154  return status;
155 }
156 
158  const FFTLIB_FFT_dftSmall_ixX_cxX_oxX_InitArgs * pKerInitArgs,
159  FFTLIB_bufParams1D_t * bufParamsX,
160  FFTLIB_bufParams1D_t * bufParamsW,
161  FFTLIB_bufParams1D_t * bufParamsY);
163  const FFTLIB_FFT_dftSmall_ixX_cxX_oxX_InitArgs * pKerInitArgs,
164  FFTLIB_bufParams1D_t * bufParamsX,
165  FFTLIB_bufParams1D_t * bufParamsW,
166  FFTLIB_bufParams1D_t * bufParamsY);
167 
/* Generates the DFT computational matrix into pW.
 *
 * The buffer is treated as an MMA_SIZE x MMA_SIZE array of dataType. It is
 * first zeroed, then batchSizePerRow blocks on the diagonal are filled with
 * cos/sin values scaled by twF2sScale. The placement of the real and imaginary
 * terms depends on pKerInitArgs->interleave. Nothing is written unless
 * status is still FFTLIB_SUCCESS after the size check.
 *
 * NOTE(review): the function-name line, the call lines that take the three
 * local bufParams as arguments, and the bodies of the error branches are not
 * present in this listing.
 */
168 template <typename dataType>
170  const FFTLIB_FFT_dftSmall_ixX_cxX_oxX_InitArgs * pKerInitArgs,
171  void *restrict pW,
172  const FFTLIB_bufParams1D_t * bufParamsW)
173 {
174  FFTLIB_STATUS status = FFTLIB_SUCCESS;
175  uint32_t j, k, l, n;
176  uint32_t batchSizePerRow;
 // NOTE(review): PI is given to only 10 significant digits
177  const FFTLIB_D64 PI = 3.141592654;
178  FFTLIB_D64 twF2sScale;
179  int32_t index;
180  FFTLIB_bufParams1D_t bufParamsXLocal, bufParamsYLocal, bufParamsWLocal;
181  dataType *pWLocal = (dataType *)pW;
182  uint32_t fftSize = pKerInitArgs->fftSize;
183  uint32_t batchSize = pKerInitArgs->batchSize;
184  uint32_t interleave = pKerInitArgs->interleave;
185  uint32_t MMA_SIZE;
186 
 // the element width of dataType selects the MMA size
187  if (sizeof(dataType) == 4) {
189  &bufParamsXLocal,
190  &bufParamsWLocal,
191  &bufParamsYLocal);
192  MMA_SIZE = FFTLIB_MMA_SIZE_32_BIT;
193  } else if (sizeof(dataType) == 2) {
195  &bufParamsXLocal,
196  &bufParamsWLocal,
197  &bufParamsYLocal);
198  MMA_SIZE = FFTLIB_MMA_SIZE_16_BIT;
199  } else {
201  }
202 
 // the caller's W descriptor must match the locally computed size
 // NOTE(review): bufParamsWLocal is only handed to a call in the two branches
 // above - confirm it is initialised when the else path is taken
203  if (bufParamsW->dim_x != bufParamsWLocal.dim_x) {
205  }
206 
207  if (status == FFTLIB_SUCCESS) {
 // NOTE(review): 0 when fftSize > MMA_SIZE/2; the fill loops then do nothing
208  batchSizePerRow = (MMA_SIZE >> 1)/fftSize;
 // clear the whole matrix so everything off the diagonal blocks stays zero
209  for (j = 0; j < MMA_SIZE; j++) {
210  for (k = 0; k < MMA_SIZE; k++) {
211  pWLocal[j*MMA_SIZE+k] = 0;
212  }
213  }
 // with b = bits in dataType:
 // scale = (2^(b-2) - 1) + 2^(b-2) + 0.5 = 2^(b-1) - 0.5
214  twF2sScale = ((uint32_t)1 << ((uint32_t)(sizeof(dataType)*8-2))) - 1;
215  twF2sScale += ((uint32_t)1 << ((uint32_t)(sizeof(dataType)*8-2)));
216  twF2sScale += 0.5;
217  if (interleave) {
 // j: diagonal block, k: index within the block's row pairs,
 // l: column block, n: index within the block's column pairs
218  for (j = 0; j < batchSizePerRow; j++) {
219  for ( k = 0; k < fftSize; k++) {
220  for (l = 0; l < batchSizePerRow; l++) {
 // start of the first of two consecutive matrix rows for (j, k),
 // offset to column block l
221  index = (j*fftSize+k)*2*MMA_SIZE+l*fftSize*2;
222  for (n = 0; n < fftSize; n++) {
223 
224  /* Fill only block diagonal entries */
225  if (l == j) {
 // first row holds (cos, -sin) pairs, next row (sin, cos) pairs
226  pWLocal[index+n*2] =
227  FFTLIB_UTIL_cos_i64f_oxX<dataType>(2*PI*k*n/fftSize, twF2sScale);
228  pWLocal[index+n*2+1] =
229  -FFTLIB_UTIL_sin_i64f_oxX<dataType>(2*PI*k*n/fftSize, twF2sScale);
230  pWLocal[index+MMA_SIZE+n*2] =
231  FFTLIB_UTIL_sin_i64f_oxX<dataType>(2*PI*k*n/fftSize, twF2sScale);
232  pWLocal[index+MMA_SIZE+n*2+1] =
233  FFTLIB_UTIL_cos_i64f_oxX<dataType>(2*PI*k*n/fftSize, twF2sScale);
234  }
235  }
236  }
237  }
238  }
239  } else {
240  for (j = 0; j < batchSizePerRow; j++) {
241  for ( k = 0; k < fftSize; k++) {
 // NOTE(review): bound is batchSize here but batchSizePerRow in the
 // interleaved path. Only l == j writes, so blocks j >= batchSize stay
 // zero when batchSize < batchSizePerRow - confirm this is intended
242  for (l = 0; l < batchSize; l++) {
243  index = (j*fftSize*2+k)*MMA_SIZE+l*fftSize*2;
244  for (n = 0; n < fftSize; n++) {
245 
246  /* Fill only block diagonal entries */
247  if (l == j) {
 // four fftSize x fftSize quadrants per block:
 // top-left cos, top-right -sin, bottom-left sin, bottom-right cos
248  pWLocal[index+n] =
249  FFTLIB_UTIL_cos_i64f_oxX<dataType>(2*PI*k*n/fftSize, twF2sScale);
250  pWLocal[index+fftSize+n] =
251  -FFTLIB_UTIL_sin_i64f_oxX<dataType>(2*PI*k*n/fftSize, twF2sScale);
252  pWLocal[index+fftSize*MMA_SIZE+n] =
253  FFTLIB_UTIL_sin_i64f_oxX<dataType>(2*PI*k*n/fftSize, twF2sScale);
254  pWLocal[index+fftSize*MMA_SIZE+fftSize+n] =
255  FFTLIB_UTIL_cos_i64f_oxX<dataType>(2*PI*k*n/fftSize, twF2sScale);
256  }
257  }
258  }
259  }
260  }
261  }
262  }
263  return status;
264 }
265 
267  const FFTLIB_FFT_dftSmall_ixX_cxX_oxX_InitArgs * pKerInitArgs,
268  void *restrict pW,
269  const FFTLIB_bufParams1D_t * bufParamsW);
270 
272  const FFTLIB_FFT_dftSmall_ixX_cxX_oxX_InitArgs * pKerInitArgs,
273  void *restrict pW,
274  const FFTLIB_bufParams1D_t * bufParamsW);
275 
/* Initialization: builds the SE0, SE1 and SA0 templates and the MMA
 * configuration/offset registers for the given fftSize and batchSize, then
 * stores them, together with numBlocks, in pBlock at the *_OFFSET positions.
 * Returns status; nothing is written to pBlock unless it is FFTLIB_SUCCESS.
 *
 * NOTE(review): the function-name line, the declaration of pKerPrivArgs and
 * the body of the final `else` are not present in this listing.
 */
276 template <uint32_t dataType>
278  FFTLIB_kernelHandle handle,
279  const FFTLIB_bufParams1D_t *bufParamsX,
280  const FFTLIB_bufParams1D_t *bufParamsW,
281  const FFTLIB_bufParams1D_t *bufParamsY,
282  const FFTLIB_FFT_dftSmall_ixX_cxX_oxX_InitArgs *pKerInitArgs)
283 {
284  FFTLIB_STATUS status = FFTLIB_SUCCESS;
285  __SE_TEMPLATE_v1 se0Params;
286  __SE_TEMPLATE_v1 se1Params;
287  __SA_TEMPLATE_v1 sa0Params;
288  uint32_t batchSizePerRow, numRows, numBlocks;
289  FFTLIB_MMA_CONFIG_REG mmaConfig;
290  __HWA_OFFSET_REG mmaOffset;
293  uint8_t *pBlock = pKerPrivArgs->bufPblock;
294  uint32_t fftSize = pKerInitArgs->fftSize;
295  uint32_t batchSize = pKerInitArgs->batchSize;
 // only the first entry of shiftVector is used
296  uint32_t shift = pKerInitArgs->shiftVector[0];
297  uint32_t MMA_SIZE;
298  __SE_ELETYPE SE_ELETYPE;
299  __SE_VECLEN SE_VECLEN;
300  FFTLIB_MMA_CONFIG_REG mmaConfigDefault;
301 
 // choose MMA size, SE element type/vector length and the default MMA
 // configuration from the template data type
302  if (dataType == FFTLIB_INT32) {
303  MMA_SIZE = FFTLIB_MMA_SIZE_32_BIT;
304  SE_ELETYPE = __SE_ELETYPE_32BIT;
305  SE_VECLEN = __SE_VECLEN_16ELEMS;
306  mmaConfigDefault = configRegisterStruct_i32s_i32s_o32s;
307  } else if (dataType == FFTLIB_INT16) {
308  MMA_SIZE = FFTLIB_MMA_SIZE_16_BIT;
309  SE_ELETYPE = __SE_ELETYPE_16BIT;
310  SE_VECLEN = __SE_VECLEN_32ELEMS;
311  mmaConfigDefault = configRegisterStruct_i16s_i16s_o16s;
312  } else {
314  }
315 
316  if (status == FFTLIB_SUCCESS) {
 // transforms per MMA row
 // NOTE(review): this is 0 when fftSize > MMA_SIZE/2 and the next statement
 // then divides by zero - confirm the size check is enforced before init
317  batchSizePerRow = (MMA_SIZE >> 1)/fftSize;
 // numRows = ceil(batchSize / batchSizePerRow)
318  numRows = batchSize/batchSizePerRow;
319  numRows = numRows*batchSizePerRow == batchSize ?
320  numRows : numRows + 1;
321  /* number of rows processed per block is aligned with MMA block size
322  * (rather than smaller size) because of better assembly code accounting
323  * for MMA transfer-receive latency */
 // numBlocks = ceil(numRows / MMA_SIZE)
324  numBlocks = numRows/MMA_SIZE;
325  numBlocks = numBlocks*MMA_SIZE == numRows ?
326  numBlocks : numBlocks + 1;
327 
328  /**********************************************************************/
329  /* Prepare streaming engine 0 to fetch input data */
330  /**********************************************************************/
 // 2D pattern: rows of batchSizePerRow*fftSize*2 elements, MMA_SIZE rows per
 // block. DECDIM1_WIDTH equals the element count of the whole input;
 // presumably this limits reads on the padded final rows - TODO confirm
331  se0Params = __gen_SE_TEMPLATE_v1();
332  se0Params.ICNT0 = batchSizePerRow*fftSize*2;
333  se0Params.ICNT1 = MMA_SIZE*numBlocks;
334  se0Params.DIM1 = batchSizePerRow*fftSize*2;
335 
336  se0Params.DECDIM1_WIDTH = fftSize*2*batchSize;
337  se0Params.DECDIM1 = __SE_DECDIM_DIM1;
338  se0Params.ELETYPE = SE_ELETYPE;
339  se0Params.VECLEN = SE_VECLEN;
340  se0Params.DIMFMT = __SE_DIMFMT_2D;
341 
342  /**********************************************************************/
343  /* Prepare streaming engine 1 to fetch DFT matrix */
344  /**********************************************************************/
 // 3D pattern: the MMA_SIZE x MMA_SIZE matrix, repeated twice with a zero
 // stride in the third dimension so both passes read the same data
345  se1Params = __gen_SE_TEMPLATE_v1();
346  se1Params.ICNT0 = MMA_SIZE;
347  se1Params.ICNT1 = MMA_SIZE;
348  se1Params.DIM1 = MMA_SIZE;
349  /* Load two copies: one into B-back and the other into B-fore */
350  se1Params.ICNT2 = 2;
351  se1Params.DIM2 = 0;
352 
353  se1Params.ELETYPE = SE_ELETYPE;
354  se1Params.VECLEN = SE_VECLEN;
355  se1Params.DIMFMT = __SE_DIMFMT_3D;
356 
357  /**********************************************************************/
358  /* Prepare SA template to store output */
359  /**********************************************************************/
 // same row shape as SE0, but every count is multiplied by the byte size of
 // one output element (FFTLIB_sizeof of bufParamsY->data_type)
360  sa0Params = __gen_SA_TEMPLATE_v1();
361  sa0Params.ICNT0 = fftSize*2*batchSizePerRow*FFTLIB_sizeof(bufParamsY->data_type);
362  sa0Params.ICNT1 = numBlocks*MMA_SIZE;
363  sa0Params.DIM1 = batchSizePerRow*fftSize*2*FFTLIB_sizeof(bufParamsY->data_type);
364  sa0Params.DECDIM1_WIDTH = fftSize*2*batchSize*FFTLIB_sizeof(bufParamsY->data_type);
365 
366  sa0Params.DECDIM1 = __SA_DECDIM_DIM1;
367  sa0Params.VECLEN = __SA_VECLEN_64ELEMS;
368  sa0Params.DIMFMT = __SA_DIMFMT_2D;
369 
370  /**********************************************************************/
371  /* Prepare MMA */
372  /**********************************************************************/
 // start from the per-type default config and an all-zero offset register,
 // then override the periods and the shift below
373  mmaConfig = mmaConfigDefault;
374  mmaOffset = offsetRegStruct_zeros;
375 
376  mmaConfig.B_BSWPER = 0xFFFFFFFFu;
377 
378  mmaConfig.C_BSWPER = 0xFFFFFFFFu;
379  mmaConfig.C_CWSWPER = MMA_SIZE;
380  mmaConfig.C_CRSWPER = MMA_SIZE;
381  mmaConfig.C_CRRSTPER = MMA_SIZE;
382  mmaConfig.C_CWRSTPER = MMA_SIZE;
383 
384  mmaConfig.X_SHIFT = shift;
385  mmaConfig.X_CSWPER = MMA_SIZE;
386  mmaConfig.X_CRRSTPER = MMA_SIZE;
387 
 // save everything into pBlock for the exec routine to read back
388  *((__SE_TEMPLATE_v1*)((uint8_t *)pBlock + SE_SE0_PARAM_OFFSET)) = se0Params;
389  *((__SE_TEMPLATE_v1*)((uint8_t *)pBlock + SE_SE1_PARAM_OFFSET)) = se1Params;
390  *((__SA_TEMPLATE_v1*)((uint8_t *)pBlock + SE_SA0_PARAM_OFFSET)) = sa0Params;
391  *((FFTLIB_MMA_CONFIG_REG *)((uint8_t *)pBlock + MMA_CONFIGREG_OFFSET)) = mmaConfig;
392  *((__HWA_OFFSET_REG *)((uint8_t *)pBlock + MMA_OFFSETREG_OFFSET)) = mmaOffset;
393 
 // numBlocks (uint32_t here) is stored as int32_t
394  *((int32_t *)((uint8_t *)pBlock + NUMBLOCKS_OFFSET)) = numBlocks;
395  }
396 
397  return status;
398 }
399 
401  FFTLIB_kernelHandle handle,
402  const FFTLIB_bufParams1D_t *bufParamsX,
403  const FFTLIB_bufParams1D_t *bufParamsW,
404  const FFTLIB_bufParams1D_t *bufParamsY,
405  const FFTLIB_FFT_dftSmall_ixX_cxX_oxX_InitArgs *pKerInitArgs);
406 
408  FFTLIB_kernelHandle handle,
409  const FFTLIB_bufParams1D_t *bufParamsX,
410  const FFTLIB_bufParams1D_t *bufParamsW,
411  const FFTLIB_bufParams1D_t *bufParamsY,
412  const FFTLIB_FFT_dftSmall_ixX_cxX_oxX_InitArgs *pKerInitArgs);
413 
414 /*******************************************************************************
415  *
416  * EXECUTION FUNCTIONS
417  *
418  ******************************************************************************/
419 
/* Execution: reads the templates, MMA registers and numBlocks back from pBlock,
 * opens SE1 on pW, the MMA, SE0 on pX and SA0, runs the load/compute/store
 * helpers with pY as the store destination, then closes all four resources.
 * Always returns FFTLIB_SUCCESS.
 *
 * NOTE(review): the function-name line, the declaration of pKerPrivArgs and one
 * statement between the B load and the loopCount computation are not present
 * in this listing.
 */
420 template <typename dataType, uint32_t MMA_SIZE>
422  FFTLIB_kernelHandle handle,
423  const void *restrict pX,
424  const void *restrict pW,
425  void *restrict pY)
426 {
427  __SE_TEMPLATE_v1 se0Params;
428  __SE_TEMPLATE_v1 se1Params;
429  __SA_TEMPLATE_v1 sa0Params;
430  FFTLIB_MMA_CONFIG_REG mmaConfig;
431  __HWA_OFFSET_REG mmaOffset;
432  int32_t numBlocks;
433  int32_t loopCount;
436  uint8_t *pBlock = pKerPrivArgs->bufPblock;
437 
 // SE1 (DFT matrix stream) is opened first
438  se1Params = *((__SE_TEMPLATE_v1*)((uint8_t *)pBlock + SE_SE1_PARAM_OFFSET));
439  __SE1_OPEN((const void *)pW, se1Params);
440 
441  mmaConfig = *((FFTLIB_MMA_CONFIG_REG *)((uint8_t *)pBlock + MMA_CONFIGREG_OFFSET));
442  mmaOffset = *((__HWA_OFFSET_REG *)((uint8_t *)pBlock + MMA_OFFSETREG_OFFSET));
443  __HWAOPEN(mmaConfig, mmaOffset, __MMA_OPEN_FSM_RESET);
444  __HWAADV();
445 
 // SE0 streams the input data from pX
446  se0Params = *((__SE_TEMPLATE_v1*)((uint8_t *)pBlock + SE_SE0_PARAM_OFFSET));
447  __SE0_OPEN((const void *)pX, se0Params);
448 
449  sa0Params = *((__SA_TEMPLATE_v1*)((uint8_t *)pBlock + SE_SA0_PARAM_OFFSET));
450  __SA0_OPEN(sa0Params);
451 
452  numBlocks = *((int32_t *)((uint8_t *)pBlock + NUMBLOCKS_OFFSET));
453 
 // load MMA_SIZE rows of the DFT matrix through SE1
454  FFTLIB_UTIL_SE1Bload(MMA_SIZE);
455 
457 
 // row count for every block except the last one
458  loopCount = (numBlocks-1)*MMA_SIZE;
459 
460  if (numBlocks > 1) {
 // the assertion is a compiler hint: loopCount is positive on this path
461  _nassert(loopCount > 0);
 // presumably overlaps loading/computing rows with storing earlier results
 // to pY - TODO confirm against the helper's definition
462  FFTLIB_UTIL_SE0AloadComputeCSA0Cstore(loopCount, (uint8_t *)pY);
463 
 // final MMA_SIZE rows are stored separately
464  FFTLIB_UTIL_SA0Cstore_unroll<MMA_SIZE>(MMA_SIZE, (uint8_t *)pY);
465  } else {
 // single block: only the final store is issued here
466  FFTLIB_UTIL_SA0Cstore_unroll<MMA_SIZE>(MMA_SIZE, (uint8_t *)pY);
467  }
468 
469  __SE0_CLOSE();
470  __SE1_CLOSE();
471  __SA0_CLOSE();
472  __HWACLOSE(0);
473 
474  return FFTLIB_SUCCESS;
475 }
476 
478  FFTLIB_kernelHandle handle,
479  const void *restrict pX,
480  const void *restrict pW,
481  void *restrict pY);
482 
484  FFTLIB_kernelHandle handle,
485  const void *restrict pX,
486  const void *restrict pW,
487  void *restrict pY);
template FFTLIB_STATUS FFTLIB_FFT_dftSmall_ixX_cxX_oxX_exec_ci< int32_t, FFTLIB_MMA_SIZE_32_BIT >(FFTLIB_kernelHandle handle, const void *restrict pX, const void *restrict pW, void *restrict pY)
FFTLIB_STATUS FFTLIB_FFT_dftSmall_ixX_cxX_oxX_exec_ci(FFTLIB_kernelHandle handle, const void *restrict pX, const void *restrict pW, void *restrict pY)
This function is the main execution function for the C7x implementation of the kernel....
template FFTLIB_STATUS FFTLIB_FFT_dftSmall_ixX_cxX_oxX_getSizes< FFTLIB_INT32 >(const FFTLIB_FFT_dftSmall_ixX_cxX_oxX_InitArgs *pKerInitArgs, FFTLIB_bufParams1D_t *bufParamsX, FFTLIB_bufParams1D_t *bufParamsW, FFTLIB_bufParams1D_t *bufParamsY)
template FFTLIB_STATUS FFTLIB_FFT_dftSmall_ixX_cxX_oxX_getSizes< FFTLIB_INT16 >(const FFTLIB_FFT_dftSmall_ixX_cxX_oxX_InitArgs *pKerInitArgs, FFTLIB_bufParams1D_t *bufParamsX, FFTLIB_bufParams1D_t *bufParamsW, FFTLIB_bufParams1D_t *bufParamsY)
template FFTLIB_STATUS FFTLIB_FFT_dftSmall_ixX_cxX_oxX_twGen< int16_t >(const FFTLIB_FFT_dftSmall_ixX_cxX_oxX_InitArgs *pKerInitArgs, void *restrict pW, const FFTLIB_bufParams1D_t *bufParamsW)
FFTLIB_STATUS FFTLIB_FFT_dftSmall_ixX_cxX_oxX_init_ci(FFTLIB_kernelHandle handle, const FFTLIB_bufParams1D_t *bufParamsX, const FFTLIB_bufParams1D_t *bufParamsW, const FFTLIB_bufParams1D_t *bufParamsY, const FFTLIB_FFT_dftSmall_ixX_cxX_oxX_InitArgs *pKerInitArgs)
This function is the initialization function for the C7x implementation of the kernel....
template FFTLIB_STATUS FFTLIB_FFT_dftSmall_ixX_cxX_oxX_init_ci< FFTLIB_INT32 >(FFTLIB_kernelHandle handle, const FFTLIB_bufParams1D_t *bufParamsX, const FFTLIB_bufParams1D_t *bufParamsW, const FFTLIB_bufParams1D_t *bufParamsY, const FFTLIB_FFT_dftSmall_ixX_cxX_oxX_InitArgs *pKerInitArgs)
template FFTLIB_STATUS FFTLIB_FFT_dftSmall_ixX_cxX_oxX_exec_ci< int16_t, FFTLIB_MMA_SIZE_16_BIT >(FFTLIB_kernelHandle handle, const void *restrict pX, const void *restrict pW, void *restrict pY)
template FFTLIB_STATUS FFTLIB_FFT_dftSmall_ixX_cxX_oxX_init_ci< FFTLIB_INT16 >(FFTLIB_kernelHandle handle, const FFTLIB_bufParams1D_t *bufParamsX, const FFTLIB_bufParams1D_t *bufParamsW, const FFTLIB_bufParams1D_t *bufParamsY, const FFTLIB_FFT_dftSmall_ixX_cxX_oxX_InitArgs *pKerInitArgs)
FFTLIB_STATUS FFTLIB_FFT_dftSmall_ixX_cxX_oxX_twGen(const FFTLIB_FFT_dftSmall_ixX_cxX_oxX_InitArgs *pKerInitArgs, void *restrict pW, const FFTLIB_bufParams1D_t *bufParamsW)
This is a utility function that generates the DFT computational matrix into the provided buffer.
template FFTLIB_STATUS FFTLIB_FFT_dftSmall_ixX_cxX_oxX_twGen< int32_t >(const FFTLIB_FFT_dftSmall_ixX_cxX_oxX_InitArgs *pKerInitArgs, void *restrict pW, const FFTLIB_bufParams1D_t *bufParamsW)
static void FFTLIB_UTIL_SE0AloadComputeCSA0Cstore(int32_t numRows, uint8_t *CbackDestination)
static void FFTLIB_UTIL_SE0AloadComputeC(int32_t numRows)
Use streaming engine 0 to load numRows of A and compute C(row) = A(row) x Bfore(:,:)
static void FFTLIB_UTIL_SE1Bload(int32_t numRows)
load numRows of B into the Bload buffer
@ FFTLIB_INT16
@ FFTLIB_INT8
@ FFTLIB_INT32
static int32_t FFTLIB_sizeof(uint32_t type)
Inline function returns number of bytes per element given a type of _FFTLIB_data_type_e.
double FFTLIB_D64
Double precision floating point.
Definition: FFTLIB_types.h:168
void * FFTLIB_kernelHandle
Handle type for FFTLIB operations.
Definition: FFTLIB_types.h:217
FFTLIB_STATUS_NAME
The enumeration of all status codes.
Definition: FFTLIB_types.h:172
@ FFTLIB_ERR_NOT_IMPLEMENTED
Definition: FFTLIB_types.h:179
@ FFTLIB_ERR_INVALID_DIMENSION
Definition: FFTLIB_types.h:177
@ FFTLIB_SUCCESS
Definition: FFTLIB_types.h:173
const __HWA_OFFSET_REG offsetRegStruct_zeros
const FFTLIB_MMA_CONFIG_REG configRegisterStruct_i16s_i16s_o16s
const FFTLIB_MMA_CONFIG_REG configRegisterStruct_i32s_i32s_o32s
#define FFTLIB_MMA_CONFIG_REG
#define FFTLIB_MMA_SIZE_16_BIT
type is 16-bit integers
#define FFTLIB_MMA_SIZE_8_BIT
MMA size as a function of precision.
#define FFTLIB_MMA_SIZE_32_BIT
type is 32-bit integers
FFTLIB_STATUS FFTLIB_FFT_dftSmall_ixX_cxX_oxX_getSizes(const FFTLIB_FFT_dftSmall_ixX_cxX_oxX_InitArgs *pKerInitArgs, FFTLIB_bufParams1D_t *bufParamsX, FFTLIB_bufParams1D_t *bufParamsW, FFTLIB_bufParams1D_t *bufParamsY)
This is a query function to calculate the sizes of input, output and the DFT computational matrix buf...
void FFTLIB_FFT_dftSmall_ixX_cxX_oxX_perfEst(FFTLIB_kernelHandle handle, const FFTLIB_bufParams1D_t *bufParamsX, const FFTLIB_bufParams1D_t *bufParamsY, const FFTLIB_bufParams1D_t *bufParamsW, uint32_t fftSize, uint32_t batchSize, uint64_t *archCycles, uint64_t *estCycles)
This is a utility function that gives an estimate of the cycles consumed for the kernel execution.
Structure containing the parameters for DFT computation.
uint32_t interleave
Flag to indicate if the real and imaginary parts of data are interleaved or not. A value of 1 indicat...
uint32_t shiftVector[FFTLIB_FFT_DFTSMALL_IXX_CXX_OXX_NUMSHIFTS]
Array containing the bit-shift values to be used for internal computation.
uint32_t fftSize
Size of each channel's data in terms of number of complex points.
uint32_t batchSize
Size of the batch in terms of the number of DFT channels.
Structure that is reserved for internal use by the kernel.
uint8_t bufPblock[FFTLIB_FFT_DFTSMALL_IXX_CXX_OXX_PBLOCK_SIZE]
Array to store the configuration prepared by FFTLIB_FFT_dftSmall_ixX_cxX_oxX_init_ci that will be ret...
A structure for a 1 dimensional buffer descriptor.
uint32_t data_type
Values are of type FFTLIB_data_type_e.
uint32_t dim_x
Width of buffer in X dimension in elements.