44 #include "../FFTLIB_FFT_dftLarge_ixX_cxX_oxX_priv.h"
45 #include "../../../common/c71/FFTLIB_inlines.h"
64 int32_t startupCycles, teardownCycles;
66 bufParamsMatrixData.
dim_x = fftSize*2;
67 bufParamsMatrixData.
dim_y = batchSize;
70 bufParamsMatrixW.
dim_x = fftSize*2;
71 bufParamsMatrixW.
dim_y = fftSize*2;
90 *archCycles = startupCycles + *archCycles + teardownCycles;
91 *estCycles = startupCycles + *estCycles + teardownCycles;
96 template <u
int32_t dataType>
103 uint32_t fftSize = pKerInitArgs->
fftSize;
104 uint32_t batchSize = pKerInitArgs->
batchSize;
116 if (fftSize <= MMA_SIZE/2) {
121 bufParamsX->
dim_x = batchSize*fftSize*2;
122 bufParamsY->
dim_x = batchSize*fftSize*2;
123 bufParamsW->
dim_x = fftSize*2*fftSize*2;
142 template <
typename dataType>
153 dataType *pWLocal = (dataType *)pW;
154 uint32_t fftSize = pKerInitArgs->
fftSize;
155 uint32_t interleave = pKerInitArgs->
interleave;
157 if (
sizeof(dataType) == 4) {
162 }
else if (
sizeof(dataType) == 2) {
171 if (bufParamsW->
dim_x != bufParamsWLocal.
dim_x) {
176 twF2sScale = ((uint32_t)1 << ((uint32_t)(
sizeof(dataType)*8-2))) - 1;
177 twF2sScale += ((uint32_t)1 << ((uint32_t)(
sizeof(dataType)*8-2)));
180 for (j = 0; j < fftSize; j++) {
181 for (k = 0; k < fftSize; k++) {
182 pWLocal[j*2*fftSize*2+k*2] =
183 FFTLIB_UTIL_cos_i64f_oxX<dataType>(
184 2*PI*j*k/fftSize, twF2sScale);
185 pWLocal[j*2*fftSize*2+k*2+1] =
186 -FFTLIB_UTIL_sin_i64f_oxX<dataType>(
187 2*PI*j*k/fftSize, twF2sScale);
189 for (k = 0; k < fftSize; k++) {
190 pWLocal[(j*2+1)*fftSize*2+k*2] =
191 FFTLIB_UTIL_sin_i64f_oxX<dataType>(
192 2*PI*j*k/fftSize, twF2sScale);
193 pWLocal[(j*2+1)*fftSize*2+k*2+1] =
194 FFTLIB_UTIL_cos_i64f_oxX<dataType>(
195 2*PI*j*k/fftSize, twF2sScale);
199 for (j = 0; j < fftSize; j++) {
200 for (k = 0; k < fftSize; k++) {
201 pWLocal[j*fftSize*2+k] =
202 FFTLIB_UTIL_cos_i64f_oxX<dataType>(
203 2*PI*j*k/fftSize, twF2sScale);
204 pWLocal[j*fftSize*2+fftSize+k] =
205 -FFTLIB_UTIL_sin_i64f_oxX<dataType>(
206 2*PI*j*k/fftSize, twF2sScale);
208 for (k = 0; k < fftSize; k++) {
209 pWLocal[fftSize*fftSize*2+j*fftSize*2+k] =
210 FFTLIB_UTIL_sin_i64f_oxX<dataType>(
211 2*PI*j*k/fftSize, twF2sScale);
212 pWLocal[fftSize*fftSize*2+j*fftSize*2+fftSize+k] =
213 FFTLIB_UTIL_cos_i64f_oxX<dataType>(
214 2*PI*j*k/fftSize, twF2sScale);
template FFTLIB_STATUS FFTLIB_FFT_dftLarge_ixX_cxX_oxX_getSizes< FFTLIB_INT16 >(const FFTLIB_FFT_dftLarge_ixX_cxX_oxX_InitArgs *pKerInitArgs, FFTLIB_bufParams1D_t *bufParamsX, FFTLIB_bufParams1D_t *bufParamsW, FFTLIB_bufParams1D_t *bufParamsY)
template FFTLIB_STATUS FFTLIB_FFT_dftLarge_ixX_cxX_oxX_getSizes< FFTLIB_INT32 >(const FFTLIB_FFT_dftLarge_ixX_cxX_oxX_InitArgs *pKerInitArgs, FFTLIB_bufParams1D_t *bufParamsX, FFTLIB_bufParams1D_t *bufParamsW, FFTLIB_bufParams1D_t *bufParamsY)
FFTLIB_STATUS FFTLIB_FFT_dftLarge_ixX_cxX_oxX_twGen(const FFTLIB_FFT_dftLarge_ixX_cxX_oxX_InitArgs *pKerInitArgs, void *restrict pW, const FFTLIB_bufParams1D_t *bufParamsW)
This is a utility function that generates the DFT computational matrix into the provided buffer.
void FFTLIB_LINALG_matrixMatrixMultiply_ixX_ixX_oxX_perfEst(FFTLIB_kernelHandle handle, const FFTLIB_bufParams2D_t *src0_addr, const FFTLIB_bufParams2D_t *src1_addr, const FFTLIB_bufParams2D_t *dst_addr, uint64_t *archCycles, uint64_t *estCycles, int32_t *caseNumber)
This function estimates the cycles consumed for the kernel execution.
static int32_t FFTLIB_sizeof(uint32_t type)
Inline function that returns the number of bytes per element for a given type of _FFTLIB_data_type_e.
double FFTLIB_D64
Double precision floating point.
void * FFTLIB_kernelHandle
Handle type for FFTLIB operations.
FFTLIB_STATUS_NAME
The enumeration of all status codes.
@ FFTLIB_ERR_NOT_IMPLEMENTED
@ FFTLIB_ERR_INVALID_DIMENSION
#define FFTLIB_MMA_SIZE_16_BIT
type is 16-bit integers
#define FFTLIB_MMA_SIZE_32_BIT
type is 32-bit integers
FFTLIB_STATUS FFTLIB_FFT_dftLarge_ixX_cxX_oxX_getSizes(const FFTLIB_FFT_dftLarge_ixX_cxX_oxX_InitArgs *pKerInitArgs, FFTLIB_bufParams1D_t *bufParamsX, FFTLIB_bufParams1D_t *bufParamsW, FFTLIB_bufParams1D_t *bufParamsY)
This is a query function to calculate the sizes of the input, output and DFT computational matrix buffers.
void FFTLIB_FFT_dftLarge_ixX_cxX_oxX_perfEst(FFTLIB_kernelHandle handle, const FFTLIB_bufParams1D_t *bufParamsX, const FFTLIB_bufParams1D_t *bufParamsY, const FFTLIB_bufParams1D_t *bufParamsW, uint32_t fftSize, uint32_t batchSize, uint64_t *archCycles, uint64_t *estCycles)
This is a utility function that gives an estimate of the cycles consumed for the kernel execution.
Structure containing the parameters for DFT computation.
uint32_t interleave
Flag to indicate if the real and imaginary parts of data are interleaved or not. A value of 1 indicat...
uint32_t batchSize
Size of the batch in terms of the number of channels of DFTs.
uint32_t fftSize
Size of each channel's data in terms of number of complex points.
A structure for a 1 dimensional buffer descriptor.
uint32_t data_type
Values are of type FFTLIB_data_type_e.
uint32_t dim_x
Width of buffer in X dimension in elements.
A structure for a 2 dimensional buffer descriptor.
uint32_t dim_y
Height of buffer in Y dimension in elements.
uint32_t dim_x
Width of buffer in X dimension in elements.
int32_t stride_y
Stride in Y dimension in bytes.
uint32_t data_type
Values are of type FFTLIB_data_type_e.