43 #include "../common/c71/DSPLIB_inlines.h"
/* Offsets of the four streaming-engine (SE) / streaming-address-generator (SA)
 * parameter templates, measured from SE_PARAM_BASE. The slots are laid out
 * back to back, SE_PARAM_SIZE apart, in the order SE0, SE1, SA0, SA1.
 * SE_PARAM_SIZE is not defined in this listing.
 * NOTE(review): presumably these are offsets into pKerPrivArgs->bufPblock;
 * the store/load sites are not visible here -- confirm. */
51 #define SE_PARAM_BASE (0x0000)
52 #define SE_SE0_PARAM_OFFSET (SE_PARAM_BASE)
53 #define SE_SE1_PARAM_OFFSET (SE_SE0_PARAM_OFFSET + SE_PARAM_SIZE)
54 #define SE_SA0_PARAM_OFFSET (SE_SE1_PARAM_OFFSET + SE_PARAM_SIZE)
55 #define SE_SA1_PARAM_OFFSET (SE_SA0_PARAM_OFFSET + SE_PARAM_SIZE)
/* Identifiers of the FIR implementation variants.
 * NOTE(review): presumably the values written to pKerPrivArgs->flag at init
 * time and read back at execution time to pick the code path -- confirm
 * against the selection logic, which is not visible here. */
58 #define FIR_MODE_SMALL_FIR ( 0 )
59 #define FIR_MODE_LARGE_FIR ( 1 )
60 #define FIR_MODE_SMALL_FIR_SS ( 2 )
/* NOTE(review): looks like the unroll depth of the small-FIR filter loop
 * (that loop performs four multiply-accumulates per iteration) -- confirm. */
63 #define SMALL_FIR_LOOP_UNROLL_FACTOR ( 4 )
/* Long-FIR blocking factors: 8 output samples per block in each of 2 groups,
 * which gives 16 in total per block.
 * NOTE(review): this appears to match the 8 + 8 accumulators fed from SE0 and
 * SE1 in the 16-accumulator loop -- confirm. */
66 #define LONG_FIR_OUT_SAMPLES_PER_BLOCK ( 8 )
67 #define LONG_FIR_NUM_OUT_SAMP_GROUPS ( 2 )
68 #define LONG_FIR_TOT_OUT_SAMPLES_PER_BLOCK \
69 ( LONG_FIR_OUT_SAMPLES_PER_BLOCK * LONG_FIR_NUM_OUT_SAMP_GROUPS )
82 template <
typename dataType>
88 uint32_t dataSize = pKerInitArgs->
dataSize;
89 uint32_t filterSize = pKerInitArgs->
filterSize;
90 uint32_t computeLoopOutputSize;
93 printf(
"DSPLIB_DEBUGPRINT DSPLIB_fir_getSizes\n");
100 computeLoopOutputSize = (dataSize - filterSize + 1);
101 bufParamsOut->
dim_x = computeLoopOutputSize;
103 #if DSPLIB_DEBUGPRINT
104 printf(
"DSPLIB_DEBUGPRINT DSPLIB_fir_getSizes bufParamsOut->dim_x %d\n", bufParamsOut->
dim_x);
127 template <
typename dataType>
135 __SE_TEMPLATE_v1 se0Params;
136 __SE_TEMPLATE_v1 se1Params;
137 __SA_TEMPLATE_v1 sa0Params;
138 __SA_TEMPLATE_v1 sa1Params;
141 uint8_t *pBlock = pKerPrivArgs->
bufPblock;
149 int32_t outputSize = pKerPrivArgs->
outputSize;
151 typedef typename c7x::make_full_vector<dataType>::type vec;
152 __SE_VECLEN SE_VECLEN = c7x::se_veclen<vec>::value;
153 __SA_VECLEN SA_VECLEN = c7x::sa_veclen<vec>::value;
154 __SE_ELETYPE SE_ELETYPE = c7x::se_eletype<vec>::value;
155 __SE_ELEDUP SE_ELEDUP = c7x::se_eledup<dataType>::value;
156 int32_t eleCount = c7x::element_count_of<vec>::value;
159 int32_t dataLoopSize;
160 int32_t filterLoopSize;
161 int32_t remFilterLoopSize;
163 #if DSPLIB_DEBUGPRINT
164 printf(
"DSPLIB_DEBUGPRINT SE_VECLEN: %d, SA_VECLEN: %d, SE_ELETYPE: %d "
165 "batchSize %d outputSize %d\n",
166 SE_VECLEN, SA_VECLEN, SE_ELETYPE, batchSize, outputSize);
169 #if DSPLIB_DEBUGPRINT
170 printf(
"DSPLIB_DEBUGPRINT SE_VECLEN: %d, SA_VECLEN: %d, SE_ELETYPE: %d "
171 "SE_ELEDUP %d eleCount %d\n",
172 SE_VECLEN, SA_VECLEN, SE_ELETYPE, SE_ELEDUP, eleCount);
181 dataLoopSize = (outputSize + blkElements - 1 ) / blkElements;
182 dataLoopSize = (dataLoopSize == 0) ? 1 : dataLoopSize;
184 #if DSPLIB_DEBUGPRINT
185 printf(
"DSPLIB_DEBUGPRINT dataLoopSize: %d (dataSize - filterSize) %d 16*eleCount "
186 "%d blkElements %d ((dataSize - filterSize) / blkElements) %d\n",
190 filterLoopSize = filterSize;
191 remFilterLoopSize = 0;
198 blkElements = (eleCount > filterSize) ? filterSize : eleCount;
199 dataLoopSize = (outputSize + blkElements - 1 ) / blkElements;
200 dataLoopSize = (dataLoopSize == 0) ? 1 : dataLoopSize;
209 dataLoopSize = outputSize;
211 int32_t numFilterVec = (filterSize + eleCount - 1)/eleCount;
219 pKerPrivArgs->
flag = flag;
221 #if DSPLIB_DEBUGPRINT
222 printf(
"DSPLIB_DEBUGPRINT FIR size flag: %d\n", flag);
229 se1Params = __gen_SE_TEMPLATE_v1();
231 se1Params.ICNT1 = filterSize;
232 se1Params.DIM1 = -(int32_t) 1;
233 se1Params.ICNT2 = dataLoopSize;
235 se1Params.ICNT3 = batchSize;
236 se1Params.DIM3 = (enableNchCoefs == 0) ? 0 : filterPitch;
237 se1Params.ELETYPE = SE_ELETYPE;
238 se1Params.VECLEN = SE_VECLEN;
239 se1Params.ELEDUP = SE_ELEDUP;
240 se1Params.DIMFMT = __SE_DIMFMT_4D;
241 se1Params.DIR = __SE_DIR_DEC;
246 se0Params = __gen_SE_TEMPLATE_v1();
247 se0Params.ICNT0 = blkElements;
248 se0Params.ICNT1 = filterSize;
249 se0Params.DIM1 = (int32_t) 1;
250 se0Params.ICNT2 = dataLoopSize;
251 se0Params.DIM2 = blkElements;
252 se0Params.ICNT3 = batchSize;
253 se0Params.DIM3 = dataBufferInPitch;
254 se0Params.DECDIM1 = __SE_DECDIM_DIM2;
255 se0Params.DECDIM1SD = __SE_DECDIMSD_DIM1;
256 se0Params.DECDIM1_WIDTH = dataSize;
257 se0Params.ELETYPE = SE_ELETYPE;
258 se0Params.VECLEN = SE_VECLEN;
259 se0Params.DIMFMT = __SE_DIMFMT_4D;
264 sa0Params = __gen_SA_TEMPLATE_v1();
265 sa0Params.ICNT0 = blkElements;
266 sa0Params.ICNT1 = dataLoopSize;
267 sa0Params.DIM1 = blkElements;
268 sa0Params.ICNT2 = batchSize;
269 sa0Params.DIM2 = dataBufferOutPitch;
270 sa0Params.DECDIM1_WIDTH = outputSize;
271 sa0Params.DECDIM1 = __SA_DECDIM_DIM1;
272 sa0Params.VECLEN = SA_VECLEN;
273 sa0Params.DIMFMT = __SA_DIMFMT_3D;
283 sa1Params = __gen_SA_TEMPLATE_v1();
284 sa1Params.ICNT0 = (uint32_t) 1;
285 sa1Params.ICNT1 = (uint32_t) filterSize;
286 sa1Params.DIM1 = -(int32_t) 1;
287 sa1Params.ICNT2 = dataLoopSize;
289 sa1Params.ICNT3 = batchSize;
290 sa1Params.DIM3 = (enableNchCoefs == 0) ? 0 : filterPitch;
291 sa1Params.DIMFMT = __SA_DIMFMT_4D;
292 sa1Params.VECLEN = __SA_VECLEN_1ELEM;
297 se0Params = __gen_SE_TEMPLATE_v1();
299 se0Params.ICNT1 = filterSize;
300 se0Params.DIM1 = (int32_t) 1;
301 se0Params.ICNT2 = dataLoopSize;
303 se0Params.ICNT3 = batchSize;
304 se0Params.DIM3 = dataBufferInPitch;
305 se0Params.DECDIM1 = __SE_DECDIM_DIM2;
306 se0Params.DECDIM1SD = __SE_DECDIMSD_DIM1;
307 se0Params.DECDIM1_WIDTH = dataSize;
308 se0Params.ELETYPE = SE_ELETYPE;
309 se0Params.VECLEN = SE_VECLEN;
310 se0Params.DIMFMT = __SE_DIMFMT_4D;
315 se1Params = se0Params;
321 sa0Params = __gen_SA_TEMPLATE_v1();
322 sa0Params.ICNT0 = eleCount;
324 sa0Params.DIM1 = (int32_t) eleCount;
325 sa0Params.ICNT2 = dataLoopSize;
327 sa0Params.ICNT3 = batchSize;
328 sa0Params.DIM3 = dataBufferOutPitch;
329 sa0Params.DECDIM1 = __SA_DECDIM_DIM2;
330 sa0Params.DECDIM1SD = __SA_DECDIMSD_DIM1;
331 sa0Params.DECDIM1_WIDTH = outputSize;
332 sa0Params.VECLEN = SA_VECLEN;
333 sa0Params.DIMFMT = __SA_DIMFMT_4D;
344 se1Params = __gen_SE_TEMPLATE_v1();
345 se1Params.ICNT0 = filterSize;
346 se1Params.ICNT1 = dataLoopSize;
348 se1Params.ICNT2 = batchSize;
349 se1Params.DIM2 = (enableNchCoefs == 0) ? 0 : filterPitch;
350 se1Params.ELETYPE = SE_ELETYPE;
351 se1Params.VECLEN = SE_VECLEN;
352 se1Params.DIMFMT = __SE_DIMFMT_3D;
353 se1Params.DIR = __SE_DIR_DEC;
358 se0Params = __gen_SE_TEMPLATE_v1();
359 se0Params.ICNT0 = filterSize;
360 se0Params.ICNT1 = dataLoopSize;
361 se0Params.DIM1 = (int32_t) 1;
362 se0Params.ICNT2 = batchSize;
363 se0Params.DIM2 = dataBufferInPitch;
364 se0Params.DECDIM1 = __SE_DECDIM_DIM1;
365 se0Params.DECDIM1_WIDTH = dataSize;
366 se0Params.ELETYPE = SE_ELETYPE;
367 se0Params.VECLEN = SE_VECLEN;
368 se0Params.DIMFMT = __SE_DIMFMT_3D;
373 sa0Params = __gen_SA_TEMPLATE_v1();
375 sa0Params.ICNT1 = dataLoopSize;
377 sa0Params.ICNT2 = batchSize;
378 sa0Params.DIM2 = dataBufferOutPitch;
379 sa0Params.VECLEN = SA_VECLEN;
380 sa0Params.DIMFMT = __SA_DIMFMT_3D;
422 template <
typename dataType>
426 __SE_TEMPLATE_v1 se0Params;
427 __SE_TEMPLATE_v1 se1Params;
428 __SA_TEMPLATE_v1 sa0Params;
429 __SA_TEMPLATE_v1 sa1Params;
432 uint8_t *pBlock = pKerPrivArgs->
bufPblock;
435 uint32_t flag = pKerPrivArgs->
flag;
436 int32_t dataLoopSize;
437 int32_t filterLoopSize;
438 int32_t remFilterLoopSize;
440 dataType *pInLocal = (dataType *) pIn;
441 dataType *pFilterLocal = (dataType *) pFilter;
442 dataType *pOutLocal = (dataType *) pOut;
444 #if DSPLIB_DEBUGPRINT
445 printf(
"Enter DSPLIB_fir_exec_ci\n");
448 typedef typename c7x::make_full_vector<dataType>::type vec;
449 int eleCount = c7x::element_count_of<vec>::value;
451 vec accumulator_0, accumulator_1, accumulator_2, accumulator_3;
452 vec accumulator_x, accumulator_y;
472 #if DSPLIB_DEBUGPRINT
473 printf(
"Enter pFilter %p\n", pFilter);
477 pFilterLocal = pFilterLocal + filterSize - 1;
480 pFilterLocal = pFilterLocal + filterSize;
483 #if DSPLIB_DEBUGPRINT
484 printf(
"Enter pFilter %p\n", pFilterLocal);
488 __SE0_OPEN(pInLocal, se0Params);
492 __SE1_OPEN(pFilterLocal, se1Params);
496 pInLocal = pInLocal + 8 * eleCount;
498 __SE1_OPEN(pInLocal, se1Params);
502 __SA1_OPEN(sa1Params);
506 __SA0_OPEN(sa0Params);
512 #if DSPLIB_DEBUGPRINT
513 printf(
"DSPLIB_DEBUGPRINT dataSize %d filterSize %d dataLoopSize %d\n", pKerPrivArgs->
initArgs.
dataSize, filterSize, dataLoopSize);
516 #if DSPLIB_DEBUGPRINT
517 printf(
"DSPLIB_DEBUGPRINT filterSize %d\n", filterSize);
522 for (int32_t batchSizeCount = 0; batchSizeCount < batchSize; batchSizeCount++) {
523 for (int32_t dataLoopCount = 0; dataLoopCount < dataLoopSize; dataLoopCount++) {
524 accumulator_0 = (vec) 0;
525 accumulator_1 = (vec) 0;
526 accumulator_2 = (vec) 0;
527 accumulator_3 = (vec) 0;
529 for (int32_t filterLoopCount = 0; filterLoopCount < filterLoopSize; filterLoopCount++) {
530 vec tmp0 = c7x::strm_eng<0, vec>::get_adv();
531 vec tmp1 = c7x::strm_eng<1, vec>::get_adv();
532 accumulator_0 += tmp0 * tmp1;
534 vec tmp2 = c7x::strm_eng<0, vec>::get_adv();
535 vec tmp3 = c7x::strm_eng<1, vec>::get_adv();
536 accumulator_1 += tmp2 * tmp3;
538 vec tmp4 = c7x::strm_eng<0, vec>::get_adv();
539 vec tmp5 = c7x::strm_eng<1, vec>::get_adv();
540 accumulator_2 += tmp4 * tmp5;
542 vec tmp6 = c7x::strm_eng<0, vec>::get_adv();
543 vec tmp7 = c7x::strm_eng<1, vec>::get_adv();
544 accumulator_3 += tmp6 * tmp7;
547 for (int32_t filterLoopCount = 0; filterLoopCount < remFilterLoopSize; filterLoopCount++) {
548 vec tmp0 = c7x::strm_eng<0, vec>::get_adv();
549 vec tmp1 = c7x::strm_eng<1, vec>::get_adv();
550 accumulator_0 += tmp0 * tmp1;
553 accumulator_x = accumulator_0 + accumulator_1;
554 accumulator_y = accumulator_2 + accumulator_3;
555 accumulator_x = accumulator_x + accumulator_y;
558 accumulator_x.lo() = accumulator_x.hi() + accumulator_x.lo();
559 accumulator_x.lo().lo() = accumulator_x.lo().hi() + accumulator_x.lo().lo();
560 accumulator_x.lo().lo().lo() = accumulator_x.lo().lo().hi() + accumulator_x.lo().lo().lo();
561 acc = (float) accumulator_x.s[0] + (
float) accumulator_x.s[1];
564 __vpred tmp = c7x::strm_agen<0, dataType>::get_vpred();
565 dataType *VB1 = c7x::strm_agen<0, dataType>::get_adv(pOutLocal);
566 __vstore_pred(tmp, VB1, acc);
572 for (int32_t batchSizeCount = 0; batchSizeCount < batchSize; batchSizeCount++) {
573 for (int32_t dataLoopCount = 0; dataLoopCount < dataLoopSize; dataLoopCount++) {
574 accumulator_0 = (vec) 0;
575 accumulator_1 = (vec) 0;
576 accumulator_2 = (vec) 0;
577 accumulator_3 = (vec) 0;
579 for (int32_t filterLoopCount = 0; filterLoopCount < filterLoopSize; filterLoopCount++) {
580 vec tmp0 = c7x::strm_eng<0, vec>::get_adv();
581 vec tmp1 = c7x::strm_eng<1, vec>::get_adv();
582 #if DSPLIB_DEBUGPRINT
586 accumulator_0 += tmp0 * tmp1;
588 vec tmp2 = c7x::strm_eng<0, vec>::get_adv();
589 vec tmp3 = c7x::strm_eng<1, vec>::get_adv();
590 accumulator_1 += tmp2 * tmp3;
592 vec tmp4 = c7x::strm_eng<0, vec>::get_adv();
593 vec tmp5 = c7x::strm_eng<1, vec>::get_adv();
594 accumulator_2 += tmp4 * tmp5;
596 vec tmp6 = c7x::strm_eng<0, vec>::get_adv();
597 vec tmp7 = c7x::strm_eng<1, vec>::get_adv();
598 accumulator_3 += tmp6 * tmp7;
601 for (int32_t filterLoopCount = 0; filterLoopCount < remFilterLoopSize; filterLoopCount++) {
602 vec tmp0 = c7x::strm_eng<0, vec>::get_adv();
603 vec tmp1 = c7x::strm_eng<1, vec>::get_adv();
604 accumulator_0 += tmp0 * tmp1;
607 accumulator_x = accumulator_0 + accumulator_1;
608 accumulator_y = accumulator_2 + accumulator_3;
609 accumulator_x = accumulator_x + accumulator_y;
610 __vpred tmp = c7x::strm_agen<0, vec>::get_vpred();
611 vec *VB1 = c7x::strm_agen<0, vec>::get_adv(pOutLocal);
612 __vstore_pred(tmp, VB1, accumulator_x);
614 #if DSPLIB_DEBUGPRINT
615 DSPLIB_debugPrintVector(accumulator_x);
622 for (int32_t batchSizeCount = 0; batchSizeCount < batchSize; batchSizeCount++) {
623 for (int32_t dataLoopCount = 0; dataLoopCount < dataLoopSize; dataLoopCount++) {
624 accumulator_0_0 = (vec) 0;
625 accumulator_0_1 = (vec) 0;
626 accumulator_0_2 = (vec) 0;
627 accumulator_0_3 = (vec) 0;
628 accumulator_0_4 = (vec) 0;
629 accumulator_0_5 = (vec) 0;
630 accumulator_0_6 = (vec) 0;
631 accumulator_0_7 = (vec) 0;
632 accumulator_1_0 = (vec) 0;
633 accumulator_1_1 = (vec) 0;
634 accumulator_1_2 = (vec) 0;
635 accumulator_1_3 = (vec) 0;
636 accumulator_1_4 = (vec) 0;
637 accumulator_1_5 = (vec) 0;
638 accumulator_1_6 = (vec) 0;
639 accumulator_1_7 = (vec) 0;
641 for (int32_t filterLoopCount = 0; filterLoopCount < filterLoopSize; filterLoopCount++) {
642 dataType *VB1Dup = c7x::strm_agen<1, dataType>::get_adv(pFilterLocal);
643 vec flt = __vload_dup(VB1Dup);
645 vec tmp0_0 = c7x::strm_eng<0, vec>::get_adv();
646 accumulator_0_0 += tmp0_0 * flt;
648 vec tmp0_1 = c7x::strm_eng<0, vec>::get_adv();
649 accumulator_0_1 += tmp0_1 * flt;
651 vec tmp0_2 = c7x::strm_eng<0, vec>::get_adv();
652 accumulator_0_2 += tmp0_2 * flt;
654 vec tmp0_3 = c7x::strm_eng<0, vec>::get_adv();
655 accumulator_0_3 += tmp0_3 * flt;
657 vec tmp0_4 = c7x::strm_eng<0, vec>::get_adv();
658 accumulator_0_4 += tmp0_4 * flt;
660 vec tmp0_5 = c7x::strm_eng<0, vec>::get_adv();
661 accumulator_0_5 += tmp0_5 * flt;
663 vec tmp0_6 = c7x::strm_eng<0, vec>::get_adv();
664 accumulator_0_6 += tmp0_6 * flt;
666 vec tmp0_7 = c7x::strm_eng<0, vec>::get_adv();
667 accumulator_0_7 += tmp0_7 * flt;
669 vec tmp1_0 = c7x::strm_eng<1, vec>::get_adv();
670 accumulator_1_0 += tmp1_0 * flt;
672 vec tmp1_1 = c7x::strm_eng<1, vec>::get_adv();
673 accumulator_1_1 += tmp1_1 * flt;
675 vec tmp1_2 = c7x::strm_eng<1, vec>::get_adv();
676 accumulator_1_2 += tmp1_2 * flt;
678 vec tmp1_3 = c7x::strm_eng<1, vec>::get_adv();
679 accumulator_1_3 += tmp1_3 * flt;
681 vec tmp1_4 = c7x::strm_eng<1, vec>::get_adv();
682 accumulator_1_4 += tmp1_4 * flt;
684 vec tmp1_5 = c7x::strm_eng<1, vec>::get_adv();
685 accumulator_1_5 += tmp1_5 * flt;
687 vec tmp1_6 = c7x::strm_eng<1, vec>::get_adv();
688 accumulator_1_6 += tmp1_6 * flt;
690 vec tmp1_7 = c7x::strm_eng<1, vec>::get_adv();
691 accumulator_1_7 += tmp1_7 * flt;
692 #if DSPLIB_DEBUGPRINT
712 __vpred tmp = c7x::strm_agen<0, vec>::get_vpred();
713 vec *VB1 = c7x::strm_agen<0, vec>::get_adv(pOutLocal);
714 __vstore_pred(tmp, VB1, accumulator_0_0);
716 tmp = c7x::strm_agen<0, vec>::get_vpred();
717 VB1 = c7x::strm_agen<0, vec>::get_adv(pOutLocal);
718 __vstore_pred(tmp, VB1, accumulator_0_1);
720 tmp = c7x::strm_agen<0, vec>::get_vpred();
721 VB1 = c7x::strm_agen<0, vec>::get_adv(pOutLocal);
722 __vstore_pred(tmp, VB1, accumulator_0_2);
724 tmp = c7x::strm_agen<0, vec>::get_vpred();
725 VB1 = c7x::strm_agen<0, vec>::get_adv(pOutLocal);
726 __vstore_pred(tmp, VB1, accumulator_0_3);
728 tmp = c7x::strm_agen<0, vec>::get_vpred();
729 VB1 = c7x::strm_agen<0, vec>::get_adv(pOutLocal);
730 __vstore_pred(tmp, VB1, accumulator_0_4);
732 tmp = c7x::strm_agen<0, vec>::get_vpred();
733 VB1 = c7x::strm_agen<0, vec>::get_adv(pOutLocal);
734 __vstore_pred(tmp, VB1, accumulator_0_5);
736 tmp = c7x::strm_agen<0, vec>::get_vpred();
737 VB1 = c7x::strm_agen<0, vec>::get_adv(pOutLocal);
738 __vstore_pred(tmp, VB1, accumulator_0_6);
740 tmp = c7x::strm_agen<0, vec>::get_vpred();
741 VB1 = c7x::strm_agen<0, vec>::get_adv(pOutLocal);
742 __vstore_pred(tmp, VB1, accumulator_0_7);
744 tmp = c7x::strm_agen<0, vec>::get_vpred();
745 VB1 = c7x::strm_agen<0, vec>::get_adv(pOutLocal);
746 __vstore_pred(tmp, VB1, accumulator_1_0);
748 tmp = c7x::strm_agen<0, vec>::get_vpred();
749 VB1 = c7x::strm_agen<0, vec>::get_adv(pOutLocal);
750 __vstore_pred(tmp, VB1, accumulator_1_1);
752 tmp = c7x::strm_agen<0, vec>::get_vpred();
753 VB1 = c7x::strm_agen<0, vec>::get_adv(pOutLocal);
754 __vstore_pred(tmp, VB1, accumulator_1_2);
756 tmp = c7x::strm_agen<0, vec>::get_vpred();
757 VB1 = c7x::strm_agen<0, vec>::get_adv(pOutLocal);
758 __vstore_pred(tmp, VB1, accumulator_1_3);
760 tmp = c7x::strm_agen<0, vec>::get_vpred();
761 VB1 = c7x::strm_agen<0, vec>::get_adv(pOutLocal);
762 __vstore_pred(tmp, VB1, accumulator_1_4);
764 tmp = c7x::strm_agen<0, vec>::get_vpred();
765 VB1 = c7x::strm_agen<0, vec>::get_adv(pOutLocal);
766 __vstore_pred(tmp, VB1, accumulator_1_5);
768 tmp = c7x::strm_agen<0, vec>::get_vpred();
769 VB1 = c7x::strm_agen<0, vec>::get_adv(pOutLocal);
770 __vstore_pred(tmp, VB1, accumulator_1_6);
772 tmp = c7x::strm_agen<0, vec>::get_vpred();
773 VB1 = c7x::strm_agen<0, vec>::get_adv(pOutLocal);
774 __vstore_pred(tmp, VB1, accumulator_1_7);
776 #if DSPLIB_DEBUGPRINT
807 void *restrict pFilter,
808 void *restrict pOut);
812 void *restrict pFilter,
813 void *restrict pOut);
DSPLIB_STATUS DSPLIB_fir_init_ci(DSPLIB_kernelHandle handle, const DSPLIB_bufParams2D_t *bufParamsIn, const DSPLIB_bufParams2D_t *bufParamsFilter, const DSPLIB_bufParams2D_t *bufParamsOut, const DSPLIB_fir_InitArgs *pKerInitArgs)
This function is the initialization function for the C7x implementation of the kernel....
#define FIR_MODE_LARGE_FIR
template DSPLIB_STATUS DSPLIB_fir_exec_ci< float >(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pFilter, void *restrict pOut)
template DSPLIB_STATUS DSPLIB_fir_getSizes< float >(const DSPLIB_fir_InitArgs *pKerInitArgs, DSPLIB_bufParams2D_t *bufParamsIn, DSPLIB_bufParams2D_t *bufParamsFilter, DSPLIB_bufParams2D_t *bufParamsOut)
template DSPLIB_STATUS DSPLIB_fir_exec_ci< int16_t >(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pFilter, void *restrict pOut)
#define SE_SE0_PARAM_OFFSET
#define LONG_FIR_TOT_OUT_SAMPLES_PER_BLOCK
template DSPLIB_STATUS DSPLIB_fir_init_ci< float >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams2D_t *bufParamsIn, const DSPLIB_bufParams2D_t *bufParamsFilter, const DSPLIB_bufParams2D_t *bufParamsOut, const DSPLIB_fir_InitArgs *pKerInitArgs)
#define SE_SA1_PARAM_OFFSET
template DSPLIB_STATUS DSPLIB_fir_exec_ci< int8_t >(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pFilter, void *restrict pOut)
template DSPLIB_STATUS DSPLIB_fir_getSizes< int8_t >(const DSPLIB_fir_InitArgs *pKerInitArgs, DSPLIB_bufParams2D_t *bufParamsIn, DSPLIB_bufParams2D_t *bufParamsFilter, DSPLIB_bufParams2D_t *bufParamsOut)
#define FIR_MODE_SMALL_FIR
DSPLIB_STATUS DSPLIB_fir_exec_ci(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pFilter, void *restrict pOut)
This function is the main execution function for the C7x implementation of the kernel....
#define SE_SE1_PARAM_OFFSET
#define SMALL_FIR_LOOP_UNROLL_FACTOR
template DSPLIB_STATUS DSPLIB_fir_getSizes< int32_t >(const DSPLIB_fir_InitArgs *pKerInitArgs, DSPLIB_bufParams2D_t *bufParamsIn, DSPLIB_bufParams2D_t *bufParamsFilter, DSPLIB_bufParams2D_t *bufParamsOut)
template DSPLIB_STATUS DSPLIB_fir_getSizes< int16_t >(const DSPLIB_fir_InitArgs *pKerInitArgs, DSPLIB_bufParams2D_t *bufParamsIn, DSPLIB_bufParams2D_t *bufParamsFilter, DSPLIB_bufParams2D_t *bufParamsOut)
template DSPLIB_STATUS DSPLIB_fir_exec_ci< int32_t >(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pFilter, void *restrict pOut)
template DSPLIB_STATUS DSPLIB_fir_init_ci< int32_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams2D_t *bufParamsIn, const DSPLIB_bufParams2D_t *bufParamsFilter, const DSPLIB_bufParams2D_t *bufParamsOut, const DSPLIB_fir_InitArgs *pKerInitArgs)
#define FIR_MODE_SMALL_FIR_SS
#define LONG_FIR_OUT_SAMPLES_PER_BLOCK
template DSPLIB_STATUS DSPLIB_fir_init_ci< int16_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams2D_t *bufParamsIn, const DSPLIB_bufParams2D_t *bufParamsFilter, const DSPLIB_bufParams2D_t *bufParamsOut, const DSPLIB_fir_InitArgs *pKerInitArgs)
template DSPLIB_STATUS DSPLIB_fir_init_ci< int8_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams2D_t *bufParamsIn, const DSPLIB_bufParams2D_t *bufParamsFilter, const DSPLIB_bufParams2D_t *bufParamsOut, const DSPLIB_fir_InitArgs *pKerInitArgs)
#define SE_SA0_PARAM_OFFSET
Header file for kernel's internal use. For the kernel's interface, please see DSPLIB_fir.
DSPLIB_STATUS_NAME
The enumeration of all status codes.
void * DSPLIB_kernelHandle
Handle type for DSPLIB operations.
DSPLIB_STATUS DSPLIB_fir_getSizes(const DSPLIB_fir_InitArgs *pKerInitArgs, DSPLIB_bufParams2D_t *bufParamsIn, DSPLIB_bufParams2D_t *bufParamsFilter, DSPLIB_bufParams2D_t *bufParamsOut)
This is a query function to calculate the sizes of input, filter and the output buffers.
void DSPLIB_fir_perfEst(DSPLIB_kernelHandle handle, const DSPLIB_bufParams2D_t *bufParamsIn, const DSPLIB_bufParams2D_t *bufParamsFilter, const DSPLIB_bufParams2D_t *bufParamsOut, uint64_t *archCycles, uint64_t *estCycles)
This is a utility function that gives an estimate of the cycles consumed for the kernel execution.
A structure for a 2 dimensional buffer descriptor.
uint32_t dim_x
Width of buffer in X dimension in elements.
Structure containing the parameters to initialize the kernel.
int32_t dataSize
Size of input data
uint32_t enableNchCoefs
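Flag to enable per-channel filter coefficients: 0 => the same coefficients are used for all channels in the batch; 1 => each channel in the batch uses its own set of coefficients (the filter pointer advances by the filter pitch from one channel to the next).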
int32_t batchSize
Size of batch in terms of number of channels of input data
int32_t filterSize
Size of filter
Structure that is reserved for internal use by the kernel.
int32_t remFilterLoopSize
uint32_t flag
Array to store the configuration prepared by DSPLIB_fir_init_ci that will be retrieved and used by DS...
uint32_t dataBufferOutPitch
Pitch of the output buffer between successive batches; stored by DSPLIB_fir_init so that it can be retrieved and used later by the kernel's implementation functions.
uint8_t bufPblock[DSPLIB_FIR_IXX_IXX_OXX_PBLOCK_SIZE]
uint32_t filterPitch
Pitch of the filter coefficient buffer between successive batches; stored by DSPLIB_fir_init so that it can be retrieved and used later by the kernel's implementation functions.
uint32_t dataBufferInPitch
Pitch of the input buffer between successive batches; stored by DSPLIB_fir_init so that it can be retrieved and used later by the kernel's implementation functions.
DSPLIB_fir_InitArgs initArgs
Structure holding initialization parameters