47 #include "../common/c71/DSPLIB_inlines.h"
57 #define SE_PARAM_BASE (0x0000)
58 #define SE_SE0_PARAM_OFFSET (SE_PARAM_BASE)
// Fragment of a kernel-init template (the signature, braces and several lines
// are elided in this listing). The visible lines configure se0Params with
// ICNT0 = blockSize, a 1-D dimension format, and the element type / vector
// length derived from dataType's full-width vector type.
// NOTE(review): the symbol index at the end of this listing suggests this is
// DSPLIB_dotprod_init_ci<dataType> -- confirm against the full file.
61 template <
typename dataType>
// Streaming-engine template and the two settings that are derived below.
68 __SE_TEMPLATE_v1 se0Params;
70 __SE_ELETYPE SE_ELETYPE;
71 __SE_VECLEN SE_VECLEN;
// Both values are read from the kernel's private args. pBlock is not used on
// any line visible here; presumably the template is stored into it on an
// elided line -- TODO confirm.
75 uint8_t *pBlock = pKerPrivArgs->
bufPblock;
76 int32_t blockSize = pKerPrivArgs->
blockSize;
// vec is the full-width vector type for dataType; the SE vector length and
// element type are taken from its c7x traits.
78 typedef typename c7x::make_full_vector<dataType>::type vec;
80 SE_VECLEN = c7x::se_veclen<vec>::value;
81 SE_ELETYPE = c7x::se_eletype<vec>::value;
// Number of elements in one vec, used only for the diagnostic print below.
// NOTE(review): no `#if DSPLIB_DEBUGPRINT` guard is visible around this printf
// (the preceding original line is elided here), whereas the promoting init
// variant later in this listing guards the same print -- confirm.
84 int32_t eleCount = c7x::element_count_of<vec>::value;
85 printf(
"Enter eleCount %d\n", eleCount);
// Start from the template returned by __gen_SE_TEMPLATE_v1() and override only
// the fields needed here.
91 se0Params = __gen_SE_TEMPLATE_v1();
93 se0Params.ICNT0 = blockSize;
94 se0Params.ELETYPE = SE_ELETYPE;
95 se0Params.VECLEN = SE_VECLEN;
96 se0Params.DIMFMT = __SE_DIMFMT_1D;
// Fragment of a second init variant (signature and braces elided). Unlike the
// generic version it also sets se0Params.PROMOTE and mixes two vector types:
// the element type comes from c7x::char_vec while the vector length comes from
// c7x::short_vec.
// NOTE(review): presumably the int8_t specialization, with 8-bit elements
// delivered as sign-extended 16-bit lanes -- confirm against the full file.
111 __SE_TEMPLATE_v1 se0Params;
113 __SE_ELETYPE SE_ELETYPE;
114 __SE_VECLEN SE_VECLEN;
116 __SE_PROMOTE SE_PROMOTE;
// Both values are read from the kernel's private args; pBlock is not used on
// any line visible here.
120 uint8_t *pBlock = pKerPrivArgs->
bufPblock;
121 int32_t blockSize = pKerPrivArgs->
blockSize;
// Vector length of a short vector, element type of a char vector, combined
// with a 2x sign-extending promotion.
124 SE_VECLEN = c7x::se_veclen<c7x::short_vec>::value;
125 SE_ELETYPE = c7x::se_eletype<c7x::char_vec>::value;
126 SE_PROMOTE = __SE_PROMOTE_2X_SIGNEXT;
// Debug-only print (the matching #endif is elided in this listing).
// NOTE(review): `char_vec` is unqualified here while `c7x::char_vec` is used
// above; this line is only compiled when DSPLIB_DEBUGPRINT is non-zero, so a
// missing using-declaration would go unnoticed in normal builds -- confirm.
127 #if DSPLIB_DEBUGPRINT
128 int32_t eleCount = c7x::element_count_of<char_vec>::value;
129 printf(
"Enter eleCount %d\n", eleCount);
// Start from the template returned by __gen_SE_TEMPLATE_v1() and override the
// fields needed here, including the promotion mode.
135 se0Params = __gen_SE_TEMPLATE_v1();
137 se0Params.ICNT0 = blockSize;
138 se0Params.ELETYPE = SE_ELETYPE;
139 se0Params.VECLEN = SE_VECLEN;
140 se0Params.DIMFMT = __SE_DIMFMT_1D;
141 se0Params.PROMOTE = SE_PROMOTE;
// Fragment of a float horizontal-sum helper (signature, braces and some lines
// are elided). Each visible step adds the upper half of a (sub)vector onto its
// lower half; the final statement adds elements s[0] and s[1] as floats.
// NOTE(review): the symbol index at the end of this listing shows
// `static float DSPLIB_horiAdd(c7x::float_vec vector)` -- presumably this body.
192 #pragma FUNC_ALWAYS_INLINE
// Fold hi onto lo at successively narrower widths.
197 vector.lo() = vector.hi() + vector.lo();
198 vector.lo().lo() = vector.lo().hi() + vector.lo().lo();
200 vector.lo().lo().lo() = vector.lo().lo().hi() + vector.lo().lo().lo();
// Only the first two lanes are combined into the scalar result.
201 sum = (float) vector.s[0] + (
float) vector.s[1];
// Fragment of a double-precision horizontal-sum helper (signature, braces and
// some lines are elided). It uses one fewer fold step than the float fragment
// above and then adds elements s[0] and s[1] as doubles.
206 #pragma FUNC_ALWAYS_INLINE
// Fold hi onto lo at successively narrower widths.
211 vector.lo() = vector.hi() + vector.lo();
214 vector.lo().lo() = vector.lo().hi() + vector.lo().lo();
// Only the first two lanes are combined into the scalar result.
216 sum = (double) vector.s[0] + (
double) vector.s[1];
// Fragment of the generic exec template (signature, braces, #endif lines and
// the accumulator arithmetic are elided in this listing). The visible lines
// open streaming engines 0 and 1 on the two inputs with the same se0Params and
// consume four vector pairs per loop iteration.
// NOTE(review): the symbol index at the end of this listing suggests this is
// DSPLIB_dotprod_exec_ci<dataType> -- confirm against the full file.
227 template <
typename dataType>
232 int32_t blockSize = pKerPrivArgs->
blockSize;
// se0Params is not assigned on any visible line; presumably it is loaded from
// pBlock on an elided line before the SE opens -- TODO confirm.
234 __SE_TEMPLATE_v1 se0Params;
// Typed views of the untyped input/output pointers.
236 dataType *restrict pInLocal1 = (dataType *) pIn1;
237 dataType *restrict pInLocal2 = (dataType *) pIn2;
238 dataType *restrict pOutLocal = (dataType *) pOut;
240 #if DSPLIB_DEBUGPRINT
241 printf(
"Enter DSPLIB_dotprod_exec_ci\n");
// eleCount is the number of dataType elements in one full-width vector.
244 typedef typename c7x::make_full_vector<dataType>::type vec;
245 int32_t eleCount = c7x::element_count_of<vec>::value;
249 #if DSPLIB_DEBUGPRINT
250 printf(
"Enter eleCount %d\n", eleCount);
252 uint8_t *pBlock = pKerPrivArgs->
bufPblock;
// Both input streams are opened with the same template.
257 __SE0_OPEN(pInLocal1, se0Params);
258 __SE1_OPEN(pInLocal2, se0Params);
260 #if DSPLIB_DEBUGPRINT
261 printf(
"DSPLIB_DEBUGPRINT blockSize %d\n", blockSize);
// Unrolled by four: each iteration advances counter by 4 * eleCount and pulls
// four vectors from each stream (a/c/e/g from SE0, b/d/f/h from SE1).
276 for (int32_t counter = 0; counter < blockSize; counter += eleCount * 4) {
277 vec a = c7x::strm_eng<0, vec>::get_adv();
278 vec b = c7x::strm_eng<1, vec>::get_adv();
282 vec c = c7x::strm_eng<0, vec>::get_adv();
283 vec d = c7x::strm_eng<1, vec>::get_adv();
287 vec e = c7x::strm_eng<0, vec>::get_adv();
288 vec f = c7x::strm_eng<1, vec>::get_adv();
292 vec g = c7x::strm_eng<0, vec>::get_adv();
293 vec h = c7x::strm_eng<1, vec>::get_adv();
// out_ab..out_gh are defined on elided lines; presumably one partial
// accumulator per vector pair -- TODO confirm.
298 out = out_ab + out_cd + out_ef + out_gh;
// Fragment of an exec specialization (signature, braces, #endif lines and the
// accumulator declaration are elided). Inputs are viewed as int16_t, the output
// as int32_t; products are accumulated with __vdotp4hd_vvv and the result is
// narrowed to int32_t.
// NOTE(review): presumably the int8_t specialization, reading elements that
// the streaming engine has promoted to 16 bits -- confirm against the full file.
324 int32_t blockSize = pKerPrivArgs->
blockSize;
// Not assigned on any visible line; presumably loaded from pBlock -- TODO confirm.
326 __SE_TEMPLATE_v1 se0Params;
328 int16_t *restrict pInLocal1 = (int16_t *) pIn1;
329 int16_t *restrict pInLocal2 = (int16_t *) pIn2;
330 int32_t *restrict pOutLocal = (int32_t *) pOut;
332 #if DSPLIB_DEBUGPRINT
333 printf(
"Enter DSPLIB_dotprod_exec_ci\n");
// eleCount is the number of int16_t elements in one full-width vector.
336 typedef typename c7x::make_full_vector<int16_t>::type vec;
337 int32_t eleCount = c7x::element_count_of<vec>::value;
339 #if DSPLIB_DEBUGPRINT
340 printf(
"Enter eleCount %d\n", eleCount);
342 uint8_t *pBlock = pKerPrivArgs->
bufPblock;
// Both input streams are opened with the same template.
347 __SE0_OPEN(pInLocal1, se0Params);
348 __SE1_OPEN(pInLocal2, se0Params);
350 #if DSPLIB_DEBUGPRINT
351 printf(
"DSPLIB_DEBUGPRINT blockSize %d\n", blockSize);
// Accumulator vector type: full-width vector of int64_t.
354 typedef typename c7x::make_full_vector<int64_t>::type vec_out;
// One vector from each stream per iteration, advancing by eleCount.
359 for (int32_t counter = 0; counter < blockSize; counter += eleCount) {
360 vec a = c7x::strm_eng<0, vec>::get_adv();
361 vec b = c7x::strm_eng<1, vec>::get_adv();
363 out += __vdotp4hd_vvv(a, b);
// The value from __horizontal_add is explicitly narrowed to int32_t here.
368 result = (int32_t) __horizontal_add(out);
// Fragment of an exec specialization (signature, braces, #endif lines and the
// accumulator declaration are elided). Inputs are viewed as uint8_t, the output
// as uint32_t; products are accumulated with __vdotp4ubw_vvv.
// NOTE(review): presumably DSPLIB_dotprod_exec_ci<uint8_t> -- confirm.
389 int32_t blockSize = pKerPrivArgs->
blockSize;
// Not assigned on any visible line; presumably loaded from pBlock -- TODO confirm.
391 __SE_TEMPLATE_v1 se0Params;
393 uint8_t *restrict pInLocal1 = (uint8_t *) pIn1;
394 uint8_t *restrict pInLocal2 = (uint8_t *) pIn2;
395 uint32_t *restrict pOutLocal = (uint32_t *) pOut;
397 #if DSPLIB_DEBUGPRINT
398 printf(
"Enter DSPLIB_dotprod_exec_ci\n");
// eleCount is the number of uint8_t elements in one full-width vector.
401 typedef typename c7x::make_full_vector<uint8_t>::type vec;
402 int32_t eleCount = c7x::element_count_of<vec>::value;
404 #if DSPLIB_DEBUGPRINT
405 printf(
"Enter eleCount %d\n", eleCount);
407 uint8_t *pBlock = pKerPrivArgs->
bufPblock;
// Both input streams are opened with the same template.
412 __SE0_OPEN(pInLocal1, se0Params);
413 __SE1_OPEN(pInLocal2, se0Params);
415 #if DSPLIB_DEBUGPRINT
416 printf(
"DSPLIB_DEBUGPRINT blockSize %d\n", blockSize);
// Accumulator vector type: full-width vector of uint32_t.
419 typedef typename c7x::make_full_vector<uint32_t>::type vec_out;
// One vector from each stream per iteration, advancing by eleCount.
425 for (int32_t counter = 0; counter < blockSize; counter += eleCount) {
426 vec a = c7x::strm_eng<0, vec>::get_adv();
427 vec b = c7x::strm_eng<1, vec>::get_adv();
429 out += __vdotp4ubw_vvv(a, b);
// The value from __horizontal_add is cast to uint32_t.
434 result = (uint32_t) __horizontal_add(out);
// Fragment of an exec specialization (signature, braces, #endif lines and the
// accumulator declaration are elided). Inputs are viewed as int16_t, the output
// as int64_t; products are accumulated with __vdotp4hd_vvv and the result is
// kept at the width returned by __horizontal_add (no cast).
// NOTE(review): presumably DSPLIB_dotprod_exec_ci<int16_t> -- confirm.
455 int32_t blockSize = pKerPrivArgs->
blockSize;
// Not assigned on any visible line; presumably loaded from pBlock -- TODO confirm.
457 __SE_TEMPLATE_v1 se0Params;
459 int16_t *restrict pInLocal1 = (int16_t *) pIn1;
460 int16_t *restrict pInLocal2 = (int16_t *) pIn2;
461 int64_t *restrict pOutLocal = (int64_t *) pOut;
463 #if DSPLIB_DEBUGPRINT
464 printf(
"Enter DSPLIB_dotprod_exec_ci\n");
// eleCount is the number of int16_t elements in one full-width vector.
467 typedef typename c7x::make_full_vector<int16_t>::type vec;
468 int32_t eleCount = c7x::element_count_of<vec>::value;
470 #if DSPLIB_DEBUGPRINT
471 printf(
"Enter eleCount %d\n", eleCount);
473 uint8_t *pBlock = pKerPrivArgs->
bufPblock;
// Both input streams are opened with the same template.
478 __SE0_OPEN(pInLocal1, se0Params);
479 __SE1_OPEN(pInLocal2, se0Params);
481 #if DSPLIB_DEBUGPRINT
482 printf(
"DSPLIB_DEBUGPRINT blockSize %d\n", blockSize);
// Accumulator vector type: full-width vector of int64_t.
485 typedef typename c7x::make_full_vector<int64_t>::type vec_out;
// One vector from each stream per iteration, advancing by eleCount.
490 for (int32_t counter = 0; counter < blockSize; counter += eleCount) {
491 vec a = c7x::strm_eng<0, vec>::get_adv();
492 vec b = c7x::strm_eng<1, vec>::get_adv();
494 out += __vdotp4hd_vvv(a, b);
498 result = __horizontal_add(out);
// Fragment of an exec specialization (signature, braces, #endif lines and the
// accumulator declaration are elided). Inputs are viewed as uint16_t, the
// output as uint64_t; products are accumulated with __vdotp4uhd_vvv.
// NOTE(review): presumably DSPLIB_dotprod_exec_ci<uint16_t> -- confirm.
519 int32_t blockSize = pKerPrivArgs->
blockSize;
// Not assigned on any visible line; presumably loaded from pBlock -- TODO confirm.
521 __SE_TEMPLATE_v1 se0Params;
523 uint16_t *restrict pInLocal1 = (uint16_t *) pIn1;
524 uint16_t *restrict pInLocal2 = (uint16_t *) pIn2;
525 uint64_t *restrict pOutLocal = (uint64_t *) pOut;
527 #if DSPLIB_DEBUGPRINT
528 printf(
"Enter DSPLIB_dotprod_exec_ci\n");
// eleCount is the number of uint16_t elements in one full-width vector.
531 typedef typename c7x::make_full_vector<uint16_t>::type vec;
532 int32_t eleCount = c7x::element_count_of<vec>::value;
534 #if DSPLIB_DEBUGPRINT
535 printf(
"Enter eleCount %d\n", eleCount);
537 uint8_t *pBlock = pKerPrivArgs->
bufPblock;
// Both input streams are opened with the same template.
542 __SE0_OPEN(pInLocal1, se0Params);
543 __SE1_OPEN(pInLocal2, se0Params);
545 #if DSPLIB_DEBUGPRINT
546 printf(
"DSPLIB_DEBUGPRINT blockSize %d\n", blockSize);
// Accumulator vector type: full-width vector of uint64_t.
549 typedef typename c7x::make_full_vector<uint64_t>::type vec_out;
// One vector from each stream per iteration, advancing by eleCount.
554 for (int32_t counter = 0; counter < blockSize; counter += eleCount) {
555 vec a = c7x::strm_eng<0, vec>::get_adv();
556 vec b = c7x::strm_eng<1, vec>::get_adv();
558 out += __vdotp4uhd_vvv(a, b);
562 result = __horizontal_add(out);
// Fragment of an exec specialization (signature, braces, #endif lines and the
// declarations of out/out0/out1 are elided). Inputs are viewed as int32_t, the
// output as int64_t; each iteration calls __vmpywd_vvw, which takes out0 and
// out1 as additional arguments, and adds their sum to the accumulator.
// NOTE(review): presumably DSPLIB_dotprod_exec_ci<int32_t> -- confirm.
583 int32_t blockSize = pKerPrivArgs->
blockSize;
// Not assigned on any visible line; presumably loaded from pBlock -- TODO confirm.
585 __SE_TEMPLATE_v1 se0Params;
587 int32_t *restrict pInLocal1 = (int32_t *) pIn1;
588 int32_t *restrict pInLocal2 = (int32_t *) pIn2;
589 int64_t *restrict pOutLocal = (int64_t *) pOut;
591 #if DSPLIB_DEBUGPRINT
592 printf(
"Enter DSPLIB_dotprod_exec_ci\n");
// eleCount is the number of int32_t elements in one full-width vector.
595 typedef typename c7x::make_full_vector<int32_t>::type vec;
596 int32_t eleCount = c7x::element_count_of<vec>::value;
598 #if DSPLIB_DEBUGPRINT
599 printf(
"Enter eleCount %d\n", eleCount);
601 uint8_t *pBlock = pKerPrivArgs->
bufPblock;
// Both input streams are opened with the same template.
606 __SE0_OPEN(pInLocal1, se0Params);
607 __SE1_OPEN(pInLocal2, se0Params);
609 #if DSPLIB_DEBUGPRINT
610 printf(
"DSPLIB_DEBUGPRINT blockSize %d\n", blockSize);
// Accumulator vector type: full-width vector of int64_t.
613 typedef typename c7x::make_full_vector<int64_t>::type vec_out;
// One vector from each stream per iteration, advancing by eleCount.
620 for (int32_t counter = 0; counter < blockSize; counter += eleCount) {
621 vec a = c7x::strm_eng<0, vec>::get_adv();
622 vec b = c7x::strm_eng<1, vec>::get_adv();
// Presumably out0/out1 receive the two halves of the widened products --
// TODO confirm against the intrinsic's documentation.
624 __vmpywd_vvw(a, b, out0, out1);
625 out += (out0 + out1);
629 result = __horizontal_add(out);
// Fragment of an exec specialization (signature, braces, #endif lines and the
// declarations of out/out0/out1 are elided). Inputs are viewed as uint32_t, the
// output as uint64_t; each iteration calls __vmpyuwd_vvw, which takes out0 and
// out1 as additional arguments, and adds their sum to the accumulator.
// NOTE(review): presumably DSPLIB_dotprod_exec_ci<uint32_t> -- confirm.
650 int32_t blockSize = pKerPrivArgs->
blockSize;
// Not assigned on any visible line; presumably loaded from pBlock -- TODO confirm.
652 __SE_TEMPLATE_v1 se0Params;
654 uint32_t *restrict pInLocal1 = (uint32_t *) pIn1;
655 uint32_t *restrict pInLocal2 = (uint32_t *) pIn2;
656 uint64_t *restrict pOutLocal = (uint64_t *) pOut;
658 #if DSPLIB_DEBUGPRINT
659 printf(
"Enter DSPLIB_dotprod_exec_ci\n");
// eleCount is the number of uint32_t elements in one full-width vector.
662 typedef typename c7x::make_full_vector<uint32_t>::type vec;
663 int32_t eleCount = c7x::element_count_of<vec>::value;
665 #if DSPLIB_DEBUGPRINT
666 printf(
"Enter eleCount %d\n", eleCount);
668 uint8_t *pBlock = pKerPrivArgs->
bufPblock;
// Both input streams are opened with the same template.
673 __SE0_OPEN(pInLocal1, se0Params);
674 __SE1_OPEN(pInLocal2, se0Params);
676 #if DSPLIB_DEBUGPRINT
677 printf(
"DSPLIB_DEBUGPRINT blockSize %d\n", blockSize);
// Accumulator vector type: full-width vector of uint64_t.
680 typedef typename c7x::make_full_vector<uint64_t>::type vec_out;
// One vector from each stream per iteration, advancing by eleCount.
687 for (int32_t counter = 0; counter < blockSize; counter += eleCount) {
688 vec a = c7x::strm_eng<0, vec>::get_adv();
689 vec b = c7x::strm_eng<1, vec>::get_adv();
// Presumably out0/out1 receive the two halves of the widened products --
// TODO confirm against the intrinsic's documentation.
693 __vmpyuwd_vvw(a, b, out0, out1);
694 out += (out0 + out1);
698 result = __horizontal_add(out);
711 void *restrict pOut);
716 void *restrict pOut);
721 void *restrict pOut);
726 void *restrict pOut);
731 void *restrict pOut);
736 void *restrict pOut);
741 void *restrict pOut);
746 void *restrict pOut);
template DSPLIB_STATUS DSPLIB_dotprod_init_ci< int16_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_dotprod_InitArgs *pKerInitArgs)
template DSPLIB_STATUS DSPLIB_dotprod_init_ci< double >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_dotprod_InitArgs *pKerInitArgs)
#define SE_SE0_PARAM_OFFSET
template DSPLIB_STATUS DSPLIB_dotprod_exec_ci< float >(DSPLIB_kernelHandle handle, void *restrict pIn1, void *restrict pIn2, void *restrict pOut)
template DSPLIB_STATUS DSPLIB_dotprod_init_ci< uint32_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_dotprod_InitArgs *pKerInitArgs)
DSPLIB_STATUS DSPLIB_dotprod_exec_ci< int16_t >(DSPLIB_kernelHandle handle, void *restrict pIn1, void *restrict pIn2, void *restrict pOut)
DSPLIB_STATUS DSPLIB_dotprod_exec_ci< uint8_t >(DSPLIB_kernelHandle handle, void *restrict pIn1, void *restrict pIn2, void *restrict pOut)
DSPLIB_STATUS DSPLIB_dotprod_exec_ci< uint32_t >(DSPLIB_kernelHandle handle, void *restrict pIn1, void *restrict pIn2, void *restrict pOut)
DSPLIB_STATUS DSPLIB_dotprod_init_ci< int8_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_dotprod_InitArgs *pKerInitArgs)
DSPLIB_STATUS DSPLIB_dotprod_exec_ci< int32_t >(DSPLIB_kernelHandle handle, void *restrict pIn1, void *restrict pIn2, void *restrict pOut)
DSPLIB_STATUS DSPLIB_dotprod_exec_ci< uint16_t >(DSPLIB_kernelHandle handle, void *restrict pIn1, void *restrict pIn2, void *restrict pOut)
template DSPLIB_STATUS DSPLIB_dotprod_exec_ci< double >(DSPLIB_kernelHandle handle, void *restrict pIn1, void *restrict pIn2, void *restrict pOut)
DSPLIB_STATUS DSPLIB_dotprod_exec_ci< int8_t >(DSPLIB_kernelHandle handle, void *restrict pIn1, void *restrict pIn2, void *restrict pOut)
template DSPLIB_STATUS DSPLIB_dotprod_init_ci< uint8_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_dotprod_InitArgs *pKerInitArgs)
DSPLIB_STATUS DSPLIB_dotprod_exec_ci(DSPLIB_kernelHandle handle, void *restrict pIn1, void *restrict pIn2, void *restrict pOut)
This function is the main execution function for the C7x implementation of the kernel....
template DSPLIB_STATUS DSPLIB_dotprod_init_ci< float >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_dotprod_InitArgs *pKerInitArgs)
static float DSPLIB_horiAdd(c7x::float_vec vector)
template DSPLIB_STATUS DSPLIB_dotprod_init_ci< uint16_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_dotprod_InitArgs *pKerInitArgs)
DSPLIB_STATUS DSPLIB_dotprod_init_ci(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_dotprod_InitArgs *pKerInitArgs)
This function is the initialization function for the C7x implementation of the kernel....
template DSPLIB_STATUS DSPLIB_dotprod_init_ci< int32_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_dotprod_InitArgs *pKerInitArgs)
Header file for kernel's internal use. For the kernel's interface, please see DSPLIB_dotprod.
DSPLIB_STATUS_NAME
The enumeration of all status codes.
void * DSPLIB_kernelHandle
Handle type for DSPLIB operations.
A structure for a 1 dimensional buffer descriptor.
Structure containing the parameters to initialize the kernel.
Structure that is reserved for internal use by the kernel.
int32_t blockSize
Size of input buffer for different batches DSPLIB_dotprod_init that will be retrieved and used by DSP...
uint8_t bufPblock[DSPLIB_DOTPROD_IXX_IXX_OXX_PBLOCK_SIZE]