47 #include "../common/c71/DSPLIB_inlines.h"
// Base value (0) from which the streaming-engine parameter offsets are derived.
57 #define SE_PARAM_BASE (0x0000)
// Offset associated with the SE0 parameter template; currently equal to SE_PARAM_BASE.
// NOTE(review): presumably an offset into the kernel's bufPblock buffer — the
// use site is not visible in this listing; confirm.
58 #define SE_SE0_PARAM_OFFSET (SE_PARAM_BASE)
// Fragment of a dataType-templated routine (presumably DSPLIB_dotprod_init_ci;
// the signature, braces and several statements are absent from this listing).
// The visible statements fill a __SE_TEMPLATE_v1 describing a 1-D stream whose
// dimension-0 count is blockSize, read as full-width vectors of dataType.
61 template <
typename dataType>
68 __SE_TEMPLATE_v1 se0Params;
// Element type and vector length for the template; both are assigned below.
70 __SE_ELETYPE SE_ELETYPE;
71 __SE_VECLEN SE_VECLEN;
// Parameter-block buffer and block size, read from the kernel's private args.
75 uint8_t *pBlock = pKerPrivArgs->
bufPblock;
76 int32_t blockSize = pKerPrivArgs->
blockSize;
// Full-width vector type for dataType; VECLEN and ELETYPE are both derived from it.
78 typedef typename c7x::make_full_vector<dataType>::type vec;
80 SE_VECLEN = c7x::se_veclen<vec>::value;
81 SE_ELETYPE = c7x::se_eletype<vec>::value;
// Number of dataType elements in one full vector; only used by the printf below.
// NOTE(review): no `#if DSPLIB_DEBUGPRINT` guard is visible ahead of this printf,
// unlike the other debug prints in this listing — confirm against the real file.
84 int32_t eleCount = c7x::element_count_of<vec>::value;
85 printf(
"Enter eleCount %d\n", eleCount);
// Start from the generated template, then set the fields this kernel needs:
// ICNT0 from blockSize, the element type / vector length chosen above, and the
// 1-D dimension format.
91 se0Params = __gen_SE_TEMPLATE_v1();
93 se0Params.ICNT0 = blockSize;
94 se0Params.ELETYPE = SE_ELETYPE;
95 se0Params.VECLEN = SE_VECLEN;
96 se0Params.DIMFMT = __SE_DIMFMT_1D;
// NOTE(review): pBlock is not used by any visible line; presumably se0Params is
// stored into it on a line missing from this listing — confirm.
// Fragment of a second initialiser (presumably the int8_t specialisation of
// DSPLIB_dotprod_init_ci; its signature and braces are absent from this listing).
// It builds the same kind of 1-D template as the generic variant but also sets
// a PROMOTE field.
111 __SE_TEMPLATE_v1 se0Params;
113 __SE_ELETYPE SE_ELETYPE;
114 __SE_VECLEN SE_VECLEN;
116 __SE_PROMOTE SE_PROMOTE;
// Parameter-block buffer and block size, read from the kernel's private args.
120 uint8_t *pBlock = pKerPrivArgs->
bufPblock;
121 int32_t blockSize = pKerPrivArgs->
blockSize;
// The vector length comes from short_vec while the element type comes from
// char_vec, combined with 2x sign-extending promotion.
// NOTE(review): presumably 8-bit elements are widened to 16-bit lanes by the
// stream — confirm against the streaming-engine documentation.
124 SE_VECLEN = c7x::se_veclen<c7x::short_vec>::value;
125 SE_ELETYPE = c7x::se_eletype<c7x::char_vec>::value;
126 SE_PROMOTE = __SE_PROMOTE_2X_SIGNEXT;
127 #if DSPLIB_DEBUGPRINT
// Debug-only print of the per-vector element count (matching #endif is absent
// from this listing).
// NOTE(review): `char_vec` is unqualified here, whereas the lines above spell it
// `c7x::char_vec`; this is only compiled when DSPLIB_DEBUGPRINT is nonzero —
// confirm that it resolves.
128 int32_t eleCount = c7x::element_count_of<char_vec>::value;
129 printf(
"Enter eleCount %d\n", eleCount);
// Start from the generated template, then set count, element type, vector
// length, 1-D format and the promotion mode chosen above.
135 se0Params = __gen_SE_TEMPLATE_v1();
137 se0Params.ICNT0 = blockSize;
138 se0Params.ELETYPE = SE_ELETYPE;
139 se0Params.VECLEN = SE_VECLEN;
140 se0Params.DIMFMT = __SE_DIMFMT_1D;
141 se0Params.PROMOTE = SE_PROMOTE;
// Fragment of a horizontal-add helper that returns a float sum (presumably
// DSPLIB_horiAdd(c7x::float_vec); the signature and some lines are absent from
// this listing).
192 #pragma FUNC_ALWAYS_INLINE
// Fold the upper half of the vector onto the lower half, then repeat the same
// fold within the lower half.
197 vector.lo() = vector.hi() + vector.lo();
198 vector.lo().lo() = vector.lo().hi() + vector.lo().lo();
// Final step: add lanes 0 and 1 as floats.
// NOTE(review): listing line 199 is missing; a further fold may belong between
// the step above and this sum — confirm.
200 sum = (float) vector.s[0] + (
float) vector.s[1];
// Fragment of a horizontal-add helper that returns a double sum (presumably the
// double-vector overload of DSPLIB_horiAdd; signature not in this listing).
205 #pragma FUNC_ALWAYS_INLINE
// Fold the upper half of the vector onto the lower half.
210 vector.lo() = vector.hi() + vector.lo();
// Final step: add lanes 0 and 1 as doubles.
// NOTE(review): listing lines 211-213 are missing; additional folds may sit
// between the step above and this sum — confirm.
214 sum = (double) vector.s[0] + (
double) vector.s[1];
// Fragment of the dataType-templated execution routine (the debug string below
// names it DSPLIB_dotprod_exec_ci; signature, braces, #endif lines and the
// accumulator updates are absent from this listing).
// Visible behaviour: open stream engines 0 and 1 on the two inputs, read four
// vectors from each per loop iteration, and combine four partial results.
225 template <
typename dataType>
230 int32_t blockSize = pKerPrivArgs->
blockSize;
232 __SE_TEMPLATE_v1 se0Params;
// View the untyped input/output pointers as dataType.
234 dataType *restrict pInLocal1 = (dataType *) pIn1;
235 dataType *restrict pInLocal2 = (dataType *) pIn2;
236 dataType *restrict pOutLocal = (dataType *) pOut;
238 #if DSPLIB_DEBUGPRINT
239 printf(
"Enter DSPLIB_dotprod_exec_ci\n");
// Full-width vector of dataType and its lane count; eleCount sets the loop step.
242 typedef typename c7x::make_full_vector<dataType>::type vec;
243 int32_t eleCount = c7x::element_count_of<vec>::value;
247 #if DSPLIB_DEBUGPRINT
248 printf(
"Enter eleCount %d\n", eleCount);
// NOTE(review): pBlock is presumably where se0Params is loaded from; the load
// itself is not visible in this listing — confirm.
250 uint8_t *pBlock = pKerPrivArgs->
bufPblock;
// Both stream engines are opened with the same template, one per input buffer.
255 __SE0_OPEN(pInLocal1, se0Params);
256 __SE1_OPEN(pInLocal2, se0Params);
258 #if DSPLIB_DEBUGPRINT
259 printf(
"DSPLIB_DEBUGPRINT blockSize %d\n", blockSize);
// Each iteration pulls four vectors from SE0 (a, c, e, g) and four from SE1
// (b, d, f, h), so the counter advances by 4 * eleCount.
274 for (int32_t counter = 0; counter < blockSize; counter += eleCount * 4) {
275 vec a = c7x::strm_eng<0, vec>::get_adv();
276 vec b = c7x::strm_eng<1, vec>::get_adv();
280 vec c = c7x::strm_eng<0, vec>::get_adv();
281 vec d = c7x::strm_eng<1, vec>::get_adv();
285 vec e = c7x::strm_eng<0, vec>::get_adv();
286 vec f = c7x::strm_eng<1, vec>::get_adv();
290 vec g = c7x::strm_eng<0, vec>::get_adv();
291 vec h = c7x::strm_eng<1, vec>::get_adv();
// Combine the four partial results into one.
// NOTE(review): the definitions and updates of out_ab/out_cd/out_ef/out_gh, and
// whether this line sits inside or after the loop, are not visible — confirm.
296 out = out_ab + out_cd + out_ef + out_gh;
// Fragment of an execution routine that reads int16_t vectors from both streams
// and produces an int32_t result (presumably the int8_t specialisation fed by a
// promoting stream template — signature not in this listing; confirm).
// Braces, #endif lines and the declaration of `out` are absent from the listing.
322 int32_t blockSize = pKerPrivArgs->
blockSize;
324 __SE_TEMPLATE_v1 se0Params;
// Inputs are viewed as int16_t, the output as int32_t.
326 int16_t *restrict pInLocal1 = (int16_t *) pIn1;
327 int16_t *restrict pInLocal2 = (int16_t *) pIn2;
328 int32_t *restrict pOutLocal = (int32_t *) pOut;
330 #if DSPLIB_DEBUGPRINT
331 printf(
"Enter DSPLIB_dotprod_exec_ci\n");
// Full-width int16_t vector and its lane count; eleCount sets the loop step.
334 typedef typename c7x::make_full_vector<int16_t>::type vec;
335 int32_t eleCount = c7x::element_count_of<vec>::value;
337 #if DSPLIB_DEBUGPRINT
338 printf(
"Enter eleCount %d\n", eleCount);
340 uint8_t *pBlock = pKerPrivArgs->
bufPblock;
// Both stream engines are opened with the same template, one per input buffer.
345 __SE0_OPEN(pInLocal1, se0Params);
346 __SE1_OPEN(pInLocal2, se0Params);
348 #if DSPLIB_DEBUGPRINT
349 printf(
"DSPLIB_DEBUGPRINT blockSize %d\n", blockSize);
// Vector type with int64_t lanes (presumably the type of `out` — confirm).
352 typedef typename c7x::make_full_vector<int64_t>::type vec_out;
// One vector per stream per iteration; the intrinsic's result is accumulated.
357 for (int32_t counter = 0; counter < blockSize; counter += eleCount) {
358 vec a = c7x::strm_eng<0, vec>::get_adv();
359 vec b = c7x::strm_eng<1, vec>::get_adv();
361 out += __vdotp4hd_vvv(a, b);
// Reduce the accumulator lanes to a scalar and narrow it to int32_t.
// NOTE(review): if `out` has int64_t lanes, this cast discards the upper bits
// of sums that do not fit in 32 bits — confirm this is intended.
366 result = (int32_t) __horizontal_add(out);
// Fragment of an execution routine for uint8_t inputs with a uint32_t result
// (presumably the uint8_t specialisation of DSPLIB_dotprod_exec_ci; signature,
// braces, #endif lines and the declaration of `out` are absent from this listing).
387 int32_t blockSize = pKerPrivArgs->
blockSize;
389 __SE_TEMPLATE_v1 se0Params;
// Inputs are viewed as uint8_t, the output as uint32_t.
391 uint8_t *restrict pInLocal1 = (uint8_t *) pIn1;
392 uint8_t *restrict pInLocal2 = (uint8_t *) pIn2;
393 uint32_t *restrict pOutLocal = (uint32_t *) pOut;
395 #if DSPLIB_DEBUGPRINT
396 printf(
"Enter DSPLIB_dotprod_exec_ci\n");
// Full-width uint8_t vector and its lane count; eleCount sets the loop step.
399 typedef typename c7x::make_full_vector<uint8_t>::type vec;
400 int32_t eleCount = c7x::element_count_of<vec>::value;
402 #if DSPLIB_DEBUGPRINT
403 printf(
"Enter eleCount %d\n", eleCount);
405 uint8_t *pBlock = pKerPrivArgs->
bufPblock;
// Both stream engines are opened with the same template, one per input buffer.
410 __SE0_OPEN(pInLocal1, se0Params);
411 __SE1_OPEN(pInLocal2, se0Params);
413 #if DSPLIB_DEBUGPRINT
414 printf(
"DSPLIB_DEBUGPRINT blockSize %d\n", blockSize);
// Vector type with uint32_t lanes (presumably the type of `out` — confirm).
417 typedef typename c7x::make_full_vector<uint32_t>::type vec_out;
// One vector per stream per iteration; the intrinsic's result is accumulated.
423 for (int32_t counter = 0; counter < blockSize; counter += eleCount) {
424 vec a = c7x::strm_eng<0, vec>::get_adv();
425 vec b = c7x::strm_eng<1, vec>::get_adv();
427 out += __vdotp4ubw_vvv(a, b);
// Reduce the accumulator lanes to a single uint32_t.
432 result = (uint32_t) __horizontal_add(out);
// Fragment of an execution routine for int16_t inputs with an int64_t result
// (presumably the int16_t specialisation of DSPLIB_dotprod_exec_ci; signature,
// braces, #endif lines and the declaration of `out` are absent from this listing).
453 int32_t blockSize = pKerPrivArgs->
blockSize;
455 __SE_TEMPLATE_v1 se0Params;
// Inputs are viewed as int16_t, the output as int64_t.
457 int16_t *restrict pInLocal1 = (int16_t *) pIn1;
458 int16_t *restrict pInLocal2 = (int16_t *) pIn2;
459 int64_t *restrict pOutLocal = (int64_t *) pOut;
461 #if DSPLIB_DEBUGPRINT
462 printf(
"Enter DSPLIB_dotprod_exec_ci\n");
// Full-width int16_t vector and its lane count; eleCount sets the loop step.
465 typedef typename c7x::make_full_vector<int16_t>::type vec;
466 int32_t eleCount = c7x::element_count_of<vec>::value;
468 #if DSPLIB_DEBUGPRINT
469 printf(
"Enter eleCount %d\n", eleCount);
471 uint8_t *pBlock = pKerPrivArgs->
bufPblock;
// Both stream engines are opened with the same template, one per input buffer.
476 __SE0_OPEN(pInLocal1, se0Params);
477 __SE1_OPEN(pInLocal2, se0Params);
479 #if DSPLIB_DEBUGPRINT
480 printf(
"DSPLIB_DEBUGPRINT blockSize %d\n", blockSize);
// Vector type with int64_t lanes (presumably the type of `out` — confirm).
483 typedef typename c7x::make_full_vector<int64_t>::type vec_out;
// One vector per stream per iteration; the intrinsic's result is accumulated.
488 for (int32_t counter = 0; counter < blockSize; counter += eleCount) {
489 vec a = c7x::strm_eng<0, vec>::get_adv();
490 vec b = c7x::strm_eng<1, vec>::get_adv();
492 out += __vdotp4hd_vvv(a, b);
// Reduce the accumulator lanes to a scalar; no narrowing cast is applied here.
496 result = __horizontal_add(out);
// Fragment of an execution routine for uint16_t inputs with a uint64_t result
// (presumably the uint16_t specialisation of DSPLIB_dotprod_exec_ci; signature,
// braces, #endif lines and the declaration of `out` are absent from this listing).
517 int32_t blockSize = pKerPrivArgs->
blockSize;
519 __SE_TEMPLATE_v1 se0Params;
// Inputs are viewed as uint16_t, the output as uint64_t.
521 uint16_t *restrict pInLocal1 = (uint16_t *) pIn1;
522 uint16_t *restrict pInLocal2 = (uint16_t *) pIn2;
523 uint64_t *restrict pOutLocal = (uint64_t *) pOut;
525 #if DSPLIB_DEBUGPRINT
526 printf(
"Enter DSPLIB_dotprod_exec_ci\n");
// Full-width uint16_t vector and its lane count; eleCount sets the loop step.
529 typedef typename c7x::make_full_vector<uint16_t>::type vec;
530 int32_t eleCount = c7x::element_count_of<vec>::value;
532 #if DSPLIB_DEBUGPRINT
533 printf(
"Enter eleCount %d\n", eleCount);
535 uint8_t *pBlock = pKerPrivArgs->
bufPblock;
// Both stream engines are opened with the same template, one per input buffer.
540 __SE0_OPEN(pInLocal1, se0Params);
541 __SE1_OPEN(pInLocal2, se0Params);
543 #if DSPLIB_DEBUGPRINT
544 printf(
"DSPLIB_DEBUGPRINT blockSize %d\n", blockSize);
// Vector type with uint64_t lanes (presumably the type of `out` — confirm).
547 typedef typename c7x::make_full_vector<uint64_t>::type vec_out;
// One vector per stream per iteration; the intrinsic's result is accumulated.
552 for (int32_t counter = 0; counter < blockSize; counter += eleCount) {
553 vec a = c7x::strm_eng<0, vec>::get_adv();
554 vec b = c7x::strm_eng<1, vec>::get_adv();
556 out += __vdotp4uhd_vvv(a, b);
// Reduce the accumulator lanes to a scalar; no narrowing cast is applied here.
560 result = __horizontal_add(out);
// Fragment of an execution routine for int32_t inputs with an int64_t result
// (presumably the int32_t specialisation of DSPLIB_dotprod_exec_ci; signature,
// braces, #endif lines and the declarations of out/out0/out1 are absent from
// this listing).
581 int32_t blockSize = pKerPrivArgs->
blockSize;
583 __SE_TEMPLATE_v1 se0Params;
// Inputs are viewed as int32_t, the output as int64_t.
585 int32_t *restrict pInLocal1 = (int32_t *) pIn1;
586 int32_t *restrict pInLocal2 = (int32_t *) pIn2;
587 int64_t *restrict pOutLocal = (int64_t *) pOut;
589 #if DSPLIB_DEBUGPRINT
590 printf(
"Enter DSPLIB_dotprod_exec_ci\n");
// Full-width int32_t vector and its lane count; eleCount sets the loop step.
593 typedef typename c7x::make_full_vector<int32_t>::type vec;
594 int32_t eleCount = c7x::element_count_of<vec>::value;
596 #if DSPLIB_DEBUGPRINT
597 printf(
"Enter eleCount %d\n", eleCount);
599 uint8_t *pBlock = pKerPrivArgs->
bufPblock;
// Both stream engines are opened with the same template, one per input buffer.
604 __SE0_OPEN(pInLocal1, se0Params);
605 __SE1_OPEN(pInLocal2, se0Params);
607 #if DSPLIB_DEBUGPRINT
608 printf(
"DSPLIB_DEBUGPRINT blockSize %d\n", blockSize);
// Vector type with int64_t lanes (presumably the type of out/out0/out1 — confirm).
611 typedef typename c7x::make_full_vector<int64_t>::type vec_out;
// One vector per stream per iteration. The multiply intrinsic writes its
// results into the two outputs out0 and out1, which are summed into `out`.
// NOTE(review): presumably out0/out1 hold the widened products of two halves of
// the lanes — confirm against the C7000 intrinsics documentation.
618 for (int32_t counter = 0; counter < blockSize; counter += eleCount) {
619 vec a = c7x::strm_eng<0, vec>::get_adv();
620 vec b = c7x::strm_eng<1, vec>::get_adv();
622 __vmpywd_vvw(a, b, out0, out1);
623 out += (out0 + out1);
// Reduce the accumulator lanes to a scalar.
627 result = __horizontal_add(out);
// Fragment of an execution routine for uint32_t inputs with a uint64_t result
// (presumably the uint32_t specialisation of DSPLIB_dotprod_exec_ci; signature,
// braces, #endif lines and the declarations of out/out0/out1 are absent from
// this listing).
648 int32_t blockSize = pKerPrivArgs->
blockSize;
650 __SE_TEMPLATE_v1 se0Params;
// Inputs are viewed as uint32_t, the output as uint64_t.
652 uint32_t *restrict pInLocal1 = (uint32_t *) pIn1;
653 uint32_t *restrict pInLocal2 = (uint32_t *) pIn2;
654 uint64_t *restrict pOutLocal = (uint64_t *) pOut;
656 #if DSPLIB_DEBUGPRINT
657 printf(
"Enter DSPLIB_dotprod_exec_ci\n");
// Full-width uint32_t vector and its lane count; eleCount sets the loop step.
660 typedef typename c7x::make_full_vector<uint32_t>::type vec;
661 int32_t eleCount = c7x::element_count_of<vec>::value;
663 #if DSPLIB_DEBUGPRINT
664 printf(
"Enter eleCount %d\n", eleCount);
666 uint8_t *pBlock = pKerPrivArgs->
bufPblock;
// Both stream engines are opened with the same template, one per input buffer.
671 __SE0_OPEN(pInLocal1, se0Params);
672 __SE1_OPEN(pInLocal2, se0Params);
674 #if DSPLIB_DEBUGPRINT
675 printf(
"DSPLIB_DEBUGPRINT blockSize %d\n", blockSize);
// Vector type with uint64_t lanes (presumably the type of out/out0/out1 — confirm).
678 typedef typename c7x::make_full_vector<uint64_t>::type vec_out;
// One vector per stream per iteration. The unsigned multiply intrinsic writes
// its results into out0 and out1, which are summed into `out`.
685 for (int32_t counter = 0; counter < blockSize; counter += eleCount) {
686 vec a = c7x::strm_eng<0, vec>::get_adv();
687 vec b = c7x::strm_eng<1, vec>::get_adv();
691 __vmpyuwd_vvw(a, b, out0, out1);
692 out += (out0 + out1);
// Reduce the accumulator lanes to a scalar.
696 result = __horizontal_add(out);
709 void *restrict pOut);
714 void *restrict pOut);
719 void *restrict pOut);
724 void *restrict pOut);
729 void *restrict pOut);
734 void *restrict pOut);
739 void *restrict pOut);
744 void *restrict pOut);
template DSPLIB_STATUS DSPLIB_dotprod_init_ci< int16_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_dotprod_InitArgs *pKerInitArgs)
template DSPLIB_STATUS DSPLIB_dotprod_init_ci< double >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_dotprod_InitArgs *pKerInitArgs)
#define SE_SE0_PARAM_OFFSET
template DSPLIB_STATUS DSPLIB_dotprod_exec_ci< float >(DSPLIB_kernelHandle handle, void *restrict pIn1, void *restrict pIn2, void *restrict pOut)
template DSPLIB_STATUS DSPLIB_dotprod_init_ci< uint32_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_dotprod_InitArgs *pKerInitArgs)
DSPLIB_STATUS DSPLIB_dotprod_exec_ci< int16_t >(DSPLIB_kernelHandle handle, void *restrict pIn1, void *restrict pIn2, void *restrict pOut)
DSPLIB_STATUS DSPLIB_dotprod_exec_ci< uint8_t >(DSPLIB_kernelHandle handle, void *restrict pIn1, void *restrict pIn2, void *restrict pOut)
DSPLIB_STATUS DSPLIB_dotprod_exec_ci< uint32_t >(DSPLIB_kernelHandle handle, void *restrict pIn1, void *restrict pIn2, void *restrict pOut)
DSPLIB_STATUS DSPLIB_dotprod_init_ci< int8_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_dotprod_InitArgs *pKerInitArgs)
DSPLIB_STATUS DSPLIB_dotprod_exec_ci< int32_t >(DSPLIB_kernelHandle handle, void *restrict pIn1, void *restrict pIn2, void *restrict pOut)
DSPLIB_STATUS DSPLIB_dotprod_exec_ci< uint16_t >(DSPLIB_kernelHandle handle, void *restrict pIn1, void *restrict pIn2, void *restrict pOut)
template DSPLIB_STATUS DSPLIB_dotprod_exec_ci< double >(DSPLIB_kernelHandle handle, void *restrict pIn1, void *restrict pIn2, void *restrict pOut)
DSPLIB_STATUS DSPLIB_dotprod_exec_ci< int8_t >(DSPLIB_kernelHandle handle, void *restrict pIn1, void *restrict pIn2, void *restrict pOut)
template DSPLIB_STATUS DSPLIB_dotprod_init_ci< uint8_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_dotprod_InitArgs *pKerInitArgs)
DSPLIB_STATUS DSPLIB_dotprod_exec_ci(DSPLIB_kernelHandle handle, void *restrict pIn1, void *restrict pIn2, void *restrict pOut)
This function is the main execution function for the C7x implementation of the kernel....
template DSPLIB_STATUS DSPLIB_dotprod_init_ci< float >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_dotprod_InitArgs *pKerInitArgs)
static float DSPLIB_horiAdd(c7x::float_vec vector)
template DSPLIB_STATUS DSPLIB_dotprod_init_ci< uint16_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_dotprod_InitArgs *pKerInitArgs)
DSPLIB_STATUS DSPLIB_dotprod_init_ci(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_dotprod_InitArgs *pKerInitArgs)
This function is the initialization function for the C7x implementation of the kernel....
template DSPLIB_STATUS DSPLIB_dotprod_init_ci< int32_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_dotprod_InitArgs *pKerInitArgs)
Header file for kernel's internal use. For the kernel's interface, please see DSPLIB_dotprod.
DSPLIB_STATUS_NAME
The enumeration of all status codes.
void * DSPLIB_kernelHandle
Handle type for DSPLIB operations.
A structure for a 1 dimensional buffer descriptor.
Structure containing the parameters to initialize the kernel.
Structure that is reserved for internal use by the kernel.
int32_t blockSize
Size of input buffer for different batches DSPLIB_dotprod_init that will be retrieved and used by DSP...
uint8_t bufPblock[DSPLIB_DOTPROD_IXX_IXX_OXX_PBLOCK_SIZE]