47 #include "../common/c71/DSPLIB_inlines.h"
57 #define SE_PARAM_BASE (0x0000)
58 #define SE_SE0_PARAM_OFFSET (SE_PARAM_BASE)
// Fragment of the generic streaming-engine (SE) setup, templated on dataType.
// It derives the SE element type and vector length from the full-width vector
// type of dataType and fills a 1-D SE template whose ICNT0 is blockSize.
// NOTE(review): the function signature and the closing part of the body are
// not visible in this listing; presumably this is DSPLIB_dotp_sqr_init_ci and
// se0Params is afterwards stored into pBlock at SE_SE0_PARAM_OFFSET - confirm.
61 template <
typename dataType>
68 __SE_TEMPLATE_v1 se0Params;
70 __SE_ELETYPE SE_ELETYPE;
71 __SE_VECLEN SE_VECLEN;
// Parameter block pointer and block size are read from the kernel's private args.
75 uint8_t *pBlock = pKerPrivArgs->
bufPblock;
76 int32_t blockSize = pKerPrivArgs->
blockSize;
// vec is the full-width vector type for dataType; both SE settings follow from it.
78 typedef typename c7x::make_full_vector<dataType>::type vec;
80 SE_VECLEN = c7x::se_veclen<vec>::value;
81 SE_ELETYPE = c7x::se_eletype<vec>::value;
// NOTE(review): no DSPLIB_DEBUGPRINT guard is visible around this printf,
// unlike the equivalent print in the other init fragment - confirm it is guarded.
84 int32_t eleCount = c7x::element_count_of<vec>::value;
85 printf(
"Enter eleCount %d\n", eleCount);
// Start from the default template, then override only the fields needed for a
// one-dimensional stream of blockSize items.
91 se0Params = __gen_SE_TEMPLATE_v1();
93 se0Params.ICNT0 = blockSize;
94 se0Params.ELETYPE = SE_ELETYPE;
95 se0Params.VECLEN = SE_VECLEN;
96 se0Params.DIMFMT = __SE_DIMFMT_1D;
// Fragment of a non-generic SE setup that uses element promotion: the element
// type is taken from c7x::char_vec, the vector length from c7x::short_vec, and
// PROMOTE is set to 2x with sign extension.
// NOTE(review): presumably this is the 8-bit signed specialization, so that
// 8-bit inputs are delivered widened to 16 bits - confirm against the
// signature, which is not visible in this listing.
111 __SE_TEMPLATE_v1 se0Params;
113 __SE_ELETYPE SE_ELETYPE;
114 __SE_VECLEN SE_VECLEN;
116 __SE_PROMOTE SE_PROMOTE;
// Parameter block pointer and block size are read from the kernel's private args.
120 uint8_t *pBlock = pKerPrivArgs->
bufPblock;
121 int32_t blockSize = pKerPrivArgs->
blockSize;
// Vector length and element type deliberately come from different vector types.
124 SE_VECLEN = c7x::se_veclen<c7x::short_vec>::value;
125 SE_ELETYPE = c7x::se_eletype<c7x::char_vec>::value;
126 SE_PROMOTE = __SE_PROMOTE_2X_SIGNEXT;
// NOTE(review): char_vec is unqualified below while c7x::char_vec is used
// above - confirm a using-declaration makes it visible when
// DSPLIB_DEBUGPRINT is enabled. The printed count is that of char_vec, not of
// the short_vec used for VECLEN.
127 #if DSPLIB_DEBUGPRINT
128 int32_t eleCount = c7x::element_count_of<char_vec>::value;
129 printf(
"Enter eleCount %d\n", eleCount);
// Default template with the 1-D stream fields and the promote mode overridden.
135 se0Params = __gen_SE_TEMPLATE_v1();
137 se0Params.ICNT0 = blockSize;
138 se0Params.ELETYPE = SE_ELETYPE;
139 se0Params.VECLEN = SE_VECLEN;
140 se0Params.DIMFMT = __SE_DIMFMT_1D;
141 se0Params.PROMOTE = SE_PROMOTE;
// Horizontal add of a float vector: the upper half is folded onto the lower
// half three times in succession, then the two remaining elements s[0] and
// s[1] are added as float to give the scalar sum.
// NOTE(review): three halvings followed by a 2-element add covers 16 lanes -
// confirm that matches c7x::float_vec on every supported target.
192 #pragma FUNC_ALWAYS_INLINE
197 vector.lo() = vector.hi() + vector.lo();
198 vector.lo().lo() = vector.lo().hi() + vector.lo().lo();
200 vector.lo().lo().lo() = vector.lo().lo().hi() + vector.lo().lo().lo();
201 sum = (float) vector.s[0] + (
float) vector.s[1];
// Horizontal add of a double vector: the upper half is folded onto the lower
// half twice, then the two remaining elements s[0] and s[1] are added as
// double to give the scalar sum.
// NOTE(review): two halvings followed by a 2-element add covers 8 lanes -
// confirm that matches the double vector type on every supported target.
206 #pragma FUNC_ALWAYS_INLINE
211 vector.lo() = vector.hi() + vector.lo();
214 vector.lo().lo() = vector.lo().hi() + vector.lo().lo();
216 sum = (double) vector.s[0] + (
double) vector.s[1];
// Fragment of the generic (templated on dataType) execution path.
// Two streams are opened with the same SE template, one on each input. The
// loop is unrolled four times with independent accumulators: for every pair of
// fetched vectors it accumulates in1*in2 (dot product) and in2*in2 (sum of
// squares of the second input only). Two scalars are written to the output:
// the dot product first, the sum of squares second.
// NOTE(review): the signature, the code that fills se0Params, the accumulator
// declarations and the reduction into result_dotp / result_sqrAdd are not
// visible in this listing.
227 template <
typename dataType>
232 int32_t blockSize = pKerPrivArgs->
blockSize;
234 __SE_TEMPLATE_v1 se0Params;
// Typed views of the untyped input/output pointers.
236 dataType *restrict pInLocal1 = (dataType *) pIn1;
237 dataType *restrict pInLocal2 = (dataType *) pIn2;
238 dataType *restrict pOutLocal = (dataType *) pOut;
240 #if DSPLIB_DEBUGPRINT
241 printf(
"Enter DSPLIB_dotp_sqr_exec_ci\n");
// eleCount is the number of dataType elements in one full-width vector.
244 typedef typename c7x::make_full_vector<dataType>::type vec;
245 int32_t eleCount = c7x::element_count_of<vec>::value;
247 #if DSPLIB_DEBUGPRINT
248 printf(
"Enter eleCount %d\n", eleCount);
250 uint8_t *pBlock = pKerPrivArgs->
bufPblock;
// Both streaming engines use the same template; only the base address differs.
255 __SE0_OPEN(pInLocal1, se0Params);
256 __SE1_OPEN(pInLocal2, se0Params);
258 #if DSPLIB_DEBUGPRINT
259 printf(
"DSPLIB_DEBUGPRINT blockSize %d\n", blockSize);
// Zero the combined and the four partial dot-product accumulators.
267 out_dotp = (vec) 0.0;
268 out_ab_dotp = (vec) 0.0;
269 out_cd_dotp = (vec) 0.0;
270 out_ef_dotp = (vec) 0.0;
271 out_gh_dotp = (vec) 0.0;
272 dataType result_dotp;
// Zero the combined and the four partial sum-of-squares accumulators.
279 out_sqrAdd = (vec) 0.0;
280 out_ab_sqrAdd = (vec) 0.0;
281 out_cd_sqrAdd = (vec) 0.0;
282 out_ef_sqrAdd = (vec) 0.0;
283 out_gh_sqrAdd = (vec) 0.0;
284 dataType result_sqrAdd;
// Each iteration consumes four vectors from each stream, hence the step of
// eleCount * 4.
// NOTE(review): when blockSize is not a multiple of eleCount * 4 the loop
// still fetches whole vectors; this presumably relies on the stream engine's
// behaviour past ICNT0 - confirm.
285 for (int32_t counter = 0; counter < blockSize; counter += eleCount * 4) {
286 vec a = c7x::strm_eng<0, vec>::get_adv();
287 vec b = c7x::strm_eng<1, vec>::get_adv();
289 out_ab_dotp += a * b;
291 out_ab_sqrAdd += b * b;
293 vec c = c7x::strm_eng<0, vec>::get_adv();
294 vec d = c7x::strm_eng<1, vec>::get_adv();
296 out_cd_dotp += c * d;
298 out_cd_sqrAdd += d * d;
300 vec e = c7x::strm_eng<0, vec>::get_adv();
301 vec f = c7x::strm_eng<1, vec>::get_adv();
303 out_ef_dotp += e * f;
305 out_ef_sqrAdd += f * f;
307 vec g = c7x::strm_eng<0, vec>::get_adv();
308 vec h = c7x::strm_eng<1, vec>::get_adv();
310 out_gh_dotp += g * h;
312 out_gh_sqrAdd += h * h;
// Merge the four partial accumulators into one vector per result.
315 out_dotp = out_ab_dotp + out_cd_dotp + out_ef_dotp + out_gh_dotp;
317 out_sqrAdd = out_ab_sqrAdd + out_cd_sqrAdd + out_ef_sqrAdd + out_gh_sqrAdd;
// Output layout: element 0 is the dot product, element 1 the sum of squares.
322 *pOutLocal = result_dotp;
323 *++pOutLocal = result_sqrAdd;
// Fragment of an execution path that reads int16_t vectors from both streams,
// accumulates in 64-bit lanes with __vdotp4hd_vvv, and writes two int32_t
// results: the dot product first, the sum of squares of the second input
// second.
// NOTE(review): the signature is not visible; presumably this is the 8-bit
// signed specialization whose inputs arrive promoted to 16 bits - confirm.
344 int32_t blockSize = pKerPrivArgs->
blockSize;
346 __SE_TEMPLATE_v1 se0Params;
348 int16_t *restrict pInLocal1 = (int16_t *) pIn1;
349 int16_t *restrict pInLocal2 = (int16_t *) pIn2;
350 int32_t *restrict pOutLocal = (int32_t *) pOut;
352 #if DSPLIB_DEBUGPRINT
353 printf(
"Enter DSPLIB_dotp_sqr_exec_ci\n");
// eleCount is the number of int16_t elements in one full-width vector.
356 typedef typename c7x::make_full_vector<int16_t>::type vec;
357 int32_t eleCount = c7x::element_count_of<vec>::value;
359 #if DSPLIB_DEBUGPRINT
360 printf(
"Enter eleCount %d\n", eleCount);
362 uint8_t *pBlock = pKerPrivArgs->
bufPblock;
// Both streaming engines use the same template; only the base address differs.
367 __SE0_OPEN(pInLocal1, se0Params);
368 __SE1_OPEN(pInLocal2, se0Params);
370 #if DSPLIB_DEBUGPRINT
371 printf(
"DSPLIB_DEBUGPRINT blockSize %d\n", blockSize);
// Accumulators are vectors of int64_t, wider than the int32_t output.
374 typedef typename c7x::make_full_vector<int64_t>::type vec_out;
377 out_dotp = (vec_out) 0;
378 int32_t result_dotp = 0;
381 out_sqrAdd = (vec_out) 0;
382 int32_t result_sqrAdd = 0;
// One vector from each stream per iteration.
383 for (int32_t counter = 0; counter < blockSize; counter += eleCount) {
384 vec a = c7x::strm_eng<0, vec>::get_adv();
385 vec b = c7x::strm_eng<1, vec>::get_adv();
387 out_dotp += __vdotp4hd_vvv(a, b);
389 out_sqrAdd += __vdotp4hd_vvv(b, b);
// NOTE(review): the 64-bit horizontal sums are narrowed to int32_t by cast;
// confirm the expected input range cannot exceed 32 bits.
394 result_dotp = (int32_t) __horizontal_add(out_dotp);
395 result_sqrAdd = (int32_t) __horizontal_add(out_sqrAdd);
// Output layout: element 0 is the dot product, element 1 the sum of squares.
397 *pOutLocal = result_dotp;
398 *++pOutLocal = result_sqrAdd;
// Fragment of the execution path for uint8_t inputs: both streams deliver
// uint8_t vectors, __vdotp4ubw_vvv results are accumulated in uint32_t lanes,
// and two uint32_t results are written: the dot product first, the sum of
// squares of the second input second.
// NOTE(review): the signature and the code that fills se0Params are not
// visible in this listing.
417 int32_t blockSize = pKerPrivArgs->
blockSize;
419 __SE_TEMPLATE_v1 se0Params;
421 uint8_t *restrict pInLocal1 = (uint8_t *) pIn1;
422 uint8_t *restrict pInLocal2 = (uint8_t *) pIn2;
423 uint32_t *restrict pOutLocal = (uint32_t *) pOut;
425 #if DSPLIB_DEBUGPRINT
426 printf(
"Enter DSPLIB_dotp_sqr_exec_ci\n");
// eleCount is the number of uint8_t elements in one full-width vector.
429 typedef typename c7x::make_full_vector<uint8_t>::type vec;
430 int32_t eleCount = c7x::element_count_of<vec>::value;
432 #if DSPLIB_DEBUGPRINT
433 printf(
"Enter eleCount %d\n", eleCount);
435 uint8_t *pBlock = pKerPrivArgs->
bufPblock;
// Both streaming engines use the same template; only the base address differs.
440 __SE0_OPEN(pInLocal1, se0Params);
441 __SE1_OPEN(pInLocal2, se0Params);
443 #if DSPLIB_DEBUGPRINT
444 printf(
"DSPLIB_DEBUGPRINT blockSize %d\n", blockSize);
// Accumulators are vectors of uint32_t, the same width as the output.
// NOTE(review): confirm 32-bit accumulation cannot wrap for the largest
// supported blockSize.
447 typedef typename c7x::make_full_vector<uint32_t>::type vec_out;
450 out_dotp = (vec_out) 0;
451 uint32_t result_dotp = 0;
454 out_sqrAdd = (vec_out) 0;
455 uint32_t result_sqrAdd = 0;
// One vector from each stream per iteration.
457 for (int32_t counter = 0; counter < blockSize; counter += eleCount) {
458 vec a = c7x::strm_eng<0, vec>::get_adv();
459 vec b = c7x::strm_eng<1, vec>::get_adv();
461 out_dotp += __vdotp4ubw_vvv(a, b);
463 out_sqrAdd += __vdotp4ubw_vvv(b, b);
// Reduce each accumulator vector to a scalar.
468 result_dotp = (uint32_t) __horizontal_add(out_dotp);
469 result_sqrAdd = (uint32_t) __horizontal_add(out_sqrAdd);
// Output layout: element 0 is the dot product, element 1 the sum of squares.
471 *pOutLocal = result_dotp;
472 *++pOutLocal = result_sqrAdd;
// Fragment of an execution path for int16_t inputs with int64_t outputs: both
// streams deliver int16_t vectors, __vdotp4hd_vvv results are accumulated in
// int64_t lanes, and two int64_t results are written: the dot product first,
// the sum of squares of the second input second.
// NOTE(review): the signature and the code that fills se0Params are not
// visible in this listing.
491 int32_t blockSize = pKerPrivArgs->
blockSize;
493 __SE_TEMPLATE_v1 se0Params;
495 int16_t *restrict pInLocal1 = (int16_t *) pIn1;
496 int16_t *restrict pInLocal2 = (int16_t *) pIn2;
497 int64_t *restrict pOutLocal = (int64_t *) pOut;
499 #if DSPLIB_DEBUGPRINT
500 printf(
"Enter DSPLIB_dotp_sqr_exec_ci\n");
// eleCount is the number of int16_t elements in one full-width vector.
503 typedef typename c7x::make_full_vector<int16_t>::type vec;
504 int32_t eleCount = c7x::element_count_of<vec>::value;
506 #if DSPLIB_DEBUGPRINT
507 printf(
"Enter eleCount %d\n", eleCount);
509 uint8_t *pBlock = pKerPrivArgs->
bufPblock;
// Both streaming engines use the same template; only the base address differs.
514 __SE0_OPEN(pInLocal1, se0Params);
515 __SE1_OPEN(pInLocal2, se0Params);
517 #if DSPLIB_DEBUGPRINT
518 printf(
"DSPLIB_DEBUGPRINT blockSize %d\n", blockSize);
// Accumulators are vectors of int64_t, the same width as the output.
521 typedef typename c7x::make_full_vector<int64_t>::type vec_out;
524 out_dotp = (vec_out) 0;
525 int64_t result_dotp = 0;
528 out_sqrAdd = (vec_out) 0;
529 int64_t result_sqrAdd = 0;
// One vector from each stream per iteration.
530 for (int32_t counter = 0; counter < blockSize; counter += eleCount) {
531 vec a = c7x::strm_eng<0, vec>::get_adv();
532 vec b = c7x::strm_eng<1, vec>::get_adv();
534 out_dotp += __vdotp4hd_vvv(a, b);
536 out_sqrAdd += __vdotp4hd_vvv(b, b);
// Reduce each accumulator vector to a scalar; no narrowing is applied here.
540 result_dotp = __horizontal_add(out_dotp);
541 result_sqrAdd = __horizontal_add(out_sqrAdd);
// Output layout: element 0 is the dot product, element 1 the sum of squares.
543 *pOutLocal = result_dotp;
544 *++pOutLocal = result_sqrAdd;
// Fragment of the execution path for uint16_t inputs with uint64_t outputs:
// both streams deliver uint16_t vectors, __vdotp4uhd_vvv results are
// accumulated in uint64_t lanes, and two uint64_t results are written: the dot
// product first, the sum of squares of the second input second.
// NOTE(review): the signature and the code that fills se0Params are not
// visible in this listing.
563 int32_t blockSize = pKerPrivArgs->
blockSize;
565 __SE_TEMPLATE_v1 se0Params;
567 uint16_t *restrict pInLocal1 = (uint16_t *) pIn1;
568 uint16_t *restrict pInLocal2 = (uint16_t *) pIn2;
569 uint64_t *restrict pOutLocal = (uint64_t *) pOut;
571 #if DSPLIB_DEBUGPRINT
572 printf(
"Enter DSPLIB_dotp_sqr_exec_ci\n");
// eleCount is the number of uint16_t elements in one full-width vector.
575 typedef typename c7x::make_full_vector<uint16_t>::type vec;
576 int32_t eleCount = c7x::element_count_of<vec>::value;
578 #if DSPLIB_DEBUGPRINT
579 printf(
"Enter eleCount %d\n", eleCount);
581 uint8_t *pBlock = pKerPrivArgs->
bufPblock;
// Both streaming engines use the same template; only the base address differs.
586 __SE0_OPEN(pInLocal1, se0Params);
587 __SE1_OPEN(pInLocal2, se0Params);
589 #if DSPLIB_DEBUGPRINT
590 printf(
"DSPLIB_DEBUGPRINT blockSize %d\n", blockSize);
// Accumulators are vectors of uint64_t, the same width as the output.
593 typedef typename c7x::make_full_vector<uint64_t>::type vec_out;
596 out_dotp = (vec_out) 0;
597 uint64_t result_dotp = 0;
600 out_sqrAdd = (vec_out) 0;
601 uint64_t result_sqrAdd = 0;
// One vector from each stream per iteration.
602 for (int32_t counter = 0; counter < blockSize; counter += eleCount) {
603 vec a = c7x::strm_eng<0, vec>::get_adv();
604 vec b = c7x::strm_eng<1, vec>::get_adv();
606 out_dotp += __vdotp4uhd_vvv(a, b);
608 out_sqrAdd += __vdotp4uhd_vvv(b, b);
// Reduce each accumulator vector to a scalar.
612 result_dotp = __horizontal_add(out_dotp);
613 result_sqrAdd = __horizontal_add(out_sqrAdd);
// Output layout: element 0 is the dot product, element 1 the sum of squares.
615 *pOutLocal = result_dotp;
616 *++pOutLocal = result_sqrAdd;
// Fragment of the execution path for int32_t inputs with int64_t outputs.
// Each iteration multiplies one vector pair with __vmpywd_vvw, which fills two
// output vectors; their sum is added to the running int64_t accumulator. Two
// int64_t results are written: the dot product first, the sum of squares of
// the second input second.
// NOTE(review): the signature, the code that fills se0Params and the
// declarations of out0_* / out1_* are not visible in this listing.
635 int32_t blockSize = pKerPrivArgs->
blockSize;
637 __SE_TEMPLATE_v1 se0Params;
639 int32_t *restrict pInLocal1 = (int32_t *) pIn1;
640 int32_t *restrict pInLocal2 = (int32_t *) pIn2;
641 int64_t *restrict pOutLocal = (int64_t *) pOut;
643 #if DSPLIB_DEBUGPRINT
644 printf(
"Enter DSPLIB_dotp_sqr_exec_ci\n");
// eleCount is the number of int32_t elements in one full-width vector.
647 typedef typename c7x::make_full_vector<int32_t>::type vec;
648 int32_t eleCount = c7x::element_count_of<vec>::value;
650 #if DSPLIB_DEBUGPRINT
651 printf(
"Enter eleCount %d\n", eleCount);
653 uint8_t *pBlock = pKerPrivArgs->
bufPblock;
// Both streaming engines use the same template; only the base address differs.
658 __SE0_OPEN(pInLocal1, se0Params);
659 __SE1_OPEN(pInLocal2, se0Params);
661 #if DSPLIB_DEBUGPRINT
662 printf(
"DSPLIB_DEBUGPRINT blockSize %d\n", blockSize);
// Accumulators are vectors of int64_t, the same width as the output.
665 typedef typename c7x::make_full_vector<int64_t>::type vec_out;
670 out_dotp = (vec_out) 0;
671 int64_t result_dotp = 0;
676 out_sqrAdd = (vec_out) 0;
677 int64_t result_sqrAdd = 0;
// One vector from each stream per iteration; the widening multiply returns
// its products split across two vectors, which are summed before accumulating.
679 for (int32_t counter = 0; counter < blockSize; counter += eleCount) {
680 vec a = c7x::strm_eng<0, vec>::get_adv();
681 vec b = c7x::strm_eng<1, vec>::get_adv();
683 __vmpywd_vvw(a, b, out0_dotp, out1_dotp);
684 out_dotp += (out0_dotp + out1_dotp);
686 __vmpywd_vvw(b, b, out0_sqrAdd, out1_sqrAdd);
687 out_sqrAdd += (out0_sqrAdd + out1_sqrAdd);
// Reduce each accumulator vector to a scalar.
691 result_dotp = __horizontal_add(out_dotp);
692 result_sqrAdd = __horizontal_add(out_sqrAdd);
// Output layout: element 0 is the dot product, element 1 the sum of squares.
697 *pOutLocal = result_dotp;
698 *++pOutLocal = result_sqrAdd;
// Fragment of the execution path for uint32_t inputs with uint64_t outputs.
// Each iteration multiplies one vector pair with __vmpyuwd_vvw, which fills
// two output vectors; their sum is added to the running uint64_t accumulator.
// Two uint64_t results are written: the dot product first, the sum of squares
// of the second input second.
// NOTE(review): the signature, the code that fills se0Params and the
// declarations of out0_* / out1_* are not visible in this listing.
717 int32_t blockSize = pKerPrivArgs->
blockSize;
719 __SE_TEMPLATE_v1 se0Params;
721 uint32_t *restrict pInLocal1 = (uint32_t *) pIn1;
722 uint32_t *restrict pInLocal2 = (uint32_t *) pIn2;
723 uint64_t *restrict pOutLocal = (uint64_t *) pOut;
725 #if DSPLIB_DEBUGPRINT
726 printf(
"Enter DSPLIB_dotp_sqr_exec_ci\n");
// eleCount is the number of uint32_t elements in one full-width vector.
729 typedef typename c7x::make_full_vector<uint32_t>::type vec;
730 int32_t eleCount = c7x::element_count_of<vec>::value;
732 #if DSPLIB_DEBUGPRINT
733 printf(
"Enter eleCount %d\n", eleCount);
735 uint8_t *pBlock = pKerPrivArgs->
bufPblock;
// Both streaming engines use the same template; only the base address differs.
740 __SE0_OPEN(pInLocal1, se0Params);
741 __SE1_OPEN(pInLocal2, se0Params);
743 #if DSPLIB_DEBUGPRINT
744 printf(
"DSPLIB_DEBUGPRINT blockSize %d\n", blockSize);
// Accumulators are vectors of uint64_t, the same width as the output.
747 typedef typename c7x::make_full_vector<uint64_t>::type vec_out;
752 out_dotp = (vec_out) 0;
753 uint64_t result_dotp = 0;
758 out_sqrAdd = (vec_out) 0;
759 uint64_t result_sqrAdd = 0;
// One vector from each stream per iteration; the widening multiply returns
// its products split across two vectors, which are summed before accumulating.
760 for (int32_t counter = 0; counter < blockSize; counter += eleCount) {
761 vec a = c7x::strm_eng<0, vec>::get_adv();
762 vec b = c7x::strm_eng<1, vec>::get_adv();
764 __vmpyuwd_vvw(a, b, out0_dotp, out1_dotp);
765 out_dotp += (out0_dotp + out1_dotp);
767 __vmpyuwd_vvw(b, b, out0_sqrAdd, out1_sqrAdd);
768 out_sqrAdd += (out0_sqrAdd + out1_sqrAdd);
// Reduce each accumulator vector to a scalar.
772 result_dotp = __horizontal_add(out_dotp);
773 result_sqrAdd = __horizontal_add(out_sqrAdd);
// Output layout: element 0 is the dot product, element 1 the sum of squares.
775 *pOutLocal = result_dotp;
776 *++pOutLocal = result_sqrAdd;
787 void *restrict pOut);
792 void *restrict pOut);
797 void *restrict pOut);
802 void *restrict pOut);
807 void *restrict pOut);
812 void *restrict pOut);
817 void *restrict pOut);
822 void *restrict pOut);
template DSPLIB_STATUS DSPLIB_dotp_sqr_init_ci< uint32_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_dotp_sqr_InitArgs *pKerInitArgs)
DSPLIB_STATUS DSPLIB_dotp_sqr_exec_ci< uint8_t >(DSPLIB_kernelHandle handle, void *restrict pIn1, void *restrict pIn2, void *restrict pOut)
template DSPLIB_STATUS DSPLIB_dotp_sqr_init_ci< int32_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_dotp_sqr_InitArgs *pKerInitArgs)
template DSPLIB_STATUS DSPLIB_dotp_sqr_init_ci< uint8_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_dotp_sqr_InitArgs *pKerInitArgs)
#define SE_SE0_PARAM_OFFSET
DSPLIB_STATUS DSPLIB_dotp_sqr_init_ci< int8_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_dotp_sqr_InitArgs *pKerInitArgs)
DSPLIB_STATUS DSPLIB_dotp_sqr_exec_ci< uint32_t >(DSPLIB_kernelHandle handle, void *restrict pIn1, void *restrict pIn2, void *restrict pOut)
template DSPLIB_STATUS DSPLIB_dotp_sqr_exec_ci< float >(DSPLIB_kernelHandle handle, void *restrict pIn1, void *restrict pIn2, void *restrict pOut)
template DSPLIB_STATUS DSPLIB_dotp_sqr_exec_ci< double >(DSPLIB_kernelHandle handle, void *restrict pIn1, void *restrict pIn2, void *restrict pOut)
template DSPLIB_STATUS DSPLIB_dotp_sqr_init_ci< double >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_dotp_sqr_InitArgs *pKerInitArgs)
template DSPLIB_STATUS DSPLIB_dotp_sqr_init_ci< int16_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_dotp_sqr_InitArgs *pKerInitArgs)
DSPLIB_STATUS DSPLIB_dotp_sqr_exec_ci< int16_t >(DSPLIB_kernelHandle handle, void *restrict pIn1, void *restrict pIn2, void *restrict pOut)
DSPLIB_STATUS DSPLIB_dotp_sqr_init_ci(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_dotp_sqr_InitArgs *pKerInitArgs)
This function is the initialization function for the C7x implementation of the kernel....
DSPLIB_STATUS DSPLIB_dotp_sqr_exec_ci< int32_t >(DSPLIB_kernelHandle handle, void *restrict pIn1, void *restrict pIn2, void *restrict pOut)
template DSPLIB_STATUS DSPLIB_dotp_sqr_init_ci< float >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_dotp_sqr_InitArgs *pKerInitArgs)
DSPLIB_STATUS DSPLIB_dotp_sqr_exec_ci< int8_t >(DSPLIB_kernelHandle handle, void *restrict pIn1, void *restrict pIn2, void *restrict pOut)
static float DSPLIB_horiAdd(c7x::float_vec vector)
DSPLIB_STATUS DSPLIB_dotp_sqr_exec_ci< uint16_t >(DSPLIB_kernelHandle handle, void *restrict pIn1, void *restrict pIn2, void *restrict pOut)
DSPLIB_STATUS DSPLIB_dotp_sqr_exec_ci(DSPLIB_kernelHandle handle, void *restrict pIn1, void *restrict pIn2, void *restrict pOut)
This function is the main execution function for the C7x implementation of the kernel....
template DSPLIB_STATUS DSPLIB_dotp_sqr_init_ci< uint16_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_dotp_sqr_InitArgs *pKerInitArgs)
Header file for kernel's internal use. For the kernel's interface, please see DSPLIB_dotp_sqr.
DSPLIB_STATUS_NAME
The enumeration of all status codes.
void * DSPLIB_kernelHandle
Handle type for DSPLIB operations.
A structure for a 1 dimensional buffer descriptor.
Structure containing the parameters to initialize the kernel.
Structure that is reserved for internal use by the kernel.
uint8_t bufPblock[DSPLIB_DOTP_SQR_IXX_IXX_OXX_PBLOCK_SIZE]
int32_t blockSize
Size of input buffer for different batches DSPLIB_dotp_sqr_init that will be retrieved and used by DS...