DSPLIB User Guide
DSPLIB_dotprod_ci.cpp
Go to the documentation of this file.
1 /******************************************************************************/
5 /* Copyright (C) 2017 Texas Instruments Incorporated - https://www.ti.com/
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * Redistributions of source code must retain the above copyright
12  * notice, this list of conditions and the following disclaimer.
13  *
14  * Redistributions in binary form must reproduce the above copyright
15  * notice, this list of conditions and the following disclaimer in the
16  * documentation and/or other materials provided with the
17  * distribution.
18  *
19  * Neither the name of Texas Instruments Incorporated nor the names of
20  * its contributors may be used to endorse or promote products derived
21  * from this software without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
29  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
30  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
33  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34  *
35  ******************************************************************************/
36 
37 /******************************************************************************
38  * Version 1.0 Date 9/8/23 Author:
39  *****************************************************************************/
40 
41 /*******************************************************************************
42  *
43  * INCLUDES
44  *
45  ******************************************************************************/
46 
47 #include "../common/c71/DSPLIB_inlines.h"
48 #include "DSPLIB_dotprod_priv.h"
49 #include <float.h>
50 
51 /*******************************************************************************
52  *
53  * DEFINES
54  *
55  ******************************************************************************/
56 
57 #define SE_PARAM_BASE (0x0000)
58 #define SE_SE0_PARAM_OFFSET (SE_PARAM_BASE)
59 
60 // Generic initialization
61 template <typename dataType>
63  const DSPLIB_bufParams1D_t *bufParamsIn,
64  const DSPLIB_bufParams1D_t *bufParamsOut,
65  const DSPLIB_dotprod_InitArgs *pKerInitArgs)
66 {
68  __SE_TEMPLATE_v1 se0Params;
69 
70  __SE_ELETYPE SE_ELETYPE;
71  __SE_VECLEN SE_VECLEN;
72 
73  DSPLIB_dotprod_PrivArgs *pKerPrivArgs = (DSPLIB_dotprod_PrivArgs *) handle;
74 
75  uint8_t *pBlock = pKerPrivArgs->bufPblock;
76  int32_t blockSize = pKerPrivArgs->blockSize;
77 
78  typedef typename c7x::make_full_vector<dataType>::type vec;
79 
80  SE_VECLEN = c7x::se_veclen<vec>::value;
81  SE_ELETYPE = c7x::se_eletype<vec>::value;
82 
83 #if DSPLIB_DEBUGPRINT
84  int32_t eleCount = c7x::element_count_of<vec>::value;
85  printf("Enter eleCount %d\n", eleCount);
86 #endif
87 
88  /**********************************************************************/
89  /* Prepare streaming engine 1 to fetch the input */
90  /**********************************************************************/
91  se0Params = __gen_SE_TEMPLATE_v1();
92 
93  se0Params.ICNT0 = blockSize;
94  se0Params.ELETYPE = SE_ELETYPE;
95  se0Params.VECLEN = SE_VECLEN;
96  se0Params.DIMFMT = __SE_DIMFMT_1D;
97 
98  *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_SE0_PARAM_OFFSET) = se0Params;
99 
100  return status;
101 }
102 
103 // int8_t initialization promote to int16_t
104 template <>
106  const DSPLIB_bufParams1D_t *bufParamsIn,
107  const DSPLIB_bufParams1D_t *bufParamsOut,
108  const DSPLIB_dotprod_InitArgs *pKerInitArgs)
109 {
110  DSPLIB_STATUS status = DSPLIB_SUCCESS;
111  __SE_TEMPLATE_v1 se0Params;
112 
113  __SE_ELETYPE SE_ELETYPE;
114  __SE_VECLEN SE_VECLEN;
115 
116  __SE_PROMOTE SE_PROMOTE;
117 
118  DSPLIB_dotprod_PrivArgs *pKerPrivArgs = (DSPLIB_dotprod_PrivArgs *) handle;
119 
120  uint8_t *pBlock = pKerPrivArgs->bufPblock;
121  int32_t blockSize = pKerPrivArgs->blockSize;
122 
123 
124  SE_VECLEN = c7x::se_veclen<c7x::short_vec>::value;
125  SE_ELETYPE = c7x::se_eletype<c7x::char_vec>::value;
126  SE_PROMOTE = __SE_PROMOTE_2X_SIGNEXT;
127 #if DSPLIB_DEBUGPRINT
128  int32_t eleCount = c7x::element_count_of<char_vec>::value;
129  printf("Enter eleCount %d\n", eleCount);
130 #endif
131 
132  /**********************************************************************/
133  /* Prepare streaming engine 1 to fetch the input */
134  /**********************************************************************/
135  se0Params = __gen_SE_TEMPLATE_v1();
136 
137  se0Params.ICNT0 = blockSize;
138  se0Params.ELETYPE = SE_ELETYPE;
139  se0Params.VECLEN = SE_VECLEN;
140  se0Params.DIMFMT = __SE_DIMFMT_1D;
141  se0Params.PROMOTE = SE_PROMOTE;
142 
143  *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_SE0_PARAM_OFFSET) = se0Params;
144 
145  return status;
146 }
147 
149  const DSPLIB_bufParams1D_t *bufParamsIn,
150  const DSPLIB_bufParams1D_t *bufParamsOut,
151  const DSPLIB_dotprod_InitArgs *pKerInitArgs);
152 
154  const DSPLIB_bufParams1D_t *bufParamsIn,
155  const DSPLIB_bufParams1D_t *bufParamsOut,
156  const DSPLIB_dotprod_InitArgs *pKerInitArgs);
157 
159  const DSPLIB_bufParams1D_t *bufParamsIn,
160  const DSPLIB_bufParams1D_t *bufParamsOut,
161  const DSPLIB_dotprod_InitArgs *pKerInitArgs);
162 
164  const DSPLIB_bufParams1D_t *bufParamsIn,
165  const DSPLIB_bufParams1D_t *bufParamsOut,
166  const DSPLIB_dotprod_InitArgs *pKerInitArgs);
167 
169  const DSPLIB_bufParams1D_t *bufParamsIn,
170  const DSPLIB_bufParams1D_t *bufParamsOut,
171  const DSPLIB_dotprod_InitArgs *pKerInitArgs);
172 
174  const DSPLIB_bufParams1D_t *bufParamsIn,
175  const DSPLIB_bufParams1D_t *bufParamsOut,
176  const DSPLIB_dotprod_InitArgs *pKerInitArgs);
177 
179  const DSPLIB_bufParams1D_t *bufParamsIn,
180  const DSPLIB_bufParams1D_t *bufParamsOut,
181  const DSPLIB_dotprod_InitArgs *pKerInitArgs);
182 
184  const DSPLIB_bufParams1D_t *bufParamsIn,
185  const DSPLIB_bufParams1D_t *bufParamsOut,
186  const DSPLIB_dotprod_InitArgs *pKerInitArgs);
187 
188 // This function performs horizontal add of the output vector.
189 // It is used for float and double data types.
190 // The __horizontal_add() intrinsic is used to perform horizontal add on all other datatypes.
191 
// Horizontal add for a float vector: reduces lanes of `vector` to one scalar float.
// `vector` is passed by value, so the in-place folds below only touch the local copy.
// The visible sequence is two halving folds followed by s[0] + s[1], which
// accounts for 8 lanes in total (assuming .lo()/.hi() are the lower/upper halves
// and .s[k] is lane k).
// NOTE(review): if c7x::float_vec has 16 lanes (the commented-out
// __C7X_VEC_SIZE_BYTES__ == 64 case), lanes 2 and 3 of the folded result are
// never added -- confirm the intended vector width.
192 #pragma FUNC_ALWAYS_INLINE
193 static inline float DSPLIB_horiAdd(c7x::float_vec vector)
194 {
195  float sum = 0;
196 
197  vector.lo() = vector.hi() + vector.lo(); // fold 1: add the upper half onto the lower half
198  vector.lo().lo() = vector.lo().hi() + vector.lo().lo(); // fold 2: halve the lower half again
199 //#if __C7X_VEC_SIZE_BYTES__ == 64
200  sum = (float) vector.s[0] + (float) vector.s[1]; // final scalar add of the first two lanes
201 
202  return sum;
203 }
204 
// Horizontal add for a double vector: reduces lanes of `vector` to one scalar double.
// `vector` is passed by value, so the in-place fold below only touches the local copy.
// The visible sequence is one halving fold followed by s[0] + s[1], which
// accounts for 4 lanes in total (assuming .lo()/.hi() are the lower/upper halves
// and .s[k] is lane k).
// NOTE(review): if c7x::double_vec has 8 lanes (the commented-out
// __C7X_VEC_SIZE_BYTES__ == 64 case), lanes 2 and 3 of the folded result are
// never added -- confirm the intended vector width.
205 #pragma FUNC_ALWAYS_INLINE
206 static inline double DSPLIB_horiAdd(c7x::double_vec vector)
207 {
208  double sum = 0;
209 
210  vector.lo() = vector.hi() + vector.lo(); // fold: add the upper half onto the lower half
211 
212 //#if __C7X_VEC_SIZE_BYTES__ == 64
213 
214  sum = (double) vector.s[0] + (double) vector.s[1]; // final scalar add of the first two lanes
215 
216  return sum;
217 }
218 
219 /**********************************************************************/
220 /* Execute for datatypes float and double */
221 /**********************************************************************/
222 
223 // This is the generic implementation of exec_ci. It is used for float and double.
224 // Other datatypes have their own explicit implementation.
// Generic dot-product execution: multiplies the two input streams element-wise,
// accumulates the products, reduces the accumulator to one scalar and stores it.
//
// handle : cast to DSPLIB_dotprod_PrivArgs*; supplies blockSize and bufPblock.
// pIn1   : first input buffer, opened on streaming engine 0.
// pIn2   : second input buffer, opened on streaming engine 1.
// pOut   : receives a single dataType value (the reduced sum).
// Returns DSPLIB_SUCCESS unconditionally; no error path is visible in this body.
//
// The final reduction calls DSPLIB_horiAdd(), so dataType must map to a vector
// type that has a DSPLIB_horiAdd overload.
225 template <typename dataType>
227 DSPLIB_dotprod_exec_ci(DSPLIB_kernelHandle handle, void *restrict pIn1, void *restrict pIn2, void *restrict pOut)
228 {
229  DSPLIB_dotprod_PrivArgs *pKerPrivArgs = (DSPLIB_dotprod_PrivArgs *) handle;
230  int32_t blockSize = pKerPrivArgs->blockSize; // loop bound for the accumulation loop below
231 
232  __SE_TEMPLATE_v1 se0Params;
233 
234  dataType *restrict pInLocal1 = (dataType *) pIn1;
235  dataType *restrict pInLocal2 = (dataType *) pIn2;
236  dataType *restrict pOutLocal = (dataType *) pOut;
237 
238 #if DSPLIB_DEBUGPRINT
239  printf("Enter DSPLIB_dotprod_exec_ci\n");
240 #endif
241 
242  typedef typename c7x::make_full_vector<dataType>::type vec;
243  int32_t eleCount = c7x::element_count_of<vec>::value; // lanes per vector fetch
244 
245  // typedef typename c7x::make_vector<dataType, 4>::type four_element_vec;
246 
247 #if DSPLIB_DEBUGPRINT
248  printf("Enter eleCount %d\n", eleCount);
249 #endif
250  uint8_t *pBlock = pKerPrivArgs->bufPblock;
251 
 // Load the streaming-engine template stored in the parameter block at SE_SE0_PARAM_OFFSET.
252  se0Params = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_SE0_PARAM_OFFSET);
253 
254  // Input samples
 // The same template configures both streams; only the base address differs.
255  __SE0_OPEN(pInLocal1, se0Params);
256  __SE1_OPEN(pInLocal2, se0Params);
257 
258 #if DSPLIB_DEBUGPRINT
259  printf("DSPLIB_DEBUGPRINT blockSize %d\n", blockSize);
260 #endif
261 
 // Four independent partial accumulators, combined into `out` after the loop.
262  vec out;
263  vec out_ab;
264  vec out_cd;
265  vec out_ef;
266  vec out_gh;
267  out = (vec) 0.0;
268  out_ab = (vec) 0.0;
269  out_cd = (vec) 0.0;
270  out_ef = (vec) 0.0;
271  out_gh = (vec) 0.0;
272 
273  dataType result;
 // Each iteration fetches four vectors from each stream (4 * eleCount elements).
 // NOTE(review): when blockSize is not a multiple of 4 * eleCount the last
 // iteration fetches past blockSize elements; presumably the streaming engine
 // supplies zeros beyond ICNT0 -- confirm.
274  for (int32_t counter = 0; counter < blockSize; counter += eleCount * 4) {
275  vec a = c7x::strm_eng<0, vec>::get_adv();
276  vec b = c7x::strm_eng<1, vec>::get_adv();
277 
278  out_ab += a * b;
279 
280  vec c = c7x::strm_eng<0, vec>::get_adv();
281  vec d = c7x::strm_eng<1, vec>::get_adv();
282 
283  out_cd += c * d;
284 
285  vec e = c7x::strm_eng<0, vec>::get_adv();
286  vec f = c7x::strm_eng<1, vec>::get_adv();
287 
288  out_ef += e * f;
289 
290  vec g = c7x::strm_eng<0, vec>::get_adv();
291  vec h = c7x::strm_eng<1, vec>::get_adv();
292 
293  out_gh += g * h;
294  }
295 
296  out = out_ab + out_cd + out_ef + out_gh; // merge the four partial sums lane-wise
297 
298  // result = DSPLIB_horiAdd<vec, dataType, dataType>(out);
299  result = DSPLIB_horiAdd(out); // reduce the vector accumulator to a scalar
300 
301  *pOutLocal = result;
302 
303  __SE0_CLOSE();
304  __SE1_CLOSE();
305 
306  return DSPLIB_SUCCESS;
307 }
308 
309 /**********************************************************************/
310 /* Execute for datatype int8_t */
311 /**********************************************************************/
312 // The input datatype of int8_t is promoted to int16_t using the streaming engine.
313 // The dotprod is then implemented as int16_t.
314 // When completed, the int64_t result is cast down to int32_t.
315 template <>
317  void *restrict pIn1,
318  void *restrict pIn2,
319  void *restrict pOut)
320 {
321  DSPLIB_dotprod_PrivArgs *pKerPrivArgs = (DSPLIB_dotprod_PrivArgs *) handle;
322  int32_t blockSize = pKerPrivArgs->blockSize;
323 
324  __SE_TEMPLATE_v1 se0Params;
325 
326  int16_t *restrict pInLocal1 = (int16_t *) pIn1;
327  int16_t *restrict pInLocal2 = (int16_t *) pIn2;
328  int32_t *restrict pOutLocal = (int32_t *) pOut;
329 
330 #if DSPLIB_DEBUGPRINT
331  printf("Enter DSPLIB_dotprod_exec_ci\n");
332 #endif
333 
334  typedef typename c7x::make_full_vector<int16_t>::type vec;
335  int32_t eleCount = c7x::element_count_of<vec>::value;
336 
337 #if DSPLIB_DEBUGPRINT
338  printf("Enter eleCount %d\n", eleCount);
339 #endif
340  uint8_t *pBlock = pKerPrivArgs->bufPblock;
341 
342  se0Params = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_SE0_PARAM_OFFSET);
343 
344  // Input samples
345  __SE0_OPEN(pInLocal1, se0Params);
346  __SE1_OPEN(pInLocal2, se0Params);
347 
348 #if DSPLIB_DEBUGPRINT
349  printf("DSPLIB_DEBUGPRINT blockSize %d\n", blockSize);
350 #endif
351 
352  typedef typename c7x::make_full_vector<int64_t>::type vec_out;
353 
354  vec_out out;
355  out = (vec_out) 0;
356  int32_t result = 0;
357  for (int32_t counter = 0; counter < blockSize; counter += eleCount) {
358  vec a = c7x::strm_eng<0, vec>::get_adv();
359  vec b = c7x::strm_eng<1, vec>::get_adv();
360 
361  out += __vdotp4hd_vvv(a, b);
362  }
363 
364  // use intrensic for horizontal add
365  // cast the int64_t output of __horizontal_add to int32_t.
366  result = (int32_t) __horizontal_add(out);
367 
368  *pOutLocal = result;
369 
370  __SE0_CLOSE();
371  __SE1_CLOSE();
372 
373  return DSPLIB_SUCCESS;
374 }
375 
376 /**********************************************************************/
377 /* Execute for datatype uint8_t */
378 /**********************************************************************/
379 
380 template <>
382  void *restrict pIn1,
383  void *restrict pIn2,
384  void *restrict pOut)
385 {
386  DSPLIB_dotprod_PrivArgs *pKerPrivArgs = (DSPLIB_dotprod_PrivArgs *) handle;
387  int32_t blockSize = pKerPrivArgs->blockSize;
388 
389  __SE_TEMPLATE_v1 se0Params;
390 
391  uint8_t *restrict pInLocal1 = (uint8_t *) pIn1;
392  uint8_t *restrict pInLocal2 = (uint8_t *) pIn2;
393  uint32_t *restrict pOutLocal = (uint32_t *) pOut;
394 
395 #if DSPLIB_DEBUGPRINT
396  printf("Enter DSPLIB_dotprod_exec_ci\n");
397 #endif
398 
399  typedef typename c7x::make_full_vector<uint8_t>::type vec;
400  int32_t eleCount = c7x::element_count_of<vec>::value;
401 
402 #if DSPLIB_DEBUGPRINT
403  printf("Enter eleCount %d\n", eleCount);
404 #endif
405  uint8_t *pBlock = pKerPrivArgs->bufPblock;
406 
407  se0Params = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_SE0_PARAM_OFFSET);
408 
409  // Input samples
410  __SE0_OPEN(pInLocal1, se0Params);
411  __SE1_OPEN(pInLocal2, se0Params);
412 
413 #if DSPLIB_DEBUGPRINT
414  printf("DSPLIB_DEBUGPRINT blockSize %d\n", blockSize);
415 #endif
416 
417  typedef typename c7x::make_full_vector<uint32_t>::type vec_out;
418 
419  vec_out out;
420  out = (vec_out) 0;
421  uint32_t result = 0;
422 
423  for (int32_t counter = 0; counter < blockSize; counter += eleCount) {
424  vec a = c7x::strm_eng<0, vec>::get_adv();
425  vec b = c7x::strm_eng<1, vec>::get_adv();
426 
427  out += __vdotp4ubw_vvv(a, b);
428  }
429  // use intrensic for horizontal add
430  // cast the uint64_t output of __horizontal_add to uint32_t.
431 
432  result = (uint32_t) __horizontal_add(out);
433 
434  *pOutLocal = result;
435 
436  __SE0_CLOSE();
437  __SE1_CLOSE();
438 
439  return DSPLIB_SUCCESS;
440 }
441 
442 /**********************************************************************/
443 /* Execute for datatype int16_t */
444 /**********************************************************************/
445 
446 template <>
448  void *restrict pIn1,
449  void *restrict pIn2,
450  void *restrict pOut)
451 {
452  DSPLIB_dotprod_PrivArgs *pKerPrivArgs = (DSPLIB_dotprod_PrivArgs *) handle;
453  int32_t blockSize = pKerPrivArgs->blockSize;
454 
455  __SE_TEMPLATE_v1 se0Params;
456 
457  int16_t *restrict pInLocal1 = (int16_t *) pIn1;
458  int16_t *restrict pInLocal2 = (int16_t *) pIn2;
459  int64_t *restrict pOutLocal = (int64_t *) pOut;
460 
461 #if DSPLIB_DEBUGPRINT
462  printf("Enter DSPLIB_dotprod_exec_ci\n");
463 #endif
464 
465  typedef typename c7x::make_full_vector<int16_t>::type vec; // short16
466  int32_t eleCount = c7x::element_count_of<vec>::value;
467 
468 #if DSPLIB_DEBUGPRINT
469  printf("Enter eleCount %d\n", eleCount);
470 #endif
471  uint8_t *pBlock = pKerPrivArgs->bufPblock;
472 
473  se0Params = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_SE0_PARAM_OFFSET);
474 
475  // Input samples
476  __SE0_OPEN(pInLocal1, se0Params);
477  __SE1_OPEN(pInLocal2, se0Params);
478 
479 #if DSPLIB_DEBUGPRINT
480  printf("DSPLIB_DEBUGPRINT blockSize %d\n", blockSize);
481 #endif
482 
483  typedef typename c7x::make_full_vector<int64_t>::type vec_out;
484 
485  vec_out out;
486  out = (vec_out) 0;
487  int64_t result = 0;
488  for (int32_t counter = 0; counter < blockSize; counter += eleCount) {
489  vec a = c7x::strm_eng<0, vec>::get_adv();
490  vec b = c7x::strm_eng<1, vec>::get_adv();
491 
492  out += __vdotp4hd_vvv(a, b);
493  }
494 
495  // use intrensic for horizontal add
496  result = __horizontal_add(out);
497 
498  *pOutLocal = result;
499 
500  __SE0_CLOSE();
501  __SE1_CLOSE();
502 
503  return DSPLIB_SUCCESS;
504 }
505 
506 /**********************************************************************/
507 /* Execute for datatype uint16_t */
508 /**********************************************************************/
509 
510 template <>
512  void *restrict pIn1,
513  void *restrict pIn2,
514  void *restrict pOut)
515 {
516  DSPLIB_dotprod_PrivArgs *pKerPrivArgs = (DSPLIB_dotprod_PrivArgs *) handle;
517  int32_t blockSize = pKerPrivArgs->blockSize;
518 
519  __SE_TEMPLATE_v1 se0Params;
520 
521  uint16_t *restrict pInLocal1 = (uint16_t *) pIn1;
522  uint16_t *restrict pInLocal2 = (uint16_t *) pIn2;
523  uint64_t *restrict pOutLocal = (uint64_t *) pOut;
524 
525 #if DSPLIB_DEBUGPRINT
526  printf("Enter DSPLIB_dotprod_exec_ci\n");
527 #endif
528 
529  typedef typename c7x::make_full_vector<uint16_t>::type vec;
530  int32_t eleCount = c7x::element_count_of<vec>::value;
531 
532 #if DSPLIB_DEBUGPRINT
533  printf("Enter eleCount %d\n", eleCount);
534 #endif
535  uint8_t *pBlock = pKerPrivArgs->bufPblock;
536 
537  se0Params = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_SE0_PARAM_OFFSET);
538 
539  // Input samples
540  __SE0_OPEN(pInLocal1, se0Params);
541  __SE1_OPEN(pInLocal2, se0Params);
542 
543 #if DSPLIB_DEBUGPRINT
544  printf("DSPLIB_DEBUGPRINT blockSize %d\n", blockSize);
545 #endif
546 
547  typedef typename c7x::make_full_vector<uint64_t>::type vec_out;
548 
549  vec_out out;
550  out = (vec_out) 0;
551  uint64_t result = 0;
552  for (int32_t counter = 0; counter < blockSize; counter += eleCount) {
553  vec a = c7x::strm_eng<0, vec>::get_adv();
554  vec b = c7x::strm_eng<1, vec>::get_adv();
555 
556  out += __vdotp4uhd_vvv(a, b);
557  }
558 
559  // use intrensic for horizontal add
560  result = __horizontal_add(out);
561 
562  *pOutLocal = result;
563 
564  __SE0_CLOSE();
565  __SE1_CLOSE();
566 
567  return DSPLIB_SUCCESS;
568 }
569 
570 /**********************************************************************/
571 /* Execute for datatype int32_t */
572 /**********************************************************************/
573 
574 template <>
576  void *restrict pIn1,
577  void *restrict pIn2,
578  void *restrict pOut)
579 {
580  DSPLIB_dotprod_PrivArgs *pKerPrivArgs = (DSPLIB_dotprod_PrivArgs *) handle;
581  int32_t blockSize = pKerPrivArgs->blockSize;
582 
583  __SE_TEMPLATE_v1 se0Params;
584 
585  int32_t *restrict pInLocal1 = (int32_t *) pIn1;
586  int32_t *restrict pInLocal2 = (int32_t *) pIn2;
587  int64_t *restrict pOutLocal = (int64_t *) pOut;
588 
589 #if DSPLIB_DEBUGPRINT
590  printf("Enter DSPLIB_dotprod_exec_ci\n");
591 #endif
592 
593  typedef typename c7x::make_full_vector<int32_t>::type vec; // short16
594  int32_t eleCount = c7x::element_count_of<vec>::value;
595 
596 #if DSPLIB_DEBUGPRINT
597  printf("Enter eleCount %d\n", eleCount);
598 #endif
599  uint8_t *pBlock = pKerPrivArgs->bufPblock;
600 
601  se0Params = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_SE0_PARAM_OFFSET);
602 
603  // Input samples
604  __SE0_OPEN(pInLocal1, se0Params);
605  __SE1_OPEN(pInLocal2, se0Params);
606 
607 #if DSPLIB_DEBUGPRINT
608  printf("DSPLIB_DEBUGPRINT blockSize %d\n", blockSize);
609 #endif
610 
611  typedef typename c7x::make_full_vector<int64_t>::type vec_out;
612 
613  vec_out out;
614  vec_out out0;
615  vec_out out1;
616  out = (vec_out) 0;
617  int64_t result = 0;
618  for (int32_t counter = 0; counter < blockSize; counter += eleCount) {
619  vec a = c7x::strm_eng<0, vec>::get_adv();
620  vec b = c7x::strm_eng<1, vec>::get_adv();
621 
622  __vmpywd_vvw(a, b, out0, out1);
623  out += (out0 + out1);
624  }
625 
626  // use intrensic for horizontal add
627  result = __horizontal_add(out);
628 
629  *pOutLocal = result;
630 
631  __SE0_CLOSE();
632  __SE1_CLOSE();
633 
634  return DSPLIB_SUCCESS;
635 }
636 
637 /**********************************************************************/
638 /* Execute for datatype uint32_t */
639 /**********************************************************************/
640 
641 template <>
643  void *restrict pIn1,
644  void *restrict pIn2,
645  void *restrict pOut)
646 {
647  DSPLIB_dotprod_PrivArgs *pKerPrivArgs = (DSPLIB_dotprod_PrivArgs *) handle;
648  int32_t blockSize = pKerPrivArgs->blockSize;
649 
650  __SE_TEMPLATE_v1 se0Params;
651 
652  uint32_t *restrict pInLocal1 = (uint32_t *) pIn1;
653  uint32_t *restrict pInLocal2 = (uint32_t *) pIn2;
654  uint64_t *restrict pOutLocal = (uint64_t *) pOut;
655 
656 #if DSPLIB_DEBUGPRINT
657  printf("Enter DSPLIB_dotprod_exec_ci\n");
658 #endif
659 
660  typedef typename c7x::make_full_vector<uint32_t>::type vec; // short16
661  int32_t eleCount = c7x::element_count_of<vec>::value;
662 
663 #if DSPLIB_DEBUGPRINT
664  printf("Enter eleCount %d\n", eleCount);
665 #endif
666  uint8_t *pBlock = pKerPrivArgs->bufPblock;
667 
668  se0Params = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_SE0_PARAM_OFFSET);
669 
670  // Input samples
671  __SE0_OPEN(pInLocal1, se0Params);
672  __SE1_OPEN(pInLocal2, se0Params);
673 
674 #if DSPLIB_DEBUGPRINT
675  printf("DSPLIB_DEBUGPRINT blockSize %d\n", blockSize);
676 #endif
677 
678  typedef typename c7x::make_full_vector<uint64_t>::type vec_out;
679 
680  vec_out out;
681  vec_out out0;
682  vec_out out1;
683  out = (vec_out) 0;
684  uint64_t result = 0;
685  for (int32_t counter = 0; counter < blockSize; counter += eleCount) {
686  vec a = c7x::strm_eng<0, vec>::get_adv();
687  vec b = c7x::strm_eng<1, vec>::get_adv();
688 
689  // out += __vdotp2xwd_vvv(a, b); This did not provide the correct result
690 
691  __vmpyuwd_vvw(a, b, out0, out1);
692  out += (out0 + out1);
693  }
694 
695  // use intrensic for horizontal add
696  result = __horizontal_add(out);
697 
698  *pOutLocal = result;
699 
700  __SE0_CLOSE();
701  __SE1_CLOSE();
702 
703  return DSPLIB_SUCCESS;
704 }
705 
707  void *restrict pIn1,
708  void *restrict pIn2,
709  void *restrict pOut);
710 
712  void *restrict pIn1,
713  void *restrict pIn2,
714  void *restrict pOut);
715 
717  void *restrict pIn1,
718  void *restrict pIn2,
719  void *restrict pOut);
720 
722  void *restrict pIn1,
723  void *restrict pIn2,
724  void *restrict pOut);
725 
727  void *restrict pIn1,
728  void *restrict pIn2,
729  void *restrict pOut);
730 
732  void *restrict pIn1,
733  void *restrict pIn2,
734  void *restrict pOut);
735 
737  void *restrict pIn1,
738  void *restrict pIn2,
739  void *restrict pOut);
740 
742  void *restrict pIn1,
743  void *restrict pIn2,
744  void *restrict pOut);
template DSPLIB_STATUS DSPLIB_dotprod_init_ci< int16_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_dotprod_InitArgs *pKerInitArgs)
template DSPLIB_STATUS DSPLIB_dotprod_init_ci< double >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_dotprod_InitArgs *pKerInitArgs)
#define SE_SE0_PARAM_OFFSET
template DSPLIB_STATUS DSPLIB_dotprod_exec_ci< float >(DSPLIB_kernelHandle handle, void *restrict pIn1, void *restrict pIn2, void *restrict pOut)
template DSPLIB_STATUS DSPLIB_dotprod_init_ci< uint32_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_dotprod_InitArgs *pKerInitArgs)
DSPLIB_STATUS DSPLIB_dotprod_exec_ci< int16_t >(DSPLIB_kernelHandle handle, void *restrict pIn1, void *restrict pIn2, void *restrict pOut)
DSPLIB_STATUS DSPLIB_dotprod_exec_ci< uint8_t >(DSPLIB_kernelHandle handle, void *restrict pIn1, void *restrict pIn2, void *restrict pOut)
DSPLIB_STATUS DSPLIB_dotprod_exec_ci< uint32_t >(DSPLIB_kernelHandle handle, void *restrict pIn1, void *restrict pIn2, void *restrict pOut)
DSPLIB_STATUS DSPLIB_dotprod_init_ci< int8_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_dotprod_InitArgs *pKerInitArgs)
DSPLIB_STATUS DSPLIB_dotprod_exec_ci< int32_t >(DSPLIB_kernelHandle handle, void *restrict pIn1, void *restrict pIn2, void *restrict pOut)
DSPLIB_STATUS DSPLIB_dotprod_exec_ci< uint16_t >(DSPLIB_kernelHandle handle, void *restrict pIn1, void *restrict pIn2, void *restrict pOut)
template DSPLIB_STATUS DSPLIB_dotprod_exec_ci< double >(DSPLIB_kernelHandle handle, void *restrict pIn1, void *restrict pIn2, void *restrict pOut)
DSPLIB_STATUS DSPLIB_dotprod_exec_ci< int8_t >(DSPLIB_kernelHandle handle, void *restrict pIn1, void *restrict pIn2, void *restrict pOut)
template DSPLIB_STATUS DSPLIB_dotprod_init_ci< uint8_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_dotprod_InitArgs *pKerInitArgs)
DSPLIB_STATUS DSPLIB_dotprod_exec_ci(DSPLIB_kernelHandle handle, void *restrict pIn1, void *restrict pIn2, void *restrict pOut)
This function is the main execution function for the C7x implementation of the kernel....
template DSPLIB_STATUS DSPLIB_dotprod_init_ci< float >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_dotprod_InitArgs *pKerInitArgs)
static float DSPLIB_horiAdd(c7x::float_vec vector)
template DSPLIB_STATUS DSPLIB_dotprod_init_ci< uint16_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_dotprod_InitArgs *pKerInitArgs)
DSPLIB_STATUS DSPLIB_dotprod_init_ci(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_dotprod_InitArgs *pKerInitArgs)
This function is the initialization function for the C7x implementation of the kernel....
template DSPLIB_STATUS DSPLIB_dotprod_init_ci< int32_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_dotprod_InitArgs *pKerInitArgs)
Header file for kernel's internal use. For the kernel's interface, please see DSPLIB_dotprod.
DSPLIB_STATUS_NAME
The enumeration of all status codes.
Definition: DSPLIB_types.h:151
void * DSPLIB_kernelHandle
Handle type for DSPLIB operations.
Definition: DSPLIB_types.h:172
@ DSPLIB_SUCCESS
Definition: DSPLIB_types.h:152
A structure for a 1 dimensional buffer descriptor.
Structure containing the parameters to initialize the kernel.
Structure that is reserved for internal use by the kernel.
int32_t blockSize
Size of input buffer for different batches DSPLIB_dotprod_init that will be retrieved and used by DSP...
uint8_t bufPblock[DSPLIB_DOTPROD_IXX_IXX_OXX_PBLOCK_SIZE]