DSPLIB User Guide
DSPLIB_dotprod_ci.cpp
Go to the documentation of this file.
1 /******************************************************************************/
5 /* Copyright (C) 2017 Texas Instruments Incorporated - https://www.ti.com/
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * Redistributions of source code must retain the above copyright
12  * notice, this list of conditions and the following disclaimer.
13  *
14  * Redistributions in binary form must reproduce the above copyright
15  * notice, this list of conditions and the following disclaimer in the
16  * documentation and/or other materials provided with the
17  * distribution.
18  *
19  * Neither the name of Texas Instruments Incorporated nor the names of
20  * its contributors may be used to endorse or promote products derived
21  * from this software without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
29  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
30  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
33  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34  *
35  ******************************************************************************/
36 
37 /******************************************************************************
38  * Version 1.0 Date 9/8/23 Author:
39  *****************************************************************************/
40 
41 /*******************************************************************************
42  *
43  * INCLUDES
44  *
45  ******************************************************************************/
46 
47 #include "../common/c71/DSPLIB_inlines.h"
48 #include "DSPLIB_dotprod_priv.h"
49 #include <float.h>
50 
51 /*******************************************************************************
52  *
53  * DEFINES
54  *
55  ******************************************************************************/
56 
// Base byte offset of the streaming-engine parameter area inside the kernel's parameter block.
57 #define SE_PARAM_BASE (0x0000)
// Byte offset (added to the bufPblock pointer) at which the SE template is stored by the
// init functions and read back by the exec functions in this file.
58 #define SE_SE0_PARAM_OFFSET (SE_PARAM_BASE)
59 
60 // Generic initialization
// Builds a 1-D streaming-engine (SE) template for element type dataType and stores it
// in the kernel's parameter block (bufPblock) at byte offset SE_SE0_PARAM_OFFSET.
//   handle        : cast to DSPLIB_dotprod_PrivArgs*; supplies bufPblock and blockSize.
//   bufParamsIn,
//   bufParamsOut,
//   pKerInitArgs  : not referenced anywhere in the visible body.
//   returns       : the local `status`.
// NOTE(review): this listing skips its own lines 62 and 67, so the line that opens the
// signature and the declaration/initialisation of `status` are not shown here.
61 template <typename dataType>
63  const DSPLIB_bufParams1D_t *bufParamsIn,
64  const DSPLIB_bufParams1D_t *bufParamsOut,
65  const DSPLIB_dotprod_InitArgs *pKerInitArgs)
66 {
68  __SE_TEMPLATE_v1 se0Params;
69 
70  __SE_ELETYPE SE_ELETYPE;
71  __SE_VECLEN SE_VECLEN;
72 
73  DSPLIB_dotprod_PrivArgs *pKerPrivArgs = (DSPLIB_dotprod_PrivArgs *) handle;
74 
75  uint8_t *pBlock = pKerPrivArgs->bufPblock;
76  int32_t blockSize = pKerPrivArgs->blockSize;
77 
    // Full-width vector of dataType; the SE vector length and element type are derived from it.
78  typedef typename c7x::make_full_vector<dataType>::type vec;
79 
80  SE_VECLEN = c7x::se_veclen<vec>::value;
81  SE_ELETYPE = c7x::se_eletype<vec>::value;
82 
83 #if DSPLIB_DEBUGPRINT
84  int32_t eleCount = c7x::element_count_of<vec>::value;
85  printf("Enter eleCount %d\n", eleCount);
86 #endif
87 
88  /**********************************************************************/
89  /* Prepare the streaming engine template used to fetch the input      */
90  /**********************************************************************/
    // Start from the default template, then override only the fields set below.
91  se0Params = __gen_SE_TEMPLATE_v1();
92 
    // One-dimensional stream: the dimension-0 count is blockSize.
93  se0Params.ICNT0 = blockSize;
94  se0Params.ELETYPE = SE_ELETYPE;
95  se0Params.VECLEN = SE_VECLEN;
96  se0Params.DIMFMT = __SE_DIMFMT_1D;
97 
    // Persist the template in the parameter block so the exec function can reload it.
98  *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_SE0_PARAM_OFFSET) = se0Params;
99 
100  return status;
101 }
102 
103 // int8_t initialization promote to int16_t
// Specialisation of the init routine for int8_t input. It builds a 1-D SE template that
// reads char elements with PROMOTE = __SE_PROMOTE_2X_SIGNEXT and a vector length taken
// from c7x::short_vec, then stores it in bufPblock at byte offset SE_SE0_PARAM_OFFSET.
//   handle        : cast to DSPLIB_dotprod_PrivArgs*; supplies bufPblock and blockSize.
//   bufParamsIn,
//   bufParamsOut,
//   pKerInitArgs  : not referenced anywhere in the visible body.
//   returns       : DSPLIB_SUCCESS (status is never modified after initialisation).
// NOTE(review): this listing skips its own line 105, so the line that opens the
// signature is not shown here.
104 template <>
106  const DSPLIB_bufParams1D_t *bufParamsIn,
107  const DSPLIB_bufParams1D_t *bufParamsOut,
108  const DSPLIB_dotprod_InitArgs *pKerInitArgs)
109 {
110  DSPLIB_STATUS status = DSPLIB_SUCCESS;
111  __SE_TEMPLATE_v1 se0Params;
112 
113  __SE_ELETYPE SE_ELETYPE;
114  __SE_VECLEN SE_VECLEN;
115 
116  __SE_PROMOTE SE_PROMOTE;
117 
118  DSPLIB_dotprod_PrivArgs *pKerPrivArgs = (DSPLIB_dotprod_PrivArgs *) handle;
119 
120  uint8_t *pBlock = pKerPrivArgs->bufPblock;
121  int32_t blockSize = pKerPrivArgs->blockSize;
122 
123 
    // Element type is that of the char vector (what is read), vector length is that of the
    // short vector (what is delivered after the 2x promotion).
124  SE_VECLEN = c7x::se_veclen<c7x::short_vec>::value;
125  SE_ELETYPE = c7x::se_eletype<c7x::char_vec>::value;
126  SE_PROMOTE = __SE_PROMOTE_2X_SIGNEXT;
127 #if DSPLIB_DEBUGPRINT
    // NOTE(review): `char_vec` is unqualified here but written `c7x::char_vec` three lines
    // up; confirm this resolves when DSPLIB_DEBUGPRINT is enabled.
128  int32_t eleCount = c7x::element_count_of<char_vec>::value;
129  printf("Enter eleCount %d\n", eleCount);
130 #endif
131 
132  /**********************************************************************/
133  /* Prepare the streaming engine template used to fetch the input      */
134  /**********************************************************************/
    // Start from the default template, then override only the fields set below.
135  se0Params = __gen_SE_TEMPLATE_v1();
136 
    // One-dimensional stream: the dimension-0 count is blockSize.
137  se0Params.ICNT0 = blockSize;
138  se0Params.ELETYPE = SE_ELETYPE;
139  se0Params.VECLEN = SE_VECLEN;
140  se0Params.DIMFMT = __SE_DIMFMT_1D;
141  se0Params.PROMOTE = SE_PROMOTE;
142 
    // Persist the template in the parameter block so the exec function can reload it.
143  *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_SE0_PARAM_OFFSET) = se0Params;
144 
145  return status;
146 }
147 
// Trailing parameter lines of eight declarations that share the init-function parameter
// list (bufParamsIn, bufParamsOut, pKerInitArgs) and end in ';'.
// NOTE(review): the first line of each declaration (listing lines 148, 153, 158, 163,
// 168, 173, 178 and 183) is absent from this listing. Presumably these are the
// per-datatype explicit instantiations of DSPLIB_dotprod_init_ci; confirm against the
// original source file.
149  const DSPLIB_bufParams1D_t *bufParamsIn,
150  const DSPLIB_bufParams1D_t *bufParamsOut,
151  const DSPLIB_dotprod_InitArgs *pKerInitArgs);
152 
154  const DSPLIB_bufParams1D_t *bufParamsIn,
155  const DSPLIB_bufParams1D_t *bufParamsOut,
156  const DSPLIB_dotprod_InitArgs *pKerInitArgs);
157 
159  const DSPLIB_bufParams1D_t *bufParamsIn,
160  const DSPLIB_bufParams1D_t *bufParamsOut,
161  const DSPLIB_dotprod_InitArgs *pKerInitArgs);
162 
164  const DSPLIB_bufParams1D_t *bufParamsIn,
165  const DSPLIB_bufParams1D_t *bufParamsOut,
166  const DSPLIB_dotprod_InitArgs *pKerInitArgs);
167 
169  const DSPLIB_bufParams1D_t *bufParamsIn,
170  const DSPLIB_bufParams1D_t *bufParamsOut,
171  const DSPLIB_dotprod_InitArgs *pKerInitArgs);
172 
174  const DSPLIB_bufParams1D_t *bufParamsIn,
175  const DSPLIB_bufParams1D_t *bufParamsOut,
176  const DSPLIB_dotprod_InitArgs *pKerInitArgs);
177 
179  const DSPLIB_bufParams1D_t *bufParamsIn,
180  const DSPLIB_bufParams1D_t *bufParamsOut,
181  const DSPLIB_dotprod_InitArgs *pKerInitArgs);
182 
184  const DSPLIB_bufParams1D_t *bufParamsIn,
185  const DSPLIB_bufParams1D_t *bufParamsOut,
186  const DSPLIB_dotprod_InitArgs *pKerInitArgs);
187 
188 // This function performs horizontal add of the output vector.
189 // It is used for float and double data types.
190 // The __horizontal_add() intrinsic is used to perform horizontal add on all other datatypes.
191 
192 #pragma FUNC_ALWAYS_INLINE
193 static inline float DSPLIB_horiAdd(c7x::float_vec vector)
194 {
195  float sum = 0;
196 
197  vector.lo() = vector.hi() + vector.lo();
198  vector.lo().lo() = vector.lo().hi() + vector.lo().lo();
199 //#if __C7X_VEC_SIZE_BYTES__ == 64
200  vector.lo().lo().lo() = vector.lo().lo().hi() + vector.lo().lo().lo();
201  sum = (float) vector.s[0] + (float) vector.s[1];
202 
203  return sum;
204 }
205 
206 #pragma FUNC_ALWAYS_INLINE
207 static inline double DSPLIB_horiAdd(c7x::double_vec vector)
208 {
209  double sum = 0;
210 
211  vector.lo() = vector.hi() + vector.lo();
212 
213 //#if __C7X_VEC_SIZE_BYTES__ == 64
214  vector.lo().lo() = vector.lo().hi() + vector.lo().lo();
215 
216  sum = (double) vector.s[0] + (double) vector.s[1];
217 
218  return sum;
219 }
220 
221 /**********************************************************************/
222 /* Execute for datatypes float and double */
223 /**********************************************************************/
224 
225 // This is the generic implementation of exec_ci. It is used for float and double.
226 // Other datatypes have their own explicit implementation.
// Generic execution routine: multiplies the two input streams element-wise, accumulates
// the products and writes one dataType result to pOut.
//   handle : cast to DSPLIB_dotprod_PrivArgs*; supplies blockSize and bufPblock.
//   pIn1   : first input buffer, opened on streaming engine 0.
//   pIn2   : second input buffer, opened on streaming engine 1.
//   pOut   : receives a single dataType value.
//   returns DSPLIB_SUCCESS unconditionally.
// Both streaming engines are opened with the same template, read back from bufPblock at
// byte offset SE_SE0_PARAM_OFFSET.
// NOTE(review): this listing skips its own line 228 (between the template header and
// the function name), so that line is not shown here.
227 template <typename dataType>
229 DSPLIB_dotprod_exec_ci(DSPLIB_kernelHandle handle, void *restrict pIn1, void *restrict pIn2, void *restrict pOut)
230 {
231  DSPLIB_dotprod_PrivArgs *pKerPrivArgs = (DSPLIB_dotprod_PrivArgs *) handle;
232  int32_t blockSize = pKerPrivArgs->blockSize;
233 
234  __SE_TEMPLATE_v1 se0Params;
235 
236  dataType *restrict pInLocal1 = (dataType *) pIn1;
237  dataType *restrict pInLocal2 = (dataType *) pIn2;
238  dataType *restrict pOutLocal = (dataType *) pOut;
239 
240 #if DSPLIB_DEBUGPRINT
241  printf("Enter DSPLIB_dotprod_exec_ci\n");
242 #endif
243 
    // Full-width vector of dataType; eleCount is its number of lanes.
244  typedef typename c7x::make_full_vector<dataType>::type vec;
245  int32_t eleCount = c7x::element_count_of<vec>::value;
246 
247  // typedef typename c7x::make_vector<dataType, 4>::type four_element_vec;
248 
249 #if DSPLIB_DEBUGPRINT
250  printf("Enter eleCount %d\n", eleCount);
251 #endif
252  uint8_t *pBlock = pKerPrivArgs->bufPblock;
253 
    // Reload the SE template stored in the parameter block.
254  se0Params = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_SE0_PARAM_OFFSET);
255 
256  // Input samples
257  __SE0_OPEN(pInLocal1, se0Params);
258  __SE1_OPEN(pInLocal2, se0Params);
259 
260 #if DSPLIB_DEBUGPRINT
261  printf("DSPLIB_DEBUGPRINT blockSize %d\n", blockSize);
262 #endif
263 
    // Four independent partial-sum vectors (one per unrolled multiply-accumulate) plus
    // the vector that combines them after the loop; all start at zero.
264  vec out;
265  vec out_ab;
266  vec out_cd;
267  vec out_ef;
268  vec out_gh;
269  out = (vec) 0.0;
270  out_ab = (vec) 0.0;
271  out_cd = (vec) 0.0;
272  out_ef = (vec) 0.0;
273  out_gh = (vec) 0.0;
274 
275  dataType result;
    // Each iteration fetches four vectors from each stream, so the counter advances by
    // eleCount * 4 elements.
    // NOTE(review): when blockSize is not a multiple of eleCount * 4 the loop still
    // fetches whole groups of four vectors; presumably the SE supplies zeros past the
    // end of the stream. Confirm against the streaming-engine documentation.
276  for (int32_t counter = 0; counter < blockSize; counter += eleCount * 4) {
277  vec a = c7x::strm_eng<0, vec>::get_adv();
278  vec b = c7x::strm_eng<1, vec>::get_adv();
279 
280  out_ab += a * b;
281 
282  vec c = c7x::strm_eng<0, vec>::get_adv();
283  vec d = c7x::strm_eng<1, vec>::get_adv();
284 
285  out_cd += c * d;
286 
287  vec e = c7x::strm_eng<0, vec>::get_adv();
288  vec f = c7x::strm_eng<1, vec>::get_adv();
289 
290  out_ef += e * f;
291 
292  vec g = c7x::strm_eng<0, vec>::get_adv();
293  vec h = c7x::strm_eng<1, vec>::get_adv();
294 
295  out_gh += g * h;
296  }
297 
    // Combine the four partial-sum vectors lane-wise.
298  out = out_ab + out_cd + out_ef + out_gh;
299 
300  // result = DSPLIB_horiAdd<vec, dataType, dataType>(out);
    // Reduce the lanes to a scalar with the DSPLIB_horiAdd overload matching vec.
301  result = DSPLIB_horiAdd(out);
302 
303  *pOutLocal = result;
304 
305  __SE0_CLOSE();
306  __SE1_CLOSE();
307 
308  return DSPLIB_SUCCESS;
309 }
310 
311 /**********************************************************************/
312 /* Execute for datatype int8_t */
313 /**********************************************************************/
314 // The input datatype of int8_t is promoted to int16_t using the streaming engine.
315 // The dotprod is then implemented as int16_t.
316 // When completed, the int64_t output is cast down to int32_t.
// Execution routine specialised for int8_t input: accumulates __vdotp4hd_vvv results of
// the two streams in int64_t lanes and writes one int32_t result to pOut.
//   handle : cast to DSPLIB_dotprod_PrivArgs*; supplies blockSize and bufPblock.
//   pIn1   : first input buffer, opened on streaming engine 0.
//   pIn2   : second input buffer, opened on streaming engine 1.
//   pOut   : receives a single int32_t value.
//   returns DSPLIB_SUCCESS unconditionally.
// NOTE(review): this listing skips its own line 318, so the line that opens the
// signature is not shown here.
317 template <>
319  void *restrict pIn1,
320  void *restrict pIn2,
321  void *restrict pOut)
322 {
323  DSPLIB_dotprod_PrivArgs *pKerPrivArgs = (DSPLIB_dotprod_PrivArgs *) handle;
324  int32_t blockSize = pKerPrivArgs->blockSize;
325 
326  __SE_TEMPLATE_v1 se0Params;
327 
    // The input pointers are typed int16_t* (the element type the vectors are consumed
    // as); in this function they are only passed to __SE0_OPEN / __SE1_OPEN.
328  int16_t *restrict pInLocal1 = (int16_t *) pIn1;
329  int16_t *restrict pInLocal2 = (int16_t *) pIn2;
330  int32_t *restrict pOutLocal = (int32_t *) pOut;
331 
332 #if DSPLIB_DEBUGPRINT
333  printf("Enter DSPLIB_dotprod_exec_ci\n");
334 #endif
335 
    // Full-width vector of int16_t; eleCount is its number of lanes.
336  typedef typename c7x::make_full_vector<int16_t>::type vec;
337  int32_t eleCount = c7x::element_count_of<vec>::value;
338 
339 #if DSPLIB_DEBUGPRINT
340  printf("Enter eleCount %d\n", eleCount);
341 #endif
342  uint8_t *pBlock = pKerPrivArgs->bufPblock;
343 
    // Reload the SE template stored in the parameter block.
344  se0Params = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_SE0_PARAM_OFFSET);
345 
346  // Input samples
347  __SE0_OPEN(pInLocal1, se0Params);
348  __SE1_OPEN(pInLocal2, se0Params);
349 
350 #if DSPLIB_DEBUGPRINT
351  printf("DSPLIB_DEBUGPRINT blockSize %d\n", blockSize);
352 #endif
353 
354  typedef typename c7x::make_full_vector<int64_t>::type vec_out;
355 
356  vec_out out;
357  out = (vec_out) 0;
358  int32_t result = 0;
    // One vector from each stream per iteration; the counter advances by eleCount.
359  for (int32_t counter = 0; counter < blockSize; counter += eleCount) {
360  vec a = c7x::strm_eng<0, vec>::get_adv();
361  vec b = c7x::strm_eng<1, vec>::get_adv();
362 
    // NOTE(review): presumably a 4-element halfword dot product per 64-bit lane (going
    // by the intrinsic's name); confirm against the C7x intrinsics reference.
363  out += __vdotp4hd_vvv(a, b);
364  }
365 
366  // use intrinsic for horizontal add
367  // cast the int64_t output of __horizontal_add to int32_t.
368  result = (int32_t) __horizontal_add(out);
369 
370  *pOutLocal = result;
371 
372  __SE0_CLOSE();
373  __SE1_CLOSE();
374 
375  return DSPLIB_SUCCESS;
376 }
377 
378 /**********************************************************************/
379 /* Execute for datatype uint8_t */
380 /**********************************************************************/
381 
// Execution routine specialised for uint8_t input: accumulates __vdotp4ubw_vvv results
// of the two streams in uint32_t lanes and writes one uint32_t result to pOut.
//   handle : cast to DSPLIB_dotprod_PrivArgs*; supplies blockSize and bufPblock.
//   pIn1   : first input buffer, opened on streaming engine 0.
//   pIn2   : second input buffer, opened on streaming engine 1.
//   pOut   : receives a single uint32_t value.
//   returns DSPLIB_SUCCESS unconditionally.
// NOTE(review): this listing skips its own line 383, so the line that opens the
// signature is not shown here.
382 template <>
384  void *restrict pIn1,
385  void *restrict pIn2,
386  void *restrict pOut)
387 {
388  DSPLIB_dotprod_PrivArgs *pKerPrivArgs = (DSPLIB_dotprod_PrivArgs *) handle;
389  int32_t blockSize = pKerPrivArgs->blockSize;
390 
391  __SE_TEMPLATE_v1 se0Params;
392 
393  uint8_t *restrict pInLocal1 = (uint8_t *) pIn1;
394  uint8_t *restrict pInLocal2 = (uint8_t *) pIn2;
395  uint32_t *restrict pOutLocal = (uint32_t *) pOut;
396 
397 #if DSPLIB_DEBUGPRINT
398  printf("Enter DSPLIB_dotprod_exec_ci\n");
399 #endif
400 
    // Full-width vector of uint8_t; eleCount is its number of lanes.
401  typedef typename c7x::make_full_vector<uint8_t>::type vec;
402  int32_t eleCount = c7x::element_count_of<vec>::value;
403 
404 #if DSPLIB_DEBUGPRINT
405  printf("Enter eleCount %d\n", eleCount);
406 #endif
407  uint8_t *pBlock = pKerPrivArgs->bufPblock;
408 
    // Reload the SE template stored in the parameter block.
409  se0Params = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_SE0_PARAM_OFFSET);
410 
411  // Input samples
412  __SE0_OPEN(pInLocal1, se0Params);
413  __SE1_OPEN(pInLocal2, se0Params);
414 
415 #if DSPLIB_DEBUGPRINT
416  printf("DSPLIB_DEBUGPRINT blockSize %d\n", blockSize);
417 #endif
418 
    // The accumulator lanes are uint32_t.
419  typedef typename c7x::make_full_vector<uint32_t>::type vec_out;
420 
421  vec_out out;
422  out = (vec_out) 0;
423  uint32_t result = 0;
424 
    // One vector from each stream per iteration; the counter advances by eleCount.
425  for (int32_t counter = 0; counter < blockSize; counter += eleCount) {
426  vec a = c7x::strm_eng<0, vec>::get_adv();
427  vec b = c7x::strm_eng<1, vec>::get_adv();
428 
    // NOTE(review): presumably a 4-element unsigned-byte dot product per 32-bit lane
    // (going by the intrinsic's name); confirm against the C7x intrinsics reference.
429  out += __vdotp4ubw_vvv(a, b);
430  }
431  // use intrinsic for horizontal add
432  // cast the uint64_t output of __horizontal_add to uint32_t.
433 
434  result = (uint32_t) __horizontal_add(out);
435 
436  *pOutLocal = result;
437 
438  __SE0_CLOSE();
439  __SE1_CLOSE();
440 
441  return DSPLIB_SUCCESS;
442 }
443 
444 /**********************************************************************/
445 /* Execute for datatype int16_t */
446 /**********************************************************************/
447 
// Execution routine specialised for int16_t input: accumulates __vdotp4hd_vvv results
// of the two streams in int64_t lanes and writes one int64_t result to pOut.
//   handle : cast to DSPLIB_dotprod_PrivArgs*; supplies blockSize and bufPblock.
//   pIn1   : first input buffer, opened on streaming engine 0.
//   pIn2   : second input buffer, opened on streaming engine 1.
//   pOut   : receives a single int64_t value.
//   returns DSPLIB_SUCCESS unconditionally.
// NOTE(review): this listing skips its own line 449, so the line that opens the
// signature is not shown here.
448 template <>
450  void *restrict pIn1,
451  void *restrict pIn2,
452  void *restrict pOut)
453 {
454  DSPLIB_dotprod_PrivArgs *pKerPrivArgs = (DSPLIB_dotprod_PrivArgs *) handle;
455  int32_t blockSize = pKerPrivArgs->blockSize;
456 
457  __SE_TEMPLATE_v1 se0Params;
458 
459  int16_t *restrict pInLocal1 = (int16_t *) pIn1;
460  int16_t *restrict pInLocal2 = (int16_t *) pIn2;
461  int64_t *restrict pOutLocal = (int64_t *) pOut;
462 
463 #if DSPLIB_DEBUGPRINT
464  printf("Enter DSPLIB_dotprod_exec_ci\n");
465 #endif
466 
467  typedef typename c7x::make_full_vector<int16_t>::type vec; // full-width vector of int16_t
468  int32_t eleCount = c7x::element_count_of<vec>::value;
469 
470 #if DSPLIB_DEBUGPRINT
471  printf("Enter eleCount %d\n", eleCount);
472 #endif
473  uint8_t *pBlock = pKerPrivArgs->bufPblock;
474 
    // Reload the SE template stored in the parameter block.
475  se0Params = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_SE0_PARAM_OFFSET);
476 
477  // Input samples
478  __SE0_OPEN(pInLocal1, se0Params);
479  __SE1_OPEN(pInLocal2, se0Params);
480 
481 #if DSPLIB_DEBUGPRINT
482  printf("DSPLIB_DEBUGPRINT blockSize %d\n", blockSize);
483 #endif
484 
485  typedef typename c7x::make_full_vector<int64_t>::type vec_out;
486 
487  vec_out out;
488  out = (vec_out) 0;
489  int64_t result = 0;
    // One vector from each stream per iteration; the counter advances by eleCount.
490  for (int32_t counter = 0; counter < blockSize; counter += eleCount) {
491  vec a = c7x::strm_eng<0, vec>::get_adv();
492  vec b = c7x::strm_eng<1, vec>::get_adv();
493 
    // NOTE(review): presumably a 4-element halfword dot product per 64-bit lane (going
    // by the intrinsic's name); confirm against the C7x intrinsics reference.
494  out += __vdotp4hd_vvv(a, b);
495  }
496 
497  // use intrinsic for horizontal add
498  result = __horizontal_add(out);
499 
500  *pOutLocal = result;
501 
502  __SE0_CLOSE();
503  __SE1_CLOSE();
504 
505  return DSPLIB_SUCCESS;
506 }
507 
508 /**********************************************************************/
509 /* Execute for datatype uint16_t */
510 /**********************************************************************/
511 
// Execution routine specialised for uint16_t input: accumulates __vdotp4uhd_vvv results
// of the two streams in uint64_t lanes and writes one uint64_t result to pOut.
//   handle : cast to DSPLIB_dotprod_PrivArgs*; supplies blockSize and bufPblock.
//   pIn1   : first input buffer, opened on streaming engine 0.
//   pIn2   : second input buffer, opened on streaming engine 1.
//   pOut   : receives a single uint64_t value.
//   returns DSPLIB_SUCCESS unconditionally.
// NOTE(review): this listing skips its own line 513, so the line that opens the
// signature is not shown here.
512 template <>
514  void *restrict pIn1,
515  void *restrict pIn2,
516  void *restrict pOut)
517 {
518  DSPLIB_dotprod_PrivArgs *pKerPrivArgs = (DSPLIB_dotprod_PrivArgs *) handle;
519  int32_t blockSize = pKerPrivArgs->blockSize;
520 
521  __SE_TEMPLATE_v1 se0Params;
522 
523  uint16_t *restrict pInLocal1 = (uint16_t *) pIn1;
524  uint16_t *restrict pInLocal2 = (uint16_t *) pIn2;
525  uint64_t *restrict pOutLocal = (uint64_t *) pOut;
526 
527 #if DSPLIB_DEBUGPRINT
528  printf("Enter DSPLIB_dotprod_exec_ci\n");
529 #endif
530 
    // Full-width vector of uint16_t; eleCount is its number of lanes.
531  typedef typename c7x::make_full_vector<uint16_t>::type vec;
532  int32_t eleCount = c7x::element_count_of<vec>::value;
533 
534 #if DSPLIB_DEBUGPRINT
535  printf("Enter eleCount %d\n", eleCount);
536 #endif
537  uint8_t *pBlock = pKerPrivArgs->bufPblock;
538 
    // Reload the SE template stored in the parameter block.
539  se0Params = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_SE0_PARAM_OFFSET);
540 
541  // Input samples
542  __SE0_OPEN(pInLocal1, se0Params);
543  __SE1_OPEN(pInLocal2, se0Params);
544 
545 #if DSPLIB_DEBUGPRINT
546  printf("DSPLIB_DEBUGPRINT blockSize %d\n", blockSize);
547 #endif
548 
549  typedef typename c7x::make_full_vector<uint64_t>::type vec_out;
550 
551  vec_out out;
552  out = (vec_out) 0;
553  uint64_t result = 0;
    // One vector from each stream per iteration; the counter advances by eleCount.
554  for (int32_t counter = 0; counter < blockSize; counter += eleCount) {
555  vec a = c7x::strm_eng<0, vec>::get_adv();
556  vec b = c7x::strm_eng<1, vec>::get_adv();
557 
    // NOTE(review): presumably a 4-element unsigned-halfword dot product per 64-bit
    // lane (going by the intrinsic's name); confirm against the C7x intrinsics reference.
558  out += __vdotp4uhd_vvv(a, b);
559  }
560 
561  // use intrinsic for horizontal add
562  result = __horizontal_add(out);
563 
564  *pOutLocal = result;
565 
566  __SE0_CLOSE();
567  __SE1_CLOSE();
568 
569  return DSPLIB_SUCCESS;
570 }
571 
572 /**********************************************************************/
573 /* Execute for datatype int32_t */
574 /**********************************************************************/
575 
// Execution routine specialised for int32_t input: accumulates the two outputs of
// __vmpywd_vvw for each vector pair in int64_t lanes and writes one int64_t result to pOut.
//   handle : cast to DSPLIB_dotprod_PrivArgs*; supplies blockSize and bufPblock.
//   pIn1   : first input buffer, opened on streaming engine 0.
//   pIn2   : second input buffer, opened on streaming engine 1.
//   pOut   : receives a single int64_t value.
//   returns DSPLIB_SUCCESS unconditionally.
// NOTE(review): this listing skips its own line 577, so the line that opens the
// signature is not shown here.
576 template <>
578  void *restrict pIn1,
579  void *restrict pIn2,
580  void *restrict pOut)
581 {
582  DSPLIB_dotprod_PrivArgs *pKerPrivArgs = (DSPLIB_dotprod_PrivArgs *) handle;
583  int32_t blockSize = pKerPrivArgs->blockSize;
584 
585  __SE_TEMPLATE_v1 se0Params;
586 
587  int32_t *restrict pInLocal1 = (int32_t *) pIn1;
588  int32_t *restrict pInLocal2 = (int32_t *) pIn2;
589  int64_t *restrict pOutLocal = (int64_t *) pOut;
590 
591 #if DSPLIB_DEBUGPRINT
592  printf("Enter DSPLIB_dotprod_exec_ci\n");
593 #endif
594 
595  typedef typename c7x::make_full_vector<int32_t>::type vec; // full-width vector of int32_t
596  int32_t eleCount = c7x::element_count_of<vec>::value;
597 
598 #if DSPLIB_DEBUGPRINT
599  printf("Enter eleCount %d\n", eleCount);
600 #endif
601  uint8_t *pBlock = pKerPrivArgs->bufPblock;
602 
    // Reload the SE template stored in the parameter block.
603  se0Params = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_SE0_PARAM_OFFSET);
604 
605  // Input samples
606  __SE0_OPEN(pInLocal1, se0Params);
607  __SE1_OPEN(pInLocal2, se0Params);
608 
609 #if DSPLIB_DEBUGPRINT
610  printf("DSPLIB_DEBUGPRINT blockSize %d\n", blockSize);
611 #endif
612 
613  typedef typename c7x::make_full_vector<int64_t>::type vec_out;
614 
615  vec_out out;
616  vec_out out0;
617  vec_out out1;
618  out = (vec_out) 0;
619  int64_t result = 0;
    // One vector from each stream per iteration; the counter advances by eleCount.
620  for (int32_t counter = 0; counter < blockSize; counter += eleCount) {
621  vec a = c7x::strm_eng<0, vec>::get_adv();
622  vec b = c7x::strm_eng<1, vec>::get_adv();
623 
    // out0 and out1 are passed in uninitialised and read on the next line.
    // NOTE(review): presumably the intrinsic writes the widened 64-bit products into
    // them; confirm against the C7x intrinsics reference.
624  __vmpywd_vvw(a, b, out0, out1);
625  out += (out0 + out1);
626  }
627 
628  // use intrinsic for horizontal add
629  result = __horizontal_add(out);
630 
631  *pOutLocal = result;
632 
633  __SE0_CLOSE();
634  __SE1_CLOSE();
635 
636  return DSPLIB_SUCCESS;
637 }
638 
639 /**********************************************************************/
640 /* Execute for datatype uint32_t */
641 /**********************************************************************/
642 
// Execution routine specialised for uint32_t input: accumulates the two outputs of
// __vmpyuwd_vvw for each vector pair in uint64_t lanes and writes one uint64_t result to pOut.
//   handle : cast to DSPLIB_dotprod_PrivArgs*; supplies blockSize and bufPblock.
//   pIn1   : first input buffer, opened on streaming engine 0.
//   pIn2   : second input buffer, opened on streaming engine 1.
//   pOut   : receives a single uint64_t value.
//   returns DSPLIB_SUCCESS unconditionally.
// NOTE(review): this listing skips its own line 644, so the line that opens the
// signature is not shown here.
643 template <>
645  void *restrict pIn1,
646  void *restrict pIn2,
647  void *restrict pOut)
648 {
649  DSPLIB_dotprod_PrivArgs *pKerPrivArgs = (DSPLIB_dotprod_PrivArgs *) handle;
650  int32_t blockSize = pKerPrivArgs->blockSize;
651 
652  __SE_TEMPLATE_v1 se0Params;
653 
654  uint32_t *restrict pInLocal1 = (uint32_t *) pIn1;
655  uint32_t *restrict pInLocal2 = (uint32_t *) pIn2;
656  uint64_t *restrict pOutLocal = (uint64_t *) pOut;
657 
658 #if DSPLIB_DEBUGPRINT
659  printf("Enter DSPLIB_dotprod_exec_ci\n");
660 #endif
661 
662  typedef typename c7x::make_full_vector<uint32_t>::type vec; // full-width vector of uint32_t
663  int32_t eleCount = c7x::element_count_of<vec>::value;
664 
665 #if DSPLIB_DEBUGPRINT
666  printf("Enter eleCount %d\n", eleCount);
667 #endif
668  uint8_t *pBlock = pKerPrivArgs->bufPblock;
669 
    // Reload the SE template stored in the parameter block.
670  se0Params = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_SE0_PARAM_OFFSET);
671 
672  // Input samples
673  __SE0_OPEN(pInLocal1, se0Params);
674  __SE1_OPEN(pInLocal2, se0Params);
675 
676 #if DSPLIB_DEBUGPRINT
677  printf("DSPLIB_DEBUGPRINT blockSize %d\n", blockSize);
678 #endif
679 
680  typedef typename c7x::make_full_vector<uint64_t>::type vec_out;
681 
682  vec_out out;
683  vec_out out0;
684  vec_out out1;
685  out = (vec_out) 0;
686  uint64_t result = 0;
    // One vector from each stream per iteration; the counter advances by eleCount.
687  for (int32_t counter = 0; counter < blockSize; counter += eleCount) {
688  vec a = c7x::strm_eng<0, vec>::get_adv();
689  vec b = c7x::strm_eng<1, vec>::get_adv();
690 
691  // out += __vdotp2xwd_vvv(a, b); This did not provide the correct result
692 
    // out0 and out1 are passed in uninitialised and read on the next line.
    // NOTE(review): presumably the intrinsic writes the widened 64-bit products into
    // them; confirm against the C7x intrinsics reference.
693  __vmpyuwd_vvw(a, b, out0, out1);
694  out += (out0 + out1);
695  }
696 
697  // use intrinsic for horizontal add
698  result = __horizontal_add(out);
699 
700  *pOutLocal = result;
701 
702  __SE0_CLOSE();
703  __SE1_CLOSE();
704 
705  return DSPLIB_SUCCESS;
706 }
707 
// Trailing parameter lines of eight declarations that share the exec-function parameter
// list (pIn1, pIn2, pOut) and end in ';'.
// NOTE(review): the first line of each declaration (listing lines 708, 713, 718, 723,
// 728, 733, 738 and 743) is absent from this listing. Presumably these are the
// per-datatype explicit instantiations of DSPLIB_dotprod_exec_ci; confirm against the
// original source file.
709  void *restrict pIn1,
710  void *restrict pIn2,
711  void *restrict pOut);
712 
714  void *restrict pIn1,
715  void *restrict pIn2,
716  void *restrict pOut);
717 
719  void *restrict pIn1,
720  void *restrict pIn2,
721  void *restrict pOut);
722 
724  void *restrict pIn1,
725  void *restrict pIn2,
726  void *restrict pOut);
727 
729  void *restrict pIn1,
730  void *restrict pIn2,
731  void *restrict pOut);
732 
734  void *restrict pIn1,
735  void *restrict pIn2,
736  void *restrict pOut);
737 
739  void *restrict pIn1,
740  void *restrict pIn2,
741  void *restrict pOut);
742 
744  void *restrict pIn1,
745  void *restrict pIn2,
746  void *restrict pOut);
template DSPLIB_STATUS DSPLIB_dotprod_init_ci< int16_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_dotprod_InitArgs *pKerInitArgs)
template DSPLIB_STATUS DSPLIB_dotprod_init_ci< double >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_dotprod_InitArgs *pKerInitArgs)
#define SE_SE0_PARAM_OFFSET
template DSPLIB_STATUS DSPLIB_dotprod_exec_ci< float >(DSPLIB_kernelHandle handle, void *restrict pIn1, void *restrict pIn2, void *restrict pOut)
template DSPLIB_STATUS DSPLIB_dotprod_init_ci< uint32_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_dotprod_InitArgs *pKerInitArgs)
DSPLIB_STATUS DSPLIB_dotprod_exec_ci< int16_t >(DSPLIB_kernelHandle handle, void *restrict pIn1, void *restrict pIn2, void *restrict pOut)
DSPLIB_STATUS DSPLIB_dotprod_exec_ci< uint8_t >(DSPLIB_kernelHandle handle, void *restrict pIn1, void *restrict pIn2, void *restrict pOut)
DSPLIB_STATUS DSPLIB_dotprod_exec_ci< uint32_t >(DSPLIB_kernelHandle handle, void *restrict pIn1, void *restrict pIn2, void *restrict pOut)
DSPLIB_STATUS DSPLIB_dotprod_init_ci< int8_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_dotprod_InitArgs *pKerInitArgs)
DSPLIB_STATUS DSPLIB_dotprod_exec_ci< int32_t >(DSPLIB_kernelHandle handle, void *restrict pIn1, void *restrict pIn2, void *restrict pOut)
DSPLIB_STATUS DSPLIB_dotprod_exec_ci< uint16_t >(DSPLIB_kernelHandle handle, void *restrict pIn1, void *restrict pIn2, void *restrict pOut)
template DSPLIB_STATUS DSPLIB_dotprod_exec_ci< double >(DSPLIB_kernelHandle handle, void *restrict pIn1, void *restrict pIn2, void *restrict pOut)
DSPLIB_STATUS DSPLIB_dotprod_exec_ci< int8_t >(DSPLIB_kernelHandle handle, void *restrict pIn1, void *restrict pIn2, void *restrict pOut)
template DSPLIB_STATUS DSPLIB_dotprod_init_ci< uint8_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_dotprod_InitArgs *pKerInitArgs)
DSPLIB_STATUS DSPLIB_dotprod_exec_ci(DSPLIB_kernelHandle handle, void *restrict pIn1, void *restrict pIn2, void *restrict pOut)
This function is the main execution function for the C7x implementation of the kernel....
template DSPLIB_STATUS DSPLIB_dotprod_init_ci< float >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_dotprod_InitArgs *pKerInitArgs)
static float DSPLIB_horiAdd(c7x::float_vec vector)
template DSPLIB_STATUS DSPLIB_dotprod_init_ci< uint16_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_dotprod_InitArgs *pKerInitArgs)
DSPLIB_STATUS DSPLIB_dotprod_init_ci(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_dotprod_InitArgs *pKerInitArgs)
This function is the initialization function for the C7x implementation of the kernel....
template DSPLIB_STATUS DSPLIB_dotprod_init_ci< int32_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_dotprod_InitArgs *pKerInitArgs)
Header file for kernel's internal use. For the kernel's interface, please see DSPLIB_dotprod.
DSPLIB_STATUS_NAME
The enumeration of all status codes.
Definition: DSPLIB_types.h:151
void * DSPLIB_kernelHandle
Handle type for DSPLIB operations.
Definition: DSPLIB_types.h:172
@ DSPLIB_SUCCESS
Definition: DSPLIB_types.h:152
A structure for a 1 dimensional buffer descriptor.
Structure containing the parameters to initialize the kernel.
Structure that is reserved for internal use by the kernel.
int32_t blockSize
Size of input buffer for different batches DSPLIB_dotprod_init that will be retrieved and used by DSP...
uint8_t bufPblock[DSPLIB_DOTPROD_IXX_IXX_OXX_PBLOCK_SIZE]