DSPLIB User Guide
DSPLIB_dotp_sqr_ci.cpp
Go to the documentation of this file.
1 /******************************************************************************/
5 /* Copyright (C) 2017 Texas Instruments Incorporated - https://www.ti.com/
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * Redistributions of source code must retain the above copyright
12  * notice, this list of conditions and the following disclaimer.
13  *
14  * Redistributions in binary form must reproduce the above copyright
15  * notice, this list of conditions and the following disclaimer in the
16  * documentation and/or other materials provided with the
17  * distribution.
18  *
19  * Neither the name of Texas Instruments Incorporated nor the names of
20  * its contributors may be used to endorse or promote products derived
21  * from this software without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
29  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
30  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
33  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34  *
35  ******************************************************************************/
36 
37 /******************************************************************************
38  * Version 1.0 Date 9/8/23 Author:
39  *****************************************************************************/
40 
41 /*******************************************************************************
42  *
43  * INCLUDES
44  *
45  ******************************************************************************/
46 
47 #include "../common/c71/DSPLIB_inlines.h"
48 #include "DSPLIB_dotp_sqr_priv.h"
49 #include <float.h>
50 
51 /*******************************************************************************
52  *
53  * DEFINES
54  *
55  ******************************************************************************/
56 
// Byte offsets into the kernel parameter block (bufPblock). The init functions
// store the streaming-engine template at SE_SE0_PARAM_OFFSET and the exec
// functions reload it from the same offset.
57 #define SE_PARAM_BASE (0x0000)
58 #define SE_SE0_PARAM_OFFSET (SE_PARAM_BASE)
59 
60 // Generic initialization
// Builds a one-dimensional streaming-engine (SE) template for element type
// `dataType` and stores it in the kernel parameter block.
//   handle       - kernel handle; cast to DSPLIB_dotp_sqr_PrivArgs* to reach
//                  bufPblock and blockSize.
//   bufParamsIn, bufParamsOut, pKerInitArgs - not referenced in the visible body.
// Returns `status`.
// NOTE(review): the line carrying the function name and the declaration of
// `status` are not present in this listing - confirm against the original file.
61 template <typename dataType>
63  const DSPLIB_bufParams1D_t *bufParamsIn,
64  const DSPLIB_bufParams1D_t *bufParamsOut,
65  const DSPLIB_dotp_sqr_InitArgs *pKerInitArgs)
66 {
68  __SE_TEMPLATE_v1 se0Params;
69 
70  __SE_ELETYPE SE_ELETYPE;
71  __SE_VECLEN SE_VECLEN;
72 
73  DSPLIB_dotp_sqr_PrivArgs *pKerPrivArgs = (DSPLIB_dotp_sqr_PrivArgs *) handle;
74 
75  uint8_t *pBlock = pKerPrivArgs->bufPblock;
76  int32_t blockSize = pKerPrivArgs->blockSize;
77 
 // The SE vector length and element type are both derived from the full
 // vector type for dataType.
78  typedef typename c7x::make_full_vector<dataType>::type vec;
79 
80  SE_VECLEN = c7x::se_veclen<vec>::value;
81  SE_ELETYPE = c7x::se_eletype<vec>::value;
82 
83 #if DSPLIB_DEBUGPRINT
84  int32_t eleCount = c7x::element_count_of<vec>::value;
85  printf("Enter eleCount %d\n", eleCount);
86 #endif
87 
88  /**********************************************************************/
89  /* Prepare the streaming engine template used to fetch the input */
90  /**********************************************************************/
91  se0Params = __gen_SE_TEMPLATE_v1();
92 
 // 1-D stream whose ICNT0 is the block size read from the private args.
93  se0Params.ICNT0 = blockSize;
94  se0Params.ELETYPE = SE_ELETYPE;
95  se0Params.VECLEN = SE_VECLEN;
96  se0Params.DIMFMT = __SE_DIMFMT_1D;
97 
 // Save the template in the parameter block; exec reloads it from this offset.
98  *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_SE0_PARAM_OFFSET) = se0Params;
99 
100  return status;
101 }
102 
103 // int8_t initialization: the input is promoted to int16_t by the streaming engine
// Builds a one-dimensional SE template that reads char elements, delivers them
// with the short-vector length and applies 2x sign-extending promotion, then
// stores it in the kernel parameter block.
//   handle       - kernel handle; cast to DSPLIB_dotp_sqr_PrivArgs* to reach
//                  bufPblock and blockSize.
//   bufParamsIn, bufParamsOut, pKerInitArgs - not referenced in the visible body.
// Returns DSPLIB_SUCCESS (status is never modified after initialization).
// NOTE(review): the line carrying the function name is not present in this listing.
104 template <>
106  const DSPLIB_bufParams1D_t *bufParamsIn,
107  const DSPLIB_bufParams1D_t *bufParamsOut,
108  const DSPLIB_dotp_sqr_InitArgs *pKerInitArgs)
109 {
110  DSPLIB_STATUS status = DSPLIB_SUCCESS;
111  __SE_TEMPLATE_v1 se0Params;
112 
113  __SE_ELETYPE SE_ELETYPE;
114  __SE_VECLEN SE_VECLEN;
115 
116  __SE_PROMOTE SE_PROMOTE;
117 
118  DSPLIB_dotp_sqr_PrivArgs *pKerPrivArgs = (DSPLIB_dotp_sqr_PrivArgs *) handle;
119 
120  uint8_t *pBlock = pKerPrivArgs->bufPblock;
121  int32_t blockSize = pKerPrivArgs->blockSize;
122 
123 
 // Vector length comes from the short vector, element type from the char
 // vector: elements are read as char and promoted on the way in.
124  SE_VECLEN = c7x::se_veclen<c7x::short_vec>::value;
125  SE_ELETYPE = c7x::se_eletype<c7x::char_vec>::value;
126  SE_PROMOTE = __SE_PROMOTE_2X_SIGNEXT;
 // NOTE(review): `char_vec` below is unqualified while the lines above use
 // `c7x::char_vec` - confirm it resolves when DSPLIB_DEBUGPRINT is enabled.
127 #if DSPLIB_DEBUGPRINT
128  int32_t eleCount = c7x::element_count_of<char_vec>::value;
129  printf("Enter eleCount %d\n", eleCount);
130 #endif
131 
132  /**********************************************************************/
133  /* Prepare the streaming engine template used to fetch the input */
134  /**********************************************************************/
135  se0Params = __gen_SE_TEMPLATE_v1();
136 
 // 1-D stream whose ICNT0 is the block size read from the private args.
137  se0Params.ICNT0 = blockSize;
138  se0Params.ELETYPE = SE_ELETYPE;
139  se0Params.VECLEN = SE_VECLEN;
140  se0Params.DIMFMT = __SE_DIMFMT_1D;
141  se0Params.PROMOTE = SE_PROMOTE;
142 
 // Save the template in the parameter block; exec reloads it from this offset.
143  *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_SE0_PARAM_OFFSET) = se0Params;
144 
145  return status;
146 }
147 
149  const DSPLIB_bufParams1D_t *bufParamsIn,
150  const DSPLIB_bufParams1D_t *bufParamsOut,
151  const DSPLIB_dotp_sqr_InitArgs *pKerInitArgs);
152 
154  const DSPLIB_bufParams1D_t *bufParamsIn,
155  const DSPLIB_bufParams1D_t *bufParamsOut,
156  const DSPLIB_dotp_sqr_InitArgs *pKerInitArgs);
157 
159  const DSPLIB_bufParams1D_t *bufParamsIn,
160  const DSPLIB_bufParams1D_t *bufParamsOut,
161  const DSPLIB_dotp_sqr_InitArgs *pKerInitArgs);
162 
164  const DSPLIB_bufParams1D_t *bufParamsIn,
165  const DSPLIB_bufParams1D_t *bufParamsOut,
166  const DSPLIB_dotp_sqr_InitArgs *pKerInitArgs);
167 
169  const DSPLIB_bufParams1D_t *bufParamsIn,
170  const DSPLIB_bufParams1D_t *bufParamsOut,
171  const DSPLIB_dotp_sqr_InitArgs *pKerInitArgs);
172 
174  const DSPLIB_bufParams1D_t *bufParamsIn,
175  const DSPLIB_bufParams1D_t *bufParamsOut,
176  const DSPLIB_dotp_sqr_InitArgs *pKerInitArgs);
177 
179  const DSPLIB_bufParams1D_t *bufParamsIn,
180  const DSPLIB_bufParams1D_t *bufParamsOut,
181  const DSPLIB_dotp_sqr_InitArgs *pKerInitArgs);
182 
184  const DSPLIB_bufParams1D_t *bufParamsIn,
185  const DSPLIB_bufParams1D_t *bufParamsOut,
186  const DSPLIB_dotp_sqr_InitArgs *pKerInitArgs);
187 
188 // This function performs a horizontal add of the output vector.
189 // It is used for the float and double data types.
190 // The __horizontal_add() intrinsic is used to perform the horizontal add for all other data types.
191 
// Horizontal add for a float vector.
// Folds the vector three times (each fold adds the upper half onto the lower
// half of the current sub-vector) and then returns s[0] + s[1].
//   vector - taken by value; it is modified locally during the reduction.
// NOTE(review): three folds plus the final pair add cover 16 lanes. The
// commented-out `#if __C7X_VEC_SIZE_BYTES__ == 64` suggests the last fold was
// meant to depend on the vector width - confirm behavior for other widths.
192 #pragma FUNC_ALWAYS_INLINE
193 static inline float DSPLIB_horiAdd(c7x::float_vec vector)
194 {
195  float sum = 0;
196 
197  vector.lo() = vector.hi() + vector.lo();
198  vector.lo().lo() = vector.lo().hi() + vector.lo().lo();
199 // #if __C7X_VEC_SIZE_BYTES__ == 64
200  vector.lo().lo().lo() = vector.lo().lo().hi() + vector.lo().lo().lo();
 // Only the two lowest elements remain to be added.
201  sum = (float) vector.s[0] + (float) vector.s[1];
202 
203  return sum;
204 }
205 
// Horizontal add for a double vector.
// Folds the vector twice (each fold adds the upper half onto the lower half of
// the current sub-vector) and then returns s[0] + s[1].
//   vector - taken by value; it is modified locally during the reduction.
// NOTE(review): two folds plus the final pair add cover 8 lanes. The
// commented-out `#if __C7X_VEC_SIZE_BYTES__ == 64` suggests the last fold was
// meant to depend on the vector width - confirm behavior for other widths.
206 #pragma FUNC_ALWAYS_INLINE
207 static inline double DSPLIB_horiAdd(c7x::double_vec vector)
208 {
209  double sum = 0;
210 
211  vector.lo() = vector.hi() + vector.lo();
212 
213 // #if __C7X_VEC_SIZE_BYTES__ == 64
214  vector.lo().lo() = vector.lo().hi() + vector.lo().lo();
215 
 // Only the two lowest elements remain to be added.
216  sum = (double) vector.s[0] + (double) vector.s[1];
217 
218  return sum;
219 }
220 
221 /**********************************************************************/
222 /* Execute for datatypes float and double */
223 /**********************************************************************/
224 
225 // This is the generic implementation of exec_ci. It is used for float and double.
226 // Other datatypes have their own explicit implementation.
// Streams pIn1 through SE0 and pIn2 through SE1 using the template saved by
// init, accumulates a*b and b*b per lane, reduces both accumulators with
// DSPLIB_horiAdd and writes two dataType results:
//   pOut[0] = sum of pIn1[i] * pIn2[i]
//   pOut[1] = sum of pIn2[i] * pIn2[i]
//   handle - kernel handle; cast to DSPLIB_dotp_sqr_PrivArgs* to reach
//            bufPblock and blockSize.
// Always returns DSPLIB_SUCCESS.
// NOTE(review): the return-type line of the signature is not present in this listing.
227 template <typename dataType>
229 DSPLIB_dotp_sqr_exec_ci(DSPLIB_kernelHandle handle, void *restrict pIn1, void *restrict pIn2, void *restrict pOut)
230 {
231  DSPLIB_dotp_sqr_PrivArgs *pKerPrivArgs = (DSPLIB_dotp_sqr_PrivArgs *) handle;
232  int32_t blockSize = pKerPrivArgs->blockSize;
233 
234  __SE_TEMPLATE_v1 se0Params;
235 
236  dataType *restrict pInLocal1 = (dataType *) pIn1;
237  dataType *restrict pInLocal2 = (dataType *) pIn2;
238  dataType *restrict pOutLocal = (dataType *) pOut;
239 
240 #if DSPLIB_DEBUGPRINT
241  printf("Enter DSPLIB_dotp_sqr_exec_ci\n");
242 #endif
243 
244  typedef typename c7x::make_full_vector<dataType>::type vec;
245  int32_t eleCount = c7x::element_count_of<vec>::value;
246 
247 #if DSPLIB_DEBUGPRINT
248  printf("Enter eleCount %d\n", eleCount);
249 #endif
250  uint8_t *pBlock = pKerPrivArgs->bufPblock;
251 
 // Reload the SE template that init stored in the parameter block.
252  se0Params = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_SE0_PARAM_OFFSET);
253 
254  // Input samples
 // Both streams are opened with the same template.
255  __SE0_OPEN(pInLocal1, se0Params);
256  __SE1_OPEN(pInLocal2, se0Params);
257 
258 #if DSPLIB_DEBUGPRINT
259  printf("DSPLIB_DEBUGPRINT blockSize %d\n", blockSize);
260 #endif
261 
 // Four independent accumulators per result, one for each vector pair
 // consumed in a loop iteration; they are combined after the loop.
262  vec out_dotp;
263  vec out_ab_dotp;
264  vec out_cd_dotp;
265  vec out_ef_dotp;
266  vec out_gh_dotp;
267  out_dotp = (vec) 0.0;
268  out_ab_dotp = (vec) 0.0;
269  out_cd_dotp = (vec) 0.0;
270  out_ef_dotp = (vec) 0.0;
271  out_gh_dotp = (vec) 0.0;
272  dataType result_dotp;
273 
274  vec out_sqrAdd;
275  vec out_ab_sqrAdd;
276  vec out_cd_sqrAdd;
277  vec out_ef_sqrAdd;
278  vec out_gh_sqrAdd;
279  out_sqrAdd = (vec) 0.0;
280  out_ab_sqrAdd = (vec) 0.0;
281  out_cd_sqrAdd = (vec) 0.0;
282  out_ef_sqrAdd = (vec) 0.0;
283  out_gh_sqrAdd = (vec) 0.0;
284  dataType result_sqrAdd;
 // Each iteration fetches four vectors from each stream, so the counter
 // advances by 4 * eleCount.
 // NOTE(review): four vectors are always fetched, even when blockSize is not
 // a multiple of 4 * eleCount; presumably the SE supplies zeros once ICNT0
 // elements have been read - confirm.
285  for (int32_t counter = 0; counter < blockSize; counter += eleCount * 4) {
286  vec a = c7x::strm_eng<0, vec>::get_adv();
287  vec b = c7x::strm_eng<1, vec>::get_adv();
288 
289  out_ab_dotp += a * b;
290 
291  out_ab_sqrAdd += b * b;
292 
293  vec c = c7x::strm_eng<0, vec>::get_adv();
294  vec d = c7x::strm_eng<1, vec>::get_adv();
295 
296  out_cd_dotp += c * d;
297 
298  out_cd_sqrAdd += d * d;
299 
300  vec e = c7x::strm_eng<0, vec>::get_adv();
301  vec f = c7x::strm_eng<1, vec>::get_adv();
302 
303  out_ef_dotp += e * f;
304 
305  out_ef_sqrAdd += f * f;
306 
307  vec g = c7x::strm_eng<0, vec>::get_adv();
308  vec h = c7x::strm_eng<1, vec>::get_adv();
309 
310  out_gh_dotp += g * h;
311 
312  out_gh_sqrAdd += h * h;
313  }
314 
 // Combine the four partial accumulators, then reduce across lanes.
315  out_dotp = out_ab_dotp + out_cd_dotp + out_ef_dotp + out_gh_dotp;
316 
317  out_sqrAdd = out_ab_sqrAdd + out_cd_sqrAdd + out_ef_sqrAdd + out_gh_sqrAdd;
318 
319  result_dotp = DSPLIB_horiAdd(out_dotp);
320  result_sqrAdd = DSPLIB_horiAdd(out_sqrAdd);
321 
 // Two consecutive outputs: dot product first, sum of squares second.
322  *pOutLocal = result_dotp;
323  *++pOutLocal = result_sqrAdd;
324 
325  __SE0_CLOSE();
326  __SE1_CLOSE();
327 
328  return DSPLIB_SUCCESS;
329 }
330 
331 /**********************************************************************/
332 /* Execute for datatype int8_t */
333 /**********************************************************************/
334 // The input datatype of int8_t is promoted to int16_t using the streaming engine.
335 // The dotp_sqr is then implemented as int16_t.
336 // When completed, the output of int64_t is cast down to int32_t.
// Streams pIn1 through SE0 and pIn2 through SE1, accumulates
// __vdotp4hd_vvv(a, b) and __vdotp4hd_vvv(b, b) into int64_t vectors, reduces
// them with __horizontal_add and writes two int32_t results to pOut[0] (dot
// product) and pOut[1] (sum of squares of pIn2).
// Always returns DSPLIB_SUCCESS.
// NOTE(review): the line carrying the function name is not present in this listing.
337 template <>
339  void *restrict pIn1,
340  void *restrict pIn2,
341  void *restrict pOut)
342 {
343  DSPLIB_dotp_sqr_PrivArgs *pKerPrivArgs = (DSPLIB_dotp_sqr_PrivArgs *) handle;
344  int32_t blockSize = pKerPrivArgs->blockSize;
345 
346  __SE_TEMPLATE_v1 se0Params;
347 
 // NOTE(review): the input pointers are typed int16_t* here although the
 // data is described as int8_t; they are only passed to the SE open calls.
348  int16_t *restrict pInLocal1 = (int16_t *) pIn1;
349  int16_t *restrict pInLocal2 = (int16_t *) pIn2;
350  int32_t *restrict pOutLocal = (int32_t *) pOut;
351 
352 #if DSPLIB_DEBUGPRINT
353  printf("Enter DSPLIB_dotp_sqr_exec_ci\n");
354 #endif
355 
356  typedef typename c7x::make_full_vector<int16_t>::type vec;
357  int32_t eleCount = c7x::element_count_of<vec>::value;
358 
359 #if DSPLIB_DEBUGPRINT
360  printf("Enter eleCount %d\n", eleCount);
361 #endif
362  uint8_t *pBlock = pKerPrivArgs->bufPblock;
363 
 // Reload the SE template that init stored in the parameter block.
364  se0Params = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_SE0_PARAM_OFFSET);
365 
366  // Input samples
367  __SE0_OPEN(pInLocal1, se0Params);
368  __SE1_OPEN(pInLocal2, se0Params);
369 
370 #if DSPLIB_DEBUGPRINT
371  printf("DSPLIB_DEBUGPRINT blockSize %d\n", blockSize);
372 #endif
373 
374  typedef typename c7x::make_full_vector<int64_t>::type vec_out;
375 
376  vec_out out_dotp;
377  out_dotp = (vec_out) 0;
378  int32_t result_dotp = 0;
379 
380  vec_out out_sqrAdd;
381  out_sqrAdd = (vec_out) 0;
382  int32_t result_sqrAdd = 0;
 // One vector from each stream per iteration.
383  for (int32_t counter = 0; counter < blockSize; counter += eleCount) {
384  vec a = c7x::strm_eng<0, vec>::get_adv();
385  vec b = c7x::strm_eng<1, vec>::get_adv();
386 
387  out_dotp += __vdotp4hd_vvv(a, b);
388 
389  out_sqrAdd += __vdotp4hd_vvv(b, b);
390  }
391 
392  // use intrinsic for horizontal add
393  // cast the int64_t output of __horizontal_add to int32_t.
394  result_dotp = (int32_t) __horizontal_add(out_dotp);
395  result_sqrAdd = (int32_t) __horizontal_add(out_sqrAdd);
396 
 // Two consecutive outputs: dot product first, sum of squares second.
397  *pOutLocal = result_dotp;
398  *++pOutLocal = result_sqrAdd;
399 
400  __SE0_CLOSE();
401  __SE1_CLOSE();
402 
403  return DSPLIB_SUCCESS;
404 }
405 
406 /**********************************************************************/
407 /* Execute for datatype uint8_t */
408 /**********************************************************************/
409 
// exec_ci for uint8_t input.
// Streams pIn1 through SE0 and pIn2 through SE1 as uint8_t vectors, accumulates
// __vdotp4ubw_vvv(a, b) and __vdotp4ubw_vvv(b, b) into uint32_t vectors,
// reduces them with __horizontal_add and writes two uint32_t results to
// pOut[0] (dot product) and pOut[1] (sum of squares of pIn2).
// Always returns DSPLIB_SUCCESS.
// NOTE(review): the line carrying the function name is not present in this listing.
410 template <>
412  void *restrict pIn1,
413  void *restrict pIn2,
414  void *restrict pOut)
415 {
416  DSPLIB_dotp_sqr_PrivArgs *pKerPrivArgs = (DSPLIB_dotp_sqr_PrivArgs *) handle;
417  int32_t blockSize = pKerPrivArgs->blockSize;
418 
419  __SE_TEMPLATE_v1 se0Params;
420 
421  uint8_t *restrict pInLocal1 = (uint8_t *) pIn1;
422  uint8_t *restrict pInLocal2 = (uint8_t *) pIn2;
423  uint32_t *restrict pOutLocal = (uint32_t *) pOut;
424 
425 #if DSPLIB_DEBUGPRINT
426  printf("Enter DSPLIB_dotp_sqr_exec_ci\n");
427 #endif
428 
429  typedef typename c7x::make_full_vector<uint8_t>::type vec;
430  int32_t eleCount = c7x::element_count_of<vec>::value;
431 
432 #if DSPLIB_DEBUGPRINT
433  printf("Enter eleCount %d\n", eleCount);
434 #endif
435  uint8_t *pBlock = pKerPrivArgs->bufPblock;
436 
 // Reload the SE template that init stored in the parameter block.
437  se0Params = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_SE0_PARAM_OFFSET);
438 
439  // Input samples
440  __SE0_OPEN(pInLocal1, se0Params);
441  __SE1_OPEN(pInLocal2, se0Params);
442 
443 #if DSPLIB_DEBUGPRINT
444  printf("DSPLIB_DEBUGPRINT blockSize %d\n", blockSize);
445 #endif
446 
447  typedef typename c7x::make_full_vector<uint32_t>::type vec_out;
448 
449  vec_out out_dotp;
450  out_dotp = (vec_out) 0;
451  uint32_t result_dotp = 0;
452 
453  vec_out out_sqrAdd;
454  out_sqrAdd = (vec_out) 0;
455  uint32_t result_sqrAdd = 0;
456 
 // One vector from each stream per iteration.
457  for (int32_t counter = 0; counter < blockSize; counter += eleCount) {
458  vec a = c7x::strm_eng<0, vec>::get_adv();
459  vec b = c7x::strm_eng<1, vec>::get_adv();
460 
461  out_dotp += __vdotp4ubw_vvv(a, b);
462 
463  out_sqrAdd += __vdotp4ubw_vvv(b, b);
464  }
465  // use intrinsic for horizontal add
466  // cast the uint64_t output of __horizontal_add to uint32_t.
467 
468  result_dotp = (uint32_t) __horizontal_add(out_dotp);
469  result_sqrAdd = (uint32_t) __horizontal_add(out_sqrAdd);
470 
 // Two consecutive outputs: dot product first, sum of squares second.
471  *pOutLocal = result_dotp;
472  *++pOutLocal = result_sqrAdd;
473 
474  __SE0_CLOSE();
475  __SE1_CLOSE();
476 
477  return DSPLIB_SUCCESS;
478 }
479 
480 /**********************************************************************/
481 /* Execute for datatype int16_t */
482 /**********************************************************************/
483 
// exec_ci for int16_t input.
// Streams pIn1 through SE0 and pIn2 through SE1 as int16_t vectors, accumulates
// __vdotp4hd_vvv(a, b) and __vdotp4hd_vvv(b, b) into int64_t vectors, reduces
// them with __horizontal_add and writes two int64_t results to pOut[0] (dot
// product) and pOut[1] (sum of squares of pIn2).
// Always returns DSPLIB_SUCCESS.
// NOTE(review): the line carrying the function name is not present in this listing.
484 template <>
486  void *restrict pIn1,
487  void *restrict pIn2,
488  void *restrict pOut)
489 {
490  DSPLIB_dotp_sqr_PrivArgs *pKerPrivArgs = (DSPLIB_dotp_sqr_PrivArgs *) handle;
491  int32_t blockSize = pKerPrivArgs->blockSize;
492 
493  __SE_TEMPLATE_v1 se0Params;
494 
495  int16_t *restrict pInLocal1 = (int16_t *) pIn1;
496  int16_t *restrict pInLocal2 = (int16_t *) pIn2;
497  int64_t *restrict pOutLocal = (int64_t *) pOut;
498 
499 #if DSPLIB_DEBUGPRINT
500  printf("Enter DSPLIB_dotp_sqr_exec_ci\n");
501 #endif
502 
503  typedef typename c7x::make_full_vector<int16_t>::type vec; // full vector of int16_t; lane count depends on the target's vector width - TODO confirm
504  int32_t eleCount = c7x::element_count_of<vec>::value;
505 
506 #if DSPLIB_DEBUGPRINT
507  printf("Enter eleCount %d\n", eleCount);
508 #endif
509  uint8_t *pBlock = pKerPrivArgs->bufPblock;
510 
 // Reload the SE template that init stored in the parameter block.
511  se0Params = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_SE0_PARAM_OFFSET);
512 
513  // Input samples
514  __SE0_OPEN(pInLocal1, se0Params);
515  __SE1_OPEN(pInLocal2, se0Params);
516 
517 #if DSPLIB_DEBUGPRINT
518  printf("DSPLIB_DEBUGPRINT blockSize %d\n", blockSize);
519 #endif
520 
521  typedef typename c7x::make_full_vector<int64_t>::type vec_out;
522 
523  vec_out out_dotp;
524  out_dotp = (vec_out) 0;
525  int64_t result_dotp = 0;
526 
527  vec_out out_sqrAdd;
528  out_sqrAdd = (vec_out) 0;
529  int64_t result_sqrAdd = 0;
 // One vector from each stream per iteration.
530  for (int32_t counter = 0; counter < blockSize; counter += eleCount) {
531  vec a = c7x::strm_eng<0, vec>::get_adv();
532  vec b = c7x::strm_eng<1, vec>::get_adv();
533 
534  out_dotp += __vdotp4hd_vvv(a, b);
535 
536  out_sqrAdd += __vdotp4hd_vvv(b, b);
537  }
538 
539  // use intrinsic for horizontal add
540  result_dotp = __horizontal_add(out_dotp);
541  result_sqrAdd = __horizontal_add(out_sqrAdd);
542 
 // Two consecutive outputs: dot product first, sum of squares second.
543  *pOutLocal = result_dotp;
544  *++pOutLocal = result_sqrAdd;
545 
546  __SE0_CLOSE();
547  __SE1_CLOSE();
548 
549  return DSPLIB_SUCCESS;
550 }
551 
552 /**********************************************************************/
553 /* Execute for datatype uint16_t */
554 /**********************************************************************/
555 
// exec_ci for uint16_t input.
// Streams pIn1 through SE0 and pIn2 through SE1 as uint16_t vectors,
// accumulates __vdotp4uhd_vvv(a, b) and __vdotp4uhd_vvv(b, b) into uint64_t
// vectors, reduces them with __horizontal_add and writes two uint64_t results
// to pOut[0] (dot product) and pOut[1] (sum of squares of pIn2).
// Always returns DSPLIB_SUCCESS.
// NOTE(review): the line carrying the function name is not present in this listing.
556 template <>
558  void *restrict pIn1,
559  void *restrict pIn2,
560  void *restrict pOut)
561 {
562  DSPLIB_dotp_sqr_PrivArgs *pKerPrivArgs = (DSPLIB_dotp_sqr_PrivArgs *) handle;
563  int32_t blockSize = pKerPrivArgs->blockSize;
564 
565  __SE_TEMPLATE_v1 se0Params;
566 
567  uint16_t *restrict pInLocal1 = (uint16_t *) pIn1;
568  uint16_t *restrict pInLocal2 = (uint16_t *) pIn2;
569  uint64_t *restrict pOutLocal = (uint64_t *) pOut;
570 
571 #if DSPLIB_DEBUGPRINT
572  printf("Enter DSPLIB_dotp_sqr_exec_ci\n");
573 #endif
574 
575  typedef typename c7x::make_full_vector<uint16_t>::type vec;
576  int32_t eleCount = c7x::element_count_of<vec>::value;
577 
578 #if DSPLIB_DEBUGPRINT
579  printf("Enter eleCount %d\n", eleCount);
580 #endif
581  uint8_t *pBlock = pKerPrivArgs->bufPblock;
582 
 // Reload the SE template that init stored in the parameter block.
583  se0Params = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_SE0_PARAM_OFFSET);
584 
585  // Input samples
586  __SE0_OPEN(pInLocal1, se0Params);
587  __SE1_OPEN(pInLocal2, se0Params);
588 
589 #if DSPLIB_DEBUGPRINT
590  printf("DSPLIB_DEBUGPRINT blockSize %d\n", blockSize);
591 #endif
592 
593  typedef typename c7x::make_full_vector<uint64_t>::type vec_out;
594 
595  vec_out out_dotp;
596  out_dotp = (vec_out) 0;
597  uint64_t result_dotp = 0;
598 
599  vec_out out_sqrAdd;
600  out_sqrAdd = (vec_out) 0;
601  uint64_t result_sqrAdd = 0;
 // One vector from each stream per iteration.
602  for (int32_t counter = 0; counter < blockSize; counter += eleCount) {
603  vec a = c7x::strm_eng<0, vec>::get_adv();
604  vec b = c7x::strm_eng<1, vec>::get_adv();
605 
606  out_dotp += __vdotp4uhd_vvv(a, b);
607 
608  out_sqrAdd += __vdotp4uhd_vvv(b, b);
609  }
610 
611  // use intrinsic for horizontal add
612  result_dotp = __horizontal_add(out_dotp);
613  result_sqrAdd = __horizontal_add(out_sqrAdd);
614 
 // Two consecutive outputs: dot product first, sum of squares second.
615  *pOutLocal = result_dotp;
616  *++pOutLocal = result_sqrAdd;
617 
618  __SE0_CLOSE();
619  __SE1_CLOSE();
620 
621  return DSPLIB_SUCCESS;
622 }
623 
624 /**********************************************************************/
625 /* Execute for datatype int32_t */
626 /**********************************************************************/
627 
// exec_ci for int32_t input.
// Streams pIn1 through SE0 and pIn2 through SE1 as int32_t vectors. For each
// vector pair, __vmpywd_vvw yields two int64_t vectors that are summed into
// the accumulator, once for (a, b) and once for (b, b). The accumulators are
// reduced with __horizontal_add and two int64_t results are written to
// pOut[0] (dot product) and pOut[1] (sum of squares of pIn2).
// Always returns DSPLIB_SUCCESS.
// NOTE(review): the line carrying the function name is not present in this listing.
628 template <>
630  void *restrict pIn1,
631  void *restrict pIn2,
632  void *restrict pOut)
633 {
634  DSPLIB_dotp_sqr_PrivArgs *pKerPrivArgs = (DSPLIB_dotp_sqr_PrivArgs *) handle;
635  int32_t blockSize = pKerPrivArgs->blockSize;
636 
637  __SE_TEMPLATE_v1 se0Params;
638 
639  int32_t *restrict pInLocal1 = (int32_t *) pIn1;
640  int32_t *restrict pInLocal2 = (int32_t *) pIn2;
641  int64_t *restrict pOutLocal = (int64_t *) pOut;
642 
643 #if DSPLIB_DEBUGPRINT
644  printf("Enter DSPLIB_dotp_sqr_exec_ci\n");
645 #endif
646 
647  typedef typename c7x::make_full_vector<int32_t>::type vec; // full vector of int32_t
648  int32_t eleCount = c7x::element_count_of<vec>::value;
649 
650 #if DSPLIB_DEBUGPRINT
651  printf("Enter eleCount %d\n", eleCount);
652 #endif
653  uint8_t *pBlock = pKerPrivArgs->bufPblock;
654 
 // Reload the SE template that init stored in the parameter block.
655  se0Params = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_SE0_PARAM_OFFSET);
656 
657  // Input samples
658  __SE0_OPEN(pInLocal1, se0Params);
659  __SE1_OPEN(pInLocal2, se0Params);
660 
661 #if DSPLIB_DEBUGPRINT
662  printf("DSPLIB_DEBUGPRINT blockSize %d\n", blockSize);
663 #endif
664 
665  typedef typename c7x::make_full_vector<int64_t>::type vec_out;
666 
 // out0_* / out1_* are not initialized here; presumably __vmpywd_vvw writes
 // both on every call - confirm against the intrinsic reference.
667  vec_out out_dotp;
668  vec_out out0_dotp;
669  vec_out out1_dotp;
670  out_dotp = (vec_out) 0;
671  int64_t result_dotp = 0;
672 
673  vec_out out_sqrAdd;
674  vec_out out0_sqrAdd;
675  vec_out out1_sqrAdd;
676  out_sqrAdd = (vec_out) 0;
677  int64_t result_sqrAdd = 0;
678 
 // One vector from each stream per iteration.
679  for (int32_t counter = 0; counter < blockSize; counter += eleCount) {
680  vec a = c7x::strm_eng<0, vec>::get_adv();
681  vec b = c7x::strm_eng<1, vec>::get_adv();
682 
683  __vmpywd_vvw(a, b, out0_dotp, out1_dotp);
684  out_dotp += (out0_dotp + out1_dotp);
685 
686  __vmpywd_vvw(b, b, out0_sqrAdd, out1_sqrAdd);
687  out_sqrAdd += (out0_sqrAdd + out1_sqrAdd);
688  }
689 
690  // use intrinsic for horizontal add
691  result_dotp = __horizontal_add(out_dotp);
692  result_sqrAdd = __horizontal_add(out_sqrAdd);
693 
694  // print results
695  // printf("CI: result_dotp = %ld CI: result_sqrAdd = %ld", result_dotp, result_sqrAdd);
696 
 // Two consecutive outputs: dot product first, sum of squares second.
697  *pOutLocal = result_dotp;
698  *++pOutLocal = result_sqrAdd;
699 
700  __SE0_CLOSE();
701  __SE1_CLOSE();
702 
703  return DSPLIB_SUCCESS;
704 }
705 
706 /**********************************************************************/
707 /* Execute for datatype uint32_t */
708 /**********************************************************************/
709 
// exec_ci for uint32_t input.
// Streams pIn1 through SE0 and pIn2 through SE1 as uint32_t vectors. For each
// vector pair, __vmpyuwd_vvw yields two uint64_t vectors that are summed into
// the accumulator, once for (a, b) and once for (b, b). The accumulators are
// reduced with __horizontal_add and two uint64_t results are written to
// pOut[0] (dot product) and pOut[1] (sum of squares of pIn2).
// Always returns DSPLIB_SUCCESS.
// NOTE(review): the line carrying the function name is not present in this listing.
710 template <>
712  void *restrict pIn1,
713  void *restrict pIn2,
714  void *restrict pOut)
715 {
716  DSPLIB_dotp_sqr_PrivArgs *pKerPrivArgs = (DSPLIB_dotp_sqr_PrivArgs *) handle;
717  int32_t blockSize = pKerPrivArgs->blockSize;
718 
719  __SE_TEMPLATE_v1 se0Params;
720 
721  uint32_t *restrict pInLocal1 = (uint32_t *) pIn1;
722  uint32_t *restrict pInLocal2 = (uint32_t *) pIn2;
723  uint64_t *restrict pOutLocal = (uint64_t *) pOut;
724 
725 #if DSPLIB_DEBUGPRINT
726  printf("Enter DSPLIB_dotp_sqr_exec_ci\n");
727 #endif
728 
729  typedef typename c7x::make_full_vector<uint32_t>::type vec; // full vector of uint32_t
730  int32_t eleCount = c7x::element_count_of<vec>::value;
731 
732 #if DSPLIB_DEBUGPRINT
733  printf("Enter eleCount %d\n", eleCount);
734 #endif
735  uint8_t *pBlock = pKerPrivArgs->bufPblock;
736 
 // Reload the SE template that init stored in the parameter block.
737  se0Params = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_SE0_PARAM_OFFSET);
738 
739  // Input samples
740  __SE0_OPEN(pInLocal1, se0Params);
741  __SE1_OPEN(pInLocal2, se0Params);
742 
743 #if DSPLIB_DEBUGPRINT
744  printf("DSPLIB_DEBUGPRINT blockSize %d\n", blockSize);
745 #endif
746 
747  typedef typename c7x::make_full_vector<uint64_t>::type vec_out;
748 
 // out0_* / out1_* are not initialized here; presumably __vmpyuwd_vvw writes
 // both on every call - confirm against the intrinsic reference.
749  vec_out out_dotp;
750  vec_out out0_dotp;
751  vec_out out1_dotp;
752  out_dotp = (vec_out) 0;
753  uint64_t result_dotp = 0;
754 
755  vec_out out_sqrAdd;
756  vec_out out0_sqrAdd;
757  vec_out out1_sqrAdd;
758  out_sqrAdd = (vec_out) 0;
759  uint64_t result_sqrAdd = 0;
 // One vector from each stream per iteration.
760  for (int32_t counter = 0; counter < blockSize; counter += eleCount) {
761  vec a = c7x::strm_eng<0, vec>::get_adv();
762  vec b = c7x::strm_eng<1, vec>::get_adv();
763 
764  __vmpyuwd_vvw(a, b, out0_dotp, out1_dotp);
765  out_dotp += (out0_dotp + out1_dotp);
766 
767  __vmpyuwd_vvw(b, b, out0_sqrAdd, out1_sqrAdd);
768  out_sqrAdd += (out0_sqrAdd + out1_sqrAdd);
769  }
770 
771  // use intrinsic for horizontal add
772  result_dotp = __horizontal_add(out_dotp);
773  result_sqrAdd = __horizontal_add(out_sqrAdd);
774 
 // Two consecutive outputs: dot product first, sum of squares second.
775  *pOutLocal = result_dotp;
776  *++pOutLocal = result_sqrAdd;
777 
778  __SE0_CLOSE();
779  __SE1_CLOSE();
780 
781  return DSPLIB_SUCCESS;
782 }
783 
785  void *restrict pIn1,
786  void *restrict pIn2,
787  void *restrict pOut);
788 
790  void *restrict pIn1,
791  void *restrict pIn2,
792  void *restrict pOut);
793 
795  void *restrict pIn1,
796  void *restrict pIn2,
797  void *restrict pOut);
798 
800  void *restrict pIn1,
801  void *restrict pIn2,
802  void *restrict pOut);
803 
805  void *restrict pIn1,
806  void *restrict pIn2,
807  void *restrict pOut);
808 
810  void *restrict pIn1,
811  void *restrict pIn2,
812  void *restrict pOut);
813 
815  void *restrict pIn1,
816  void *restrict pIn2,
817  void *restrict pOut);
818 
820  void *restrict pIn1,
821  void *restrict pIn2,
822  void *restrict pOut);
template DSPLIB_STATUS DSPLIB_dotp_sqr_init_ci< uint32_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_dotp_sqr_InitArgs *pKerInitArgs)
DSPLIB_STATUS DSPLIB_dotp_sqr_exec_ci< uint8_t >(DSPLIB_kernelHandle handle, void *restrict pIn1, void *restrict pIn2, void *restrict pOut)
template DSPLIB_STATUS DSPLIB_dotp_sqr_init_ci< int32_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_dotp_sqr_InitArgs *pKerInitArgs)
template DSPLIB_STATUS DSPLIB_dotp_sqr_init_ci< uint8_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_dotp_sqr_InitArgs *pKerInitArgs)
#define SE_SE0_PARAM_OFFSET
DSPLIB_STATUS DSPLIB_dotp_sqr_init_ci< int8_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_dotp_sqr_InitArgs *pKerInitArgs)
DSPLIB_STATUS DSPLIB_dotp_sqr_exec_ci< uint32_t >(DSPLIB_kernelHandle handle, void *restrict pIn1, void *restrict pIn2, void *restrict pOut)
template DSPLIB_STATUS DSPLIB_dotp_sqr_exec_ci< float >(DSPLIB_kernelHandle handle, void *restrict pIn1, void *restrict pIn2, void *restrict pOut)
template DSPLIB_STATUS DSPLIB_dotp_sqr_exec_ci< double >(DSPLIB_kernelHandle handle, void *restrict pIn1, void *restrict pIn2, void *restrict pOut)
template DSPLIB_STATUS DSPLIB_dotp_sqr_init_ci< double >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_dotp_sqr_InitArgs *pKerInitArgs)
template DSPLIB_STATUS DSPLIB_dotp_sqr_init_ci< int16_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_dotp_sqr_InitArgs *pKerInitArgs)
DSPLIB_STATUS DSPLIB_dotp_sqr_exec_ci< int16_t >(DSPLIB_kernelHandle handle, void *restrict pIn1, void *restrict pIn2, void *restrict pOut)
DSPLIB_STATUS DSPLIB_dotp_sqr_init_ci(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_dotp_sqr_InitArgs *pKerInitArgs)
This function is the initialization function for the C7x implementation of the kernel....
DSPLIB_STATUS DSPLIB_dotp_sqr_exec_ci< int32_t >(DSPLIB_kernelHandle handle, void *restrict pIn1, void *restrict pIn2, void *restrict pOut)
template DSPLIB_STATUS DSPLIB_dotp_sqr_init_ci< float >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_dotp_sqr_InitArgs *pKerInitArgs)
DSPLIB_STATUS DSPLIB_dotp_sqr_exec_ci< int8_t >(DSPLIB_kernelHandle handle, void *restrict pIn1, void *restrict pIn2, void *restrict pOut)
static float DSPLIB_horiAdd(c7x::float_vec vector)
DSPLIB_STATUS DSPLIB_dotp_sqr_exec_ci< uint16_t >(DSPLIB_kernelHandle handle, void *restrict pIn1, void *restrict pIn2, void *restrict pOut)
DSPLIB_STATUS DSPLIB_dotp_sqr_exec_ci(DSPLIB_kernelHandle handle, void *restrict pIn1, void *restrict pIn2, void *restrict pOut)
This function is the main execution function for the C7x implementation of the kernel....
template DSPLIB_STATUS DSPLIB_dotp_sqr_init_ci< uint16_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_dotp_sqr_InitArgs *pKerInitArgs)
Header file for kernel's internal use. For the kernel's interface, please see DSPLIB_dotp_sqr.
DSPLIB_STATUS_NAME
The enumeration of all status codes.
Definition: DSPLIB_types.h:151
void * DSPLIB_kernelHandle
Handle type for DSPLIB operations.
Definition: DSPLIB_types.h:172
@ DSPLIB_SUCCESS
Definition: DSPLIB_types.h:152
A structure for a 1 dimensional buffer descriptor.
Structure containing the parameters to initialize the kernel.
Structure that is reserved for internal use by the kernel.
uint8_t bufPblock[DSPLIB_DOTP_SQR_IXX_IXX_OXX_PBLOCK_SIZE]
int32_t blockSize
Size of input buffer for different batches DSPLIB_dotp_sqr_init that will be retrieved and used by DS...