DSPLIB User Guide
DSPLIB_w_vec_ci.cpp
Go to the documentation of this file.
1 /******************************************************************************/
5 /* Copyright (C) 2017 Texas Instruments Incorporated - https://www.ti.com/
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * Redistributions of source code must retain the above copyright
12  * notice, this list of conditions and the following disclaimer.
13  *
14  * Redistributions in binary form must reproduce the above copyright
15  * notice, this list of conditions and the following disclaimer in the
16  * documentation and/or other materials provided with the
17  * distribution.
18  *
19  * Neither the name of Texas Instruments Incorporated nor the names of
20  * its contributors may be used to endorse or promote products derived
21  * from this software without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
29  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
30  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
33  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34  *
35  ******************************************************************************/
36 
37 /*******************************************************************************
38  *
39  * INCLUDES
40  *
41  ******************************************************************************/
42 
43 #include "../common/c71/DSPLIB_inlines.h"
44 #include "DSPLIB_w_vec_priv.h"
45 #include <float.h>
46 
47 /*******************************************************************************
48  *
49  * DEFINES
50  *
51  ******************************************************************************/
52 
// Byte offsets of the stream templates inside the kernel's parameter block
// (pKerPrivArgs->bufPblock). The init function stores the SE0, SE1 and SA0
// templates back to back at these offsets and the exec function reads them
// from the same places. SE_PARAM_SIZE is not defined in this file; it is
// expected to come from one of the included DSPLIB headers.
#define SE_PARAM_BASE (0x0000)
#define SE_SE0_PARAM_OFFSET (SE_PARAM_BASE)
#define SE_SE1_PARAM_OFFSET (SE_SE0_PARAM_OFFSET + SE_PARAM_SIZE)
#define SE_SA0_PARAM_OFFSET (SE_SE1_PARAM_OFFSET + SE_PARAM_SIZE)
57 
58 template <typename dataType>
60  const DSPLIB_bufParams1D_t *bufParamsIn,
61  const DSPLIB_bufParams1D_t *bufParamsOut,
62  const DSPLIB_w_vec_InitArgs *pKerInitArgs)
63 {
65  __SE_TEMPLATE_v1 se0Params;
66  __SE_TEMPLATE_v1 se1Params;
67  __SA_TEMPLATE_v1 sa0Params;
68 
69  __SE_ELETYPE SE_ELETYPE;
70  __SE_VECLEN SE_VECLEN;
71  __SA_VECLEN SA_VECLEN;
72 
73  DSPLIB_w_vec_PrivArgs *pKerPrivArgs = (DSPLIB_w_vec_PrivArgs *) handle;
74 
75  uint8_t *pBlock = pKerPrivArgs->bufPblock;
76  uint32_t blockSize = pKerPrivArgs->blockSize;
77 
78  typedef typename c7x::make_full_vector<dataType>::type vec;
79  int32_t eleCount = c7x::element_count_of<vec>::value;
80  SE_VECLEN = c7x::se_veclen<vec>::value;
81  SA_VECLEN = c7x::sa_veclen<vec>::value;
82  SE_ELETYPE = c7x::se_eletype<vec>::value;
83 
84  // determine how many SIMD Widths SE1 has to fetch
85  uint32_t icnt2Size = blockSize / eleCount;
86  uint32_t remBlocksSize = blockSize % eleCount;
87  if (remBlocksSize) {
88  icnt2Size++;
89  }
90 
91 #if DSPLIB_DEBUGPRINT
92  printf("Enter eleCount %d\n", eleCount);
93 #endif
94 
95  // determine scalar of vector style weight input
96  DSPLIB_w_vec_weightStyle weight_flag = pKerPrivArgs->initArgs.weightStyle;
97 
98  // if scalar (weight flag = 0) SE is 1D
99  if (weight_flag == DSPLIB_SCALAR) {
100 
101  /**********************************************************************/
102  /* Prepare streaming engine 0,1 to fetch the input1 and input 2 */
103  /**********************************************************************/
104 
105  // se0 parameters
106  se0Params = __gen_SE_TEMPLATE_v1();
107 
108  se0Params.ICNT0 = blockSize;
109  se0Params.ELETYPE = SE_ELETYPE;
110  se0Params.VECLEN = SE_VECLEN;
111  se0Params.DIMFMT = __SE_DIMFMT_1D;
112 
113  // se1 parameters same as se0
114  se1Params = __gen_SE_TEMPLATE_v1();
115 
116  se1Params.ICNT0 = blockSize;
117  se1Params.ELETYPE = SE_ELETYPE;
118  se1Params.VECLEN = SE_VECLEN;
119  se1Params.DIMFMT = __SE_DIMFMT_1D;
120  }
121 
122  // if vector (weight flag = 1) SE is 2D
123  else {
124 
125  /**********************************************************************/
126  /* Prepare streaming engine 0 to fetch the weight input */
127  /**********************************************************************/
128  se0Params = __gen_SE_TEMPLATE_v1();
129 
130  se0Params.ICNT0 = blockSize;
131  se0Params.ELETYPE = SE_ELETYPE;
132  se0Params.VECLEN = SE_VECLEN;
133  se0Params.DIMFMT = __SE_DIMFMT_1D;
134 
135  /**********************************************************************/
136  /* Prepare streaming engine 1 to fetch input1 and input2 */
137  /**********************************************************************/
138  se1Params = __gen_SE_TEMPLATE_v1();
139 
140  se1Params.DIMFMT = __SE_DIMFMT_3D;
141  se1Params.ICNT0 = eleCount;
142  se1Params.ELETYPE = SE_ELETYPE;
143  se1Params.VECLEN = SE_VECLEN;
144  se1Params.ICNT1 = 2;
145  se1Params.ICNT2 = icnt2Size;
146  // se1Params.DIM1: set to ADDR(x2) - ADDR(x1) in exec
147  se1Params.DIM2 = eleCount;
148  se1Params.DECDIM1 = __SE_DECDIM_DIM2;
149  se1Params.DECDIM1_WIDTH = blockSize;
150  }
151 
152  /**********************************************************************/
153  /* Prepare SA template to store output */
154  /**********************************************************************/
155  sa0Params = __gen_SA_TEMPLATE_v1();
156 
157  sa0Params.ICNT0 = blockSize;
158  sa0Params.DIM1 = blockSize;
159  sa0Params.VECLEN = SA_VECLEN;
160  sa0Params.DIMFMT = __SA_DIMFMT_1D;
161 
162  *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_SE0_PARAM_OFFSET) = se0Params;
163  *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_SE1_PARAM_OFFSET) = se1Params;
164  *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_SA0_PARAM_OFFSET) = sa0Params;
165 
166  return status;
167 }
168 
170  const DSPLIB_bufParams1D_t *bufParamsIn,
171  const DSPLIB_bufParams1D_t *bufParamsOut,
172  const DSPLIB_w_vec_InitArgs *pKerInitArgs);
173 
175  const DSPLIB_bufParams1D_t *bufParamsIn,
176  const DSPLIB_bufParams1D_t *bufParamsOut,
177  const DSPLIB_w_vec_InitArgs *pKerInitArgs);
178 
180  const DSPLIB_bufParams1D_t *bufParamsIn,
181  const DSPLIB_bufParams1D_t *bufParamsOut,
182  const DSPLIB_w_vec_InitArgs *pKerInitArgs);
183 
185  const DSPLIB_bufParams1D_t *bufParamsIn,
186  const DSPLIB_bufParams1D_t *bufParamsOut,
187  const DSPLIB_w_vec_InitArgs *pKerInitArgs);
188 
190  const DSPLIB_bufParams1D_t *bufParamsIn,
191  const DSPLIB_bufParams1D_t *bufParamsOut,
192  const DSPLIB_w_vec_InitArgs *pKerInitArgs);
193 
195  const DSPLIB_bufParams1D_t *bufParamsIn,
196  const DSPLIB_bufParams1D_t *bufParamsOut,
197  const DSPLIB_w_vec_InitArgs *pKerInitArgs);
198 
200  const DSPLIB_bufParams1D_t *bufParamsIn,
201  const DSPLIB_bufParams1D_t *bufParamsOut,
202  const DSPLIB_w_vec_InitArgs *pKerInitArgs);
203 
205  const DSPLIB_bufParams1D_t *bufParamsIn,
206  const DSPLIB_bufParams1D_t *bufParamsOut,
207  const DSPLIB_w_vec_InitArgs *pKerInitArgs);
208 
209 // Execution Function of W_VEC Kernel (both scalar and vector weight input)
210 template <typename dataType>
212  void *restrict pIn1,
213  void *restrict pIn2,
214  void *restrict pM,
215  void *restrict pOut)
216 {
217  DSPLIB_STATUS status = DSPLIB_SUCCESS;
218  DSPLIB_w_vec_PrivArgs *pKerPrivArgs = (DSPLIB_w_vec_PrivArgs *) handle;
219  int32_t blockSize = pKerPrivArgs->blockSize;
220 
221  __SE_TEMPLATE_v1 se0Params;
222  __SE_TEMPLATE_v1 se1Params;
223  __SA_TEMPLATE_v1 sa0Params;
224 
225  // dataType *restrict pOutp = (dataType *) pOut;
226  dataType *restrict pInLocal1 = (dataType *) pIn1;
227  dataType *restrict pInLocal2 = (dataType *) pIn2;
228  dataType *restrict pMLocal = (dataType *) pM;
229  dataType *restrict pOutLocal = (dataType *) pOut;
230 
231 #if DSPLIB_DEBUGPRINT
232  printf("Enter DSPLIB_w_vec_exec_ci\n");
233 #endif
234 
235  typedef typename c7x::make_full_vector<dataType>::type vec;
236  int32_t eleCount = c7x::element_count_of<vec>::value;
237 
238 #if DSPLIB_DEBUGPRINT
239  printf("Enter eleCount %d\n", eleCount);
240 #endif
241  uint8_t *pBlock = pKerPrivArgs->bufPblock;
242  DSPLIB_w_vec_weightStyle weightFlag = pKerPrivArgs->initArgs.weightStyle;
243 
244  se0Params = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_SE0_PARAM_OFFSET);
245  se1Params = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_SE1_PARAM_OFFSET);
246  sa0Params = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_SA0_PARAM_OFFSET);
247 
248  // set se1 param DIM1 offset
249  se1Params.DIM1 = ((dataType *) pM) - ((dataType *) pIn1);
250 
251  __SA0_OPEN(sa0Params);
252 
253  if (weightFlag == DSPLIB_SCALAR) {
254  // Input samples
255  __SE0_OPEN(pInLocal1, se0Params);
256  __SE1_OPEN(pInLocal2, se0Params);
257 
258  // Output samples
259  __SA0_OPEN(sa0Params);
260 
261 #if DSPLIB_DEBUGPRINT
262  printf("DSPLIB_DEBUGPRINT blockSize %d\n", blockSize);
263 #endif
264 
265  vec out;
266 
267  // vec m = (vec)(*pMLocal);
268 
269  // vdup can't recognize float???
270  // vec m = __duplicate(*pMlocal);
271  vec m = __vload_dup(pMLocal);
272 
273  for (int32_t counter = 0; counter < blockSize; counter += eleCount) {
274  vec a = c7x::strm_eng<0, vec>::get_adv();
275  vec b = c7x::strm_eng<1, vec>::get_adv();
276 
277  out = a * m + b;
278 
279  __vpred tmp = c7x::strm_agen<0, vec>::get_vpred();
280  vec *VB1 = c7x::strm_agen<0, vec>::get_adv(pOutLocal);
281 
282  // out.print();
283 
284  __vstore_pred(tmp, VB1, out);
285  }
286  __SE0_CLOSE();
287  __SE1_CLOSE();
288  __SA0_CLOSE();
289 
290  status = DSPLIB_SUCCESS;
291  }
292 
293  else {
294 
295  // Input samples
296  // need to use se1 for fetching both input1 and input2 vector
297  __SE0_OPEN(pInLocal2, se0Params);
298  __SE1_OPEN(pInLocal1, se1Params);
299 
300  // Output samples
301  __SA0_OPEN(sa0Params);
302 
303 #if DSPLIB_DEBUGPRINT
304  printf("DSPLIB_DEBUGPRINT blockSize %d\n", blockSize);
305 #endif
306 
307  vec out;
308  for (int32_t counter = 0; counter < blockSize; counter += eleCount) {
309  vec a = c7x::strm_eng<1, vec>::get_adv();
310  vec w = c7x::strm_eng<1, vec>::get_adv();
311  vec b = c7x::strm_eng<0, vec>::get_adv();
312 
313  out = a * w + b;
314 
315  __vpred tmp = c7x::strm_agen<0, vec>::get_vpred();
316  vec *VB1 = c7x::strm_agen<0, vec>::get_adv(pOutLocal);
317 
318  __vstore_pred(tmp, VB1, out);
319  }
320  __SE0_CLOSE();
321  __SE1_CLOSE();
322  __SA0_CLOSE();
323 
324  status = DSPLIB_SUCCESS;
325  }
326  return status;
327 }
328 
330  void *restrict pIn1,
331  void *restrict pIn2,
332  void *restrict pM,
333  void *restrict pOut);
334 
336  void *restrict pIn1,
337  void *restrict pIn2,
338  void *restrict pM,
339  void *restrict pOut);
340 
342  void *restrict pIn1,
343  void *restrict pIn2,
344  void *restrict pM,
345  void *restrict pOut);
346 
348  void *restrict pIn1,
349  void *restrict pIn2,
350  void *restrict pM,
351  void *restrict pOut);
352 
354  void *restrict pIn1,
355  void *restrict pIn2,
356  void *restrict pM,
357  void *restrict pOut);
358 
360  void *restrict pIn1,
361  void *restrict pIn2,
362  void *restrict pM,
363  void *restrict pOut);
364 
366  void *restrict pIn1,
367  void *restrict pIn2,
368  void *restrict pM,
369  void *restrict pOut);
370 
372  void *restrict pIn1,
373  void *restrict pIn2,
374  void *restrict pM,
375  void *restrict pOut);
DSPLIB_STATUS DSPLIB_w_vec_exec_ci(DSPLIB_kernelHandle handle, void *restrict pIn1, void *restrict pIn2, void *restrict pM, void *restrict pOut)
This function is the main execution function for the C7x implementation of the kernel....
template DSPLIB_STATUS DSPLIB_w_vec_exec_ci< int8_t >(DSPLIB_kernelHandle handle, void *restrict pIn1, void *restrict pIn2, void *restrict pM, void *restrict pOut)
template DSPLIB_STATUS DSPLIB_w_vec_init_ci< uint32_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_w_vec_InitArgs *pKerInitArgs)
template DSPLIB_STATUS DSPLIB_w_vec_exec_ci< uint8_t >(DSPLIB_kernelHandle handle, void *restrict pIn1, void *restrict pIn2, void *restrict pM, void *restrict pOut)
template DSPLIB_STATUS DSPLIB_w_vec_exec_ci< uint32_t >(DSPLIB_kernelHandle handle, void *restrict pIn1, void *restrict pIn2, void *restrict pM, void *restrict pOut)
template DSPLIB_STATUS DSPLIB_w_vec_init_ci< uint16_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_w_vec_InitArgs *pKerInitArgs)
template DSPLIB_STATUS DSPLIB_w_vec_exec_ci< float >(DSPLIB_kernelHandle handle, void *restrict pIn1, void *restrict pIn2, void *restrict pM, void *restrict pOut)
#define SE_SE0_PARAM_OFFSET
template DSPLIB_STATUS DSPLIB_w_vec_init_ci< int32_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_w_vec_InitArgs *pKerInitArgs)
template DSPLIB_STATUS DSPLIB_w_vec_init_ci< double >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_w_vec_InitArgs *pKerInitArgs)
template DSPLIB_STATUS DSPLIB_w_vec_init_ci< int16_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_w_vec_InitArgs *pKerInitArgs)
template DSPLIB_STATUS DSPLIB_w_vec_init_ci< float >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_w_vec_InitArgs *pKerInitArgs)
#define SE_SE1_PARAM_OFFSET
template DSPLIB_STATUS DSPLIB_w_vec_init_ci< uint8_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_w_vec_InitArgs *pKerInitArgs)
template DSPLIB_STATUS DSPLIB_w_vec_init_ci< int8_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_w_vec_InitArgs *pKerInitArgs)
template DSPLIB_STATUS DSPLIB_w_vec_exec_ci< double >(DSPLIB_kernelHandle handle, void *restrict pIn1, void *restrict pIn2, void *restrict pM, void *restrict pOut)
template DSPLIB_STATUS DSPLIB_w_vec_exec_ci< int16_t >(DSPLIB_kernelHandle handle, void *restrict pIn1, void *restrict pIn2, void *restrict pM, void *restrict pOut)
DSPLIB_STATUS DSPLIB_w_vec_init_ci(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_w_vec_InitArgs *pKerInitArgs)
This function is the initialization function for the C7x implementation of the kernel....
template DSPLIB_STATUS DSPLIB_w_vec_exec_ci< int32_t >(DSPLIB_kernelHandle handle, void *restrict pIn1, void *restrict pIn2, void *restrict pM, void *restrict pOut)
template DSPLIB_STATUS DSPLIB_w_vec_exec_ci< uint16_t >(DSPLIB_kernelHandle handle, void *restrict pIn1, void *restrict pIn2, void *restrict pM, void *restrict pOut)
#define SE_SA0_PARAM_OFFSET
Header file for kernel's internal use. For the kernel's interface, please see DSPLIB_w_vec.
DSPLIB_STATUS_NAME
The enumeration of all status codes.
Definition: DSPLIB_types.h:151
void * DSPLIB_kernelHandle
Handle type for DSPLIB operations.
Definition: DSPLIB_types.h:172
@ DSPLIB_SUCCESS
Definition: DSPLIB_types.h:152
DSPLIB_w_vec_weightStyle
Enumeration for w_vec flag pertaining to scalar or vector input weight.
Definition: DSPLIB_w_vec.h:107
@ DSPLIB_SCALAR
Input Weight is a Scalar.
Definition: DSPLIB_w_vec.h:109
A structure for a 1 dimensional buffer descriptor.
Structure containing the parameters to initialize the kernel.
Definition: DSPLIB_w_vec.h:117
DSPLIB_w_vec_weightStyle weightStyle
Weight Input Style
Definition: DSPLIB_w_vec.h:123
Structure that is reserved for internal use by the kernel.
uint8_t bufPblock[DSPLIB_W_VEC_IXX_IXX_OXX_PBLOCK_SIZE]
DSPLIB_w_vec_InitArgs initArgs
int32_t blockSize
Size of input buffer for different batches DSPLIB_w_vec_init that will be retrieved and used by DSPLI...