DSPLIB User Guide
DSPLIB_matMul_N_unroll_ci.cpp
Go to the documentation of this file.
1 /******************************************************************************/
2 /* Copyright (C) 2017 Texas Instruments Incorporated - https://www.ti.com/
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  *
8  * Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  *
11  * Redistributions in binary form must reproduce the above copyright
12  * notice, this list of conditions and the following disclaimer in the
13  * documentation and/or other materials provided with the
14  * distribution.
15  *
16  * Neither the name of Texas Instruments Incorporated nor the names of
17  * its contributors may be used to endorse or promote products derived
18  * from this software without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31  *
32  ******************************************************************************/
33 
34 /*******************************************************************************
35  *
36  * INCLUDES
37  *
38  ******************************************************************************/
39 
40 #include "../common/c71/DSPLIB_inlines.h"
41 #include "DSPLIB_matMul_priv.h"
42 #include <float.h>
43 
44 /*******************************************************************************
45  *
46  * DEFINES
47  *
48  ******************************************************************************/
49 
/* Byte offsets of the stream templates inside the kernel parameter block
 * (pBlock). The init routine stores templates at these offsets and the exec
 * routine loads them back. SE_PARAM_SIZE is not defined in this listing. */
50 #define SE_PARAM_BASE (0x0000)
51 #define SE_SE0_PARAM_OFFSET (SE_PARAM_BASE)
/* NOTE(review): this adds SE_PARAM_BASE (0), not SE_PARAM_SIZE, so the SE1
 * offset equals the SE0 offset. The exec routine loads se1Params from this
 * offset while the init routine only stores se0Params, so SE1 currently reuses
 * the SE0 template through this aliasing. Confirm that is intentional before
 * changing the expression; switching to SE_PARAM_SIZE alone would make exec
 * read a slot that init never writes. */
52 #define SE_SE1_PARAM_OFFSET (SE_SE0_PARAM_OFFSET + SE_PARAM_BASE)
53 #define SE_SA0_PARAM_OFFSET (SE_SE1_PARAM_OFFSET + SE_PARAM_SIZE)
54 #define SE_SA1_PARAM_OFFSET (SE_SA0_PARAM_OFFSET + SE_PARAM_SIZE)
55 
/* Vectors per output tile: used as the SA1 ICNT1 and in the NBlocks
 * computation; the exec loop keeps the same number of accumulators. */
56 #define DSPLIB_MATMUL_UNROLL_FACTOR (16)
/* Vectors per streaming engine per k-step (ICNT1 of the SE template). */
57 #define DSPLIB_MATMUL_SE_UNROLL_FACTOR (8)
58 
/* Init routine of the N-unrolled matrix multiply
 * (DSPLIB_matMul_N_unroll_init_ci).
 *
 * Reads M, K, N and the three element strides from the private-args structure
 * behind `handle`, computes and stores NBlocks, builds one SE template and two
 * SA templates, and writes them into pKerPrivArgs->bufPblock at the SE0, SA0
 * and SA1 offsets. Returns `status`.
 *
 * bufParamsIn0, bufParamsIn1, bufParamsOut and pKerInitArgs are not referenced
 * in the visible body.
 *
 * NOTE: listing line 60 (return type, function name and the `handle`
 * parameter) and listing line 66 (presumably the declaration of `status`) are
 * absent from this extract; the member list at the end of the page gives the
 * full signature. */
59 template <typename dataType>
61  const DSPLIB_bufParams2D_t *bufParamsIn0,
62  const DSPLIB_bufParams2D_t *bufParamsIn1,
63  const DSPLIB_bufParams2D_t *bufParamsOut,
64  const DSPLIB_matMul_InitArgs *pKerInitArgs)
65 {
67  __SE_TEMPLATE_v1 se0Params;
68  __SA_TEMPLATE_v1 sa0Params;
69  __SA_TEMPLATE_v1 sa1Params;
70 
71  __SE_ELETYPE SE_ELETYPE;
72  __SE_VECLEN SE_VECLEN;
73  __SA_VECLEN SA_VECLEN;
74 
75  DSPLIB_matMul_PrivArgs *pKerPrivArgs = (DSPLIB_matMul_PrivArgs *) handle;
76 
 /* Destination buffer for the generated stream templates. */
77  uint8_t *pBlock = pKerPrivArgs->bufPblock;
78 
79  int32_t M = pKerPrivArgs->M;
80  int32_t K = pKerPrivArgs->K;
81  int32_t N = pKerPrivArgs->N;
82  int32_t strideIn0 = pKerPrivArgs->strideIn0Elements;
83  int32_t strideIn1 = pKerPrivArgs->strideIn1Elements;
84  int32_t strideOut = pKerPrivArgs->strideOutElements;
85 
 /* Full-width vector of dataType; the element count and the SE/SA vector-length
  * and element-type settings are all derived from it. */
86  typedef typename c7x::make_full_vector<dataType>::type vec;
87 
88  int32_t elementCount = c7x::element_count_of<vec>::value;
89  SE_VECLEN = c7x::se_veclen<vec>::value;
90  SA_VECLEN = c7x::sa_veclen<vec>::value;
91  SE_ELETYPE = c7x::se_eletype<vec>::value;
92 
 /* Number of output tiles per row; one tile is
  * DSPLIB_MATMUL_UNROLL_FACTOR * elementCount columns wide.
  * NOTE(review): the numerator adds (UNROLL_FACTOR - 1) while the divisor is
  * UNROLL_FACTOR * elementCount, so this is neither a plain floor nor a
  * ceiling of N / tileWidth. The templates below always address full tiles
  * (no ICNT is derived from N directly), so presumably N is expected to be a
  * multiple of the tile width - confirm against the caller. */
93  int32_t NBlocks = ((N + (DSPLIB_MATMUL_UNROLL_FACTOR - 1)) / ((DSPLIB_MATMUL_UNROLL_FACTOR) *elementCount));
 /* Saved so the exec routine can size its outer loop (M * NBlocks). */
94  pKerPrivArgs->NBlocks = NBlocks;
95 
96  /**********************************************************************/
97  /* Prepare SA template to fetch A matrix */
98  /**********************************************************************/
99 
100  sa0Params = __gen_SA_TEMPLATE_v1();
101  sa0Params.VECLEN = SA_VECLEN;
102  sa0Params.DIMFMT = __SA_DIMFMT_4D;
103 
 /* One element per access, K accesses with step 1; that run is repeated
  * NBlocks times (DIM2 = 0) before stepping by strideIn0, M times in total.
  * DIM values are presumably in elements - confirm. */
104  sa0Params.ICNT0 = 1;
105  sa0Params.ICNT1 = K;
106  sa0Params.DIM1 = 1;
107  sa0Params.ICNT2 = NBlocks;
108  sa0Params.DIM2 = 0;
109  sa0Params.ICNT3 = M;
110  sa0Params.DIM3 = strideIn0;
111 
112  /**********************************************************************/
113  /* Prepare streaming engine 0 to fetch B matrix */
114  /**********************************************************************/
115 
116  se0Params = __gen_SE_TEMPLATE_v1();
117  se0Params.ELETYPE = SE_ELETYPE;
118  se0Params.VECLEN = SE_VECLEN;
119  se0Params.DIMFMT = __SE_DIMFMT_5D;
120 
 /* DIM1 is 2 * elementCount, i.e. every other vector: the exec routine opens
  * SE1 with the same stored template at pIn1 + elementCount, so the two
  * engines together cover UNROLL_FACTOR consecutive vectors per k-step.
  * DIM4 = 0 with ICNT4 = M repeats the whole traversal M times. */
121  se0Params.ICNT0 = elementCount;
122  se0Params.ICNT1 = DSPLIB_MATMUL_SE_UNROLL_FACTOR;
123  se0Params.DIM1 = (int32_t) ((uint32_t) elementCount << (uint32_t) 1);
124  se0Params.ICNT2 = K;
125  se0Params.DIM2 = strideIn1;
126  se0Params.ICNT3 = NBlocks;
127  se0Params.DIM3 = elementCount * DSPLIB_MATMUL_UNROLL_FACTOR;
128  se0Params.ICNT4 = M;
129  se0Params.DIM4 = 0;
130 
131  /**********************************************************************/
132  /* Prepare SA1 template to address the output matrix */
133  /**********************************************************************/
134 
135  sa1Params = __gen_SA_TEMPLATE_v1();
136  sa1Params.VECLEN = SA_VECLEN;
137  sa1Params.DIMFMT = __SA_DIMFMT_4D;
138 
 /* UNROLL_FACTOR vectors per tile, NBlocks tiles per row, M rows stepped by
  * strideOut. */
139  sa1Params.ICNT0 = elementCount;
140  sa1Params.ICNT1 = DSPLIB_MATMUL_UNROLL_FACTOR;
141  sa1Params.DIM1 = elementCount;
142  sa1Params.ICNT2 = NBlocks;
143  sa1Params.DIM2 = elementCount * DSPLIB_MATMUL_UNROLL_FACTOR;
144  sa1Params.ICNT3 = M;
145  sa1Params.DIM3 = strideOut;
146 
 /* Only the SE0 template is stored; no separate SE1 template is written. */
147  *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_SE0_PARAM_OFFSET) = se0Params;
148 
149  *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_SA0_PARAM_OFFSET) = sa0Params;
150  *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_SA1_PARAM_OFFSET) = sa1Params;
151 
152  return status;
153 }
154 
/* Parameter lists of the two explicit instantiations of
 * DSPLIB_matMul_N_unroll_init_ci. Their first lines (listing lines 155 and
 * 160) are absent from this extract; the member list at the end of the page
 * names a float and a double instantiation. */
156  const DSPLIB_bufParams2D_t *bufParamsIn0,
157  const DSPLIB_bufParams2D_t *bufParamsIn1,
158  const DSPLIB_bufParams2D_t *bufParamsOut,
159  const DSPLIB_matMul_InitArgs *pKerInitArgs);
161  const DSPLIB_bufParams2D_t *bufParamsIn0,
162  const DSPLIB_bufParams2D_t *bufParamsIn1,
163  const DSPLIB_bufParams2D_t *bufParamsOut,
164  const DSPLIB_matMul_InitArgs *pKerInitArgs);
165 template <typename T, typename vec> static inline void writeOutSA1(__vpred tmp, vec *addr, T pOut, vec out)
166 {
167  /* printf("\nOut vector below:\n"); */
168  DSPLIB_debugPrintVector(out);
169  tmp = c7x::strm_agen<1, vec>::get_vpred();
170  addr = c7x::strm_agen<1, vec>::get_adv(pOut);
171  __vstore_pred(tmp, addr, out);
172 }
173 
174 template <typename dataType>
176  void *restrict pIn0,
177  void *restrict pIn1,
178  void *restrict pOut)
179 {
180  DSPLIB_matMul_PrivArgs *pKerPrivArgs = (DSPLIB_matMul_PrivArgs *) handle;
181 
182  int32_t M = pKerPrivArgs->M;
183  int32_t K = pKerPrivArgs->K;
184  int32_t NBlocks = pKerPrivArgs->NBlocks;
185 
186  __SE_TEMPLATE_v1 se0Params;
187  __SE_TEMPLATE_v1 se1Params;
188  __SA_TEMPLATE_v1 sa0Params;
189  __SA_TEMPLATE_v1 sa1Params;
190 
191 #if DSPLIB_DEBUGPRINT
192  printf("Enter DSPLIB_matMul_exec_ci\n");
193 #endif
194 
195  typedef typename c7x::make_full_vector<dataType>::type vec;
196 
197  int32_t elementCount = c7x::element_count_of<vec>::value;
198 
199  uint8_t *pBlock = pKerPrivArgs->bufPblock;
200 
201  se0Params = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_SE0_PARAM_OFFSET);
202  se1Params = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_SE1_PARAM_OFFSET);
203 
204  sa0Params = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_SA0_PARAM_OFFSET);
205  sa1Params = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_SA1_PARAM_OFFSET);
206 
207  // Input samples
208  __SE0_OPEN(pIn1, se0Params);
209  __SE1_OPEN(((dataType *) pIn1 + elementCount), se1Params);
210 
211  // Output samples
212  __SA0_OPEN(sa0Params);
213  __SA1_OPEN(sa1Params);
214 
215  /* vec c; */
216  vec a;
217  vec b;
218  vec r00, r01, r03, r02, r04, r05, r06, r07;
219  vec r08, r09, r0a, r0b, r0c, r0d, r0e, r0f;
220 
221  __vpred tmp;
222  vec *addr;
223 
224  /* #pragma MUST_ITERATE(2, , 2) */
225  for (int32_t n = 0; n < M * NBlocks; n++) {
226  /* for (int32_t n = 0; n < 1; n++) { */
227 
228  r00 = (vec) 0;
229  r01 = (vec) 0;
230  r02 = (vec) 0;
231  r03 = (vec) 0;
232  r04 = (vec) 0;
233  r05 = (vec) 0;
234  r06 = (vec) 0;
235  r07 = (vec) 0;
236  r08 = (vec) 0;
237  r09 = (vec) 0;
238  r0a = (vec) 0;
239  r0b = (vec) 0;
240  r0c = (vec) 0;
241  r0d = (vec) 0;
242  r0e = (vec) 0;
243  r0f = (vec) 0;
244 
245  for (int32_t k = 0; k < K; k++) {
246  /* printf("Iteration count m = %d, nBlocks= %d, = %d\n", m, n, k); */
247  dataType *addrA = (c7x::strm_agen<0, dataType>::get_adv(pIn0));
248  a = __vload_dup(addrA);
249 
250  /* printf("Vector a below:\n"); */
251  DSPLIB_debugPrintVector(a);
252 
253  b = c7x::strm_eng<0, vec>::get_adv();
254  r00 += a * b;
255 
256  b = c7x::strm_eng<1, vec>::get_adv();
257  r01 += a * b;
258 
259  b = c7x::strm_eng<0, vec>::get_adv();
260  /* printf("Vector b below:\n"); */
261  DSPLIB_debugPrintVector(b);
262  r02 += a * b;
263  /* printf("Vector r02 below:\n"); */
264  DSPLIB_debugPrintVector(r02);
265 
266  b = c7x::strm_eng<1, vec>::get_adv();
267  r03 += a * b;
268 
269  b = c7x::strm_eng<0, vec>::get_adv();
270  r04 += a * b;
271 
272  b = c7x::strm_eng<1, vec>::get_adv();
273  r05 += a * b;
274 
275  b = c7x::strm_eng<0, vec>::get_adv();
276  r06 += a * b;
277 
278  b = c7x::strm_eng<1, vec>::get_adv();
279  r07 += a * b;
280 
281  b = c7x::strm_eng<0, vec>::get_adv();
282  r08 += a * b;
283 
284  b = c7x::strm_eng<1, vec>::get_adv();
285  r09 += a * b;
286 
287  b = c7x::strm_eng<0, vec>::get_adv();
288  r0a += a * b;
289 
290  b = c7x::strm_eng<1, vec>::get_adv();
291  r0b += a * b;
292 
293  b = c7x::strm_eng<0, vec>::get_adv();
294  r0c += a * b;
295 
296  b = c7x::strm_eng<1, vec>::get_adv();
297  r0d += a * b;
298 
299  b = c7x::strm_eng<0, vec>::get_adv();
300  r0e += a * b;
301 
302  b = c7x::strm_eng<1, vec>::get_adv();
303  r0f += a * b;
304  }
305 
306  writeOutSA1(tmp, addr, pOut, r00);
307  writeOutSA1(tmp, addr, pOut, r01);
308  writeOutSA1(tmp, addr, pOut, r02);
309  writeOutSA1(tmp, addr, pOut, r03);
310  writeOutSA1(tmp, addr, pOut, r04);
311  writeOutSA1(tmp, addr, pOut, r05);
312  writeOutSA1(tmp, addr, pOut, r06);
313  writeOutSA1(tmp, addr, pOut, r07);
314  writeOutSA1(tmp, addr, pOut, r08);
315  writeOutSA1(tmp, addr, pOut, r09);
316  writeOutSA1(tmp, addr, pOut, r0a);
317  writeOutSA1(tmp, addr, pOut, r0b);
318  writeOutSA1(tmp, addr, pOut, r0c);
319  writeOutSA1(tmp, addr, pOut, r0d);
320  writeOutSA1(tmp, addr, pOut, r0e);
321  writeOutSA1(tmp, addr, pOut, r0f);
322  }
323 
324  __SE0_CLOSE();
325  __SE1_CLOSE();
326  __SA0_CLOSE();
327 
328  return DSPLIB_SUCCESS;
329 }
330 
/* Parameter lists of the two explicit instantiations of
 * DSPLIB_matMul_N_unroll_exec_ci. Their first lines (listing lines 331 and
 * 335) are absent from this extract; the member list at the end of the page
 * names a float and a double instantiation. */
332  void *restrict pIn0,
333  void *restrict pIn1,
334  void *restrict pOut);
336  void *restrict pIn0,
337  void *restrict pIn1,
338  void *restrict pOut);
#define DSPLIB_MATMUL_SE_UNROLL_FACTOR
template DSPLIB_STATUS DSPLIB_matMul_N_unroll_exec_ci< float >(DSPLIB_kernelHandle handle, void *restrict pIn0, void *restrict pIn1, void *restrict pOut)
#define SE_SE0_PARAM_OFFSET
#define SE_SA1_PARAM_OFFSET
static void writeOutSA1(__vpred tmp, vec *addr, T pOut, vec out)
template DSPLIB_STATUS DSPLIB_matMul_N_unroll_exec_ci< double >(DSPLIB_kernelHandle handle, void *restrict pIn0, void *restrict pIn1, void *restrict pOut)
DSPLIB_STATUS DSPLIB_matMul_N_unroll_init_ci(DSPLIB_kernelHandle handle, const DSPLIB_bufParams2D_t *bufParamsIn0, const DSPLIB_bufParams2D_t *bufParamsIn1, const DSPLIB_bufParams2D_t *bufParamsOut, const DSPLIB_matMul_InitArgs *pKerInitArgs)
#define SE_SE1_PARAM_OFFSET
template DSPLIB_STATUS DSPLIB_matMul_N_unroll_init_ci< double >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams2D_t *bufParamsIn0, const DSPLIB_bufParams2D_t *bufParamsIn1, const DSPLIB_bufParams2D_t *bufParamsOut, const DSPLIB_matMul_InitArgs *pKerInitArgs)
DSPLIB_STATUS DSPLIB_matMul_N_unroll_exec_ci(DSPLIB_kernelHandle handle, void *restrict pIn0, void *restrict pIn1, void *restrict pOut)
#define DSPLIB_MATMUL_UNROLL_FACTOR
template DSPLIB_STATUS DSPLIB_matMul_N_unroll_init_ci< float >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams2D_t *bufParamsIn0, const DSPLIB_bufParams2D_t *bufParamsIn1, const DSPLIB_bufParams2D_t *bufParamsOut, const DSPLIB_matMul_InitArgs *pKerInitArgs)
#define SE_SA0_PARAM_OFFSET
Header file for kernel's internal use. For the kernel's interface, please see DSPLIB_matMul.
DSPLIB_STATUS_NAME
The enumeration of all status codes.
Definition: DSPLIB_types.h:151
void * DSPLIB_kernelHandle
Handle type for DSPLIB operations.
Definition: DSPLIB_types.h:172
@ DSPLIB_SUCCESS
Definition: DSPLIB_types.h:152
A structure for a 2 dimensional buffer descriptor.
Structure containing the parameters to initialize the kernel.
Structure that is reserved for internal use by the kernel.
uint8_t bufPblock[DSPLIB_MATMUL_IXX_IXX_OXX_PBLOCK_SIZE]