DSPLIB User Guide
DSPLIB_matTrans_ci.cpp
Go to the documentation of this file.
1 /******************************************************************************/
5 /* Copyright (C) 2017 Texas Instruments Incorporated - https://www.ti.com/
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * Redistributions of source code must retain the above copyright
12  * notice, this list of conditions and the following disclaimer.
13  *
14  * Redistributions in binary form must reproduce the above copyright
15  * notice, this list of conditions and the following disclaimer in the
16  * documentation and/or other materials provided with the
17  * distribution.
18  *
19  * Neither the name of Texas Instruments Incorporated nor the names of
20  * its contributors may be used to endorse or promote products derived
21  * from this software without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
29  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
30  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
33  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34  *
35  ******************************************************************************/
36 
37 /******************************************************************************
38  * Version 1.0 Date Aug 2023 Author: Asheesh Bhardwaj
39  *****************************************************************************/
40 
41 /*******************************************************************************
42  *
43  * INCLUDES
44  *
45  ******************************************************************************/
46 
47 #include "DSPLIB_matTrans_priv.h"
48 
49 /*******************************************************************************
50  *
51  * DEFINES
52  *
53  ******************************************************************************/
54 #define SE_PARAM_BASE (0x0000)
55 #define SE_SE0_PARAM_OFFSET (SE_PARAM_BASE)
56 #define SE_SA0_PARAM_OFFSET (SE_SE0_PARAM_OFFSET + SE_PARAM_SIZE)
57 
58 /**********************************************************************/
59 /* INITIALIZATION */
60 /**********************************************************************/
/*
 * matTrans_init_64bit: fills the parameter block consumed by the 64-bit
 * transpose kernel.
 *
 * Reads widthIn/heightIn from the private args behind `handle` and the row
 * strides (stride_y, in bytes) from bufParamsIn/bufParamsOut, builds one SE
 * template and one SA template, and stores both, followed by the block count
 * `iter`, in pKerPrivArgs->bufPblock. Nothing is returned.
 *
 * NOTE(review): the declarator line (listing line 62) is missing from this
 * listing; the function name and the `handle` parameter are taken from the
 * symbol index at the end of the page and from the call sites - confirm
 * against the real source file.
 */
61 template <typename dataType>
63  const DSPLIB_bufParams2D_t *bufParamsIn,
64  const DSPLIB_bufParams2D_t *bufParamsOut)
65 {
66  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Entering template function to init SE for 64-bit data");
67 
68  DSPLIB_matTrans_PrivArgs *pKerPrivArgs = (DSPLIB_matTrans_PrivArgs *) handle;
69  uint8_t *pBlock = pKerPrivArgs->bufPblock;
70  uint32_t widthIn = pKerPrivArgs->widthIn;
71  uint32_t heightIn = pKerPrivArgs->heightIn;
72  int32_t strideIn = bufParamsIn->stride_y;
73  int32_t strideOut = bufParamsOut->stride_y;
74  int32_t dataSize = sizeof(dataType);
75  __SE_TEMPLATE_v1 se0Params = __gen_SE_TEMPLATE_v1();
76  __SA_TEMPLATE_v1 sa0Params = __gen_SA_TEMPLATE_v1();
77  typedef typename c7x::make_full_vector<dataType>::type vec;
78  int32_t eleCount = c7x::element_count_of<vec>::value;
79  __SE_ELETYPE SE_ELETYPE = c7x::se_eletype<vec>::value;
80  __SE_VECLEN SE_VECLEN = c7x::se_veclen<vec>::value;
81  __SA_VECLEN SA_VECLEN = c7x::sa_veclen<vec>::value;
    /* Number of row blocks: ceil(heightIn / (2 * eleCount)). The factor 2 is
     * presumably there because the compute kernel drives two streams, the
     * second one starting eleCount rows below the first - confirm. */
82  int32_t iter = (heightIn + (eleCount * 2) - 1) / (eleCount * 2);
83 
    /* SE template: ICNT0 covers one input row, ICNT1 is capped at eleCount
     * rows, DIM1 is the input row stride converted from bytes to elements,
     * and ICNT2/DIM2 step through the `iter` blocks of 2 * eleCount rows. */
84  se0Params.ICNT0 = widthIn;
85  se0Params.ICNT1 = (heightIn > (uint32_t) eleCount) ? eleCount : heightIn;
86  se0Params.DIM1 = strideIn / dataSize;
87  se0Params.ICNT2 = iter;
88  se0Params.DIM2 = (strideIn / dataSize) * eleCount * 2;
89  se0Params.DIMFMT = __SE_DIMFMT_3D;
90  se0Params.TRANSPOSE = __SE_TRANSPOSE_64BIT;
91  se0Params.ELETYPE = SE_ELETYPE;
92  se0Params.VECLEN = SE_VECLEN;
93 
    /* SA template: 2 * eleCount elements per output row segment, widthIn
     * output rows (DIM1 = output row stride in elements), repeated `iter`
     * times. DECDIM1_WIDTH is set to heightIn - presumably so stores past the
     * end of an output row are predicated off; confirm against the SA docs. */
94  sa0Params.ICNT0 = eleCount * 2;
95  sa0Params.ICNT1 = widthIn;
96  sa0Params.DIM1 = strideOut / dataSize;
97  sa0Params.ICNT2 = iter;
98  sa0Params.DIM2 = eleCount * 2;
99  sa0Params.VECLEN = SA_VECLEN;
100  sa0Params.DIMFMT = __SA_DIMFMT_3D;
101  sa0Params.DECDIM1 = __SA_DECDIM_DIM2;
102  sa0Params.DECDIM1_WIDTH = heightIn;
103 
    /* Parameter block layout: SE template at SE_SE0_PARAM_OFFSET, SA template
     * at SE_SA0_PARAM_OFFSET, then `iter` at 2 * SE_SA0_PARAM_OFFSET. */
104  *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_SE0_PARAM_OFFSET) = se0Params;
105  *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_SA0_PARAM_OFFSET) = sa0Params;
106  *(int32_t *) ((uint8_t *) pBlock + (2 * SE_SA0_PARAM_OFFSET)) = iter;
107 
108  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Exiting template function to init SE for 64-bit data");
109 }
110 
/*
 * matTrans_init_32bit: fills the parameter block consumed by the transpose
 * kernel for 32-bit element types.
 *
 * Same sequence as the 64-bit variant except that the SE template uses
 * __SE_TRANSPOSE_32BIT: dimensions come from the private args behind
 * `handle`, row strides (bytes) from bufParamsIn/bufParamsOut, and the SE
 * template, SA template and block count `iter` are written to
 * pKerPrivArgs->bufPblock.
 *
 * NOTE(review): the declarator line (listing line 112) is missing from this
 * listing; name and `handle` parameter are taken from the page's symbol
 * index and the call sites - confirm against the real source file.
 */
111 template <typename dataType>
113  const DSPLIB_bufParams2D_t *bufParamsIn,
114  const DSPLIB_bufParams2D_t *bufParamsOut)
115 {
116  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Entering template function to init SE for 32-bit data");
117 
118  DSPLIB_matTrans_PrivArgs *pKerPrivArgs = (DSPLIB_matTrans_PrivArgs *) handle;
119  uint8_t *pBlock = pKerPrivArgs->bufPblock;
120  uint32_t widthIn = pKerPrivArgs->widthIn;
121  uint32_t heightIn = pKerPrivArgs->heightIn;
122  int32_t strideIn = bufParamsIn->stride_y;
123  int32_t strideOut = bufParamsOut->stride_y;
124  int32_t dataSize = sizeof(dataType);
125  __SE_TEMPLATE_v1 se0Params = __gen_SE_TEMPLATE_v1();
126  __SA_TEMPLATE_v1 sa0Params = __gen_SA_TEMPLATE_v1();
127  typedef typename c7x::make_full_vector<dataType>::type vec;
128  int32_t eleCount = c7x::element_count_of<vec>::value;
129  __SE_ELETYPE SE_ELETYPE = c7x::se_eletype<vec>::value;
130  __SE_VECLEN SE_VECLEN = c7x::se_veclen<vec>::value;
131  __SA_VECLEN SA_VECLEN = c7x::sa_veclen<vec>::value;
    /* Number of row blocks: ceil(heightIn / (2 * eleCount)). */
132  int32_t iter = (heightIn + (eleCount * 2) - 1) / (eleCount * 2);
133 
    /* SE template: one input row per ICNT0, at most eleCount rows per block
     * (ICNT1), row stride in elements (DIM1), `iter` blocks spaced
     * 2 * eleCount rows apart (ICNT2/DIM2). */
134  se0Params.TRANSPOSE = __SE_TRANSPOSE_32BIT;
135  se0Params.ICNT0 = widthIn;
136  se0Params.ICNT1 = (heightIn > (uint32_t) eleCount) ? eleCount : heightIn;
137  se0Params.DIM1 = strideIn / dataSize;
138  se0Params.ICNT2 = iter;
139  se0Params.DIM2 = (strideIn / dataSize) * eleCount * 2;
140  se0Params.DIMFMT = __SE_DIMFMT_3D;
141  se0Params.ELETYPE = SE_ELETYPE;
142  se0Params.VECLEN = SE_VECLEN;
143 
    /* SA template: 2 * eleCount elements per segment, widthIn output rows at
     * the output row stride (in elements), repeated `iter` times;
     * DECDIM1_WIDTH is heightIn (presumably the output row length used for
     * store predication - confirm). */
144  sa0Params.ICNT0 = eleCount * 2;
145  sa0Params.ICNT1 = widthIn;
146  sa0Params.DIM1 = strideOut / dataSize;
147  sa0Params.ICNT2 = iter;
148  sa0Params.DIM2 = eleCount * 2;
149  sa0Params.VECLEN = SA_VECLEN;
150  sa0Params.DIMFMT = __SA_DIMFMT_3D;
151  sa0Params.DECDIM1 = __SA_DECDIM_DIM2;
152  sa0Params.DECDIM1_WIDTH = heightIn;
153 
    /* Parameter block layout: SE template, SA template, then `iter`. */
154  *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_SE0_PARAM_OFFSET) = se0Params;
155  *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_SA0_PARAM_OFFSET) = sa0Params;
156  *(int32_t *) ((uint8_t *) pBlock + (2 * SE_SA0_PARAM_OFFSET)) = iter;
157 
158  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Exiting template function to init SE for 32-bit data");
159 }
160 
/*
 * matTrans_init_16bit: fills the parameter block consumed by the transpose
 * kernel for 16-bit element types.
 *
 * Differs from the 32/64-bit variants in its block geometry: a block is
 * eleCount rows (not 2 * eleCount) and each stream covers at most
 * eleCount / 2 rows. The SE still uses __SE_TRANSPOSE_32BIT; the 16-bit
 * compute kernel applies __pack_consec_low/high to the fetched vectors.
 *
 * NOTE(review): the declarator line (listing line 162) is missing from this
 * listing; name and `handle` parameter are taken from the page's symbol
 * index and the call sites - confirm against the real source file.
 */
161 template <typename dataType>
163  const DSPLIB_bufParams2D_t *bufParamsIn,
164  const DSPLIB_bufParams2D_t *bufParamsOut)
165 {
166  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Entering template function to init SE for 16-bit data");
167 
168  DSPLIB_matTrans_PrivArgs *pKerPrivArgs = (DSPLIB_matTrans_PrivArgs *) handle;
169  uint8_t *pBlock = pKerPrivArgs->bufPblock;
170  uint32_t widthIn = pKerPrivArgs->widthIn;
171  uint32_t heightIn = pKerPrivArgs->heightIn;
172  int32_t strideIn = bufParamsIn->stride_y;
173  int32_t strideOut = bufParamsOut->stride_y;
174  int32_t dataSize = sizeof(dataType);
175  __SE_TEMPLATE_v1 se0Params = __gen_SE_TEMPLATE_v1();
176  __SA_TEMPLATE_v1 sa0Params = __gen_SA_TEMPLATE_v1();
177  typedef typename c7x::make_full_vector<dataType>::type vec;
178  int32_t eleCount = c7x::element_count_of<vec>::value;
179  __SE_ELETYPE SE_ELETYPE = c7x::se_eletype<vec>::value;
180  __SE_VECLEN SE_VECLEN = c7x::se_veclen<vec>::value;
181  __SA_VECLEN SA_VECLEN = c7x::sa_veclen<vec>::value;
    /* Number of row blocks: ceil(heightIn / eleCount). */
182  int32_t iter = (heightIn + eleCount - 1) / eleCount;
183 
    /* SE template: at most eleCount / 2 rows per stream per block, blocks
     * spaced eleCount rows apart; strides converted from bytes to elements. */
184  se0Params.TRANSPOSE = __SE_TRANSPOSE_32BIT;
185  se0Params.ICNT0 = widthIn;
186  se0Params.ICNT1 = (heightIn > (uint32_t) (eleCount / 2)) ? (eleCount / 2) : heightIn;
187  se0Params.DIM1 = strideIn / dataSize;
188  se0Params.ICNT2 = iter;
189  se0Params.DIM2 = (strideIn / dataSize) * eleCount;
190  se0Params.DIMFMT = __SE_DIMFMT_3D;
191  se0Params.ELETYPE = SE_ELETYPE;
192  se0Params.VECLEN = SE_VECLEN;
193 
    /* SA template: eleCount elements per segment, widthIn output rows at the
     * output row stride (in elements), repeated `iter` times; DECDIM1_WIDTH
     * is heightIn (presumably the output row length - confirm). */
194  sa0Params.ICNT0 = eleCount;
195  sa0Params.ICNT1 = widthIn;
196  sa0Params.DIM1 = strideOut / dataSize;
197  sa0Params.ICNT2 = iter;
198  sa0Params.DIM2 = eleCount;
199  sa0Params.VECLEN = SA_VECLEN;
200  sa0Params.DIMFMT = __SA_DIMFMT_3D;
201  sa0Params.DECDIM1 = __SA_DECDIM_DIM2;
202  sa0Params.DECDIM1_WIDTH = heightIn;
203 
    /* Parameter block layout: SE template, SA template, then `iter`. */
204  *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_SE0_PARAM_OFFSET) = se0Params;
205  *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_SA0_PARAM_OFFSET) = sa0Params;
206  *(int32_t *) ((uint8_t *) pBlock + (2 * SE_SA0_PARAM_OFFSET)) = iter;
207 
208  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Exiting template function to init SE for 16-bit data");
209 }
210 
/*
 * matTrans_init_8bit: fills the parameter block consumed by the transpose
 * kernel for 8-bit element types.
 *
 * The block height (outEleCount) depends on the vector width: eleCount / 2
 * when __C7X_VEC_SIZE_BITS__ is 512, eleCount otherwise. The SE template,
 * SA template and block count `iter` are written to pKerPrivArgs->bufPblock
 * in the same layout as the other init variants.
 *
 * NOTE(review): the declarator line (listing line 212) is missing from this
 * listing; name and `handle` parameter are taken from the page's symbol
 * index and the call sites - confirm against the real source file.
 */
211 template <typename dataType>
213  const DSPLIB_bufParams2D_t *bufParamsIn,
214  const DSPLIB_bufParams2D_t *bufParamsOut)
215 {
216  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Entering template function to init SE for 8-bit data");
217 
218  DSPLIB_matTrans_PrivArgs *pKerPrivArgs = (DSPLIB_matTrans_PrivArgs *) handle;
219  uint8_t *pBlock = pKerPrivArgs->bufPblock;
220  uint32_t widthIn = pKerPrivArgs->widthIn;
221  uint32_t heightIn = pKerPrivArgs->heightIn;
222  int32_t strideIn = bufParamsIn->stride_y;
223  int32_t strideOut = bufParamsOut->stride_y;
224  int32_t dataSize = sizeof(dataType);
225  int32_t iter = 0;
226  __SE_TEMPLATE_v1 se0Params = __gen_SE_TEMPLATE_v1();
227  __SA_TEMPLATE_v1 sa0Params = __gen_SA_TEMPLATE_v1();
228  typedef typename c7x::make_full_vector<dataType>::type vec;
229  int32_t eleCount = c7x::element_count_of<vec>::value;
230  __SE_ELETYPE SE_ELETYPE = c7x::se_eletype<vec>::value;
231  __SE_VECLEN SE_VECLEN = c7x::se_veclen<vec>::value;
232  __SA_VECLEN SA_VECLEN = c7x::sa_veclen<vec>::value;
233 
    /* Rows handled per block; halved on 512-bit vector targets. */
234 #if __C7X_VEC_SIZE_BITS__ == 512
235  int32_t outEleCount = eleCount / 2;
236 #else
237  int32_t outEleCount = eleCount;
238 #endif
239 
    /* Number of row blocks: ceil(heightIn / outEleCount). */
240  iter = (heightIn + outEleCount - 1) / outEleCount;
241 
    /* SE template. Unlike the wider variants, ICNT1 is the literal 16 and is
     * not clamped to heightIn, and DIM1/DIM2 use the byte stride directly
     * (no division by dataSize - presumably because dataSize is 1 for 8-bit
     * types; confirm). */
242  se0Params.TRANSPOSE = __SE_TRANSPOSE_32BIT;
243  se0Params.ICNT0 = widthIn;
244  se0Params.ICNT1 = 16;
245  se0Params.DIM1 = strideIn;
246  se0Params.ICNT2 = iter;
247  se0Params.DIM2 = strideIn * outEleCount;
248  se0Params.DIMFMT = __SE_DIMFMT_3D;
249  se0Params.ELETYPE = SE_ELETYPE;
250  se0Params.VECLEN = SE_VECLEN;
251 
    /* SA template: outEleCount elements per segment, widthIn output rows at
     * the output row stride, repeated `iter` times; DECDIM1_WIDTH is heightIn
     * (presumably the output row length - confirm). */
252  sa0Params.ICNT0 = outEleCount;
253  sa0Params.ICNT1 = widthIn;
254  sa0Params.DIM1 = strideOut / dataSize;
255  sa0Params.ICNT2 = iter;
256  sa0Params.DIM2 = outEleCount;
257  sa0Params.VECLEN = SA_VECLEN;
258  sa0Params.DIMFMT = __SA_DIMFMT_3D;
259  sa0Params.DECDIM1 = __SA_DECDIM_DIM2;
260  sa0Params.DECDIM1_WIDTH = heightIn;
261 
    /* Parameter block layout: SE template, SA template, then `iter`. */
262  *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_SE0_PARAM_OFFSET) = se0Params;
263  *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_SA0_PARAM_OFFSET) = sa0Params;
264  *(int32_t *) ((uint8_t *) pBlock + (2 * SE_SA0_PARAM_OFFSET)) = iter;
265 
266  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Exiting template function to init SE for 8-bit data");
267 }
268 
/*
 * Init specialization that forwards to matTrans_init_32bit<float> and
 * returns DSPLIB_SUCCESS unconditionally. pKerInitArgs is not referenced in
 * the body.
 *
 * NOTE(review): the declarator line (listing line 270) is missing from this
 * listing; presumably DSPLIB_matTrans_init_ci<float>, per the page's symbol
 * index - confirm.
 */
269 template <>
271  const DSPLIB_bufParams2D_t *bufParamsIn,
272  const DSPLIB_bufParams2D_t *bufParamsOut,
273  const DSPLIB_matTransInitArgs *pKerInitArgs)
274 {
275  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Entering function");
276 
277  matTrans_init_32bit<float>(handle, bufParamsIn, bufParamsOut);
278 
279  DSPLIB_DEBUGPRINTFN(0, "Exiting function with return status: %d\n", DSPLIB_SUCCESS);
280 
281  return DSPLIB_SUCCESS;
282 }
283 
/*
 * Init specialization that forwards to matTrans_init_64bit<double> and
 * returns DSPLIB_SUCCESS unconditionally. pKerInitArgs is not referenced in
 * the body.
 *
 * NOTE(review): the declarator line (listing line 285) is missing from this
 * listing; presumably DSPLIB_matTrans_init_ci<double>, per the page's symbol
 * index - confirm.
 */
284 template <>
286  const DSPLIB_bufParams2D_t *bufParamsIn,
287  const DSPLIB_bufParams2D_t *bufParamsOut,
288  const DSPLIB_matTransInitArgs *pKerInitArgs)
289 {
290  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Entering function");
291 
292  matTrans_init_64bit<double>(handle, bufParamsIn, bufParamsOut);
293 
294  DSPLIB_DEBUGPRINTFN(0, "Exiting function with return status: %d\n", DSPLIB_SUCCESS);
295 
296  return DSPLIB_SUCCESS;
297 }
298 
/*
 * Init specialization that forwards to matTrans_init_8bit<int8_t> and
 * returns DSPLIB_SUCCESS unconditionally. pKerInitArgs is not referenced in
 * the body.
 *
 * NOTE(review): the declarator line (listing line 300) is missing from this
 * listing; presumably DSPLIB_matTrans_init_ci<int8_t>, per the page's symbol
 * index - confirm.
 */
299 template <>
301  const DSPLIB_bufParams2D_t *bufParamsIn,
302  const DSPLIB_bufParams2D_t *bufParamsOut,
303  const DSPLIB_matTransInitArgs *pKerInitArgs)
304 {
305  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Entering function");
306 
307  matTrans_init_8bit<int8_t>(handle, bufParamsIn, bufParamsOut);
308 
309  DSPLIB_DEBUGPRINTFN(0, "Exiting function with return status: %d\n", DSPLIB_SUCCESS);
310 
311  return DSPLIB_SUCCESS;
312 }
313 
/*
 * Init specialization that forwards to matTrans_init_8bit<uint8_t> and
 * returns DSPLIB_SUCCESS unconditionally. pKerInitArgs is not referenced in
 * the body.
 *
 * NOTE(review): the declarator line (listing line 315) is missing from this
 * listing; presumably DSPLIB_matTrans_init_ci<uint8_t>, per the page's
 * symbol index - confirm.
 */
314 template <>
316  const DSPLIB_bufParams2D_t *bufParamsIn,
317  const DSPLIB_bufParams2D_t *bufParamsOut,
318  const DSPLIB_matTransInitArgs *pKerInitArgs)
319 {
320  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Entering function");
321 
322  matTrans_init_8bit<uint8_t>(handle, bufParamsIn, bufParamsOut);
323 
324  DSPLIB_DEBUGPRINTFN(0, "Exiting function with return status: %d\n", DSPLIB_SUCCESS);
325 
326  return DSPLIB_SUCCESS;
327 }
328 
/*
 * Init specialization that forwards to matTrans_init_16bit<int16_t> and
 * returns DSPLIB_SUCCESS unconditionally. pKerInitArgs is not referenced in
 * the body.
 *
 * NOTE(review): the declarator line (listing line 330) is missing from this
 * listing; presumably DSPLIB_matTrans_init_ci<int16_t>, per the page's
 * symbol index - confirm.
 */
329 template <>
331  const DSPLIB_bufParams2D_t *bufParamsIn,
332  const DSPLIB_bufParams2D_t *bufParamsOut,
333  const DSPLIB_matTransInitArgs *pKerInitArgs)
334 {
335  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Entering function");
336 
337  matTrans_init_16bit<int16_t>(handle, bufParamsIn, bufParamsOut);
338 
339  DSPLIB_DEBUGPRINTFN(0, "Exiting function with return status: %d\n", DSPLIB_SUCCESS);
340 
341  return DSPLIB_SUCCESS;
342 }
343 
/*
 * Init specialization that forwards to matTrans_init_16bit<uint16_t> and
 * returns DSPLIB_SUCCESS unconditionally. pKerInitArgs is not referenced in
 * the body.
 *
 * NOTE(review): the declarator line (listing line 345) is missing from this
 * listing; presumably DSPLIB_matTrans_init_ci<uint16_t>, per the page's
 * symbol index - confirm.
 */
344 template <>
346  const DSPLIB_bufParams2D_t *bufParamsIn,
347  const DSPLIB_bufParams2D_t *bufParamsOut,
348  const DSPLIB_matTransInitArgs *pKerInitArgs)
349 {
350  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Entering function");
351 
352  matTrans_init_16bit<uint16_t>(handle, bufParamsIn, bufParamsOut);
353 
354  DSPLIB_DEBUGPRINTFN(0, "Exiting function with return status: %d\n", DSPLIB_SUCCESS);
355 
356  return DSPLIB_SUCCESS;
357 }
358 
/*
 * Init specialization that forwards to matTrans_init_32bit<int32_t> and
 * returns DSPLIB_SUCCESS unconditionally. pKerInitArgs is not referenced in
 * the body.
 *
 * NOTE(review): the declarator line (listing line 360) is missing from this
 * listing; presumably DSPLIB_matTrans_init_ci<int32_t>, per the page's
 * symbol index - confirm.
 */
359 template <>
361  const DSPLIB_bufParams2D_t *bufParamsIn,
362  const DSPLIB_bufParams2D_t *bufParamsOut,
363  const DSPLIB_matTransInitArgs *pKerInitArgs)
364 {
365  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Entering function");
366 
367  matTrans_init_32bit<int32_t>(handle, bufParamsIn, bufParamsOut);
368 
369  DSPLIB_DEBUGPRINTFN(0, "Exiting function with return status: %d\n", DSPLIB_SUCCESS);
370 
371  return DSPLIB_SUCCESS;
372 }
373 
/*
 * Init specialization that forwards to matTrans_init_32bit<uint32_t> and
 * returns DSPLIB_SUCCESS unconditionally. pKerInitArgs is not referenced in
 * the body.
 *
 * NOTE(review): the declarator line (listing line 375) is missing from this
 * listing; presumably DSPLIB_matTrans_init_ci<uint32_t>, per the page's
 * symbol index - confirm.
 */
374 template <>
376  const DSPLIB_bufParams2D_t *bufParamsIn,
377  const DSPLIB_bufParams2D_t *bufParamsOut,
378  const DSPLIB_matTransInitArgs *pKerInitArgs)
379 {
380  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Entering function");
381 
382  matTrans_init_32bit<uint32_t>(handle, bufParamsIn, bufParamsOut);
383 
384  DSPLIB_DEBUGPRINTFN(0, "Exiting function with return status: %d\n", DSPLIB_SUCCESS);
385 
386  return DSPLIB_SUCCESS;
387 }
388 
/*
 * Init specialization that forwards to matTrans_init_64bit<int64_t> and
 * returns DSPLIB_SUCCESS unconditionally. pKerInitArgs is not referenced in
 * the body.
 *
 * NOTE(review): the declarator line (listing line 390) is missing from this
 * listing; presumably DSPLIB_matTrans_init_ci<int64_t>, per the page's
 * symbol index - confirm.
 */
389 template <>
391  const DSPLIB_bufParams2D_t *bufParamsIn,
392  const DSPLIB_bufParams2D_t *bufParamsOut,
393  const DSPLIB_matTransInitArgs *pKerInitArgs)
394 {
395  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Entering function");
396 
397  matTrans_init_64bit<int64_t>(handle, bufParamsIn, bufParamsOut);
398 
399  DSPLIB_DEBUGPRINTFN(0, "Exiting function with return status: %d\n", DSPLIB_SUCCESS);
400 
401  return DSPLIB_SUCCESS;
402 }
403 
/*
 * Init specialization that forwards to matTrans_init_64bit<uint64_t> and
 * returns DSPLIB_SUCCESS unconditionally. pKerInitArgs is not referenced in
 * the body.
 *
 * NOTE(review): the declarator line (listing line 405) is missing from this
 * listing; presumably DSPLIB_matTrans_init_ci<uint64_t>, per the page's
 * symbol index - confirm.
 */
404 template <>
406  const DSPLIB_bufParams2D_t *bufParamsIn,
407  const DSPLIB_bufParams2D_t *bufParamsOut,
408  const DSPLIB_matTransInitArgs *pKerInitArgs)
409 {
410  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Entering function");
411 
412  matTrans_init_64bit<uint64_t>(handle, bufParamsIn, bufParamsOut);
413 
414  DSPLIB_DEBUGPRINTFN(0, "Exiting function with return status: %d\n", DSPLIB_SUCCESS);
415 
416  return DSPLIB_SUCCESS;
417 }
418 
419 /**********************************************************************/
420 /* IMPLEMENTATION */
421 /**********************************************************************/
/*
 * Transpose kernel shared by the 32-bit and 64-bit element types.
 *
 * handle : kernel handle; cast to DSPLIB_matTrans_PrivArgs to read widthIn,
 *          strideIn and the parameter block written by the init routine.
 * pIn    : input matrix, read through SE0 and SE1.
 * pOut   : output matrix, written through SA0 with predicated stores.
 *
 * Each loop iteration fetches one vector from each stream and stores both
 * through SA0, SE0's vector first.
 */
422 template <typename dataType>
423 inline void matTrans_compute_64_32bit(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
424 {
425  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Entering template function for Transpose 64/32-bit data");
426 
427  DSPLIB_matTrans_PrivArgs *pKerPrivArgs = (DSPLIB_matTrans_PrivArgs *) handle;
428  uint32_t widthIn = pKerPrivArgs->widthIn;
429  int32_t strideIn = pKerPrivArgs->strideIn;
430  uint32_t dataSize = sizeof(dataType);
431  __SE_TEMPLATE_v1 se0Params;
432  __SA_TEMPLATE_v1 sa0Params;
    /* Read back the SE template, SA template and block count from the
     * parameter block, using the same offsets the init routine wrote to. */
433  uint8_t *pBlock = pKerPrivArgs->bufPblock;
434  se0Params = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_SE0_PARAM_OFFSET);
435  sa0Params = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_SA0_PARAM_OFFSET);
436  int32_t iter = *(int32_t *) ((uint8_t *) pBlock + (2 * SE_SA0_PARAM_OFFSET));
437  dataType *restrict pInLocal = (dataType *) pIn;
438  dataType *restrict pOutLocal = (dataType *) pOut;
439  typedef typename c7x::make_full_vector<dataType>::type vec;
440  int32_t eleCount = c7x::element_count_of<vec>::value;
    /* One loop iteration per input column per row block. */
441  int32_t loopCount = iter * widthIn;
442 
443  DSPLIB_DEBUGPRINTFN(0, "pInLocal: %p pOutLocal: %p loopCount: %d\n", pInLocal, pOutLocal, loopCount);
444 
    /* Both streams share one template; SE1 starts eleCount rows below SE0
     * (strideIn / dataSize is the row pitch in elements). */
445  __SE0_OPEN(pInLocal, se0Params);
446  __SE1_OPEN(pInLocal + ((strideIn / dataSize) * eleCount), se0Params);
447  __SA0_OPEN(sa0Params);
448 
449  for (int32_t i = 0; i < loopCount; i++) {
450  vec loadVec1 = c7x::strm_eng<0, vec>::get_adv();
451  vec loadVec2 = c7x::strm_eng<1, vec>::get_adv();
452 
    /* The predicate is read before the address generator is advanced, then
     * used for the store to the address that advance returned. */
453  __vpred predTemp = c7x::strm_agen<0, vec>::get_vpred();
454  vec *pStoreVec = c7x::strm_agen<0, vec>::get_adv(pOutLocal);
455  __vstore_pred(predTemp, pStoreVec, loadVec1);
456 
457  predTemp = c7x::strm_agen<0, vec>::get_vpred();
458  pStoreVec = c7x::strm_agen<0, vec>::get_adv(pOutLocal);
459  __vstore_pred(predTemp, pStoreVec, loadVec2);
460  }
461  __SE0_CLOSE();
462  __SE1_CLOSE();
463  __SA0_CLOSE();
464 
465  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Exiting template function for Transpose 64/32-bit data");
466 }
467 
/*
 * Transpose kernel for 16-bit element types.
 *
 * handle : kernel handle; cast to DSPLIB_matTrans_PrivArgs to read widthIn,
 *          strideIn and the parameter block written by the init routine.
 * pIn    : input matrix, read through SE0 and SE1.
 * pOut   : output matrix, written through SA0 with predicated stores.
 *
 * Each loop iteration fetches one vector per stream, combines them with
 * __pack_consec_low and __pack_consec_high, and stores the "low" result
 * followed by the "high" result.
 */
468 template <typename dataType>
469 inline void matTrans_compute_16bit(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
470 {
471  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Entering template function for Transpose 16-bit data");
472 
473  DSPLIB_matTrans_PrivArgs *pKerPrivArgs = (DSPLIB_matTrans_PrivArgs *) handle;
474  uint32_t widthIn = pKerPrivArgs->widthIn;
475  int32_t strideIn = pKerPrivArgs->strideIn;
476  uint32_t dataSize = sizeof(dataType);
477  __SE_TEMPLATE_v1 se0Params;
478  __SA_TEMPLATE_v1 sa0Params;
    /* Read back the SE template, SA template and block count from the
     * parameter block. */
479  uint8_t *pBlock = pKerPrivArgs->bufPblock;
480  se0Params = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_SE0_PARAM_OFFSET);
481  sa0Params = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_SA0_PARAM_OFFSET);
482  int32_t iter = *(int32_t *) ((uint8_t *) pBlock + (2 * SE_SA0_PARAM_OFFSET));
483  dataType *restrict pInLocal = (dataType *) pIn;
484  dataType *restrict pOutLocal = (dataType *) pOut;
485  typedef typename c7x::make_full_vector<dataType>::type vec;
486  int32_t eleCount = c7x::element_count_of<vec>::value;
    /* Each iteration issues two stores, hence the division by 2 (integer
     * division: the product is truncated if it is odd). */
487  int32_t loopCount = iter * widthIn / 2;
488 
489  DSPLIB_DEBUGPRINTFN(0, "pInLocal: %p pOutLocal: %p loopCount: %d\n", pInLocal, pOutLocal, loopCount);
490 
    /* Both streams share one template; SE1 starts eleCount / 2 rows below
     * SE0. */
491  __SE0_OPEN(pInLocal, se0Params);
492  __SA0_OPEN(sa0Params);
493  __SE1_OPEN(pInLocal + ((strideIn / dataSize) * (eleCount / 2)), se0Params);
494 
495  for (int i = 0; i < loopCount; i++) {
496  vec loadVec1 = c7x::strm_eng<0, vec>::get_adv();
497  vec loadVec2 = c7x::strm_eng<1, vec>::get_adv();
498 
    /* NOTE(review): presumably the low/high packs separate the two 16-bit
     * elements that travel together through the 32-bit SE transpose into two
     * output rows - confirm against the intrinsic documentation. */
499  vec checkEven = __pack_consec_low(loadVec2, loadVec1);
500  __vpred predTemp = c7x::strm_agen<0, vec>::get_vpred();
501  vec *pStoreVec = c7x::strm_agen<0, vec>::get_adv(pOutLocal);
502  __vstore_pred(predTemp, pStoreVec, checkEven);
503 
504  vec checkOdd = __pack_consec_high(loadVec2, loadVec1);
505  predTemp = c7x::strm_agen<0, vec>::get_vpred();
506  pStoreVec = c7x::strm_agen<0, vec>::get_adv(pOutLocal);
507  __vstore_pred(predTemp, pStoreVec, checkOdd);
508  }
509  __SE1_CLOSE();
510  __SE0_CLOSE();
511  __SA0_CLOSE();
512 
513  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Exiting template function for Transpose 16-bit data");
514 }
515 
/*
 * Transpose kernel for 8-bit element types.
 *
 * handle : kernel handle; cast to DSPLIB_matTrans_PrivArgs to read widthIn,
 *          strideIn and the parameter block written by the init routine.
 * pIn    : input matrix, read through SE0 and SE1.
 * pOut   : output matrix, written through SA0 with predicated packing stores.
 *
 * The loop body is selected at compile time by __C7X_VEC_SIZE_BITS__ (256 or
 * 512). Both variants issue four stores per iteration, in the order
 * packl(even), packl(odd), packh(even), packh(odd).
 *
 * NOTE(review): if __C7X_VEC_SIZE_BITS__ is neither 256 nor 512, no loop is
 * compiled and SE1 is never opened here, yet __SE1_CLOSE() still runs.
 */
516 template <typename dataType>
517 inline void matTrans_compute_8bit(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
518 {
519  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Entering template function for Transpose 8-bit data");
520 
521  DSPLIB_matTrans_PrivArgs *pKerPrivArgs = (DSPLIB_matTrans_PrivArgs *) handle;
522  uint32_t widthIn = pKerPrivArgs->widthIn;
523  int32_t strideIn = pKerPrivArgs->strideIn;
524  uint32_t dataSize = sizeof(dataType);
525  __SE_TEMPLATE_v1 se0Params;
526  __SA_TEMPLATE_v1 sa0Params;
    /* Read back the SE template, SA template and block count from the
     * parameter block. */
527  uint8_t *pBlock = pKerPrivArgs->bufPblock;
528  se0Params = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_SE0_PARAM_OFFSET);
529  sa0Params = *(__SA_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE_SA0_PARAM_OFFSET);
530  int32_t iter = *(int32_t *) ((uint8_t *) pBlock + (2 * SE_SA0_PARAM_OFFSET));
531  dataType *restrict pInLocal = (dataType *) pIn;
532  dataType *restrict pOutLocal = (dataType *) pOut;
533  typedef typename c7x::make_full_vector<dataType>::type vec;
534  int32_t eleCount = c7x::element_count_of<vec>::value;
    /* Each iteration issues four stores, hence the division by 4 (integer
     * division: truncated if the product is not a multiple of 4). */
535  int32_t loopCount = iter * widthIn / 4;
536 
537  DSPLIB_DEBUGPRINTFN(0, "pInLocal: %p pOutLocal: %p loopCount: %d\n", pInLocal, pOutLocal, loopCount);
538 
539  __SE0_OPEN(pInLocal, se0Params);
540  __SA0_OPEN(sa0Params);
541 #if (__C7X_VEC_SIZE_BITS__ == 256)
    /* 256-bit vectors: SE1 starts eleCount / 2 rows below SE0, and each
     * iteration consumes two vectors from each stream. */
542  __SE1_OPEN(pInLocal + ((strideIn / dataSize) * (eleCount / 2)), se0Params);
543  for (int i = 0; i < loopCount; i++) {
544 
545  vec loadVec1 = c7x::strm_eng<0, vec>::get_adv();
546  vec loadVec2 = c7x::strm_eng<0, vec>::get_adv();
547 
548  vec loadVec3 = c7x::strm_eng<1, vec>::get_adv();
549  vec loadVec4 = c7x::strm_eng<1, vec>::get_adv();
550 
    /* Pack the low/high halves per stream: *1 comes from SE0, *2 from SE1. */
551  vec iEven1 = __pack_consec_low(loadVec2, loadVec1);
552  vec iOdd1 = __pack_consec_high(loadVec2, loadVec1);
553  vec iEven2 = __pack_consec_low(loadVec4, loadVec3);
554  vec iOdd2 = __pack_consec_high(loadVec4, loadVec3);
555 
    /* Four two-source packing stores; each reads the SA0 predicate before
     * advancing the address generator. */
556  __vpred predTemp = c7x::strm_agen<0, vec>::get_vpred();
557  c7x::char_vec *pStoreVec = c7x::strm_agen<0, c7x::char_vec>::get_adv(pOutLocal);
558  __vstore_pred_packl_2src(predTemp, pStoreVec, c7x::as_short_vec(iEven1), c7x::as_short_vec(iEven2));
559 
560  predTemp = c7x::strm_agen<0, vec>::get_vpred();
561  pStoreVec = c7x::strm_agen<0, c7x::char_vec>::get_adv(pOutLocal);
562  __vstore_pred_packl_2src(predTemp, pStoreVec, c7x::as_short_vec(iOdd1), c7x::as_short_vec(iOdd2));
563 
564  predTemp = c7x::strm_agen<0, vec>::get_vpred();
565  pStoreVec = c7x::strm_agen<0, c7x::char_vec>::get_adv(pOutLocal);
566  __vstore_pred_packh_2src(predTemp, pStoreVec, c7x::as_short_vec(iEven1), c7x::as_short_vec(iEven2));
567 
568  predTemp = c7x::strm_agen<0, vec>::get_vpred();
569  pStoreVec = c7x::strm_agen<0, c7x::char_vec>::get_adv(pOutLocal);
570  __vstore_pred_packh_2src(predTemp, pStoreVec, c7x::as_short_vec(iOdd1), c7x::as_short_vec(iOdd2));
571  }
572 
573 #elif (__C7X_VEC_SIZE_BITS__ == 512)
    /* 512-bit vectors: stores go out as half vectors (char_hvec). SE1's
     * start offset is ((strideIn / dataSize) * eleCount) / 4 elements; note
     * the grouping differs from the 256-bit branch. */
574  typedef typename c7x::char_hvec vecOut;
575  __SE1_OPEN(pInLocal + ((strideIn / dataSize) * eleCount / 4), se0Params);
576  for (int i = 0; i < loopCount; i++) {
577  vec loadVec1 = c7x::strm_eng<0, vec>::get_adv();
578  vec loadVec2 = c7x::strm_eng<1, vec>::get_adv();
579 
580  vec iEven = __pack_consec_low(loadVec2, loadVec1);
581  vec iOdd = __pack_consec_high(loadVec2, loadVec1);
582 
    /* Predicates are requested for short_vec while the address advance uses
     * vecOut (char_hvec). */
583  __vpred predTemp = c7x::strm_agen<0, c7x::short_vec>::get_vpred();
584  vecOut *pStoreVec = c7x::strm_agen<0, vecOut>::get_adv(pOutLocal);
585  __vstore_pred_packl(predTemp, pStoreVec, c7x::as_short_vec(iEven));
586 
587  predTemp = c7x::strm_agen<0, c7x::short_vec>::get_vpred();
588  pStoreVec = c7x::strm_agen<0, vecOut>::get_adv(pOutLocal);
589  __vstore_pred_packl(predTemp, pStoreVec, c7x::as_short_vec(iOdd));
590 
591  predTemp = c7x::strm_agen<0, c7x::short_vec>::get_vpred();
592  pStoreVec = c7x::strm_agen<0, vecOut>::get_adv(pOutLocal);
593  __vstore_pred_packh(predTemp, pStoreVec, c7x::as_short_vec(iEven));
594 
595  predTemp = c7x::strm_agen<0, c7x::short_vec>::get_vpred();
596  pStoreVec = c7x::strm_agen<0, vecOut>::get_adv(pOutLocal);
597  __vstore_pred_packh(predTemp, pStoreVec, c7x::as_short_vec(iOdd));
598  }
599 
600 #endif
601  __SE1_CLOSE();
602  __SE0_CLOSE();
603  __SA0_CLOSE();
604 
605  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Exiting template function for Transpose 8-bit data");
606 }
607 
/*
 * float specialization of DSPLIB_matTrans_exec_ci: forwards handle, pIn and
 * pOut to matTrans_compute_64_32bit<float> and returns DSPLIB_SUCCESS
 * unconditionally.
 */
608 template <>
609 DSPLIB_STATUS DSPLIB_matTrans_exec_ci<float>(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
610 {
611  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Entering function");
612 
613  matTrans_compute_64_32bit<float>(handle, pIn, pOut);
614 
615  DSPLIB_DEBUGPRINTFN(0, "Exiting function with return status: %d\n", DSPLIB_SUCCESS);
616 
617  return DSPLIB_SUCCESS;
618 }
619 
/*
 * double specialization of DSPLIB_matTrans_exec_ci: forwards handle, pIn and
 * pOut to matTrans_compute_64_32bit<double> and returns DSPLIB_SUCCESS
 * unconditionally.
 */
620 template <>
621 DSPLIB_STATUS DSPLIB_matTrans_exec_ci<double>(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
622 {
623  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Entering function");
624 
625  matTrans_compute_64_32bit<double>(handle, pIn, pOut);
626 
627  DSPLIB_DEBUGPRINTFN(0, "Exiting function with return status: %d\n", DSPLIB_SUCCESS);
628 
629  return DSPLIB_SUCCESS;
630 }
631 
/*
 * int8_t specialization of DSPLIB_matTrans_exec_ci: forwards handle, pIn and
 * pOut to matTrans_compute_8bit<int8_t> and returns DSPLIB_SUCCESS
 * unconditionally.
 */
632 template <>
633 DSPLIB_STATUS DSPLIB_matTrans_exec_ci<int8_t>(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
634 {
635  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Entering function");
636 
637  matTrans_compute_8bit<int8_t>(handle, pIn, pOut);
638 
639  DSPLIB_DEBUGPRINTFN(0, "Exiting function with return status: %d\n", DSPLIB_SUCCESS);
640 
641  return DSPLIB_SUCCESS;
642 }
643 
/*
 * uint8_t specialization of DSPLIB_matTrans_exec_ci: forwards handle, pIn
 * and pOut to matTrans_compute_8bit<uint8_t> and returns DSPLIB_SUCCESS
 * unconditionally.
 */
644 template <>
645 DSPLIB_STATUS DSPLIB_matTrans_exec_ci<uint8_t>(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
646 {
647  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Entering function");
648 
649  matTrans_compute_8bit<uint8_t>(handle, pIn, pOut);
650 
651  DSPLIB_DEBUGPRINTFN(0, "Exiting function with return status: %d\n", DSPLIB_SUCCESS);
652 
653  return DSPLIB_SUCCESS;
654 }
655 
/*
 * int16_t specialization of DSPLIB_matTrans_exec_ci: forwards handle, pIn
 * and pOut to matTrans_compute_16bit<int16_t> and returns DSPLIB_SUCCESS
 * unconditionally.
 */
656 template <>
657 DSPLIB_STATUS DSPLIB_matTrans_exec_ci<int16_t>(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
658 {
659  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Entering function");
660 
661  matTrans_compute_16bit<int16_t>(handle, pIn, pOut);
662 
663  DSPLIB_DEBUGPRINTFN(0, "Exiting function with return status: %d\n", DSPLIB_SUCCESS);
664 
665  return DSPLIB_SUCCESS;
666 }
667 
/*
 * uint16_t specialization of DSPLIB_matTrans_exec_ci: forwards handle, pIn
 * and pOut to matTrans_compute_16bit<uint16_t> and returns DSPLIB_SUCCESS
 * unconditionally.
 */
668 template <>
669 DSPLIB_STATUS DSPLIB_matTrans_exec_ci<uint16_t>(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
670 {
671  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Entering function");
672 
673  matTrans_compute_16bit<uint16_t>(handle, pIn, pOut);
674 
675  DSPLIB_DEBUGPRINTFN(0, "Exiting function with return status: %d\n", DSPLIB_SUCCESS);
676 
677  return DSPLIB_SUCCESS;
678 }
679 
/*
 * int32_t specialization of DSPLIB_matTrans_exec_ci: forwards handle, pIn
 * and pOut to matTrans_compute_64_32bit<int32_t> and returns DSPLIB_SUCCESS
 * unconditionally.
 */
680 template <>
681 DSPLIB_STATUS DSPLIB_matTrans_exec_ci<int32_t>(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
682 {
683  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Entering function");
684 
685  matTrans_compute_64_32bit<int32_t>(handle, pIn, pOut);
686 
687  DSPLIB_DEBUGPRINTFN(0, "Exiting function with return status: %d\n", DSPLIB_SUCCESS);
688 
689  return DSPLIB_SUCCESS;
690 }
691 
/*
 * uint32_t specialization of DSPLIB_matTrans_exec_ci: forwards handle, pIn
 * and pOut to matTrans_compute_64_32bit<uint32_t> and returns DSPLIB_SUCCESS
 * unconditionally.
 */
692 template <>
693 DSPLIB_STATUS DSPLIB_matTrans_exec_ci<uint32_t>(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
694 {
695  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Entering function");
696 
697  matTrans_compute_64_32bit<uint32_t>(handle, pIn, pOut);
698 
699  DSPLIB_DEBUGPRINTFN(0, "Exiting function with return status: %d\n", DSPLIB_SUCCESS);
700 
701  return DSPLIB_SUCCESS;
702 }
703 
/*
 * int64_t specialization of DSPLIB_matTrans_exec_ci: forwards handle, pIn
 * and pOut to matTrans_compute_64_32bit<int64_t> and returns DSPLIB_SUCCESS
 * unconditionally.
 */
704 template <>
705 DSPLIB_STATUS DSPLIB_matTrans_exec_ci<int64_t>(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
706 {
707  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Entering function");
708 
709  matTrans_compute_64_32bit<int64_t>(handle, pIn, pOut);
710 
711  DSPLIB_DEBUGPRINTFN(0, "Exiting function with return status: %d\n", DSPLIB_SUCCESS);
712 
713  return DSPLIB_SUCCESS;
714 }
715 
/*
 * uint64_t specialization of DSPLIB_matTrans_exec_ci: forwards handle, pIn
 * and pOut to matTrans_compute_64_32bit<uint64_t> and returns DSPLIB_SUCCESS
 * unconditionally.
 */
716 template <>
717 DSPLIB_STATUS DSPLIB_matTrans_exec_ci<uint64_t>(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
718 {
719  DSPLIB_DEBUGPRINTFN(0, "%s\n", "Entering function");
720 
721  matTrans_compute_64_32bit<uint64_t>(handle, pIn, pOut);
722 
723  DSPLIB_DEBUGPRINTFN(0, "Exiting function with return status: %d\n", DSPLIB_SUCCESS);
724 
725  return DSPLIB_SUCCESS;
726 }
DSPLIB_STATUS DSPLIB_matTrans_init_ci< int64_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams2D_t *bufParamsIn, const DSPLIB_bufParams2D_t *bufParamsOut, const DSPLIB_matTransInitArgs *pKerInitArgs)
DSPLIB_STATUS DSPLIB_matTrans_exec_ci< uint16_t >(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
DSPLIB_STATUS DSPLIB_matTrans_init_ci< double >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams2D_t *bufParamsIn, const DSPLIB_bufParams2D_t *bufParamsOut, const DSPLIB_matTransInitArgs *pKerInitArgs)
DSPLIB_STATUS DSPLIB_matTrans_exec_ci< uint8_t >(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
#define SE_SE0_PARAM_OFFSET
void matTrans_compute_64_32bit(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
void matTrans_init_8bit(DSPLIB_kernelHandle handle, const DSPLIB_bufParams2D_t *bufParamsIn, const DSPLIB_bufParams2D_t *bufParamsOut)
void matTrans_init_64bit(DSPLIB_kernelHandle handle, const DSPLIB_bufParams2D_t *bufParamsIn, const DSPLIB_bufParams2D_t *bufParamsOut)
DSPLIB_STATUS DSPLIB_matTrans_init_ci< uint16_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams2D_t *bufParamsIn, const DSPLIB_bufParams2D_t *bufParamsOut, const DSPLIB_matTransInitArgs *pKerInitArgs)
DSPLIB_STATUS DSPLIB_matTrans_init_ci< uint64_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams2D_t *bufParamsIn, const DSPLIB_bufParams2D_t *bufParamsOut, const DSPLIB_matTransInitArgs *pKerInitArgs)
void matTrans_compute_8bit(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
void matTrans_compute_16bit(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
DSPLIB_STATUS DSPLIB_matTrans_exec_ci< uint32_t >(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
DSPLIB_STATUS DSPLIB_matTrans_init_ci< uint32_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams2D_t *bufParamsIn, const DSPLIB_bufParams2D_t *bufParamsOut, const DSPLIB_matTransInitArgs *pKerInitArgs)
void matTrans_init_16bit(DSPLIB_kernelHandle handle, const DSPLIB_bufParams2D_t *bufParamsIn, const DSPLIB_bufParams2D_t *bufParamsOut)
DSPLIB_STATUS DSPLIB_matTrans_init_ci< int32_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams2D_t *bufParamsIn, const DSPLIB_bufParams2D_t *bufParamsOut, const DSPLIB_matTransInitArgs *pKerInitArgs)
void matTrans_init_32bit(DSPLIB_kernelHandle handle, const DSPLIB_bufParams2D_t *bufParamsIn, const DSPLIB_bufParams2D_t *bufParamsOut)
DSPLIB_STATUS DSPLIB_matTrans_exec_ci< int8_t >(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
DSPLIB_STATUS DSPLIB_matTrans_init_ci< int8_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams2D_t *bufParamsIn, const DSPLIB_bufParams2D_t *bufParamsOut, const DSPLIB_matTransInitArgs *pKerInitArgs)
DSPLIB_STATUS DSPLIB_matTrans_init_ci< int16_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams2D_t *bufParamsIn, const DSPLIB_bufParams2D_t *bufParamsOut, const DSPLIB_matTransInitArgs *pKerInitArgs)
DSPLIB_STATUS DSPLIB_matTrans_init_ci< float >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams2D_t *bufParamsIn, const DSPLIB_bufParams2D_t *bufParamsOut, const DSPLIB_matTransInitArgs *pKerInitArgs)
DSPLIB_STATUS DSPLIB_matTrans_exec_ci< int16_t >(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
DSPLIB_STATUS DSPLIB_matTrans_exec_ci< int64_t >(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
DSPLIB_STATUS DSPLIB_matTrans_exec_ci< int32_t >(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
DSPLIB_STATUS DSPLIB_matTrans_init_ci< uint8_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams2D_t *bufParamsIn, const DSPLIB_bufParams2D_t *bufParamsOut, const DSPLIB_matTransInitArgs *pKerInitArgs)
DSPLIB_STATUS DSPLIB_matTrans_exec_ci< float >(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
DSPLIB_STATUS DSPLIB_matTrans_exec_ci< uint64_t >(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
#define SE_SA0_PARAM_OFFSET
DSPLIB_STATUS DSPLIB_matTrans_exec_ci< double >(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
Header file for kernel's internal use. For the kernel's interface, please see DSPLIB_mat_trans.
#define DSPLIB_DEBUGPRINTFN(N, fmt,...)
Definition: DSPLIB_types.h:83
DSPLIB_STATUS_NAME
The enumeration of all status codes.
Definition: DSPLIB_types.h:151
void * DSPLIB_kernelHandle
Handle type for DSPLIB operations.
Definition: DSPLIB_types.h:172
@ DSPLIB_SUCCESS
Definition: DSPLIB_types.h:152
A structure for a 2 dimensional buffer descriptor.
int32_t stride_y
Stride in Y dimension in bytes.
Structure containing the parameters to initialize the kernel.
Structure that is reserved for internal use by the kernel.
uint8_t bufPblock[DSPLIB_MAT_TRANS_IXX_IXX_OXX_PBLOCK_SIZE]
Buffer to save SE & SA configuration parameters
uint32_t heightIn
Height of input data matrix
int32_t strideIn
Stride between rows of input data matrix
uint32_t widthIn
Size of input buffer for different batches DSPLIB_matTrans_init that will be retrieved and used by DS...