VXLIB User Guide
VXLIB_tableLookup_ci.cpp
Go to the documentation of this file.
1 /******************************************************************************
2  * Copyright (C) 2023 Texas Instruments Incorporated - https://www.ti.com/
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  *
8  * Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  *
11  * Redistributions in binary form must reproduce the above copyright
12  * notice, this list of conditions and the following disclaimer in the
13  * documentation and/or other materials provided with the
14  * distribution.
15  *
16  * Neither the name of Texas Instruments Incorporated nor the names of
17  * its contributors may be used to endorse or promote products derived
18  * from this software without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31  *
32  ******************************************************************************/
33 
34 /**********************************************************************************************************************/
35 /* */
36 /* INCLUDES */
37 /* */
38 /**********************************************************************************************************************/
39 
40 #include "VXLIB_tableLookup_priv.h"
41 /**********************************************************************************************************************/
42 /* */
43 /* DEFINES */
44 /* */
45 /**********************************************************************************************************************/
46 #define VXLIB_COUNT_LIMIT 512
47 /**********************************************************************************************************************/
48 /* */
49 /* VXLIB_tableLookup_init_ci */
50 /* */
51 /**********************************************************************************************************************/
52 // this method initializes the kernel-specific parameters
53 // mainly, the streaming engine and streaming address generators
54 template <uint32_t dTypeIn, uint32_t dTypeOut>
56 {
57 #if VXLIB_DEBUGPRINT
58  printf("Enter VXLIB_tableLookup_checkSrcIdx_init_ci\n");
59 #endif
60  VXLIB_STATUS status = VXLIB_SUCCESS; // assign status to success by default
61 
62  // structs to hold SE and SA parameters
63  __SE_TEMPLATE_v1 se0Params = __gen_SE_TEMPLATE_v1();
64  // typecast handle (void) to struct pointer type associated to kernel
65  VXLIB_tableLookup_PrivArgs *pKerPrivArgs = (VXLIB_tableLookup_PrivArgs *) handle;
66 
67  // obtain image size and compute number of blocks to process
68  size_t width = pKerPrivArgs->width;
69  size_t height = pKerPrivArgs->height;
70 
71  typedef typename VXLIB_vec_type<dTypeIn>::type vec;
72 
73  size_t elemCount = c7x::element_count_of<vec>::value;
74  uint32_t nTiles = (width + elemCount - 1) / elemCount;
75 
76  uint32_t ICNT2 = (height + 1) / 2;
77  int32_t strideIn = pKerPrivArgs->strideInElements;
78 
79  uint8_t *pBlock = pKerPrivArgs->bufPblock; // address to retrieve to store SE/SA params
80 
81  __SE_VECLEN SE_VECLEN = c7x::se_veclen<c7x::uint_vec>::value;
82  __SE_ELETYPE SE_ELETYPE = c7x::se_eletype<vec>::value;
83 
84  // set SE0, and SA0 params
85  se0Params.ICNT0 = width;
86  se0Params.DIM1 = strideIn * 2;
87  se0Params.ICNT1 = ICNT2;
88  se0Params.DIMFMT = __SE_DIMFMT_2D;
89  se0Params.VECLEN = SE_VECLEN;
90  se0Params.DECDIM1 = __SE_DECDIM_DIM1;
91  se0Params.DECDIM1SD = __SE_DECDIMSD_DIM0;
92  se0Params.DECDIM1_WIDTH = height * strideIn;
93  se0Params.ELETYPE = SE_ELETYPE;
94 
95  pKerPrivArgs->numBlocksIdxCheck = ICNT2 * nTiles;
96 
97  /**************************/
98  /* Store SE and SA params */
99  /**************************/
100  *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE0_PARAM_OFFSET) = se0Params;
101 #if VXLIB_DEBUGPRINT
102  printf("Exit VXLIB_tableLookup_checkSrcIdx_init_ci\n");
103 #endif
104  return status;
105 }
108 template VXLIB_STATUS
110 template VXLIB_STATUS
112 template <uint32_t dTypeIn, uint32_t dTypeOut>
114  const VXLIB_bufParams2D_t *bufParamsIn,
115  const VXLIB_bufParams2D_t *bufParamsOut,
116  const VXLIB_bufParams1D_t *bufParamsLut,
117  const VXLIB_tableLookup_InitArgs *pKerInitArgs)
118 {
119 #if VXLIB_DEBUGPRINT
120  printf("Enter VXLIB_tableLookup_init_ci\n");
121 #endif
122  VXLIB_STATUS status = VXLIB_SUCCESS; // assign status to success by default
123 
124  status = VXLIB_tableLookup_checkSrcIdx_init_ci<dTypeIn, dTypeOut>(handle);
125 
126  VXLIB_tableLookup_PrivArgs *pKerPrivArgs = (VXLIB_tableLookup_PrivArgs *) handle;
127  uint32_t count = pKerPrivArgs->pKerInitArgs.count;
128  if (count > VXLIB_COUNT_LIMIT) {
129  status = VXLIB_tableLookup_generic_init_ci<dTypeIn, dTypeOut>(handle, bufParamsIn, bufParamsOut, bufParamsLut,
130  pKerInitArgs);
131  }
132  else {
133  status = VXLIB_tableLookup_lut_init_ci<dTypeIn, dTypeOut>(handle, bufParamsIn, bufParamsOut, bufParamsLut,
134  pKerInitArgs);
135  }
136 
137 #if VXLIB_DEBUGPRINT
138  printf("Exit VXLIB_tableLookup_init_ci\n");
139 #endif
140  return status;
141 }
142 template VXLIB_STATUS
144  const VXLIB_bufParams2D_t *bufParamsIn,
145  const VXLIB_bufParams2D_t *bufParamsOut,
146  const VXLIB_bufParams1D_t *bufParamsLut,
147  const VXLIB_tableLookup_InitArgs *pKerInitArgs);
148 template VXLIB_STATUS
150  const VXLIB_bufParams2D_t *bufParamsIn,
151  const VXLIB_bufParams2D_t *bufParamsOut,
152  const VXLIB_bufParams1D_t *bufParamsLut,
153  const VXLIB_tableLookup_InitArgs *pKerInitArgs);
154 template VXLIB_STATUS
156  const VXLIB_bufParams2D_t *bufParamsIn,
157  const VXLIB_bufParams2D_t *bufParamsOut,
158  const VXLIB_bufParams1D_t *bufParamsLut,
159  const VXLIB_tableLookup_InitArgs *pKerInitArgs);
160 template VXLIB_STATUS
162  const VXLIB_bufParams2D_t *bufParamsIn,
163  const VXLIB_bufParams2D_t *bufParamsOut,
164  const VXLIB_bufParams1D_t *bufParamsLut,
165  const VXLIB_tableLookup_InitArgs *pKerInitArgs);
166 
167 /**********************************************************************************************************************/
168 /* */
169 /* VXLIB_tableLookup_set_ci */
170 /* */
171 /**********************************************************************************************************************/
172 
173 template <typename dataType> VXLIB_STATUS VXLIB_tableLookup_set_ci(VXLIB_kernelHandle handle, void *restrict lut)
174 {
175 #if VXLIB_DEBUGPRINT
176  printf("Enter VXLIB_tableLookup_set_ci\n");
177 #endif
178  VXLIB_STATUS status = VXLIB_SUCCESS; // assign status to success by default
179 
180  VXLIB_tableLookup_PrivArgs *pKerPrivArgs = (VXLIB_tableLookup_PrivArgs *) handle;
181  uint32_t count = pKerPrivArgs->pKerInitArgs.count;
182  if (count > VXLIB_COUNT_LIMIT) {
183  status = VXLIB_tableLookup_generic_set_ci<dataType>(handle, lut);
184  }
185  else {
186  status = VXLIB_tableLookup_lut_set_ci<dataType>(handle, lut);
187  }
188 
189 #if VXLIB_DEBUGPRINT
190  printf("Exit VXLIB_tableLookup_set_ci\n");
191 #endif
192  return (status);
193 }
198 
199 /**********************************************************************************************************************/
200 /* */
201 /* VXLIB_tableLookup_exec_ci */
202 /* */
203 /**********************************************************************************************************************/
204 template <typename dataType>
206 
208 {
209 #if VXLIB_DEBUGPRINT
210  printf("Enter VXLIB_tableLookup_checkSrcIdx_exec_ci\n");
211 #endif
212  VXLIB_STATUS status = VXLIB_SUCCESS; // assign status to success by default
213  VXLIB_tableLookup_PrivArgs *pKerPrivArgs = (VXLIB_tableLookup_PrivArgs *) handle;
214  uint32_t count = pKerPrivArgs->pKerInitArgs.count;
215  uint32_t offset = pKerPrivArgs->pKerInitArgs.offset;
216  size_t height = pKerPrivArgs->height;
217  int32_t strideIn = pKerPrivArgs->strideInElements;
218 
219  uint8_t *pBlock = pKerPrivArgs->bufPblock; // address to retrieve to store SE/SA params
220  int32_t numBlocksIdxCheck = (int32_t) pKerPrivArgs->numBlocksIdxCheck;
221 
222  __SE_TEMPLATE_v1 se0Params = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE0_PARAM_OFFSET);
223  __SE_TEMPLATE_v1 se1Params = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE0_PARAM_OFFSET);
224  se1Params.DECDIM1_WIDTH = (height % 2 == 0) ? (height * strideIn) : ((height - 1) * strideIn);
225 
226  typedef typename c7x::make_full_vector<uint8_t>::type vec;
227 
228  vec offsetVec = (vec) offset;
229  vec countVec = (vec) (count - 1);
230  countVec -= offsetVec;
231 
232  uint8_t *pInLocal = (uint8_t *) pIn;
233 
234  vec flagVec0 = (vec) 0;
235  vec flagVec1 = (vec) 0;
236  vec oneVec = (vec) 1;
237 
238  __SE0_OPEN(pInLocal, se0Params);
239  __SE1_OPEN(pInLocal + strideIn, se1Params);
240 
241  for (int32_t i = 0; i < numBlocksIdxCheck; i++) {
242  vec srcVec0 = c7x::strm_eng<0, vec>::get_adv();
243  vec srcVec1 = c7x::strm_eng<1, vec>::get_adv();
244 
245  __vpred vpred0 = __cmp_gt_pred(srcVec0, countVec);
246  __vpred vpred1 = __cmp_gt_pred(srcVec1, countVec);
247 
248  flagVec0 = __select(vpred0, oneVec, flagVec0);
249  flagVec1 = __select(vpred1, oneVec, flagVec1);
250  }
251  __SE0_CLOSE();
252  __SE1_CLOSE();
253  flagVec0 += flagVec1;
254  uint32_t flag = __horizontal_add(flagVec0);
255  status = (flag == 0) ? VXLIB_SUCCESS : VXLIB_ERR_INVALID_DIMENSION;
256 #if VXLIB_DEBUGPRINT
257  printf("Exit VXLIB_tableLookup_checkSrcIdx_exec_ci \n");
258 #endif
259 
260  return (status);
261 }
262 
264 {
265 #if VXLIB_DEBUGPRINT
266  printf("Enter VXLIB_tableLookup_checkSrcIdx_exec_ci\n");
267 #endif
268  VXLIB_STATUS status = VXLIB_SUCCESS; // assign status to success by default
269  VXLIB_tableLookup_PrivArgs *pKerPrivArgs = (VXLIB_tableLookup_PrivArgs *) handle;
270  uint32_t count = pKerPrivArgs->pKerInitArgs.count;
271  uint32_t offset = pKerPrivArgs->pKerInitArgs.offset;
272  size_t height = pKerPrivArgs->height;
273  int32_t strideIn = pKerPrivArgs->strideInElements;
274 
275  uint8_t *pBlock = pKerPrivArgs->bufPblock; // address to retrieve to store SE/SA params
276  int32_t numBlocksIdxCheck = (int32_t) pKerPrivArgs->numBlocksIdxCheck;
277 
278  __SE_TEMPLATE_v1 se0Params = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE0_PARAM_OFFSET);
279  __SE_TEMPLATE_v1 se1Params = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE0_PARAM_OFFSET);
280  se1Params.DECDIM1_WIDTH = (height % 2 == 0) ? (height * strideIn) : ((height - 1) * strideIn);
281 
282  typedef typename c7x::make_full_vector<uint16_t>::type vec;
283 
284  vec offsetVec = (vec) offset;
285  vec countVec = (vec) (count - 1);
286  countVec -= offsetVec;
287 
288  uint16_t *pInLocal = (uint16_t *) pIn;
289 
290  vec flagVec0 = (vec) 0;
291  vec flagVec1 = (vec) 0;
292  vec oneVec = (vec) 1;
293 
294  __SE0_OPEN(pInLocal, se0Params);
295  __SE1_OPEN(pInLocal + strideIn, se1Params);
296 
297  for (int32_t i = 0; i < numBlocksIdxCheck; i++) {
298  vec srcVec0 = c7x::strm_eng<0, vec>::get_adv();
299  vec srcVec1 = c7x::strm_eng<1, vec>::get_adv();
300 
301  __vpred vpred0 = __cmp_gt_pred(srcVec0, countVec);
302  __vpred vpred1 = __cmp_gt_pred(srcVec1, countVec);
303 
304  flagVec0 = __select(vpred0, oneVec, flagVec0);
305  flagVec1 = __select(vpred1, oneVec, flagVec1);
306  }
307  __SE0_CLOSE();
308  __SE1_CLOSE();
309  flagVec0 += flagVec1;
310  uint32_t flag = __horizontal_add(flagVec0);
311  status = (flag == 0) ? VXLIB_SUCCESS : VXLIB_ERR_INVALID_DIMENSION;
312 #if VXLIB_DEBUGPRINT
313  printf("Exit VXLIB_tableLookup_checkSrcIdx_exec_ci \n");
314 #endif
315 
316  return (status);
317 }
319 {
320 #if VXLIB_DEBUGPRINT
321  printf("Enter VXLIB_tableLookup_checkSrcIdx_exec_ci\n");
322 #endif
323  VXLIB_STATUS status = VXLIB_SUCCESS; // assign status to success by default
324  VXLIB_tableLookup_PrivArgs *pKerPrivArgs = (VXLIB_tableLookup_PrivArgs *) handle;
325  uint32_t count = pKerPrivArgs->pKerInitArgs.count;
326  uint32_t offset = pKerPrivArgs->pKerInitArgs.offset;
327  size_t height = pKerPrivArgs->height;
328  int32_t strideIn = pKerPrivArgs->strideInElements;
329 
330  uint8_t *pBlock = pKerPrivArgs->bufPblock; // address to retrieve to store SE/SA params
331  int32_t numBlocksIdxCheck = (int32_t) pKerPrivArgs->numBlocksIdxCheck;
332 
333  __SE_TEMPLATE_v1 se0Params = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE0_PARAM_OFFSET);
334  __SE_TEMPLATE_v1 se1Params = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE0_PARAM_OFFSET);
335  se1Params.DECDIM1_WIDTH = (height % 2 == 0) ? (height * strideIn) : ((height - 1) * strideIn);
336 
337  typedef typename c7x::make_full_vector<int8_t>::type vec;
338  typedef typename c7x::make_full_vector<uint8_t>::type uvec;
339 
340  uvec offsetVec = (uvec) offset;
341  uvec countVec = (uvec) (count - 1);
342 
343  vec zeroVec = (vec) (-(int8_t) offset);
344 
345  int8_t *pInLocal = (int8_t *) pIn;
346 
347  vec flagVec0 = (vec) 0;
348  vec flagVec1 = (vec) 0;
349  vec oneVec = (vec) 1;
350 
351  __SE0_OPEN(pInLocal, se0Params);
352  __SE1_OPEN(pInLocal + strideIn, se1Params);
353 
354  for (int32_t i = 0; i < numBlocksIdxCheck; i++) {
355  vec srcVec0 = c7x::strm_eng<0, vec>::get_adv();
356  vec srcVec1 = c7x::strm_eng<1, vec>::get_adv();
357 
358  __vpred vpred0 = __cmp_gt_pred(c7x::as_uchar_vec(srcVec0 + c7x::as_char_vec(offsetVec)), countVec);
359  __vpred vpred1 = __cmp_gt_pred(c7x::as_uchar_vec(srcVec1 + c7x::as_char_vec(offsetVec)), countVec);
360 
361  __vpred vpred2 = __cmp_gt_pred(zeroVec, srcVec0);
362  __vpred vpred3 = __cmp_gt_pred(zeroVec, srcVec1);
363 
364  vpred0 = __or(vpred0, vpred2);
365  vpred1 = __or(vpred1, vpred3);
366 
367  flagVec0 = __select(vpred0, oneVec, flagVec0);
368  flagVec1 = __select(vpred1, oneVec, flagVec1);
369  }
370 
371  __SE0_CLOSE();
372  __SE1_CLOSE();
373  flagVec0 += flagVec1;
374  uint32_t flag = __horizontal_add(flagVec0);
375  status = (flag == 0) ? VXLIB_SUCCESS : VXLIB_ERR_INVALID_DIMENSION;
376 #if VXLIB_DEBUGPRINT
377  printf("Exit VXLIB_tableLookup_checkSrcIdx_exec_ci \n");
378 #endif
379 
380  return (status);
381 }
382 
384 {
385 #if VXLIB_DEBUGPRINT
386  printf("Enter VXLIB_tableLookup_checkSrcIdx_exec_ci\n");
387 #endif
388  VXLIB_STATUS status = VXLIB_SUCCESS; // assign status to success by default
389  VXLIB_tableLookup_PrivArgs *pKerPrivArgs = (VXLIB_tableLookup_PrivArgs *) handle;
390  uint32_t count = pKerPrivArgs->pKerInitArgs.count;
391  uint32_t offset = pKerPrivArgs->pKerInitArgs.offset;
392  size_t height = pKerPrivArgs->height;
393  int32_t strideIn = pKerPrivArgs->strideInElements;
394 
395  uint8_t *pBlock = pKerPrivArgs->bufPblock; // address to retrieve to store SE/SA params
396  int32_t numBlocksIdxCheck = (int32_t) pKerPrivArgs->numBlocksIdxCheck;
397 
398  __SE_TEMPLATE_v1 se0Params = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE0_PARAM_OFFSET);
399  __SE_TEMPLATE_v1 se1Params = *(__SE_TEMPLATE_v1 *) ((uint8_t *) pBlock + SE0_PARAM_OFFSET);
400  se1Params.DECDIM1_WIDTH = (height % 2 == 0) ? (height * strideIn) : ((height - 1) * strideIn);
401 
402  typedef typename c7x::make_full_vector<int16_t>::type vec;
403  typedef typename c7x::make_full_vector<uint16_t>::type uvec;
404 
405  uvec offsetVec = (uvec) offset;
406  uvec countVec = (uvec) (count - 1);
407 
408  vec zeroVec = (vec) (-(int16_t) offset);
409 
410  int16_t *pInLocal = (int16_t *) pIn;
411 
412  vec flagVec0 = (vec) 0;
413  vec flagVec1 = (vec) 0;
414  vec oneVec = (vec) 1;
415 
416  __SE0_OPEN(pInLocal, se0Params);
417  __SE1_OPEN(pInLocal + strideIn, se1Params);
418 
419  for (int32_t i = 0; i < numBlocksIdxCheck; i++) {
420  vec srcVec0 = c7x::strm_eng<0, vec>::get_adv();
421  vec srcVec1 = c7x::strm_eng<1, vec>::get_adv();
422 
423  __vpred vpred0 = __cmp_gt_pred(c7x::as_ushort_vec(srcVec0 + c7x::as_short_vec(offsetVec)), countVec);
424  __vpred vpred1 = __cmp_gt_pred(c7x::as_ushort_vec(srcVec1 + c7x::as_short_vec(offsetVec)), countVec);
425 
426  __vpred vpred2 = __cmp_gt_pred(zeroVec, srcVec0);
427  __vpred vpred3 = __cmp_gt_pred(zeroVec, srcVec1);
428 
429  vpred0 = __or(vpred0, vpred2);
430  vpred1 = __or(vpred1, vpred3);
431 
432  flagVec0 = __select(vpred0, oneVec, flagVec0);
433  flagVec1 = __select(vpred1, oneVec, flagVec1);
434  }
435  __SE0_CLOSE();
436  __SE1_CLOSE();
437  flagVec0 += flagVec1;
438  uint32_t flag = __horizontal_add(flagVec0);
439  status = (flag == 0) ? VXLIB_SUCCESS : VXLIB_ERR_INVALID_DIMENSION;
440 #if VXLIB_DEBUGPRINT
441  printf("Exit VXLIB_tableLookup_checkSrcIdx_exec_ci \n");
442 #endif
443 
444  return (status);
445 }
446 
447 template <typename dataType>
449 VXLIB_tableLookup_exec_ci(VXLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut, void *restrict lut)
450 {
451 #if VXLIB_DEBUGPRINT
452  printf("Enter VXLIB_tableLookup_exec_ci\n");
453 #endif
454  VXLIB_STATUS status = VXLIB_SUCCESS; // assign status to success by default
455 
456  status = VXLIB_tableLookup_checkSrcIdx_exec_ci<dataType>(handle, pIn);
457 
458  if (status == VXLIB_SUCCESS) {
459  VXLIB_tableLookup_PrivArgs *pKerPrivArgs = (VXLIB_tableLookup_PrivArgs *) handle;
460  uint32_t count = pKerPrivArgs->pKerInitArgs.count;
461  if (count > VXLIB_COUNT_LIMIT) {
462  status = VXLIB_tableLookup_generic_exec_ci<dataType>(handle, pIn, pOut, lut);
463  }
464  else {
465  status = VXLIB_tableLookup_lut_exec_ci<dataType>(handle, pIn, pOut, lut);
466  }
467  }
468  else {
469  /* Nothing do here */
470  }
471 #if VXLIB_DEBUGPRINT
472  printf("Exit VXLIB_tableLookup_exec_ci \n");
473 #endif
474  return (status);
475 }
477  void *restrict pIn,
478  void *restrict pOut,
479  void *restrict lut);
481  void *restrict pIn,
482  void *restrict pOut,
483  void *restrict lut);
485  void *restrict pIn,
486  void *restrict pOut,
487  void *restrict lut);
489  void *restrict pIn,
490  void *restrict pOut,
491  void *restrict lut);
492 
493 template <uint32_t dTypeIn, uint32_t dTypeLut>
494 void VXLIB_idxCheck_perfEst(VXLIB_kernelHandle handle, size_t *archCycles);
495 
496 template <> void VXLIB_idxCheck_perfEst<VXLIB_UINT8, VXLIB_UINT8>(VXLIB_kernelHandle handle, size_t *archCycles)
497 {
498  VXLIB_tableLookup_PrivArgs *pKerPrivArgs = (VXLIB_tableLookup_PrivArgs *) handle;
499 
500  // obtain loop count for compute loop
501  size_t numBlocksIdxCheck = pKerPrivArgs->numBlocksIdxCheck;
502  *archCycles = 1 + numBlocksIdxCheck * 1; // obtained from asm
503 }
504 template <> void VXLIB_idxCheck_perfEst<VXLIB_INT8, VXLIB_INT8>(VXLIB_kernelHandle handle, size_t *archCycles)
505 {
506  VXLIB_tableLookup_PrivArgs *pKerPrivArgs = (VXLIB_tableLookup_PrivArgs *) handle;
507 
508  // obtain loop count for compute loop
509  size_t numBlocksIdxCheck = pKerPrivArgs->numBlocksIdxCheck;
510  *archCycles = 3 + numBlocksIdxCheck * 2; // obtained from asm
511 }
512 template <> void VXLIB_idxCheck_perfEst<VXLIB_UINT16, VXLIB_UINT16>(VXLIB_kernelHandle handle, size_t *archCycles)
513 {
514  VXLIB_tableLookup_PrivArgs *pKerPrivArgs = (VXLIB_tableLookup_PrivArgs *) handle;
515 
516  // obtain loop count for compute loop
517  size_t numBlocksIdxCheck = pKerPrivArgs->numBlocksIdxCheck;
518  *archCycles = 1 + numBlocksIdxCheck * 1; // obtained from asm
519 }
520 template <> void VXLIB_idxCheck_perfEst<VXLIB_INT16, VXLIB_INT16>(VXLIB_kernelHandle handle, size_t *archCycles)
521 {
522  VXLIB_tableLookup_PrivArgs *pKerPrivArgs = (VXLIB_tableLookup_PrivArgs *) handle;
523 
524  // obtain loop count for compute loop
525  size_t numBlocksIdxCheck = pKerPrivArgs->numBlocksIdxCheck;
526  *archCycles = 3 + numBlocksIdxCheck * 2; // obtained from asm
527 }
528 
529 template <uint32_t dTypeIn, uint32_t dTypeLut>
530 void VXLIB_lut_perfEst(VXLIB_kernelHandle handle, size_t *archCycles, size_t width);
531 
532 template <>
533 void VXLIB_lut_perfEst<VXLIB_UINT8, VXLIB_UINT8>(VXLIB_kernelHandle handle, size_t *archCycles, size_t width)
534 {
535  VXLIB_tableLookup_PrivArgs *pKerPrivArgs = (VXLIB_tableLookup_PrivArgs *) handle;
536  size_t iterConst;
537  size_t ii;
538  size_t numBlocksLut = pKerPrivArgs->numBlocksLut;
539 
540  if (width < WIDTH_UNROLL_FACTOR) {
541  ii = 6;
542  iterConst = 12;
543  }
544  else {
545  ii = 10;
546  iterConst = 11;
547  }
548 
549  // obtain loop count for compute loop
550  *archCycles = iterConst + numBlocksLut * ii; // obtained from asm
551 }
552 template <> void VXLIB_lut_perfEst<VXLIB_INT8, VXLIB_INT8>(VXLIB_kernelHandle handle, size_t *archCycles, size_t width)
553 {
554  VXLIB_tableLookup_PrivArgs *pKerPrivArgs = (VXLIB_tableLookup_PrivArgs *) handle;
555  size_t iterConst;
556  size_t ii;
557  size_t numBlocksLut = pKerPrivArgs->numBlocksLut;
558 
559  if (width < WIDTH_UNROLL_FACTOR) {
560  ii = 6;
561  iterConst = 12;
562  }
563  else {
564  ii = 10;
565  iterConst = 11;
566  }
567 
568  // obtain loop count for compute loop
569  *archCycles = iterConst + numBlocksLut * ii; // obtained from asm
570 }
571 
572 template <>
573 void VXLIB_lut_perfEst<VXLIB_UINT16, VXLIB_UINT16>(VXLIB_kernelHandle handle, size_t *archCycles, size_t width)
574 {
575  VXLIB_tableLookup_PrivArgs *pKerPrivArgs = (VXLIB_tableLookup_PrivArgs *) handle;
576  size_t iterConst;
577  size_t ii;
578  size_t numBlocksLut = pKerPrivArgs->numBlocksLut;
579 
580  if (width < WIDTH_UNROLL_FACTOR) {
581  ii = 8;
582  iterConst = 7;
583  }
584  else {
585  ii = 12;
586  iterConst = 8;
587  }
588 
589  // obtain loop count for compute loop
590  *archCycles = iterConst + numBlocksLut * ii; // obtained from asm
591 }
592 template <>
593 void VXLIB_lut_perfEst<VXLIB_INT16, VXLIB_INT16>(VXLIB_kernelHandle handle, size_t *archCycles, size_t width)
594 {
595  VXLIB_tableLookup_PrivArgs *pKerPrivArgs = (VXLIB_tableLookup_PrivArgs *) handle;
596  size_t iterConst;
597  size_t ii;
598  size_t numBlocksLut = pKerPrivArgs->numBlocksLut;
599 
600  if (width < WIDTH_UNROLL_FACTOR) {
601  ii = 8;
602  iterConst = 12;
603  }
604  else {
605  ii = 12;
606  iterConst = 8;
607  }
608  // obtain loop count for compute loop
609  *archCycles = iterConst + numBlocksLut * ii; // obtained from asm
610 }
611 
612 template <uint32_t dTypeIn, uint32_t dTypeLut> void VXLIB_generic_perfEst(VXLIB_kernelHandle handle, size_t *archCycles)
613 {
614  VXLIB_tableLookup_PrivArgs *pKerPrivArgs = (VXLIB_tableLookup_PrivArgs *) handle;
615 
616  // obtain loop count for compute loop
617  size_t numBlocksLut = pKerPrivArgs->numBlocksLut;
618  *archCycles = 10 + numBlocksLut * 8; // obtained from asm
619 }
620 
621 template void VXLIB_generic_perfEst<VXLIB_UINT8, VXLIB_UINT8>(VXLIB_kernelHandle handle, size_t *archCycles);
622 template void VXLIB_generic_perfEst<VXLIB_INT8, VXLIB_INT8>(VXLIB_kernelHandle handle, size_t *archCycles);
624 template void VXLIB_generic_perfEst<VXLIB_INT16, VXLIB_INT16>(VXLIB_kernelHandle handle, size_t *archCycles);
625 
627  const VXLIB_bufParams2D_t *bufParamsIn,
628  const VXLIB_bufParams2D_t *bufParamsOut,
629  const VXLIB_bufParams1D_t *bufParamsLut,
630  const VXLIB_tableLookup_InitArgs *pKerInitArgs,
631  size_t *archCycles,
632  size_t *estCycles)
633 {
634 
635  // typecast handle (void) to struct pointer type associated to kernel
636 
637  const uint32_t dTypeIn = bufParamsIn->data_type;
638  const uint32_t dTypeOut = bufParamsOut->data_type;
639  const uint32_t dTypeLut = bufParamsLut->data_type;
640 
641  // obtain loop count for compute loop
642  size_t overheadCnt = 110; // profiled code before entering compute loop
643 
644  size_t archSrcIdx;
645  size_t archLutLogic;
646 
647  /* srcIdx Check cycle estimation */
649  VXLIB_idxCheck_perfEst<VXLIB_TABLELOOKUP_DTYPE_I8U_O8U>(handle, &archSrcIdx);
650  }
652  VXLIB_idxCheck_perfEst<VXLIB_TABLELOOKUP_DTYPE_I8S_O8S>(handle, &archSrcIdx);
653  }
655  VXLIB_idxCheck_perfEst<VXLIB_TABLELOOKUP_DTYPE_I16U_O16U>(handle, &archSrcIdx);
656  }
657  else {
658  VXLIB_idxCheck_perfEst<VXLIB_TABLELOOKUP_DTYPE_I16S_O16S>(handle, &archSrcIdx);
659  }
660 
661  /* LUT operation cycle estimation */
662  VXLIB_tableLookup_PrivArgs *pKerPrivArgs = (VXLIB_tableLookup_PrivArgs *) handle;
663  size_t width = pKerPrivArgs->width;
664  uint32_t count = pKerPrivArgs->pKerInitArgs.count;
665  if (count < VXLIB_COUNT_LIMIT) {
666 
668  VXLIB_lut_perfEst<VXLIB_TABLELOOKUP_DTYPE_I8U_O8U>(handle, &archLutLogic, width);
669  }
671  VXLIB_lut_perfEst<VXLIB_TABLELOOKUP_DTYPE_I8S_O8S>(handle, &archLutLogic, width);
672  }
674  VXLIB_lut_perfEst<VXLIB_TABLELOOKUP_DTYPE_I16U_O16U>(handle, &archLutLogic, width);
675  }
676  else {
677  VXLIB_lut_perfEst<VXLIB_TABLELOOKUP_DTYPE_I16S_O16S>(handle, &archLutLogic, width);
678  }
679  }
680  else {
682  VXLIB_generic_perfEst<VXLIB_TABLELOOKUP_DTYPE_I8U_O8U>(handle, &archLutLogic);
683  }
685  VXLIB_generic_perfEst<VXLIB_TABLELOOKUP_DTYPE_I8S_O8S>(handle, &archLutLogic);
686  }
688  VXLIB_generic_perfEst<VXLIB_TABLELOOKUP_DTYPE_I16U_O16U>(handle, &archLutLogic);
689  }
690  else {
691  VXLIB_generic_perfEst<VXLIB_TABLELOOKUP_DTYPE_I16S_O16S>(handle, &archLutLogic);
692  }
693  }
694  *archCycles = archSrcIdx + archLutLogic; // obtained from asm
695  *estCycles = *archCycles + overheadCnt;
696 }
697 /* ======================================================================== */
698 /* End of file: VXLIB_tableLookup_ci.cpp */
699 /* ======================================================================== */
#define SE0_PARAM_OFFSET
template VXLIB_STATUS VXLIB_tableLookup_checkSrcIdx_init_ci< VXLIB_TABLELOOKUP_DTYPE_I16S_O16S >(VXLIB_kernelHandle handle)
void VXLIB_idxCheck_perfEst< VXLIB_INT16, VXLIB_INT16 >(VXLIB_kernelHandle handle, size_t *archCycles)
template VXLIB_STATUS VXLIB_tableLookup_set_ci< int16_t >(VXLIB_kernelHandle handle, void *restrict lut)
template void VXLIB_generic_perfEst< VXLIB_INT16, VXLIB_INT16 >(VXLIB_kernelHandle handle, size_t *archCycles)
void VXLIB_idxCheck_perfEst< VXLIB_INT8, VXLIB_INT8 >(VXLIB_kernelHandle handle, size_t *archCycles)
template VXLIB_STATUS VXLIB_tableLookup_checkSrcIdx_init_ci< VXLIB_TABLELOOKUP_DTYPE_I8U_O8U >(VXLIB_kernelHandle handle)
template void VXLIB_generic_perfEst< VXLIB_INT8, VXLIB_INT8 >(VXLIB_kernelHandle handle, size_t *archCycles)
template VXLIB_STATUS VXLIB_tableLookup_set_ci< int8_t >(VXLIB_kernelHandle handle, void *restrict lut)
template VXLIB_STATUS VXLIB_tableLookup_exec_ci< uint16_t >(VXLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut, void *restrict lut)
void VXLIB_generic_perfEst(VXLIB_kernelHandle handle, size_t *archCycles)
template VXLIB_STATUS VXLIB_tableLookup_checkSrcIdx_init_ci< VXLIB_TABLELOOKUP_DTYPE_I16U_O16U >(VXLIB_kernelHandle handle)
template VXLIB_STATUS VXLIB_tableLookup_exec_ci< int8_t >(VXLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut, void *restrict lut)
VXLIB_STATUS VXLIB_tableLookup_set_ci(VXLIB_kernelHandle handle, void *restrict lut)
This function sets the lookup table in L1D SRAM for the C7x implementation of the kernel....
void VXLIB_idxCheck_perfEst< VXLIB_UINT8, VXLIB_UINT8 >(VXLIB_kernelHandle handle, size_t *archCycles)
template VXLIB_STATUS VXLIB_tableLookup_init_ci< VXLIB_TABLELOOKUP_DTYPE_I16U_O16U >(VXLIB_kernelHandle handle, const VXLIB_bufParams2D_t *bufParamsIn, const VXLIB_bufParams2D_t *bufParamsOut, const VXLIB_bufParams1D_t *bufParamsLut, const VXLIB_tableLookup_InitArgs *pKerInitArgs)
VXLIB_STATUS VXLIB_tableLookup_exec_ci(VXLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut, void *restrict lut)
This function is the main execution function for the C7x implementation of the kernel....
VXLIB_STATUS VXLIB_tableLookup_checkSrcIdx_init_ci(VXLIB_kernelHandle handle)
template void VXLIB_generic_perfEst< VXLIB_UINT8, VXLIB_UINT8 >(VXLIB_kernelHandle handle, size_t *archCycles)
VXLIB_STATUS VXLIB_tableLookup_checkSrcIdx_exec_ci< uint8_t >(VXLIB_kernelHandle handle, void *restrict pIn)
template VXLIB_STATUS VXLIB_tableLookup_checkSrcIdx_init_ci< VXLIB_TABLELOOKUP_DTYPE_I8S_O8S >(VXLIB_kernelHandle handle)
template VXLIB_STATUS VXLIB_tableLookup_set_ci< uint8_t >(VXLIB_kernelHandle handle, void *restrict lut)
template VXLIB_STATUS VXLIB_tableLookup_exec_ci< int16_t >(VXLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut, void *restrict lut)
void VXLIB_lut_perfEst< VXLIB_INT8, VXLIB_INT8 >(VXLIB_kernelHandle handle, size_t *archCycles, size_t width)
void VXLIB_lut_perfEst< VXLIB_UINT8, VXLIB_UINT8 >(VXLIB_kernelHandle handle, size_t *archCycles, size_t width)
template VXLIB_STATUS VXLIB_tableLookup_init_ci< VXLIB_TABLELOOKUP_DTYPE_I16S_O16S >(VXLIB_kernelHandle handle, const VXLIB_bufParams2D_t *bufParamsIn, const VXLIB_bufParams2D_t *bufParamsOut, const VXLIB_bufParams1D_t *bufParamsLut, const VXLIB_tableLookup_InitArgs *pKerInitArgs)
void VXLIB_lut_perfEst< VXLIB_UINT16, VXLIB_UINT16 >(VXLIB_kernelHandle handle, size_t *archCycles, size_t width)
template VXLIB_STATUS VXLIB_tableLookup_init_ci< VXLIB_TABLELOOKUP_DTYPE_I8U_O8U >(VXLIB_kernelHandle handle, const VXLIB_bufParams2D_t *bufParamsIn, const VXLIB_bufParams2D_t *bufParamsOut, const VXLIB_bufParams1D_t *bufParamsLut, const VXLIB_tableLookup_InitArgs *pKerInitArgs)
void VXLIB_lut_perfEst(VXLIB_kernelHandle handle, size_t *archCycles, size_t width)
template VXLIB_STATUS VXLIB_tableLookup_set_ci< uint16_t >(VXLIB_kernelHandle handle, void *restrict lut)
#define VXLIB_COUNT_LIMIT
VXLIB_STATUS VXLIB_tableLookup_checkSrcIdx_exec_ci< int8_t >(VXLIB_kernelHandle handle, void *restrict pIn)
template VXLIB_STATUS VXLIB_tableLookup_init_ci< VXLIB_TABLELOOKUP_DTYPE_I8S_O8S >(VXLIB_kernelHandle handle, const VXLIB_bufParams2D_t *bufParamsIn, const VXLIB_bufParams2D_t *bufParamsOut, const VXLIB_bufParams1D_t *bufParamsLut, const VXLIB_tableLookup_InitArgs *pKerInitArgs)
VXLIB_STATUS VXLIB_tableLookup_init_ci(VXLIB_kernelHandle handle, const VXLIB_bufParams2D_t *bufParamsIn, const VXLIB_bufParams2D_t *bufParamsOut, const VXLIB_bufParams1D_t *bufParamsLut, const VXLIB_tableLookup_InitArgs *pKerInitArgs)
This function is the initialization function for the C7x implementation of the kernel....
void VXLIB_lut_perfEst< VXLIB_INT16, VXLIB_INT16 >(VXLIB_kernelHandle handle, size_t *archCycles, size_t width)
template void VXLIB_generic_perfEst< VXLIB_UINT16, VXLIB_UINT16 >(VXLIB_kernelHandle handle, size_t *archCycles)
VXLIB_STATUS VXLIB_tableLookup_checkSrcIdx_exec_ci(VXLIB_kernelHandle handle, void *restrict pIn)
VXLIB_STATUS VXLIB_tableLookup_checkSrcIdx_exec_ci< int16_t >(VXLIB_kernelHandle handle, void *restrict pIn)
void VXLIB_idxCheck_perfEst< VXLIB_UINT16, VXLIB_UINT16 >(VXLIB_kernelHandle handle, size_t *archCycles)
VXLIB_STATUS VXLIB_tableLookup_checkSrcIdx_exec_ci< uint16_t >(VXLIB_kernelHandle handle, void *restrict pIn)
template VXLIB_STATUS VXLIB_tableLookup_exec_ci< uint8_t >(VXLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut, void *restrict lut)
void VXLIB_idxCheck_perfEst(VXLIB_kernelHandle handle, size_t *archCycles)
Header file for kernel's internal use. For the kernel's interface, please see VXLIB_tableLookup.
#define WIDTH_UNROLL_FACTOR
#define VXLIB_TABLELOOKUP_I16U_I16U_O16U
#define VXLIB_TABLELOOKUP_I8S_I8S_O8S
#define VXLIB_TABLELOOKUP_I8U_I8U_O8U
Macros that will be useful to check for datatype combinations.
void * VXLIB_kernelHandle
Handle type for VXLIB operations.
Definition: VXLIB_types.h:247
VXLIB_STATUS_NAME
The enumeration of all status codes.
Definition: VXLIB_types.h:220
@ VXLIB_ERR_INVALID_DIMENSION
Definition: VXLIB_types.h:225
@ VXLIB_SUCCESS
Definition: VXLIB_types.h:221
void VXLIB_tableLookup_perfEst(VXLIB_kernelHandle handle, const VXLIB_bufParams2D_t *bufParamsIn, const VXLIB_bufParams2D_t *bufParamsOut, const VXLIB_bufParams1D_t *bufParamsLut, const VXLIB_tableLookup_InitArgs *pKerInitArgs, size_t *archCycles, size_t *estCycles)
This function estimates the archCycles and estCycles.
A structure for a 1 dimensional buffer descriptor.
uint32_t data_type
Values are of type VXLIB_data_type_e.
A structure for a 2 dimensional buffer descriptor.
uint32_t data_type
Values are of type VXLIB_data_type_e.
Structure containing the parameters to initialize the kernel.
uint32_t count
Parameter indicating size of lookup table
uint16_t offset
Parameter indicating index of input value = 0 in the lookup table
Structure that is reserved for internal use by the kernel.
size_t numBlocksLut
Number of blocks to be processed for the LUT logic after SIMDification.
VXLIB_tableLookup_InitArgs pKerInitArgs
Initargs of the kernel.
size_t numBlocksIdxCheck
Number of blocks to be processed for the index check after SIMDification.
size_t strideInElements
Stride of input in elements.
uint8_t bufPblock[VXLIB_TABLELOOKUP_IXX_IXX_OXX_PBLOCK_SIZE]
Array to hold SE/SA params.