47 #define SE_PARAM_BASE (0x0000)
48 #define SE0_PARAM_OFFSET (SE_PARAM_BASE)
49 #define SE1_PARAM_OFFSET (SE0_PARAM_OFFSET + VXLIB_SE_PARAM_SIZE)
50 #define SA0_PARAM_OFFSET (SE1_PARAM_OFFSET + VXLIB_SE_PARAM_SIZE)
64 for (int32_t j = 0; j < numBins; j += 2) {
66 vSeed = (ulong8) (0x0000000000000000);
69 __lookup_init(__LUT_SET0, __as_ushort32(vSeed), row_offset);
78 template <u
int32_t dTypeIn, u
int32_t dTypeOut>
90 size_t width = pKerPrivArgs->
width;
91 size_t height = pKerPrivArgs->
height;
92 size_t elemCount = VXLIB_max_simd<dTypeOut>::value;
93 size_t wBlocks = (width + (elemCount - 1)) / (elemCount);
94 size_t numBlocks = height * wBlocks;
95 uint16_t numBins = pKerInitArgs->
numBins;
100 __SE_TEMPLATE_v1 se0Params = __gen_SE_TEMPLATE_v1();
101 __SE_TEMPLATE_v1 se1Params = __gen_SE_TEMPLATE_v1();
102 __SA_TEMPLATE_v1 sa0Params = __gen_SA_TEMPLATE_v1();
104 __SE_ELETYPE SE_ELETYPE;
105 __SE_VECLEN SE_VECLEN;
106 __SA_VECLEN SA_VECLEN;
108 uint8_t *pBlock = pKerPrivArgs->
bufPblock;
110 SE_ELETYPE = c7x::se_eletype<c7x::uchar_vec>::value;
111 SE_VECLEN = c7x::se_veclen<c7x::int_vec>::value;
112 SA_VECLEN = c7x::sa_veclen<c7x::int_vec>::value;
114 if (width == strideInElements) {
115 se0Params.ELETYPE = SE_ELETYPE;
116 se0Params.VECLEN = SE_VECLEN;
117 se0Params.DIMFMT = __SE_DIMFMT_1D;
118 se0Params.PROMOTE = __SE_PROMOTE_4X_ZEROEXT;
119 se0Params.ICNT0 = width * height;
123 se0Params.ELETYPE = SE_ELETYPE;
124 se0Params.VECLEN = SE_VECLEN;
125 se0Params.DIMFMT = __SE_DIMFMT_2D;
126 se0Params.PROMOTE = __SE_PROMOTE_4X_ZEROEXT;
128 se0Params.ICNT0 = width;
129 se0Params.ICNT1 = height;
130 se0Params.DIM1 = strideInElements;
133 sa0Params.VECLEN = SA_VECLEN;
134 sa0Params.DIMFMT = __SA_DIMFMT_1D;
135 sa0Params.ICNT0 = numBins;
148 __sLTCRFlags_t set0flags;
151 set0flags.INTERPOLATION = __LUT_INTERP_OFF;
152 set0flags.SATURATION = __LUT_SAT_ON;
153 set0flags.SIGNUNSIGN = __LUT_UNSIGNED;
154 set0flags.ELETYPE = __LUT_ELETYPE_32BIT;
155 set0flags.NUMTBLS = __LUT_NUM_TBL_16;
156 set0flags.TBLSIZE = __LUT_TBL_SIZE_16KBYTES;
157 set0flags.WEIGHTSIZE = __LUT_WSIZE_8BIT;
158 set0flags.PROMOTION = __LUT_PROMOTE_OFF;
161 __LUT_SET_LTCR(__LUT_SET0, __lut_set_param(&set0flags));
164 __LUT_SET_LTBR(__LUT_SET0, 0x0000);
167 __LUT_SET_LTER(__LUT_ENABLE_0);
173 if (width != strideInElements) {
175 int32_t blockCounter = 0;
181 uint64_t predictedValue = 0x0;
183 while (blockCounter < wBlocks) {
184 for (i = 0; i < elemCount; i++) {
186 predictedValue |= (uint64_t) ((uint64_t) 0xF << (i * 4));
193 *predRegister = predictedValue;
197 predictedValue = 0x0;
200 SE_ELETYPE = c7x::se_eletype<c7x::ulong_vec>::value;
201 SE_VECLEN = c7x::se_veclen<c7x::ulong_vec>::value;
203 se1Params.ELETYPE = __SE_ELETYPE_64BIT;
204 se1Params.VECLEN = __SE_VECLEN_1ELEM;
205 se1Params.DIMFMT = __SE_DIMFMT_3D;
207 se1Params.ICNT1 = wBlocks;
208 se1Params.ICNT2 = height;
221 uint64_t predictedValue = 0x0;
222 size_t widthLastBlock = (width * height) % elemCount;
224 for (i = 0; i < elemCount; i++) {
225 if (j < widthLastBlock) {
226 predictedValue |= (uint64_t) ((uint64_t) 0xF << (i * 4));
233 *predRegister = predictedValue;
235 se1Params.ELETYPE = __SE_ELETYPE_64BIT;
236 se1Params.VECLEN = __SE_VECLEN_1ELEM;
237 se1Params.DIMFMT = __SE_DIMFMT_1D;
265 template <
typename dTypeIn,
typename dTypeOut>
272 __SE_TEMPLATE_v1 se0Params;
273 __SE_TEMPLATE_v1 se1Params;
274 __SA_TEMPLATE_v1 sa0Params;
277 dTypeIn *restrict pInLocal = (dTypeIn *) pIn;
278 dTypeOut *restrict pOutLocal = (dTypeOut *) pOut;
282 printf(
"Enter VXLIB_histogram_exec_ci\n");
286 uint8_t *pBlock = pKerPrivArgs->
bufPblock;
294 __SE0_OPEN(pInLocal, se0Params);
295 __SE1_OPEN(pPredicate, se1Params);
298 __SA0_OPEN(sa0Params);
300 size_t numBlocks = pKerPrivArgs->
numBlocks;
309 typedef typename c7x::make_full_vector<dTypeOut>::type vec;
311 uint32_t maxSIMD = c7x::max_simd<dTypeOut>::value;
320 if ((pKerPrivArgs->
width * pKerPrivArgs->
height) % maxSIMD) {
321 vec inVec, inHist, histIndex;
322 __vpred cmp_lower, cmp_upper, condHist;
323 size_t linearBlocks = (pKerPrivArgs->
width * pKerPrivArgs->
height) / maxSIMD;
324 for (uint32_t counter = 0; counter < linearBlocks; counter++) {
325 inVec = c7x::strm_eng<0, vec>::get_adv();
327 cmp_lower = __cmp_le_pred(lowerBound, inVec);
328 cmp_upper = __cmp_lt_pred(inVec, upperBound);
329 condHist = __and(cmp_lower, cmp_upper);
331 histIndex = ((inVec - lowerBound) * numBinsVec) / rangeVec;
333 inHist = __select(condHist, histIndex, numBinsVec);
335 __hist(__LUT_SET0, inHist);
338 inVec = c7x::strm_eng<0, vec>::get_adv();
339 __vpred inPred = _mvrp(c7x::strm_eng<1, ulong>::get_adv());
341 cmp_lower = __cmp_le_pred(lowerBound, inVec);
342 cmp_upper = __cmp_lt_pred(inVec, upperBound);
343 condHist = __and(cmp_lower, cmp_upper);
344 condHist = __and(condHist, inPred);
346 histIndex = ((inVec - lowerBound) * numBinsVec) / rangeVec;
348 inHist = __select(condHist, histIndex, numBinsVec);
350 __hist(__LUT_SET0, inHist);
354 size_t linearBlocks = (pKerPrivArgs->
width * pKerPrivArgs->
height) / maxSIMD;
355 for (uint32_t counter = 0; counter < linearBlocks; counter++) {
356 vec inVec = c7x::strm_eng<0, vec>::get_adv();
357 __vpred cmp_lower = __cmp_le_pred(lowerBound, inVec);
358 __vpred cmp_upper = __cmp_lt_pred(inVec, upperBound);
359 __vpred condHist = __and(cmp_lower, cmp_upper);
361 vec histIndex = ((inVec - lowerBound) * numBinsVec) / rangeVec;
363 vec inHist = __select(condHist, histIndex, numBinsVec);
365 __hist(__LUT_SET0, inHist);
371 for (uint32_t counter = 0; counter < numBlocks; counter++) {
372 vec inVec = c7x::strm_eng<0, vec>::get_adv();
373 __vpred inPred = _mvrp(c7x::strm_eng<1, ulong>::get_adv());
375 __vpred cmp_lower = __cmp_le_pred(lowerBound, inVec);
376 __vpred cmp_upper = __cmp_lt_pred(inVec, upperBound);
377 __vpred condHist = __and(cmp_lower, cmp_upper);
378 condHist = __and(condHist, inPred);
380 vec histIndex = ((inVec - lowerBound) * numBinsVec) / rangeVec;
385 vec inHist = __select(condHist, histIndex, numBinsVec);
387 __hist(__LUT_SET0, inHist);
398 uint32_t saStoreIter = (numBins / maxSIMD);
399 if (numBins % maxSIMD) {
403 uint16_t binCount = 0;
405 for (
int i = 0; i < saStoreIter; i++) {
407 for (
int j = 0; j < maxSIMD; j++) {
408 c7x::uint_vec vHist = __lookup_read_uint(__LUT_SET0, (c7x::uint_vec)(binCount));
410 bin.s[j] = __horizontal_add(vHist);
415 __vpred tmp = c7x::strm_agen<0, vec>::get_vpred();
416 vec *addr = c7x::strm_agen<0, vec>::get_adv(pOutLocal);
417 __vstore_pred(tmp, addr, bin);
437 void *restrict pOut);
446 size_t numBlocks = pKerPrivArgs->
numBlocks;
447 size_t overheadCnt = 17;
448 *archCycles = 7 + numBlocks * 2;
449 *estCycles = overheadCnt + *archCycles;
template VXLIB_STATUS VXLIB_histogram_init_ci< VXLIB_HISTOGRAM_DTYPE_I8U_O32U >(VXLIB_kernelHandle handle, const VXLIB_bufParams2D_t *bufParamsIn0, const VXLIB_bufParams1D_t *bufParamsOut, const VXLIB_histogram_InitArgs *pKerInitArgs)
template VXLIB_STATUS VXLIB_histogram_exec_ci< VXLIB_HISTOGRAM_TYPENAME_I8U_O32U >(VXLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
VXLIB_STATUS VXLIB_histogram_init_ci(VXLIB_kernelHandle handle, const VXLIB_bufParams2D_t *bufParamsIn, const VXLIB_bufParams1D_t *bufParamsOut, const VXLIB_histogram_InitArgs *pKerInitArgs)
This function is the initialization function for the C7x implementation of the kernel....
VXLIB_STATUS VXLIB_histogram_exec_ci(VXLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
This function is the main execution function for the C7x implementation of the kernel....
Header file for kernel's internal use. For the kernel's interface, please see VXLIB_histogram.
void * VXLIB_kernelHandle
Handle type for VXLIB operations.
#define VXLIB_DEBUGPRINT
Enable debug printf statements.
VXLIB_STATUS_NAME
The enumeration of all status codes.
void VXLIB_histogram_val_init(uint16_t numBins)
Helper function that clears the values stored in the LUT histogram.
void VXLIB_histogram_perfEst(VXLIB_kernelHandle handle, size_t *archCycles, size_t *estCycles)
A structure for a 1 dimensional buffer descriptor.
A structure for a 2 dimensional buffer descriptor.
Structure containing the parameters to initialize the kernel.
uint16_t numBins
Parameter indicating the number of bins in the distribution (<= 256)
uint8_t offset
Parameter indicating distribution offset.
uint16_t range
Parameter indicating distribution range (<= 256)
uint8_t lastBlock
Flag that indicates if the function call is the final call for the image (0: intermediate call,...
Structure that is reserved for internal use by the kernel.
VXLIB_histogram_InitArgs pKerInitArgs
Initargs of the kernel.
uint8_t bufPredicateStore[PRIVATE_BUFSIZE_PREDICATESTORE]
size_t numBlocks
Number of blocks to be processed after SIMDification.
size_t height
Height of image
uint8_t bufPblock[VXLIB_HISTOGRAM_IXX_IXX_OXX_PBLOCK_SIZE]
Array to hold SE/SA params.
size_t strideInElements
Stride of input0 in elements.
size_t width
Width of image