/* Origin (0) from which the offsets below are derived. */
/* NOTE(review): presumably these are offsets into pKerPrivArgs->bufPblock; units and use sites are not visible here - confirm. */
53 #define SE_PARAM_BASE (0x0000)
/* Offset of the SE0 parameters: identical to the base. */
54 #define SE_SE0_PARAM_OFFSET (SE_PARAM_BASE)
/* Offset of the SE1 parameters: SE_PARAM_SIZE past the SE0 offset (SE_PARAM_SIZE is defined outside this view). */
55 #define SE_SE1_PARAM_OFFSET (SE_SE0_PARAM_OFFSET + SE_PARAM_SIZE)
/* Offset of the last-vector AND mask. */
/* NOTE(review): this expands to the same value as SE_SE0_PARAM_OFFSET, so the two regions overlap; presumably only one of them is used per configuration - confirm against the init code. */
56 #define LAST_VEC_AND_MASK_OFFSET (SE_PARAM_BASE)
61 template <
typename dataType>
71 uint8_t *pBlock = pKerPrivArgs->
bufPblock;
72 uint32_t blockSize = pKerPrivArgs->
blockSize;
74 typedef typename c7x::make_full_vector<dataType>::type Vec;
75 uint32_t eleCount = (int32_t) c7x::element_count_of<Vec>::value;
76 uint32_t numVecs = (blockSize + eleCount - 1) / eleCount;
82 uint32_t remElements = numVecs * eleCount - blockSize;
83 for (int32_t i = 0; i < (int32_t) eleCount; i++) {
84 pAndMask[i] = (dataType) 0;
86 for (int32_t i = 0; i < (int32_t) (eleCount - remElements); i++) {
87 pAndMask[i] = (dataType) 0xFFFFFFFFFFFFFFFFU;
95 __SE_TEMPLATE_v1 se0Params;
96 __SE_TEMPLATE_v1 se1Params;
97 __SE_ELETYPE SE_ELETYPE;
98 __SE_VECLEN SE_VECLEN;
100 SE_VECLEN = c7x::se_veclen<Vec>::value;
101 SE_ELETYPE = c7x::se_eletype<Vec>::value;
103 se0Params = __gen_SE_TEMPLATE_v1();
105 se0Params.ICNT0 = (numVecs - pKerPrivArgs->
mainLoopCount) * eleCount;
106 se0Params.ELETYPE = SE_ELETYPE;
107 se0Params.VECLEN = SE_VECLEN;
108 se0Params.DIMFMT = __SE_DIMFMT_1D;
110 se1Params = __gen_SE_TEMPLATE_v1();
112 se1Params.ICNT0 = blockSize - se0Params.ICNT0;
113 se1Params.ELETYPE = SE_ELETYPE;
114 se1Params.VECLEN = SE_VECLEN;
115 se1Params.DIMFMT = __SE_DIMFMT_1D;
169 template <
typename UnsignedDataType>
172 uint8_t *restrict pBlock,
173 int32_t mainLoopCount)
177 uint32_t *restrict pOutLocal = (uint32_t *) pOut;
179 typedef typename c7x::make_full_vector<UnsignedDataType>::type Vec;
182 Vec *pInLocal = (Vec *) pIn;
183 for (int32_t counter = 0; counter < (mainLoopCount - 1); counter++) {
187 Vec lastVec = *pInLocal;
189 lastVec &= lastVecAndMask;
192 UnsignedDataType mask_final = (UnsignedDataType) c7x_horizontal_max(mask);
197 int num_bits =
sizeof(UnsignedDataType) * 8;
199 using SignedDataType = std::make_signed_t<UnsignedDataType>;
200 shift = __norm((SignedDataType) mask_final);
201 out_val = (uint32_t) (shift + 1);
202 if (((uint64_t) 1 << (uint64_t)(num_bits - 1)) & mask_final) {
205 *pOutLocal = out_val;
210 template <
typename SignedDataType>
213 uint8_t *restrict pBlock,
214 int32_t mainLoopCount)
218 uint32_t *restrict pOutLocal = (uint32_t *) pOut;
220 typedef typename c7x::make_full_vector<SignedDataType>::type Vec;
223 Vec *pInLocal = (Vec *) pIn;
225 for (int32_t counter = 0; counter < (mainLoopCount - 1); counter++) {
229 Vec lastVec = *pInLocal;
231 mask |= __abs(lastVec & lastVecAndMask);
233 using UnsignedDataType = std::make_unsigned_t<SignedDataType>;
234 typedef typename c7x::make_full_vector<UnsignedDataType>::type UVec;
235 SignedDataType mask_final = (SignedDataType) c7x_horizontal_max(c7x::reinterpret<UVec>(mask));
236 out_val = (uint32_t) __norm(mask_final);
237 *pOutLocal = out_val;
246 uint32_t *restrict pOutLocal = (uint32_t *) pOut;
248 typedef typename c7x::make_full_vector<UnsignedDataType>::type Vec;
252 #pragma MUST_ITERATE(4, , 1)
253 for (uint32_t counter = 0; counter < mainLoopCount; counter++) {
254 Vec b0 = c7x::strm_eng<0, Vec>::get_adv();
255 Vec b1 = c7x::strm_eng<1, Vec>::get_adv();
261 UnsignedDataType mask_final = (UnsignedDataType) c7x_horizontal_max(mask0);
266 int num_bits =
sizeof(UnsignedDataType) * 8;
268 using SignedDataType = std::make_signed_t<UnsignedDataType>;
269 shift = __norm((SignedDataType) mask_final);
270 out_val = (uint32_t) (shift + 1);
271 if (((uint64_t) 1 << (uint64_t)(num_bits - 1)) & mask_final) {
274 *pOutLocal = out_val;
280 template <
typename SignedDataType>
inline void bexp_exec_ci_signed(
void *restrict pOut, uint32_t mainLoopCount)
284 uint32_t *restrict pOutLocal = (uint32_t *) pOut;
286 typedef typename c7x::make_full_vector<SignedDataType>::type Vec;
290 #pragma MUST_ITERATE(4, , 1)
291 for (uint32_t counter = 0; counter < mainLoopCount; counter++) {
292 Vec b0 = c7x::strm_eng<0, Vec>::get_adv();
293 Vec b1 = c7x::strm_eng<1, Vec>::get_adv();
298 using UnsignedDataType = std::make_unsigned_t<SignedDataType>;
299 typedef typename c7x::make_full_vector<UnsignedDataType>::type UVec;
300 SignedDataType mask_final = (SignedDataType) c7x_horizontal_max(c7x::reinterpret<UVec>(mask0));
301 out_val = (uint32_t) __norm(mask_final);
302 *pOutLocal = out_val;
320 uint8_t *pBlock = pKerPrivArgs->
bufPblock;
323 __SE_TEMPLATE_v1 se0Params;
324 __SE_TEMPLATE_v1 se1Params;
327 __SE0_OPEN((int8_t *) pIn, se0Params);
330 bexp_exec_ci_signed<int8_t>(pOut, mainLoopCount);
336 bexp_exec_ci_signed_small_inputs<int8_t>(pIn, pOut, pBlock, mainLoopCount);
350 uint8_t *pBlock = pKerPrivArgs->
bufPblock;
353 __SE_TEMPLATE_v1 se0Params;
354 __SE_TEMPLATE_v1 se1Params;
357 __SE0_OPEN((uint8_t *) pIn, se0Params);
360 bexp_exec_ci_unsigned<uint8_t>(pOut, mainLoopCount);
366 bexp_exec_ci_unsigned_small_inputs<uint8_t>(pIn, pOut, pBlock, mainLoopCount);
380 uint8_t *pBlock = pKerPrivArgs->
bufPblock;
383 __SE_TEMPLATE_v1 se0Params;
384 __SE_TEMPLATE_v1 se1Params;
387 __SE0_OPEN((int16_t *) pIn, se0Params);
390 bexp_exec_ci_signed<int16_t>(pOut, mainLoopCount);
396 bexp_exec_ci_signed_small_inputs<int16_t>(pIn, pOut, pBlock, mainLoopCount);
410 uint8_t *pBlock = pKerPrivArgs->
bufPblock;
413 __SE_TEMPLATE_v1 se0Params;
414 __SE_TEMPLATE_v1 se1Params;
417 __SE0_OPEN((uint16_t *) pIn, se0Params);
420 bexp_exec_ci_unsigned<uint16_t>(pOut, mainLoopCount);
426 bexp_exec_ci_unsigned_small_inputs<uint16_t>(pIn, pOut, pBlock, mainLoopCount);
440 uint8_t *pBlock = pKerPrivArgs->
bufPblock;
443 __SE_TEMPLATE_v1 se0Params;
444 __SE_TEMPLATE_v1 se1Params;
447 __SE0_OPEN((int32_t *) pIn, se0Params);
450 bexp_exec_ci_signed<int32_t>(pOut, mainLoopCount);
456 bexp_exec_ci_signed_small_inputs<int32_t>(pIn, pOut, pBlock, mainLoopCount);
470 uint8_t *pBlock = pKerPrivArgs->
bufPblock;
473 __SE_TEMPLATE_v1 se0Params;
474 __SE_TEMPLATE_v1 se1Params;
477 __SE0_OPEN((uint32_t *) pIn, se0Params);
480 bexp_exec_ci_unsigned<uint32_t>(pOut, mainLoopCount);
486 bexp_exec_ci_unsigned_small_inputs<uint32_t>(pIn, pOut, pBlock, mainLoopCount);
500 uint8_t *pBlock = pKerPrivArgs->
bufPblock;
503 __SE_TEMPLATE_v1 se0Params;
504 __SE_TEMPLATE_v1 se1Params;
507 __SE0_OPEN((int32_t *) pIn, se0Params);
510 bexp_exec_ci_signed<int64_t>(pOut, mainLoopCount);
516 bexp_exec_ci_signed_small_inputs<int64_t>(pIn, pOut, pBlock, mainLoopCount);
530 uint8_t *pBlock = pKerPrivArgs->
bufPblock;
533 __SE_TEMPLATE_v1 se0Params;
534 __SE_TEMPLATE_v1 se1Params;
537 __SE0_OPEN((uint32_t *) pIn, se0Params);
540 bexp_exec_ci_unsigned<uint64_t>(pOut, mainLoopCount);
546 bexp_exec_ci_unsigned_small_inputs<uint64_t>(pIn, pOut, pBlock, mainLoopCount);
template DSPLIB_STATUS DSPLIB_bexp_init_ci< uint32_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_bexp_InitArgs *pKerInitArgs)
template DSPLIB_STATUS DSPLIB_bexp_init_ci< int16_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_bexp_InitArgs *pKerInitArgs)
DSPLIB_STATUS DSPLIB_bexp_init_ci(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_bexp_InitArgs *pKerInitArgs)
This function is the initialization function for the C7x implementation of the kernel....
void bexp_exec_ci_unsigned(void *restrict pOut, uint32_t mainLoopCount)
void bexp_exec_ci_signed_small_inputs(void *restrict pIn, void *restrict pOut, uint8_t *restrict pBlock, int32_t mainLoopCount)
#define SE_SE0_PARAM_OFFSET
DSPLIB_STATUS DSPLIB_bexp_exec_ci< uint64_t >(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
DSPLIB_STATUS DSPLIB_bexp_exec_ci< uint16_t >(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
DSPLIB_STATUS DSPLIB_bexp_exec_ci< int16_t >(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
template DSPLIB_STATUS DSPLIB_bexp_init_ci< uint16_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_bexp_InitArgs *pKerInitArgs)
template DSPLIB_STATUS DSPLIB_bexp_init_ci< uint64_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_bexp_InitArgs *pKerInitArgs)
DSPLIB_STATUS DSPLIB_bexp_exec_ci< int8_t >(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
#define SE_SE1_PARAM_OFFSET
template DSPLIB_STATUS DSPLIB_bexp_init_ci< int32_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_bexp_InitArgs *pKerInitArgs)
void bexp_exec_ci_signed(void *restrict pOut, uint32_t mainLoopCount)
template DSPLIB_STATUS DSPLIB_bexp_init_ci< int8_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_bexp_InitArgs *pKerInitArgs)
template DSPLIB_STATUS DSPLIB_bexp_init_ci< uint8_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_bexp_InitArgs *pKerInitArgs)
void bexp_exec_ci_unsigned_small_inputs(void *restrict pIn, void *restrict pOut, uint8_t *restrict pBlock, int32_t mainLoopCount)
DSPLIB_STATUS DSPLIB_bexp_exec_ci< uint8_t >(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
DSPLIB_STATUS DSPLIB_bexp_exec_ci< int64_t >(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
DSPLIB_STATUS DSPLIB_bexp_exec_ci< uint32_t >(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
template DSPLIB_STATUS DSPLIB_bexp_init_ci< int64_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_bexp_InitArgs *pKerInitArgs)
DSPLIB_STATUS DSPLIB_bexp_exec_ci< int32_t >(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
#define LAST_VEC_AND_MASK_OFFSET
Header file for kernel's internal use. For the kernel's interface, please see DSPLIB_bexp.
#define DSPLIB_BEXP_IXX_IXX_OXX_MIN_VECS_TO_ENABLE_SE_READS
Macro to define the minimum number of vector lengths that the input data should occupy before SE is u...
#define DSPLIB_DEBUGPRINTFN(N, fmt,...)
DSPLIB_STATUS_NAME
The enumeration of all status codes.
void * DSPLIB_kernelHandle
Handle type for DSPLIB operations.
Structure containing the parameters to initialize the kernel.
Structure that is reserved for internal use by the kernel.
uint32_t se1StartAddressByteOffset
The offset in bytes of SE1 start address with respect to start address of the input data....
bool isSEReadEnabled
Flag indicating if SE reads are enabled, set by DSPLIB_bexp_init that will be retrieved and used by D...
uint32_t blockSize
Size of input buffer for different batches, set by DSPLIB_bexp_init, that will be retrieved and used by DSPLIB...
uint32_t mainLoopCount
Number of times to loop over data; with isSEReadEnabled == true it will be ceil_division(number of native...
uint8_t bufPblock[DSPLIB_BEXP_IXX_IXX_OXX_PBLOCK_SIZE]
A structure for a 1 dimensional buffer descriptor.