43 #include "../common/c71/DSPLIB_inlines.h"
47 #include "c7x_scalable.h"
// Offsets of the SE0 and SA0 parameter entries: SE0 sits at the base (0) and
// SA0 follows it by SE_PARAM_SIZE (SE_PARAM_SIZE is defined outside this listing).
// NOTE(review): the statements that use these offsets are not visible here;
// presumably they index into the kernel's bufPblock -- confirm.
56 #define SE_PARAM_BASE (0x0000)
57 #define SE_SE0_PARAM_OFFSET (SE_PARAM_BASE)
58 #define SE_SA0_PARAM_OFFSET (SE_SE0_PARAM_OFFSET + SE_PARAM_SIZE)
// Body fragment of a function templated on dataType. The signature, braces and
// several statements are missing from this listing; presumably this is the
// generic DSPLIB_recip_init_ci<dataType> -- confirm against the full file.
// The visible code fills one SE template and one SA template describing a
// 1-D pass over blockSize elements.
61 template <
typename dataType>
// Number of elements to process, taken from the kernel's private arguments.
69 uint32_t blockSize = pKerPrivArgs->
blockSize;
71 __SE_TEMPLATE_v1 se0Params;
72 __SA_TEMPLATE_v1 sa0Params;
74 __SE_ELETYPE SE_ELETYPE;
75 __SE_VECLEN SE_VECLEN;
76 __SA_VECLEN SA_VECLEN;
// Parameter block owned by the kernel's private arguments.
// NOTE(review): its use is not visible in this listing; presumably the
// templates built below are stored into it -- confirm.
78 uint8_t *pBlock = pKerPrivArgs->
bufPblock;
// Full-width vector type for dataType; the SE/SA vector lengths and the SE
// element type are all derived from it.
80 typedef typename c7x::make_full_vector<dataType>::type vec;
81 SE_VECLEN = c7x::se_veclen<vec>::value;
82 SA_VECLEN = c7x::sa_veclen<vec>::value;
83 SE_ELETYPE = c7x::se_eletype<vec>::value;
// eleCount is only used by the print below in the visible lines.
// NOTE(review): the sibling fragments guard this print with
// #if DSPLIB_DEBUGPRINT; the guard line may simply be missing here -- confirm.
86 int32_t eleCount = c7x::element_count_of<vec>::value;
87 printf(
"Enter eleCount %d\n", eleCount);
// SE0: start from the generated template, then describe a 1-D stream of
// blockSize elements of the type/vector length chosen above.
93 se0Params = __gen_SE_TEMPLATE_v1();
95 se0Params.ICNT0 = blockSize;
96 se0Params.ELETYPE = SE_ELETYPE;
97 se0Params.VECLEN = SE_VECLEN;
98 se0Params.DIMFMT = __SE_DIMFMT_1D;
// SA0: start from the generated template; 1-D with ICNT0 = DIM1 = blockSize.
103 sa0Params = __gen_SA_TEMPLATE_v1();
105 sa0Params.ICNT0 = blockSize;
106 sa0Params.DIM1 = blockSize;
107 sa0Params.VECLEN = SA_VECLEN;
108 sa0Params.DIMFMT = __SA_DIMFMT_1D;
// Body fragment of the int16_t variant of the init routine (signature, braces
// and several statements are missing from this listing; presumably the
// DSPLIB_recip_init_ci<int16_t> specialization -- confirm).
// Differs from the generic fragment in that the SE vector length comes from
// c7x::int_vec and a 2x sign-extending PROMOTE is configured on SE0.
// Number of elements to process, taken from the kernel's private arguments.
136 uint32_t blockSize = pKerPrivArgs->
blockSize;
138 __SE_TEMPLATE_v1 se0Params;
139 __SA_TEMPLATE_v1 sa0Params;
141 __SE_ELETYPE SE_ELETYPE;
142 __SE_VECLEN SE_VECLEN;
143 __SA_VECLEN SA_VECLEN;
144 __SE_PROMOTE SE_PROMOTE;
// NOTE(review): use of pBlock is not visible in this listing.
146 uint8_t *pBlock = pKerPrivArgs->
bufPblock;
// vec is the full-width int16_t vector. The SE element type and the SA vector
// length are derived from vec, while the SE vector length is derived from
// c7x::int_vec (the type the exec code reads from SE0).
148 typedef typename c7x::make_full_vector<int16_t>::type vec;
149 SE_VECLEN = c7x::se_veclen<c7x::int_vec>::value;
150 SA_VECLEN = c7x::sa_veclen<vec>::value;
151 SE_ELETYPE = c7x::se_eletype<vec>::value;
152 SE_PROMOTE = __SE_PROMOTE_2X_SIGNEXT;
// Debug-only print (the matching #endif is not present in this listing).
154 #if DSPLIB_DEBUGPRINT
155 int32_t eleCount = c7x::element_count_of<c7x::int_vec>::value;
156 printf(
"Enter eleCount %d\n", eleCount);
// SE0: generated template, 1-D stream of blockSize elements with promotion.
160 se0Params = __gen_SE_TEMPLATE_v1();
162 se0Params.ICNT0 = blockSize;
163 se0Params.ELETYPE = SE_ELETYPE;
164 se0Params.VECLEN = SE_VECLEN;
165 se0Params.DIMFMT = __SE_DIMFMT_1D;
166 se0Params.PROMOTE = SE_PROMOTE;
169 sa0Params = __gen_SA_TEMPLATE_v1();
// NOTE(review): two alternative ICNT0/DIM1 settings follow (blockSize vs
// 2 * blockSize). The conditions selecting between them are missing from this
// listing; presumably they depend on the configured store style -- confirm.
171 sa0Params.ICNT0 = blockSize;
172 sa0Params.DIM1 = blockSize;
175 sa0Params.ICNT0 = 2 * blockSize;
176 sa0Params.DIM1 = 2 * blockSize;
179 sa0Params.VECLEN = SA_VECLEN;
180 sa0Params.DIMFMT = __SA_DIMFMT_1D;
// Body fragment of the generic execution routine (the debug string below names
// it DSPLIB_recip_exec_ci). The signature, braces, the statements that load
// se0Params/sa0Params, and the computation of `out` are missing from this
// listing. The visible code streams the input through SE0 one vector at a
// time and writes `out` through SA0 with a predicated store.
189 template <
typename dataType,
int32_t dataIn>
// Number of elements to process, taken from the kernel's private arguments.
193 uint32_t blockSize = pKerPrivArgs->
blockSize;
195 __SE_TEMPLATE_v1 se0Params;
196 __SA_TEMPLATE_v1 sa0Params;
// Typed views of the untyped input/output buffers.
198 dataType *restrict pInLocal = (dataType *) pIn;
199 dataType *restrict pOutLocal = (dataType *) pOut;
201 #if DSPLIB_DEBUGPRINT
202 printf(
"Enter DSPLIB_recip_exec_ci\n");
// eleCount = elements per full-width vector of dataType; it is the loop step.
205 typedef typename c7x::make_full_vector<dataType>::type vec;
206 int32_t eleCount = c7x::element_count_of<vec>::value;
208 #if DSPLIB_DEBUGPRINT
209 printf(
"Enter eleCount %d\n", eleCount);
// NOTE(review): the reads from pBlock that populate se0Params/sa0Params are
// not visible in this listing.
212 uint8_t *pBlock = pKerPrivArgs->
bufPblock;
// Open SE0 on the input buffer and SA0 for output addressing.
217 __SE0_OPEN(pInLocal, se0Params);
220 __SA0_OPEN(sa0Params);
222 #if DSPLIB_DEBUGPRINT
223 printf(
"DSPLIB_DEBUGPRINT blockSize %d\n", blockSize);
// One full vector per iteration.
// NOTE(review): counter is int32_t while blockSize is uint32_t, so the
// comparison is performed unsigned.
231 for (int32_t counter = 0; counter < blockSize; counter += eleCount) {
232 vec a = c7x::strm_eng<0, vec>::get_adv();
// Fetch SA0's predicate and next output address, then store `out` under that
// predicate (the definition of `out` is missing from this listing).
243 __vpred tmp = c7x::strm_agen<0, vec>::get_vpred();
244 vec *VB1 = c7x::strm_agen<0, vec>::get_adv(pOutLocal);
246 __vstore_pred(tmp, VB1, out);
// Body fragment of the int16_t execution routine (signature, braces, branch
// conditions and several statements are missing from this listing; presumably
// the DSPLIB_recip_exec_ci<int16_t, DSPLIB_INT16> specialization -- confirm).
// For each input the visible code derives an exponent (norm - 15) and a
// 15-bit fraction produced by 15 __sub_cond steps, restores the input's sign
// on the fraction, and stores both. Two loops are visible: one stores
// exponents and fractions to separate regions via SA0/SA1, the other stores
// them with an interleaving store.
// NOTE(review): presumably the loop is selected by the configured store style
// (horizontal stack vs interleaved) -- the selecting condition is not visible.
270 uint32_t blockSize = pKerPrivArgs->
blockSize;
273 __SE_TEMPLATE_v1 se0Params;
274 __SA_TEMPLATE_v1 sa0Params;
// Typed views of the untyped input/output buffers.
276 int16_t *restrict pInLocal = (int16_t *) pIn;
277 int16_t *restrict pOutLocal = (int16_t *) pOut;
279 #if DSPLIB_DEBUGPRINT
280 printf(
"Enter DSPLIB_recip_exec_ci\n");
// vec is the int16_t store vector; eleCount counts elements of c7x::int_vec,
// the type read from SE0 in the loops below.
283 typedef typename c7x::make_full_vector<int16_t>::type vec;
284 int32_t eleCount = c7x::element_count_of<c7x::int_vec>::value;
286 #if DSPLIB_DEBUGPRINT
287 printf(
"Enter eleCount %d\n", eleCount);
// NOTE(review): the reads from pBlock that populate se0Params/sa0Params are
// not visible in this listing.
290 uint8_t *pBlock = pKerPrivArgs->
bufPblock;
295 __SE0_OPEN(pInLocal, se0Params);
298 __SA0_OPEN(sa0Params);
// Splatted constants reused by both loops.
300 c7x::int_vec zero = (c7x::int_vec) 0;
301 c7x::int_vec one = (c7x::int_vec) 1;
302 c7x::int_vec fifteen = (c7x::int_vec) 15;
// SA1 is opened with the same template as SA0; it addresses the second
// output region (pOutLocal + blockSize) in the loop below.
309 __SA1_OPEN(sa0Params);
// ---- Loop 1: two int_vec reads per iteration, hence the 2 * eleCount step.
// NOTE(review): counter is int32_t while blockSize is uint32_t, so the
// comparison is performed unsigned.
312 for (int32_t counter = 0; counter < blockSize; counter += 2 * eleCount) {
315 c7x::int_vec a_hi = c7x::strm_eng<0, c7x::int_vec>::get_adv();
316 c7x::int_vec a_lo = c7x::strm_eng<0, c7x::int_vec>::get_adv();
// Predicate of lanes where zero > a, i.e. negative inputs (assuming
// __cmp_gt_pred(x, y) tests x > y).
318 __vpred cmp_sign_hi = __cmp_gt_pred(zero, a_hi);
319 __vpred cmp_sign_lo = __cmp_gt_pred(zero, a_lo);
// Remember the sign as a 0/1 flag per lane, then work on the magnitude.
322 c7x::int_vec neg_hi = __select(cmp_sign_hi, one, zero);
323 c7x::int_vec neg_lo = __select(cmp_sign_lo, one, zero);
325 a_hi = __select(cmp_sign_hi, -a_hi, a_hi);
326 a_lo = __select(cmp_sign_lo, -a_lo, a_lo);
// Normalize: shift each magnitude left by its __norm() count.
329 c7x::int_vec norm_hi = __norm(a_hi);
330 c7x::int_vec norm_lo = __norm(a_lo);
332 a_hi = a_hi << norm_hi;
333 a_lo = a_lo << norm_lo;
// Exponent output = norm - 15.
336 c7x::int_vec exp_hi = norm_hi - fifteen;
337 c7x::int_vec exp_lo = norm_lo - fifteen;
// Store the exponents of both halves through SA0, starting at pOutLocal.
352 __vpred tmp = c7x::strm_agen<0, vec>::get_vpred();
353 vec *VB1 = c7x::strm_agen<0, vec>::get_adv(pOutLocal);
357 __vstore_pred_packl_2src(tmp, VB1, exp_hi, exp_lo);
// Fraction: seed with 0x80000000 and apply __sub_cond against the normalized
// magnitude 15 times (presumably one conditional-subtract division step per
// call -- confirm against the intrinsic's documentation).
359 c7x::uint_vec ub_hi = (c7x::uint_vec) 0x80000000U;
360 c7x::uint_vec ub_lo = (c7x::uint_vec) 0x80000000U;
362 c7x::uint_vec ua_hi = c7x::reinterpret<c7x::uint_vec>(a_hi);
363 c7x::uint_vec ua_lo = c7x::reinterpret<c7x::uint_vec>(a_lo);
368 for (int32_t i = 0; i < 15; i++) {
370 ub_hi = __sub_cond(ub_hi, ua_hi);
371 ub_lo = __sub_cond(ub_lo, ua_lo);
// Keep only the low 15 bits of each result.
374 c7x::uint_vec divMask = (c7x::uint_vec) 0x7FFF;
376 ub_hi = ub_hi & divMask;
377 ub_lo = ub_lo & divMask;
379 c7x::int_vec b_hi = c7x::reinterpret<c7x::int_vec>(ub_hi);
380 c7x::int_vec b_lo = c7x::reinterpret<c7x::int_vec>(ub_lo);
// Re-apply the original sign to the fraction.
382 __vpred cmp_neg_hi = __cmp_eq_pred(neg_hi, one);
383 __vpred cmp_neg_lo = __cmp_eq_pred(neg_lo, one);
385 b_hi = __select(cmp_neg_hi, -b_hi, b_hi);
386 b_lo = __select(cmp_neg_lo, -b_lo, b_lo);
// Store the fractions through SA1, starting blockSize elements past pOutLocal.
396 tmp = c7x::strm_agen<1, vec>::get_vpred();
397 VB1 = c7x::strm_agen<1, vec>::get_adv(pOutLocal + blockSize);
400 __vstore_pred_packl_2src(tmp, VB1, b_hi, b_lo);
// ---- Loop 2: one int_vec read per iteration; exponent and fraction are
// written together by an interleaving store through SA0.
// NOTE(review): the normalizing shift of `a` and the application of divMask
// seen in loop 1 are not visible in this loop; those lines may be missing
// from the listing -- confirm against the full file.
410 for (int32_t counter = 0; counter < blockSize; counter += eleCount) {
413 c7x::int_vec a = c7x::strm_eng<0, c7x::int_vec>::get_adv();
// Sign handling as in loop 1: flag negative lanes, then take the magnitude.
415 __vpred cmp_sign = __cmp_gt_pred(zero, a);
418 c7x::int_vec neg = __select(cmp_sign, one, zero);
420 a = __select(cmp_sign, -a, a);
423 c7x::int_vec norm = __norm(a);
// Exponent output = norm - 15.
428 c7x::int_vec expVec = norm - fifteen;
// Fraction via 15 __sub_cond steps, seeded with 0x80000000.
432 c7x::uint_vec ub = (c7x::uint_vec) 0x80000000U;
433 c7x::uint_vec ua = c7x::reinterpret<c7x::uint_vec>(a);
437 for (int32_t i = 0; i < 15; i++) {
439 ub = __sub_cond(ub, ua);
442 c7x::uint_vec divMask = (c7x::uint_vec) 0x7FFF;
446 c7x::int_vec b = c7x::reinterpret<c7x::int_vec>(ub);
// Re-apply the original sign to the fraction.
448 __vpred cmp_neg = __cmp_eq_pred(neg, one);
450 b = __select(cmp_neg, -b, b);
// Reinterpret both results as int16_t vectors for the store.
460 vec exp_short = c7x::reinterpret<vec>(expVec);
461 vec frac_short = c7x::reinterpret<vec>(b);
463 __vpred tmp = c7x::strm_agen<0, vec>::get_vpred();
464 vec *VB1 = c7x::strm_agen<0, vec>::get_adv(pOutLocal);
467 __vstore_pred_interleave(tmp, VB1, exp_short, frac_short);
template DSPLIB_STATUS DSPLIB_recip_exec_ci< double, DSPLIB_FLOAT64 >(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
DSPLIB_STATUS DSPLIB_recip_init_ci< int16_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_recip_InitArgs *pKerInitArgs)
#define SE_SE0_PARAM_OFFSET
DSPLIB_STATUS DSPLIB_recip_init_ci(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_recip_InitArgs *pKerInitArgs)
This function is the initialization function for the C7x implementation of the kernel....
template DSPLIB_STATUS DSPLIB_recip_exec_ci< float, DSPLIB_FLOAT32 >(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
DSPLIB_STATUS DSPLIB_recip_exec_ci(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
This function is the main execution function for the C7x implementation of the kernel....
template DSPLIB_STATUS DSPLIB_recip_init_ci< float >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_recip_InitArgs *pKerInitArgs)
template DSPLIB_STATUS DSPLIB_recip_init_ci< double >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams1D_t *bufParamsIn, const DSPLIB_bufParams1D_t *bufParamsOut, const DSPLIB_recip_InitArgs *pKerInitArgs)
DSPLIB_STATUS DSPLIB_recip_exec_ci< int16_t, DSPLIB_INT16 >(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
#define SE_SA0_PARAM_OFFSET
Header file for kernel's internal use. For the kernel's interface, please see DSPLIB_recip.
DSPLIB_STATUS_NAME
The enumeration of all status codes.
void * DSPLIB_kernelHandle
Handle type for DSPLIB operations.
@ DSPLIB_ERR_NOT_IMPLEMENTED
DSPLIB_recip_storeStyle
enumeration to determine store style of the fixed point
@ DSPLIB_HSTACK_ST
input is fixed point, store pattern is horizontal stack.
@ DSPLIB_INTERLEAVE_ST
input is fixed point, store pattern is interleaved.
A structure for a 1 dimensional buffer descriptor.
Structure containing the parameters to initialize the kernel.
DSPLIB_recip_storeStyle storeStyle
Fixed Point Output Store Style
Structure that is reserved for internal use by the kernel.
int32_t blockSize
Size of input buffer for different batches DSPLIB_recip_init that will be retrieved and used by DSPLI...
uint8_t bufPblock[DSPLIB_RECIP_IXX_IXX_OXX_PBLOCK_SIZE]
DSPLIB_recip_InitArgs initArgs