34 #include "c7x_scalable.h"
// Shorthand for the `value` member of c7x::element_count_of<x>.
35 #define ELEMENT_COUNT(x) c7x::element_count_of<x>::value
// Shorthand for the nested `type` of c7x::element_type_of<x>; the expansion starts with
// `typename`, so it is meant for use where a dependent type name is expected.
37 #define ELEMENT_TYPE(x) typename c7x::element_type_of<x>::type
// log_asinh_i: vector helper that maps inVec to outVec in these visible steps:
//   1. widen the input to double and take __recip of it,
//   2. keep only the bits of the reciprocal selected by the 0xFFFE0000 mask,
//   3. form r = (truncated reciprocal * input) - 1.0 and evaluate a polynomial in r,
//   4. derive an integer N from the truncated reciprocal and combine it with table
//      values T as T - ln2 * N,
//   5. sum r + T + pol in double, narrow to float, and substitute outVecMax for lanes
//      flagged by the eMax comparison.
// NOTE(review): this listing omits several original lines (opening brace, some
// declarations, the r2/r3/r4 and indexT/upperBitsIndexT/lowerBitsIndexT assignments, and
// the return statement); the comments below describe only what is visible.
63 template <
typename vecType>
static inline vecType
log_asinh_i(vecType inVec)
// Polynomial coefficients (C1..C5) and the two constants used by the final select.
69 vecType C1, C2, C3, C4, C5, eMax, outVecMax;
72 zero = (c7x::uint_vec) 0;
74 ln2 = (c7x::double_vec) 0.693147180559945;
75 C1 = (vecType) -0.2302894f;
76 C2 = (vecType) 0.1908169f;
77 C3 = (vecType) -0.2505905f;
78 C4 = (vecType) 0.3333164f;
79 C5 = (vecType) -0.5000002f;
// eMax equals the largest finite single-precision value; outVecMax is numerically
// close to ln(eMax) -- presumably the saturated result for inputs above eMax.
80 eMax = (vecType) 3.402823466e+38f;
81 outVecMax = (vecType) 88.72283905313f;
// Working vectors: float-typed polynomial terms, double-typed intermediates, and
// unsigned-integer views used for bit manipulation.
87 vecType pol, r1, r2, r3, r4, outVec;
88 c7x::double_vec inVecVals_odd, inVecVals_even, inVecVals_oddReciprocal, inVecVals_evenReciprocal,
89 inVecReciprocalApprox_8_15, inVecReciprocalApprox_0_7, inVecVals_8_15, inVecVals_0_7, rVals_0_7, rVals_8_15,
90 TVals_8_15, TVals_0_7, NVals_odd, NVals_even, NVals_0_7, NVals_8_15, pol_0_7, pol_8_15, outVec_8_15, outVec_0_7;
91 c7x::uint_vec inVecReciprocal_32_63, inVecReciprocalClr_32_63, inVecReciprocalApprox_32_63, indexT, upperBitsIndexT,
// Widen the float input into two double vectors and take the reciprocal of each.
100 inVecVals_odd = __high_float_to_double(inVec);
101 inVecVals_even = __low_float_to_double(inVec);
102 inVecVals_oddReciprocal = __recip(inVecVals_odd);
103 inVecVals_evenReciprocal = __recip(inVecVals_even);
// Gather 32-bit words of both reciprocal vectors into one uint_vec.
// Presumably these are bits 63-32 of each double, per the _32_63 suffix -- confirm.
107 inVecReciprocal_32_63 = c7x::reinterpret<c7x::uint_vec>(__permute_odd_odd_int(
108 MATHLIB_vperm_data_interweave_0_63, c7x::as_uchar_vec(inVecVals_oddReciprocal),
109 c7x::as_uchar_vec(inVecVals_evenReciprocal)));
// Clear the low 17 bits of every gathered word, leaving a truncated reciprocal.
112 inVecReciprocalClr_32_63 = inVecReciprocal_32_63 & 0xFFFE0000u;
// Interleave the masked words with zero words and reinterpret the result as doubles.
115 inVecReciprocalApprox_8_15 = c7x::reinterpret<c7x::double_vec>(__permute_high_high(
116 MATHLIB_vperm_data_interweave_0_63, c7x::as_uchar_vec(inVecReciprocalClr_32_63),
117 c7x::as_uchar_vec(zero)));
118 inVecReciprocalApprox_0_7 = c7x::reinterpret<c7x::double_vec>(
119 __permute_low_low(MATHLIB_vperm_data_interweave_0_63, c7x::as_uchar_vec(inVecReciprocalClr_32_63),
120 c7x::as_uchar_vec(zero)));
// Rearrange the widened inputs into _0_7 / _8_15 vectors to pair with the approximations
// above (presumably element-index order, per the suffixes -- confirm).
123 inVecVals_0_7 = c7x::reinterpret<c7x::double_vec>(
124 __permute_low_low(MATHLIB_vperm_data_dp_interweave_0_63, c7x::as_uchar_vec(inVecVals_odd),
125 c7x::as_uchar_vec(inVecVals_even)));
126 inVecVals_8_15 = c7x::reinterpret<c7x::double_vec>(
127 __permute_high_high(MATHLIB_vperm_data_dp_interweave_0_63, c7x::as_uchar_vec(inVecVals_odd),
128 c7x::as_uchar_vec(inVecVals_even)));
// r = (truncated reciprocal * input) - 1.0, computed in double precision.
131 rVals_0_7 = (inVecReciprocalApprox_0_7 * inVecVals_0_7) - 1.0;
132 rVals_8_15 = (inVecReciprocalApprox_8_15 * inVecVals_8_15) - 1.0;
// Narrow r to float and merge both halves into one vecType for the polynomial.
135 r1 = c7x::reinterpret<vecType>(
136 __permute_even_even_int(MATHLIB_vperm_data_0_63, c7x::as_uchar_vec(__double_to_float(rVals_8_15)),
137 c7x::as_uchar_vec(__double_to_float(rVals_0_7))));
// Polynomial in r with coefficients C1..C5.
// NOTE(review): r2, r3 and r4 are not assigned in the visible lines; presumably they are
// powers of r1 -- confirm against the full source.
143 pol = (C5 * r2) + ((C4 * r3) + ((((C2 * r1) + C3) + (C1 * r2)) * r4));
// Pull the words of the truncated reciprocals back into a uint_vec.
150 inVecReciprocalApprox_32_63 = c7x::reinterpret<c7x::uint_vec>(
151 __permute_odd_odd_int(MATHLIB_vperm_data_0_63, c7x::as_uchar_vec(inVecReciprocalApprox_8_15),
152 c7x::as_uchar_vec(inVecReciprocalApprox_0_7)));
// Shift out the top bit, keep the next 11 bits, and subtract 1023. If these words are the
// upper halves of doubles, this is the unbiased IEEE-754 exponent -- confirm.
154 N = c7x::convert<c7x::int_vec>(((inVecReciprocalApprox_32_63 << 1) >> 21) - 1023);
// Convert N to double and rearrange it into the same _0_7 / _8_15 layout as the r values.
157 NVals_odd = __high_int_to_double(N);
158 NVals_even = __low_int_to_double(N);
159 NVals_0_7 = c7x::reinterpret<c7x::double_vec>(__permute_low_low(MATHLIB_vperm_data_dp_interweave_0_63,
160 c7x::as_uchar_vec(NVals_odd),
161 c7x::as_uchar_vec(NVals_even)));
162 NVals_8_15 = c7x::reinterpret<c7x::double_vec>(__permute_high_high(MATHLIB_vperm_data_dp_interweave_0_63,
163 c7x::as_uchar_vec(NVals_odd),
164 c7x::as_uchar_vec(NVals_even)));
// Assemble double-precision T values from separate upper and lower 32-bit words.
// NOTE(review): upperBitsIndexT and lowerBitsIndexT are not assigned in the visible lines;
// presumably they come from a lookup table read -- confirm.
178 TVals_8_15 = c7x::reinterpret<c7x::double_vec>(
179 __permute_high_high(MATHLIB_vperm_data_interweave_0_63, c7x::as_uchar_vec(upperBitsIndexT),
180 c7x::as_uchar_vec(lowerBitsIndexT)));
181 TVals_0_7 = c7x::reinterpret<c7x::double_vec>(__permute_low_low(MATHLIB_vperm_data_interweave_0_63,
182 c7x::as_uchar_vec(upperBitsIndexT),
183 c7x::as_uchar_vec(lowerBitsIndexT)));
// Fold the exponent contribution into T: T = T - ln2 * N.
186 TVals_8_15 = TVals_8_15 - (ln2 * NVals_8_15);
187 TVals_0_7 = TVals_0_7 - (ln2 * NVals_0_7);
// Widen the float polynomial to double in the same _0_7 / _8_15 layout.
194 pol_0_7 = c7x::reinterpret<c7x::double_vec>(__permute_low_low(
195 MATHLIB_vperm_data_dp_interweave_0_63, c7x::as_uchar_vec(__high_float_to_double(pol)),
196 c7x::as_uchar_vec(__low_float_to_double(pol))));
197 pol_8_15 = c7x::reinterpret<c7x::double_vec>(__permute_high_high(
198 MATHLIB_vperm_data_dp_interweave_0_63, c7x::as_uchar_vec(__high_float_to_double(pol)),
199 c7x::as_uchar_vec(__low_float_to_double(pol))));
// Double-precision result: r + T + pol.
202 outVec_0_7 = rVals_0_7 + TVals_0_7 + pol_0_7;
203 outVec_8_15 = rVals_8_15 + TVals_8_15 + pol_8_15;
// Narrow both halves to float and merge them into the output vector.
206 outVec = c7x::reinterpret<vecType>(__permute_even_even_int(
207 MATHLIB_vperm_data_0_63, c7x::as_uchar_vec(__double_to_float(outVec_8_15)),
208 c7x::as_uchar_vec(__double_to_float(outVec_0_7))));
// Replace the result with outVecMax in lanes flagged by the eMax/inVec comparison
// (presumably lanes where eMax < inVec -- confirm the operand order of __cmp_lt_pred).
217 __vpred cmp_max = __cmp_lt_pred(eMax, inVec);
218 outVec = __select(cmp_max, outVecMax, outVec);
// sqrt_asinh_i: vector helper taking a and x. The visible lines start from __recip_sqrt(a),
// apply a correction term of the form 1.5 - d0 * p0 * 0.5, and finally substitute x into
// the result for lanes selected by the a / x2 equality comparison.
// NOTE(review): this looks like an iterative refinement of a reciprocal-square-root
// estimate, but the assignments of d0, r0, y and x2 and the return statement are not
// visible in this listing -- confirm against the full source.
224 template <
typename vecType>
static inline vecType
sqrt_asinh_i(vecType a, vecType x)
// Constants used by the correction step.
233 half = (vecType) 0.5;
234 OneP5 = (vecType) 1.5;
238 vecType p0, p1, r0, d0, y;
// Initial estimate from the reciprocal square root intrinsic.
240 p0 = __recip_sqrt(a);
// Correction factor; d0 is computed on a line not shown here.
244 p1 = OneP5 - d0 * p0 * half;
// For lanes where a compares equal to x2, use x as the result instead of the computed y
// (presumably the lanes where a is exactly x squared, going by the predicate name -- confirm).
250 __vpred cmp_xsqr = __cmp_eq_pred(a, x2);
252 y = __select(cmp_xsqr, x, y);
// Streaming-engine driver: per full vector it reads the input from SE0, forms the square
// and absolute value of the input, computes (sqrt_ * 0.5) + (x_abs * 0.5), and stores the
// result to pDst through the SA0 address generator.
// NOTE(review): the function-name line and the first parameters are not visible in this
// listing; the locals (sqrt_, x2) suggest this is MATHLIB_asinh_sqrt -- confirm. The
// assignment of sqrt_ is also not visible.
258 template <
typename T>
262 __SE_TEMPLATE_v1 *se0Params,
263 __SA_TEMPLATE_v1 *sa0Params)
266 size_t numBlocks = 0;
267 size_t remNumBlocks = 0;
// Full-width vector type for element type T.
270 typedef typename c7x::make_full_vector<T>::type vec;
// Number of whole vectors in `length`, and the leftover element count.
272 numBlocks = length / c7x::element_count_of<vec>::value;
273 remNumBlocks = length % c7x::element_count_of<vec>::value;
283 vec half = (vec) 0.5;
285 vec x_abs, x2, sqrt_;
// One iteration per whole vector; any use of remNumBlocks is not visible here.
287 for (
size_t i = 0; i < numBlocks; i++) {
// Fetch the next input vector from streaming engine 0 and advance the stream.
289 vec inVec_sqrt = c7x::strm_eng<0, vec>::get_adv();
291 x2 = inVec_sqrt * inVec_sqrt;
292 x_abs = __abs(inVec_sqrt);
// Average of sqrt_ and |x|, written as two multiplications by 0.5.
297 vec outVec_sqrt = (sqrt_ * half) + (x_abs * half);
// Predicated store through address generator 0: fetch the predicate, then fetch and
// advance the destination address, then store.
299 __vpred tmp = c7x::strm_agen<0, vec>::get_vpred();
300 vec *addr = c7x::strm_agen<0, vec>::get_adv(pDst);
301 __vstore_pred(tmp, addr, outVec_sqrt);
// Streaming-engine driver: per full vector it reads the input from SE0 and stores
// outVec_log to pDst through the SA0 address generator.
// NOTE(review): the function-name line, the first parameters and the computation of
// outVec_log are not visible in this listing; the locals suggest this is
// MATHLIB_asinh_log -- confirm.
308 template <
typename T>
312 __SE_TEMPLATE_v1 *se0Params,
313 __SA_TEMPLATE_v1 *sa0Params)
316 size_t numBlocks = 0;
317 size_t remNumBlocks = 0;
// Full-width vector type for element type T.
320 typedef typename c7x::make_full_vector<T>::type vec;
// Number of whole vectors in `length`, and the leftover element count.
322 numBlocks = length / c7x::element_count_of<vec>::value;
323 remNumBlocks = length % c7x::element_count_of<vec>::value;
// Natural log of 2 as a vector constant; its use is not visible in these lines.
331 vec ln2 = (vec) 0.69314718056;
// One iteration per whole vector; any use of remNumBlocks is not visible here.
333 for (
size_t i = 0; i < numBlocks; i++) {
// Fetch the next input vector from streaming engine 0 and advance the stream.
335 vec inVec_log = c7x::strm_eng<0, vec>::get_adv();
// Predicated store through address generator 0.
339 __vpred tmp = c7x::strm_agen<0, vec>::get_vpred();
340 vec *addr = c7x::strm_agen<0, vec>::get_adv(pDst);
341 __vstore_pred(tmp, addr, outVec_log);
// Streaming-engine driver that combines two input streams per full vector: SE0 supplies
// inVec_log and SE1 supplies inVec. It picks between a polynomial estimate in inVec and
// the sign-adjusted inVec_log value based on |inVec| versus pol_bound, then stores a
// vector to pDst through the SA0 address generator.
// NOTE(review): the function-name line, the first parameters, the declarations of
// c2/c4/c6/pol_bound, the x4/x6 computations and the assignment of outVec are not visible
// in this listing; the locals suggest this is MATHLIB_asinh_polyEst -- confirm.
348 template <
typename T>
352 __SE_TEMPLATE_v1 *se0Params,
353 __SA_TEMPLATE_v1 *sa0Params)
356 size_t numBlocks = 0;
357 size_t remNumBlocks = 0;
// Full-width vector type for element type T.
360 typedef typename c7x::make_full_vector<T>::type vec;
// Number of whole vectors in `length`, and the leftover element count.
362 numBlocks = length / c7x::element_count_of<vec>::value;
363 remNumBlocks = length % c7x::element_count_of<vec>::value;
375 vec zero = (vec) 0.0;
// Coefficients applied to x2, x4 and x6 in the polynomial below.
377 c2 = (vec) -0.166605362341955;
378 c4 = (vec) 0.0734464812833510;
379 c6 = (vec) -0.0330279320352987;
// Lanes with |inVec| at or below this bound take the polynomial result.
381 pol_bound = (vec) 0.5;
// One iteration per whole vector; any use of remNumBlocks is not visible here.
383 for (
size_t i = 0; i < numBlocks; i++) {
384 vec inVec_log = c7x::strm_eng<0, vec>::get_adv();
385 vec inVec = c7x::strm_eng<1, vec>::get_adv();
// sign is -1.0 in lanes where inVec < zero and +1.0 elsewhere.
387 vec sign = (vec) 1.0;
389 __vpred cmp_sign = __cmp_lt_pred(inVec, zero);
390 sign = __select(cmp_sign, -sign, sign);
393 vec x2 = inVec * inVec;
// pol = inVec * (1 + c2*x2 + c4*x4 + c6*x6); x4 and x6 are computed on lines not shown.
397 vec pol = (x2 * c2) + (x4 * c4) + (x6 * c6);
398 pol = (pol * inVec) + inVec;
// Default result: the SE0 value with the sign of inVec applied.
401 vec res = inVec_log * sign;
403 vec x_abs = __abs(inVec);
// Use the polynomial estimate where |inVec| <= pol_bound.
405 __vpred cmp_bound = __cmp_le_pred(x_abs, pol_bound);
406 res = __select(cmp_bound, pol, res);
// Predicated store through address generator 0. The stored vector is outVec, whose
// assignment (presumably from res) is not visible here.
410 __vpred tmp = c7x::strm_agen<0, vec>::get_vpred();
411 vec *addr = c7x::strm_agen<0, vec>::get_adv(pDst);
412 __vstore_pred(tmp, addr, outVec);
// MATHLIB_asinh_vector: takes an element count and source/destination pointers. The
// visible lines only perform setup: they split `length` into whole vectors plus a
// remainder and create default SE/SA parameter templates.
// NOTE(review): the remainder of the body is not visible in this listing.
419 template <
typename T>
static inline void MATHLIB_asinh_vector(
size_t length, T *restrict pSrc, T *restrict pDst)
422 size_t numBlocks = 0;
423 size_t remNumBlocks = 0;
// Full-width vector type for element type T.
426 typedef typename c7x::make_full_vector<T>::type vec;
// Number of whole vectors in `length`, and the leftover element count.
428 numBlocks = length / c7x::element_count_of<vec>::value;
429 remNumBlocks = length % c7x::element_count_of<vec>::value;
// Generated streaming-engine and streaming-address-generator parameter templates.
435 __SE_TEMPLATE_v1 se0Params = __gen_SE_TEMPLATE_v1();
436 __SA_TEMPLATE_v1 sa0Params = __gen_SA_TEMPLATE_v1();
static void MATHLIB_asinh_sqrt(size_t length, T *restrict pSrc, T *restrict pDst, __SE_TEMPLATE_v1 *se0Params, __SA_TEMPLATE_v1 *sa0Params)
MATHLIB_STATUS MATHLIB_asinh(size_t length, T *restrict pSrc, T *restrict pDst)
template MATHLIB_STATUS MATHLIB_asinh< float >(size_t length, float *pSrc, float *pDst)
static vecType sqrt_asinh_i(vecType a, vecType x)
static void MATHLIB_asinh_polyEst(size_t length, T *restrict pSrc, T *restrict pDst, __SE_TEMPLATE_v1 *se0Params, __SA_TEMPLATE_v1 *sa0Params)
static vecType log_asinh_i(vecType inVec)
static void MATHLIB_asinh_vector(size_t length, T *restrict pSrc, T *restrict pDst)
static void MATHLIB_asinh_log(size_t length, T *restrict pSrc, T *restrict pDst, __SE_TEMPLATE_v1 *se0Params, __SA_TEMPLATE_v1 *sa0Params)
MATHLIB_STATUS MATHLIB_asinh_sp(size_t length, float *pSrc, float *pDst)
This function is the C interface for MATHLIB_asinh. It accepts float pointers.
#define MATHLIB_LOGTABLE_OFFSET
static c7x::uint_vec MATHLIB_LUTReadLowerBits(vecType vecOffset)
This method reads bits 31-0 of the LUT value at vecOffset.
static c7x::uint_vec MATHLIB_LUTReadUpperBits(vecType vecOffset)
This method reads bits 63-32 of the LUT value at vecOffset.
static void MATHLIB_SE0SE1SA0Open(__SE_TEMPLATE_v1 *se0Params, __SA_TEMPLATE_v1 *sa0Params, T *pSrc0, T *pSrc1)
This method performs SE0, SE1, and SA0 open.
static void MATHLIB_SE0SA0Close()
This method performs SE0 and SA0 close.
static void MATHLIB_SE0SE1SA0Close()
This method performs SE0, SE1, and SA0 close.
static void MATHLIB_SE0SA01DSequentialInit(__SE_TEMPLATE_v1 *se0Params, __SA_TEMPLATE_v1 *sa0Params, size_t length, T *pSrc, T *pDst)
static MATHLIB_STATUS MATHLIB_checkParams(size_t length, T *pSrc, T *pDst)
This method performs parameter checks for a MATHLIB function.
static void MATHLIB_SE0SA0Open(__SE_TEMPLATE_v1 *se0Params, __SA_TEMPLATE_v1 *sa0Params, T *pSrc)
This method performs SE0 and SA0 open.
MATHLIB_STATUS_NAME
The enumeration of all status codes.