// Shorthand for c7x::element_count_of<x>::value, where x is a type name
// (the same trait is applied to the vector typedef `vec` further down to size the blocks).
34 #define ELEMENT_COUNT(x) c7x::element_count_of<x>::value
// Vector helper taking two vectors `a` and `x` and returning a vector of the same type.
// Only part of the body is visible in this listing: the assignments of half, zero, p0,
// r0, d0, x2 and the initial value of y are on lines that are not shown here.
// NOTE(review): judging by the name and the 1.5 - d*p*half form below, this is presumably
// an iterative square-root refinement with `x` as the starting estimate — confirm
// against the complete source.
55 template <
typename vecType>
static inline vecType
sqrt_acosh_i(vecType a, vecType x)
57 vecType half, OneP5, zero;
// Broadcast the scalar 1.5 into a vector constant.
62 OneP5 = (vecType) 1.5;
64 vecType p0, p1, r0, d0, y;
// Refinement term built from d0 and p0 (both assigned on lines not shown).
70 p1 = OneP5 - d0 * p0 * half;
// Lanes where a equals x2 (x2 is defined on a line not shown) take x as the result.
76 __vpred cmp_xsqr = __cmp_eq_pred(a, x2);
77 y = __select(cmp_xsqr, x, y);
// Lanes where a equals `zero` are forced to `zero`.
80 __vpred cmp_zero = __cmp_eq_pred(a, zero);
81 y = __select(cmp_zero, zero, y);
// Vectorised logarithm stage of acosh. Reads one vector per iteration from streaming
// engine 0, evaluates a reciprocal/table/polynomial log approximation in double
// precision, applies range fix-ups and stores the result through stream address
// generator 0 to pDst.
// Parameters (as visible in the signature): se0Params / sa0Params are the stream
// templates, length is the element count, pSrc0 and pDst are the source/destination
// buffers. This listing is incomplete: several declarations and assignments
// (e.g. N, one, r2..r4, indexT, upperBitsIndexT, lowerBitsIndexT, outVec_0_7) are on
// lines that are not shown, and how the parameters other than length/pDst are used
// cannot be seen from here.
88 MATHLIB_acosh_log(__SE_TEMPLATE_v1 *se0Params, __SA_TEMPLATE_v1 *sa0Params,
size_t length, T *pSrc0, T *pDst)
93 size_t remNumBlocks = 0;
96 typedef typename c7x::make_full_vector<T>::type vec;
// Number of whole vectors in the input, and the leftover element count.
99 numBlocks = length / c7x::element_count_of<vec>::value;
100 remNumBlocks = length % c7x::element_count_of<vec>::value;
115 vec C1, C2, C3, C4, C5, eMax, outVecMax;
118 zero = (c7x::uint_vec) 0;
// ln(2) in double precision; used below to scale the exponent term N.
120 ln2 = (c7x::double_vec) 0.693147180559945;
// Coefficients of the polynomial evaluated into `pol` below.
121 C1 = (vec) -0.2302894f;
122 C2 = (vec) 0.1908169f;
123 C3 = (vec) -0.2505905f;
124 C4 = (vec) 0.3333164f;
125 C5 = (vec) -0.5000002f;
// eMax is the largest finite single-precision value; outVecMax is approximately ln(eMax).
126 eMax = (vec) 3.402823466e+38f;
127 outVecMax = (vec) 88.72283905313f;
// NOTE(review): 0x7FFFFFFF is the bit pattern of a float NaN, but this is a cast of an
// unsigned integer value to `vec`. If `vec` is a float vector this presumably converts
// the value (about 2.147e9) instead of reinterpreting the bits — confirm NaN is produced.
132 nan = (vec) 0x7FFFFFFFu;
// ln(2) in single precision; added to every result lane near the end of the loop.
133 ln2sp = (vec) 0.69314718056f;
// One iteration per full vector of input.
136 for (
size_t i = 0; i < numBlocks; i++) {
// Fetch the next input vector from streaming engine 0 and advance the stream.
137 vec inVec = c7x::strm_eng<0, vec>::get_adv();
143 vec pol, r1, r2, r3, r4, outVec;
144 c7x::double_vec inVecVals_odd, inVecVals_even, inVecVals_oddReciprocal, inVecVals_evenReciprocal,
145 inVecReciprocalApprox_8_15, inVecReciprocalApprox_0_7, inVecVals_8_15, inVecVals_0_7, rVals_0_7, rVals_8_15,
146 TVals_8_15, TVals_0_7, NVals_odd, NVals_even, NVals_0_7, NVals_8_15, pol_0_7, pol_8_15, outVec_8_15,
148 c7x::uint_vec inVecReciprocal_32_63, inVecReciprocalClr_32_63, inVecReciprocalApprox_32_63, indexT,
149 upperBitsIndexT, lowerBitsIndexT;
// Widen the float input into two double vectors and take the reciprocal of each.
157 inVecVals_odd = __high_float_to_double(inVec);
158 inVecVals_even = __low_float_to_double(inVec);
159 inVecVals_oddReciprocal = __recip(inVecVals_odd);
160 inVecVals_evenReciprocal = __recip(inVecVals_even);
// Gather 32-bit words of both reciprocal vectors into one uint vector
// (by the variable name, presumably bits 63-32 of each double — confirm the permute pattern).
164 inVecReciprocal_32_63 = c7x::as_uint_vec(__permute_odd_odd_int(MATHLIB_vperm_data_interweave_0_63,
165 c7x::as_uchar_vec(inVecVals_oddReciprocal),
166 c7x::as_uchar_vec(inVecVals_evenReciprocal)));
// Clear the low 17 bits of every gathered word, keeping a truncated reciprocal.
169 inVecReciprocalClr_32_63 = inVecReciprocal_32_63 & 0xFFFE0000U;
// Rebuild two double vectors from the masked words interleaved with zero words.
172 inVecReciprocalApprox_8_15 = c7x::reinterpret<c7x::double_vec>(__permute_high_high(
173 MATHLIB_vperm_data_interweave_0_63, c7x::as_uchar_vec(inVecReciprocalClr_32_63), c7x::as_uchar_vec(zero)));
174 inVecReciprocalApprox_0_7 = c7x::reinterpret<c7x::double_vec>(__permute_low_low(
175 MATHLIB_vperm_data_interweave_0_63, c7x::as_uchar_vec(inVecReciprocalClr_32_63), c7x::as_uchar_vec(zero)));
// Rearrange the widened inputs into the same low/high split as the reciprocal approximations.
178 inVecVals_0_7 = c7x::reinterpret<c7x::double_vec>(__permute_low_low(
179 MATHLIB_vperm_data_dp_interweave_0_63, c7x::as_uchar_vec(inVecVals_odd), c7x::as_uchar_vec(inVecVals_even)));
180 inVecVals_8_15 = c7x::reinterpret<c7x::double_vec>(__permute_high_high(
181 MATHLIB_vperm_data_dp_interweave_0_63, c7x::as_uchar_vec(inVecVals_odd), c7x::as_uchar_vec(inVecVals_even)));
// Residual r = (truncated reciprocal * input) - 1, computed in double precision.
184 rVals_0_7 = (inVecReciprocalApprox_0_7 * inVecVals_0_7) - 1.0;
185 rVals_8_15 = (inVecReciprocalApprox_8_15 * inVecVals_8_15) - 1.0;
// Narrow both residual halves to float and merge them into a single float vector r1.
188 r1 = c7x::reinterpret<vec>(__permute_even_even_int(MATHLIB_vperm_data_0_63,
189 c7x::as_uchar_vec(__double_to_float(rVals_8_15)),
190 c7x::as_uchar_vec(__double_to_float(rVals_0_7))));
// Polynomial correction built from C1..C5 and r1..r4
// (r2, r3, r4 are assigned on lines not shown; presumably powers of r1 — confirm).
196 pol = (C5 * r2) + ((C4 * r3) + ((((C2 * r1) + C3) + (C1 * r2)) * r4));
// Gather 32-bit words of the truncated reciprocals for exponent extraction.
203 inVecReciprocalApprox_32_63 =
204 c7x::as_uint_vec(__permute_odd_odd_int(MATHLIB_vperm_data_0_63, c7x::as_uchar_vec(inVecReciprocalApprox_8_15),
205 c7x::as_uchar_vec(inVecReciprocalApprox_0_7)));
// Shift out the top bit, keep the next 11 bits and subtract 1023. If each word is the
// upper half of an IEEE-754 double, this is the unbiased exponent of the reciprocal.
207 N = c7x::convert<c7x::int_vec>(((inVecReciprocalApprox_32_63 << 1) >> 21) - 1023);
// Widen N to double and rearrange it into the low/high split used above.
210 NVals_odd = __high_int_to_double(N);
211 NVals_even = __low_int_to_double(N);
212 NVals_0_7 = c7x::reinterpret<c7x::double_vec>(__permute_low_low(
213 MATHLIB_vperm_data_dp_interweave_0_63, c7x::as_uchar_vec(NVals_odd), c7x::as_uchar_vec(NVals_even)));
214 NVals_8_15 = c7x::reinterpret<c7x::double_vec>(__permute_high_high(
215 MATHLIB_vperm_data_dp_interweave_0_63, c7x::as_uchar_vec(NVals_odd), c7x::as_uchar_vec(NVals_even)));
// Assemble double values T from two 32-bit word vectors (upperBitsIndexT / lowerBitsIndexT
// are assigned on lines not shown; presumably table look-ups indexed by indexT — confirm).
228 TVals_8_15 = c7x::reinterpret<c7x::double_vec>(__permute_high_high(
229 MATHLIB_vperm_data_interweave_0_63, c7x::as_uchar_vec(upperBitsIndexT), c7x::as_uchar_vec(lowerBitsIndexT)));
230 TVals_0_7 = c7x::reinterpret<c7x::double_vec>(__permute_low_low(
231 MATHLIB_vperm_data_interweave_0_63, c7x::as_uchar_vec(upperBitsIndexT), c7x::as_uchar_vec(lowerBitsIndexT)));
// Fold the exponent contribution into T: T - ln2 * N.
234 TVals_8_15 = TVals_8_15 - (ln2 * NVals_8_15);
235 TVals_0_7 = TVals_0_7 - (ln2 * NVals_0_7);
// Widen the float polynomial result to double, split the same way as the other terms.
242 pol_0_7 = c7x::reinterpret<c7x::double_vec>(__permute_low_low(MATHLIB_vperm_data_dp_interweave_0_63,
243 c7x::as_uchar_vec(__high_float_to_double(pol)),
244 c7x::as_uchar_vec(__low_float_to_double(pol))));
245 pol_8_15 = c7x::reinterpret<c7x::double_vec>(__permute_high_high(MATHLIB_vperm_data_dp_interweave_0_63,
246 c7x::as_uchar_vec(__high_float_to_double(pol)),
247 c7x::as_uchar_vec(__low_float_to_double(pol))));
// Sum the three contributions in double precision: residual + table term + polynomial.
250 outVec_0_7 = rVals_0_7 + TVals_0_7 + pol_0_7;
251 outVec_8_15 = rVals_8_15 + TVals_8_15 + pol_8_15;
// Narrow both halves back to float and merge them into one output vector.
254 outVec = c7x::reinterpret<vec>(__permute_even_even_int(MATHLIB_vperm_data_0_63,
255 c7x::as_uchar_vec(__double_to_float(outVec_8_15)),
256 c7x::as_uchar_vec(__double_to_float(outVec_0_7))));
// Lanes whose input exceeds eMax are replaced by outVecMax.
265 __vpred cmp_max = __cmp_lt_pred(eMax, inVec);
266 outVec = __select(cmp_max, outVecMax, outVec);
// Add single-precision ln(2) to every lane (including lanes replaced just above).
268 outVec = outVec + ln2sp;
// Fetch the matching vector from streaming engine 1
// (by the variable name, presumably the unmodified acosh argument — confirm against the caller).
270 vec inVecOriginal = c7x::strm_eng<1, vec>::get_adv();
// Lanes whose SE1 value is below `one` (assigned on a line not shown) are replaced by `nan`.
272 __vpred cmp_bound = __cmp_lt_pred(inVecOriginal, one);
273 outVec = __select(cmp_bound, nan, outVec);
// Predicated store through stream address generator 0: fetch the lane predicate,
// advance the address relative to pDst, then write the result.
276 __vpred tmp = c7x::strm_agen<0, vec>::get_vpred();
277 vec *addr = c7x::strm_agen<0, vec>::get_adv(pDst);
278 __vstore_pred(tmp, addr, outVec);
// Vector stage that reads one vector per iteration from streaming engine 0, forms
// (sqrtVec * half) + (inVec * half) and stores it through stream address generator 0
// to pDst. The assignments of sqrtVec and half, and any stream setup, are on lines
// not shown in this listing.
// Parameters: length is the element count; pSrc / pDst are restrict-qualified
// source and destination buffers.
286 template <
typename T>
static inline void MATHLIB_acosh_vector(
size_t length, T *restrict pSrc, T *restrict pDst)
289 size_t numBlocks = 0;
290 size_t remNumBlocks = 0;
// NOTE(review): the vector type is built from `float` here rather than from the
// template parameter T (the log stage uses T) — confirm this is intentional.
293 typedef typename c7x::make_full_vector<float>::type vec;
// Default-generated stream engine / stream address generator templates.
295 __SE_TEMPLATE_v1 se0Params = __gen_SE_TEMPLATE_v1();
296 __SA_TEMPLATE_v1 sa0Params = __gen_SA_TEMPLATE_v1();
// Number of whole vectors in the input, and the leftover element count.
301 numBlocks = length / c7x::element_count_of<vec>::value;
302 remNumBlocks = length % c7x::element_count_of<vec>::value;
// One iteration per full vector of input.
320 for (
size_t i = 0; i < numBlocks; i++) {
// Fetch the next input vector from streaming engine 0 and advance the stream.
321 vec inVec = c7x::strm_eng<0, vec>::get_adv();
327 vec sqrtVec, temp, inVecSquare;
// Square of the input (consumed on lines not shown; presumably feeds the square-root helper).
329 inVecSquare = inVec * inVec;
// Scale sqrtVec and the input by `half` and add them.
332 temp = (sqrtVec * half) + (inVec * half);
// Predicated store through stream address generator 0.
334 __vpred tmp = c7x::strm_agen<0, vec>::get_vpred();
335 vec *addr = c7x::strm_agen<0, vec>::get_adv(pDst);
336 __vstore_pred(tmp, addr, temp);
static MATHLIB_STATUS MATHLIB_acosh_log(__SE_TEMPLATE_v1 *se0Params, __SA_TEMPLATE_v1 *sa0Params, size_t length, T *pSrc0, T *pDst)
template MATHLIB_STATUS MATHLIB_acosh< float >(size_t length, float *pSrc, float *pDst)
static vecType sqrt_acosh_i(vecType a, vecType x)
MATHLIB_STATUS MATHLIB_acosh(size_t length, T *restrict pSrc, T *restrict pDst)
static void MATHLIB_acosh_vector(size_t length, T *restrict pSrc, T *restrict pDst)
MATHLIB_STATUS MATHLIB_acosh_sp(size_t length, float *pSrc, float *pDst)
This function is the C interface for MATHLIB_acosh. It accepts float pointers.
#define MATHLIB_LOGTABLE_OFFSET
static c7x::uint_vec MATHLIB_LUTReadLowerBits(vecType vecOffset)
This method reads bits 31-0 of the LUT value at vecOffset.
static c7x::uint_vec MATHLIB_LUTReadUpperBits(vecType vecOffset)
This method reads bits 63-32 of the LUT value at vecOffset.
static void MATHLIB_SE0SE1SA0Open(__SE_TEMPLATE_v1 *se0Params, __SA_TEMPLATE_v1 *sa0Params, T *pSrc0, T *pSrc1)
This method performs SE0, SE1, and SA0 open.
static void MATHLIB_SE0SA0Close()
This method performs SE0 and SA0 close.
static void MATHLIB_SE0SE1SA0Close()
This method performs SE0, SE1, and SA0 close.
static void MATHLIB_SE0SA01DSequentialInit(__SE_TEMPLATE_v1 *se0Params, __SA_TEMPLATE_v1 *sa0Params, size_t length, T *pSrc, T *pDst)
static MATHLIB_STATUS MATHLIB_checkParams(size_t length, T *pSrc, T *pDst)
This method performs parameter checks for MATHLIB function.
static void MATHLIB_SE0SA0Open(__SE_TEMPLATE_v1 *se0Params, __SA_TEMPLATE_v1 *sa0Params, T *pSrc)
This method performs SE0 and SA0 open.
MATHLIB_STATUS_NAME
The enumeration of all status codes.