34 #define ELEMENT_COUNT(x) c7x::element_count_of<x>::value
57 MATHLIB_atanh_log(__SE_TEMPLATE_v1 *se0Params, __SA_TEMPLATE_v1 *sa0Params,
size_t length, T *pSrc0, T *pDst)
62 size_t remNumBlocks = 0;
65 typedef typename c7x::make_full_vector<T>::type vec;
68 numBlocks = length / c7x::element_count_of<vec>::value;
69 remNumBlocks = length % c7x::element_count_of<vec>::value;
84 vec C1, C2, C3, C4, C5, pol_bound, half;
87 zero = (c7x::uint_vec) 0;
89 ln2 = (c7x::double_vec) 0.693147180559945;
90 C1 = (vec) -0.2302894f;
91 C2 = (vec) 0.1908169f;
92 C3 = (vec) -0.2505905f;
93 C4 = (vec) 0.3333164f;
94 C5 = (vec) -0.5000002f;
95 pol_bound = (vec) 0.1f;
99 for (
size_t i = 0; i < numBlocks; i++) {
100 vec inVec = c7x::strm_eng<0, vec>::get_adv();
106 vec pol, r1, r2, r3, r4, outVec, inVecOriginal_abs;
107 c7x::double_vec inVecVals_odd, inVecVals_even, inVecVals_oddReciprocal, inVecVals_evenReciprocal,
108 inVecReciprocalApprox_8_15, inVecReciprocalApprox_0_7, inVecVals_8_15, inVecVals_0_7, rVals_0_7, rVals_8_15,
109 TVals_8_15, TVals_0_7, NVals_odd, NVals_even, NVals_0_7, NVals_8_15, pol_0_7, pol_8_15, outVec_8_15,
111 c7x::uint_vec inVecReciprocal_32_63, inVecReciprocalClr_32_63, inVecReciprocalApprox_32_63, indexT,
112 upperBitsIndexT, lowerBitsIndexT;
120 inVecVals_odd = __high_float_to_double(inVec);
121 inVecVals_even = __low_float_to_double(inVec);
122 inVecVals_oddReciprocal = __recip(inVecVals_odd);
123 inVecVals_evenReciprocal = __recip(inVecVals_even);
127 inVecReciprocal_32_63 = c7x::reinterpret<c7x::uint_vec>(
128 __permute_odd_odd_int(MATHLIB_vperm_data_interweave_0_63, c7x::as_uchar_vec(inVecVals_oddReciprocal),
129 c7x::as_uchar_vec(inVecVals_evenReciprocal)));
132 inVecReciprocalClr_32_63 = inVecReciprocal_32_63 & 0xFFFE0000U;
135 inVecReciprocalApprox_8_15 = c7x::reinterpret<c7x::double_vec>(__permute_high_high(
136 MATHLIB_vperm_data_interweave_0_63, c7x::as_uchar_vec(inVecReciprocalClr_32_63), c7x::as_uchar_vec(zero)));
137 inVecReciprocalApprox_0_7 = c7x::reinterpret<c7x::double_vec>(__permute_low_low(
138 MATHLIB_vperm_data_interweave_0_63, c7x::as_uchar_vec(inVecReciprocalClr_32_63), c7x::as_uchar_vec(zero)));
141 inVecVals_0_7 = c7x::reinterpret<c7x::double_vec>(__permute_low_low(
142 MATHLIB_vperm_data_dp_interweave_0_63, c7x::as_uchar_vec(inVecVals_odd), c7x::as_uchar_vec(inVecVals_even)));
143 inVecVals_8_15 = c7x::reinterpret<c7x::double_vec>(__permute_high_high(
144 MATHLIB_vperm_data_dp_interweave_0_63, c7x::as_uchar_vec(inVecVals_odd), c7x::as_uchar_vec(inVecVals_even)));
147 rVals_0_7 = (inVecReciprocalApprox_0_7 * inVecVals_0_7) - 1.0;
148 rVals_8_15 = (inVecReciprocalApprox_8_15 * inVecVals_8_15) - 1.0;
151 r1 = c7x::reinterpret<vec>(__permute_even_even_int(MATHLIB_vperm_data_0_63,
152 c7x::as_uchar_vec(__double_to_float(rVals_8_15)),
153 c7x::as_uchar_vec(__double_to_float(rVals_0_7))));
159 pol = (C5 * r2) + ((C4 * r3) + ((((C2 * r1) + C3) + (C1 * r2)) * r4));
166 inVecReciprocalApprox_32_63 = c7x::reinterpret<c7x::uint_vec>(
167 __permute_odd_odd_int(MATHLIB_vperm_data_0_63, c7x::as_uchar_vec(inVecReciprocalApprox_8_15),
168 c7x::as_uchar_vec(inVecReciprocalApprox_0_7)));
170 N = c7x::convert<c7x::int_vec>(((inVecReciprocalApprox_32_63 << 1) >> 21) - 1023);
173 NVals_odd = __high_int_to_double(N);
174 NVals_even = __low_int_to_double(N);
175 NVals_0_7 = c7x::reinterpret<c7x::double_vec>(__permute_low_low(
176 MATHLIB_vperm_data_dp_interweave_0_63, c7x::as_uchar_vec(NVals_odd), c7x::as_uchar_vec(NVals_even)));
177 NVals_8_15 = c7x::reinterpret<c7x::double_vec>(__permute_high_high(
178 MATHLIB_vperm_data_dp_interweave_0_63, c7x::as_uchar_vec(NVals_odd), c7x::as_uchar_vec(NVals_even)));
191 TVals_8_15 = c7x::reinterpret<c7x::double_vec>(__permute_high_high(
192 MATHLIB_vperm_data_interweave_0_63, c7x::as_uchar_vec(upperBitsIndexT), c7x::as_uchar_vec(lowerBitsIndexT)));
193 TVals_0_7 = c7x::reinterpret<c7x::double_vec>(__permute_low_low(
194 MATHLIB_vperm_data_interweave_0_63, c7x::as_uchar_vec(upperBitsIndexT), c7x::as_uchar_vec(lowerBitsIndexT)));
197 TVals_8_15 = TVals_8_15 - (ln2 * NVals_8_15);
198 TVals_0_7 = TVals_0_7 - (ln2 * NVals_0_7);
205 pol_0_7 = c7x::reinterpret<c7x::double_vec>(__permute_low_low(MATHLIB_vperm_data_dp_interweave_0_63,
206 c7x::as_uchar_vec(__high_float_to_double(pol)),
207 c7x::as_uchar_vec(__low_float_to_double(pol))));
208 pol_8_15 = c7x::reinterpret<c7x::double_vec>(__permute_high_high(MATHLIB_vperm_data_dp_interweave_0_63,
209 c7x::as_uchar_vec(__high_float_to_double(pol)),
210 c7x::as_uchar_vec(__low_float_to_double(pol))));
213 outVec_0_7 = rVals_0_7 + TVals_0_7 + pol_0_7;
214 outVec_8_15 = rVals_8_15 + TVals_8_15 + pol_8_15;
217 outVec = c7x::reinterpret<vec>(__permute_even_even_int(MATHLIB_vperm_data_0_63,
218 c7x::as_uchar_vec(__double_to_float(outVec_8_15)),
219 c7x::as_uchar_vec(__double_to_float(outVec_0_7))));
221 outVec = outVec * half;
228 vec inVecOriginal = c7x::strm_eng<1, vec>::get_adv();
229 inVecOriginal_abs = __abs(inVecOriginal);
230 __vpred cmp_le_pol = __cmp_le_pred(inVecOriginal_abs, pol_bound);
231 outVec = __select(cmp_le_pol, inVec, outVec);
235 __vpred tmp = c7x::strm_agen<0, vec>::get_vpred();
236 vec *addr = c7x::strm_agen<0, vec>::get_adv(pDst);
237 __vstore_pred(tmp, addr, outVec);
244 template <
typename T>
246 MATHLIB_atanh_cond(__SE_TEMPLATE_v1 *se0Params, __SA_TEMPLATE_v1 *sa0Params,
size_t length, T *pSrc0, T *pDst)
250 size_t numBlocks = 0;
251 size_t remNumBlocks = 0;
254 typedef typename c7x::make_full_vector<T>::type vec;
257 numBlocks = length / c7x::element_count_of<vec>::value;
258 remNumBlocks = length % c7x::element_count_of<vec>::value;
273 vec limit, zeroF, inf;
277 inf = (vec) 0x7F800000u;
280 for (
size_t i = 0; i < numBlocks; i++) {
281 vec inVec = c7x::strm_eng<0, vec>::get_adv();
282 vec inVecOriginal = c7x::strm_eng<1, vec>::get_adv();
286 vec inVecOriginal_abs = __abs(inVecOriginal);
288 __vpred cmp_limit = __cmp_eq_pred(inVecOriginal_abs, limit);
289 outVec = __select(cmp_limit, inf, inVec);
292 __vpred cmp_lt_zero = __cmp_lt_pred(inVecOriginal, zeroF);
293 sign = __select(cmp_lt_zero, -sign, sign);
295 outVec = outVec * sign;
298 __vpred tmp = c7x::strm_agen<0, vec>::get_vpred();
299 vec *addr = c7x::strm_agen<0, vec>::get_adv(pDst);
300 __vstore_pred(tmp, addr, outVec);
307 template <
typename vecType>
static inline vecType
divspMod_atanh(vecType a, vecType b)
309 vecType res, r0, d0, d1, p0, p1, Two;
329 size_t numBlocks = 0;
330 size_t remNumBlocks = 0;
333 typedef typename c7x::make_full_vector<float>::type vec;
335 __SE_TEMPLATE_v1 se0Params = __gen_SE_TEMPLATE_v1();
336 __SA_TEMPLATE_v1 sa0Params = __gen_SA_TEMPLATE_v1();
341 numBlocks = length / c7x::element_count_of<vec>::value;
342 remNumBlocks = length % c7x::element_count_of<vec>::value;
354 vec pol_bound, c2, c4;
355 pol_bound = (vec) 0.1f;
356 c2 = (vec) 0.333327051f;
357 c4 = (vec) 0.202017226f;
360 for (
size_t i = 0; i < numBlocks; i++) {
361 vec inVec = c7x::strm_eng<0, vec>::get_adv();
367 vec x2, x4, pol, outVec, inVec_abs, temp1, temp2;
369 inVec_abs = __abs(inVec);
378 pol = (c2 * x2) + (c4 * x4);
379 pol = (pol * inVec_abs) + inVec_abs;
384 temp1 = 1.0f + inVec_abs;
385 temp2 = 1.0f - inVec_abs;
391 __vpred cmp_le_pol = __cmp_le_pred(inVec_abs, pol_bound);
392 outVec = __select(cmp_le_pol, pol, outVec);
396 __vpred tmp = c7x::strm_agen<0, vec>::get_vpred();
397 vec *addr = c7x::strm_agen<0, vec>::get_adv(pDst);
398 __vstore_pred(tmp, addr, outVec);
MATHLIB_STATUS MATHLIB_atanh(size_t length, T *restrict pSrc, T *restrict pDst)
template MATHLIB_STATUS MATHLIB_atanh< float >(size_t length, float *pSrc, float *pDst)
static MATHLIB_STATUS MATHLIB_atanh_log(__SE_TEMPLATE_v1 *se0Params, __SA_TEMPLATE_v1 *sa0Params, size_t length, T *pSrc0, T *pDst)
static MATHLIB_STATUS MATHLIB_atanh_cond(__SE_TEMPLATE_v1 *se0Params, __SA_TEMPLATE_v1 *sa0Params, size_t length, T *pSrc0, T *pDst)
static void MATHLIB_atanh_vector(size_t length, T *pSrc, T *pDst)
static vecType divspMod_atanh(vecType a, vecType b)
MATHLIB_STATUS MATHLIB_atanh_sp(size_t length, float *pSrc, float *pDst)
This function is the C interface for MATHLIB_atanh. Function accepts float pointers.
#define MATHLIB_LOGTABLE_OFFSET
static c7x::uint_vec MATHLIB_LUTReadLowerBits(vecType vecOffset)
This method reads bits 31-0 of LUT value at vecOffset.
static c7x::uint_vec MATHLIB_LUTReadUpperBits(vecType vecOffset)
This method reads bits 63-32 of LUT value at vecOffset.
static void MATHLIB_SE0SE1SA0Open(__SE_TEMPLATE_v1 *se0Params, __SA_TEMPLATE_v1 *sa0Params, T *pSrc0, T *pSrc1)
This method performs SE0, SE1, and SA0 open.
static void MATHLIB_SE0SA0Close()
This method performs SE0 and SA0 close.
static void MATHLIB_SE0SE1SA0Close()
This method performs SE0, SE1, and SA0 close.
static void MATHLIB_SE0SA01DSequentialInit(__SE_TEMPLATE_v1 *se0Params, __SA_TEMPLATE_v1 *sa0Params, size_t length, T *pSrc, T *pDst)
static MATHLIB_STATUS MATHLIB_checkParams(size_t length, T *pSrc, T *pDst)
This method performs parameter checks for MATHLIB function.
static void MATHLIB_SE0SA0Open(__SE_TEMPLATE_v1 *se0Params, __SA_TEMPLATE_v1 *sa0Params, T *pSrc)
This method performs SE0 and SA0 open.
MATHLIB_STATUS_NAME
The enumeration of all status codes.