// Shorthand for the element count that c7x::element_count_of reports for the type x.
34 #define ELEMENT_COUNT(x) c7x::element_count_of<x>::value
// Exponential stage of the sinh kernel.
//
// This is a partial listing: the template header, the braces and several assignments
// (e.g. absNf, minusN, r2, r3, the K/J table words, and the declarations of mask and p)
// sit on lines that are not shown here.
//
// Each loop iteration reads one vector from stream engine 0, evaluates a table-plus-polynomial
// exponential approximation on it, and writes the result through stream address generator 0
// relative to pDst.
// NOTE(review): the overflow test at the end compares (inVec - ln2) against LnMax, which
// suggests the value produced is exp(x) / 2 rather than exp(x) - confirm against the full source.
//
// Parameters:
//   se0Params, sa0Params, pSrc - not referenced in the visible lines.
//   pDst      - base pointer handed to the address generator for the predicated store.
//   numBlocks - number of full-vector iterations to run.
57 MATHLIB_sinh_exp(__SE_TEMPLATE_v1 *se0Params, __SA_TEMPLATE_v1 *sa0Params, T *pSrc, T *pDst,
size_t numBlocks)
// Full-width vector type for element type T.
60 typedef typename c7x::make_full_vector<T>::type vec;
69 vec log2_base_x16, half, negativeHalf, LnMax, Max, C0, C1, C2, ln2;
// Numerically 16 / ln(2): scales the input into units of ln(2)/16.
73 log2_base_x16 = (vec) 23.0831206542234f;
75 negativeHalf = (vec) -0.5f;
// Numerically ln(3.402823466E+38), i.e. the log of the Max constant below.
76 LnMax = (vec) 88.72283905f;
// Largest finite single-precision value; used as the saturated result.
77 Max = (vec) 3.402823466E+38f;
// mask is consumed on lines not shown in this listing.
78 mask = (c7x::uint_vec) 0x3u;
// Numerically ln(2) / 16: the reduction step, kept in double precision.
79 p = (c7x::double_vec) 0.0433216987816623;
80 ln2 = (vec) 0.693147180559945f;
// Polynomial coefficients, approximately 1/6, 1/2 and 1.
83 C0 = (vec) 0.166668549286041f;
84 C1 = (vec) 0.500016170012920f;
85 C2 = (vec) 0.999999998618401f;
87 for (
size_t i = 0; i < numBlocks; i++) {
// Fetch the next input vector from stream engine 0 and advance the stream.
88 vec inVec = c7x::strm_eng<0, vec>::get_adv();
93 vec pol, r, r2, r3, outVec, Nf, absNf, rVals_odd, rVals_even;
94 c7x::uint_vec J, K, uN, dTAdjusted_32_63, dT_32_63, dT_0_31, upperBitsK, lowerBitsK, upperBitsJ, lowerBitsJ;
95 c7x::int_vec N, minusN;
96 c7x::double_vec KVals_8_15, KVals_0_7, JVals_8_15, JVals_0_7, dTVals_8_15, dTVals_0_7, pol_0_7, pol_8_15,
97 outVec_0_7, outVec_8_15, inVecVals_odd, inVecVals_even, NVals_odd, NVals_even;
// Input expressed in units of ln(2)/16.
102 Nf = inVec * log2_base_x16;
// absNf is assigned on a line not shown (presumably a rounded |Nf| - TODO confirm).
104 N = c7x::convert<c7x::int_vec>(absNf);
// Lanes where Nf < -0.5 take minusN (assigned on a line not shown) instead of N.
110 __vpred cmp_N = __cmp_lt_pred(Nf, negativeHalf);
111 N = __select(cmp_N, minusN, N);
// Widen the input and N to double in two halves so the reduction is done in double precision.
118 inVecVals_odd = __high_float_to_double(inVec);
119 inVecVals_even = __low_float_to_double(inVec);
120 NVals_odd = __high_int_to_double(N);
121 NVals_even = __low_int_to_double(N);
// Reduced argument r = x - N * p, narrowed back to float for each half.
122 rVals_odd = __double_to_float((inVecVals_odd - (p * NVals_odd)));
123 rVals_even = __double_to_float((inVecVals_even - (p * NVals_even)));
// Merge the two half results back into a single float vector.
126 r = c7x::reinterpret<vec>(__permute_even_even_int(MATHLIB_vperm_data_interweave_0_63,
127 c7x::as_uchar_vec(rVals_odd), c7x::as_uchar_vec(rVals_even)));
// r2 and r3 are assigned on lines not shown (presumably r^2 and r^3 - TODO confirm); with the
// coefficients above this would be r + r^2/2 + r^3/6.
132 pol = (r * C2) + ((r3 * C0) + (r2 * C1));
142 uN = c7x::convert<c7x::uint_vec>(N);
// Pair the upper/lower 32-bit words of the K and J values into double vectors.
// The four word vectors are assigned on lines not shown (presumably table reads - TODO confirm).
152 KVals_8_15 = c7x::reinterpret<c7x::double_vec>(__permute_high_high(
153 MATHLIB_vperm_data_interweave_0_63, c7x::as_uchar_vec(upperBitsK), c7x::as_uchar_vec(lowerBitsK)));
154 KVals_0_7 = c7x::reinterpret<c7x::double_vec>(__permute_low_low(
155 MATHLIB_vperm_data_interweave_0_63, c7x::as_uchar_vec(upperBitsK), c7x::as_uchar_vec(lowerBitsK)));
156 JVals_8_15 = c7x::reinterpret<c7x::double_vec>(__permute_high_high(
157 MATHLIB_vperm_data_interweave_0_63, c7x::as_uchar_vec(upperBitsJ), c7x::as_uchar_vec(lowerBitsJ)));
158 JVals_0_7 = c7x::reinterpret<c7x::double_vec>(__permute_low_low(
159 MATHLIB_vperm_data_interweave_0_63, c7x::as_uchar_vec(upperBitsJ), c7x::as_uchar_vec(lowerBitsJ)));
// Product of the K and J double values.
162 dTVals_8_15 = KVals_8_15 * JVals_8_15;
163 dTVals_0_7 = KVals_0_7 * JVals_0_7;
// Split the double products back into separate 32-bit word vectors.
170 dT_32_63 = c7x::reinterpret<c7x::uint_vec>(__permute_odd_odd_int(
171 MATHLIB_vperm_data_0_63, c7x::as_uchar_vec(dTVals_8_15), c7x::as_uchar_vec(dTVals_0_7)));
172 dT_0_31 = c7x::reinterpret<c7x::uint_vec>(__permute_even_even_int(
173 MATHLIB_vperm_data_0_63, c7x::as_uchar_vec(dTVals_8_15), c7x::as_uchar_vec(dTVals_0_7)));
// Drop the low 4 bits of N and move the remainder to bit 20, then add it to the upper words.
// NOTE(review): bit 20 of the upper word is where the IEEE-754 double exponent field starts, so
// this looks like a scale by 2^(N >> 4) done directly on the exponent - confirm.
175 uN = (uN >> 4) << 20;
176 dTAdjusted_32_63 = dT_32_63 + uN;
// Reassemble doubles from the adjusted upper words and the untouched lower words.
179 dTVals_8_15 = c7x::reinterpret<c7x::double_vec>(__permute_high_high(
180 MATHLIB_vperm_data_interweave_0_63, c7x::as_uchar_vec(dTAdjusted_32_63), c7x::as_uchar_vec(dT_0_31)));
181 dTVals_0_7 = c7x::reinterpret<c7x::double_vec>(__permute_low_low(
182 MATHLIB_vperm_data_interweave_0_63, c7x::as_uchar_vec(dTAdjusted_32_63), c7x::as_uchar_vec(dT_0_31)));
// Widen the polynomial to double and arrange it into the same two-half layout as dTVals.
184 pol_0_7 = c7x::reinterpret<c7x::double_vec>(__permute_low_low(MATHLIB_vperm_data_dp_interweave_0_63,
185 c7x::as_uchar_vec(__high_float_to_double(pol)),
186 c7x::as_uchar_vec(__low_float_to_double(pol))));
187 pol_8_15 = c7x::reinterpret<c7x::double_vec>(__permute_high_high(MATHLIB_vperm_data_dp_interweave_0_63,
188 c7x::as_uchar_vec(__high_float_to_double(pol)),
189 c7x::as_uchar_vec(__low_float_to_double(pol))));
// Combine the two factors: result = dT * (1 + pol), still in double precision.
191 outVec_0_7 = dTVals_0_7 * (1.0f + pol_0_7);
192 outVec_8_15 = dTVals_8_15 * (1.0f + pol_8_15);
// Narrow both halves to float and merge them into one output vector.
194 outVec = c7x::reinterpret<vec>(__permute_even_even_int(MATHLIB_vperm_data_0_63,
195 c7x::as_uchar_vec(__double_to_float(outVec_8_15)),
196 c7x::as_uchar_vec(__double_to_float(outVec_0_7))));
// Saturate: lanes where (inVec - ln2) exceeds LnMax are replaced by Max.
207 __vpred cmp_max = __cmp_lt_pred(LnMax, (inVec - ln2));
208 outVec = __select(cmp_max, Max, outVec);
// Predicated store through address generator 0: fetch the predicate, advance the address, store.
210 __vpred tmp = c7x::strm_agen<0, vec>::get_vpred();
211 vec *addr = c7x::strm_agen<0, vec>::get_adv(pDst);
212 __vstore_pred(tmp, addr, outVec);
// Final stage of the sinh kernel.
//
// This is a partial listing: braces and several assignments (e.g. bound, two, zero, half,
// x4, x6, x8 and the initial value of sign) sit on lines that are not shown here.
//
// Each loop iteration reads one vector from stream engine 1 (expOut) and one from stream
// engine 0 (inVec), selects per lane between a polynomial in |inVec|, an expression built from
// expOut and its reciprocal, and expOut itself, applies a sign factor, and writes the result
// through stream address generator 0 relative to pDst.
//
// Parameters:
//   se0Params, sa0Params, pSrc - not referenced in the visible lines.
//   pDst      - base pointer handed to the address generator for the predicated store.
//   numBlocks - number of full-vector iterations to run.
218 template <
typename T>
220 MATHLIB_sinh_pol(__SE_TEMPLATE_v1 *se0Params, __SA_TEMPLATE_v1 *sa0Params, T *pSrc, T *pDst,
size_t numBlocks)
// Full-width vector type for element type T.
223 typedef typename c7x::make_full_vector<T>::type vec;
231 vec pol_bound, bound, C8, C6, C4, C2, two, zero, fltMax, half, max, inf;
// Inputs with |x| <= pol_bound use the polynomial result.
232 pol_bound = (vec) 1.0f;
// Coefficients numerically equal to 1/9!, 1/7!, 1/5! and 1/3!.
234 C8 = (vec) 2.75573192239859e-6f;
235 C6 = (vec) 0.000198412698412698f;
236 C4 = (vec) 0.00833333333333333f;
237 C2 = (vec) 0.166666666666667f;
// Largest finite single-precision value; used to detect an overflowed expOut2.
240 fltMax = (vec) 3.40282347e+38f;
// Inputs with |x| above this threshold are replaced by inf below.
242 max = (vec) 89.41598629f;
// NOTE(review): 0x7F800000 is the IEEE-754 single-precision bit pattern of +infinity, but this is
// a value cast of an integer literal, not a bit reinterpretation. If (vec) broadcasts the converted
// value, inf holds 2139095040.0f rather than +infinity - confirm, and if so build the constant by
// reinterpreting an unsigned-integer vector instead.
243 inf = (vec) 0x7F800000;
245 for (
size_t i = 0; i < numBlocks; i++) {
// Fetch the next vector from stream engine 1 and advance the stream.
246 vec expOut = c7x::strm_eng<1, vec>::get_adv();
251 vec inVec_abs, x2, x4, x6, x8, pol, x1, recip, expOut2, expRecip, outVec, sign;
// Reciprocal of expOut * two: a hardware estimate refined by two steps of
// x1 <- x1 * (two - a * x1). The constant two is assigned on a line not shown.
259 expOut2 = expOut * two;
260 x1 = __recip(expOut2);
261 x1 = x1 * (two - (expOut2 * x1));
262 recip = x1 * (two - (expOut2 * x1));
// Force the reciprocal to zero in lanes where expOut2 equals zero.
271 __vpred cmp_eq_zero = __cmp_eq_pred(expOut2, zero);
272 recip = __select(cmp_eq_zero, zero, recip);
// Also force it to zero in lanes where |expOut2| exceeds fltMax.
277 __vpred cmp_gt_flt = __cmp_lt_pred(fltMax, __abs(expOut2));
278 recip = __select(cmp_gt_flt, zero, recip);
// (expOut2 - 1/expOut2) * half; half is assigned on a line not shown.
280 expRecip = (expOut2 - recip) * half;
// Fetch the matching input vector from stream engine 0 and advance the stream.
285 vec inVec = c7x::strm_eng<0, vec>::get_adv();
286 inVec_abs = __abs(inVec);
288 x2 = inVec_abs * inVec_abs;
// x4, x6 and x8 are assigned on lines not shown (presumably higher even powers of |x| - TODO confirm).
// The polynomial is then multiplied by |x| and |x| is added, giving |x| * (1 + pol).
293 pol = ((C2 * x2) + (C4 * x4)) + ((C6 * x6) + (C8 * x8));
294 pol = (pol * inVec_abs) + inVec_abs;
// Lane classification: |x| <= pol_bound, |x| > bound (bound assigned on a line not shown),
// or neither of the two.
316 __vpred cmp_le_pol = __cmp_le_pred(inVec_abs, pol_bound);
321 __vpred cmp_lt_exp = __cmp_lt_pred(bound, inVec_abs);
323 __vpred cmp_else_exp = __negate(__or(cmp_lt_exp, cmp_le_pol));
// Middle-range lanes take expRecip, all others expOut; small-|x| lanes then take the polynomial.
325 outVec = __select(cmp_else_exp, expRecip, expOut);
326 outVec = __select(cmp_le_pol, pol, outVec);
// Lanes with |x| above max are replaced by the inf constant (see the NOTE on its value above).
332 __vpred cmp_lt_max = __cmp_lt_pred(max, inVec_abs);
333 outVec = __select(cmp_lt_max, inf, outVec);
// Negate the sign factor in lanes where the input is below zero, then apply it to the result.
// sign's initial value is assigned on a line not shown (presumably 1.0 - TODO confirm).
335 __vpred cmp_sign = __cmp_lt_pred(inVec, zero);
336 sign = __select(cmp_sign, -sign, sign);
338 outVec = outVec * sign;
// Predicated store through address generator 0: fetch the predicate, advance the address, store.
340 __vpred tmp = c7x::strm_agen<0, vec>::get_vpred();
341 vec *addr = c7x::strm_agen<0, vec>::get_adv(pDst);
342 __vstore_pred(tmp, addr, outVec);
// Fragment of a function body whose header is not shown in this listing.
// Counters for whole-vector iterations and for the leftover elements.
350 size_t numBlocks = 0;
351 size_t remNumBlocks = 0;
// Full-width vector type for element type T.
354 typedef typename c7x::make_full_vector<T>::type vec;
// Start from the generated default streaming-engine and address-generator templates.
356 __SE_TEMPLATE_v1 se0Params = __gen_SE_TEMPLATE_v1();
357 __SA_TEMPLATE_v1 sa0Params = __gen_SA_TEMPLATE_v1();
// Number of complete vectors in length, and the element count left over after them.
362 numBlocks = length / c7x::element_count_of<vec>::value;
363 remNumBlocks = length % c7x::element_count_of<vec>::value;
static void MATHLIB_sinh_exp(__SE_TEMPLATE_v1 *se0Params, __SA_TEMPLATE_v1 *sa0Params, T *pSrc, T *pDst, size_t numBlocks)
static void MATHLIB_sinh_vector(size_t length, T *pSrc, T *pDst)
template MATHLIB_STATUS MATHLIB_sinh< float >(size_t length, float *pSrc, float *pDst)
static void MATHLIB_sinh_pol(__SE_TEMPLATE_v1 *se0Params, __SA_TEMPLATE_v1 *sa0Params, T *pSrc, T *pDst, size_t numBlocks)
#define MATHLIB_KTABLE_OFFSET
static c7x::uint_vec MATHLIB_LUTReadLowerBits(vecType vecOffset)
This method reads bits 31-0 of LUT value at vecOffset.
#define MATHLIB_JTABLE_OFFSET
static c7x::uint_vec MATHLIB_LUTReadUpperBits(vecType vecOffset)
This method reads bits 63-32 of LUT value at vecOffset.
static void MATHLIB_SE0SE1SA0Open(__SE_TEMPLATE_v1 *se0Params, __SA_TEMPLATE_v1 *sa0Params, T *pSrc0, T *pSrc1)
This method performs SE0, SE1, and SA0 open.
static void MATHLIB_SE0SA0Close()
This method performs SE0 and SA0 close.
static void MATHLIB_SE0SA01DSequentialInit(__SE_TEMPLATE_v1 *se0Params, __SA_TEMPLATE_v1 *sa0Params, size_t length, T *pSrc, T *pDst)
static MATHLIB_STATUS MATHLIB_checkParams(size_t length, T *pSrc, T *pDst)
This method performs parameter checks for MATHLIB function.
static void MATHLIB_SE0SA0Open(__SE_TEMPLATE_v1 *se0Params, __SA_TEMPLATE_v1 *sa0Params, T *pSrc)
This method performs SE0 and SA0 open.
MATHLIB_STATUS MATHLIB_sinh(size_t length, T *pSrc, T *pDst)
Performs the elementwise hyperbolic sine of an input vector. Function can be overloaded with float po...
MATHLIB_STATUS MATHLIB_sinh_sp(size_t length, float *pSrc, float *pDst)
This function is the C interface for MATHLIB_sinh. Function accepts float pointers.
MATHLIB_STATUS_NAME
The enumeration of all status codes.