// Shorthand that expands to c7x::element_count_of<x>::value for a type x
// (presumably the number of elements held by vector type x; confirm against the c7x headers).
34 #define ELEMENT_COUNT(x) c7x::element_count_of<x>::value
// NOTE(review): this listing is elided. The function header, several assignments (for example to bound,
// inVec_abs declaration, absNf, minusN, r2, r3, J, K and the upper/lower bit vectors) and the closing braces
// are not visible here. The visible parameter matches the MATHLIB_cosh_exp signature listed near the end of
// this file, so this is presumably that function; confirm against the full source.
57 __SA_TEMPLATE_v1 *restrict sa0Params,
// vec is the vector type that c7x::make_full_vector yields for element type T.
63 typedef typename c7x::make_full_vector<T>::type vec;
// Vector constants used by the loop below (half is assigned in an elided line).
75 vec log2_base_x16, half, negativeHalf, LnMax, Max, C0, C1, C2, ln2;
// Numerically about 16 / ln(2).
79 log2_base_x16 = (vec) 23.0831206542234f;
81 negativeHalf = (vec) -0.5f;
// Numerically about ln(Max), with Max being the largest finite single-precision value.
82 LnMax = (vec) 88.72283905f;
83 Max = (vec) 3.402823466E+38f;
// mask and p are declared in elided lines; p is numerically about ln(2) / 16.
84 mask = (c7x::uint_vec) 0x3u;
85 p = (c7x::double_vec) 0.0433216987816623;
86 ln2 = (vec) 0.693147180559945f;
// Polynomial coefficients, numerically close to 1/6, 1/2 and 1.
89 C0 = (vec) 0.166668549286041f;
90 C1 = (vec) 0.500016170012920f;
91 C2 = (vec) 0.999999998618401f;
// One iteration per block; numBlocks comes from the elided parameter list.
93 for (
size_t i = 0; i < numBlocks; i++) {
// Fetch the next input vector through c7x::strm_eng<0, vec>::get_adv().
94 vec inVec = c7x::strm_eng<0, vec>::get_adv();
// Per-iteration temporaries: float vectors, unsigned/signed integer vectors and double vectors.
101 vec pol, r, r2, r3, outVec, Nf, absNf, rVals_odd, rVals_even;
102 c7x::uint_vec J, K, uN, dTAdjusted_32_63, dT_32_63, dT_0_31, upperBitsK, lowerBitsK, upperBitsJ, lowerBitsJ;
103 c7x::int_vec N, minusN;
104 c7x::double_vec KVals_8_15, KVals_0_7, JVals_8_15, JVals_0_7, dTVals_8_15, dTVals_0_7, pol_0_7, pol_8_15,
105 outVec_0_7, outVec_8_15, inVecVals_odd, inVecVals_even, NVals_odd, NVals_even;
107 inVec_abs = __abs(inVec);
// Build a predicate from comparing bound against the absolute input and use it to choose, per lane,
// between the absolute input and the original input (bound is assigned in an elided line).
112 __vpred cmp_lt_exp = __cmp_lt_pred(bound, inVec_abs);
113 inVec = __select(cmp_lt_exp, inVec_abs, inVec);
// Scale the input by log2_base_x16.
116 Nf = inVec * log2_base_x16;
// Integer conversion of absNf (absNf and minusN are assigned in elided lines).
118 N = c7x::convert<c7x::int_vec>(absNf);
// A predicate comparing Nf with negativeHalf chooses per lane between minusN and N.
124 __vpred cmp_N = __cmp_lt_pred(Nf, negativeHalf);
125 N = __select(cmp_N, minusN, N);
// Reduced argument inVec - p * N, evaluated in double precision on two halves of the vector
// (via the __high_/__low_ widening conversions) and narrowed back to float.
132 inVecVals_odd = __high_float_to_double(inVec);
133 inVecVals_even = __low_float_to_double(inVec);
134 NVals_odd = __high_int_to_double(N);
135 NVals_even = __low_int_to_double(N);
136 rVals_odd = __double_to_float((inVecVals_odd - (p * NVals_odd)));
137 rVals_even = __double_to_float((inVecVals_even - (p * NVals_even)));
// Recombine the two partial results into one float vector r with a permute.
140 r = c7x::reinterpret<vec>(__permute_even_even_int(MATHLIB_vperm_data_interweave_0_63,
141 c7x::as_uchar_vec(rVals_odd), c7x::as_uchar_vec(rVals_even)));
// Polynomial in r; presumably r2 and r3 are the square and cube of r (assigned in elided lines),
// which would make this roughly r + r^2/2 + r^3/6. TODO confirm.
146 pol = (r * C2) + ((r3 * C0) + (r2 * C1));
156 uN = c7x::convert<c7x::uint_vec>(N);
// Assemble double-precision K and J values from separate upper and lower 32-bit vectors
// (presumably table lookups performed in elided lines; verify against the LUT helpers).
166 KVals_8_15 = c7x::reinterpret<c7x::double_vec>(__permute_high_high(
167 MATHLIB_vperm_data_interweave_0_63, c7x::as_uchar_vec(upperBitsK), c7x::as_uchar_vec(lowerBitsK)));
168 KVals_0_7 = c7x::reinterpret<c7x::double_vec>(__permute_low_low(
169 MATHLIB_vperm_data_interweave_0_63, c7x::as_uchar_vec(upperBitsK), c7x::as_uchar_vec(lowerBitsK)));
170 JVals_8_15 = c7x::reinterpret<c7x::double_vec>(__permute_high_high(
171 MATHLIB_vperm_data_interweave_0_63, c7x::as_uchar_vec(upperBitsJ), c7x::as_uchar_vec(lowerBitsJ)));
172 JVals_0_7 = c7x::reinterpret<c7x::double_vec>(__permute_low_low(
173 MATHLIB_vperm_data_interweave_0_63, c7x::as_uchar_vec(upperBitsJ), c7x::as_uchar_vec(lowerBitsJ)));
// Product of the K and J values in double precision.
176 dTVals_8_15 = KVals_8_15 * JVals_8_15;
177 dTVals_0_7 = KVals_0_7 * JVals_0_7;
// Split the double products back into two unsigned 32-bit vectors (names suggest bits 63-32 and 31-0).
184 dT_32_63 = c7x::reinterpret<c7x::uint_vec>(__permute_odd_odd_int(
185 MATHLIB_vperm_data_0_63, c7x::as_uchar_vec(dTVals_8_15), c7x::as_uchar_vec(dTVals_0_7)));
186 dT_0_31 = c7x::reinterpret<c7x::uint_vec>(__permute_even_even_int(
187 MATHLIB_vperm_data_0_63, c7x::as_uchar_vec(dTVals_8_15), c7x::as_uchar_vec(dTVals_0_7)));
// Drop the low 4 bits of uN, move the rest to bit 20 and add it to the upper words.
// NOTE(review): bit 20 of the upper word is where the binary64 exponent field starts, so this
// presumably scales the product by a power of two; confirm.
189 uN = (uN >> 4) << 20;
190 dTAdjusted_32_63 = dT_32_63 + uN;
// Reassemble doubles from the adjusted upper words and the unchanged lower words.
193 dTVals_8_15 = c7x::reinterpret<c7x::double_vec>(__permute_high_high(
194 MATHLIB_vperm_data_interweave_0_63, c7x::as_uchar_vec(dTAdjusted_32_63), c7x::as_uchar_vec(dT_0_31)));
195 dTVals_0_7 = c7x::reinterpret<c7x::double_vec>(__permute_low_low(
196 MATHLIB_vperm_data_interweave_0_63, c7x::as_uchar_vec(dTAdjusted_32_63), c7x::as_uchar_vec(dT_0_31)));
// Widen pol to double and arrange it into the same 0_7 / 8_15 split as the dT values.
198 pol_0_7 = c7x::reinterpret<c7x::double_vec>(__permute_low_low(MATHLIB_vperm_data_dp_interweave_0_63,
199 c7x::as_uchar_vec(__high_float_to_double(pol)),
200 c7x::as_uchar_vec(__low_float_to_double(pol))));
201 pol_8_15 = c7x::reinterpret<c7x::double_vec>(__permute_high_high(MATHLIB_vperm_data_dp_interweave_0_63,
202 c7x::as_uchar_vec(__high_float_to_double(pol)),
203 c7x::as_uchar_vec(__low_float_to_double(pol))));
// Result in double precision: dT * (1 + pol).
205 outVec_0_7 = dTVals_0_7 * (1.0f + pol_0_7);
206 outVec_8_15 = dTVals_8_15 * (1.0f + pol_8_15);
// Narrow both halves to float and merge them into a single output vector.
208 outVec = c7x::reinterpret<vec>(__permute_even_even_int(MATHLIB_vperm_data_0_63,
209 c7x::as_uchar_vec(__double_to_float(outVec_8_15)),
210 c7x::as_uchar_vec(__double_to_float(outVec_0_7))));
// Large-input handling: a predicate comparing LnMax with (inVec - ln2) chooses per lane
// between Max and the computed result.
221 __vpred cmp_max = __cmp_lt_pred(LnMax, (inVec - ln2));
222 outVec = __select(cmp_max, Max, outVec);
// Predicated store: take the predicate and the destination address from c7x::strm_agen<0, vec>
// and write outVec through __vstore_pred.
224 __vpred tmp = c7x::strm_agen<0, vec>::get_vpred();
225 vec *addr = c7x::strm_agen<0, vec>::get_adv(pDst);
226 __vstore_pred(tmp, addr, outVec);
// NOTE(review): this listing is elided. The function name line, several assignments (for example to bound,
// two, zero, half, x2, x4, x6, x8) and the closing braces are not visible here. The visible parameter matches
// the MATHLIB_cosh_pol signature listed near the end of this file, so this is presumably that function;
// confirm against the full source.
232 template <
typename T>
234 __SA_TEMPLATE_v1 *restrict sa0Params,
// vec is the vector type that c7x::make_full_vector yields for element type T.
240 typedef typename c7x::make_full_vector<T>::type vec;
248 vec pol_bound, bound, C1, C2, C3, C4, two, zero, fltMax, half, max, inf;
// Threshold compared against the absolute input when choosing the polynomial result below.
249 pol_bound = (vec) 1.0f;
// Polynomial coefficients, numerically about 1/40320, 1/720, 1/24 and 1/2.
251 C1 = (vec) 2.48015873015873e-5f;
252 C2 = (vec) 0.00138888888888889f;
253 C3 = (vec) 0.0416666666666667f;
254 C4 = (vec) 0.5000000f;
// Largest finite single-precision value.
257 fltMax = (vec) 3.40282347e+38f;
// Numerically about ln(fltMax) + ln(2).
259 max = (vec) 89.41598629f;
// NOTE(review): 0x7F800000 is the bit pattern of single-precision +infinity, but this is a value cast from an
// integer literal. If the cast converts the integer value instead of reinterpreting its bits, inf holds
// 2139095040.0f rather than +infinity. Confirm against the C7x scalar-to-vector cast semantics.
260 inf = (vec) 0x7F800000;
// One iteration per block; numBlocks comes from the elided parameter list.
262 for (
size_t i = 0; i < numBlocks; i++) {
// Fetch the next vectors from stream 0 (inVec) and stream 1 (expOut).
// NOTE(review): expOut is presumably the output of the exp-based pass; verify against the caller.
263 vec inVec = c7x::strm_eng<0, vec>::get_adv();
264 vec expOut = c7x::strm_eng<1, vec>::get_adv();
269 vec inVec_abs, x2, x4, x6, x8, pol, x1, recip, expOut2, expRecip, outVec;
// Reciprocal of expOut * two: initial estimate from __recip followed by two refinement steps of the
// form x * (two - a * x) (the value of two is assigned in an elided line).
275 expOut2 = expOut * two;
276 x1 = __recip(expOut2);
277 x1 = x1 * (two - (expOut2 * x1));
278 recip = x1 * (two - (expOut2 * x1));
// A predicate from comparing expOut2 with zero chooses per lane between zero and recip.
287 __vpred cmp_eq_zero = __cmp_eq_pred(expOut2, zero);
288 recip = __select(cmp_eq_zero, zero, recip);
// A predicate from comparing fltMax with the absolute value of expOut2 chooses per lane between zero and recip.
293 __vpred cmp_gt_flt = __cmp_lt_pred(fltMax, __abs(expOut2));
294 recip = __select(cmp_gt_flt, zero, recip);
// Combine expOut2 with its reciprocal and scale by half (half is assigned in an elided line).
296 expRecip = (expOut2 + recip) * half;
// Polynomial result; presumably x2, x4, x6 and x8 are even powers of the input (assigned in elided lines).
308 pol = ((C4 * x2) + (C3 * x4)) + ((C1 * x8) + (C2 * x6));
328 inVec_abs = __abs(inVec);
// Three predicates on the absolute input: compared with pol_bound, compared with bound,
// and the negation of the OR of those two.
333 __vpred cmp_le_pol = __cmp_le_pred(inVec_abs, pol_bound);
338 __vpred cmp_lt_exp = __cmp_lt_pred(bound, inVec_abs);
340 __vpred cmp_else_exp = __negate(__or(cmp_lt_exp, cmp_le_pol));
// Choose per lane between expRecip and expOut, then between pol and that intermediate result.
342 outVec = __select(cmp_else_exp, expRecip, expOut);
343 outVec = __select(cmp_le_pol, pol, outVec);
// Large-input handling: a predicate comparing max with the absolute input chooses between inf and the result.
349 __vpred cmp_lt_max = __cmp_lt_pred(max, inVec_abs);
350 outVec = __select(cmp_lt_max, inf, outVec);
// Predicated store: take the predicate and the destination address from c7x::strm_agen<0, vec>
// and write outVec through __vstore_pred.
352 __vpred tmp = c7x::strm_agen<0, vec>::get_vpred();
353 vec *addr = c7x::strm_agen<0, vec>::get_adv(pDst);
354 __vstore_pred(tmp, addr, outVec);
// MATHLIB_cosh_vector: takes an element count (length), a source pointer (pSrc) and a destination
// pointer (pDst), all for element type T.
// NOTE(review): this listing is elided; the braces and most of the body are not visible here.
359 template <
typename T>
static inline void MATHLIB_cosh_vector(
size_t length, T *restrict pSrc, T *restrict pDst)
362 size_t numBlocks = 0;
363 size_t remNumBlocks = 0;
// vec is the vector type that c7x::make_full_vector yields for element type T.
366 typedef typename c7x::make_full_vector<T>::type vec;
// Start from the generated default SE and SA parameter templates.
368 __SE_TEMPLATE_v1 se0Params = __gen_SE_TEMPLATE_v1();
369 __SA_TEMPLATE_v1 sa0Params = __gen_SA_TEMPLATE_v1();
// numBlocks is the quotient of length by the element count of vec; remNumBlocks is the remainder,
// i.e. the number of leftover elements (despite its name, it is not a block count).
374 numBlocks = length / c7x::element_count_of<vec>::value;
375 remNumBlocks = length % c7x::element_count_of<vec>::value;
static void MATHLIB_cosh_pol(__SE_TEMPLATE_v1 *restrict se0Params, __SA_TEMPLATE_v1 *restrict sa0Params, T *restrict pSrc, T *restrict pDst, size_t numBlocks)
static void MATHLIB_cosh_exp(__SE_TEMPLATE_v1 *restrict se0Params, __SA_TEMPLATE_v1 *restrict sa0Params, T *restrict pSrc, T *restrict pDst, size_t numBlocks)
static void MATHLIB_cosh_vector(size_t length, T *restrict pSrc, T *restrict pDst)
template MATHLIB_STATUS MATHLIB_cosh< float >(size_t length, float *pSrc, float *pDst)
MATHLIB_STATUS MATHLIB_cosh(size_t length, T *restrict pSrc, T *restrict pDst)
MATHLIB_STATUS MATHLIB_cosh_sp(size_t length, float *pSrc, float *pDst)
This function is the C interface for MATHLIB_cosh. The function accepts float pointers.
#define MATHLIB_KTABLE_OFFSET
static c7x::uint_vec MATHLIB_LUTReadLowerBits(vecType vecOffset)
This method reads bits 31-0 of the LUT value at vecOffset.
#define MATHLIB_JTABLE_OFFSET
static c7x::uint_vec MATHLIB_LUTReadUpperBits(vecType vecOffset)
This method reads bits 63-32 of the LUT value at vecOffset.
static void MATHLIB_SE0SE1SA0Open(__SE_TEMPLATE_v1 *se0Params, __SA_TEMPLATE_v1 *sa0Params, T *pSrc0, T *pSrc1)
This method opens SE0, SE1, and SA0.
static void MATHLIB_SE0SA0Close()
This method closes SE0 and SA0.
static void MATHLIB_SE0SA01DSequentialInit(__SE_TEMPLATE_v1 *se0Params, __SA_TEMPLATE_v1 *sa0Params, size_t length, T *pSrc, T *pDst)
static MATHLIB_STATUS MATHLIB_checkParams(size_t length, T *pSrc, T *pDst)
This method performs parameter checks for a MATHLIB function.
static void MATHLIB_SE0SA0Open(__SE_TEMPLATE_v1 *se0Params, __SA_TEMPLATE_v1 *sa0Params, T *pSrc)
This method opens SE0 and SA0.
MATHLIB_STATUS_NAME
The enumeration of all status codes.