// Shorthand for c7x::element_count_of<x>::value, the element-count trait of type x.
34 #define ELEMENT_COUNT(x) c7x::element_count_of<x>::value
// Shorthand for c7x::element_type_of<x>::type. The expansion starts with `typename`,
// so it is meant for use where x is a dependent (template) type.
36 #define ELEMENT_TYPE(x) typename c7x::element_type_of<x>::type
// Forward declaration of sqrt_acos_i: a file-local inline helper template that takes
// one vecType by value and returns a vecType. Its definition appears below.
55 template <
typename vecType>
static inline vecType
sqrt_acos_i(vecType a);
// Forward declaration of pol_est_acos_i: a file-local inline helper template that takes
// one vecType by value and returns a vecType.
56 template <
typename vecType>
static inline vecType
pol_est_acos_i(vecType x);
// sqrt_acos_i: per-lane helper that the acos kernel applies to a = 1 - |x|^2.
// Presumably a square-root approximation (going by the name) - TODO confirm.
59 template <
typename vecType>
static inline vecType
sqrt_acos_i(vecType a)
// Working constants, each broadcast to every lane of vecType.
64 vecType half, OneP5, zero, maxValue;
67 OneP5 = (vecType) 1.5;
// Largest finite value representable in the element type.
69 maxValue = (vecType) std::numeric_limits<elemType>::max();
// OneP5 - half * d0 * p0: has the shape of a Newton-Raphson reciprocal-square-root
// refinement term. NOTE(review): confirm against how p0 and d0 are derived.
79 p1 = OneP5 - d0 * p0 * half;
// Lanes where a <= zero take `zero` instead of the computed y
// (assuming __select returns its first value operand where the predicate is set - TODO confirm).
82 __vpred cmp_lezero = __cmp_le_pred((vecType) a, zero);
83 y = __select(cmp_lezero, zero, y);
// Lanes where maxValue < a take maxValue instead of the computed y (same __select assumption).
85 __vpred cmp_gtmax = __cmp_lt_pred(maxValue, (vecType) a);
86 y = __select(cmp_gtmax, maxValue, y);
// Polynomial coefficients, each broadcast to every lane. In the sums below cN multiplies
// the term named xN (c16 and c14 reach x16/x14 through x4 * x12 and x2 * x12).
// NOTE(review): c2, c4, c6 are close to 1/6, 3/40 and 15/336, the leading Taylor
// coefficients of asin(x)/x - 1, so this is presumably a fitted arcsine polynomial - confirm.
97 vecType c16, c14, c12, c10, c8, c6, c4, c2;
99 c16 = (vecType) 0.053002771381990;
100 c14 = (vecType) -0.010980624698693;
101 c12 = (vecType) 0.020659425186833;
102 c10 = (vecType) 0.022862784546374;
103 c8 = (vecType) 0.030636056280974;
104 c6 = (vecType) 0.044450959710588;
105 c4 = (vecType) 0.075034659380970;
106 c2 = (vecType) 0.166664771293503;
// Power terms of the input (presumably xN = x^N - TODO confirm) and partial sums.
110 vecType x2, x4, x6, x8, x10, x12;
111 vecType pol, tmp1, tmp2;
// The polynomial is evaluated as two independent partial sums:
// tmp1 collects the c2..c8 terms, tmp2 collects the c10..c16 terms.
138 tmp1 = ((c8 * x8) + (c6 * x6)) + ((c4 * x4) + (c2 * x2));
139 tmp2 = ((((c16 * x4) + (c14 * x2)) + c12) * x12) + (c10 * x10);
// numBlocks: count of whole vectors in `length`; remNumBlocks: leftover element count.
153 size_t numBlocks = 0;
154 size_t remNumBlocks = 0;
// Full-width vector type for element type T.
157 typedef typename c7x::make_full_vector<T>::type vec;
// Parameter templates for stream engine 0 (SE0) and stream address generator 0 (SA0),
// starting from the generated defaults.
159 __SE_TEMPLATE_v1 se0Params = __gen_SE_TEMPLATE_v1();
160 __SA_TEMPLATE_v1 sa0Params = __gen_SA_TEMPLATE_v1();
168 numBlocks = length / c7x::element_count_of<vec>::value;
169 remNumBlocks = length % c7x::element_count_of<vec>::value;
// Per-lane constants and temporaries used by the loop below.
181 vec HalfPI, Zero, One, rsqr2, s, negativeOne, res, x_abs, a, temp1, temp2, negativeS, poly, scale, offset, nan;
// HalfPI is approximately pi/2 and rsqr2 approximately 1/sqrt(2).
183 HalfPI = (vec) 1.570796327;
184 rsqr2 = (vec) 0.7071067811;
// NOTE(review): this casts the integer 0x7FFFFFFF to vec. If the cast is a value conversion
// rather than a bit-pattern reinterpretation, the lanes hold a large finite number, not a
// NaN - confirm against the compiler's scalar-to-vector conversion rules.
187 nan = (vec) 0x7FFFFFFFu;
// One iteration per whole vector of input.
191 for (
size_t i = 0; i < numBlocks; i++) {
// Read the current input vector from stream 0; the same stream is read again with
// get_adv() further down, so this read presumably does not advance it - TODO confirm.
192 vec inVec = c7x::strm_eng<0, vec>::get();
// a = 1 - |x|^2, then temp1 = sqrt_acos_i(a).
200 x_abs = __abs(inVec);
201 a = One - (x_abs * x_abs);
203 temp1 = sqrt_acos_i<vec>(a);
// Range split at rsqr2: lanes with |x| > rsqr2 use temp1 as the polynomial argument with
// offset HalfPI and scale negativeOne; the other lanes use |x| with offset Zero and scale One
// (assuming __select returns its first value operand where the predicate is set - TODO confirm).
206 __vpred cmp_x_abs = __cmp_lt_pred(rsqr2, x_abs);
207 temp2 = __select(cmp_x_abs, temp1, x_abs);
208 offset = __select(cmp_x_abs, HalfPI, Zero);
209 scale = __select(cmp_x_abs, negativeOne, One);
215 poly = pol_est_acos_i<vec>(temp2);
217 res = scale * poly + offset;
// Sign handling: lanes whose input is below Zero switch s to negativeS (same __select
// assumption), then the result is formed as HalfPI - res * s.
227 __vpred cmp_lt_zero = __cmp_lt_pred(inVec, Zero);
228 s = __select(cmp_lt_zero, negativeS, s);
230 res = HalfPI - (res * s);
// Read the input again, this time advancing stream 0, and replace lanes where |x| > One
// with the `nan` constant.
239 vec inVec1 = c7x::strm_eng<0, vec>::get_adv();
240 vec x_abs1 = __abs(inVec1);
241 __vpred cmp_gt_one = __cmp_lt_pred(One, x_abs1);
242 res = __select(cmp_gt_one, nan, res);
// Predicated store of outVec through SA0: take the lane predicate, get the next
// destination address relative to pDst (advancing SA0), and store under that predicate.
245 __vpred tmp = c7x::strm_agen<0, vec>::get_vpred();
246 vec *addr = c7x::strm_agen<0, vec>::get_adv(pDst);
247 __vstore_pred(tmp, addr, outVec);
static vecType sqrt_acos_i(vecType a)
static vecType pol_est_acos_i(vecType x)
template MATHLIB_STATUS MATHLIB_acos< float >(size_t length, float *pSrc, float *pDst)
MATHLIB_STATUS MATHLIB_acos(size_t length, T *pSrc, T *pDst)
Computes the element-wise arc-cosine of an input vector. The function can be overloaded with float pointer...
MATHLIB_STATUS MATHLIB_acos_sp(size_t length, float *pSrc, float *pDst)
This function is the C interface for MATHLIB_acos. It accepts float pointers.
static void MATHLIB_SE0SA0Close()
This method performs SE0 and SA0 close.
static void MATHLIB_SE0SA01DSequentialInit(__SE_TEMPLATE_v1 *se0Params, __SA_TEMPLATE_v1 *sa0Params, size_t length, T *pSrc, T *pDst)
static MATHLIB_STATUS MATHLIB_checkParams(size_t length, T *pSrc, T *pDst)
This method performs parameter checks for MATHLIB function.
static void MATHLIB_SE0SA0Open(__SE_TEMPLATE_v1 *se0Params, __SA_TEMPLATE_v1 *sa0Params, T *pSrc)
This method performs SE0 and SA0 open.
MATHLIB_STATUS_NAME
The enumeration of all status codes.