// Shorthand for the compile-time element count of the c7x vector type `x`.
35 #define ELEMENT_COUNT(x) c7x::element_count_of<x>::value
// Primary template declaration of the vector division helper used by the
// atan2 kernels in this file. Explicit specializations exist for
// c7x::float_vec and c7x::double_vec.
// NOTE(review): presumably returns the element-wise quotient a / b -- confirm
// against the specializations.
59 template <
typename vecType>
static inline vecType
divspMod_atan2_i(vecType a, vecType b);
// Explicit specialization of divspMod_atan2_i for single-precision vectors.
61 template <>
inline c7x::float_vec divspMod_atan2_i<c7x::float_vec>(c7x::float_vec a, c7x::float_vec b)
// Full-width float vector type used for the local temporaries.
63 typedef typename c7x::make_full_vector<float>::type vecType;
// Working vectors for the computation (result, intermediates and constants).
65 vecType res_fVec, r0, d0, d1, p0_Vec, p1_Vec, Two;
// Template parameters: the floating-point vector type and the flag vector type.
82 template <
typename vecType,
typename vecBool>
// c7x::float_vec / c7x::char_vec specialization. The statements below select an
// offset (coef_vec) from the flag vectors s, bn and an, evaluate a polynomial
// in g1, and combine the two as "offset + polynomial" or "offset - polynomial".
86 inline c7x::float_vec atan22f_sr1i_atan2_i<c7x::float_vec, c7x::char_vec>(c7x::float_vec g1,
96 c7x::float_vec coef_vec, negativeCoef, pi_vec, polVec, Zero_vec, G2, G4, G6, G8, G10, G12, tmp1_vec, tmp2_vec, C1,
97 C2, C3, C4, C5, C6, C7, C8, Res, res_minus, res_plus;
98 c7x::char_vec false_vec;
// Broadcast constants: an all-zero flag vector, 0.0 and pi.
100 false_vec = (c7x::char_vec) 0;
101 Zero_vec = (c7x::float_vec) 0.0;
102 pi_vec = (c7x::float_vec) 3.1415927;
// Polynomial coefficients C1..C8 (signs alternate) consumed by tmp1_vec/tmp2_vec.
// NOTE(review): presumably fitted coefficients of an arctangent series -- confirm origin.
105 C1 = (c7x::float_vec) 0.00230158202;
106 C2 = (c7x::float_vec) -0.01394551000;
107 C3 = (c7x::float_vec) 0.03937087815;
108 C4 = (c7x::float_vec) -0.07235669163;
109 C5 = (c7x::float_vec) 0.10521499322;
110 C6 = (c7x::float_vec) -0.14175076797;
111 C7 = (c7x::float_vec) 0.19989300877;
112 C8 = (c7x::float_vec) -0.33332930041;
// Lanes with s == 0 take pi as the offset; the other lanes keep coef_vec.
121 __vpred cmp_swap = __cmp_eq_pred(s, false_vec);
122 coef_vec = __select(cmp_swap, pi_vec, coef_vec);
// Lanes with both bn == 0 and s == 0 get a zero offset instead.
126 __vpred cmp_negb = __cmp_eq_pred(bn, false_vec);
127 __vpred and_negB_swap = __and(cmp_negb, cmp_swap);
129 coef_vec = __select(and_negB_swap, Zero_vec, coef_vec);
// Lanes with an != 0 use the negated offset; lanes with an == 0 keep it.
135 negativeCoef = -coef_vec;
136 __vpred cmp_sign = __cmp_eq_pred(an, false_vec);
137 coef_vec = __select(cmp_sign, coef_vec, negativeCoef);
// Polynomial evaluated as two partial sums over the G* terms.
// NOTE(review): G2..G12 are presumably the even powers of g1 -- TODO confirm.
147 tmp1_vec = ((C5 * G8) + (C6 * G6)) + ((C7 * G4) + (C8 * G2));
148 tmp2_vec = ((((C1 * G4) + (C2 * G2)) + C3) * G12) + (C4 * G10);
// polVec = g1 * (1 + polynomial)
150 polVec = tmp1_vec + tmp2_vec;
151 polVec = (polVec * g1) + g1;
// Both candidates are formed; lanes with s == 0 take offset + polVec, the
// others take offset - polVec.
154 res_minus = coef_vec - polVec;
155 res_plus = coef_vec + polVec;
// NOTE(review): cmp_res repeats the comparison already held in cmp_swap; it
// could likely be reused -- confirm s is not modified in between.
156 __vpred cmp_res = __cmp_eq_pred(s, false_vec);
157 Res = __select(cmp_res, res_plus, res_minus);
// Single-precision vector kernel body.
// Block counters: number of full vectors in `length` and the leftover element count.
169 size_t numBlocks = 0;
170 size_t remNumBlocks = 0;
// Full-width float vector type, plus default-generated streaming engine (SE)
// and streaming address generator (SA) parameter templates.
173 typedef typename c7x::make_full_vector<float>::type vec;
174 __SE_TEMPLATE_v1 se0Params = __gen_SE_TEMPLATE_v1();
175 __SA_TEMPLATE_v1 sa0Params = __gen_SA_TEMPLATE_v1();
180 numBlocks = length / c7x::element_count_of<vec>::value;
181 remNumBlocks = length % c7x::element_count_of<vec>::value;
// Working vectors, per-lane flag vectors, and broadcast constants:
// pi/2, pi, the largest finite float, and the 1/0 flag values.
192 vec g, res_fvec, Zero, temp, abs_a, abs_b;
193 c7x::char_vec an, bn, s, TRUE_vec, FALSE_vec;
194 vec pih = (vec) 1.570796327;
195 vec pi_Vec = (vec) 3.141592741;
196 vec MAX = std::numeric_limits<float>::max();
198 TRUE_vec = (c7x::char_vec) 1;
199 FALSE_vec = (c7x::char_vec) 0;
// One full vector of each operand is processed per iteration.
203 for (
size_t i = 0; i < numBlocks; i++) {
// Fetch the next vector from streaming engine 0 (a) and engine 1 (b).
204 vec a = c7x::strm_eng<0, vec>::get_adv();
205 vec b = c7x::strm_eng<1, vec>::get_adv();
// Sign flags: an / bn are 1 in lanes where a / b is below Zero, else 0.
216 __vpred cmp_negA = __cmp_lt_pred(a, Zero);
217 an = __select(cmp_negA, TRUE_vec, FALSE_vec);
218 __vpred cmp_negB = __cmp_lt_pred(b, Zero);
219 bn = __select(cmp_negB, TRUE_vec, FALSE_vec);
// In lanes where abs_b < abs_a, exchange a and b (via temp) and set the flag s.
// NOTE(review): abs_a / abs_b are presumably |a| and |b| -- TODO confirm.
232 __vpred cmp_AgtB = __cmp_lt_pred(abs_b, abs_a);
233 temp = __select(cmp_AgtB, b, Zero);
234 b = __select(cmp_AgtB, a, b);
235 a = __select(cmp_AgtB, temp, a);
236 s = __select(cmp_AgtB, TRUE_vec, s);
// Divide the (possibly exchanged) operands with the helper.
240 g = divspMod_atan2_i<vec>(a, b);
// Polynomial evaluation plus offset selection driven by the s / bn / an flags.
245 res_fvec = atan22f_sr1i_atan2_i<vec, c7x::char_vec>(g, pih, s, bn, an);
// Special case: lanes with x == 0 yield 0 when y >= 0 and pi otherwise.
255 __vpred cmp_zeroX = __cmp_eq_pred(x, Zero);
257 __vpred cmp_zeroY = __cmp_le_pred(Zero, y);
258 vec resY = __select(cmp_zeroY, Zero, pi_Vec);
260 res_fvec = __select(cmp_zeroX, resY, res_fvec);
// Lanes where g exceeds the largest finite float are forced to +pi/2.
267 __vpred gMax = __cmp_lt_pred(MAX, g);
268 res_fvec = __select(gMax, pih, res_fvec);
// Lanes where g is below MIN are forced to -pi/2.
// NOTE(review): MIN is presumably the negated MAX -- TODO confirm.
275 vec negativepih = -pih;
277 __vpred gMin = __cmp_lt_pred(g, MIN);
278 res_fvec = __select(gMin, negativepih, res_fvec);
280 vec outVec = res_fvec;
// Predicated store of the result to pDst through address generator 0.
282 __vpred tmp = c7x::strm_agen<0, vec>::get_vpred();
283 vec *addr = c7x::strm_agen<0, vec>::get_adv(pDst);
284 __vstore_pred(tmp, addr, outVec);
// Explicit specialization of divspMod_atan2_i for double-precision vectors.
292 template <>
inline c7x::double_vec divspMod_atan2_i<c7x::double_vec>(c7x::double_vec a, c7x::double_vec b)
294 c7x::double_vec Two = (c7x::double_vec)(2.0f);
295 c7x::double_vec X_vec;
// Three identical refinement steps of the form X = X * (2 - b * X), i.e. the
// Newton-Raphson update for the reciprocal of b.
// NOTE(review): X_vec presumably holds an initial reciprocal estimate of b
// before the first step -- TODO confirm.
297 X_vec = X_vec * (Two - (b * X_vec));
298 X_vec = X_vec * (Two - (b * X_vec));
299 X_vec = X_vec * (Two - (b * X_vec));
// Double-precision arctangent kernel, templated on the element type T.
// It reads vectors from streaming engine 0 and writes results to pDst.
305 template <
typename T>
307 __SA_TEMPLATE_v1 *restrict sa0Params,
// Block counters: number of full vectors in `length` and the leftover element count.
313 size_t numBlocks = 0;
314 size_t remNumBlocks = 0;
// Full-width vector of T, and a full-width long vector for the per-lane
// integer bookkeeping (N, Sign).
317 typedef typename c7x::make_full_vector<T>::type vec;
318 typedef typename c7x::make_full_vector<long>::type vecLong;
321 numBlocks = length / c7x::element_count_of<vec>::value;
322 remNumBlocks = length % c7x::element_count_of<vec>::value;
// Coefficients of the rational approximation: p0..p3 feed the numerator RN,
// q0..q3 the denominator RD.
330 vec p0_vec = (vec) (-1.3688768894191926929e+1);
331 vec p1_vec = (vec) (-2.0505855195861651981e+1);
332 vec p2_vec = (vec) (-8.4946240351320683534e+0);
333 vec p3_vec = (vec) (-8.3758299368150059274e-1);
334 vec q0_vec = (vec) (4.1066306682575781263e+1);
335 vec q1_vec = (vec) (8.6157349597130242515e+1);
336 vec q2_vec = (vec) (5.9578436142597344465e+1);
337 vec q3_vec = (vec) (1.5024001160028576121e+1);
// sqrt(3) and 2 - sqrt(3), the threshold used for the second range reduction.
338 vec sqrt3 = (vec) (1.7320508075688772935e+0);
339 vec iims3 = (vec) (2.6794919243112270647e-1);
340 vec zero = (vec) (0.0);
341 vec n_one = (vec) (-1.0);
342 vec p_one = (vec) (1.0);
344 vec F, G, H, R, RN, RD;
348 vecLong oneL = (vecLong) (1);
349 vecLong TwoL = (vecLong) (2);
// One full vector is processed per iteration.
351 for (
size_t i = 0; i < numBlocks; i++) {
353 vec a = c7x::strm_eng<0, vec>::get_adv();
// Sign flag starts cleared for every lane.
355 Sign = (vecLong) (0);
// Lanes with F < 0: negate F and record the lane in Sign.
360 __vpred cmp_cond1 = __cmp_lt_pred(F, zero);
361 F = __select(cmp_cond1, (F * n_one), F);
362 Sign = __select(cmp_cond1, oneL, Sign);
// Candidate values for both range reductions are computed up front:
//   temp1 = divspMod_atan2_i(1, F)
//   temp2 = N + 1
//   temp3 = divspMod_atan2_i(F * sqrt3 - 1, F + sqrt3)
// NOTE(review): temp2 and temp3 are derived from F and N as they are BEFORE the
// cmp_cond2 update below, yet they are selected by cmp_cond3 afterwards. Lanes
// that satisfy both conditions therefore use pre-update values -- confirm this
// is intended.
364 vec temp1 = divspMod_atan2_i<vec>(p_one, F);
365 vecLong temp2 = N + oneL;
366 vec temp3_0 = (F * sqrt3) - p_one;
367 vec temp3_1 = (F + sqrt3);
368 vec temp3 = divspMod_atan2_i<vec>(temp3_0, temp3_1);
// Lanes with F > 1: replace F by temp1 and set N to 2.
370 __vpred cmp_cond2 = __cmp_lt_pred(p_one, F);
371 F = __select((cmp_cond2), temp1, F);
372 N = __select((cmp_cond2), TwoL, N);
// Lanes with F > 2 - sqrt(3): replace N by temp2 and F by temp3.
374 __vpred cmp_cond3 = __cmp_lt_pred(iims3, F);
375 N = __select((cmp_cond3), temp2, N);
376 F = __select((cmp_cond3), temp3, F);
// Rational approximation R = RN / RD, both evaluated in Horner form in G.
// NOTE(review): G is presumably F * F -- TODO confirm.
382 RN = ((((((p3_vec * G) + p2_vec) * G) + p1_vec) * G) + p0_vec) * G;
383 RD = ((((((G + q3_vec) * G) + q2_vec) * G) + q1_vec) * G) + q0_vec;
385 R = divspMod_atan2_i<vec>(RN, RD);
// Lanes with N > 1: negate F.
389 vec temp4 = F * n_one;
390 __vpred cmp_cond4 = __cmp_gt_pred(N, oneL);
391 F = __select(cmp_cond4, temp4, F);
// Build double-precision table values by interleaving the separately held
// high and low 32-bit halves with a permute.
397 vec vTable = c7x::reinterpret<c7x::double_vec>(__permute_even_even_int(
398 MATHLIB_vperm_data_interweave_0_63, c7x::as_uchar_vec(highbits), c7x::as_uchar_vec(lowbits)));
// Lanes flagged in Sign get the negated result.
401 vec temp5 = H * n_one;
402 __vpred cmp_cond5 = __cmp_eq_pred(Sign, oneL);
403 H = __select(cmp_cond5, temp5, H);
// Predicated store of the result to pDst through address generator 0.
405 __vpred tmp = c7x::strm_agen<0, vec>::get_vpred();
406 vec *addr = c7x::strm_agen<0, vec>::get_adv(pDst);
407 __vstore_pred(tmp, addr, H);
// Double-precision vector kernel body.
// Block counters: number of full vectors in `length` and the leftover element count.
415 size_t numBlocks = 0;
416 size_t remNumBlocks = 0;
419 typedef typename c7x::make_full_vector<double>::type vec;
// Default-generated parameter templates: two streaming-engine configurations
// and one streaming address generator configuration.
421 __SE_TEMPLATE_v1 se0Params = __gen_SE_TEMPLATE_v1();
422 __SE_TEMPLATE_v1 se1Params = __gen_SE_TEMPLATE_v1();
423 __SA_TEMPLATE_v1 sa0Params = __gen_SA_TEMPLATE_v1();
428 numBlocks = length / c7x::element_count_of<vec>::value;
429 remNumBlocks = length % c7x::element_count_of<vec>::value;
// se1Params describes a 3D stream: the innermost dimension is one vector of
// elements, DIM1 jumps by the element distance between pSrc0 and pSrc1, and the
// outer dimension advances one vector per block for numBlocks blocks. DECDIM1
// is tied to DIM2 with a width of `length` elements.
// NOTE(review): presumably ICNT1 is 2 so the engine alternates between the
// pSrc0 block and the matching pSrc1 block -- TODO confirm.
434 se1Params.DIMFMT = __SE_DIMFMT_3D;
435 se1Params.ELETYPE = c7x::se_eletype<vec>::value;
436 se1Params.VECLEN = c7x::se_veclen<vec>::value;
437 se1Params.DECDIM1 = __SE_DECDIM_DIM2;
438 se1Params.DECDIM1_WIDTH = length;
440 se1Params.ICNT0 = c7x::element_count_of<vec>::value;
// NOTE(review): this is a pointer difference between two separate buffers; it
// relies on both living in one flat address space -- confirm for all callers.
442 se1Params.DIM1 = ((
double *) pSrc1) - ((
double *) pSrc0);
443 se1Params.ICNT2 = numBlocks;
444 se1Params.DIM2 = c7x::element_count_of<vec>::value;
// Working vectors and broadcast constants: +/- pi/2, pi and +/- the largest
// finite double.
452 vec HalfPI, MATHLIB_PI, Maxv, X, Y, Z, W, resVec, NegHalfPi, NegMaxv;
454 HalfPI = (vec) (1.57079632679489661923);
455 NegHalfPi = (vec) (-1.57079632679489661923);
456 MATHLIB_PI = (vec) (3.14159265358979323846);
457 Maxv = (vec) (1.7976931348623157e+308);
458 NegMaxv = (vec) (-1.7976931348623157e+308);
460 vec zero = (vec) (0.0);
// First pass: Y from engine 0, X from engine 1; the divspMod_atan2_i(Y, X)
// result is stored to pDst.
462 for (
size_t i = 0; i < numBlocks; i++) {
464 Y = c7x::strm_eng<0, vec>::get_adv();
465 X = c7x::strm_eng<1, vec>::get_adv();
467 Z = divspMod_atan2_i<vec>(Y, X);
469 __vpred tmp = c7x::strm_agen<0, vec>::get_vpred();
470 vec *addr = c7x::strm_agen<0, vec>::get_adv(pDst);
471 __vstore_pred(tmp, addr, Z);
// Re-open the streams for the second pass: engine 0 walks the sources with the
// 3D se1Params pattern, engine 1 reads pDst back with se0Params.
477 __SE0_OPEN(pSrc0, se1Params);
478 __SE1_OPEN(pDst, se0Params);
479 __SA0_OPEN(sa0Params);
// Second pass: engine 0 delivers Y and then X for the same block; engine 1
// delivers W, the value currently held in pDst.
481 for (
size_t i = 0; i < numBlocks; i++) {
483 Y = c7x::strm_eng<0, vec>::get_adv();
484 X = c7x::strm_eng<0, vec>::get_adv();
485 W = c7x::strm_eng<1, vec>::get_adv();
487 Z = divspMod_atan2_i<vec>(Y, X);
// Lanes with X < 0: result is W - pi when W > 0, otherwise W + pi.
491 vec res_pi = MATHLIB_PI + W;
492 __vpred cond1 = __cmp_lt_pred(X, zero);
493 __vpred cond2 = __cmp_lt_pred(zero, W);
494 res_pi = __select(cond2, (W - MATHLIB_PI), res_pi);
495 resVec = __select(cond1, res_pi, resVec);
// Lanes with X == 0: +pi/2 when Y > 0, otherwise -pi/2.
497 __vpred cond3 = __cmp_eq_pred(X, zero);
498 __vpred cond4 = __cmp_lt_pred(zero, Y);
500 vec res1 = __select(cond4, HalfPI, (NegHalfPi));
501 resVec = __select(cond3, res1, resVec);
// Lanes with Y == 0: 0 when X >= 0, otherwise pi. Applied after the X == 0
// case, so it takes precedence when both X and Y are zero.
503 __vpred cond5 = __cmp_eq_pred(Y, zero);
504 __vpred cond6 = __cmp_le_pred(zero, X);
506 vec res2 = __select(cond6, zero, MATHLIB_PI);
507 resVec = __select(cond5, res2, resVec);
// Lanes whose quotient Z lies beyond +/- the largest finite double are forced
// to +/- pi/2.
509 __vpred cond7 = __cmp_lt_pred(Maxv, Z);
510 resVec = __select((cond7), HalfPI, resVec);
512 __vpred cond8 = __cmp_lt_pred(Z, (NegMaxv));
513 resVec = __select(cond8, (NegHalfPi), resVec);
// Predicated store of the final result to pDst through address generator 0.
515 __vpred tmp = c7x::strm_agen<0, vec>::get_vpred();
516 vec *addr = c7x::strm_agen<0, vec>::get_adv(pDst);
517 __vstore_pred(tmp, addr, resVec);
void MATHLIB_atan2_vector< double >(size_t length, double *pSrc0, double *pSrc1, double *pDst)
MATHLIB_STATUS MATHLIB_atan2< double >(size_t length, double *pSrc0, double *pSrc1, double *pDst)
static vecType atan22f_sr1i_atan2_i(vecType g1, vecType pih, vecBool s, vecBool bn, vecBool an)
void MATHLIB_atan2_vector< float >(size_t length, float *pSrc0, float *pSrc1, float *pDst)
static vecType divspMod_atan2_i(vecType a, vecType b)
MATHLIB_STATUS MATHLIB_atan2(size_t length, T *pSrc0, T *pSrc1, T *pDst)
static void MATHLIB_atan2_vector(size_t length, T *pSrc0, T *pSrc1, T *pDst)
MATHLIB_STATUS MATHLIB_atan2< float >(size_t length, float *pSrc0, float *pSrc1, float *pDst)
static void atandpMod_atan2dpi_dp(__SE_TEMPLATE_v1 *restrict se0Params, __SA_TEMPLATE_v1 *restrict sa0Params, size_t length, T *restrict pSrc0, T *restrict pDst)
double MATHLIB_atan2_scalar_ci< double >(double a, double b)
float MATHLIB_atan2_scalar_ci< float >(float a, float b)
MATHLIB_STATUS MATHLIB_atan2_dp(size_t length, double *pSrc0, double *pSrc1, double *pDst)
This function is the C interface for MATHLIB_atan2. Function accepts double pointers.
MATHLIB_STATUS MATHLIB_atan2_sp(size_t length, float *pSrc0, float *pSrc1, float *pDst)
This function is the C interface for MATHLIB_atan2. Function accepts float pointers.
#define MATHLIB_VTABLE_OFFSET
static c7x::uint_vec MATHLIB_LUTReadLowerBits(vecType vecOffset)
This method reads bits 31-0 of LUT value at vecOffset.
static c7x::uint_vec MATHLIB_LUTReadUpperBits(vecType vecOffset)
This method reads bits 63-32 of LUT value at vecOffset.
static void MATHLIB_SE0SE1SA0Open(__SE_TEMPLATE_v1 *se0Params, __SA_TEMPLATE_v1 *sa0Params, T *pSrc0, T *pSrc1)
This method performs SE0, SE1, and SA0 open.
static void MATHLIB_SE0SA0Close()
This method performs SE0 and SA0 close.
static void MATHLIB_SE0SE1SA0Close()
This method performs SE0, SE1, and SA0 close.
static void MATHLIB_SE0SA01DSequentialInit(__SE_TEMPLATE_v1 *se0Params, __SA_TEMPLATE_v1 *sa0Params, size_t length, T *pSrc, T *pDst)
static MATHLIB_STATUS MATHLIB_checkParams(size_t length, T *pSrc, T *pDst)
This method performs parameter checks for MATHLIB function.
static void MATHLIB_SE0SA0Open(__SE_TEMPLATE_v1 *se0Params, __SA_TEMPLATE_v1 *sa0Params, T *pSrc)
This method performs SE0 and SA0 open.
MATHLIB_STATUS_NAME
The enumeration of all status codes.