34 #define ELEMENT_COUNT(x) c7x::element_count_of<x>::value
55 static inline void MATHLIB_pow_vector(
size_t length, T *restrict pSrc0, T *restrict pSrc1, T *restrict pDst);
59 __SA_TEMPLATE_v1 *restrict sa0Params,
69 template <>
inline c7x::float_vec MATHLIB_pow_log<c7x::float_vec>(c7x::float_vec inVec)
74 typedef typename c7x::make_full_vector<float>::type vecType;
76 vecType C1, C2, C3, C4, C5, eMax, outVecMax;
77 c7x::double_vec ln2_vec;
79 zero = (c7x::uint_vec) 0;
81 ln2_vec = (c7x::double_vec) 0.693147180559945;
82 C1 = (vecType) -0.2302894f;
83 C2 = (vecType) 0.1908169f;
84 C3 = (vecType) -0.2505905f;
85 C4 = (vecType) 0.3333164f;
86 C5 = (vecType) -0.5000002f;
87 eMax = (vecType) 3.402823466e+38f;
88 outVecMax = (vecType) 88.72283905313f;
94 vecType Pol, R1, R2, R3, R4, outvec;
95 c7x::double_vec inVecVals_odd, inVecVals_even, inVecVals_oddReciprocal, inVecVals_evenReciprocal,
96 inVecReciprocalApprox_8_15, inVecReciprocalApprox_0_7, inVecVals_8_15, inVecVals_0_7, rVals_0_7, rVals_8_15,
97 TVals_8_15, TVals_0_7, NVals_odd, NVals_even, NVals_0_7, NVals_8_15, Pol_0_7, Pol_8_15, outvec_8_15, outvec_0_7;
98 c7x::uint_vec inVecReciprocal_32_63, inVecReciprocalClr_32_63, inVecReciprocalApprox_32_63, indexT, upperBitsIndexT,
107 inVecVals_odd = __high_float_to_double(inVec);
108 inVecVals_even = __low_float_to_double(inVec);
109 inVecVals_oddReciprocal = __recip(inVecVals_odd);
110 inVecVals_evenReciprocal = __recip(inVecVals_even);
114 inVecReciprocal_32_63 = c7x::as_uint_vec(
115 __permute_odd_odd_int(MATHLIB_vperm_data_interweave_0_63, c7x::as_uchar_vec(inVecVals_oddReciprocal),
116 c7x::as_uchar_vec(inVecVals_evenReciprocal)));
119 inVecReciprocalClr_32_63 = inVecReciprocal_32_63 & (c7x::uint_vec)0xFFFE0000U;
122 inVecReciprocalApprox_8_15 = c7x::reinterpret<c7x::double_vec>(__permute_high_high(
123 MATHLIB_vperm_data_interweave_0_63, c7x::as_uchar_vec(inVecReciprocalClr_32_63), c7x::as_uchar_vec(zero)));
124 inVecReciprocalApprox_0_7 = c7x::reinterpret<c7x::double_vec>(__permute_low_low(
125 MATHLIB_vperm_data_interweave_0_63, c7x::as_uchar_vec(inVecReciprocalClr_32_63), c7x::as_uchar_vec(zero)));
128 inVecVals_0_7 = c7x::reinterpret<c7x::double_vec>(__permute_low_low(
129 MATHLIB_vperm_data_dp_interweave_0_63, c7x::as_uchar_vec(inVecVals_odd), c7x::as_uchar_vec(inVecVals_even)));
130 inVecVals_8_15 = c7x::reinterpret<c7x::double_vec>(__permute_high_high(
131 MATHLIB_vperm_data_dp_interweave_0_63, c7x::as_uchar_vec(inVecVals_odd), c7x::as_uchar_vec(inVecVals_even)));
134 rVals_0_7 = (inVecReciprocalApprox_0_7 * inVecVals_0_7) - 1.0;
135 rVals_8_15 = (inVecReciprocalApprox_8_15 * inVecVals_8_15) - 1.0;
138 R1 = c7x::reinterpret<vecType>(__permute_even_even_int(MATHLIB_vperm_data_0_63,
139 c7x::as_uchar_vec(__double_to_float(rVals_8_15)),
140 c7x::as_uchar_vec(__double_to_float(rVals_0_7))));
146 Pol = (C5 * R2) + ((C4 * R3) + ((((C2 * R1) + C3) + (C1 * R2)) * R4));
153 inVecReciprocalApprox_32_63 = c7x::as_uint_vec(
154 __permute_odd_odd_int(MATHLIB_vperm_data_0_63, c7x::as_uchar_vec(inVecReciprocalApprox_8_15),
155 c7x::as_uchar_vec(inVecReciprocalApprox_0_7)));
159 c7x::uint_vec Nvec_temp = __shift_left(inVecReciprocalApprox_32_63, (c7x::uint_vec)1U);
160 Nvec_temp = __shift_right(Nvec_temp, (c7x::uint_vec)21U);
161 Nvec_temp = Nvec_temp - (c7x::uint_vec)1023U;
162 Nvec = c7x::convert_int_vec(Nvec_temp);
166 NVals_odd = __high_int_to_double(Nvec);
167 NVals_even = __low_int_to_double(Nvec);
168 NVals_0_7 = c7x::reinterpret<c7x::double_vec>(__permute_low_low(
169 MATHLIB_vperm_data_dp_interweave_0_63, c7x::as_uchar_vec(NVals_odd), c7x::as_uchar_vec(NVals_even)));
170 NVals_8_15 = c7x::reinterpret<c7x::double_vec>(__permute_high_high(
171 MATHLIB_vperm_data_dp_interweave_0_63, c7x::as_uchar_vec(NVals_odd), c7x::as_uchar_vec(NVals_even)));
179 indexT = __shift_left(inVecReciprocalApprox_32_63, (c7x::uint_vec)12U);
180 indexT = __shift_right(indexT, (c7x::uint_vec)29U);
188 TVals_8_15 = c7x::reinterpret<c7x::double_vec>(__permute_high_high(
189 MATHLIB_vperm_data_interweave_0_63, c7x::as_uchar_vec(upperBitsIndexT), c7x::as_uchar_vec(lowerBitsIndexT)));
190 TVals_0_7 = c7x::reinterpret<c7x::double_vec>(__permute_low_low(
191 MATHLIB_vperm_data_interweave_0_63, c7x::as_uchar_vec(upperBitsIndexT), c7x::as_uchar_vec(lowerBitsIndexT)));
194 TVals_8_15 = TVals_8_15 - (ln2_vec * NVals_8_15);
195 TVals_0_7 = TVals_0_7 - (ln2_vec * NVals_0_7);
202 Pol_0_7 = c7x::reinterpret<c7x::double_vec>(__permute_low_low(MATHLIB_vperm_data_dp_interweave_0_63,
203 c7x::as_uchar_vec(__high_float_to_double(Pol)),
204 c7x::as_uchar_vec(__low_float_to_double(Pol))));
205 Pol_8_15 = c7x::reinterpret<c7x::double_vec>(__permute_high_high(MATHLIB_vperm_data_dp_interweave_0_63,
206 c7x::as_uchar_vec(__high_float_to_double(Pol)),
207 c7x::as_uchar_vec(__low_float_to_double(Pol))));
210 outvec_0_7 = rVals_0_7 + TVals_0_7 + Pol_0_7;
211 outvec_8_15 = rVals_8_15 + TVals_8_15 + Pol_8_15;
214 outvec = c7x::reinterpret<vecType>(__permute_even_even_int(MATHLIB_vperm_data_0_63,
215 c7x::as_uchar_vec(__double_to_float(outvec_8_15)),
216 c7x::as_uchar_vec(__double_to_float(outvec_0_7))));
225 __vpred cmp_max = __cmp_lt_pred(eMax, inVec);
226 outvec = __select(cmp_max, outVecMax, outvec);
238 size_t numBlocks = 0;
239 size_t remNumBlocks = 0;
242 typedef typename c7x::make_full_vector<float>::type vec;
249 __SE_TEMPLATE_v1 se0Params = __gen_SE_TEMPLATE_v1();
250 __SE_TEMPLATE_v1 se1Params = __gen_SE_TEMPLATE_v1();
251 __SA_TEMPLATE_v1 sa0Params = __gen_SA_TEMPLATE_v1();
254 numBlocks = length / c7x::element_count_of<vec>::value;
255 remNumBlocks = length % c7x::element_count_of<vec>::value;
260 __SE_ELETYPE SE_ELETYPE;
261 __SE_VECLEN SE_VECLEN;
262 __SA_VECLEN SA_VECLEN;
265 typedef typename c7x::make_full_vector<float>::type vec;
268 SE_VECLEN = c7x::se_veclen<vec>::value;
269 SA_VECLEN = c7x::sa_veclen<vec>::value;
270 SE_ELETYPE = c7x::se_eletype<vec>::value;
272 se0Params.ICNT0 = length;
273 se0Params.ELETYPE = SE_ELETYPE;
274 se0Params.VECLEN = SE_VECLEN;
275 se0Params.DIMFMT = __SE_DIMFMT_1D;
277 se1Params.DIMFMT = __SE_DIMFMT_3D;
278 se1Params.ICNT0 = c7x::element_count_of<vec>::value;
279 se1Params.ELETYPE = SE_ELETYPE;
280 se1Params.VECLEN = SE_VECLEN;
282 se1Params.ICNT2 = numBlocks;
283 se1Params.DIM2 = c7x::element_count_of<vec>::value;
284 se1Params.DECDIM1 = __SE_DECDIM_DIM2;
285 se1Params.DECDIM1_WIDTH = length;
286 se1Params.DIM1 = ((
float *) pSrc1) - ((
float *) pSrc0);
288 sa0Params.ICNT0 = length;
289 sa0Params.DIM1 = length;
290 sa0Params.VECLEN = SA_VECLEN;
291 sa0Params.DIMFMT = __SA_DIMFMT_1D;
294 __SE0_OPEN(pDst, se0Params);
295 __SE1_OPEN(pSrc0, se1Params);
296 __SA0_OPEN(sa0Params);
301 vec Zero, one, intMax, inf;
302 vec_type zeroInt, signNegative;
306 intMax = (vec) 0x7fffffffu;
307 inf = (vec) 0x7F800000u;
308 zeroInt = (vec_type) 0;
309 signNegative = (vec_type) -1;
311 for (
size_t i = 0; i < numBlocks; i++) {
312 vec inVec0 = c7x::strm_eng<1, vec>::get_adv();
313 vec outVec1 = c7x::strm_eng<0, vec>::get_adv();
314 vec inVec1 = c7x::strm_eng<1, vec>::get_adv();
319 vec_type inVec1_round, inVec1_roundEven, sign;
327 inVec1_round = __float_to_int(inVec1);
328 inVec1_roundEven = inVec1_round & 1;
331 __vpred cmp_zero = __cmp_lt_pred(inVec0, Zero);
332 __vpred cmp_floateq = __cmp_eq_pred(inVec1, __int_to_float(inVec1_round));
333 __vpred cmp_odd = __negate(__cmp_eq_pred(inVec1_roundEven, zeroInt));
334 __vpred cond1 = __and(cmp_zero, cmp_floateq);
335 __vpred cond2 = __and(cond1, cmp_odd);
338 sign = __select(cond2, signNegative, sign);
340 outVec1 = __int_to_float(sign) * outVec1;
346 __vpred cond3 = __and(cmp_zero, __negate(cmp_floateq));
347 outVec1 = __select(cond3, intMax, outVec1);
349 __vpred cmp_inVec0_zero = __cmp_eq_pred(inVec0, Zero);
350 __vpred cmp_inVec1_zero_lt = __cmp_lt_pred(inVec1, Zero);
351 __vpred cond4 = __and(cmp_inVec0_zero, __negate(cmp_inVec1_zero_lt));
352 __vpred cond5 = __and(cmp_inVec0_zero, cmp_inVec1_zero_lt);
356 outVec1 = __select(cond4, Zero, outVec1);
357 outVec1 = __select(cond5, inf, outVec1);
360 __vpred cmp_inVec1_zero = __cmp_eq_pred(inVec1, Zero);
361 outVec1 = __select(cmp_inVec1_zero, one, outVec1);
363 __vpred tmp = c7x::strm_agen<0, vec>::get_vpred();
364 vec *addr = c7x::strm_agen<0, vec>::get_adv(pDst);
365 __vstore_pred(tmp, addr, outVec1);
374 __SA_TEMPLATE_v1 *restrict sa0Params,
376 float *restrict pSrc0,
377 float *restrict pDst)
381 size_t numBlocks = 0;
382 size_t remNumBlocks = 0;
385 typedef typename c7x::make_full_vector<float>::type vec;
388 numBlocks = length / c7x::element_count_of<vec>::value;
389 remNumBlocks = length % c7x::element_count_of<vec>::value;
401 vec log2_base_x16, half, negativeHalf, Zero, LnMin, LnMax, Max, C0, c1, c2, P1, P2;
402 c7x::uint_vec mask, inVec_min;
404 log2_base_x16 = (vec) 23.083120654f;
406 negativeHalf = (vec) -0.5f;
408 LnMin = (vec) -87.33654475f;
409 LnMax = (vec) 88.72283905f;
410 Max = (vec) 3.402823466E+38f;
411 C0 = (vec) 0.166668549286041f;
412 c1 = (vec) 0.500016170012920f;
413 c2 = (vec) 0.999999998618401f;
414 P1 = (vec) 0.04331970214844f;
415 P2 = (vec) 1.99663646e-6f;
416 mask = (c7x::uint_vec) 0x3u;
417 inVec_min = (c7x::uint_vec) 114u;
420 for (
size_t i = 0; i < numBlocks; i++) {
421 vec inVec = c7x::strm_eng<0, vec>::get_adv();
427 vec pol_vec, R, R2_vec, R3_vec, outVec, Nf, absNf;
428 c7x::uint_vec J, K, uN, dTAdjusted_32_63, dT_32_63, dT_0_31, inVec_small, upperBitsK, lowerBitsK, upperBitsJ,
430 c7x::int_vec N_vec, minusN;
431 c7x::double_vec KVals_8_15, KVals_0_7, JVals_8_15, JVals_0_7, dTVals_8_15, dTVals_0_7, pol_0_7, pol_8_15,
432 outVec_0_7, outVec_8_15;
435 Nf = inVec * log2_base_x16;
437 N_vec = c7x::convert<c7x::int_vec>(absNf);
443 __vpred cmp_N = __cmp_lt_pred(Nf, negativeHalf);
444 N_vec = __select(cmp_N, minusN, N_vec);
451 R = (inVec - (P1 * __int_to_float(N_vec))) - (P2 * __int_to_float(N_vec));
456 pol_vec = (R * c2) + ((R3_vec * C0) + (R2_vec * c1));
463 uN = c7x::convert<c7x::uint_vec>(N_vec);
473 KVals_8_15 = c7x::reinterpret<c7x::double_vec>(__permute_high_high(
474 MATHLIB_vperm_data_interweave_0_63, c7x::as_uchar_vec(upperBitsK), c7x::as_uchar_vec(lowerBitsK)));
475 KVals_0_7 = c7x::reinterpret<c7x::double_vec>(__permute_low_low(
476 MATHLIB_vperm_data_interweave_0_63, c7x::as_uchar_vec(upperBitsK), c7x::as_uchar_vec(lowerBitsK)));
477 JVals_8_15 = c7x::reinterpret<c7x::double_vec>(__permute_high_high(
478 MATHLIB_vperm_data_interweave_0_63, c7x::as_uchar_vec(upperBitsJ), c7x::as_uchar_vec(lowerBitsJ)));
479 JVals_0_7 = c7x::reinterpret<c7x::double_vec>(__permute_low_low(
480 MATHLIB_vperm_data_interweave_0_63, c7x::as_uchar_vec(upperBitsJ), c7x::as_uchar_vec(lowerBitsJ)));
483 dTVals_8_15 = KVals_8_15 * JVals_8_15;
484 dTVals_0_7 = KVals_0_7 * JVals_0_7;
491 dT_32_63 = c7x::reinterpret<c7x::uint_vec>(__permute_odd_odd_int(
492 MATHLIB_vperm_data_0_63, c7x::as_uchar_vec(dTVals_8_15), c7x::as_uchar_vec(dTVals_0_7)));
493 dT_0_31 = c7x::reinterpret<c7x::uint_vec>(__permute_even_even_int(
494 MATHLIB_vperm_data_0_63, c7x::as_uchar_vec(dTVals_8_15), c7x::as_uchar_vec(dTVals_0_7)));
496 uN = (uN >> 4) << 20;
497 dTAdjusted_32_63 = dT_32_63 + uN;
500 dTVals_8_15 = c7x::reinterpret<c7x::double_vec>(__permute_high_high(
501 MATHLIB_vperm_data_interweave_0_63, c7x::as_uchar_vec(dTAdjusted_32_63), c7x::as_uchar_vec(dT_0_31)));
502 dTVals_0_7 = c7x::reinterpret<c7x::double_vec>(__permute_low_low(
503 MATHLIB_vperm_data_interweave_0_63, c7x::as_uchar_vec(dTAdjusted_32_63), c7x::as_uchar_vec(dT_0_31)));
505 pol_0_7 = c7x::reinterpret<c7x::double_vec>(__permute_low_low(MATHLIB_vperm_data_dp_interweave_0_63,
506 c7x::as_uchar_vec(__high_float_to_double(pol_vec)),
507 c7x::as_uchar_vec(__low_float_to_double(pol_vec))));
508 pol_8_15 = c7x::reinterpret<c7x::double_vec>(__permute_high_high(MATHLIB_vperm_data_dp_interweave_0_63,
509 c7x::as_uchar_vec(__high_float_to_double(pol_vec)),
510 c7x::as_uchar_vec(__low_float_to_double(pol_vec))));
512 outVec_0_7 = dTVals_0_7 * (1.0f + pol_0_7);
513 outVec_8_15 = dTVals_8_15 * (1.0f + pol_8_15);
515 outVec = c7x::reinterpret<vec>(__permute_even_even_int(MATHLIB_vperm_data_0_63,
516 c7x::as_uchar_vec(__double_to_float(outVec_8_15)),
517 c7x::as_uchar_vec(__double_to_float(outVec_0_7))));
523 inVec_small = ((c7x::as_uint_vec(inVec) << 1u) >> 24u);
524 __vpred cmp_inVec = __cmp_gt_pred(inVec_min, inVec_small);
525 outVec = __select(cmp_inVec, 1.0f + inVec, outVec);
531 __vpred cmp_min = __cmp_lt_pred(inVec, LnMin);
532 outVec = __select(cmp_min, Zero, outVec);
538 __vpred cmpMax = __cmp_lt_pred(LnMax, inVec);
539 outVec = __select(cmpMax, Max, outVec);
542 __vpred tmp = c7x::strm_agen<0, vec>::get_vpred();
543 vec *addr = c7x::strm_agen<0, vec>::get_adv(pDst);
544 __vstore_pred(tmp, addr, outVec);
553 size_t numBlocks = 0;
554 size_t remNumBlocks = 0;
557 typedef typename c7x::make_full_vector<float>::type vec;
559 __SE_TEMPLATE_v1 se0Params = __gen_SE_TEMPLATE_v1();
560 __SA_TEMPLATE_v1 sa0Params = __gen_SA_TEMPLATE_v1();
563 numBlocks = length / c7x::element_count_of<vec>::value;
564 remNumBlocks = length % c7x::element_count_of<vec>::value;
579 zero_vec = (vec) 0.0f;
583 for (
size_t i = 0; i < numBlocks; i++) {
584 vec inVec0 = c7x::strm_eng<0, vec>::get_adv();
585 vec inVec1 = c7x::strm_eng<1, vec>::get_adv();
590 vec inVec_log, inVec_exp;
595 inVec_log = __abs(inVec0);
597 __vpred cmp_one = __cmp_eq_pred(inVec_log, one);
598 inVec_exp = __select(cmp_one, zero_vec, inVec_exp);
601 __vpred tmp = c7x::strm_agen<0, vec>::get_vpred();
602 vec *addr = c7x::strm_agen<0, vec>::get_adv(pDst);
603 __vstore_pred(tmp, addr, inVec_exp);
611 static inline c7x::double_vec
cmn_DIVDP_opt(c7x::double_vec a, c7x::double_vec b)
614 c7x::double_vec Two = (c7x::double_vec)(2.0f);
618 X = X * (Two - (b * X));
619 X = X * (Two - (b * X));
620 X = X * (Two - (b * X));
626 template <
typename T>
627 static inline void MATHLIB_pow_log_opt(
size_t length, T *restrict pSrc0, T *restrict pSrc1, T *restrict pDst);
634 size_t numBlocks = 0;
635 size_t remNumBlocks = 0;
638 typedef typename c7x::make_full_vector<double>::type vec;
640 __SE_TEMPLATE_v1 se0Params = __gen_SE_TEMPLATE_v1();
641 __SA_TEMPLATE_v1 sa0Params = __gen_SA_TEMPLATE_v1();
646 numBlocks = length / c7x::element_count_of<vec>::value;
647 remNumBlocks = length % c7x::element_count_of<vec>::value;
655 vec Half, MAXe, srHalf, Half_sq, MINe, a0, a1, a2, b0, b1, b2, c1_vec, c2_vec, W, X, Y, Z, zn, zd, Rz, Sa, Bd, Cn, Da;
658 Half_sq = (vec) 0.5 * 0.5;
659 MAXe = (vec) 1.7976931348623157e+308;
660 srHalf = (vec) 0.70710678118654752440;
661 MINe = (vec) 2.2250738585072014e-308;
662 a0 = (vec) -0.64124943423745581147e+2;
663 a1 = (vec) 0.16383943563021534222e+2;
664 a2 = (vec) -0.78956112887491257267e+0;
665 b0 = (vec) -0.76949932108494879777e+3;
666 b1 = (vec) 0.31203222091924532844e+3;
667 b2 = (vec) -0.35667977739034646171e+2;
668 c1_vec = (vec) 0.693359375;
669 c2_vec = (vec) -2.121944400546905827679e-4;
671 c7x::long_vec long_zero_vec = (c7x::long_vec) 0;
672 vec double_zero_vec = (vec) 0.0;
673 vec outMAX = (vec)(308.254715974092);
675 for (
size_t i = 0; i < numBlocks; i++) {
677 vec a = c7x::strm_eng<0, vec>::get_adv();
679 c7x::long_vec exp_ = c7x::as_long_vec((c7x::as_ulong_vec(Y) << 1) >> 53);
681 c7x::ulong_vec upper = c7x::as_ulong_vec(Y) & (0x000FFFFF00000000u);
682 upper = 0x3FE0000000000000u | upper;
684 Z = c7x::as_double_vec((0x00000000FFFFFFFFu & c7x::as_ulong_vec(Y)) | upper);
686 __vpred cmp1 = __cmp_eq_pred(exp_, long_zero_vec);
687 Z = __select(cmp1, double_zero_vec, Z);
689 vec z_minus_half = Z - Half;
690 vec z_mul_half = (Z * Half) + Half;
691 __vpred cmp2 = __cmp_lt_pred(srHalf, Z);
692 zn = __select(cmp2, (z_minus_half - Half), z_minus_half);
693 zd = __select(cmp2, z_mul_half, (z_mul_half - Half_sq));
696 __vpred tmp = c7x::strm_agen<0, vec>::get_vpred();
697 vec * addr = c7x::strm_agen<0, vec>::get_adv(pDst);
698 __vstore_pred(tmp, addr, X);
704 for (
size_t i = 0; i < numBlocks; i++) {
706 X = c7x::strm_eng<0, vec>::get_adv();
709 Bd = ((((W + b2) * W) + b1) * W) + b0;
710 Cn = (((W * a2) + a1) * W) + a0;
713 __vpred tmp = c7x::strm_agen<0, vec>::get_vpred();
714 vec * addr = c7x::strm_agen<0, vec>::get_adv(pDst);
715 __vstore_pred(tmp, addr, Sa);
722 __SE1_OPEN(pDst, se0Params);
723 for (
size_t i = 0; i < numBlocks; i++) {
725 vec a = c7x::strm_eng<0, vec>::get_adv();
726 Sa = c7x::strm_eng<1, vec>::get_adv();
729 c7x::long_vec exp_ = c7x::as_long_vec((c7x::as_ulong_vec(Y) << 1) >> 53);
730 c7x::long_vec N_vec = exp_ - 1022;
732 c7x::ulong_vec upper = c7x::as_ulong_vec(Y) & (0x000FFFFF00000000u);
733 upper = 0x3FE0000000000000u | upper;
735 Z = c7x::as_double_vec((0x00000000FFFFFFFFu & c7x::as_ulong_vec(Y)) | upper);
737 __vpred cmp1 = __cmp_eq_pred(exp_, long_zero_vec);
738 Z = __select(cmp1, double_zero_vec, Z);
740 __vpred cmp2 = __cmp_lt_pred(srHalf, Z);
742 N_vec = __select(cmp2, N_vec, (N_vec - 1));
744 Cn = __low_int_to_double(c7x::as_int_vec(N_vec));
745 Da = ((Cn * c2_vec) + Sa) + (Cn * c1_vec);
754 __vpred cmp_min_vec = __cmp_lt_pred(Y, MINe);
755 Da = __select(cmp_min_vec, -MAXe, Da);
760 __vpred cmp_max_vec = __cmp_lt_pred(MAXe, Y);
761 Da = __select(cmp_max_vec, outMAX, Da);
763 __vpred tmp = c7x::strm_agen<0, vec>::get_vpred();
764 vec * addr = c7x::strm_agen<0, vec>::get_adv(pDst);
765 __vstore_pred(tmp, addr, Da);
777 size_t numBlocks = 0;
778 size_t remNumBlocks = 0;
781 typedef typename c7x::make_full_vector<double>::type vec;
783 __SE_TEMPLATE_v1 se0Params = __gen_SE_TEMPLATE_v1();
784 __SE_TEMPLATE_v1 se1Params = __gen_SE_TEMPLATE_v1();
785 __SA_TEMPLATE_v1 sa0Params = __gen_SA_TEMPLATE_v1();
788 numBlocks = length / c7x::element_count_of<vec>::value;
789 remNumBlocks = length % c7x::element_count_of<vec>::value;
794 __SE_ELETYPE SE_ELETYPE;
795 __SE_VECLEN SE_VECLEN;
796 __SA_VECLEN SA_VECLEN;
799 SE_VECLEN = c7x::se_veclen<vec>::value;
800 SA_VECLEN = c7x::sa_veclen<vec>::value;
801 SE_ELETYPE = c7x::se_eletype<vec>::value;
803 se0Params.ICNT0 = length;
804 se0Params.ELETYPE = SE_ELETYPE;
805 se0Params.VECLEN = SE_VECLEN;
806 se0Params.DIMFMT = __SE_DIMFMT_1D;
808 se1Params.DIMFMT = __SE_DIMFMT_3D;
809 se1Params.ICNT0 = c7x::element_count_of<vec>::value;
810 se1Params.ELETYPE = SE_ELETYPE;
811 se1Params.VECLEN = SE_VECLEN;
813 se1Params.ICNT2 = numBlocks;
814 se1Params.DIM2 = c7x::element_count_of<vec>::value;
815 se1Params.DECDIM1 = __SE_DECDIM_DIM2;
816 se1Params.DECDIM1_WIDTH = length;
817 se1Params.DIM1 = ((
double *) pSrc1) - ((
double *) pSrc0);
819 sa0Params.ICNT0 = length;
820 sa0Params.DIM1 = length;
821 sa0Params.VECLEN = SA_VECLEN;
822 sa0Params.DIMFMT = __SA_DIMFMT_1D;
824 __SE0_OPEN(pDst, se0Params);
825 __SE1_OPEN(pSrc0, se1Params);
826 __SA0_OPEN(sa0Params);
828 vec zeroVec, one, X2;
830 zeroVec = (vec) 0.0f;
832 c7x::long_vec zero_int = (c7x::long_vec)(0);
833 c7x::long_vec one_int = (c7x::long_vec)(1);
834 c7x::int_vec Sign = (c7x::int_vec)(1);
835 c7x::int_vec negate_Sign = (c7x::int_vec)(-1);
837 c7x::long_vec valueL1 = (c7x::long_vec)(0X7fffffffffffffff);
838 c7x::long_vec valueL2 = (c7x::long_vec)(0X7ff0000000000000);
840 for (
size_t i = 0; i < numBlocks; i++) {
841 vec inVec0 = c7x::strm_eng<1, vec>::get_adv();
842 vec inVec1 = c7x::strm_eng<1, vec>::get_adv();
843 vec W = c7x::strm_eng<0, vec>::get_adv();
845 c7x::int_vec y = __double_to_int(inVec1);
847 __vpred cmp_cond1 = __cmp_lt_pred(inVec0, zeroVec);
848 __vpred cmp_cond2 = __cmp_eq_pred(inVec1, __low_int_to_double(y));
850 cmp_cond2 = __and(cmp_cond2, cmp_cond1);
852 c7x::long_vec y_long = c7x::as_long_vec(y);
853 __vpred cmp_cond3 = __cmp_eq_pred((y_long & one_int), zero_int);
855 cmp_cond3 = __and(cmp_cond2, __negate(cmp_cond3));
857 Sign = __select(cmp_cond3, negate_Sign, Sign);
859 X2 = __low_int_to_double(Sign) * W;
861 __vpred cmp_cond5 = __cmp_eq_pred(inVec1, __low_int_to_double(y));
862 cmp_cond5 = __and(cmp_cond1, __negate(cmp_cond5));
863 X2 = __select(cmp_cond5, c7x::as_double_vec(valueL1), X2);
865 __vpred cmp_cond6 = __cmp_eq_pred(inVec0, zeroVec);
866 __vpred cmp_cond7 = __cmp_lt_pred(inVec1, zeroVec);
867 cmp_cond7 = __and(cmp_cond6, cmp_cond7);
868 X2 = __select(cmp_cond7, c7x::as_double_vec(valueL2), X2);
871 __vpred tmp = c7x::strm_agen<0, vec>::get_vpred();
872 vec * addr = c7x::strm_agen<0, vec>::get_adv(pDst);
873 __vstore_pred(tmp, addr, X2);
881 __SA_TEMPLATE_v1 *restrict sa0Params,
883 double *restrict pSrc1,
884 double *restrict pDst)
889 size_t numBlocks = 0;
890 size_t remNumBlocks = 0;
893 typedef typename c7x::make_full_vector<double>::type vec;
896 numBlocks = length / c7x::element_count_of<vec>::value;
897 remNumBlocks = length % c7x::element_count_of<vec>::value;
905 __SE1_OPEN(pSrc1, *se0Params);
907 vec Halfe, Maxe, LnMaxe, LnMine, a0e, a1e, a2e, b0e, b1e, b2e, c1e, C2e, L2e, Zero;
910 Maxe = (vec) 1.7976931348623157e+308;
911 LnMaxe = (vec) 709.78271289338;
912 LnMine = (vec) -708.3964185322641;
913 a0e = (vec) 0.249999999999999993;
914 a1e = (vec) 0.694360001511792852e-2;
915 a2e = (vec) 0.165203300268279130e-4;
917 b1e = (vec) 0.555538666969001188e-1;
918 b2e = (vec) 0.495862884905441294e-3;
919 c1e = (vec) 0.693359375;
920 C2e = (vec) -2.1219444005469058277e-4;
921 L2e = (vec) 1.4426950408889634074;
924 vec Ye, Xe, We, Re, Se, Be, Ce, De;
927 for (
size_t i = 0; i < numBlocks; i++) {
928 vec invec = c7x::strm_eng<0, vec>::get_adv();
929 vec b = c7x::strm_eng<1, vec>::get_adv();
934 c7x::int_vec Ne = __double_to_int(Ce);
936 Se = __low_int_to_double(Ne);
937 Xe = (Ye - (Se * c1e)) - (Se * C2e);
940 Be = (((b2e * We) + b1e) * We) + b0e;
941 De = ((((a2e * We) + a1e) * We) + a0e) * Xe;
948 c7x::int_vec upper = 1024 + (Ne);
949 upper = c7x::as_int_vec((c7x::as_uint_vec(upper) << 20));
950 Se = c7x::as_double_vec(c7x::as_long_vec(upper) << 32);
954 __vpred cmp_Max = __cmp_lt_pred(LnMaxe, invec);
955 Ce = __select(__negate(cmp_Max), Ce, Maxe);
957 __vpred cmp_Min = __cmp_lt_pred(invec, LnMine);
958 Ce = __select(cmp_Min, Zero, Ce);
961 __vpred tmp = c7x::strm_agen<0, vec>::get_vpred();
962 vec * addr = c7x::strm_agen<0, vec>::get_adv(pDst);
963 __vstore_pred(tmp, addr, Ce);
975 size_t numBlocks = 0;
976 size_t remNumBlocks = 0;
979 typedef typename c7x::make_full_vector<double>::type vec;
981 __SE_TEMPLATE_v1 se0Params = __gen_SE_TEMPLATE_v1();
982 __SA_TEMPLATE_v1 sa0Params = __gen_SA_TEMPLATE_v1();
985 numBlocks = length / c7x::element_count_of<vec>::value;
986 remNumBlocks = length % c7x::element_count_of<vec>::value;
1013 pDst[0] = MATHLIB_pow_scalar_ci<T>(pSrc0[0], pSrc1[0]);
1016 MATHLIB_pow_vector<T>(length, pSrc0, pSrc1, pDst);
1031 MATHLIB_pow<float>(
size_t length,
float *restrict pSrc0,
float *restrict pSrc1,
float *restrict pDst);
1035 MATHLIB_pow<double>(
size_t length,
double *restrict pSrc0,
double *restrict pSrc1,
double *restrict pDst);
template MATHLIB_STATUS MATHLIB_pow< double >(size_t length, double *restrict pSrc0, double *restrict pSrc1, double *restrict pDst)
static c7x::double_vec cmn_DIVDP_opt(c7x::double_vec a, c7x::double_vec b)
MATHLIB_STATUS MATHLIB_pow_dp(size_t length, double *restrict pSrc0, double *restrict pSrc1, double *restrict pDst)
MATHLIB_STATUS MATHLIB_pow_sp(size_t length, float *restrict pSrc0, float *restrict pSrc1, float *restrict pDst)
MATHLIB_STATUS MATHLIB_pow_cond< double >(size_t length, double *restrict pSrc0, double *restrict pSrc1, double *restrict pDst)
static MATHLIB_STATUS MATHLIB_pow_exp(__SE_TEMPLATE_v1 *restrict se0Params, __SA_TEMPLATE_v1 *restrict sa0Params, size_t length, T *restrict pSrc0, T *restrict pDst)
MATHLIB_STATUS MATHLIB_pow_cond< float >(size_t length, float *restrict pSrc0, float *restrict pSrc1, float *restrict pDst)
static vecType MATHLIB_pow_log(vecType inVec)
static void MATHLIB_pow_vector(size_t length, T *restrict pSrc0, T *restrict pSrc1, T *restrict pDst)
static void MATHLIB_pow_log_opt(size_t length, T *restrict pSrc0, T *restrict pSrc1, T *restrict pDst)
void MATHLIB_pow_vector< double >(size_t length, double *restrict pSrc0, double *restrict pSrc1, double *restrict pDst)
MATHLIB_STATUS MATHLIB_pow_exp< double >(__SE_TEMPLATE_v1 *restrict se0Params, __SA_TEMPLATE_v1 *restrict sa0Params, size_t length, double *restrict pSrc1, double *restrict pDst)
void MATHLIB_pow_log_opt< double >(size_t length, double *restrict pSrc0, double *restrict pSrc1, double *restrict pDst)
static MATHLIB_STATUS MATHLIB_pow_cond(size_t length, T *restrict pSrc0, T *restrict pSrc1, T *restrict pDst)
template MATHLIB_STATUS MATHLIB_pow< float >(size_t length, float *restrict pSrc0, float *restrict pSrc1, float *restrict pDst)
void MATHLIB_pow_vector< float >(size_t length, float *restrict pSrc0, float *restrict pSrc1, float *restrict pDst)
MATHLIB_STATUS MATHLIB_pow_exp< float >(__SE_TEMPLATE_v1 *restrict se0Params, __SA_TEMPLATE_v1 *restrict sa0Params, size_t length, float *restrict pSrc0, float *restrict pDst)
MATHLIB_STATUS MATHLIB_pow(size_t length, T *restrict pSrc0, T *restrict pSrc1, T *restrict pDst)
#define MATHLIB_KTABLE_OFFSET
#define MATHLIB_LOGTABLE_OFFSET
static c7x::uint_vec MATHLIB_LUTReadLowerBits(vecType vecOffset)
This method reads bits 31-0 of LUT value at vecOffset.
#define MATHLIB_JTABLE_OFFSET
static c7x::uint_vec MATHLIB_LUTReadUpperBits(vecType vecOffset)
This method reads bits 63-32 of LUT value at vecOffset.
static void MATHLIB_SE0SE1SA0Open(__SE_TEMPLATE_v1 *se0Params, __SA_TEMPLATE_v1 *sa0Params, T *pSrc0, T *pSrc1)
This method performs SE0, SE1, and SA0 open.
static void MATHLIB_SE0SA0Close()
This method performs SE0 and SA0 close.
static void MATHLIB_SE0SE1SA0Close()
This method performs SE0, SE1, and SA0 close.
static void MATHLIB_SE0SA01DSequentialInit(__SE_TEMPLATE_v1 *se0Params, __SA_TEMPLATE_v1 *sa0Params, size_t length, T *pSrc, T *pDst)
static MATHLIB_STATUS MATHLIB_checkParams(size_t length, T *pSrc, T *pDst)
This method performs parameter checks for MATHLIB function.
static void MATHLIB_SE0SA0Open(__SE_TEMPLATE_v1 *se0Params, __SA_TEMPLATE_v1 *sa0Params, T *pSrc)
This method performs SE0 and SA0 open.
MATHLIB_STATUS_NAME
The enumeration of all status codes.