// Shorthand for c7x::element_count_of<x>::value, where x is a vector type.
34 #define ELEMENT_COUNT(x) c7x::element_count_of<x>::value
// Helper taking two double vectors a and b and returning a double vector.
// Only part of the body is present here: the initial value of X and the
// statement that produces the result are on lines that are not shown.
56 static inline c7x::double_vec
cmn_DIVDP_opt(c7x::double_vec a, c7x::double_vec b)
// Constant 2.0 (2.0f is exactly representable, so the float literal loses nothing).
59 c7x::double_vec Two = (c7x::double_vec)(2.0f);
// Three identical refinement steps X <- X * (2 - b * X). This is the
// Newton-Raphson reciprocal update, whose non-zero fixed point is X = 1 / b.
62 X = X * (Two - (b * X));
63 X = X * (Two - (b * X));
64 X = X * (Two - (b * X));
// Setup for a double-precision log10 kernel: block counts, stream templates
// and the constants used by the loops. Some statements of this section
// (for example the assignment of Half) are not present in this listing.
76 size_t remNumBlocks = 0;
// vec is the vector type produced by c7x::make_full_vector<double>.
79 typedef typename c7x::make_full_vector<double>::type vec;
// Default-generated streaming-engine and address-generator templates.
81 __SE_TEMPLATE_v1 se0Params = __gen_SE_TEMPLATE_v1();
82 __SA_TEMPLATE_v1 sa0Params = __gen_SA_TEMPLATE_v1();
// numBlocks: whole vectors contained in length; remNumBlocks: leftover elements.
87 numBlocks = length / c7x::element_count_of<vec>::value;
88 remNumBlocks = length % c7x::element_count_of<vec>::value;
100 vec Half, MAXe, srHalf, Half_sq, MINe, a0, a1, a2, b0, b1, b2, c1, c2, c10e, W, X, Y, Z, zn, zd, Rz, Sa, Bd, Cn, Da;
// The cast binds tighter than '*', so this is ((vec) 0.5) * 0.5, i.e. 0.25.
103 Half_sq = (vec) 0.5 * 0.5;
// Largest finite double.
104 MAXe = (vec) 1.7976931348623157e+308;
// sqrt(0.5).
105 srHalf = (vec) 0.70710678118654752440;
// Smallest positive normal double.
106 MINe = (vec) 2.2250738585072014e-308;
// a0..a2 and b0..b2 are the coefficients of the Cn and Bd polynomials in W.
107 a0 = (vec) -0.64124943423745581147e+2;
108 a1 = (vec) 0.16383943563021534222e+2;
109 a2 = (vec) -0.78956112887491257267e+0;
110 b0 = (vec) -0.76949932108494879777e+3;
111 b1 = (vec) 0.31203222091924532844e+3;
112 b2 = (vec) -0.35667977739034646171e+2;
// c1 + c2 equals ln(2); the constant is split into a coarse part (c1) and a
// small correction (c2).
113 c1 = (vec) 0.693359375;
114 c2 = (vec) -2.121944400546905827679e-4;
// log10(e).
115 c10e = (vec) 0.43429448190325182765;
117 c7x::long_vec long_zero_vec = (c7x::long_vec) 0;
118 vec double_zero_vec = (vec) 0.0;
// Result substituted when the input exceeds MAXe; approximately log10(MAXe).
119 vec outMAX = (vec)(308.254715974092);
// Pass over numBlocks vectors that stores Sa to pDst. The statements that
// assign Y, W and Sa are not present in this listing.
122 for (
size_t i = 0; i < numBlocks; i++) {
// Fetch the next input vector from streaming engine 0 and advance it.
124 vec a = c7x::strm_eng<0, vec>::get_adv();
// Shifting left by 1 drops bit 63 (sign); shifting right by 53 leaves
// bits 62..52, the 11-bit biased exponent, in the low bits.
127 c7x::long_vec exp_ = c7x::as_long_vec((c7x::as_ulong_vec(Y) << 1) >> 53);
// Keep fraction bits 51..32 of Y and force the exponent field to 0x3FE.
129 c7x::ulong_vec upper = c7x::as_ulong_vec(Y) & (0x000FFFFF00000000u);
130 upper = 0x3FE0000000000000u | upper;
// Re-attach the low 32 fraction bits: Z has Y's fraction with biased
// exponent 1022, so Z lies in [0.5, 1).
132 Z = c7x::as_double_vec((0x00000000FFFFFFFFu & c7x::as_ulong_vec(Y)) | upper);
// Where the exponent field of Y is zero, replace Z with 0.0.
134 __vpred cmp1 = __cmp_eq_pred(exp_, long_zero_vec);
135 Z = __select(cmp1, double_zero_vec, Z);
// Numerator zn and denominator zd depend on whether Z > sqrt(0.5):
//   Z >  sqrt(0.5): zn = Z - 1,   zd = Z / 2 + 0.5
//   otherwise     : zn = Z - 0.5, zd = Z / 2 + 0.25
137 vec z_minus_half = Z - Half;
138 vec z_mul_half = (Z * Half) + Half;
139 __vpred cmp2 = __cmp_lt_pred(srHalf, Z);
140 zn = __select(cmp2, (z_minus_half - Half), z_minus_half);
141 zd = __select(cmp2, z_mul_half, (z_mul_half - Half_sq));
// Horner evaluation: Bd = W^3 + b2*W^2 + b1*W + b0, Cn = a2*W^2 + a1*W + a0.
145 Bd = ((((W + b2) * W) + b1) * W) + b0;
146 Cn = (((W * a2) + a1) * W) + a0;
// Predicated store of Sa to the next pDst address from address generator 0.
150 __vpred tmp = c7x::strm_agen<0, vec>::get_vpred();
151 vec * addr = c7x::strm_agen<0, vec>::get_adv(pDst);
152 __vstore_pred(tmp, addr, Sa);
// Open streaming engine 1 on pDst with the same template as SE0, so the loop
// can read back through SE1 what was previously written to pDst.
158 __SE1_OPEN(pDst, se0Params);
// Pass that combines the exponent contribution with Sa and applies the
// range limits. The assignment of Y and the statements between the Da
// computation and the limit checks are not present in this listing.
160 for (
size_t i = 0; i < numBlocks; i++) {
162 vec a = c7x::strm_eng<0, vec>::get_adv();
163 Sa = c7x::strm_eng<1, vec>::get_adv();
// 11-bit biased exponent of Y (sign shifted out, fraction shifted away).
166 c7x::long_vec exp_ = c7x::as_long_vec((c7x::as_ulong_vec(Y) << 1) >> 53);
// Exponent relative to a mantissa normalised to [0.5, 1).
167 c7x::long_vec N = exp_ - 1022;
// Rebuild Z: Y's fraction bits with the exponent field forced to 0x3FE.
169 c7x::ulong_vec upper = c7x::as_ulong_vec(Y) & (0x000FFFFF00000000u);
170 upper = 0x3FE0000000000000u | upper;
172 Z = c7x::as_double_vec((0x00000000FFFFFFFFu & c7x::as_ulong_vec(Y)) | upper);
// Where the exponent field of Y is zero, replace Z with 0.0.
174 __vpred cmp1 = __cmp_eq_pred(exp_, long_zero_vec);
175 Z = __select(cmp1, double_zero_vec, Z);
177 __vpred cmp2 = __cmp_lt_pred(srHalf, Z);
// Keep N where Z > sqrt(0.5); otherwise use N - 1.
179 N = __select(cmp2, N, (N - 1));
// Convert N to double, then add N * (c1 + c2) = N * ln(2) to Sa in two parts.
181 Cn = __low_int_to_double(c7x::as_int_vec(N));
182 Da = ((Cn * c2) + Sa) + (Cn * c1);
// Inputs below MINe produce -MAXe.
192 __vpred cmp_min = __cmp_lt_pred(Y, MINe);
193 Da = __select(cmp_min, -MAXe, Da);
// Inputs above MAXe produce outMAX.
198 __vpred cmp_max = __cmp_lt_pred(MAXe, Y);
199 Da = __select(cmp_max, outMAX, Da);
// Predicated store of Da to the next pDst address from address generator 0.
201 __vpred tmp = c7x::strm_agen<0, vec>::get_vpred();
202 vec * addr = c7x::strm_agen<0, vec>::get_adv(pDst);
203 __vstore_pred(tmp, addr, Da);
// Setup for a second double-precision log10 kernel: block counts, stream
// templates and constants. Some statements of this section (for example the
// assignment of Half) are not present in this listing.
214 size_t numBlocks = 0;
215 size_t remNumBlocks = 0;
// vec is the vector type produced by c7x::make_full_vector<double>.
218 typedef typename c7x::make_full_vector<double>::type vec;
// Default-generated streaming-engine and address-generator templates.
220 __SE_TEMPLATE_v1 se0Params = __gen_SE_TEMPLATE_v1();
221 __SA_TEMPLATE_v1 sa0Params = __gen_SA_TEMPLATE_v1();
// numBlocks: whole vectors contained in length; remNumBlocks: leftover elements.
226 numBlocks = length / c7x::element_count_of<vec>::value;
227 remNumBlocks = length % c7x::element_count_of<vec>::value;
239 vec Half, MAXe, srHalf, Half_sq, MINe, a0, a1, a2, b0, b1, b2, c1, c2, c10e, W, X, Y, Z, zn, zd, Rz, Sa, Bd, Cn, Da;
// The cast binds tighter than '*', so this is ((vec) 0.5) * 0.5, i.e. 0.25.
242 Half_sq = (vec) 0.5 * 0.5;
// Largest finite double.
243 MAXe = (vec) 1.7976931348623157e+308;
// sqrt(0.5).
244 srHalf = (vec) 0.70710678118654752440;
// Smallest positive normal double.
245 MINe = (vec) 2.2250738585072014e-308;
// a0..a2 and b0..b2 are the coefficients of the Cn and Bd polynomials in W.
246 a0 = (vec) -0.64124943423745581147e+2;
247 a1 = (vec) 0.16383943563021534222e+2;
248 a2 = (vec) -0.78956112887491257267e+0;
249 b0 = (vec) -0.76949932108494879777e+3;
250 b1 = (vec) 0.31203222091924532844e+3;
251 b2 = (vec) -0.35667977739034646171e+2;
// c1 + c2 equals ln(2); the constant is split into a coarse part (c1) and a
// small correction (c2).
252 c1 = (vec) 0.693359375;
253 c2 = (vec) -2.121944400546905827679e-4;
// log10(e).
254 c10e = (vec) 0.43429448190325182765;
256 c7x::long_vec long_zero_vec = (c7x::long_vec) 0;
257 vec double_zero_vec = (vec) 0.0;
// Result substituted when the input exceeds MAXe; approximately log10(MAXe).
258 vec outMAX = (vec)(308.254715974092);
// First pass: derives zn and zd from the input mantissa and stores X to pDst.
// The statements that assign Y and X are not present in this listing.
261 for (
size_t i = 0; i < numBlocks; i++) {
// Fetch the next input vector from streaming engine 0 and advance it.
263 vec a = c7x::strm_eng<0, vec>::get_adv();
// Shifting left by 1 drops bit 63 (sign); shifting right by 53 leaves
// bits 62..52, the 11-bit biased exponent, in the low bits.
266 c7x::long_vec exp_ = c7x::as_long_vec((c7x::as_ulong_vec(Y) << 1) >> 53);
// Keep fraction bits 51..32 of Y and force the exponent field to 0x3FE.
268 c7x::ulong_vec upper = c7x::as_ulong_vec(Y) & (0x000FFFFF00000000u);
269 upper = 0x3FE0000000000000u | upper;
// Re-attach the low 32 fraction bits: Z has Y's fraction with biased
// exponent 1022, so Z lies in [0.5, 1).
271 Z = c7x::as_double_vec((0x00000000FFFFFFFFu & c7x::as_ulong_vec(Y)) | upper);
// Where the exponent field of Y is zero, replace Z with 0.0.
273 __vpred cmp1 = __cmp_eq_pred(exp_, long_zero_vec);
274 Z = __select(cmp1, double_zero_vec, Z);
// Numerator zn and denominator zd depend on whether Z > sqrt(0.5):
//   Z >  sqrt(0.5): zn = Z - 1,   zd = Z / 2 + 0.5
//   otherwise     : zn = Z - 0.5, zd = Z / 2 + 0.25
276 vec z_minus_half = Z - Half;
277 vec z_mul_half = (Z * Half) + Half;
278 __vpred cmp2 = __cmp_lt_pred(srHalf, Z);
279 zn = __select(cmp2, (z_minus_half - Half), z_minus_half);
280 zd = __select(cmp2, z_mul_half, (z_mul_half - Half_sq));
// Predicated store of X to the next pDst address from address generator 0.
284 __vpred tmp = c7x::strm_agen<0, vec>::get_vpred();
285 vec * addr = c7x::strm_agen<0, vec>::get_adv(pDst);
286 __vstore_pred(tmp, addr, X);
// Second pass: reads X through streaming engine 0, evaluates the two
// polynomials and stores Sa to pDst. The statements that assign W and Sa are
// not present in this listing.
293 for (
size_t i = 0; i < numBlocks; i++) {
295 X = c7x::strm_eng<0, vec>::get_adv();
// Horner evaluation: Bd = W^3 + b2*W^2 + b1*W + b0, Cn = a2*W^2 + a1*W + a0.
298 Bd = ((((W + b2) * W) + b1) * W) + b0;
299 Cn = (((W * a2) + a1) * W) + a0;
// Predicated store of Sa to the next pDst address from address generator 0.
303 __vpred tmp = c7x::strm_agen<0, vec>::get_vpred();
304 vec * addr = c7x::strm_agen<0, vec>::get_adv(pDst);
305 __vstore_pred(tmp, addr, Sa);
// Open streaming engine 1 on pDst with the same template as SE0, so the loop
// can read back through SE1 what was previously written to pDst.
312 __SE1_OPEN(pDst, se0Params);
// Final pass: combines the exponent contribution with Sa and applies the
// range limits. The assignment of Y and the statements between the Da
// computation and the limit checks are not present in this listing.
314 for (
size_t i = 0; i < numBlocks; i++) {
316 vec a = c7x::strm_eng<0, vec>::get_adv();
317 Sa = c7x::strm_eng<1, vec>::get_adv();
// 11-bit biased exponent of Y (sign shifted out, fraction shifted away).
320 c7x::long_vec exp_ = c7x::as_long_vec((c7x::as_ulong_vec(Y) << 1) >> 53);
// Exponent relative to a mantissa normalised to [0.5, 1).
321 c7x::long_vec N = exp_ - 1022;
// Rebuild Z: Y's fraction bits with the exponent field forced to 0x3FE.
323 c7x::ulong_vec upper = c7x::as_ulong_vec(Y) & (0x000FFFFF00000000u);
324 upper = 0x3FE0000000000000u | upper;
326 Z = c7x::as_double_vec((0x00000000FFFFFFFFu & c7x::as_ulong_vec(Y)) | upper);
// Where the exponent field of Y is zero, replace Z with 0.0.
328 __vpred cmp1 = __cmp_eq_pred(exp_, long_zero_vec);
329 Z = __select(cmp1, double_zero_vec, Z);
331 __vpred cmp2 = __cmp_lt_pred(srHalf, Z);
// Keep N where Z > sqrt(0.5); otherwise use N - 1.
333 N = __select(cmp2, N, (N - 1));
// Convert N to double, then add N * (c1 + c2) = N * ln(2) to Sa in two parts.
335 Cn = __low_int_to_double(c7x::as_int_vec(N));
336 Da = ((Cn * c2) + Sa) + (Cn * c1);
// Inputs below MINe produce -MAXe.
346 __vpred cmp_min = __cmp_lt_pred(Y, MINe);
347 Da = __select(cmp_min, -MAXe, Da);
// Inputs above MAXe produce outMAX.
352 __vpred cmp_max = __cmp_lt_pred(MAXe, Y);
353 Da = __select(cmp_max, outMAX, Da);
// Predicated store of Da to the next pDst address from address generator 0.
355 __vpred tmp = c7x::strm_agen<0, vec>::get_vpred();
356 vec * addr = c7x::strm_agen<0, vec>::get_adv(pDst);
357 __vstore_pred(tmp, addr, Da);
// Setup for the single-precision log10 kernel: block counts, stream templates
// and constants. Declarations of some names used here (for example zero) are
// on lines that are not present in this listing.
372 size_t numBlocks = 0;
373 size_t remNumBlocks = 0;
// vec is the vector type produced by c7x::make_full_vector<float>.
376 typedef typename c7x::make_full_vector<float>::type vec;
// Default-generated streaming-engine and address-generator templates.
378 __SE_TEMPLATE_v1 se0Params = __gen_SE_TEMPLATE_v1();
379 __SA_TEMPLATE_v1 sa0Params = __gen_SA_TEMPLATE_v1();
// numBlocks: whole vectors contained in length; remNumBlocks: leftover elements.
384 numBlocks = length / c7x::element_count_of<vec>::value;
385 remNumBlocks = length % c7x::element_count_of<vec>::value;
397 vec C1, C2, C3, C4, C5, eMax, outVecMin, outVecMax;
398 c7x::double_vec ln2, base;
400 zero = (c7x::uint_vec) 0;
// ln(2) and log10(e), both held in double precision.
402 ln2 = (c7x::double_vec) 0.693147180559945;
// No 'f' suffix: a float literal would round log10(e) to about 7 significant
// digits before it is widened, discarding most of the digits written here.
403 base = (c7x::double_vec) 0.4342944819033;
// Polynomial coefficients C1..C5.
404 C1 = (vec) -0.2302894f;
405 C2 = (vec) 0.1908169f;
406 C3 = (vec) -0.2505905f;
407 C4 = (vec) 0.3333164f;
408 C5 = (vec) -0.5000002f;
// Largest finite float.
409 eMax = (vec) 3.402823466e+38f;
// 0xFF800000 is the IEEE-754 single-precision bit pattern of -infinity. It has
// to be reinterpreted, not value-converted: a plain (vec) cast of the integer
// yields 4286578688.0f instead.
410 outVecMin = c7x::reinterpret<vec>((c7x::uint_vec) 0xFF800000u);
// NOTE(review): 308.2547 is log10 of the largest double, whereas log10 of the
// largest float is about 38.53 - confirm this value is intended here.
411 outVecMax = (vec) 308.2547f;
// Main loop of the single-precision kernel, one input vector per iteration.
// Several statements (among them the assignments of r2, r3, r4, indexT,
// upperBitsIndexT and lowerBitsIndexT, and the declarations of N and outVec)
// are not present in this listing.
414 for (
size_t i = 0; i < numBlocks; i++) {
// Fetch the next input vector from streaming engine 0 and advance it.
415 vec inVec = c7x::strm_eng<0, vec>::get_adv();
420 vec pol, r1, r2, r3, r4;
421 c7x::double_vec inVecVals_odd, inVecVals_even, inVecVals_oddReciprocal, inVecVals_evenReciprocal,
422 inVecReciprocalApprox_8_15, inVecReciprocalApprox_0_7, inVecVals_8_15, inVecVals_0_7, rVals_0_7, rVals_8_15,
423 TVals_8_15, TVals_0_7, NVals_odd, NVals_even, NVals_0_7, NVals_8_15, outVec_8_15, outVec_0_7;
424 c7x::uint_vec inVecReciprocal_32_63, inVecReciprocalClr_32_63, inVecReciprocalApprox_32_63, indexT;
// Widen the float input into two double vectors (the "high" and "low"
// conversions, named odd and even here) and take the reciprocal of each.
433 inVecVals_odd = __high_float_to_double(inVec);
434 inVecVals_even = __low_float_to_double(inVec);
435 inVecVals_oddReciprocal = __recip(inVecVals_odd);
436 inVecVals_evenReciprocal = __recip(inVecVals_even);
// Collect 32-bit words of both reciprocals into one uint vector; going by the
// _32_63 suffix these are presumably the upper halves of each double.
440 inVecReciprocal_32_63 = c7x::reinterpret<c7x::uint_vec>(
441 __permute_odd_odd_int(MATHLIB_vperm_data_interweave_0_63, c7x::as_uchar_vec(inVecVals_oddReciprocal),
442 c7x::as_uchar_vec(inVecVals_evenReciprocal)));
// Clear the low 17 bits of every 32-bit word, keeping the top 15 bits.
445 inVecReciprocalClr_32_63 = inVecReciprocal_32_63 & 0xFFFE0000u;
// Pair the masked words with zero words to form double vectors again.
448 inVecReciprocalApprox_8_15 = c7x::reinterpret<c7x::double_vec>(__permute_high_high(
449 MATHLIB_vperm_data_interweave_0_63, c7x::as_uchar_vec(inVecReciprocalClr_32_63), c7x::as_uchar_vec(zero)));
450 inVecReciprocalApprox_0_7 = c7x::reinterpret<c7x::double_vec>(__permute_low_low(
451 MATHLIB_vperm_data_interweave_0_63, c7x::as_uchar_vec(inVecReciprocalClr_32_63), c7x::as_uchar_vec(zero)));
// Rearrange the widened inputs into the same _0_7 / _8_15 grouping.
454 inVecVals_0_7 = c7x::reinterpret<c7x::double_vec>(__permute_low_low(
455 MATHLIB_vperm_data_dp_interweave_0_63, c7x::as_uchar_vec(inVecVals_odd), c7x::as_uchar_vec(inVecVals_even)));
456 inVecVals_8_15 = c7x::reinterpret<c7x::double_vec>(__permute_high_high(
457 MATHLIB_vperm_data_dp_interweave_0_63, c7x::as_uchar_vec(inVecVals_odd), c7x::as_uchar_vec(inVecVals_even)));
// r = (approximate reciprocal) * input - 1.
460 rVals_0_7 = (inVecReciprocalApprox_0_7 * inVecVals_0_7) - 1.0;
461 rVals_8_15 = (inVecReciprocalApprox_8_15 * inVecVals_8_15) - 1.0;
// Narrow both r vectors to float and merge them into the single vector r1.
464 r1 = c7x::reinterpret<vec>(__permute_even_even_int(MATHLIB_vperm_data_0_63,
465 c7x::as_uchar_vec(__double_to_float(rVals_8_15)),
466 c7x::as_uchar_vec(__double_to_float(rVals_0_7))));
// Polynomial in r1..r4 with coefficients C1..C5, then scaled by base as float.
472 pol = (C5 * r2) + ((C4 * r3) + ((((C2 * r1) + C3) + (C1 * r2)) * r4));
473 pol = pol * __double_to_float(base);
// Gather 32-bit words of the approximate reciprocals, then extract an 11-bit
// field from each (shift left 1, shift right 21) and subtract the bias 1023.
480 inVecReciprocalApprox_32_63 = c7x::reinterpret<c7x::uint_vec>(
481 __permute_odd_odd_int(MATHLIB_vperm_data_0_63, c7x::as_uchar_vec(inVecReciprocalApprox_8_15),
482 c7x::as_uchar_vec(inVecReciprocalApprox_0_7)));
484 N = c7x::convert<c7x::int_vec>(((inVecReciprocalApprox_32_63 << 1) >> 21) - 1023);
// Convert N to double and rearrange it into the _0_7 / _8_15 grouping.
487 NVals_odd = __high_int_to_double(N);
488 NVals_even = __low_int_to_double(N);
489 NVals_0_7 = c7x::reinterpret<c7x::double_vec>(__permute_low_low(
490 MATHLIB_vperm_data_dp_interweave_0_63, c7x::as_uchar_vec(NVals_odd), c7x::as_uchar_vec(NVals_even)));
491 NVals_8_15 = c7x::reinterpret<c7x::double_vec>(__permute_high_high(
492 MATHLIB_vperm_data_dp_interweave_0_63, c7x::as_uchar_vec(NVals_odd), c7x::as_uchar_vec(NVals_even)));
// Assemble double values T from the 32-bit halves upperBitsIndexT and
// lowerBitsIndexT (presumably table entries; their source is not shown).
506 TVals_8_15 = c7x::reinterpret<c7x::double_vec>(__permute_high_high(
507 MATHLIB_vperm_data_interweave_0_63, c7x::as_uchar_vec(upperBitsIndexT), c7x::as_uchar_vec(lowerBitsIndexT)));
508 TVals_0_7 = c7x::reinterpret<c7x::double_vec>(__permute_low_low(
509 MATHLIB_vperm_data_interweave_0_63, c7x::as_uchar_vec(upperBitsIndexT), c7x::as_uchar_vec(lowerBitsIndexT)));
// T <- (T - ln2 * N) * base.
512 TVals_8_15 = (TVals_8_15 - (ln2 * NVals_8_15)) * base;
513 TVals_0_7 = (TVals_0_7 - (ln2 * NVals_0_7)) * base;
// Double-precision part of the result: r * base + T.
521 outVec_0_7 = (rVals_0_7 * base) + TVals_0_7;
522 outVec_8_15 = (rVals_8_15 * base) + TVals_8_15;
// Narrow to float, merge into one vector and add the polynomial term.
525 outVec = c7x::reinterpret<vec>(__permute_even_even_int(MATHLIB_vperm_data_0_63,
526 c7x::as_uchar_vec(__double_to_float(outVec_8_15)),
527 c7x::as_uchar_vec(__double_to_float(outVec_0_7))));
528 outVec = outVec + pol;
// Inputs <= 0 produce outVecMin.
537 __vpred cmp_min = __cmp_le_pred(inVec, c7x::convert<vec>(zero));
538 outVec = __select(cmp_min, outVecMin, outVec);
// Inputs above eMax produce outVecMax.
543 __vpred cmp_max = __cmp_lt_pred(eMax, inVec);
544 outVec = __select(cmp_max, outVecMax, outVec);
// Predicated store of outVec to the next pDst address from address generator 0.
546 __vpred tmp = c7x::strm_agen<0, vec>::get_vpred();
547 vec *addr = c7x::strm_agen<0, vec>::get_adv(pDst);
548 __vstore_pred(tmp, addr, outVec);
// Two alternative ways of producing the output; the conditions that choose
// between them are on lines not present in this listing.
// Scalar routine applied to the first element only.
577 pDst[0] = MATHLIB_log10_scalar_ci<T>(pSrc[0]);
// Vector routine applied to the whole length-element buffer.
580 MATHLIB_log10_vector<T>(length, pSrc, pDst);
static c7x::double_vec cmn_DIVDP_opt(c7x::double_vec a, c7x::double_vec b)
void MATHLIB_log10_vector< double >(size_t length, double *restrict pSrc, double *restrict pDst)
static void MATHLIB_log10_vector_split2(size_t length, double *restrict pSrc, double *restrict pDst)
void MATHLIB_log10_vector< float >(size_t length, float *restrict pSrc, float *restrict pDst)
MATHLIB_STATUS MATHLIB_log10(size_t length, T *restrict pSrc, T *restrict pDst)
template MATHLIB_STATUS MATHLIB_log10< float >(size_t length, float *pSrc, float *pDst)
static void MATHLIB_log10_vector_split1(size_t length, double *restrict pSrc, double *restrict pDst)
static void MATHLIB_log10_vector(size_t length, T *pSrc, T *pDst)
template MATHLIB_STATUS MATHLIB_log10< double >(size_t length, double *pSrc, double *pDst)
MATHLIB_STATUS MATHLIB_log10_dp(size_t length, double *pSrc, double *pDst)
This function is the C interface for MATHLIB_log10; it accepts double pointers.
MATHLIB_STATUS MATHLIB_log10_sp(size_t length, float *pSrc, float *pDst)
This function is the C interface for MATHLIB_log10; it accepts float pointers.
#define MATHLIB_LOGTABLE_OFFSET
static c7x::uint_vec MATHLIB_LUTReadLowerBits(vecType vecOffset)
This method reads bits 31-0 of LUT value at vecOffset.
static c7x::uint_vec MATHLIB_LUTReadUpperBits(vecType vecOffset)
This method reads bits 63-32 of LUT value at vecOffset.
static void MATHLIB_SE0SA0Close()
This method performs SE0 and SA0 close.
static void MATHLIB_SE0SA01DSequentialInit(__SE_TEMPLATE_v1 *se0Params, __SA_TEMPLATE_v1 *sa0Params, size_t length, T *pSrc, T *pDst)
static MATHLIB_STATUS MATHLIB_checkParams(size_t length, T *pSrc, T *pDst)
This method performs parameter checks for MATHLIB function.
static void MATHLIB_SE0SA0Open(__SE_TEMPLATE_v1 *se0Params, __SA_TEMPLATE_v1 *sa0Params, T *pSrc)
This method performs SE0 and SA0 open.
MATHLIB_STATUS_NAME
The enumeration of all status codes.