// Shorthand for the `value` member of the c7x::element_count_of trait instantiated on x
// (presumably the number of elements held by vector type x -- confirm against the c7x headers).
34 #define ELEMENT_COUNT(x) c7x::element_count_of<x>::value
// Shorthand for the `type` member of the c7x::element_type_of trait instantiated on x.
// The expansion starts with `typename`, so it is intended for positions where a type name is expected.
35 #define ELEMENT_TYPE(x) typename c7x::element_type_of<x>::type
// Elements left over once `length` has been split into whole vectors (assigned below).
63 size_t remNumBlocks = 0;
// Working vector type for the single-precision path: the full-width vector derived from c7x::float_vec.
66 typedef typename c7x::make_full_vector<c7x::float_vec>::type vec;
// Default-generated SE0 / SA0 parameter templates.
// NOTE(review): presumably filled in by MATHLIB_SE0SA01DSequentialInit before the streams are opened -- confirm.
71 __SE_TEMPLATE_v1 se0Params = __gen_SE_TEMPLATE_v1();
72 __SA_TEMPLATE_v1 sa0Params = __gen_SA_TEMPLATE_v1();
// Number of whole vectors contained in `length` elements ...
79 numBlocks = length / c7x::element_count_of<vec>::value;
// ... and the number of trailing elements that do not fill a whole vector.
80 remNumBlocks = length % c7x::element_count_of<vec>::value;
// Vector constants used inside the loop; only the initialization of maxValue is visible in this listing.
92 vec half, OneP5, zero, maxValue;
// Largest finite value of elemType converted to the vector type
// (presumably replicated into every lane -- confirm the scalar-to-vector cast semantics).
97 maxValue = (vec) std::numeric_limits<elemType>::max();
// Single-precision main loop: one full vector of inputs is handled per iteration.
100 for (
size_t i = 0; i < numBlocks; i++) {
// Read the next input vector from stream engine 0 (get_adv presumably also advances the stream -- confirm).
101 vec inVec = c7x::strm_eng<0, vec>::get_adv();
// Initial reciprocal-square-root estimate of the input.
114 p0 = __recip_sqrt(inVec);
// Correction factor OneP5 - half * d0 * p0. With OneP5 == 1.5, half == 0.5 and d0 == inVec * p0 this is
// the usual Newton-Raphson term for 1/sqrt(x); those definitions are on lines not shown here -- confirm.
116 p1 = OneP5 - d0 * p0 * half;
// Lower bound: lanes whose input is <= 0 produce 0.
124 __vpred cmp_lezero = __cmp_le_pred(inVec, zero);
125 y = __select(cmp_lezero, zero, y);
// Upper bound: only lanes whose input is strictly greater than maxValue are clamped to maxValue.
// The comparison has to be strict (maxValue < inVec). With <=, an input exactly equal to the largest
// finite value would be replaced by maxValue instead of keeping its computed square root. The
// double-precision loop in this file uses the same strict comparison.
128 __vpred cmp_gtmax = __cmp_lt_pred(maxValue, inVec);
129 vec outVec = __select(cmp_gtmax, maxValue, y);
// Predicated store of the result: the predicate and the destination address both come from
// address generator 0 (the predicate presumably masks lanes past the end of the buffer -- confirm).
131 __vpred tmp = c7x::strm_agen<0, vec>::get_vpred();
132 vec *addr = c7x::strm_agen<0, vec>::get_adv(pDst);
133 __vstore_pred(tmp, addr, outVec);
// Whole-vector count and leftover element count for the input (both assigned below).
143 size_t numBlocks = 0;
144 size_t remNumBlocks = 0;
// Working vector type for the double-precision path: the full-width vector derived from c7x::double_vec.
147 typedef typename c7x::make_full_vector<c7x::double_vec>::type vec;
// Default-generated SE0 / SA0 parameter templates.
// NOTE(review): presumably filled in by MATHLIB_SE0SA01DSequentialInit before the streams are opened -- confirm.
152 __SE_TEMPLATE_v1 se0Params = __gen_SE_TEMPLATE_v1();
153 __SA_TEMPLATE_v1 sa0Params = __gen_SA_TEMPLATE_v1();
// Number of whole vectors contained in `length` elements ...
158 numBlocks = length / c7x::element_count_of<vec>::value;
// ... and the number of trailing elements that do not fill a whole vector.
159 remNumBlocks = length % c7x::element_count_of<vec>::value;
// Vector constants used inside the loop; only the initialization of maxValue is visible in this listing.
171 vec half, OneP5, zero, maxValue;
// Largest finite value of elemType converted to the vector type
// (presumably replicated into every lane -- confirm the scalar-to-vector cast semantics).
176 maxValue = (vec) std::numeric_limits<elemType>::max();
// Double-precision main loop: one full vector of inputs is handled per iteration.
179 for (
size_t i = 0; i < numBlocks; i++) {
// Read the next input vector from stream engine 0 (get_adv presumably also advances the stream -- confirm).
181 vec invec = c7x::strm_eng<0, vec>::get_adv();
// Initial reciprocal-square-root estimate of the input.
183 vec x = __recip_sqrt(invec);
// Three identical refinement steps x <- x * (OneP5 - half * invec * x * x). With OneP5 == 1.5 and
// half == 0.5 this is the Newton-Raphson update for 1/sqrt(invec); the constant values are set on
// lines not shown here -- confirm.
185 x = x * (OneP5 - (invec * x * x * half));
186 x = x * (OneP5 - (invec * x * x * half));
187 x = x * (OneP5 - (invec * x * x * half));
// Lower bound: lanes whose input is <= 0 produce 0. (y is computed on a line not shown in this listing.)
191 __vpred cond1 = __cmp_le_pred(invec, zero);
192 y = __select(cond1, zero, y);
// Upper bound: lanes whose input is strictly greater than maxValue are clamped to maxValue.
194 __vpred cond2 = __cmp_lt_pred(maxValue, invec);
195 y = __select(cond2, maxValue, y);
// Predicated store of the result: the predicate and the destination address both come from
// address generator 0 (the predicate presumably masks lanes past the end of the buffer -- confirm).
197 __vpred temp = c7x::strm_agen<0, vec>::get_vpred();
198 vec * addr = c7x::strm_agen<0, vec>::get_adv(pDst);
199 __vstore_pred(temp, addr, y);
215 pDst[0] = MATHLIB_sqrt_scalar_ci<T>(pSrc[0]);
218 MATHLIB_sqrt_vector<T>(length, pSrc, pDst);
template MATHLIB_STATUS MATHLIB_sqrt< double >(size_t length, double *pSrc, double *pDst)
static void MATHLIB_sqrt_vector(size_t length, T *pSrc, T *pDst)
template MATHLIB_STATUS MATHLIB_sqrt< float >(size_t length, float *pSrc, float *pDst)
void MATHLIB_sqrt_vector< double >(size_t length, double *pSrc, double *pDst)
void MATHLIB_sqrt_vector< float >(size_t length, float *pSrc, float *pDst)
static void MATHLIB_SE0SA0Close()
This method performs SE0 and SA0 close.
static void MATHLIB_SE0SA01DSequentialInit(__SE_TEMPLATE_v1 *se0Params, __SA_TEMPLATE_v1 *sa0Params, size_t length, T *pSrc, T *pDst)
static MATHLIB_STATUS MATHLIB_checkParams(size_t length, T *pSrc, T *pDst)
This method performs parameter checks for MATHLIB function.
static void MATHLIB_SE0SA0Open(__SE_TEMPLATE_v1 *se0Params, __SA_TEMPLATE_v1 *sa0Params, T *pSrc)
This method performs SE0 and SA0 open.
MATHLIB_STATUS MATHLIB_sqrt_sp(size_t length, float *pSrc, float *pDst)
This function is the C interface for MATHLIB_sqrt. Function accepts float pointers.
MATHLIB_STATUS MATHLIB_sqrt_dp(size_t length, double *pSrc, double *pDst)
This function is the C interface for MATHLIB_sqrt. Function accepts double pointers.
MATHLIB_STATUS MATHLIB_sqrt(size_t length, T *pSrc, T *pDst)
Performs the elementwise square root of an input vector. Function can be overloaded with float and d...
MATHLIB_STATUS_NAME
The enumeration of all status codes.