36 #define DSP_INTRINSICS 39 #define DSP_INTRINSIC_FOR_RECIPORCAL 43 #define VLIB_F32 float 44 #define VLIB_D64 double 46 #define VLIB_OneByXF32 my_OneByX 47 #define VLIB_OneByX1X0F32 my_OneByX1X0 48 #define VLIB_SqrtXF32 my_SqrtX 51 #define __mmax(x,y) ((x>y)?x:y) 55 #define __mmin(x,y) ((x>y)?y:x) 59 #define SWAP_ME(X,Y) {temp=(X);(X)=(Y);(Y)=temp;} 62 #define SIGN(a, b) ((b) >= 0.0 ? _fabsf(a) : -_fabsf(a)) 77 #define SFM_TI_PI (3.14159265358979323846f) 81 #ifdef DSP_INTRINSIC_FOR_RECIPORCAL 99 if((x == 0.0f) | (x == -0.0f))
113 #ifdef DSP_INTRINSIC_FOR_RECIPORCAL 116 normInv = __recip_sqrt(x);
117 val = normInv*(3.0f-x*normInv*normInv)*0.5f;
118 normInv = val*(3.0f-x*val*val)*0.5f;
120 @TODO enable boundary check
125 return((1.0/sqrtf(x)));
134 #ifdef DSP_INTRINSIC_FOR_RECIPORCAL 135 float16 f2Pkd = (float16)2.0f;
139 vX0 = vX0 * (f2Pkd - vX0 * vX);
141 vX0 = vX0 * (f2Pkd - vX0 * vX);
150 vp = __cmp_le_pred((float16)FLT_MAX, vY);
152 vX0 = __select(vp,(float16)FLT_MIN,vX0);
156 vp = __cmp_le_pred(vY, (float16)FLT_MIN);
158 vX0 = __select(vp,(float16)FLT_MAX,vX0);
161 vX0.s0 = 1.0f/vX.s0; vX0.s1 = 1.0f/vX.s1; vX0.s2 = 1.0f/vX.s2; vX0.s3 = 1.0f/vX.s3;
162 vX0.s4 = 1.0f/vX.s4; vX0.s5 = 1.0f/vX.s5; vX0.s6 = 1.0f/vX.s6; vX0.s7 = 1.0f/vX.s7;
163 vX0.s8 = 1.0f/vX.s8; vX0.s9 = 1.0f/vX.s9; vX0.sa = 1.0f/vX.sa; vX0.sb = 1.0f/vX.sb;
164 vX0.sc = 1.0f/vX.sc; vX0.sd = 1.0f/vX.sd; vX0.se = 1.0f/vX.se; vX0.sf = 1.0f/vX.sf;
171 float16 f3Pkd = (float16)3.0f;
172 #ifdef DSP_INTRINSIC_FOR_RECIPORCAL 173 vX0 = __recip_sqrt(vX);
175 vX0 = vX0 * (f3Pkd - vX * vX0 * vX0) * (float16)0.5f;
176 vX0 = vX0 * (f3Pkd - vX * vX0 * vX0) * (float16)0.5f;
186 vp = __cmp_le_pred((float16)FLT_MAX, vY);
188 vX0 = __select(vp,(float16)FLT_MIN,vX0);
192 vp = __cmp_le_pred(vY, (float16)FLT_MIN);
194 vX0 = __select(vp,(float16)FLT_MAX,vX0);
200 vp = __cmp_le_pred(vX, (float16)0.0f);
202 vX0 = __select(vp,(float16)FLT_MIN,vX0);
206 vX0.s0 = 1.0f/sqrt(vX.s0); vX0.s1 = 1.0f/sqrt(vX.s1); vX0.s2 = 1.0f/sqrt(vX.s2); vX0.s3 = 1.0f/sqrt(vX.s3);
207 vX0.s4 = 1.0f/sqrt(vX.s4); vX0.s5 = 1.0f/sqrt(vX.s5); vX0.s6 = 1.0f/sqrt(vX.s6); vX0.s7 = 1.0f/sqrt(vX.s7);
208 vX0.s8 = 1.0f/sqrt(vX.s8); vX0.s9 = 1.0f/sqrt(vX.s9); vX0.sa = 1.0f/sqrt(vX.sa); vX0.sb = 1.0f/sqrt(vX.sb);
209 vX0.sc = 1.0f/sqrt(vX.sc); vX0.sd = 1.0f/sqrt(vX.sd); vX0.se = 1.0f/sqrt(vX.se); vX0.sf = 1.0f/sqrt(vX.sf);
225 float16 n1 = (float16)0.97239411f;
226 float16 n2 = (float16)-0.19194795f;
227 return (n1 + n2 * z * z) * z;
static float my_OneBySqrtX(float x)
Definition: sfm_ti_math.h:111
static float16 approxAtan(float16 z)
Definition: sfm_ti_math.h:223
static float my_YByX(float y, float x)
Definition: sfm_ti_math.h:213
static float my_OneByX(float x)
Definition: sfm_ti_math.h:79
static float16 VXLIB_oneByXVecF32(float16 vX)
Definition: sfm_ti_math.h:129
static float16 VXLIB_oneBySqrtXVecF32(float16 vX)
Definition: sfm_ti_math.h:168
static float my_SqrtX(float x)
Definition: sfm_ti_math.h:218