MATHLIB User Guide
MATHLIB_asin.cpp
Go to the documentation of this file.
1 /******************************************************************************
2  * Copyright (C) 2023 Texas Instruments Incorporated - https://www.ti.com/
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  *
8  * Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  *
11  * Redistributions in binary form must reproduce the above copyright
12  * notice, this list of conditions and the following disclaimer in the
13  * documentation and/or other materials provided with the
14  * distribution.
15  *
16  * Neither the name of Texas Instruments Incorporated nor the names of
17  * its contributors may be used to endorse or promote products derived
18  * from this software without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31  *
32  ******************************************************************************/
33 
34 #define ELEMENT_COUNT(x) c7x::element_count_of<x>::value
35 /* the type of data that 'type x data structure' (i.e. vecType) holds */
36 #define ELEMENT_TYPE(x) typename c7x::element_type_of<x>::type
37 
38 /******************************************************************************/
39 /* */
40 /* Includes */
41 /* */
42 /******************************************************************************/
43 
44 #include "MATHLIB_types.h"
45 #include "MATHLIB_utility.h"
46 #include <cstddef>
47 #include <float.h>
48 
49 /******************************************************************************/
50 /* */
51 /* MATHLIB_asin */
52 /* */
53 /******************************************************************************/
54 
55 template <typename vecType> static inline vecType sqrt_asin_i(vecType a);
56 template <typename vecType> static inline vecType pol_est_asin_i(vecType x);
57 template <typename T> MATHLIB_STATUS MATHLIB_asin(size_t length, T *pSrc, T *pDst);
58 
59 template <typename vecType> static inline vecType sqrt_asin_i(vecType a)
60 {
61 
62  // define type of elements vecType vector holds as elemType
63  typedef ELEMENT_TYPE(vecType) elemType;
64 
65  vecType half, OneP5, zero, maxValue;
66 
67  half = (vecType) 0.5;
68  OneP5 = (vecType) 1.5;
69  zero = (vecType) 0.0;
70  maxValue = (vecType) std::numeric_limits<elemType>::max();
71 
72  vecType p0, p1, r0, d0, y;
73 
74  p0 = __recip_sqrt(a);
75  r0 = p0;
76  d0 = p0 * a;
77 
78  p1 = OneP5 - d0 * p0 * half;
79  y = a * r0 * p1;
80 
81  // if (a <= zero) {
82  // y = zero;
83  // }
84  __vpred cmp_lezero = __cmp_le_pred((vecType) a, zero);
85  y = __select(cmp_lezero, zero, y);
86 
87  // if (a > FLT_MAX) {
88  // y = FLT_MAX;
89  // }
90  __vpred cmp_gtmax = __cmp_lt_pred(maxValue, (vecType) a);
91  y = __select(cmp_gtmax, maxValue, y);
92 
93  return y;
94 }
95 
96 template <typename vecType> static inline vecType pol_est_asin_i(vecType x)
97 {
98  /**********************************************************************/
99  /* Coefficient declaration for the polynomial for asin(x) */
100  /**********************************************************************/
101 
102  vecType c16, c14, c12, c10, c8, c6, c4, c2, x2, x4, x6, x8, x10, x12, pol, tmp1, tmp2;
103 
104  c16 = (vecType) 0.053002771381990;
105  c14 = (vecType) -0.010980624698693;
106  c12 = (vecType) 0.020659425186833;
107  c10 = (vecType) 0.022862784546374;
108  c8 = (vecType) 0.030636056280974;
109  c6 = (vecType) 0.044450959710588;
110  c4 = (vecType) 0.075034659380970;
111  c2 = (vecType) 0.166664771293503;
112 
113  // calculate the powers of x in polynomial
114  x2 = x * x;
115  x4 = x2 * x2;
116  x6 = x2 * x4;
117  x8 = x4 * x4;
118  x10 = x6 * x4;
119  x12 = x8 * x4;
120 
121  /****************************************************************************/
122  /* Polynomial calculation to estimate the arc_sine funtion. */
123  /* The polynomial used is as follows: */
124  /* pol = x + c2 x^3 + c4 x^5 + c6 x^7 + c8 x^9 + c10 x^11 + c12 x^13 + */
125  /* c14 x^15 + c16 x^17, */
126  /* where x is the input, c2 through c16 are the corresponding coefficients */
127  /* to the polynomial, and pol is the result of the polynomial. This */
128  /* polynomial only covers inputs in the range [0, 1/sqrt(2)]. */
129  /****************************************************************************/
130 
131  /**************************************************************************/
132  /* The polynomial calculation is done in two separate parts. */
133  /* tmp1 = c2 x^2 + c4 x^4 + c6 x^6 + c8 x^8 */
134  /* tmp2 = c10 x^10 + c12 x^12 + c14 x^14 + c16 x^16 */
135  /* In order to reduce the number of multiplications x is factored out of */
136  /* the polynomial and multiplied by later. */
137  /**************************************************************************/
138 
139  tmp1 = ((c8 * x8) + (c6 * x6)) + ((c4 * x4) + (c2 * x2));
140  tmp2 = ((((c16 * x4) + (c14 * x2)) + c12) * x12) + (c10 * x10);
141 
142  pol = tmp1 + tmp2;
143  pol = (pol * x) + x;
144 
145  return pol;
146 }
147 
148 // this method performs arc-sine computation of input vector
149 template <typename T> MATHLIB_STATUS MATHLIB_asin(size_t length, T *pSrc, T *pDst)
150 {
151 
152  // variables
153  MATHLIB_STATUS status = MATHLIB_SUCCESS; // return function status
154  size_t numBlocks = 0; // compute loop's iteration count
155  size_t remNumBlocks = 0; // when numBlocks is not a multiple of SIMD width
156 
157  // derive c7x vector type from template typename
158  typedef typename c7x::make_full_vector<T>::type vec;
159 
160  __SE_TEMPLATE_v1 se0Params = __gen_SE_TEMPLATE_v1();
161  __SA_TEMPLATE_v1 sa0Params = __gen_SA_TEMPLATE_v1();
162 
163  status = MATHLIB_checkParams(length, pSrc, pDst);
164 
165  if (status == MATHLIB_SUCCESS) {
166  MATHLIB_SE0SA01DSequentialInit(&se0Params, &sa0Params, length, pSrc, pDst);
167 
168  // calculate compute loop's iteration counter
169  numBlocks = length / c7x::element_count_of<vec>::value;
170  remNumBlocks = length % c7x::element_count_of<vec>::value;
171  if (remNumBlocks) {
172  numBlocks++;
173  }
174 
175  // open SE0, SE1, and SA0 for reading and writing operands
176  MATHLIB_SE0SA0Open(&se0Params, &sa0Params, pSrc);
177 
178  /**********************************************************************/
179  /* Create and assign values for constants employed on asin computation */
180  /**********************************************************************/
181 
182  vec HalfPI, Zero, One, rsqr2, s, negativeOne;
183  vec res, x_abs, a, temp1, temp2, negativeS;
184  vec poly, scale, offset;
185 
186  HalfPI = (vec) 1.570796327; // pi/2
187  rsqr2 = (vec) 0.7071067811; // 1/sqrt(2)
188  One = (vec) 1.0;
189  Zero = (vec) 0.0;
190  negativeOne = -One;
191 
192  vec nan = (vec) 0x7FFFFFFFu;
193 
194  // compute loop to perform vector asin
195  for (size_t i = 0; i < numBlocks; i++) {
196  vec inVec = c7x::strm_eng<0, vec>::get();
197 
198  // Sign Vector
199  s = (vec) 1.0;
200 
201  /******************************************************************/
202  /* Modified Asin algorithm to decrease number of cycles (ii) */
203  /* Instead of pre-calculating both outcomes of |x| > 1/sqrt(2) */
204  /* only determine which arg to pass to pol_est and call once */
205  /* */
206  /* Final Form of Result */
207  /* res = scale * poly(arg) + offset */
208  /* for |x| <= 1/sqrt(2): scale = 1, offset = 0, arg = |x| */
209  /* for |x| > 1/np.sqrt(2): scale = -1, offset = pi/2, arg = 1-x^2 */
210  /******************************************************************/
211 
212  /***********************************************************************/
213  /* Calculate sqrt for asin computation */
214  /***********************************************************************/
215  x_abs = __abs(inVec);
216  a = One - (x_abs * x_abs);
217  // sqrt(1 - x^2)
218  temp1 = sqrt_asin_i<vec>(a);
219 
220  // |x| > 1/sqrt(2)
221  __vpred cmp_x_abs = __cmp_lt_pred(rsqr2, x_abs);
222  temp2 = __select(cmp_x_abs, temp1, x_abs);
223  offset = __select(cmp_x_abs, HalfPI, Zero);
224  scale = __select(cmp_x_abs, negativeOne, One);
225 
226  /***********************************************************************/
227  /* Calculate polynomial for asin computation */
228  /***********************************************************************/
229  // For |x| <= 1/sqrt(2)
230  poly = pol_est_asin_i<vec>(temp2);
231 
232  res = scale * poly + offset;
233 
234  /***********************************************************************/
235  /* Sign adjustment for quadrant 3 & 4 */
236  /***********************************************************************/
237 
238  // if(x < 0.0f){
239  // s = -s;
240  // }
241  negativeS = -s;
242 
243  __vpred cmp_lt_zero = __cmp_lt_pred(inVec, Zero);
244  s = __select(cmp_lt_zero, negativeS, s);
245  res = res * s;
246 
247  /***********************************************************************/
248  /* Bounds checking */
249  /***********************************************************************/
250 
251  // if(x_abs > 1.0f){
252  // res = _itof(0x7FFFFFFFu); NaN
253  // }
254  vec inVec1 = c7x::strm_eng<0, vec>::get_adv();
255  vec x_abs1 = __abs(inVec1);
256  __vpred cmp_gt_one = __cmp_lt_pred(One, x_abs1);
257  res = __select(cmp_gt_one, nan, res);
258 
259  vec outVec = res;
260 
261  __vpred tmp = c7x::strm_agen<0, vec>::get_vpred();
262  vec *addr = c7x::strm_agen<0, vec>::get_adv(pDst);
263  __vstore_pred(tmp, addr, outVec);
264  }
265 
267  }
268 
269  return status;
270 }
271 
272 /******************************************************************************/
273 /* */
274 /* Explicit templatization for datatypes supported by MATHLIB_asin */
275 /* */
276 /******************************************************************************/
277 
278 // single precision
279 template MATHLIB_STATUS MATHLIB_asin<float>(size_t length, float *pSrc, float *pDst);
280 
281 /******************************************************************************/
282 /* */
283 /* C-interface wrapper functions */
284 /* */
285 /******************************************************************************/
286 
287 extern "C" {
288 
289 // single-precision wrapper
290 MATHLIB_STATUS MATHLIB_asin_sp(size_t length, float *pSrc, float *pDst)
291 {
292  MATHLIB_STATUS status = MATHLIB_asin(length, pSrc, pDst);
293  return status;
294 }
295 
296 } // extern "C"
static vecType sqrt_asin_i(vecType a)
static vecType pol_est_asin_i(vecType x)
#define ELEMENT_TYPE(x)
template MATHLIB_STATUS MATHLIB_asin< float >(size_t length, float *pSrc, float *pDst)
MATHLIB_STATUS MATHLIB_asin_sp(size_t length, float *pSrc, float *pDst)
This function is the C interface for MATHLIB_asin. Function accepts float pointers.
MATHLIB_STATUS MATHLIB_asin(size_t length, T *pSrc, T *pDst)
Performs the elementwise arc-sine of an input vector. Function can be overloaded with float pointers,...
static void MATHLIB_SE0SA0Close()
This method performs SE0 and SA0 close.
static void MATHLIB_SE0SA01DSequentialInit(__SE_TEMPLATE_v1 *se0Params, __SA_TEMPLATE_v1 *sa0Params, size_t length, T *pSrc, T *pDst)
static MATHLIB_STATUS MATHLIB_checkParams(size_t length, T *pSrc, T *pDst)
This method performs parameter checks for MATHLIB function.
static void MATHLIB_SE0SA0Open(__SE_TEMPLATE_v1 *se0Params, __SA_TEMPLATE_v1 *sa0Params, T *pSrc)
This method performs SE0 and SA0 open.
MATHLIB_STATUS_NAME
The enumeration of all status codes.
@ MATHLIB_SUCCESS