MATHLIB User Guide
MATHLIB_acos.cpp
Go to the documentation of this file.
1 /******************************************************************************
2  * Copyright (C) 2023 Texas Instruments Incorporated - https://www.ti.com/
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  *
8  * Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  *
11  * Redistributions in binary form must reproduce the above copyright
12  * notice, this list of conditions and the following disclaimer in the
13  * documentation and/or other materials provided with the
14  * distribution.
15  *
16  * Neither the name of Texas Instruments Incorporated nor the names of
17  * its contributors may be used to endorse or promote products derived
18  * from this software without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31  *
32  ******************************************************************************/
33 
34 #define ELEMENT_COUNT(x) c7x::element_count_of<x>::value
35 /* the type of data that 'type x data structure' (i.e. vecType) holds */
36 #define ELEMENT_TYPE(x) typename c7x::element_type_of<x>::type
37 
38 /******************************************************************************/
39 /* */
40 /* Includes */
41 /* */
42 /******************************************************************************/
43 
44 #include "MATHLIB_types.h"
45 #include "MATHLIB_utility.h"
46 #include <cstddef>
47 #include <float.h>
48 
49 /******************************************************************************/
50 /* */
51 /* MATHLIB_acos */
52 /* */
53 /******************************************************************************/
54 
55 template <typename vecType> static inline vecType sqrt_acos_i(vecType a);
56 template <typename vecType> static inline vecType pol_est_acos_i(vecType x);
57 template <typename T> MATHLIB_STATUS MATHLIB_acos(size_t length, T *pSrc, T *pDst);
58 
59 template <typename vecType> static inline vecType sqrt_acos_i(vecType a)
60 {
61  /* define type of elements vecType vector holds as elemType */
62  typedef ELEMENT_TYPE(vecType) elemType;
63 
64  vecType half, OneP5, zero, maxValue;
65 
66  half = (vecType) 0.5;
67  OneP5 = (vecType) 1.5;
68  zero = (vecType) 0.0;
69  maxValue = (vecType) std::numeric_limits<elemType>::max();
70 
71  vecType p0, p1;
72  vecType r0, d0;
73  vecType y;
74 
75  p0 = __recip_sqrt(a);
76  r0 = p0;
77  d0 = p0 * a;
78 
79  p1 = OneP5 - d0 * p0 * half;
80  y = a * r0 * p1;
81 
82  __vpred cmp_lezero = __cmp_le_pred((vecType) a, zero);
83  y = __select(cmp_lezero, zero, y);
84 
85  __vpred cmp_gtmax = __cmp_lt_pred(maxValue, (vecType) a);
86  y = __select(cmp_gtmax, maxValue, y);
87 
88  return y;
89 }
90 
91 template <typename vecType> static inline vecType pol_est_acos_i(vecType x)
92 {
93  /***********************************************************************/
94  /* Coefficient declaration for the polynomial for acos(x) */
95  /***********************************************************************/
96 
97  vecType c16, c14, c12, c10, c8, c6, c4, c2;
98 
99  c16 = (vecType) 0.053002771381990;
100  c14 = (vecType) -0.010980624698693;
101  c12 = (vecType) 0.020659425186833;
102  c10 = (vecType) 0.022862784546374;
103  c8 = (vecType) 0.030636056280974;
104  c6 = (vecType) 0.044450959710588;
105  c4 = (vecType) 0.075034659380970;
106  c2 = (vecType) 0.166664771293503;
107 
108  /* calculate the powers of x in polynomial */
109 
110  vecType x2, x4, x6, x8, x10, x12;
111  vecType pol, tmp1, tmp2;
112 
113  x2 = x * x;
114  x4 = x2 * x2;
115  x6 = x2 * x4;
116  x8 = x4 * x4;
117  x10 = x6 * x4;
118  x12 = x8 * x4;
119 
120  /****************************************************************************/
121  /* Polynomial calculation to estimate the arc_cosine funtion. */
122  /* The polynomial used is as follows: */
123  /* pol = x + c2 x^3 + c4 x^5 + c6 x^7 + c8 x^9 + c10 x^11 + c12 x^13 + */
124  /* c14 x^15 + c16 x^17, */
125  /* where x is the input, c2 through c16 are the corresponding coefficients */
126  /* to the polynomial, and pol is the result of the polynomial. This */
127  /* polynomial only covers inputs in the range [0, 1/sqrt(2)]. */
128  /****************************************************************************/
129 
130  /**************************************************************************/
131  /* The polynomial calculation is done in two separate parts. */
132  /* tmp1 = c2 x^2 + c4 x^4 + c6 x^6 + c8 x^8 */
133  /* tmp2 = c10 x^10 + c12 x^12 + c14 x^14 + c16 x^16 */
134  /* In order to reduce the number of multiplications x is factored out of */
135  /* the polynomial and multiplied by later. */
136  /**************************************************************************/
137 
138  tmp1 = ((c8 * x8) + (c6 * x6)) + ((c4 * x4) + (c2 * x2));
139  tmp2 = ((((c16 * x4) + (c14 * x2)) + c12) * x12) + (c10 * x10);
140 
141  pol = tmp1 + tmp2;
142  pol = (pol * x) + x;
143 
144  return pol;
145 }
146 
147 // this method performs arc-sine computation of input vector
148 template <typename T> MATHLIB_STATUS MATHLIB_acos(size_t length, T *pSrc, T *pDst)
149 {
150 
151  // variables
152  MATHLIB_STATUS status = MATHLIB_SUCCESS; // return function status
153  size_t numBlocks = 0; // compute loop's iteration count
154  size_t remNumBlocks = 0; // when numBlocks is not a multiple of SIMD width
155 
156  // derive c7x vector type from template typename
157  typedef typename c7x::make_full_vector<T>::type vec;
158 
159  __SE_TEMPLATE_v1 se0Params = __gen_SE_TEMPLATE_v1();
160  __SA_TEMPLATE_v1 sa0Params = __gen_SA_TEMPLATE_v1();
161 
162  status = MATHLIB_checkParams(length, pSrc, pDst);
163 
164  if (status == MATHLIB_SUCCESS) {
165  MATHLIB_SE0SA01DSequentialInit(&se0Params, &sa0Params, length, pSrc, pDst);
166 
167  // calculate compute loop's iteration counter
168  numBlocks = length / c7x::element_count_of<vec>::value;
169  remNumBlocks = length % c7x::element_count_of<vec>::value;
170  if (remNumBlocks) {
171  numBlocks++;
172  }
173 
174  // open SE0, SE1, and SA0 for reading and writing operands
175  MATHLIB_SE0SA0Open(&se0Params, &sa0Params, pSrc);
176 
177  /***********************************************************************/
178  /* Create and assign values for constants employed on acos computation */
179  /***********************************************************************/
180 
181  vec HalfPI, Zero, One, rsqr2, s, negativeOne, res, x_abs, a, temp1, temp2, negativeS, poly, scale, offset, nan;
182 
183  HalfPI = (vec) 1.570796327; /* pi/2 */
184  rsqr2 = (vec) 0.7071067811; /* 1/sqrt(2) */
185  One = (vec) 1.0;
186  Zero = (vec) 0.0;
187  nan = (vec) 0x7FFFFFFFu;
188  negativeOne = -One;
189 
190  // compute loop to perform vector acos
191  for (size_t i = 0; i < numBlocks; i++) {
192  vec inVec = c7x::strm_eng<0, vec>::get();
193 
194  s = (vec) 1.0;
195 
196  /***********************************************************************/
197  /* Calculate sqrt for acos computation */
198  /***********************************************************************/
199 
200  x_abs = __abs(inVec);
201  a = One - (x_abs * x_abs);
202  // sqrt(1 - x^2)
203  temp1 = sqrt_acos_i<vec>(a);
204 
205  // |x| > 1/sqrt(2)
206  __vpred cmp_x_abs = __cmp_lt_pred(rsqr2, x_abs);
207  temp2 = __select(cmp_x_abs, temp1, x_abs);
208  offset = __select(cmp_x_abs, HalfPI, Zero);
209  scale = __select(cmp_x_abs, negativeOne, One);
210 
211  /***********************************************************************/
212  /* Calculate polynomial for acos computation */
213  /***********************************************************************/
214 
215  poly = pol_est_acos_i<vec>(temp2);
216 
217  res = scale * poly + offset;
218 
219  /***********************************************************************/
220  /* Sign adjustment for quadrant 2 & 3 */
221  /***********************************************************************/
222 
223  // if(x < 0.0f){
224  // s = -s;
225  // }
226  negativeS = -s;
227  __vpred cmp_lt_zero = __cmp_lt_pred(inVec, Zero);
228  s = __select(cmp_lt_zero, negativeS, s);
229 
230  res = HalfPI - (res * s);
231 
232  /***********************************************************************/
233  /* Bounds checking */
234  /***********************************************************************/
235 
236  // if(x_abs > 1.0f){
237  // res = _itof(0x7FFFFFFFu); /* NaN */
238  // }
239  vec inVec1 = c7x::strm_eng<0, vec>::get_adv();
240  vec x_abs1 = __abs(inVec1);
241  __vpred cmp_gt_one = __cmp_lt_pred(One, x_abs1);
242  res = __select(cmp_gt_one, nan, res);
243  vec outVec = res;
244 
245  __vpred tmp = c7x::strm_agen<0, vec>::get_vpred();
246  vec *addr = c7x::strm_agen<0, vec>::get_adv(pDst);
247  __vstore_pred(tmp, addr, outVec);
248  }
249 
251  }
252 
253  return status;
254 }
255 
256 /******************************************************************************/
257 /* */
258 /* Explicit templatization for datatypes supported by MATHLIB_acos */
259 /* */
260 /******************************************************************************/
261 
262 // single precision
263 template MATHLIB_STATUS MATHLIB_acos<float>(size_t length, float *pSrc, float *pDst);
264 
265 /******************************************************************************/
266 /* */
267 /* C-interface wrapper functions */
268 /* */
269 /******************************************************************************/
270 
271 extern "C" {
272 
273 // single-precision wrapper
274 MATHLIB_STATUS MATHLIB_acos_sp(size_t length, float *pSrc, float *pDst)
275 {
276  MATHLIB_STATUS status = MATHLIB_acos(length, pSrc, pDst);
277  return status;
278 }
279 
280 } // extern "C"
static vecType sqrt_acos_i(vecType a)
static vecType pol_est_acos_i(vecType x)
#define ELEMENT_TYPE(x)
template MATHLIB_STATUS MATHLIB_acos< float >(size_t length, float *pSrc, float *pDst)
MATHLIB_STATUS MATHLIB_acos(size_t length, T *pSrc, T *pDst)
Performs the elementwise arc-cosine of an input vector. Function can be overloaded with float pointer...
MATHLIB_STATUS MATHLIB_acos_sp(size_t length, float *pSrc, float *pDst)
This function is the C interface for MATHLIB_acos. Function accepts float pointers.
static void MATHLIB_SE0SA0Close()
This method performs SE0 and SA0 close.
static void MATHLIB_SE0SA01DSequentialInit(__SE_TEMPLATE_v1 *se0Params, __SA_TEMPLATE_v1 *sa0Params, size_t length, T *pSrc, T *pDst)
static MATHLIB_STATUS MATHLIB_checkParams(size_t length, T *pSrc, T *pDst)
This method performs parameter checks for MATHLIB function.
static void MATHLIB_SE0SA0Open(__SE_TEMPLATE_v1 *se0Params, __SA_TEMPLATE_v1 *sa0Params, T *pSrc)
This method performs SE0 and SA0 open.
MATHLIB_STATUS_NAME
The enumeration of all status codes.
@ MATHLIB_SUCCESS