MATHLIB User Guide
MATHLIB_acosh.cpp
Go to the documentation of this file.
1 /******************************************************************************
2  * Copyright (C) 2023 Texas Instruments Incorporated - https://www.ti.com/
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  *
8  * Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  *
11  * Redistributions in binary form must reproduce the above copyright
12  * notice, this list of conditions and the following disclaimer in the
13  * documentation and/or other materials provided with the
14  * distribution.
15  *
16  * Neither the name of Texas Instruments Incorporated nor the names of
17  * its contributors may be used to endorse or promote products derived
18  * from this software without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31  *
32  ******************************************************************************/
33 
34 #define ELEMENT_COUNT(x) c7x::element_count_of<x>::value
35 
36 /******************************************************************************/
37 /* */
38 /* Includes */
39 /* */
40 /******************************************************************************/
41 
42 #include "MATHLIB_acosh_scalar.h"
43 #include "MATHLIB_ilut.h"
44 #include "MATHLIB_lut.h"
45 #include "MATHLIB_permute.h"
46 #include "MATHLIB_types.h"
47 #include "MATHLIB_utility.h"
48 
49 /******************************************************************************/
50 /* */
51 /* MATHLIB_acosh */
52 /* */
53 /******************************************************************************/
54 // this method computes square root expressions in the acosh_sqrt sub-kernel
55 template <typename vecType> static inline vecType sqrt_acosh_i(vecType a, vecType x)
56 {
57  vecType half, OneP5, zero;
58  /* vec zero, maxValue; */
59 
60  half = (vecType) 0.5;
61  zero = (vecType) 0.0;
62  OneP5 = (vecType) 1.5;
63 
64  vecType p0, p1, r0, d0, y;
65 
66  p0 = __recip_sqrt(a);
67  r0 = p0;
68  d0 = p0 * a;
69 
70  p1 = OneP5 - d0 * p0 * half;
71  y = a * r0 * p1;
72 
73  vecType x2 = x * x;
74 
75  // if input a is x^2, select x as output
76  __vpred cmp_xsqr = __cmp_eq_pred(a, x2);
77  y = __select(cmp_xsqr, x, y);
78 
79  // if input a is 0, select 0 as output
80  __vpred cmp_zero = __cmp_eq_pred(a, zero);
81  y = __select(cmp_zero, zero, y);
82 
83  return y;
84 }
85 
86 template <typename T>
87 static inline MATHLIB_STATUS
88 MATHLIB_acosh_log(__SE_TEMPLATE_v1 *se0Params, __SA_TEMPLATE_v1 *sa0Params, size_t length, T *pSrc0, T *pDst)
89 {
90  // variables
91  MATHLIB_STATUS status = MATHLIB_SUCCESS; // return function status
92  size_t numBlocks = 0; // compute loop's iteration count
93  size_t remNumBlocks = 0; // when numBlocks is not a multiple of SIMD width
94 
95  // derive c7x vector type from template typename
96  typedef typename c7x::make_full_vector<T>::type vec;
97 
98  // calculate compute loop's iteration counter
99  numBlocks = length / c7x::element_count_of<vec>::value;
100  remNumBlocks = length % c7x::element_count_of<vec>::value;
101  if (remNumBlocks) {
102  numBlocks++;
103  }
104 
105  // if (status == MATHLIB_SUCCESS) {
106 
107  // open SE0, SE1, and SA0 for reading and writing operands
108  // MATHLIB_SE0SA0Open(se0Params, sa0Params, pDst);
109  MATHLIB_SE0SE1SA0Open(se0Params, sa0Params, pDst, pSrc0);
110 
111  /**********************************************************************/
112  /* Create and assign values for constants employed on pow computation */
113  /**********************************************************************/
114 
115  vec C1, C2, C3, C4, C5, eMax, outVecMax;
116  c7x::double_vec ln2;
117  c7x::uint_vec zero;
118  zero = (c7x::uint_vec) 0;
119 
120  ln2 = (c7x::double_vec) 0.693147180559945;
121  C1 = (vec) -0.2302894f;
122  C2 = (vec) 0.1908169f;
123  C3 = (vec) -0.2505905f;
124  C4 = (vec) 0.3333164f;
125  C5 = (vec) -0.5000002f;
126  eMax = (vec) 3.402823466e+38f;
127  outVecMax = (vec) 88.72283905313f;
128 
129  vec one, nan, ln2sp;
130 
131  one = (vec) 1.0f;
132  nan = (vec) 0x7FFFFFFFu;
133  ln2sp = (vec) 0.69314718056f; // ln(2)
134 
135  // compute loop to perform vector pow
136  for (size_t i = 0; i < numBlocks; i++) {
137  vec inVec = c7x::strm_eng<0, vec>::get_adv();
138 
139  /**********************************************************************/
140  /* Create variables employed on log computation */
141  /**********************************************************************/
142 
143  vec pol, r1, r2, r3, r4, outVec;
144  c7x::double_vec inVecVals_odd, inVecVals_even, inVecVals_oddReciprocal, inVecVals_evenReciprocal,
145  inVecReciprocalApprox_8_15, inVecReciprocalApprox_0_7, inVecVals_8_15, inVecVals_0_7, rVals_0_7, rVals_8_15,
146  TVals_8_15, TVals_0_7, NVals_odd, NVals_even, NVals_0_7, NVals_8_15, pol_0_7, pol_8_15, outVec_8_15,
147  outVec_0_7;
148  c7x::uint_vec inVecReciprocal_32_63, inVecReciprocalClr_32_63, inVecReciprocalApprox_32_63, indexT,
149  upperBitsIndexT, lowerBitsIndexT;
150  c7x::int_vec N;
151 
152  /**********************************************************************/
153  /* Calculate Taylor series approximation for log */
154  /**********************************************************************/
155 
156  // Split vectors to compute r with double precision
157  inVecVals_odd = __high_float_to_double(inVec);
158  inVecVals_even = __low_float_to_double(inVec);
159  inVecVals_oddReciprocal = __recip(inVecVals_odd);
160  inVecVals_evenReciprocal = __recip(inVecVals_even);
161 
162  // Create floating point reciprocal approximation
163  // Upper 32 bits of all inVec reciprocal values
164  inVecReciprocal_32_63 = c7x::as_uint_vec(__permute_odd_odd_int(MATHLIB_vperm_data_interweave_0_63,
165  c7x::as_uchar_vec(inVecVals_oddReciprocal),
166  c7x::as_uchar_vec(inVecVals_evenReciprocal)));
167 
168  // Clear bits 0-16 inclusive
169  inVecReciprocalClr_32_63 = inVecReciprocal_32_63 & 0xFFFE0000U;
170 
171  // Concatenate cleared bit reciprocal with zero bits
172  inVecReciprocalApprox_8_15 = c7x::reinterpret<c7x::double_vec>(__permute_high_high(
173  MATHLIB_vperm_data_interweave_0_63, c7x::as_uchar_vec(inVecReciprocalClr_32_63), c7x::as_uchar_vec(zero)));
174  inVecReciprocalApprox_0_7 = c7x::reinterpret<c7x::double_vec>(__permute_low_low(
175  MATHLIB_vperm_data_interweave_0_63, c7x::as_uchar_vec(inVecReciprocalClr_32_63), c7x::as_uchar_vec(zero)));
176 
177  // Split inVec into two vectors with double precision
178  inVecVals_0_7 = c7x::reinterpret<c7x::double_vec>(__permute_low_low(
179  MATHLIB_vperm_data_dp_interweave_0_63, c7x::as_uchar_vec(inVecVals_odd), c7x::as_uchar_vec(inVecVals_even)));
180  inVecVals_8_15 = c7x::reinterpret<c7x::double_vec>(__permute_high_high(
181  MATHLIB_vperm_data_dp_interweave_0_63, c7x::as_uchar_vec(inVecVals_odd), c7x::as_uchar_vec(inVecVals_even)));
182 
183  // Calculate r in double precision
184  rVals_0_7 = (inVecReciprocalApprox_0_7 * inVecVals_0_7) - 1.0;
185  rVals_8_15 = (inVecReciprocalApprox_8_15 * inVecVals_8_15) - 1.0;
186 
187  // Convert r to float, compute r to the power of 2, 3, 4
188  r1 = c7x::reinterpret<vec>(__permute_even_even_int(MATHLIB_vperm_data_0_63,
189  c7x::as_uchar_vec(__double_to_float(rVals_8_15)),
190  c7x::as_uchar_vec(__double_to_float(rVals_0_7))));
191  r2 = r1 * r1;
192  r3 = r1 * r2;
193  r4 = r2 * r2;
194 
195  // Compute Taylor series polynomial
196  pol = (C5 * r2) + ((C4 * r3) + ((((C2 * r1) + C3) + (C1 * r2)) * r4));
197 
198  /**********************************************************************/
199  /* Calculate N */
200  /**********************************************************************/
201 
202  // Upper 32 bits of all inVec reciprocal approximation values
203  inVecReciprocalApprox_32_63 =
204  c7x::as_uint_vec(__permute_odd_odd_int(MATHLIB_vperm_data_0_63, c7x::as_uchar_vec(inVecReciprocalApprox_8_15),
205  c7x::as_uchar_vec(inVecReciprocalApprox_0_7)));
206 
207  N = c7x::convert<c7x::int_vec>(((inVecReciprocalApprox_32_63 << 1) >> 21) - 1023);
208 
209  // Covert N to double precision for later calculation with LUT values
210  NVals_odd = __high_int_to_double(N);
211  NVals_even = __low_int_to_double(N);
212  NVals_0_7 = c7x::reinterpret<c7x::double_vec>(__permute_low_low(
213  MATHLIB_vperm_data_dp_interweave_0_63, c7x::as_uchar_vec(NVals_odd), c7x::as_uchar_vec(NVals_even)));
214  NVals_8_15 = c7x::reinterpret<c7x::double_vec>(__permute_high_high(
215  MATHLIB_vperm_data_dp_interweave_0_63, c7x::as_uchar_vec(NVals_odd), c7x::as_uchar_vec(NVals_even)));
216 
217  /**********************************************************************/
218  /* Determine LUT values */
219  /**********************************************************************/
220 
221  // Calculate LUT index
222  indexT = (((inVecReciprocalApprox_32_63 << 12) >> 29) + MATHLIB_LOGTABLE_OFFSET);
223 
224  // Read from LUT or ILUT and reconstruct double values split into two vectors
225  upperBitsIndexT = MATHLIB_LUTReadUpperBits(indexT);
226  lowerBitsIndexT = MATHLIB_LUTReadLowerBits(indexT);
227 
228  TVals_8_15 = c7x::reinterpret<c7x::double_vec>(__permute_high_high(
229  MATHLIB_vperm_data_interweave_0_63, c7x::as_uchar_vec(upperBitsIndexT), c7x::as_uchar_vec(lowerBitsIndexT)));
230  TVals_0_7 = c7x::reinterpret<c7x::double_vec>(__permute_low_low(
231  MATHLIB_vperm_data_interweave_0_63, c7x::as_uchar_vec(upperBitsIndexT), c7x::as_uchar_vec(lowerBitsIndexT)));
232 
233  // Calculate an adjusted T
234  TVals_8_15 = TVals_8_15 - (ln2 * NVals_8_15);
235  TVals_0_7 = TVals_0_7 - (ln2 * NVals_0_7);
236 
237  /**********************************************************************/
238  /* Calculate output with adjusted LUT and Taylor series values */
239  /**********************************************************************/
240 
241  // Split polynomial result into two vectors with double precision
242  pol_0_7 = c7x::reinterpret<c7x::double_vec>(__permute_low_low(MATHLIB_vperm_data_dp_interweave_0_63,
243  c7x::as_uchar_vec(__high_float_to_double(pol)),
244  c7x::as_uchar_vec(__low_float_to_double(pol))));
245  pol_8_15 = c7x::reinterpret<c7x::double_vec>(__permute_high_high(MATHLIB_vperm_data_dp_interweave_0_63,
246  c7x::as_uchar_vec(__high_float_to_double(pol)),
247  c7x::as_uchar_vec(__low_float_to_double(pol))));
248 
249  // Add LUT values and Taylor series values
250  outVec_0_7 = rVals_0_7 + TVals_0_7 + pol_0_7;
251  outVec_8_15 = rVals_8_15 + TVals_8_15 + pol_8_15;
252 
253  // Combine output vector into one floating point result
254  outVec = c7x::reinterpret<vec>(__permute_even_even_int(MATHLIB_vperm_data_0_63,
255  c7x::as_uchar_vec(__double_to_float(outVec_8_15)),
256  c7x::as_uchar_vec(__double_to_float(outVec_0_7))));
257 
258  /**********************************************************************/
259  /* Bounds checking */
260  /**********************************************************************/
261 
262  // if (inVec > eMax) {
263  // outVec = 88.72f;
264  // }
265  __vpred cmp_max = __cmp_lt_pred(eMax, inVec);
266  outVec = __select(cmp_max, outVecMax, outVec);
267 
268  outVec = outVec + ln2sp;
269 
270  vec inVecOriginal = c7x::strm_eng<1, vec>::get_adv();
271 
272  __vpred cmp_bound = __cmp_lt_pred(inVecOriginal, one);
273  outVec = __select(cmp_bound, nan, outVec);
274 
275  // Store exp computation result in
276  __vpred tmp = c7x::strm_agen<0, vec>::get_vpred();
277  vec *addr = c7x::strm_agen<0, vec>::get_adv(pDst);
278  __vstore_pred(tmp, addr, outVec);
279  }
281  // }
282  return status;
283 }
284 
285 // this method performs acosh computation of input vector
286 template <typename T> static inline void MATHLIB_acosh_vector(size_t length, T *restrict pSrc, T *restrict pDst)
287 {
288  // variables
289  size_t numBlocks = 0; // compute loop's iteration count
290  size_t remNumBlocks = 0; // when numBlocks is not a multiple of SIMD width
291 
292  // derive c7x vector type from template typename
293  typedef typename c7x::make_full_vector<float>::type vec;
294 
295  __SE_TEMPLATE_v1 se0Params = __gen_SE_TEMPLATE_v1();
296  __SA_TEMPLATE_v1 sa0Params = __gen_SA_TEMPLATE_v1();
297 
298  MATHLIB_SE0SA01DSequentialInit(&se0Params, &sa0Params, length, pSrc, pDst);
299 
300  // calculate compute loop's iteration counter
301  numBlocks = length / c7x::element_count_of<vec>::value;
302  remNumBlocks = length % c7x::element_count_of<vec>::value;
303  if (remNumBlocks) {
304  numBlocks++;
305  }
306 
307  // open SE0, SE1, and SA0 for reading and writing operands
308  MATHLIB_SE0SA0Open(&se0Params, &sa0Params, pSrc);
309 
310  /***********************************************************************/
311  /* Create and assign values for constants employed on acosh computation */
312  /***********************************************************************/
313 
314  vec half, one;
315 
316  half = (vec) 0.5f;
317  one = (vec) 1.0f;
318 
319  // compute loop to perform vector acosh
320  for (size_t i = 0; i < numBlocks; i++) {
321  vec inVec = c7x::strm_eng<0, vec>::get_adv();
322 
323  /**********************************************************************/
324  /* Create variables employed on acosh computation */
325  /**********************************************************************/
326 
327  vec sqrtVec, temp, inVecSquare;
328 
329  inVecSquare = inVec * inVec;
330 
331  sqrtVec = sqrt_acosh_i((inVecSquare - one), inVec);
332  temp = (sqrtVec * half) + (inVec * half);
333 
334  __vpred tmp = c7x::strm_agen<0, vec>::get_vpred();
335  vec *addr = c7x::strm_agen<0, vec>::get_adv(pDst);
336  __vstore_pred(tmp, addr, temp);
337  }
339 
340  MATHLIB_acosh_log(&se0Params, &sa0Params, length, pSrc, pDst);
341 }
342 
343 // this method performs exponential computation of input vector
344 template <typename T> MATHLIB_STATUS MATHLIB_acosh(size_t length, T *restrict pSrc, T *restrict pDst)
345 {
346  MATHLIB_STATUS status = MATHLIB_SUCCESS; // return function status
347 
348  // check for null pointers and non-zero length
349  status = MATHLIB_checkParams(length, pSrc, pDst);
350 
351  if (status == MATHLIB_SUCCESS) {
352  MATHLIB_acosh_vector(length, pSrc, pDst);
353  }
354  return status;
355 }
356 
357 /******************************************************************************/
358 /* */
359 /* Explicit templatization for datatypes supported by MATHLIB_acosh */
360 /* */
361 /******************************************************************************/
362 
363 // single precision
364 template MATHLIB_STATUS MATHLIB_acosh<float>(size_t length, float *pSrc, float *pDst);
365 
366 /******************************************************************************/
367 /* */
368 /* C-interface wrapper functions */
369 /* */
370 /******************************************************************************/
371 
372 extern "C" {
373 
374 // single-precision wrapper
375 MATHLIB_STATUS MATHLIB_acosh_sp(size_t length, float *pSrc, float *pDst)
376 {
377  MATHLIB_STATUS status = MATHLIB_acosh(length, pSrc, pDst);
378  return status;
379 }
380 
381 } // extern "C"
static MATHLIB_STATUS MATHLIB_acosh_log(__SE_TEMPLATE_v1 *se0Params, __SA_TEMPLATE_v1 *sa0Params, size_t length, T *pSrc0, T *pDst)
template MATHLIB_STATUS MATHLIB_acosh< float >(size_t length, float *pSrc, float *pDst)
static vecType sqrt_acosh_i(vecType a, vecType x)
MATHLIB_STATUS MATHLIB_acosh(size_t length, T *restrict pSrc, T *restrict pDst)
static void MATHLIB_acosh_vector(size_t length, T *restrict pSrc, T *restrict pDst)
MATHLIB_STATUS MATHLIB_acosh_sp(size_t length, float *pSrc, float *pDst)
This function is the C interface for MATHLIB_acosh. Function accepts float pointers.
#define MATHLIB_LOGTABLE_OFFSET
Definition: MATHLIB_lut.h:66
static c7x::uint_vec MATHLIB_LUTReadLowerBits(vecType vecOffset)
This method reads bits 31-0 of LUT value at vecOffset.
Definition: MATHLIB_lut.h:111
static c7x::uint_vec MATHLIB_LUTReadUpperBits(vecType vecOffset)
This method reads bits 63-32 of LUT value at vecOffset.
Definition: MATHLIB_lut.h:86
static void MATHLIB_SE0SE1SA0Open(__SE_TEMPLATE_v1 *se0Params, __SA_TEMPLATE_v1 *sa0Params, T *pSrc0, T *pSrc1)
This method performs SE0, SE1, and SA0 open.
static void MATHLIB_SE0SA0Close()
This method performs SE0 and SA0 close.
static void MATHLIB_SE0SE1SA0Close()
This method performs SE0, SE1, and SA0 close.
static void MATHLIB_SE0SA01DSequentialInit(__SE_TEMPLATE_v1 *se0Params, __SA_TEMPLATE_v1 *sa0Params, size_t length, T *pSrc, T *pDst)
static MATHLIB_STATUS MATHLIB_checkParams(size_t length, T *pSrc, T *pDst)
This method performs parameter checks for MATHLIB function.
static void MATHLIB_SE0SA0Open(__SE_TEMPLATE_v1 *se0Params, __SA_TEMPLATE_v1 *sa0Params, T *pSrc)
This method performs SE0 and SA0 open.
MATHLIB_STATUS_NAME
The enumeration of all status codes.
@ MATHLIB_SUCCESS