MATHLIB User Guide
MATHLIB_atanh.cpp
Go to the documentation of this file.
1 /******************************************************************************
2  * Copyright (C) 2024 Texas Instruments Incorporated - https://www.ti.com/
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  *
8  * Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  *
11  * Redistributions in binary form must reproduce the above copyright
12  * notice, this list of conditions and the following disclaimer in the
13  * documentation and/or other materials provided with the
14  * distribution.
15  *
16  * Neither the name of Texas Instruments Incorporated nor the names of
17  * its contributors may be used to endorse or promote products derived
18  * from this software without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31  *
32  ******************************************************************************/
33 
34 #define ELEMENT_COUNT(x) c7x::element_count_of<x>::value
35 
36 /******************************************************************************/
37 /* */
38 /* Includes */
39 /* */
40 /******************************************************************************/
41 
42 #include "MATHLIB_atanh_scalar.h"
43 #include "MATHLIB_ilut.h"
44 #include "MATHLIB_lut.h"
45 #include "MATHLIB_permute.h"
46 #include "MATHLIB_types.h"
47 #include "MATHLIB_utility.h"
48 
49 /******************************************************************************/
50 /* */
51 /* MATHLIB_acosh */
52 /* */
53 /******************************************************************************/
54 
55 template <typename T>
56 static inline MATHLIB_STATUS
57 MATHLIB_atanh_log(__SE_TEMPLATE_v1 *se0Params, __SA_TEMPLATE_v1 *sa0Params, size_t length, T *pSrc0, T *pDst)
58 {
59  // variables
60  MATHLIB_STATUS status = MATHLIB_SUCCESS; // return function status
61  size_t numBlocks = 0; // compute loop's iteration count
62  size_t remNumBlocks = 0; // when numBlocks is not a multiple of SIMD width
63 
64  // derive c7x vector type from template typename
65  typedef typename c7x::make_full_vector<T>::type vec;
66 
67  // calculate compute loop's iteration counter
68  numBlocks = length / c7x::element_count_of<vec>::value;
69  remNumBlocks = length % c7x::element_count_of<vec>::value;
70  if (remNumBlocks) {
71  numBlocks++;
72  }
73 
74  // if (status == MATHLIB_SUCCESS) {
75 
76  // open SE0, SE1, and SA0 for reading and writing operands
77  // MATHLIB_SE0SA0Open(se0Params, sa0Params, pDst);
78  MATHLIB_SE0SE1SA0Open(se0Params, sa0Params, pDst, pSrc0);
79 
80  /**********************************************************************/
81  /* Create and assign values for constants employed on pow computation */
82  /**********************************************************************/
83 
84  vec C1, C2, C3, C4, C5, pol_bound, half;
85  c7x::double_vec ln2;
86  c7x::uint_vec zero;
87  zero = (c7x::uint_vec) 0;
88 
89  ln2 = (c7x::double_vec) 0.693147180559945;
90  C1 = (vec) -0.2302894f;
91  C2 = (vec) 0.1908169f;
92  C3 = (vec) -0.2505905f;
93  C4 = (vec) 0.3333164f;
94  C5 = (vec) -0.5000002f;
95  pol_bound = (vec) 0.1f;
96  half = (vec) 0.5f;
97 
98  // compute loop to perform vector pow
99  for (size_t i = 0; i < numBlocks; i++) {
100  vec inVec = c7x::strm_eng<0, vec>::get_adv();
101 
102  /**********************************************************************/
103  /* Create variables employed on log computation */
104  /**********************************************************************/
105 
106  vec pol, r1, r2, r3, r4, outVec, inVecOriginal_abs;
107  c7x::double_vec inVecVals_odd, inVecVals_even, inVecVals_oddReciprocal, inVecVals_evenReciprocal,
108  inVecReciprocalApprox_8_15, inVecReciprocalApprox_0_7, inVecVals_8_15, inVecVals_0_7, rVals_0_7, rVals_8_15,
109  TVals_8_15, TVals_0_7, NVals_odd, NVals_even, NVals_0_7, NVals_8_15, pol_0_7, pol_8_15, outVec_8_15,
110  outVec_0_7;
111  c7x::uint_vec inVecReciprocal_32_63, inVecReciprocalClr_32_63, inVecReciprocalApprox_32_63, indexT,
112  upperBitsIndexT, lowerBitsIndexT;
113  c7x::int_vec N;
114 
115  /**********************************************************************/
116  /* Calculate Taylor series approximation for log */
117  /**********************************************************************/
118 
119  // Split vectors to compute r with double precision
120  inVecVals_odd = __high_float_to_double(inVec);
121  inVecVals_even = __low_float_to_double(inVec);
122  inVecVals_oddReciprocal = __recip(inVecVals_odd);
123  inVecVals_evenReciprocal = __recip(inVecVals_even);
124 
125  // Create floating point reciprocal approximation
126  // Upper 32 bits of all inVec reciprocal values
127  inVecReciprocal_32_63 = c7x::reinterpret<c7x::uint_vec>(
128  __permute_odd_odd_int(MATHLIB_vperm_data_interweave_0_63, c7x::as_uchar_vec(inVecVals_oddReciprocal),
129  c7x::as_uchar_vec(inVecVals_evenReciprocal)));
130 
131  // Clear bits 0-16 inclusive
132  inVecReciprocalClr_32_63 = inVecReciprocal_32_63 & 0xFFFE0000U;
133 
134  // Concatenate cleared bit reciprocal with zero bits
135  inVecReciprocalApprox_8_15 = c7x::reinterpret<c7x::double_vec>(__permute_high_high(
136  MATHLIB_vperm_data_interweave_0_63, c7x::as_uchar_vec(inVecReciprocalClr_32_63), c7x::as_uchar_vec(zero)));
137  inVecReciprocalApprox_0_7 = c7x::reinterpret<c7x::double_vec>(__permute_low_low(
138  MATHLIB_vperm_data_interweave_0_63, c7x::as_uchar_vec(inVecReciprocalClr_32_63), c7x::as_uchar_vec(zero)));
139 
140  // Split inVec into two vectors with double precision
141  inVecVals_0_7 = c7x::reinterpret<c7x::double_vec>(__permute_low_low(
142  MATHLIB_vperm_data_dp_interweave_0_63, c7x::as_uchar_vec(inVecVals_odd), c7x::as_uchar_vec(inVecVals_even)));
143  inVecVals_8_15 = c7x::reinterpret<c7x::double_vec>(__permute_high_high(
144  MATHLIB_vperm_data_dp_interweave_0_63, c7x::as_uchar_vec(inVecVals_odd), c7x::as_uchar_vec(inVecVals_even)));
145 
146  // Calculate r in double precision
147  rVals_0_7 = (inVecReciprocalApprox_0_7 * inVecVals_0_7) - 1.0;
148  rVals_8_15 = (inVecReciprocalApprox_8_15 * inVecVals_8_15) - 1.0;
149 
150  // Convert r to float, compute r to the power of 2, 3, 4
151  r1 = c7x::reinterpret<vec>(__permute_even_even_int(MATHLIB_vperm_data_0_63,
152  c7x::as_uchar_vec(__double_to_float(rVals_8_15)),
153  c7x::as_uchar_vec(__double_to_float(rVals_0_7))));
154  r2 = r1 * r1;
155  r3 = r1 * r2;
156  r4 = r2 * r2;
157 
158  // Compute Taylor series polynomial
159  pol = (C5 * r2) + ((C4 * r3) + ((((C2 * r1) + C3) + (C1 * r2)) * r4));
160 
161  /**********************************************************************/
162  /* Calculate N */
163  /**********************************************************************/
164 
165  // Upper 32 bits of all inVec reciprocal approximation values
166  inVecReciprocalApprox_32_63 = c7x::reinterpret<c7x::uint_vec>(
167  __permute_odd_odd_int(MATHLIB_vperm_data_0_63, c7x::as_uchar_vec(inVecReciprocalApprox_8_15),
168  c7x::as_uchar_vec(inVecReciprocalApprox_0_7)));
169 
170  N = c7x::convert<c7x::int_vec>(((inVecReciprocalApprox_32_63 << 1) >> 21) - 1023);
171 
172  // Covert N to double precision for later calculation with LUT values
173  NVals_odd = __high_int_to_double(N);
174  NVals_even = __low_int_to_double(N);
175  NVals_0_7 = c7x::reinterpret<c7x::double_vec>(__permute_low_low(
176  MATHLIB_vperm_data_dp_interweave_0_63, c7x::as_uchar_vec(NVals_odd), c7x::as_uchar_vec(NVals_even)));
177  NVals_8_15 = c7x::reinterpret<c7x::double_vec>(__permute_high_high(
178  MATHLIB_vperm_data_dp_interweave_0_63, c7x::as_uchar_vec(NVals_odd), c7x::as_uchar_vec(NVals_even)));
179 
180  /**********************************************************************/
181  /* Determine LUT values */
182  /**********************************************************************/
183 
184  // Calculate LUT index
185  indexT = (((inVecReciprocalApprox_32_63 << 12) >> 29) + MATHLIB_LOGTABLE_OFFSET);
186 
187  // Read from LUT and reconstruct double values split into two vectors
188  upperBitsIndexT = MATHLIB_LUTReadUpperBits(indexT);
189  lowerBitsIndexT = MATHLIB_LUTReadLowerBits(indexT);
190 
191  TVals_8_15 = c7x::reinterpret<c7x::double_vec>(__permute_high_high(
192  MATHLIB_vperm_data_interweave_0_63, c7x::as_uchar_vec(upperBitsIndexT), c7x::as_uchar_vec(lowerBitsIndexT)));
193  TVals_0_7 = c7x::reinterpret<c7x::double_vec>(__permute_low_low(
194  MATHLIB_vperm_data_interweave_0_63, c7x::as_uchar_vec(upperBitsIndexT), c7x::as_uchar_vec(lowerBitsIndexT)));
195 
196  // Calculate an adjusted T
197  TVals_8_15 = TVals_8_15 - (ln2 * NVals_8_15);
198  TVals_0_7 = TVals_0_7 - (ln2 * NVals_0_7);
199 
200  /**********************************************************************/
201  /* Calculate output with adjusted LUT and Taylor series values */
202  /**********************************************************************/
203 
204  // Split polynomial result into two vectors with double precision
205  pol_0_7 = c7x::reinterpret<c7x::double_vec>(__permute_low_low(MATHLIB_vperm_data_dp_interweave_0_63,
206  c7x::as_uchar_vec(__high_float_to_double(pol)),
207  c7x::as_uchar_vec(__low_float_to_double(pol))));
208  pol_8_15 = c7x::reinterpret<c7x::double_vec>(__permute_high_high(MATHLIB_vperm_data_dp_interweave_0_63,
209  c7x::as_uchar_vec(__high_float_to_double(pol)),
210  c7x::as_uchar_vec(__low_float_to_double(pol))));
211 
212  // Add LUT values and Taylor series values
213  outVec_0_7 = rVals_0_7 + TVals_0_7 + pol_0_7;
214  outVec_8_15 = rVals_8_15 + TVals_8_15 + pol_8_15;
215 
216  // Combine output vector into one floating point result
217  outVec = c7x::reinterpret<vec>(__permute_even_even_int(MATHLIB_vperm_data_0_63,
218  c7x::as_uchar_vec(__double_to_float(outVec_8_15)),
219  c7x::as_uchar_vec(__double_to_float(outVec_0_7))));
220 
221  outVec = outVec * half;
222 
223  /**********************************************************************/
224  /* Determine algorithm for specific input */
225  /**********************************************************************/
226 
227  // #if !defined(MATHLIB_FAST)
228  vec inVecOriginal = c7x::strm_eng<1, vec>::get_adv();
229  inVecOriginal_abs = __abs(inVecOriginal);
230  __vpred cmp_le_pol = __cmp_le_pred(inVecOriginal_abs, pol_bound);
231  outVec = __select(cmp_le_pol, inVec, outVec);
232  // #endif
233 
234  // Store atanh computation result in
235  __vpred tmp = c7x::strm_agen<0, vec>::get_vpred();
236  vec *addr = c7x::strm_agen<0, vec>::get_adv(pDst);
237  __vstore_pred(tmp, addr, outVec);
238  }
240  // }
241  return status;
242 }
243 
244 template <typename T>
245 static inline MATHLIB_STATUS
246 MATHLIB_atanh_cond(__SE_TEMPLATE_v1 *se0Params, __SA_TEMPLATE_v1 *sa0Params, size_t length, T *pSrc0, T *pDst)
247 {
248  // variables
249  MATHLIB_STATUS status = MATHLIB_SUCCESS; // return function status
250  size_t numBlocks = 0; // compute loop's iteration count
251  size_t remNumBlocks = 0; // when numBlocks is not a multiple of SIMD width
252 
253  // derive c7x vector type from template typename
254  typedef typename c7x::make_full_vector<T>::type vec;
255 
256  // calculate compute loop's iteration counter
257  numBlocks = length / c7x::element_count_of<vec>::value;
258  remNumBlocks = length % c7x::element_count_of<vec>::value;
259  if (remNumBlocks) {
260  numBlocks++;
261  }
262 
263  // if (status == MATHLIB_SUCCESS) {
264 
265  // open SE0, SE1, and SA0 for reading and writing operands
266  // MATHLIB_SE0SA0Open(se0Params, sa0Params, pDst);
267  MATHLIB_SE0SE1SA0Open(se0Params, sa0Params, pDst, pSrc0);
268 
269  /**********************************************************************/
270  /* Create and assign values for constants employed on pow computation */
271  /**********************************************************************/
272 
273  vec limit, zeroF, inf;
274 
275  limit = (vec) 1.0f;
276  zeroF = (vec) 0.0f;
277  inf = (vec) 0x7F800000u;
278 
279  // compute loop to perform vector pow
280  for (size_t i = 0; i < numBlocks; i++) {
281  vec inVec = c7x::strm_eng<0, vec>::get_adv();
282  vec inVecOriginal = c7x::strm_eng<1, vec>::get_adv();
283 
284  vec outVec, sign;
285 
286  vec inVecOriginal_abs = __abs(inVecOriginal);
287 
288  __vpred cmp_limit = __cmp_eq_pred(inVecOriginal_abs, limit);
289  outVec = __select(cmp_limit, inf, inVec);
290 
291  sign = (vec) 1.0;
292  __vpred cmp_lt_zero = __cmp_lt_pred(inVecOriginal, zeroF);
293  sign = __select(cmp_lt_zero, -sign, sign);
294 
295  outVec = outVec * sign;
296 
297  // Store exp computation result in
298  __vpred tmp = c7x::strm_agen<0, vec>::get_vpred();
299  vec *addr = c7x::strm_agen<0, vec>::get_adv(pDst);
300  __vstore_pred(tmp, addr, outVec);
301  }
303  // }
304  return status;
305 }
306 
307 template <typename vecType> static inline vecType divspMod_atanh(vecType a, vecType b)
308 {
309  vecType res, r0, d0, d1, p0, p1, Two;
310 
311  Two = (vecType) 2.0;
312 
313  p0 = __recip(b);
314  d0 = p0 * b;
315  r0 = p0;
316 
317  p1 = Two - d0;
318  d1 = r0 * p1;
319 
320  res = a * d1;
321 
322  return res;
323 }
324 
325 // this method performs atanh computation of input vector
326 template <typename T> static inline void MATHLIB_atanh_vector(size_t length, T *pSrc, T *pDst)
327 {
328  // variables
329  size_t numBlocks = 0; // compute loop's iteration count
330  size_t remNumBlocks = 0; // when numBlocks is not a multiple of SIMD width
331 
332  // derive c7x vector type from template typename
333  typedef typename c7x::make_full_vector<float>::type vec;
334 
335  __SE_TEMPLATE_v1 se0Params = __gen_SE_TEMPLATE_v1();
336  __SA_TEMPLATE_v1 sa0Params = __gen_SA_TEMPLATE_v1();
337 
338  MATHLIB_SE0SA01DSequentialInit(&se0Params, &sa0Params, length, pSrc, pDst);
339 
340  // calculate compute loop's iteration counter
341  numBlocks = length / c7x::element_count_of<vec>::value;
342  remNumBlocks = length % c7x::element_count_of<vec>::value;
343  if (remNumBlocks) {
344  numBlocks++;
345  }
346 
347  // open SE0, SE1, and SA0 for reading and writing operands
348  MATHLIB_SE0SA0Open(&se0Params, &sa0Params, pSrc);
349 
350  /***********************************************************************/
351  /* Create and assign values for constants employed on acosh computation */
352  /***********************************************************************/
353 
354  vec pol_bound, c2, c4;
355  pol_bound = (vec) 0.1f;
356  c2 = (vec) 0.333327051f;
357  c4 = (vec) 0.202017226f;
358 
359  // compute loop to perform vector acosh
360  for (size_t i = 0; i < numBlocks; i++) {
361  vec inVec = c7x::strm_eng<0, vec>::get_adv();
362 
363  /**********************************************************************/
364  /* Create variables employed on acosh computation */
365  /**********************************************************************/
366 
367  vec x2, x4, pol, outVec, inVec_abs, temp1, temp2;
368 
369  inVec_abs = __abs(inVec);
370 
371  // #if !defined(MATHLIB_FAST)
372 
373  x2 = inVec * inVec;
374  x4 = x2 * x2;
375 
376  // use polynomial estimation for |x| <= 0.1
377  // polynomial to estimate atanh(x) for small inputs
378  pol = (c2 * x2) + (c4 * x4);
379  pol = (pol * inVec_abs) + inVec_abs;
380 
381  // #endif
382 
383  // calculate atanh(x) based on atanh(x) = 0.5*ln((1+ x) / (1- x))
384  temp1 = 1.0f + inVec_abs;
385  temp2 = 1.0f - inVec_abs;
386 
387  outVec = divspMod_atanh(temp1, temp2);
388 
389  // #if !defined(MATHLIB_FAST)
390 
391  __vpred cmp_le_pol = __cmp_le_pred(inVec_abs, pol_bound);
392  outVec = __select(cmp_le_pol, pol, outVec);
393 
394  // #endif
395 
396  __vpred tmp = c7x::strm_agen<0, vec>::get_vpred();
397  vec *addr = c7x::strm_agen<0, vec>::get_adv(pDst);
398  __vstore_pred(tmp, addr, outVec);
399  }
400 
402 
403  MATHLIB_atanh_log(&se0Params, &sa0Params, length, pSrc, pDst);
404  MATHLIB_atanh_cond(&se0Params, &sa0Params, length, pSrc, pDst);
405 }
406 
407 // this method performs exponential computation of input vector
408 template <typename T> MATHLIB_STATUS MATHLIB_atanh(size_t length, T *restrict pSrc, T *restrict pDst)
409 {
410  MATHLIB_STATUS status = MATHLIB_SUCCESS; // return function status
411 
412  // check for null pointers and non-zero length
413  status = MATHLIB_checkParams(length, pSrc, pDst);
414 
415  if (status == MATHLIB_SUCCESS) {
416  MATHLIB_atanh_vector(length, pSrc, pDst);
417  }
418  return status;
419 }
420 
421 /******************************************************************************/
422 /* */
423 /* Explicit templatization for datatypes supported by MATHLIB_acosh */
424 /* */
425 /******************************************************************************/
426 
427 // single precision
428 template MATHLIB_STATUS MATHLIB_atanh<float>(size_t length, float *pSrc, float *pDst);
429 
430 /******************************************************************************/
431 /* */
432 /* C-interface wrapper functions */
433 /* */
434 /******************************************************************************/
435 
436 extern "C" {
437 
438 // single-precision wrapper
439 MATHLIB_STATUS MATHLIB_atanh_sp(size_t length, float *pSrc, float *pDst)
440 {
441  MATHLIB_STATUS status = MATHLIB_atanh(length, pSrc, pDst);
442  return status;
443 }
444 
445 } // extern "C"
MATHLIB_STATUS MATHLIB_atanh(size_t length, T *restrict pSrc, T *restrict pDst)
template MATHLIB_STATUS MATHLIB_atanh< float >(size_t length, float *pSrc, float *pDst)
static MATHLIB_STATUS MATHLIB_atanh_log(__SE_TEMPLATE_v1 *se0Params, __SA_TEMPLATE_v1 *sa0Params, size_t length, T *pSrc0, T *pDst)
static MATHLIB_STATUS MATHLIB_atanh_cond(__SE_TEMPLATE_v1 *se0Params, __SA_TEMPLATE_v1 *sa0Params, size_t length, T *pSrc0, T *pDst)
static void MATHLIB_atanh_vector(size_t length, T *pSrc, T *pDst)
static vecType divspMod_atanh(vecType a, vecType b)
MATHLIB_STATUS MATHLIB_atanh_sp(size_t length, float *pSrc, float *pDst)
This function is the C interface for MATHLIB_log10. Function accepts float pointers.
#define MATHLIB_LOGTABLE_OFFSET
Definition: MATHLIB_lut.h:66
static c7x::uint_vec MATHLIB_LUTReadLowerBits(vecType vecOffset)
This method reads bits 31-0 of LUT value at vecOffset.
Definition: MATHLIB_lut.h:111
static c7x::uint_vec MATHLIB_LUTReadUpperBits(vecType vecOffset)
This method reads bits 63-32 of LUT value at vecOffset.
Definition: MATHLIB_lut.h:86
static void MATHLIB_SE0SE1SA0Open(__SE_TEMPLATE_v1 *se0Params, __SA_TEMPLATE_v1 *sa0Params, T *pSrc0, T *pSrc1)
This method performs SE0, SE1, and SA0 open.
static void MATHLIB_SE0SA0Close()
This method performs SE0 and SA0 close.
static void MATHLIB_SE0SE1SA0Close()
This method performs SE0, SE1, and SA0 close.
static void MATHLIB_SE0SA01DSequentialInit(__SE_TEMPLATE_v1 *se0Params, __SA_TEMPLATE_v1 *sa0Params, size_t length, T *pSrc, T *pDst)
static MATHLIB_STATUS MATHLIB_checkParams(size_t length, T *pSrc, T *pDst)
This method performs parameter checks for MATHLIB function.
static void MATHLIB_SE0SA0Open(__SE_TEMPLATE_v1 *se0Params, __SA_TEMPLATE_v1 *sa0Params, T *pSrc)
This method performs SE0 and SA0 open.
MATHLIB_STATUS_NAME
The enumeration of all status codes.
@ MATHLIB_SUCCESS