MATHLIB User Guide
MATHLIB_sinh.cpp
Go to the documentation of this file.
1 /******************************************************************************
2  * Copyright (C) 2024 Texas Instruments Incorporated - https://www.ti.com/
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  *
8  * Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  *
11  * Redistributions in binary form must reproduce the above copyright
12  * notice, this list of conditions and the following disclaimer in the
13  * documentation and/or other materials provided with the
14  * distribution.
15  *
16  * Neither the name of Texas Instruments Incorporated nor the names of
17  * its contributors may be used to endorse or promote products derived
18  * from this software without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31  *
32  ******************************************************************************/
33 
34 #define ELEMENT_COUNT(x) c7x::element_count_of<x>::value
35 
36 /******************************************************************************/
37 /* */
38 /* Includes */
39 /* */
40 /******************************************************************************/
41 
42 #include "MATHLIB_ilut.h"
43 #include "MATHLIB_lut.h"
44 #include "MATHLIB_permute.h"
45 #include "MATHLIB_sinh_scalar.h"
46 #include "MATHLIB_types.h"
47 #include "MATHLIB_utility.h"
48 
49 /******************************************************************************/
50 /* */
51 /* MATHLIB_sinh */
52 /* */
53 /******************************************************************************/
54 
55 template <typename T>
56 static inline void
57 MATHLIB_sinh_exp(__SE_TEMPLATE_v1 *se0Params, __SA_TEMPLATE_v1 *sa0Params, T *pSrc, T *pDst, size_t numBlocks)
58 {
59  // derive c7x vector type from template typename
60  typedef typename c7x::make_full_vector<T>::type vec;
61 
62  // open SE0, SE1, and SA0 for reading and writing operands
63  MATHLIB_SE0SA0Open(se0Params, sa0Params, pSrc);
64 
65  /**********************************************************************/
66  /* Create and assign values for constants employed on exp computation */
67  /**********************************************************************/
68 
69  vec log2_base_x16, half, negativeHalf, LnMax, Max, C0, C1, C2, ln2;
70  c7x::uint_vec mask;
71  c7x::double_vec p;
72 
73  log2_base_x16 = (vec) 23.0831206542234f; // 1.442695041 * 16.0
74  half = (vec) 0.5f;
75  negativeHalf = (vec) -0.5f;
76  LnMax = (vec) 88.72283905f;
77  Max = (vec) 3.402823466E+38f;
78  mask = (c7x::uint_vec) 0x3u;
79  p = (c7x::double_vec) 0.0433216987816623; // 1/log2_base_x16
80  ln2 = (vec) 0.693147180559945f; // log(2)
81 
82  // coefficients to approximate the decimal part of the result
83  C0 = (vec) 0.166668549286041f;
84  C1 = (vec) 0.500016170012920f;
85  C2 = (vec) 0.999999998618401f;
86 
87  for (size_t i = 0; i < numBlocks; i++) {
88  vec inVec = c7x::strm_eng<0, vec>::get_adv();
89 
90  /*****************************************************************/
91  /* Create variables employed on exp computation */
92  /*****************************************************************/
93  vec pol, r, r2, r3, outVec, Nf, absNf, rVals_odd, rVals_even;
94  c7x::uint_vec J, K, uN, dTAdjusted_32_63, dT_32_63, dT_0_31, upperBitsK, lowerBitsK, upperBitsJ, lowerBitsJ;
95  c7x::int_vec N, minusN;
96  c7x::double_vec KVals_8_15, KVals_0_7, JVals_8_15, JVals_0_7, dTVals_8_15, dTVals_0_7, pol_0_7, pol_8_15,
97  outVec_0_7, outVec_8_15, inVecVals_odd, inVecVals_even, NVals_odd, NVals_even;
98 
99  inVec = __abs(inVec);
100 
101  // Get N such that |N - inVec*16/ln(2)| is minimized
102  Nf = inVec * log2_base_x16;
103  absNf = Nf + half;
104  N = c7x::convert<c7x::int_vec>(absNf);
105  minusN = N - 1;
106 
107  // if ((x * log2_base_x16) < - half) {
108  // N--;
109  // }
110  __vpred cmp_N = __cmp_lt_pred(Nf, negativeHalf);
111  N = __select(cmp_N, minusN, N);
112 
113  /**********************************************************************/
114  /* Calculate Taylor series approximation for exp */
115  /**********************************************************************/
116 
117  // Split vectors to compute r with double precision
118  inVecVals_odd = __high_float_to_double(inVec);
119  inVecVals_even = __low_float_to_double(inVec);
120  NVals_odd = __high_int_to_double(N);
121  NVals_even = __low_int_to_double(N);
122  rVals_odd = __double_to_float((inVecVals_odd - (p * NVals_odd)));
123  rVals_even = __double_to_float((inVecVals_even - (p * NVals_even)));
124 
125  // Rejoin vectors to create r with floating point precision
126  r = c7x::reinterpret<vec>(__permute_even_even_int(MATHLIB_vperm_data_interweave_0_63,
127  c7x::as_uchar_vec(rVals_odd), c7x::as_uchar_vec(rVals_even)));
128 
129  // Taylor series approximation
130  r2 = r * r;
131  r3 = r2 * r;
132  pol = (r * C2) + ((r3 * C0) + (r2 * C1));
133 
134  // substract 16 in order to get (e^x)/2 as a result
135  N = N - 16;
136 
137  /**********************************************************************/
138  /* Get index of LUT and 2^M values */
139  /**********************************************************************/
140 
141  // Create vectors of LUT indices
142  uN = c7x::convert<c7x::uint_vec>(N);
143  K = ((uN << 28u) >> 30) + MATHLIB_KTABLE_OFFSET;
144  J = (uN & mask) + MATHLIB_JTABLE_OFFSET;
145 
146  // Read values from LUT and convert and store as doubles in split vectors
147  upperBitsK = MATHLIB_LUTReadUpperBits(K);
148  lowerBitsK = MATHLIB_LUTReadLowerBits(K);
149  upperBitsJ = MATHLIB_LUTReadUpperBits(J);
150  lowerBitsJ = MATHLIB_LUTReadLowerBits(J);
151 
152  KVals_8_15 = c7x::reinterpret<c7x::double_vec>(__permute_high_high(
153  MATHLIB_vperm_data_interweave_0_63, c7x::as_uchar_vec(upperBitsK), c7x::as_uchar_vec(lowerBitsK)));
154  KVals_0_7 = c7x::reinterpret<c7x::double_vec>(__permute_low_low(
155  MATHLIB_vperm_data_interweave_0_63, c7x::as_uchar_vec(upperBitsK), c7x::as_uchar_vec(lowerBitsK)));
156  JVals_8_15 = c7x::reinterpret<c7x::double_vec>(__permute_high_high(
157  MATHLIB_vperm_data_interweave_0_63, c7x::as_uchar_vec(upperBitsJ), c7x::as_uchar_vec(lowerBitsJ)));
158  JVals_0_7 = c7x::reinterpret<c7x::double_vec>(__permute_low_low(
159  MATHLIB_vperm_data_interweave_0_63, c7x::as_uchar_vec(upperBitsJ), c7x::as_uchar_vec(lowerBitsJ)));
160 
161  // Multiply LUT values
162  dTVals_8_15 = KVals_8_15 * JVals_8_15;
163  dTVals_0_7 = KVals_0_7 * JVals_0_7;
164 
165  /**********************************************************************/
166  /* Scale exponent to adjust for 2^M */
167  /**********************************************************************/
168 
169  // Upper 32 bits of all dT values and lower 32 bits of all dT values
170  dT_32_63 = c7x::reinterpret<c7x::uint_vec>(__permute_odd_odd_int(
171  MATHLIB_vperm_data_0_63, c7x::as_uchar_vec(dTVals_8_15), c7x::as_uchar_vec(dTVals_0_7)));
172  dT_0_31 = c7x::reinterpret<c7x::uint_vec>(__permute_even_even_int(
173  MATHLIB_vperm_data_0_63, c7x::as_uchar_vec(dTVals_8_15), c7x::as_uchar_vec(dTVals_0_7)));
174 
175  uN = (uN >> 4) << 20;
176  dTAdjusted_32_63 = dT_32_63 + uN;
177 
178  // Concatenate the adjusted upper 32 bits of dT values to the lower 32 bits, convert dT to float
179  dTVals_8_15 = c7x::reinterpret<c7x::double_vec>(__permute_high_high(
180  MATHLIB_vperm_data_interweave_0_63, c7x::as_uchar_vec(dTAdjusted_32_63), c7x::as_uchar_vec(dT_0_31)));
181  dTVals_0_7 = c7x::reinterpret<c7x::double_vec>(__permute_low_low(
182  MATHLIB_vperm_data_interweave_0_63, c7x::as_uchar_vec(dTAdjusted_32_63), c7x::as_uchar_vec(dT_0_31)));
183 
184  pol_0_7 = c7x::reinterpret<c7x::double_vec>(__permute_low_low(MATHLIB_vperm_data_dp_interweave_0_63,
185  c7x::as_uchar_vec(__high_float_to_double(pol)),
186  c7x::as_uchar_vec(__low_float_to_double(pol))));
187  pol_8_15 = c7x::reinterpret<c7x::double_vec>(__permute_high_high(MATHLIB_vperm_data_dp_interweave_0_63,
188  c7x::as_uchar_vec(__high_float_to_double(pol)),
189  c7x::as_uchar_vec(__low_float_to_double(pol))));
190 
191  outVec_0_7 = dTVals_0_7 * (1.0f + pol_0_7);
192  outVec_8_15 = dTVals_8_15 * (1.0f + pol_8_15);
193 
194  outVec = c7x::reinterpret<vec>(__permute_even_even_int(MATHLIB_vperm_data_0_63,
195  c7x::as_uchar_vec(__double_to_float(outVec_8_15)),
196  c7x::as_uchar_vec(__double_to_float(outVec_0_7))));
197 
198  /**********************************************************************/
199  /* Bounds checking */
200  /**********************************************************************/
201 
202  // > LnMax returns MAX (inf)
203  // if (x > LnMax) {
204  // res = Max;
205  // }
206  // Ln2 adjusts the new boundary for exp(x)/2
207  __vpred cmp_max = __cmp_lt_pred(LnMax, (inVec - ln2));
208  outVec = __select(cmp_max, Max, outVec);
209 
210  __vpred tmp = c7x::strm_agen<0, vec>::get_vpred();
211  vec *addr = c7x::strm_agen<0, vec>::get_adv(pDst);
212  __vstore_pred(tmp, addr, outVec);
213  }
214 
216 }
217 
218 template <typename T>
219 static inline void
220 MATHLIB_sinh_pol(__SE_TEMPLATE_v1 *se0Params, __SA_TEMPLATE_v1 *sa0Params, T *pSrc, T *pDst, size_t numBlocks)
221 {
222  // derive c7x vector type from template typename
223  typedef typename c7x::make_full_vector<T>::type vec;
224 
225  // open SE0, SE1, and SA0 for reading and writing operands
226  MATHLIB_SE0SE1SA0Open(se0Params, sa0Params, pSrc, pDst);
227 
228  /*************************************************************************************************/
229  /* Create and assign values for constants employed on sinh polynomial and reciprocal computation */
230  /*************************************************************************************************/
231  vec pol_bound, bound, C8, C6, C4, C2, two, zero, fltMax, half, max, inf;
232  pol_bound = (vec) 1.0f;
233  bound = (vec) 16.0f;
234  C8 = (vec) 2.75573192239859e-6f;
235  C6 = (vec) 0.000198412698412698f;
236  C4 = (vec) 0.00833333333333333f;
237  C2 = (vec) 0.166666666666667f;
238  two = (vec) 2.0f;
239  zero = (vec) 0.0f;
240  fltMax = (vec) 3.40282347e+38f;
241  half = (vec) 0.5f;
242  max = (vec) 89.41598629f;
243  inf = (vec) 0x7F800000;
244 
245  for (size_t i = 0; i < numBlocks; i++) {
246  vec expOut = c7x::strm_eng<1, vec>::get_adv();
247 
248  /**********************************************************************************************/
249  /* Create variables employed on sinh polynomial and reciprocal computation */
250  /**********************************************************************************************/
251  vec inVec_abs, x2, x4, x6, x8, pol, x1, recip, expOut2, expRecip, outVec, sign;
252 
253  sign = (vec) 1.0;
254 
255  /**************************************************************/
256  /* Reciprocal computation done for inputs 1 < |input| <= 16 */
257  /**************************************************************/
258 
259  expOut2 = expOut * two; // e^x
260  x1 = __recip(expOut2);
261  x1 = x1 * (two - (expOut2 * x1));
262  recip = x1 * (two - (expOut2 * x1)); // e^-x
263 
264  /************************************************/
265  /* Boundary checking for reciprocal computation */
266  /************************************************/
267 
268  // if (a == zero) {
269  // y = zero;
270  // }
271  __vpred cmp_eq_zero = __cmp_eq_pred(expOut2, zero);
272  recip = __select(cmp_eq_zero, zero, recip);
273 
274  // if (__abs(expOut2) > max) {
275  // recip = 0;
276  // }
277  __vpred cmp_gt_flt = __cmp_lt_pred(fltMax, __abs(expOut2));
278  recip = __select(cmp_gt_flt, zero, recip);
279 
280  expRecip = (expOut2 - recip) * half; // (e^x + e^-x)/2
281 
282  /**************************************************************/
283  /* Polynomial computation done for inputs -1 < input <= 1 */
284  /**************************************************************/
285  vec inVec = c7x::strm_eng<0, vec>::get_adv();
286  inVec_abs = __abs(inVec);
287  // calculate the power of input vector
288  x2 = inVec_abs * inVec_abs;
289  x4 = x2 * x2;
290  x6 = x2 * x4;
291  x8 = x4 * x4;
292 
293  pol = ((C2 * x2) + (C4 * x4)) + ((C6 * x6) + (C8 * x8));
294  pol = (pol * inVec_abs) + inVec_abs;
295 
296  /**************************************************************/
297  /* Determining sinh computation for each input */
298  /**************************************************************/
299 
300  /**************************************************************************/
301  /* The type of calculation for sinh(x) depends on the value of x: */
302  /* */
303  /* for x_abs <= 1, res = pol_est_sinhsp_i (input x) */
304  /* for x_abs > 16, res = sign * expsp_sinhsp_i (input x_abs), */
305  /* e^-|x| is negligible */
306  /* for 1 < x_abs <= 16, res = (e^x - e^-x)/2, */
307  /* e^x = 2 * expsp_sinhsp_i (input x) */
308  /* where x_abs is the absolute value of the input, sign has a value of 1 */
309  /* or -1 depending on the sign of the input, and res is the value */
310  /* for sinh(x). */
311  /**************************************************************************/
312 
313  // if(x <= pol_bound){
314  // compute polynomial
315  // }
316  __vpred cmp_le_pol = __cmp_le_pred(inVec_abs, pol_bound); // |x| <= 1
317 
318  // if(x > bound){
319  // exp computation with |x|
320  // }
321  __vpred cmp_lt_exp = __cmp_lt_pred(bound, inVec_abs); // |x| > 16
322 
323  __vpred cmp_else_exp = __negate(__or(cmp_lt_exp, cmp_le_pol)); // 1 < |x| <= 16
324 
325  outVec = __select(cmp_else_exp, expRecip, expOut);
326  outVec = __select(cmp_le_pol, pol, outVec);
327 
328  // if (x_abs > Max) {
329  // res = _itof(0x7F800000u); // large x, res = INF
330  // }
331 
332  __vpred cmp_lt_max = __cmp_lt_pred(max, inVec_abs);
333  outVec = __select(cmp_lt_max, inf, outVec);
334 
335  __vpred cmp_sign = __cmp_lt_pred(inVec, zero);
336  sign = __select(cmp_sign, -sign, sign);
337 
338  outVec = outVec * sign;
339 
340  __vpred tmp = c7x::strm_agen<0, vec>::get_vpred();
341  vec *addr = c7x::strm_agen<0, vec>::get_adv(pDst);
342  __vstore_pred(tmp, addr, outVec);
343  }
344 }
345 
346 // this method performs sinh computation of input vector
347 template <typename T> static inline void MATHLIB_sinh_vector(size_t length, T *pSrc, T *pDst)
348 {
349  // variables
350  size_t numBlocks = 0; // compute loop's iteration count
351  size_t remNumBlocks = 0; // when numBlocks is not a multiple of SIMD width
352 
353  // derive c7x vector type from template typename
354  typedef typename c7x::make_full_vector<T>::type vec;
355 
356  __SE_TEMPLATE_v1 se0Params = __gen_SE_TEMPLATE_v1();
357  __SA_TEMPLATE_v1 sa0Params = __gen_SA_TEMPLATE_v1();
358 
359  MATHLIB_SE0SA01DSequentialInit(&se0Params, &sa0Params, length, pSrc, pDst);
360 
361  // calculate compute loop's iteration counter
362  numBlocks = length / c7x::element_count_of<vec>::value;
363  remNumBlocks = length % c7x::element_count_of<vec>::value;
364  if (remNumBlocks) {
365  numBlocks++;
366  }
367 
368  /***********************************************************************/
369  /* Exp computation for sinh */
370  /***********************************************************************/
371 
372  MATHLIB_sinh_exp(&se0Params, &sa0Params, pSrc, pDst, numBlocks);
373 
374  /***********************************************************************/
375  /* Polynomial and reciprocal computation for sinh */
376  /***********************************************************************/
377  MATHLIB_sinh_pol(&se0Params, &sa0Params, pSrc, pDst, numBlocks);
378 }
379 
380 // this method performs exponential computation of input vector
381 template <typename T> MATHLIB_STATUS MATHLIB_sinh(size_t length, T *pSrc, T *pDst)
382 {
383  MATHLIB_STATUS status = MATHLIB_SUCCESS; // return function status
384 
385  // check for null pointers and non-zero length
386  status = MATHLIB_checkParams(length, pSrc, pDst);
387 
388  if (status == MATHLIB_SUCCESS) {
389  MATHLIB_sinh_vector(length, pSrc, pDst);
390  }
391  return status;
392 }
393 
394 /******************************************************************************/
395 /* */
396 /* Explicit templatization for datatypes supported by MATHLIB_sinh */
397 /* */
398 /******************************************************************************/
399 
400 // single precision
401 template MATHLIB_STATUS MATHLIB_sinh<float>(size_t length, float *pSrc, float *pDst);
402 
403 /******************************************************************************/
404 /* */
405 /* C-interface wrapper functions */
406 /* */
407 /******************************************************************************/
408 
409 extern "C" {
410 
411 // single-precision wrapper
412 MATHLIB_STATUS MATHLIB_sinh_sp(size_t length, float *pSrc, float *pDst)
413 {
414  MATHLIB_STATUS status = MATHLIB_sinh(length, pSrc, pDst);
415  return status;
416 }
417 
418 } // extern "C"
static void MATHLIB_sinh_exp(__SE_TEMPLATE_v1 *se0Params, __SA_TEMPLATE_v1 *sa0Params, T *pSrc, T *pDst, size_t numBlocks)
static void MATHLIB_sinh_vector(size_t length, T *pSrc, T *pDst)
template MATHLIB_STATUS MATHLIB_sinh< float >(size_t length, float *pSrc, float *pDst)
static void MATHLIB_sinh_pol(__SE_TEMPLATE_v1 *se0Params, __SA_TEMPLATE_v1 *sa0Params, T *pSrc, T *pDst, size_t numBlocks)
#define MATHLIB_KTABLE_OFFSET
Definition: MATHLIB_lut.h:64
static c7x::uint_vec MATHLIB_LUTReadLowerBits(vecType vecOffset)
This method reads bits 31-0 of LUT value at vecOffset.
Definition: MATHLIB_lut.h:111
#define MATHLIB_JTABLE_OFFSET
Definition: MATHLIB_lut.h:65
static c7x::uint_vec MATHLIB_LUTReadUpperBits(vecType vecOffset)
This method reads bits 63-32 of LUT value at vecOffset.
Definition: MATHLIB_lut.h:86
static void MATHLIB_SE0SE1SA0Open(__SE_TEMPLATE_v1 *se0Params, __SA_TEMPLATE_v1 *sa0Params, T *pSrc0, T *pSrc1)
This method performs SE0, SE1, and SA0 open.
static void MATHLIB_SE0SA0Close()
This method performs SE0 and SA0 close.
static void MATHLIB_SE0SA01DSequentialInit(__SE_TEMPLATE_v1 *se0Params, __SA_TEMPLATE_v1 *sa0Params, size_t length, T *pSrc, T *pDst)
static MATHLIB_STATUS MATHLIB_checkParams(size_t length, T *pSrc, T *pDst)
This method performs parameter checks for MATHLIB function.
static void MATHLIB_SE0SA0Open(__SE_TEMPLATE_v1 *se0Params, __SA_TEMPLATE_v1 *sa0Params, T *pSrc)
This method performs SE0 and SA0 open.
MATHLIB_STATUS MATHLIB_sinh(size_t length, T *pSrc, T *pDst)
Performs the elementwise hyperbolic sine of an input vector. Function can be overloaded with float po...
MATHLIB_STATUS MATHLIB_sinh_sp(size_t length, float *pSrc, float *pDst)
This function is the C interface for MATHLIB_sinh. Function accepts float pointers.
MATHLIB_STATUS_NAME
The enumeration of all status codes.
@ MATHLIB_SUCCESS