MATHLIB User Guide
MATHLIB_cosh.cpp
Go to the documentation of this file.
1 /******************************************************************************
2  * Copyright (C) 2024 Texas Instruments Incorporated - https://www.ti.com/
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  *
8  * Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  *
11  * Redistributions in binary form must reproduce the above copyright
12  * notice, this list of conditions and the following disclaimer in the
13  * documentation and/or other materials provided with the
14  * distribution.
15  *
16  * Neither the name of Texas Instruments Incorporated nor the names of
17  * its contributors may be used to endorse or promote products derived
18  * from this software without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31  *
32  ******************************************************************************/
33 
34 #define ELEMENT_COUNT(x) c7x::element_count_of<x>::value
35 
36 /******************************************************************************/
37 /* */
38 /* Includes */
39 /* */
40 /******************************************************************************/
41 
42 #include "MATHLIB_cosh_scalar.h"
43 #include "MATHLIB_ilut.h"
44 #include "MATHLIB_lut.h"
45 #include "MATHLIB_permute.h"
46 #include "MATHLIB_types.h"
47 #include "MATHLIB_utility.h"
48 
49 /******************************************************************************/
50 /* */
51 /* MATHLIB_cosh */
52 /* */
53 /******************************************************************************/
54 
55 template <typename T>
56 static inline void MATHLIB_cosh_exp(__SE_TEMPLATE_v1 *restrict se0Params,
57  __SA_TEMPLATE_v1 *restrict sa0Params,
58  T *restrict pSrc,
59  T *restrict pDst,
60  size_t numBlocks)
61 {
62  // derive c7x vector type from template typename
63  typedef typename c7x::make_full_vector<T>::type vec;
64 
65  // open SE0, SE1, and SA0 for reading and writing operands
66  MATHLIB_SE0SA0Open(se0Params, sa0Params, pSrc);
67 
68  /**********************************************************************/
69  /* Create and assign values for constants employed on exp computation */
70  /**********************************************************************/
71 
72  vec bound;
73  bound = (vec) 16.0;
74 
75  vec log2_base_x16, half, negativeHalf, LnMax, Max, C0, C1, C2, ln2;
76  c7x::uint_vec mask;
77  c7x::double_vec p;
78 
79  log2_base_x16 = (vec) 23.0831206542234f; // 1.442695041 * 16.0
80  half = (vec) 0.5f;
81  negativeHalf = (vec) -0.5f;
82  LnMax = (vec) 88.72283905f;
83  Max = (vec) 3.402823466E+38f;
84  mask = (c7x::uint_vec) 0x3u;
85  p = (c7x::double_vec) 0.0433216987816623; // 1/log2_base_x16
86  ln2 = (vec) 0.693147180559945f; // log(2)
87 
88  // coefficients to approximate the decimal part of the result
89  C0 = (vec) 0.166668549286041f;
90  C1 = (vec) 0.500016170012920f;
91  C2 = (vec) 0.999999998618401f;
92 
93  for (size_t i = 0; i < numBlocks; i++) {
94  vec inVec = c7x::strm_eng<0, vec>::get_adv();
95 
96  /*****************************************************************/
97  /* Create variables employed on exp computation */
98  /*****************************************************************/
99  vec inVec_abs;
100 
101  vec pol, r, r2, r3, outVec, Nf, absNf, rVals_odd, rVals_even;
102  c7x::uint_vec J, K, uN, dTAdjusted_32_63, dT_32_63, dT_0_31, upperBitsK, lowerBitsK, upperBitsJ, lowerBitsJ;
103  c7x::int_vec N, minusN;
104  c7x::double_vec KVals_8_15, KVals_0_7, JVals_8_15, JVals_0_7, dTVals_8_15, dTVals_0_7, pol_0_7, pol_8_15,
105  outVec_0_7, outVec_8_15, inVecVals_odd, inVecVals_even, NVals_odd, NVals_even;
106 
107  inVec_abs = __abs(inVec);
108 
109  // if(x > bound){
110  // select absolute value of input or standard input for exp computation
111  // }
112  __vpred cmp_lt_exp = __cmp_lt_pred(bound, inVec_abs);
113  inVec = __select(cmp_lt_exp, inVec_abs, inVec);
114 
115  // Get N such that |N - inVec*16/ln(2)| is minimized
116  Nf = inVec * log2_base_x16;
117  absNf = Nf + half;
118  N = c7x::convert<c7x::int_vec>(absNf);
119  minusN = N - 1;
120 
121  // if ((x * log2_base_x16) < - half) {
122  // N--;
123  // }
124  __vpred cmp_N = __cmp_lt_pred(Nf, negativeHalf);
125  N = __select(cmp_N, minusN, N);
126 
127  /**********************************************************************/
128  /* Calculate Taylor series approximation for exp */
129  /**********************************************************************/
130 
131  // Split vectors to compute r with double precision
132  inVecVals_odd = __high_float_to_double(inVec);
133  inVecVals_even = __low_float_to_double(inVec);
134  NVals_odd = __high_int_to_double(N);
135  NVals_even = __low_int_to_double(N);
136  rVals_odd = __double_to_float((inVecVals_odd - (p * NVals_odd)));
137  rVals_even = __double_to_float((inVecVals_even - (p * NVals_even)));
138 
139  // Rejoin vectors to create r with floating point precision
140  r = c7x::reinterpret<vec>(__permute_even_even_int(MATHLIB_vperm_data_interweave_0_63,
141  c7x::as_uchar_vec(rVals_odd), c7x::as_uchar_vec(rVals_even)));
142 
143  // Taylor series approximation
144  r2 = r * r;
145  r3 = r2 * r;
146  pol = (r * C2) + ((r3 * C0) + (r2 * C1));
147 
148  // substract 16 in order to get (e^x)/2 as a result
149  N = N - 16;
150 
151  /**********************************************************************/
152  /* Get index of LUT and 2^M values */
153  /**********************************************************************/
154 
155  // Create vectors of LUT indices
156  uN = c7x::convert<c7x::uint_vec>(N);
157  K = ((uN << 28u) >> 30) + MATHLIB_KTABLE_OFFSET;
158  J = (uN & mask) + MATHLIB_JTABLE_OFFSET;
159 
160  // Read values from LUT and convert and store as doubles in split vectors
161  upperBitsK = MATHLIB_LUTReadUpperBits(K);
162  lowerBitsK = MATHLIB_LUTReadLowerBits(K);
163  upperBitsJ = MATHLIB_LUTReadUpperBits(J);
164  lowerBitsJ = MATHLIB_LUTReadLowerBits(J);
165 
166  KVals_8_15 = c7x::reinterpret<c7x::double_vec>(__permute_high_high(
167  MATHLIB_vperm_data_interweave_0_63, c7x::as_uchar_vec(upperBitsK), c7x::as_uchar_vec(lowerBitsK)));
168  KVals_0_7 = c7x::reinterpret<c7x::double_vec>(__permute_low_low(
169  MATHLIB_vperm_data_interweave_0_63, c7x::as_uchar_vec(upperBitsK), c7x::as_uchar_vec(lowerBitsK)));
170  JVals_8_15 = c7x::reinterpret<c7x::double_vec>(__permute_high_high(
171  MATHLIB_vperm_data_interweave_0_63, c7x::as_uchar_vec(upperBitsJ), c7x::as_uchar_vec(lowerBitsJ)));
172  JVals_0_7 = c7x::reinterpret<c7x::double_vec>(__permute_low_low(
173  MATHLIB_vperm_data_interweave_0_63, c7x::as_uchar_vec(upperBitsJ), c7x::as_uchar_vec(lowerBitsJ)));
174 
175  // Multiply LUT values
176  dTVals_8_15 = KVals_8_15 * JVals_8_15;
177  dTVals_0_7 = KVals_0_7 * JVals_0_7;
178 
179  /**********************************************************************/
180  /* Scale exponent to adjust for 2^M */
181  /**********************************************************************/
182 
183  // Upper 32 bits of all dT values and lower 32 bits of all dT values
184  dT_32_63 = c7x::reinterpret<c7x::uint_vec>(__permute_odd_odd_int(
185  MATHLIB_vperm_data_0_63, c7x::as_uchar_vec(dTVals_8_15), c7x::as_uchar_vec(dTVals_0_7)));
186  dT_0_31 = c7x::reinterpret<c7x::uint_vec>(__permute_even_even_int(
187  MATHLIB_vperm_data_0_63, c7x::as_uchar_vec(dTVals_8_15), c7x::as_uchar_vec(dTVals_0_7)));
188 
189  uN = (uN >> 4) << 20;
190  dTAdjusted_32_63 = dT_32_63 + uN;
191 
192  // Concatenate the adjusted upper 32 bits of dT values to the lower 32 bits, convert dT to float
193  dTVals_8_15 = c7x::reinterpret<c7x::double_vec>(__permute_high_high(
194  MATHLIB_vperm_data_interweave_0_63, c7x::as_uchar_vec(dTAdjusted_32_63), c7x::as_uchar_vec(dT_0_31)));
195  dTVals_0_7 = c7x::reinterpret<c7x::double_vec>(__permute_low_low(
196  MATHLIB_vperm_data_interweave_0_63, c7x::as_uchar_vec(dTAdjusted_32_63), c7x::as_uchar_vec(dT_0_31)));
197 
198  pol_0_7 = c7x::reinterpret<c7x::double_vec>(__permute_low_low(MATHLIB_vperm_data_dp_interweave_0_63,
199  c7x::as_uchar_vec(__high_float_to_double(pol)),
200  c7x::as_uchar_vec(__low_float_to_double(pol))));
201  pol_8_15 = c7x::reinterpret<c7x::double_vec>(__permute_high_high(MATHLIB_vperm_data_dp_interweave_0_63,
202  c7x::as_uchar_vec(__high_float_to_double(pol)),
203  c7x::as_uchar_vec(__low_float_to_double(pol))));
204 
205  outVec_0_7 = dTVals_0_7 * (1.0f + pol_0_7);
206  outVec_8_15 = dTVals_8_15 * (1.0f + pol_8_15);
207 
208  outVec = c7x::reinterpret<vec>(__permute_even_even_int(MATHLIB_vperm_data_0_63,
209  c7x::as_uchar_vec(__double_to_float(outVec_8_15)),
210  c7x::as_uchar_vec(__double_to_float(outVec_0_7))));
211 
212  /**********************************************************************/
213  /* Bounds checking */
214  /**********************************************************************/
215 
216  // > LnMax returns MAX (inf)
217  // if (x > LnMax) {
218  // res = Max;
219  // }
220  // Ln2 adjusts the new boundary for exp(x)/2
221  __vpred cmp_max = __cmp_lt_pred(LnMax, (inVec - ln2));
222  outVec = __select(cmp_max, Max, outVec);
223 
224  __vpred tmp = c7x::strm_agen<0, vec>::get_vpred();
225  vec *addr = c7x::strm_agen<0, vec>::get_adv(pDst);
226  __vstore_pred(tmp, addr, outVec);
227  }
228 
230 }
231 
232 template <typename T>
233 static inline void MATHLIB_cosh_pol(__SE_TEMPLATE_v1 *restrict se0Params,
234  __SA_TEMPLATE_v1 *restrict sa0Params,
235  T *restrict pSrc,
236  T *restrict pDst,
237  size_t numBlocks)
238 {
239  // derive c7x vector type from template typename
240  typedef typename c7x::make_full_vector<T>::type vec;
241 
242  // open SE0, SE1, and SA0 for reading and writing operands
243  MATHLIB_SE0SE1SA0Open(se0Params, sa0Params, pSrc, pDst);
244 
245  /*************************************************************************************************/
246  /* Create and assign values for constants employed on cosh polynomial and reciprocal computation */
247  /*************************************************************************************************/
248  vec pol_bound, bound, C1, C2, C3, C4, two, zero, fltMax, half, max, inf;
249  pol_bound = (vec) 1.0f;
250  bound = (vec) 16.0f;
251  C1 = (vec) 2.48015873015873e-5f;
252  C2 = (vec) 0.00138888888888889f;
253  C3 = (vec) 0.0416666666666667f;
254  C4 = (vec) 0.5000000f;
255  two = (vec) 2.0f;
256  zero = (vec) 0.0f;
257  fltMax = (vec) 3.40282347e+38f;
258  half = (vec) 0.5f;
259  max = (vec) 89.41598629f;
260  inf = (vec) 0x7F800000;
261 
262  for (size_t i = 0; i < numBlocks; i++) {
263  vec inVec = c7x::strm_eng<0, vec>::get_adv();
264  vec expOut = c7x::strm_eng<1, vec>::get_adv();
265 
266  /**********************************************************************************************/
267  /* Create variables employed on cosh polynomial and reciprocal computation */
268  /**********************************************************************************************/
269  vec inVec_abs, x2, x4, x6, x8, pol, x1, recip, expOut2, expRecip, outVec;
270 
271  /**************************************************************/
272  /* Reciprocal computation done for inputs 1 < |input| <= 16 */
273  /**************************************************************/
274 
275  expOut2 = expOut * two; // e^x
276  x1 = __recip(expOut2);
277  x1 = x1 * (two - (expOut2 * x1));
278  recip = x1 * (two - (expOut2 * x1)); // e^-x
279 
280  /************************************************/
281  /* Boundary checking for reciprocal computation */
282  /************************************************/
283 
284  // if (a == zero) {
285  // y = zero;
286  // }
287  __vpred cmp_eq_zero = __cmp_eq_pred(expOut2, zero);
288  recip = __select(cmp_eq_zero, zero, recip);
289 
290  // if (__abs(expOut2) > max) {
291  // recip = 0;
292  // }
293  __vpred cmp_gt_flt = __cmp_lt_pred(fltMax, __abs(expOut2));
294  recip = __select(cmp_gt_flt, zero, recip);
295 
296  expRecip = (expOut2 + recip) * half; // (e^x + e^-x)/2
297 
298  /**************************************************************/
299  /* Polynomial computation done for inputs -1 < input <= 1 */
300  /**************************************************************/
301 
302  // calculate the power of input vector
303  x2 = inVec * inVec;
304  x4 = x2 * x2;
305  x6 = x2 * x4;
306  x8 = x4 * x4;
307 
308  pol = ((C4 * x2) + (C3 * x4)) + ((C1 * x8) + (C2 * x6));
309  pol = pol + 1.0f;
310 
311  /**************************************************************/
312  /* Determining cosh computation for each input */
313  /**************************************************************/
314 
315  /**************************************************************************/
316  /* The type of calculation for cosh(x) depends on the value of x: */
317  /* */
318  /* for x_abs <= 1, res = pol_est_coshsp_i (input x) */
319  /* for x_abs > 16, res = expsp_coshsp_i (input x_abs), */
320  /* e^-|x| is negligible */
321  /* for 1 < x_abs <= 16, res = (e^x + e^-x)/2, */
322  /* e^x = 2 * expsp_coshsp_i (input x) */
323  /* where x_abs is the absolute value of the input, sign has a value of 1 */
324  /* or -1 depending on the sign of the input, and res is the value */
325  /* for cosh(x). */
326  /* ************************************************************************/
327 
328  inVec_abs = __abs(inVec);
329 
330  // if(x <= pol_bound){
331  // compute polynomial
332  // }
333  __vpred cmp_le_pol = __cmp_le_pred(inVec_abs, pol_bound); // |x| <= 1
334 
335  // if(x > bound){
336  // exp computation with |x|
337  // }
338  __vpred cmp_lt_exp = __cmp_lt_pred(bound, inVec_abs); // |x| > 16
339 
340  __vpred cmp_else_exp = __negate(__or(cmp_lt_exp, cmp_le_pol)); // 1 < |x| <= 16
341 
342  outVec = __select(cmp_else_exp, expRecip, expOut);
343  outVec = __select(cmp_le_pol, pol, outVec);
344 
345  // if (x_abs > Max) {
346  // res = _itof(0x7F800000u); // large x, res = INF
347  // }
348 
349  __vpred cmp_lt_max = __cmp_lt_pred(max, inVec_abs);
350  outVec = __select(cmp_lt_max, inf, outVec);
351 
352  __vpred tmp = c7x::strm_agen<0, vec>::get_vpred();
353  vec *addr = c7x::strm_agen<0, vec>::get_adv(pDst);
354  __vstore_pred(tmp, addr, outVec);
355  }
356 }
357 
358 // this method performs cosh computation of input vector
359 template <typename T> static inline void MATHLIB_cosh_vector(size_t length, T *restrict pSrc, T *restrict pDst)
360 {
361  // variables
362  size_t numBlocks = 0; // compute loop's iteration count
363  size_t remNumBlocks = 0; // when numBlocks is not a multiple of SIMD width
364 
365  // derive c7x vector type from template typename
366  typedef typename c7x::make_full_vector<T>::type vec;
367 
368  __SE_TEMPLATE_v1 se0Params = __gen_SE_TEMPLATE_v1();
369  __SA_TEMPLATE_v1 sa0Params = __gen_SA_TEMPLATE_v1();
370 
371  MATHLIB_SE0SA01DSequentialInit(&se0Params, &sa0Params, length, pSrc, pDst);
372 
373  // calculate compute loop's iteration counter
374  numBlocks = length / c7x::element_count_of<vec>::value;
375  remNumBlocks = length % c7x::element_count_of<vec>::value;
376  if (remNumBlocks) {
377  numBlocks++;
378  }
379 
380  /***********************************************************************/
381  /* Exp computation for cosh */
382  /***********************************************************************/
383 
384  MATHLIB_cosh_exp(&se0Params, &sa0Params, pSrc, pDst, numBlocks);
385 
386  /***********************************************************************/
387  /* Polynomial and reciprocal computation for cosh */
388  /***********************************************************************/
389  MATHLIB_cosh_pol(&se0Params, &sa0Params, pSrc, pDst, numBlocks);
390 }
391 
392 // this method performs exponential computation of input vector
393 template <typename T> MATHLIB_STATUS MATHLIB_cosh(size_t length, T *restrict pSrc, T *restrict pDst)
394 {
395  MATHLIB_STATUS status = MATHLIB_SUCCESS; // return function status
396 
397  // check for null pointers and non-zero length
398  status = MATHLIB_checkParams(length, pSrc, pDst);
399 
400  if (status == MATHLIB_SUCCESS) {
401  MATHLIB_cosh_vector(length, pSrc, pDst);
402  }
403  return status;
404 }
405 
406 /******************************************************************************/
407 /* */
408 /* Explicit templatization for datatypes supported by MATHLIB_cosh */
409 /* */
410 /******************************************************************************/
411 
412 // single precision
413 template MATHLIB_STATUS MATHLIB_cosh<float>(size_t length, float *pSrc, float *pDst);
414 
415 /******************************************************************************/
416 /* */
417 /* C-interface wrapper functions */
418 /* */
419 /******************************************************************************/
420 
421 extern "C" {
422 
423 // single-precision wrapper
424 MATHLIB_STATUS MATHLIB_cosh_sp(size_t length, float *pSrc, float *pDst)
425 {
426  MATHLIB_STATUS status = MATHLIB_cosh(length, pSrc, pDst);
427  return status;
428 }
429 
430 } // extern "C"
static void MATHLIB_cosh_pol(__SE_TEMPLATE_v1 *restrict se0Params, __SA_TEMPLATE_v1 *restrict sa0Params, T *restrict pSrc, T *restrict pDst, size_t numBlocks)
static void MATHLIB_cosh_exp(__SE_TEMPLATE_v1 *restrict se0Params, __SA_TEMPLATE_v1 *restrict sa0Params, T *restrict pSrc, T *restrict pDst, size_t numBlocks)
static void MATHLIB_cosh_vector(size_t length, T *restrict pSrc, T *restrict pDst)
template MATHLIB_STATUS MATHLIB_cosh< float >(size_t length, float *pSrc, float *pDst)
MATHLIB_STATUS MATHLIB_cosh(size_t length, T *restrict pSrc, T *restrict pDst)
MATHLIB_STATUS MATHLIB_cosh_sp(size_t length, float *pSrc, float *pDst)
This function is the C interface for MATHLIB_cosh. Function accepts float pointers.
#define MATHLIB_KTABLE_OFFSET
Definition: MATHLIB_lut.h:64
static c7x::uint_vec MATHLIB_LUTReadLowerBits(vecType vecOffset)
This method reads bits 31-0 of LUT value at vecOffset.
Definition: MATHLIB_lut.h:111
#define MATHLIB_JTABLE_OFFSET
Definition: MATHLIB_lut.h:65
static c7x::uint_vec MATHLIB_LUTReadUpperBits(vecType vecOffset)
This method reads bits 63-32 of LUT value at vecOffset.
Definition: MATHLIB_lut.h:86
static void MATHLIB_SE0SE1SA0Open(__SE_TEMPLATE_v1 *se0Params, __SA_TEMPLATE_v1 *sa0Params, T *pSrc0, T *pSrc1)
This method performs SE0, SE1, and SA0 open.
static void MATHLIB_SE0SA0Close()
This method performs SE0 and SA0 close.
static void MATHLIB_SE0SA01DSequentialInit(__SE_TEMPLATE_v1 *se0Params, __SA_TEMPLATE_v1 *sa0Params, size_t length, T *pSrc, T *pDst)
static MATHLIB_STATUS MATHLIB_checkParams(size_t length, T *pSrc, T *pDst)
This method performs parameter checks for MATHLIB function.
static void MATHLIB_SE0SA0Open(__SE_TEMPLATE_v1 *se0Params, __SA_TEMPLATE_v1 *sa0Params, T *pSrc)
This method performs SE0 and SA0 open.
MATHLIB_STATUS_NAME
The enumeration of all status codes.
@ MATHLIB_SUCCESS