MATHLIB User Guide
MATHLIB_exp.cpp
Go to the documentation of this file.
1 /******************************************************************************
2  * Copyright (C) 2023 Texas Instruments Incorporated - https://www.ti.com/
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  *
8  * Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  *
11  * Redistributions in binary form must reproduce the above copyright
12  * notice, this list of conditions and the following disclaimer in the
13  * documentation and/or other materials provided with the
14  * distribution.
15  *
16  * Neither the name of Texas Instruments Incorporated nor the names of
17  * its contributors may be used to endorse or promote products derived
18  * from this software without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31  *
32  ******************************************************************************/
33 
34 #define ELEMENT_COUNT(x) c7x::element_count_of<x>::value
35 
36 /******************************************************************************/
37 /* */
38 /* Includes */
39 /* */
40 /******************************************************************************/
41 
42 #include "MATHLIB_exp_scalar.h"
43 #include "MATHLIB_lut.h"
44 #include "MATHLIB_permute.h"
45 #include "MATHLIB_types.h"
46 #include "MATHLIB_utility.h"
47 /******************************************************************************/
48 /* */
49 /* MATHLIB_exp */
50 /* */
51 /******************************************************************************/
52 
53 template <typename T> static inline void MATHLIB_exp_vector(size_t length, T *pSrc, T *pDst)
54 {
55  size_t numBlocks = 0; // compute loop's iteration count
56  size_t remNumBlocks = 0; // when numBlocks is not a multiple of SIMD width
57 
58  // derive c7x vector type from template typename
59  typedef typename c7x::make_full_vector<T>::type vec;
60 
61  // Compile-time decision: float_vec => int_vec and double_vec=> long_vec
62  typedef
63  typename std::conditional<ELEMENT_COUNT(c7x::float_vec) == ELEMENT_COUNT(vec), c7x::int_vec, c7x::long_vec>::type
64  vec_type;
65 
66  typedef typename std::conditional<ELEMENT_COUNT(c7x::float_vec) == ELEMENT_COUNT(vec), c7x::uint_vec,
67  c7x::ulong_vec>::type uvec_type;
68 
69  __SE_TEMPLATE_v1 se0Params = __gen_SE_TEMPLATE_v1();
70  __SA_TEMPLATE_v1 sa0Params = __gen_SA_TEMPLATE_v1();
71 
72  MATHLIB_SE0SA01DSequentialInit(&se0Params, &sa0Params, length, pSrc, pDst);
73 
74  // calculate compute loop's iteration counter
75  numBlocks = length / c7x::element_count_of<vec>::value;
76  remNumBlocks = length % c7x::element_count_of<vec>::value;
77  if (remNumBlocks) {
78  numBlocks++;
79  }
80 
81  // open SE0, SE1, and SA0 for reading and writing operands
82  MATHLIB_SE0SA0Open(&se0Params, &sa0Params, pSrc);
83 
84  /**********************************************************************/
85  /* Create and assign values for constants employed on exp computation */
86  /**********************************************************************/
87 
88  vec log2_base_x16, half, negativeHalf, zero, LnMin, LnMax, Max, C0, C1, C2;
89  uvec_type mask;
90  c7x::double_vec p;
91 
92  log2_base_x16 = (vec) 23.083120654f;
93  half = (vec) 0.5f;
94  negativeHalf = (vec) -0.5f;
95  zero = (vec) 0.0f;
96  LnMin = (vec) -87.33654475f;
97  LnMax = (vec) 88.72283905f;
98  Max = (vec) 3.402823466E+38f;
99  C0 = (vec) 0.166668549286041f;
100  C1 = (vec) 0.500016170012920f;
101  C2 = (vec) 0.999999998618401f;
102  mask = (uvec_type) 0x3u;
103  p = (c7x::double_vec) 0.0433216987816623;
104 
105  // compute loop to perform vector exp
106  for (size_t i = 0; i < numBlocks; i++) {
107  vec inVec = c7x::strm_eng<0, vec>::get();
108 
109  /**********************************************************************/
110  /* Create variables employed on exp computation */
111  /**********************************************************************/
112 
113  vec pol, r, r2, r3, outVec, Nf, absNf, dT, rVals_odd, rVals_even;
114  uvec_type J, K, uN, dTAdjusted_32_63, dT_32_63, dT_0_31;
115  vec_type N, minusN;
116  c7x::double_vec inVecVals_odd, inVecVals_even, NVals_odd, NVals_even, KVals_8_15, KVals_0_7, JVals_8_15,
117  JVals_0_7, dTVals_8_15, dTVals_0_7;
118  c7x::uint_vec upperBitsK, lowerBitsK, upperBitsJ, lowerBitsJ;
119 
120  // Get N such that |N - inVec*16/ln(2)| is minimized
121  Nf = inVec * log2_base_x16;
122  absNf = Nf + half;
123  N = c7x::convert<vec_type>(absNf);
124  minusN = N - 1;
125 
126  // if ((x * log2_base_x16) < - half) {
127  // N--;
128  // }
129  __vpred cmp_N = __cmp_lt_pred(Nf, negativeHalf);
130  N = __select(cmp_N, minusN, N);
131 
132  /**********************************************************************/
133  /* Calculate Taylor series approximation for exp */
134  /**********************************************************************/
135 
136  // Split vectors to compute r with double precision
137  inVecVals_odd = __high_float_to_double(inVec);
138  inVecVals_even = __low_float_to_double(inVec);
139  NVals_odd = __high_int_to_double(N);
140  NVals_even = __low_int_to_double(N);
141  rVals_odd = __double_to_float((inVecVals_odd - (p * NVals_odd)));
142  rVals_even = __double_to_float((inVecVals_even - (p * NVals_even)));
143 
144  // Rejoin vectors to create r with floating point precision
145  r = c7x::reinterpret<vec>(__permute_even_even_int(MATHLIB_vperm_data_interweave_0_63,
146  c7x::as_uchar_vec(rVals_odd),
147  c7x::as_uchar_vec(rVals_even)));
148 
149  // Taylor series approximation
150  r2 = r * r;
151  r3 = r2 * r;
152  pol = (r * C2) + ((r3 * C0) + (r2 * C1));
153 
154  /**********************************************************************/
155  /* Get index of LUT and 2^M values */
156  /**********************************************************************/
157 
158  // Create vectors of LUT indices
159  uN = c7x::convert<uvec_type>(N);
160  K = ((uN << 28u) >> 30) + MATHLIB_KTABLE_OFFSET;
161  J = (uN & mask) + MATHLIB_JTABLE_OFFSET;
162 
163  // Read values from LUT and convert and store as doubles in split vectors
164  upperBitsK = MATHLIB_LUTReadUpperBits(K);
165  lowerBitsK = MATHLIB_LUTReadLowerBits(K);
166  upperBitsJ = MATHLIB_LUTReadUpperBits(J);
167  lowerBitsJ = MATHLIB_LUTReadLowerBits(J);
168 
169  KVals_8_15 = c7x::reinterpret<c7x::double_vec>(__permute_high_high(MATHLIB_vperm_data_interweave_0_63,
170  c7x::as_uchar_vec(upperBitsK),
171  c7x::as_uchar_vec(lowerBitsK)));
172  KVals_0_7 = c7x::reinterpret<c7x::double_vec>(__permute_low_low(MATHLIB_vperm_data_interweave_0_63,
173  c7x::as_uchar_vec(upperBitsK),
174  c7x::as_uchar_vec(lowerBitsK)));
175  JVals_8_15 = c7x::reinterpret<c7x::double_vec>(__permute_high_high(MATHLIB_vperm_data_interweave_0_63,
176  c7x::as_uchar_vec(upperBitsJ),
177  c7x::as_uchar_vec(lowerBitsJ)));
178  JVals_0_7 = c7x::reinterpret<c7x::double_vec>(__permute_low_low(MATHLIB_vperm_data_interweave_0_63,
179  c7x::as_uchar_vec(upperBitsJ),
180  c7x::as_uchar_vec(lowerBitsJ)));
181 
182  // Multiply LUT values
183  dTVals_8_15 = KVals_8_15 * JVals_8_15;
184  dTVals_0_7 = KVals_0_7 * JVals_0_7;
185 
186  /**********************************************************************/
187  /* Scale exponent to adjust for 2^M */
188  /**********************************************************************/
189 
190  // Upper 32 bits of all dT values and lower 32 bits of all dT values
191 
192  dT_32_63 = c7x::reinterpret<c7x::uint_vec>(__permute_odd_odd_int(MATHLIB_vperm_data_0_63,
193  c7x::as_uchar_vec(dTVals_8_15),
194  c7x::as_uchar_vec(dTVals_0_7)));
195  dT_0_31 = c7x::reinterpret<c7x::uint_vec>(__permute_even_even_int(MATHLIB_vperm_data_0_63,
196  c7x::as_uchar_vec(dTVals_8_15),
197  c7x::as_uchar_vec(dTVals_0_7)));
198 
199  uN = (uN >> 4) << 20;
200  dTAdjusted_32_63 = dT_32_63 + uN;
201 
202  // Concatenate the adjusted upper 32 bits of dT values to the lower 32 bits, convert dT to float
203  dTVals_8_15 = c7x::reinterpret<c7x::double_vec>(
204  __permute_high_high(MATHLIB_vperm_data_interweave_0_63, c7x::as_uchar_vec(dTAdjusted_32_63),
205  c7x::as_uchar_vec(dT_0_31)));
206  dTVals_0_7 = c7x::reinterpret<c7x::double_vec>(
207  __permute_low_low(MATHLIB_vperm_data_interweave_0_63, c7x::as_uchar_vec(dTAdjusted_32_63),
208  c7x::as_uchar_vec(dT_0_31)));
209  dT = c7x::reinterpret<vec>(__permute_even_even_int(
210  MATHLIB_vperm_data_0_63, c7x::as_uchar_vec(__double_to_float(dTVals_8_15)),
211  c7x::as_uchar_vec(__double_to_float(dTVals_0_7))));
212 
213  outVec = dT * (1.0f + pol);
214 
215  /**********************************************************************/
216  /* Bounds checking */
217  /**********************************************************************/
218  // Read inVec again for ii optimization
219  inVec = c7x::strm_eng<0, vec>::get_adv();
220 
221  // < LnMin returns 0
222  // if (x < LnMin) {
223  // res = 0.0f;
224  // }
225  __vpred cmp_min = __cmp_lt_pred(inVec, LnMin);
226  outVec = __select(cmp_min, zero, outVec);
227 
228  // > LnMax returns MAX
229  // if (x > LnMax) {
230  // res = Max;
231  // }
232  __vpred cmp_max = __cmp_lt_pred(LnMax, inVec);
233  outVec = __select(cmp_max, Max, outVec);
234 
235  __vpred tmp = c7x::strm_agen<0, vec>::get_vpred();
236  vec *addr = c7x::strm_agen<0, vec>::get_adv(pDst);
237  __vstore_pred(tmp, addr, outVec);
238  }
239 
241 }
242 // this method performs exponential computation of input vector
243 template <typename T> MATHLIB_STATUS MATHLIB_exp(size_t length, T *pSrc, T *pDst)
244 {
245  MATHLIB_STATUS status = MATHLIB_SUCCESS; // return function status
246 
247  // check for null pointers and non-zero length
248  status = MATHLIB_checkParams(length, pSrc, pDst);
249 
250  if (status == MATHLIB_SUCCESS) {
251  MATHLIB_exp_vector(length, pSrc, pDst);
252  }
253  return status;
254 }
255 
256 /******************************************************************************/
257 /* */
258 /* Explicit templatization for datatypes supported by MATHLIB_exp */
259 /* */
260 /******************************************************************************/
261 
262 // single precision
263 template MATHLIB_STATUS MATHLIB_exp<float>(size_t length, float *pSrc, float *pDst);
264 
265 // double precision
266 // template MATHLIB_STATUS MATHLIB_exp<double>(size_t length, double *pSrc0, double *pDst);
267 
268 /******************************************************************************/
269 /* */
270 /* C-interface wrapper functions */
271 /* */
272 /******************************************************************************/
273 
274 extern "C" {
275 
276 // single-precision wrapper
277 MATHLIB_STATUS MATHLIB_exp_sp(size_t length, float *pSrc, float *pDst)
278 {
279  MATHLIB_STATUS status = MATHLIB_exp(length, pSrc, pDst);
280  return status;
281 }
282 
283 // double-precision wrapper
284 // MATHLIB_STATUS MATHLIB_exp_dp(size_t length, double *pSrc, double *pDst)
285 // {
286 // MATHLIB_STATUS status = MATHLIB_exp(length, pSrc, pDst);
287 // return status;
288 // }
289 } // extern "C"
#define ELEMENT_COUNT(x)
Definition: MATHLIB_exp.cpp:34
static void MATHLIB_exp_vector(size_t length, T *pSrc, T *pDst)
Definition: MATHLIB_exp.cpp:53
template MATHLIB_STATUS MATHLIB_exp< float >(size_t length, float *pSrc, float *pDst)
MATHLIB_STATUS MATHLIB_exp_sp(size_t length, float *pSrc, float *pDst)
This function is the C interface for MATHLIB_exp. Function accepts float pointers.
MATHLIB_STATUS MATHLIB_exp(size_t length, T *pSrc, T *pDst)
Performs the elementwise exponentialization of an input vector. Function can be overloaded with float...
#define MATHLIB_KTABLE_OFFSET
Definition: MATHLIB_lut.h:64
static c7x::uint_vec MATHLIB_LUTReadLowerBits(vecType vecOffset)
This method reads bits 31-0 of LUT value at vecOffset.
Definition: MATHLIB_lut.h:111
#define MATHLIB_JTABLE_OFFSET
Definition: MATHLIB_lut.h:65
static c7x::uint_vec MATHLIB_LUTReadUpperBits(vecType vecOffset)
This method reads bits 63-32 of LUT value at vecOffset.
Definition: MATHLIB_lut.h:86
static void MATHLIB_SE0SA0Close()
This method performs SE0 and SA0 close.
static void MATHLIB_SE0SA01DSequentialInit(__SE_TEMPLATE_v1 *se0Params, __SA_TEMPLATE_v1 *sa0Params, size_t length, T *pSrc, T *pDst)
static MATHLIB_STATUS MATHLIB_checkParams(size_t length, T *pSrc, T *pDst)
This method performs parameter checks for MATHLIB function.
static void MATHLIB_SE0SA0Open(__SE_TEMPLATE_v1 *se0Params, __SA_TEMPLATE_v1 *sa0Params, T *pSrc)
This method performs SE0 and SA0 open.
MATHLIB_STATUS_NAME
The enumeration of all status codes.
@ MATHLIB_SUCCESS