MATHLIB User Guide
MATHLIB_exp10.cpp
Go to the documentation of this file.
1 /******************************************************************************
2  * Copyright (C) 2023 Texas Instruments Incorporated - https://www.ti.com/
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  *
8  * Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  *
11  * Redistributions in binary form must reproduce the above copyright
12  * notice, this list of conditions and the following disclaimer in the
13  * documentation and/or other materials provided with the
14  * distribution.
15  *
16  * Neither the name of Texas Instruments Incorporated nor the names of
17  * its contributors may be used to endorse or promote products derived
18  * from this software without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31  *
32  ******************************************************************************/
33 
34 #define ELEMENT_COUNT(x) c7x::element_count_of<x>::value
35 
36 /******************************************************************************/
37 /* */
38 /* Includes */
39 /* */
40 /******************************************************************************/
41 
42 #include "MATHLIB_exp10_scalar.h"
43 #include "MATHLIB_ilut.h"
44 #include "MATHLIB_lut.h"
45 #include "MATHLIB_permute.h"
46 #include "MATHLIB_types.h"
47 #include "MATHLIB_utility.h"
48 
49 /******************************************************************************/
50 /* */
51 /* MATHLIB_exp10 */
52 /* */
53 /******************************************************************************/
54 
55 template <typename T>
56 static inline void MATHLIB_exp10_cond(__SE_TEMPLATE_v1 *restrict se0Params,
57  __SA_TEMPLATE_v1 *restrict sa0Params,
58  size_t length,
59  T *restrict pSrc0,
60  T *restrict pDst)
61 {
62  // variables
63  size_t numBlocks = 0; // compute loop's iteration count
64  size_t remNumBlocks = 0; // when numBlocks is not a multiple of SIMD width
65 
66  // derive c7x vector type from template typename
67  typedef typename c7x::make_full_vector<T>::type vec;
68 
69  // calculate compute loop's iteration counter
70  numBlocks = length / c7x::element_count_of<vec>::value;
71  remNumBlocks = length % c7x::element_count_of<vec>::value;
72  if (remNumBlocks) {
73  numBlocks++;
74  }
75 
76  // open SE0, SE1, and SA0 for reading and writing operands
77  MATHLIB_SE0SE1SA0Open(se0Params, sa0Params, pDst, pSrc0);
78 
79  /**********************************************************************/
80  /* Create and assign values for constants employed on pow computation */
81  /**********************************************************************/
82 
83  vec k10e, LnMin, LnMax, Max, zero;
84  c7x::uint_vec inVec_min;
85 
86  k10e = (vec) 2.302585093f;
87  LnMin = (vec) -87.33654475f;
88  LnMax = (vec) 88.72283905f;
89  Max = (vec) 3.402823466E+38f;
90  zero = (vec) 0.0f;
91  inVec_min = (c7x::uint_vec) 114u;
92 
93  // compute loop to perform vector pow
94  for (size_t i = 0; i < numBlocks; i++) {
95  vec outVec = c7x::strm_eng<0, vec>::get_adv();
96  vec inVec = c7x::strm_eng<1, vec>::get_adv();
97 
98  /**********************************************************************/
99  /* Create variables employed on exp computation */
100  /**********************************************************************/
101  vec Ye;
102  c7x::uint_vec inVec_small;
103 
104  /**********************************************************************/
105  /* Bounds checking */
106  /**********************************************************************/
107  Ye = k10e * inVec;
108  // Early exit for small input
109  // if (_extu(_ftoi(Ye), 1u, 24u) < 114u) {
110  // res = 1.0f + Ye;
111  // }
112  inVec_small = ((c7x::as_uint_vec(Ye) << 1U) >> 24U);
113  __vpred cmp_inVec = __cmp_gt_pred(inVec_min, inVec_small);
114  outVec = __select(cmp_inVec, 1.0f + Ye, outVec);
115 
116  // < LnMin returns 0
117  // if (x < LnMin) {
118  // res = 0.0f;
119  // }
120  __vpred cmp_min = __cmp_lt_pred(Ye, LnMin);
121  outVec = __select(cmp_min, zero, outVec);
122 
123  // > LnMax returns MAX
124  // if (x > LnMax) {
125  // res = Max;
126  // }
127  __vpred cmp_max = __cmp_lt_pred(LnMax, Ye);
128  outVec = __select(cmp_max, Max, outVec);
129 
130  __vpred tmp = c7x::strm_agen<0, vec>::get_vpred();
131  vec *addr = c7x::strm_agen<0, vec>::get_adv(pDst);
132  __vstore_pred(tmp, addr, outVec);
133  }
135 }
136 
137 template <typename T> static inline void MATHLIB_exp10_vector(size_t length, T *restrict pSrc, T *restrict pDst)
138 {
139  size_t numBlocks = 0; // compute loop's iteration count
140  size_t remNumBlocks = 0; // when numBlocks is not a multiple of SIMD width
141 
142  // derive c7x vector type from template typename
143  typedef typename c7x::make_full_vector<T>::type vec;
144 
145  // Compile-time decision: float_vec => int_vec and double_vec=> long_vec
146  typedef
147  typename std::conditional<ELEMENT_COUNT(c7x::float_vec) == ELEMENT_COUNT(vec), c7x::int_vec, c7x::long_vec>::type
148  vec_type;
149 
150  typedef typename std::conditional<ELEMENT_COUNT(c7x::float_vec) == ELEMENT_COUNT(vec), c7x::uint_vec,
151  c7x::ulong_vec>::type uvec_type;
152 
153  __SE_TEMPLATE_v1 se0Params = __gen_SE_TEMPLATE_v1();
154  __SA_TEMPLATE_v1 sa0Params = __gen_SA_TEMPLATE_v1();
155 
156  MATHLIB_SE0SA01DSequentialInit(&se0Params, &sa0Params, length, pSrc, pDst);
157 
158  // calculate compute loop's iteration counter
159  numBlocks = length / c7x::element_count_of<vec>::value;
160  remNumBlocks = length % c7x::element_count_of<vec>::value;
161  if (remNumBlocks) {
162  numBlocks++;
163  }
164 
165  // open SE0, SE1, and SA0 for reading and writing operands
166  MATHLIB_SE0SA0Open(&se0Params, &sa0Params, pSrc);
167 
168  /**********************************************************************/
169  /* Create and assign values for constants employed on exp10 computation */
170  /**********************************************************************/
171 
172  vec log2_base_x16, half, negativeHalf, C0, C1, C2, P1, P2, k10e;
173  uvec_type mask;
174 
175  log2_base_x16 = (vec) (16.0f * 3.321928095f);
176  half = (vec) 0.5f;
177  negativeHalf = (vec) -0.5f;
178  C0 = (vec) 0.1667361910f;
179  C1 = (vec) 0.4999999651f;
180  C2 = (vec) 0.9999998881f;
181  P1 = (vec) 0.04331970214844f;
182  P2 = (vec) 1.99663646e-6f;
183  k10e = (vec) 2.302585093f;
184  mask = (uvec_type) 0x3u;
185 
186  // compute loop to perform vector exp10
187  for (size_t i = 0; i < numBlocks; i++) {
188  vec inVec = c7x::strm_eng<0, vec>::get_adv();
189 
190  /**********************************************************************/
191  /* Create variables employed on exp10 computation */
192  /**********************************************************************/
193 
194  vec pol, r, r2, r3, outVec, Nf, absNf, Ye;
195  uvec_type J, K, uN, dTAdjusted_32_63, dT_32_63, dT_0_31;
196  vec_type N, minusN;
197  c7x::double_vec KVals_8_15, KVals_0_7, JVals_8_15, JVals_0_7, dTVals_8_15, dTVals_0_7, pol_0_7, pol_8_15,
198  outVec_0_7, outVec_8_15;
199  c7x::uint_vec upperBitsK, lowerBitsK, upperBitsJ, lowerBitsJ;
200 
201  Ye = k10e * inVec;
202 
203  // Get N such that |N - inVec*16/ln(2)| is minimized
204  Nf = inVec * log2_base_x16;
205  absNf = Nf + half;
206  N = c7x::convert<vec_type>(absNf);
207  minusN = N - 1;
208 
209  // if ((x * log2_base_x16) < - half) {
210  // N--;
211  // }
212  __vpred cmp_N = __cmp_lt_pred(Nf, negativeHalf);
213  N = __select(cmp_N, minusN, N);
214 
215  /**********************************************************************/
216  /* Calculate Taylor series approximation for exp10 */
217  /**********************************************************************/
218  r = (Ye - (P1 * __int_to_float(N))) - (P2 * __int_to_float(N));
219  // Taylor series approximation
220  r2 = r * r;
221  r3 = r2 * r;
222  pol = (r * C2) + ((r3 * C0) + (r2 * C1));
223 
224  /**********************************************************************/
225  /* Get index of LUT and 2^M values */
226  /**********************************************************************/
227 
228  // Create vectors of LUT indices
229  uN = c7x::convert<uvec_type>(N);
230  K = ((uN << 28u) >> 30) + MATHLIB_KTABLE_OFFSET;
231  J = (uN & mask) + MATHLIB_JTABLE_OFFSET;
232 
233  // Read values from LUT and convert and store as doubles in split vectors
234  upperBitsK = MATHLIB_LUTReadUpperBits(K);
235  lowerBitsK = MATHLIB_LUTReadLowerBits(K);
236  upperBitsJ = MATHLIB_LUTReadUpperBits(J);
237  lowerBitsJ = MATHLIB_LUTReadLowerBits(J);
238 
239  KVals_8_15 = c7x::reinterpret<c7x::double_vec>(__permute_high_high(
240  MATHLIB_vperm_data_interweave_0_63, c7x::as_uchar_vec(upperBitsK), c7x::as_uchar_vec(lowerBitsK)));
241  KVals_0_7 = c7x::reinterpret<c7x::double_vec>(__permute_low_low(
242  MATHLIB_vperm_data_interweave_0_63, c7x::as_uchar_vec(upperBitsK), c7x::as_uchar_vec(lowerBitsK)));
243  JVals_8_15 = c7x::reinterpret<c7x::double_vec>(__permute_high_high(
244  MATHLIB_vperm_data_interweave_0_63, c7x::as_uchar_vec(upperBitsJ), c7x::as_uchar_vec(lowerBitsJ)));
245  JVals_0_7 = c7x::reinterpret<c7x::double_vec>(__permute_low_low(
246  MATHLIB_vperm_data_interweave_0_63, c7x::as_uchar_vec(upperBitsJ), c7x::as_uchar_vec(lowerBitsJ)));
247 
248  // Multiply LUT values
249  dTVals_8_15 = KVals_8_15 * JVals_8_15;
250  dTVals_0_7 = KVals_0_7 * JVals_0_7;
251 
252  /**********************************************************************/
253  /* Scale exponent to adjust for 2^M */
254  /**********************************************************************/
255 
256  // Upper 32 bits of all dT values and lower 32 bits of all dT values
257  dT_32_63 = c7x::as_uint_vec(__permute_odd_odd_int(MATHLIB_vperm_data_0_63, c7x::as_uchar_vec(dTVals_8_15),
258  c7x::as_uchar_vec(dTVals_0_7)));
259  dT_0_31 = c7x::as_uint_vec(__permute_even_even_int(MATHLIB_vperm_data_0_63, c7x::as_uchar_vec(dTVals_8_15),
260  c7x::as_uchar_vec(dTVals_0_7)));
261 
262  uN = (uN >> 4) << 20;
263  dTAdjusted_32_63 = dT_32_63 + uN;
264 
265  // Concatenate the adjusted upper 32 bits of dT values to the lower 32 bits, convert dT to float
266  dTVals_8_15 = c7x::reinterpret<c7x::double_vec>(__permute_high_high(
267  MATHLIB_vperm_data_interweave_0_63, c7x::as_uchar_vec(dTAdjusted_32_63), c7x::as_uchar_vec(dT_0_31)));
268  dTVals_0_7 = c7x::reinterpret<c7x::double_vec>(__permute_low_low(
269  MATHLIB_vperm_data_interweave_0_63, c7x::as_uchar_vec(dTAdjusted_32_63), c7x::as_uchar_vec(dT_0_31)));
270 
271  // TODO: adjust calculation so that DT and POL are doubles then out is float
272 
273  pol_0_7 = c7x::reinterpret<c7x::double_vec>(__permute_low_low(MATHLIB_vperm_data_dp_interweave_0_63,
274  c7x::as_uchar_vec(__high_float_to_double(pol)),
275  c7x::as_uchar_vec(__low_float_to_double(pol))));
276  pol_8_15 = c7x::reinterpret<c7x::double_vec>(__permute_high_high(MATHLIB_vperm_data_dp_interweave_0_63,
277  c7x::as_uchar_vec(__high_float_to_double(pol)),
278  c7x::as_uchar_vec(__low_float_to_double(pol))));
279 
280  outVec_0_7 = dTVals_0_7 * (1.0f + pol_0_7);
281  outVec_8_15 = dTVals_8_15 * (1.0f + pol_8_15);
282 
283  outVec = c7x::reinterpret<vec>(__permute_even_even_int(MATHLIB_vperm_data_0_63,
284  c7x::as_uchar_vec(__double_to_float(outVec_8_15)),
285  c7x::as_uchar_vec(__double_to_float(outVec_0_7))));
286 
287  __vpred tmp = c7x::strm_agen<0, vec>::get_vpred();
288  vec *addr = c7x::strm_agen<0, vec>::get_adv(pDst);
289  __vstore_pred(tmp, addr, outVec);
290  }
291 
293  MATHLIB_exp10_cond(&se0Params, &sa0Params, length, pSrc, pDst);
294 }
295 // this method performs exponential 10 computation of input vector
296 template <typename T> MATHLIB_STATUS MATHLIB_exp10(size_t length, T *restrict pSrc, T *restrict pDst)
297 {
298  MATHLIB_STATUS status = MATHLIB_SUCCESS; // return function status
299 
300  // check for null pointers and non-zero length
301  status = MATHLIB_checkParams(length, pSrc, pDst);
302 
303  if (status == MATHLIB_SUCCESS) {
304  MATHLIB_exp10_vector(length, pSrc, pDst);
305  }
306  return status;
307 }
308 
309 /******************************************************************************/
310 /* */
311 /* Explicit templatization for datatypes supported by MATHLIB_exp10 */
312 /* */
313 /******************************************************************************/
314 
315 // single precision
316 template MATHLIB_STATUS MATHLIB_exp10<float>(size_t length, float *pSrc, float *pDst);
317 
318 // double precision
319 // template MATHLIB_STATUS MATHLIB_exp10<double>(size_t length, double *pSrc0, double *pDst);
320 
321 /******************************************************************************/
322 /* */
323 /* C-interface wrapper functions */
324 /* */
325 /******************************************************************************/
326 
327 extern "C" {
328 
329 // single-precision wrapper
330 MATHLIB_STATUS MATHLIB_exp10_sp(size_t length, float *pSrc, float *pDst)
331 {
332  MATHLIB_STATUS status = MATHLIB_exp10(length, pSrc, pDst);
333  return status;
334 }
335 
336 // double-precision wrapper
337 // MATHLIB_STATUS MATHLIB_exp10_dp(size_t length, double *pSrc, double *pDst)
338 // {
339 // MATHLIB_STATUS status = MATHLIB_exp10(length, pSrc, pDst);
340 // return status;
341 // }
342 } // extern "C"
static void MATHLIB_exp10_vector(size_t length, T *restrict pSrc, T *restrict pDst)
MATHLIB_STATUS MATHLIB_exp10(size_t length, T *restrict pSrc, T *restrict pDst)
#define ELEMENT_COUNT(x)
static void MATHLIB_exp10_cond(__SE_TEMPLATE_v1 *restrict se0Params, __SA_TEMPLATE_v1 *restrict sa0Params, size_t length, T *restrict pSrc0, T *restrict pDst)
template MATHLIB_STATUS MATHLIB_exp10< float >(size_t length, float *pSrc, float *pDst)
MATHLIB_STATUS MATHLIB_exp10_sp(size_t length, float *pSrc, float *pDst)
This function is the C interface for MATHLIB_exp10. Function accepts float pointers.
#define MATHLIB_KTABLE_OFFSET
Definition: MATHLIB_lut.h:64
static c7x::uint_vec MATHLIB_LUTReadLowerBits(vecType vecOffset)
This method reads bits 31-0 of LUT value at vecOffset.
Definition: MATHLIB_lut.h:111
#define MATHLIB_JTABLE_OFFSET
Definition: MATHLIB_lut.h:65
static c7x::uint_vec MATHLIB_LUTReadUpperBits(vecType vecOffset)
This method reads bits 63-32 of LUT value at vecOffset.
Definition: MATHLIB_lut.h:86
static void MATHLIB_SE0SE1SA0Open(__SE_TEMPLATE_v1 *se0Params, __SA_TEMPLATE_v1 *sa0Params, T *pSrc0, T *pSrc1)
This method performs SE0, SE1, and SA0 open.
static void MATHLIB_SE0SA0Close()
This method performs SE0 and SA0 close.
static void MATHLIB_SE0SE1SA0Close()
This method performs SE0, SE1, and SA0 close.
static void MATHLIB_SE0SA01DSequentialInit(__SE_TEMPLATE_v1 *se0Params, __SA_TEMPLATE_v1 *sa0Params, size_t length, T *pSrc, T *pDst)
static MATHLIB_STATUS MATHLIB_checkParams(size_t length, T *pSrc, T *pDst)
This method performs parameter checks for MATHLIB function.
static void MATHLIB_SE0SA0Open(__SE_TEMPLATE_v1 *se0Params, __SA_TEMPLATE_v1 *sa0Params, T *pSrc)
This method performs SE0 and SA0 open.
MATHLIB_STATUS_NAME
The enumeration of all status codes.
@ MATHLIB_SUCCESS