MATHLIB User Guide
MATHLIB_log2.cpp
Go to the documentation of this file.
1 /******************************************************************************
2  * Copyright (C) 2023 Texas Instruments Incorporated - https://www.ti.com/
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  *
8  * Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  *
11  * Redistributions in binary form must reproduce the above copyright
12  * notice, this list of conditions and the following disclaimer in the
13  * documentation and/or other materials provided with the
14  * distribution.
15  *
16  * Neither the name of Texas Instruments Incorporated nor the names of
17  * its contributors may be used to endorse or promote products derived
18  * from this software without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31  *
32  ******************************************************************************/
33 
34 #define ELEMENT_COUNT(x) c7x::element_count_of<x>::value
35 
36 /******************************************************************************/
37 /* */
38 /* Includes */
39 /* */
40 /******************************************************************************/
41 
42 #include "MATHLIB_ilut.h"
43 #include "MATHLIB_log2_scalar.h"
44 #include "MATHLIB_lut.h"
45 #include "MATHLIB_permute.h"
46 #include "MATHLIB_types.h"
47 #include "MATHLIB_utility.h"
48 
49 /******************************************************************************/
50 /* */
51 /* MATHLIB_log2 */
52 /* */
53 /******************************************************************************/
54 // this method performs log2 computation of input vector
55 template <typename T> static inline void MATHLIB_log2_vector(size_t length, T *restrict pSrc, T *restrict pDst)
56 {
57  // variables
58  size_t numBlocks = 0; // compute loop's iteration count
59  size_t remNumBlocks = 0; // when numBlocks is not a multiple of SIMD width
60 
61  // derive c7x vector type from template typename
62  typedef typename c7x::make_full_vector<float>::type vec;
63 
64  __SE_TEMPLATE_v1 se0Params = __gen_SE_TEMPLATE_v1();
65  __SA_TEMPLATE_v1 sa0Params = __gen_SA_TEMPLATE_v1();
66 
67  MATHLIB_SE0SA01DSequentialInit(&se0Params, &sa0Params, length, pSrc, pDst);
68 
69  // calculate compute loop's iteration counter
70  numBlocks = length / c7x::element_count_of<vec>::value;
71  remNumBlocks = length % c7x::element_count_of<vec>::value;
72  if (remNumBlocks) {
73  numBlocks++;
74  }
75 
76  // open SE0, SE1, and SA0 for reading and writing operands
77  MATHLIB_SE0SA0Open(&se0Params, &sa0Params, pSrc);
78 
79  /***********************************************************************/
80  /* Create and assign values for constants employed on log2 computation */
81  /***********************************************************************/
82 
83  vec C1, C2, C3, C4, C5, eMax, outVecMin, outVecMax;
84  c7x::double_vec ln2, base;
85  c7x::uint_vec zero;
86  zero = (c7x::uint_vec) 0;
87 
88  ln2 = (c7x::double_vec) 0.693147180559945;
89  base = (c7x::double_vec) 1.4426950408890;
90  C1 = (vec) -0.2302894f;
91  C2 = (vec) 0.1908169f;
92  C3 = (vec) -0.2505905f;
93  C4 = (vec) 0.3333164f;
94  C5 = (vec) -0.5000002f;
95  eMax = (vec) 3.402823466e+38f;
96  outVecMin = (vec) 0xFF800000u;
97  outVecMax = (vec) 1024.0f;
98 
99  // compute loop to perform vector log2
100  for (size_t i = 0; i < numBlocks; i++) {
101  vec inVec = c7x::strm_eng<0, vec>::get_adv();
102 
103  /**********************************************************************/
104  /* Create variables employed on log2 computation */
105  /**********************************************************************/
106  vec pol, r1, r2, r3, r4;
107  c7x::double_vec inVecVals_odd, inVecVals_even, inVecVals_oddReciprocal, inVecVals_evenReciprocal,
108  inVecReciprocalApprox_8_15, inVecReciprocalApprox_0_7, inVecVals_8_15, inVecVals_0_7, rVals_0_7, rVals_8_15,
109  TVals_8_15, TVals_0_7, NVals_odd, NVals_even, NVals_0_7, NVals_8_15, outVec_8_15, outVec_0_7;
110  c7x::uint_vec inVecReciprocal_32_63, inVecReciprocalClr_32_63, inVecReciprocalApprox_32_63, indexT;
111  c7x::int_vec N;
112  vec outVec;
113 
114  /**********************************************************************/
115  /* Calculate Taylor series approximation for log2 */
116  /**********************************************************************/
117 
118  // Split vectors to compute r with double precision
119  inVecVals_odd = __high_float_to_double(inVec);
120  inVecVals_even = __low_float_to_double(inVec);
121  inVecVals_oddReciprocal = __recip(inVecVals_odd);
122  inVecVals_evenReciprocal = __recip(inVecVals_even);
123 
124  // Create floating point reciprocal approximation
125  // Upper 32 bits of all inVec reciprocal values
126  inVecReciprocal_32_63 = c7x::reinterpret<c7x::uint_vec>(__permute_odd_odd_int(
127  MATHLIB_vperm_data_interweave_0_63, c7x::as_uchar_vec(inVecVals_oddReciprocal),
128  c7x::as_uchar_vec(inVecVals_evenReciprocal)));
129 
130  // Clear bits 0-16 inclusive
131  inVecReciprocalClr_32_63 = inVecReciprocal_32_63 & 0xFFFE0000u;
132 
133  // Concatenate cleared bit reciprocal with zero bits
134  inVecReciprocalApprox_8_15 = c7x::reinterpret<c7x::double_vec>(__permute_high_high(
135  MATHLIB_vperm_data_interweave_0_63, c7x::as_uchar_vec(inVecReciprocalClr_32_63),
136  c7x::as_uchar_vec(zero)));
137  inVecReciprocalApprox_0_7 = c7x::reinterpret<c7x::double_vec>(__permute_low_low(
138  MATHLIB_vperm_data_interweave_0_63, c7x::as_uchar_vec(inVecReciprocalClr_32_63),
139  c7x::as_uchar_vec(zero)));
140 
141  // Split inVec into two vectors with double precision
142  inVecVals_0_7 = c7x::reinterpret<c7x::double_vec>(
143  __permute_low_low(MATHLIB_vperm_data_dp_interweave_0_63, c7x::as_uchar_vec(inVecVals_odd),
144  c7x::as_uchar_vec(inVecVals_even)));
145  inVecVals_8_15 = c7x::reinterpret<c7x::double_vec>(
146  __permute_high_high(MATHLIB_vperm_data_dp_interweave_0_63, c7x::as_uchar_vec(inVecVals_odd),
147  c7x::as_uchar_vec(inVecVals_even)));
148 
149  // Calculate r in double precision
150  rVals_0_7 = (inVecReciprocalApprox_0_7 * inVecVals_0_7) - 1.0;
151  rVals_8_15 = (inVecReciprocalApprox_8_15 * inVecVals_8_15) - 1.0;
152 
153  // Convert r to float, compute r to the power of 2, 3, 4
154  r1 = c7x::reinterpret<vec>(__permute_even_even_int(
155  MATHLIB_vperm_data_0_63, c7x::as_uchar_vec(__double_to_float(rVals_8_15)),
156  c7x::as_uchar_vec(__double_to_float(rVals_0_7))));
157  r2 = r1 * r1;
158  r3 = r1 * r2;
159  r4 = r2 * r2;
160 
161  // Compute Taylor series polynomial
162  pol = (C5 * r2) + ((C4 * r3) + ((((C2 * r1) + C3) + (C1 * r2)) * r4));
163  pol = pol * __double_to_float(base);
164 
165  /**********************************************************************/
166  /* Calculate N */
167  /**********************************************************************/
168 
169  // Upper 32 bits of all inVec reciprocal approximation values
170  inVecReciprocalApprox_32_63 = c7x::reinterpret<c7x::uint_vec>(
171  __permute_odd_odd_int(MATHLIB_vperm_data_0_63, c7x::as_uchar_vec(inVecReciprocalApprox_8_15),
172  c7x::as_uchar_vec(inVecReciprocalApprox_0_7)));
173 
174  N = c7x::convert<c7x::int_vec>(((inVecReciprocalApprox_32_63 << 1) >> 21) - 1023);
175 
176  // Covert N to double precision for later calculation with LUT values
177  NVals_odd = __high_int_to_double(N);
178  NVals_even = __low_int_to_double(N);
179  NVals_0_7 = c7x::reinterpret<c7x::double_vec>(__permute_low_low(MATHLIB_vperm_data_dp_interweave_0_63,
180  c7x::as_uchar_vec(NVals_odd),
181  c7x::as_uchar_vec(NVals_even)));
182  NVals_8_15 = c7x::reinterpret<c7x::double_vec>(__permute_high_high(MATHLIB_vperm_data_dp_interweave_0_63,
183  c7x::as_uchar_vec(NVals_odd),
184  c7x::as_uchar_vec(NVals_even)));
185 
186  /**********************************************************************/
187  /* Determine LUT values */
188  /**********************************************************************/
189 
190  // Calculate LUT index
191  indexT = (((inVecReciprocalApprox_32_63 << 12) >> 29) + MATHLIB_LOGTABLE_OFFSET);
192 
193  c7x::uint_vec upperBitsIndexT = MATHLIB_LUTReadUpperBits(indexT);
194  c7x::uint_vec lowerBitsIndexT = MATHLIB_LUTReadLowerBits(indexT);
195 
196  // Read from LUT and reconstruct double values split into two vectors
197  TVals_8_15 = c7x::reinterpret<c7x::double_vec>(__permute_high_high(
198  MATHLIB_vperm_data_interweave_0_63, c7x::as_uchar_vec(upperBitsIndexT), c7x::as_uchar_vec(lowerBitsIndexT)));
199  TVals_0_7 = c7x::reinterpret<c7x::double_vec>(__permute_low_low(
200  MATHLIB_vperm_data_interweave_0_63, c7x::as_uchar_vec(upperBitsIndexT), c7x::as_uchar_vec(lowerBitsIndexT)));
201 
202  // Calculate an adjusted T
203  TVals_8_15 = (TVals_8_15 - (ln2 * NVals_8_15)) * base;
204  TVals_0_7 = (TVals_0_7 - (ln2 * NVals_0_7)) * base;
205 
206  /**********************************************************************/
207  /* Calculate output with adjusted LUT and Taylor series values */
208  /**********************************************************************/
209 
210  // Add LUT values and Taylor series values
211  // TODO: Multiple by base
212  outVec_0_7 = (rVals_0_7 * base) + TVals_0_7;
213  outVec_8_15 = (rVals_8_15 * base) + TVals_8_15;
214 
215  // Combine output vector into one floating point result
216  outVec = c7x::reinterpret<vec>(__permute_even_even_int(
217  MATHLIB_vperm_data_0_63, c7x::as_uchar_vec(__double_to_float(outVec_8_15)),
218  c7x::as_uchar_vec(__double_to_float(outVec_0_7))));
219  outVec = outVec + pol;
220 
221  /**********************************************************************/
222  /* Bounds checking */
223  /**********************************************************************/
224 
225  // if (a <= 0.0f) {
226  // res = _itof(0xFF800000u);
227  // }
228  __vpred cmp_min = __cmp_le_pred(inVec, c7x::convert<vec>(zero));
229  outVec = __select(cmp_min, outVecMin, outVec);
230 
231  // if (a > MAXe) {
232  // res = 709.7827f;
233  // }
234  __vpred cmp_max = __cmp_lt_pred(eMax, inVec);
235  outVec = __select(cmp_max, outVecMax, outVec);
236 
237  __vpred tmp = c7x::strm_agen<0, vec>::get_vpred();
238  vec *addr = c7x::strm_agen<0, vec>::get_adv(pDst);
239  __vstore_pred(tmp, addr, outVec);
240  }
241 
243 }
244 
245 // this method performs exponential computation of input vector
246 template <typename T> MATHLIB_STATUS MATHLIB_log2(size_t length, T *restrict pSrc, T *restrict pDst)
247 {
248  MATHLIB_STATUS status = MATHLIB_SUCCESS; // return function status
249 
250  // check for null pointers and non-zero length
251  status = MATHLIB_checkParams(length, pSrc, pDst);
252 
253  if (status == MATHLIB_SUCCESS) {
254  MATHLIB_log2_vector(length, pSrc, pDst);
255  }
256  return status;
257 }
258 
259 /******************************************************************************/
260 /* */
261 /* Explicit templatization for datatypes supported by MATHLIB_log2 */
262 /* */
263 /******************************************************************************/
264 
265 // single precision
266 template MATHLIB_STATUS MATHLIB_log2<float>(size_t length, float *restrict pSrc, float *restrict pDst);
267 
268 /******************************************************************************/
269 /* */
270 /* C-interface wrapper functions */
271 /* */
272 /******************************************************************************/
273 
274 extern "C" {
275 
276 // single-precision wrapper
277 MATHLIB_STATUS MATHLIB_log2_sp(size_t length, float *restrict pSrc, float *restrict pDst)
278 {
279  MATHLIB_STATUS status = MATHLIB_log2(length, pSrc, pDst);
280  return status;
281 }
282 
283 } // extern "C"
template MATHLIB_STATUS MATHLIB_log2< float >(size_t length, float *restrict pSrc, float *restrict pDst)
MATHLIB_STATUS MATHLIB_log2_sp(size_t length, float *restrict pSrc, float *restrict pDst)
static void MATHLIB_log2_vector(size_t length, T *restrict pSrc, T *restrict pDst)
MATHLIB_STATUS MATHLIB_log2(size_t length, T *restrict pSrc, T *restrict pDst)
#define MATHLIB_LOGTABLE_OFFSET
Definition: MATHLIB_lut.h:66
static c7x::uint_vec MATHLIB_LUTReadLowerBits(vecType vecOffset)
This method reads bits 31-0 of LUT value at vecOffset.
Definition: MATHLIB_lut.h:111
static c7x::uint_vec MATHLIB_LUTReadUpperBits(vecType vecOffset)
This method reads bits 63-32 of LUT value at vecOffset.
Definition: MATHLIB_lut.h:86
static void MATHLIB_SE0SA0Close()
This method performs SE0 and SA0 close.
static void MATHLIB_SE0SA01DSequentialInit(__SE_TEMPLATE_v1 *se0Params, __SA_TEMPLATE_v1 *sa0Params, size_t length, T *pSrc, T *pDst)
static MATHLIB_STATUS MATHLIB_checkParams(size_t length, T *pSrc, T *pDst)
This method performs parameter checks for MATHLIB function.
static void MATHLIB_SE0SA0Open(__SE_TEMPLATE_v1 *se0Params, __SA_TEMPLATE_v1 *sa0Params, T *pSrc)
This method performs SE0 and SA0 open.
MATHLIB_STATUS_NAME
The enumeration of all status codes.
@ MATHLIB_SUCCESS