MATHLIB User Guide
MATHLIB_cos.cpp
Go to the documentation of this file.
1 /******************************************************************************
2  * Copyright (C) 2023 Texas Instruments Incorporated - https://www.ti.com/
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  *
8  * Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  *
11  * Redistributions in binary form must reproduce the above copyright
12  * notice, this list of conditions and the following disclaimer in the
13  * documentation and/or other materials provided with the
14  * distribution.
15  *
16  * Neither the name of Texas Instruments Incorporated nor the names of
17  * its contributors may be used to endorse or promote products derived
18  * from this software without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31  *
32  ******************************************************************************/
33 
// Shorthand for the compile-time element count of the c7x vector type x.
34 #define ELEMENT_COUNT(x) c7x::element_count_of<x>::value
35 
36 /******************************************************************************/
37 /* */
38 /* Includes */
39 /* */
40 /******************************************************************************/
41 
42 #include "MATHLIB_types.h"
43 #include "MATHLIB_utility.h"
44 
45 /******************************************************************************/
46 /* */
47 /* MATHLIB_cos */
48 /* */
49 /******************************************************************************/
50 
51 // Primary template declaration: performs the cosine computation on the `length` elements of the input vector pSrc and stores the results to pDst; explicitly specialized below for float and double.
52 template <typename T> MATHLIB_STATUS MATHLIB_cos(size_t length, T *pSrc, T *pDst);
53 
54 template <> MATHLIB_STATUS MATHLIB_cos<float>(size_t length, float *pSrc, float *pDst)
55 {
56 
57  // variables
58  MATHLIB_STATUS status = MATHLIB_SUCCESS; // return function status
59  size_t numBlocks = 0; // compute loop's iteration count
60  size_t remNumBlocks = 0; // when numBlocks is not a multiple of SIMD width
61 
62  // derive c7x vector type from template typename
63  typedef typename c7x::make_full_vector<float>::type vec;
64 
65  // Compile-time decision: float_vec => int_vec and double_vec=> long_vec
66  typedef
67  typename std::conditional<ELEMENT_COUNT(c7x::float_vec) == ELEMENT_COUNT(vec), c7x::int_vec, c7x::long_vec>::type
68  vec_type;
69 
70  __SE_TEMPLATE_v1 se0Params = __gen_SE_TEMPLATE_v1();
71  __SA_TEMPLATE_v1 sa0Params = __gen_SA_TEMPLATE_v1();
72 
73  // check for null pointers and non-zero length
74  status = MATHLIB_checkParams(length, pSrc, pDst);
75 
76  if (status == MATHLIB_SUCCESS) {
77 
78  MATHLIB_SE0SA01DSequentialInit(&se0Params, &sa0Params, length, pSrc, pDst);
79 
80  // calculate compute loop's iteration counter
81  numBlocks = length / c7x::element_count_of<vec>::value;
82  remNumBlocks = length % c7x::element_count_of<vec>::value;
83  if (remNumBlocks) {
84  numBlocks++;
85  }
86 
87  // open SE0, SE1, and SA0 for reading and writing operands
88  MATHLIB_SE0SA0Open(&se0Params, &sa0Params, pSrc);
89 
90  /**********************************************************************/
91  /* Create and assign values for constants employed on cos computation */
92  /**********************************************************************/
93 
94  vec InvPI, HalfPI, One, MAX, Zero, s1, s2, s3, s4, C1, C2;
95 
96  InvPI = (vec) 0.318309886183791;
97  HalfPI = (vec) 1.5707963268;
98  One = (vec) 1.0;
99  MAX = (vec) 1048576.0;
100 
101  Zero = (vec) 0.0;
102  s1 = (vec) -1.666665668e-1;
103  s2 = (vec) 8.333025139e-3;
104  s3 = (vec) -1.980741872e-4;
105  s4 = (vec) 2.601903036e-6;
106  C1 = (vec) 3.140625;
107  C2 = (vec) 9.67653589793e-4;
108 
109  // compute loop to perform vector cos
110  for (size_t i = 0; i < numBlocks; i++) {
111  vec inVec = c7x::strm_eng<0, vec>::get_adv();
112 
113  /**********************************************************************/
114  /* Create and assign values for variables employed on cos computation */
115  /**********************************************************************/
116 
117  vec Sign, X, Y, Z, F, G, R;
118 
119  vec_type int_one = (vec_type) 1;
120  vec_type N;
121 
122  vec negativeOne = (vec) -1;
123  vec negativeR;
124  Sign = One;
125 
126  // if (Y > MAX) {
127  // Y = HalfPI;
128  // }
129  Y = __abs(inVec) + HalfPI;
130  __vpred cmp_gt = __cmp_lt_pred((vec) MAX, Y);
131  Y = __select(cmp_gt, HalfPI, Y);
132 
133  // X = Y * (1/PI)
134  X = Y * InvPI;
135 
136  N = __float_to_int(X);
137  Z = c7x::convert<vec>(N);
138 
139  /**********************************************************************/
140  /* Sign checking for quadrant 3 or 4 */
141  /**********************************************************************/
142 
143  // if ((N % 2) != 0) {
144  // Sign = -Sign;
145  // }
146  vec_type andN = N & int_one;
147  vec convert_andN = c7x::convert<vec>(andN);
148  __vpred cmp_mod = __cmp_le_pred(convert_andN, Zero);
149  vec Sign_T = __select(cmp_mod, Sign, negativeOne);
150 
151  F = (Y - (Z * C1)) - (Z * C2);
152  R = F;
153 
154  // if (F < Zero) {
155  // R = -R;
156  // }
157  negativeR = -R;
158  __vpred cmp_F = __cmp_lt_pred(F, Zero);
159  R = __select(cmp_F, R, negativeR);
160 
161  G = F * F;
162 
163  __vpred cmp_RMin = __cmp_lt_pred(R, Zero);
164 
165  vec outputRMin = R * Sign_T;
166  R = ((((((s4 * G) + s3) * G) + s2) * G) + s1) * G;
167  vec outVec = ((F + (F * R)) * Sign_T);
168 
169  outVec = __select(cmp_RMin, outVec, outputRMin);
170 
171  // outVec.print();
172 
173  __vpred tmp = c7x::strm_agen<0, vec>::get_vpred();
174  vec *addr = c7x::strm_agen<0, vec>::get_adv(pDst);
175  __vstore_pred(tmp, addr, outVec);
176  }
177 
179  }
180 
181  return status;
182 }
183 
184 template <> MATHLIB_STATUS MATHLIB_cos<double>(size_t length, double *pSrc, double *pDst)
185 {
186 
187  // variables
188  MATHLIB_STATUS status = MATHLIB_SUCCESS; // return function status
189  size_t numBlocks = 0; // compute loop's iteration count
190  size_t remNumBlocks = 0; // when numBlocks is not a multiple of SIMD width
191 
192  // derive c7x vector type from template typename
193  typedef typename c7x::make_full_vector<c7x::double_vec>::type vec;
194  typedef typename c7x::make_full_vector<c7x::int_vec>::type vec_type;
195 
196  __SE_TEMPLATE_v1 se0Params = __gen_SE_TEMPLATE_v1();
197  __SA_TEMPLATE_v1 sa0Params = __gen_SA_TEMPLATE_v1();
198 
199  // check for null pointers and non-zero length
200  status = MATHLIB_checkParams(length, pSrc, pDst);
201 
202  if (status == MATHLIB_SUCCESS) {
203 
204  MATHLIB_SE0SA01DSequentialInit(&se0Params, &sa0Params, length, pSrc, pDst);
205 
206  // calculate compute loop's iteration counter
207  numBlocks = length / c7x::element_count_of<vec>::value;
208  remNumBlocks = length % c7x::element_count_of<vec>::value;
209  if (remNumBlocks) {
210  numBlocks++;
211  }
212 
213  // open SE0, SE1, and SA0 for reading and writing operands
214  MATHLIB_SE0SA0Open(&se0Params, &sa0Params, pSrc);
215 
216  /**********************************************************************/
217  /* Create and assign values for constants employed on cos computation */
218  /**********************************************************************/
219 
220  vec InvPI, HalfPI, Zero, negativeOne, MAX, Sign, r1, r2, r3, r4, r5, r6, r7, r8, C1, C2;
221 
222  C1 = (vec) 3.1416015625;
223  C2 = (vec) -8.908910206761537356617e-6;
224  r8 = (vec) 2.7204790957888846175e-15;
225  r7 = (vec) -7.6429178068910467734e-13;
226  r6 = (vec) 1.6058936490371589114e-10;
227  r5 = (vec) -2.5052106798274584544e-8;
228  r4 = (vec) 2.7557319210152756119e-6;
229  r3 = (vec) -1.9841269841201840457e-4;
230  r2 = (vec) 8.3333333333331650314e-3;
231  r1 = (vec) -1.6666666666666665052e-1;
232  MAX = (vec) 1.073741824e+09;
233  HalfPI = (vec) 1.57079632679489661923;
234  InvPI = (vec) 0.31830988618379067154;
235  Sign = (vec) 1.0;
236  Zero = (vec) 0;
237  negativeOne = (vec) -1.0;
238 
239  vec_type int_one = (vec_type) 1;
240 
241  vec X, Z, F, F2, G, R;
242  vec_type N;
243 
244  // compute loop to perform vector cos
245  for (size_t i = 0; i < numBlocks; i++) {
246  vec inVec = c7x::strm_eng<0, vec>::get_adv();
247 
248  F = __abs(inVec) + HalfPI;
249  __vpred cmp_gt = __cmp_lt_pred(MAX, F);
250  F = __select(cmp_gt, HalfPI, F);
251 
252  X = F * InvPI;
253  N = __double_to_int(X);
254  Z = __low_int_to_double(N);
255 
256  vec_type andN = N & int_one;
257  vec convert_andN = __low_int_to_double(andN);
258  __vpred cmp_mod = __cmp_le_pred(convert_andN, Zero);
259  vec sign_vec = __select(cmp_mod, Sign, negativeOne);
260 
261  F = (F - (Z * C1)) - (Z * C2);
262  R = __abs(F);
263  F2 = F * F;
264  G = F2 * F2;
265  R = ((((((G * r8) + r6) * G) + r4) * G) + r2) * G;
266  X = ((((((G * r7) + r5) * G) + r3) * G) + r1) * F2;
267  R = R + X;
268  G = (F + (F * R)) * sign_vec;
269 
270  __vpred tmp = c7x::strm_agen<0, vec>::get_vpred();
271  vec *addr = c7x::strm_agen<0, vec>::get_adv(pDst);
272  __vstore_pred(tmp, addr, G);
273  }
274 
276  }
277 
278  return status;
279 }
280 
281 /******************************************************************************/
282 /* */
283 /* C-interface wrapper functions */
284 /* */
285 /******************************************************************************/
286 
287 extern "C" {
288 
289 // single-precision wrapper
290 MATHLIB_STATUS MATHLIB_cos_sp(size_t length, float *pSrc, float *pDst)
291 {
292  MATHLIB_STATUS status = MATHLIB_cos<float>(length, pSrc, pDst);
293  return status;
294 }
295 
296 // double-precision wrapper
297 MATHLIB_STATUS MATHLIB_cos_dp(size_t length, double *pSrc, double *pDst)
298 {
299  MATHLIB_STATUS status = MATHLIB_cos<double>(length, pSrc, pDst);
300  return status;
301 }
302 
303 
304 } // extern "C"
MATHLIB_STATUS MATHLIB_cos< float >(size_t length, float *pSrc, float *pDst)
Definition: MATHLIB_cos.cpp:54
MATHLIB_STATUS MATHLIB_cos< double >(size_t length, double *pSrc, double *pDst)
#define ELEMENT_COUNT(x)
Definition: MATHLIB_cos.cpp:34
MATHLIB_STATUS MATHLIB_cos(size_t length, T *pSrc, T *pDst)
MATHLIB_STATUS MATHLIB_cos_sp(size_t length, float *pSrc, float *pDst)
This function is the C interface for MATHLIB_cos. Function accepts float pointers.
MATHLIB_STATUS MATHLIB_cos_dp(size_t length, double *pSrc, double *pDst)
This function is the C interface for MATHLIB_cos. Function accepts double pointers.
static void MATHLIB_SE0SA0Close()
This method performs SE0 and SA0 close.
static void MATHLIB_SE0SA01DSequentialInit(__SE_TEMPLATE_v1 *se0Params, __SA_TEMPLATE_v1 *sa0Params, size_t length, T *pSrc, T *pDst)
static MATHLIB_STATUS MATHLIB_checkParams(size_t length, T *pSrc, T *pDst)
This method performs parameter checks for MATHLIB function.
static void MATHLIB_SE0SA0Open(__SE_TEMPLATE_v1 *se0Params, __SA_TEMPLATE_v1 *sa0Params, T *pSrc)
This method performs SE0 and SA0 open.
MATHLIB_STATUS_NAME
The enumeration of all status codes.
@ MATHLIB_SUCCESS