MATHLIB User Guide
MATHLIB_sqrt.cpp
Go to the documentation of this file.
1 /******************************************************************************
2  * Copyright (C) 2023 Texas Instruments Incorporated - https://www.ti.com/
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  *
8  * Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  *
11  * Redistributions in binary form must reproduce the above copyright
12  * notice, this list of conditions and the following disclaimer in the
13  * documentation and/or other materials provided with the
14  * distribution.
15  *
16  * Neither the name of Texas Instruments Incorporated nor the names of
17  * its contributors may be used to endorse or promote products derived
18  * from this software without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31  *
32  ******************************************************************************/
33 
34 #define ELEMENT_COUNT(x) c7x::element_count_of<x>::value
35 #define ELEMENT_TYPE(x) typename c7x::element_type_of<x>::type
36 
37 /******************************************************************************/
38 /* */
39 /* Includes */
40 /* */
41 /******************************************************************************/
42 
43 #include "MATHLIB_sqrt_scalar.h"
44 #include "MATHLIB_types.h"
45 #include "MATHLIB_utility.h"
46 #include <cstddef>
47 #include <limits>
48 
49 /******************************************************************************/
50 /* */
51 /* MATHLIB_sqrt */
52 /* */
53 /******************************************************************************/
54 template <typename T> static inline void MATHLIB_sqrt_vector(size_t length, T *pSrc, T *pDst);
55 
56 
57 // this method performs sqrt computation of input vector
58 template <> inline void MATHLIB_sqrt_vector<float>(size_t length, float *pSrc, float *pDst)
59 {
60 
61  // variables
62  size_t numBlocks = 0; // compute loop's iteration count
63  size_t remNumBlocks = 0; // when numBlocks is not a multiple of SIMD width
64 
65  // derive c7x vector type from template typename
66  typedef typename c7x::make_full_vector<c7x::float_vec>::type vec;
67 
68  /* define type of elements vec vector holds as elemType */
69  typedef ELEMENT_TYPE(vec) elemType;
70 
71  __SE_TEMPLATE_v1 se0Params = __gen_SE_TEMPLATE_v1();
72  __SA_TEMPLATE_v1 sa0Params = __gen_SA_TEMPLATE_v1();
73 
74  // status = MATHLIB_checkParams(length, pSrc, pDst);
75 
76  MATHLIB_SE0SA01DSequentialInit(&se0Params, &sa0Params, length, pSrc, pDst);
77 
78  // calculate compute loop's iteration counter
79  numBlocks = length / c7x::element_count_of<vec>::value;
80  remNumBlocks = length % c7x::element_count_of<vec>::value;
81  if (remNumBlocks) {
82  numBlocks++;
83  }
84 
85  // open SE0, SE1, and SA0 for reading and writing operands
86  MATHLIB_SE0SA0Open(&se0Params, &sa0Params, pSrc);
87 
88  /***********************************************************************/
89  /* Create and assign values for constants employed on sqrt computation */
90  /***********************************************************************/
91 
92  vec half, OneP5, zero, maxValue;
93 
94  half = (vec) 0.5;
95  OneP5 = (vec) 1.5;
96  zero = (vec) 0.0;
97  maxValue = (vec) std::numeric_limits<elemType>::max();
98 
99  // compute loop to perform vector sqrt
100  for (size_t i = 0; i < numBlocks; i++) {
101  vec inVec = c7x::strm_eng<0, vec>::get_adv();
102 
103  /**********************************************************************/
104  /* Create variables employed on sqrt computation */
105  /**********************************************************************/
106 
107  vec y, p0, p1, d0;
108 
109  /**********************************************************************/
110  /* Sqrt computation */
111  /**********************************************************************/
112 
113  // Reciprocal square root calculation
114  p0 = __recip_sqrt(inVec);
115  d0 = p0 * inVec;
116  p1 = OneP5 - d0 * p0 * half;
117  y = inVec * p0 * p1;
118 
119  /**********************************************************************/
120  /* Bounds checking */
121  /**********************************************************************/
122 
123  // If input is <= 0, output defaults to 0
124  __vpred cmp_lezero = __cmp_le_pred(inVec, zero);
125  y = __select(cmp_lezero, zero, y);
126 
127  // If input is greater than maximum allowed by data type, then output is max of datatype
128  __vpred cmp_gtmax = __cmp_le_pred(maxValue, inVec);
129  vec outVec = __select(cmp_gtmax, maxValue, y);
130 
131  __vpred tmp = c7x::strm_agen<0, vec>::get_vpred();
132  vec *addr = c7x::strm_agen<0, vec>::get_adv(pDst);
133  __vstore_pred(tmp, addr, outVec);
134  }
135 
137 }
138 
139 // this method performs sqrt computation of input vector
140 template <> inline void MATHLIB_sqrt_vector<double>(size_t length, double *pSrc, double *pDst)
141 {
142  // variables
143  size_t numBlocks = 0; // compute loop's iteration count
144  size_t remNumBlocks = 0; // when numBlocks is not a multiple of SIMD width
145 
146  // derive c7x vector type from template typename
147  typedef typename c7x::make_full_vector<c7x::double_vec>::type vec;
148 
149  /* define type of elements vec vector holds as elemType */
150  typedef ELEMENT_TYPE(vec) elemType;
151 
152  __SE_TEMPLATE_v1 se0Params = __gen_SE_TEMPLATE_v1();
153  __SA_TEMPLATE_v1 sa0Params = __gen_SA_TEMPLATE_v1();
154 
155  MATHLIB_SE0SA01DSequentialInit(&se0Params, &sa0Params, length, pSrc, pDst);
156 
157  // calculate compute loop's iteration counter
158  numBlocks = length / c7x::element_count_of<vec>::value;
159  remNumBlocks = length % c7x::element_count_of<vec>::value;
160  if (remNumBlocks) {
161  numBlocks++;
162  }
163 
164  // open SE0, and SA0 for reading and writing operands
165  MATHLIB_SE0SA0Open(&se0Params, &sa0Params, pSrc);
166 
167  /***********************************************************************/
168  /* Create and assign values for constants employed on sqrt computation */
169  /***********************************************************************/
170 
171  vec half, OneP5, zero, maxValue;
172 
173  half = (vec) 0.5;
174  OneP5 = (vec) 1.5;
175  zero = (vec) 0.0;
176  maxValue = (vec) std::numeric_limits<elemType>::max();
177 
178  // compute loop to perform vector sqrt
179  for (size_t i = 0; i < numBlocks; i++) {
180 
181  vec invec = c7x::strm_eng<0, vec>::get_adv();
182 
183  vec x = __recip_sqrt(invec);
184 
185  x = x * (OneP5 - (invec * x * x * half));
186  x = x * (OneP5 - (invec * x * x * half));
187  x = x * (OneP5 - (invec * x * x * half));
188 
189  vec y = invec * x;
190 
191  __vpred cond1 = __cmp_le_pred(invec, zero);
192  y = __select(cond1, zero, y);
193 
194  __vpred cond2 = __cmp_lt_pred(maxValue, invec);
195  y = __select(cond2, maxValue, y);
196 
197  __vpred temp = c7x::strm_agen<0, vec>::get_vpred();
198  vec * addr = c7x::strm_agen<0, vec>::get_adv(pDst);
199  __vstore_pred(temp, addr, y);
200  }
202 }
203 
204 template <typename T> MATHLIB_STATUS MATHLIB_sqrt(size_t length, T* pSrc, T* pDst)
205 {
206 
207  MATHLIB_STATUS status = MATHLIB_SUCCESS; // return function status
208 
209  // check for null pointers and non-zero length
210  status = MATHLIB_checkParams(length, pSrc, pDst);
211 
212  if (status == MATHLIB_SUCCESS) {
213 
214  if (length < 2) {
215  pDst[0] = MATHLIB_sqrt_scalar_ci<T>(pSrc[0]);
216  }
217  else {
218  MATHLIB_sqrt_vector<T>(length, pSrc, pDst);
219  }
220  }
221  return status;
222 }
223 
224 template MATHLIB_STATUS MATHLIB_sqrt<float>(size_t length, float* pSrc, float* pDst);
225 template MATHLIB_STATUS MATHLIB_sqrt<double>(size_t length, double* pSrc, double* pDst);
226 
227 
228 /******************************************************************************/
229 /* */
230 /* C-interface wrapper functions */
231 /* */
232 /******************************************************************************/
233 
234 extern "C" {
235 
236 // single-precision wrapper
237 MATHLIB_STATUS MATHLIB_sqrt_sp(size_t length, float *pSrc, float *pDst)
238 {
239  MATHLIB_STATUS status = MATHLIB_sqrt<float>(length, pSrc, pDst);
240  return status;
241 }
242 
243 // double-precision wrapper
244 MATHLIB_STATUS MATHLIB_sqrt_dp(size_t length, double *pSrc, double *pDst)
245 {
246  MATHLIB_STATUS status = MATHLIB_sqrt<double>(length, pSrc, pDst);
247  return status;
248 }
249 
250 
251 } // extern "C"
template MATHLIB_STATUS MATHLIB_sqrt< double >(size_t length, double *pSrc, double *pDst)
static void MATHLIB_sqrt_vector(size_t length, T *pSrc, T *pDst)
#define ELEMENT_TYPE(x)
template MATHLIB_STATUS MATHLIB_sqrt< float >(size_t length, float *pSrc, float *pDst)
void MATHLIB_sqrt_vector< double >(size_t length, double *pSrc, double *pDst)
void MATHLIB_sqrt_vector< float >(size_t length, float *pSrc, float *pDst)
static void MATHLIB_SE0SA0Close()
This method performs SE0 and SA0 close.
static void MATHLIB_SE0SA01DSequentialInit(__SE_TEMPLATE_v1 *se0Params, __SA_TEMPLATE_v1 *sa0Params, size_t length, T *pSrc, T *pDst)
static MATHLIB_STATUS MATHLIB_checkParams(size_t length, T *pSrc, T *pDst)
This method performs parameter checks for MATHLIB function.
static void MATHLIB_SE0SA0Open(__SE_TEMPLATE_v1 *se0Params, __SA_TEMPLATE_v1 *sa0Params, T *pSrc)
This method performs SE0 and SA0 open.
MATHLIB_STATUS MATHLIB_sqrt_sp(size_t length, float *pSrc, float *pDst)
This function is the C interface for MATHLIB_sqrt. Function accepts float pointers.
MATHLIB_STATUS MATHLIB_sqrt_dp(size_t length, double *pSrc, double *pDst)
This function is the C interface for MATHLIB_sqrt. Function accepts double pointers.
MATHLIB_STATUS MATHLIB_sqrt(size_t length, T *pSrc, T *pDst)
Performs the elementwise square root of an input vectors. Function can be overloaded with float and d...
MATHLIB_STATUS_NAME
The enumeration of all status codes.
@ MATHLIB_SUCCESS