MATHLIB User Guide
MATHLIB_rsqrt.cpp
Go to the documentation of this file.
1 /******************************************************************************
2  * Copyright (C) 2023 Texas Instruments Incorporated - https://www.ti.com/
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  *
8  * Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  *
11  * Redistributions in binary form must reproduce the above copyright
12  * notice, this list of conditions and the following disclaimer in the
13  * documentation and/or other materials provided with the
14  * distribution.
15  *
16  * Neither the name of Texas Instruments Incorporated nor the names of
17  * its contributors may be used to endorse or promote products derived
18  * from this software without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31  *
32  ******************************************************************************/
33 
// Shorthand for the c7x vector type traits.
// ELEMENT_COUNT(x) expands to c7x::element_count_of<x>::value.
// ELEMENT_TYPE(x) expands to `typename c7x::element_type_of<x>::type`; because the
// expansion starts with `typename`, it is intended for use on dependent types
// inside templates.
34 #define ELEMENT_COUNT(x) c7x::element_count_of<x>::value
35 #define ELEMENT_TYPE(x) typename c7x::element_type_of<x>::type
36 
37 /******************************************************************************/
38 /* */
39 /* Includes */
40 /* */
41 /******************************************************************************/
42 
43 #include "MATHLIB_types.h"
44 #include "MATHLIB_utility.h"
45 #include <cstddef>
46 
47 /******************************************************************************/
48 /* */
49 /* MATHLIB_rsqrt */
50 /* */
51 /******************************************************************************/
52 
53 // this method performs rsqrt computation of input vector
54 template <typename T> MATHLIB_STATUS MATHLIB_rsqrt(size_t length, T *pSrc, T *pDst)
55 {
56 
57  // variables
58  MATHLIB_STATUS status = MATHLIB_SUCCESS; // return function status
59  size_t numBlocks = 0; // compute loop's iteration count
60  size_t remNumBlocks = 0; // when numBlocks is not a multiple of SIMD width
61 
62  // derive c7x vector type from template typename
63  typedef typename c7x::make_full_vector<T>::type vec;
64 
65  /* define type of elements vec vector holds as elemType */
66  typedef ELEMENT_TYPE(vec) elemType;
67 
68  __SE_TEMPLATE_v1 se0Params = __gen_SE_TEMPLATE_v1();
69  __SA_TEMPLATE_v1 sa0Params = __gen_SA_TEMPLATE_v1();
70 
71  status = MATHLIB_checkParams(length, pSrc, pDst);
72 
73  if (status == MATHLIB_SUCCESS) {
74  MATHLIB_SE0SA01DSequentialInit(&se0Params, &sa0Params, length, pSrc, pDst);
75 
76  // calculate compute loop's iteration counter
77  numBlocks = length / c7x::element_count_of<vec>::value;
78  remNumBlocks = length % c7x::element_count_of<vec>::value;
79  if (remNumBlocks) {
80  numBlocks++;
81  }
82 
83  // open SE0, SE1, and SA0 for reading and writing operands
84  MATHLIB_SE0SA0Open(&se0Params, &sa0Params, pSrc);
85 
86  /***********************************************************************/
87  /* Create and assign values for constants employed on rsqrt computation */
88  /***********************************************************************/
89 
90  vec half, OneP5, small, inf;
91 
92  half = (vec) 0.5;
93  OneP5 = (vec) 1.5;
94  small = (vec) 1.17549435e-38f;
95  inf = (vec) 0x7F800000u;
96 
97  // compute loop to perform vector rsqrt
98  for (size_t i = 0; i < numBlocks; i++) {
99  vec inVec = c7x::strm_eng<0, vec>::get_adv();
100 
101  /**********************************************************************/
102  /* Create variables employed on rsqrt computation */
103  /**********************************************************************/
104 
105  vec x0, x1, x2, x3, outVec;
106 
107  /**********************************************************************/
108  /* rsqrt computation */
109  /**********************************************************************/
110 
111  // Reciprocal square root calculation
112  x0 = __recip_sqrt(inVec);
113  x1 = x0 * inVec;
114  x3 = OneP5 - (x1 * x0 * half);
115  x1 = x0 * x3;
116  x2 = x1 * (OneP5 - (inVec * x1 * x1 * half));
117 
118  /**********************************************************************/
119  /* Bounds checking */
120  /**********************************************************************/
121 
122  // If input is <= 0, output defaults to 0
123  __vpred cmp_lezero = __cmp_lt_pred(inVec, small);
124  outVec = __select(cmp_lezero, inf, x2);
125 
126  __vpred tmp = c7x::strm_agen<0, vec>::get_vpred();
127  vec *addr = c7x::strm_agen<0, vec>::get_adv(pDst);
128  __vstore_pred(tmp, addr, outVec);
129  }
130 
132  }
133 
134  return status;
135 }
136 
137 /******************************************************************************/
138 /* */
139 /* Explicit templatization for datatypes supported by MATHLIB_rsqrt */
140 /* */
141 /******************************************************************************/
142 
143 // single precision: explicit instantiation of MATHLIB_rsqrt for float
144 template MATHLIB_STATUS MATHLIB_rsqrt<float>(size_t length, float *pSrc, float *pDst);
145 
146 /******************************************************************************/
147 /* */
148 /* C-interface wrapper functions */
149 /* */
150 /******************************************************************************/
151 
152 extern "C" {
153 
154 // single-precision wrapper
155 MATHLIB_STATUS MATHLIB_rsqrt_sp(size_t length, float *pSrc, float *pDst)
156 {
157  MATHLIB_STATUS status = MATHLIB_rsqrt(length, pSrc, pDst);
158  return status;
159 }
160 
161 } // extern "C"
template MATHLIB_STATUS MATHLIB_rsqrt< float >(size_t length, float *pSrc, float *pDst)
#define ELEMENT_TYPE(x)
MATHLIB_STATUS MATHLIB_rsqrt(size_t length, T *pSrc, T *pDst)
Performs the elementwise reciprocal square root of an input vector. Function can be overloaded with ...
MATHLIB_STATUS MATHLIB_rsqrt_sp(size_t length, float *pSrc, float *pDst)
This function is the C interface for MATHLIB_rsqrt. Function accepts float pointers.
static void MATHLIB_SE0SA0Close()
This method performs SE0 and SA0 close.
static void MATHLIB_SE0SA01DSequentialInit(__SE_TEMPLATE_v1 *se0Params, __SA_TEMPLATE_v1 *sa0Params, size_t length, T *pSrc, T *pDst)
static MATHLIB_STATUS MATHLIB_checkParams(size_t length, T *pSrc, T *pDst)
This method performs parameter checks for MATHLIB function.
static void MATHLIB_SE0SA0Open(__SE_TEMPLATE_v1 *se0Params, __SA_TEMPLATE_v1 *sa0Params, T *pSrc)
This method performs SE0 and SA0 open.
MATHLIB_STATUS_NAME
The enumeration of all status codes.
@ MATHLIB_SUCCESS