MMALIB User Guide
MMALIB_utility.h
Go to the documentation of this file.
1 #ifndef COMMON_MMALIB_UTILITY_H_
2 #define COMMON_MMALIB_UTILITY_H_ 1
3 
4 /******************************************************************************/
8 /* Copyright (C) 2015 Texas Instruments Incorporated - http://www.ti.com/
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  *
14  * Redistributions of source code must retain the above copyright
15  * notice, this list of conditions and the following disclaimer.
16  *
17  * Redistributions in binary form must reproduce the above copyright
18  * notice, this list of conditions and the following disclaimer in the
19  * documentation and/or other materials provided with the
20  * distribution.
21  *
22  * Neither the name of Texas Instruments Incorporated nor the names of
23  * its contributors may be used to endorse or promote products derived
24  * from this software without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
29  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
30  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
31  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
32  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
33  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
34  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
35  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
36  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37  *
38  ******************************************************************************/
39 
40 
41 /*******************************************************************************
42  *
43  * INCLUDES
44  *
45  ******************************************************************************/
46 
47 #include <float.h> // for max float, double values
48 #include <limits.h> // for min, max integer values
49 #include <math.h>
50 
51 #include "MMALIB_bufParams.h"
52 #include "MMALIB_types.h"
53 
54 #include "c71/MMALIB_utility.h"
55 #if MMALIB_DEBUGPRINT >= 1
56 #include "c71/MMALIB_debug.h"
57 #endif
58 
59 #include "c7524/MMALIB_utility.h"
60 
61 /*******************************************************************************
62  *
63  * EXTERNAL VARIABLES
64  *
65  ******************************************************************************/
66 #ifdef __cplusplus
67 extern "C" {
68 #endif /* __cplusplus */
69 extern uint64_t beg_count; /* Begin cycle count for profiling */
70 extern uint64_t end_count; /* End cycle count for profiling */
71 extern uint64_t overhead; /* Cycle profiling overhead */
72 #ifdef __cplusplus
73 }
74 #endif /* __cplusplus */
75 
76 /*******************************************************************************
77  *
78  * Inline functions
79  *
80  ******************************************************************************/
81 
82 /*******************************************************************************
83  *
84  * Arithmetic with 128-bit signed integers
85  *
86  ******************************************************************************/
static inline void MMALIB_UTIL_mult(int64_t *ph, int64_t *pl, int64_t a, int64_t b);

/**
 * Signed 64 x 64 -> 128-bit multiply.
 *
 * @param ph  receives the upper 64 bits of the two's-complement product
 * @param pl  receives the lower 64 bits of the two's-complement product
 * @param a   left operand
 * @param b   right operand
 *
 * The product is assembled from 32-bit limbs in unsigned arithmetic, so no
 * signed overflow can occur. Whenever a * b fits in 64 bits, *ph is simply
 * the sign extension of *pl.
 */
static inline void MMALIB_UTIL_mult(int64_t *ph, int64_t *pl, int64_t a, int64_t b)
{
   const uint64_t limbMask = 0xffffffffULL;
   const uint64_t ua       = (uint64_t)a;
   const uint64_t ub       = (uint64_t)b;

   const uint64_t aLo = ua & limbMask;
   const uint64_t aHi = ua >> 32;
   const uint64_t bLo = ub & limbMask;
   const uint64_t bHi = ub >> 32;

   // four partial products of the unsigned 64 x 64 multiply
   const uint64_t p0 = aLo * bLo;
   const uint64_t p1 = aLo * bHi;
   const uint64_t p2 = aHi * bLo;
   const uint64_t p3 = aHi * bHi;

   // middle column: at most 3 * (2^32 - 1), cannot overflow 64 bits
   const uint64_t mid = (p0 >> 32) + (p1 & limbMask) + (p2 & limbMask);

   const uint64_t lo = (mid << 32) | (p0 & limbMask);
   uint64_t       hi = p3 + (p1 >> 32) + (p2 >> 32) + (mid >> 32);

   // convert the unsigned product into the signed one: a negative operand
   // was read as (value + 2^64), so remove the excess from the high word
   if (a < 0) {
      hi -= ub;
   }
   if (b < 0) {
      hi -= ua;
   }

   *pl = (int64_t)lo;
   *ph = (int64_t)hi;
}
100 
101 
102 #ifdef __cplusplus
103 
104 /*******************************************************************************
105 *
106 * Definition and arithmetic for MMALIB_int128_t class
107 *
108 ******************************************************************************/
109 
110 // Define a 128-bit integer class to allow natural-c implementations of MMALIB
111 // 32-bit input/output functions to be templated. The class is implemented in
112 // a header file for easy sharing. All member functions, including constructors
113 // are declared inline for two reasons: (1) performance and (2) necessary for
114 // implementing the class in a multiple-inclusion header file.
115 
// Minimal signed 128-bit integer held as two 64-bit halves.
class MMALIB_int128_t
{
public:
   int64_t hi;  // upper 64 bits
   int64_t lo;  // lower 64 bits

   MMALIB_int128_t(int64_t h, int64_t l);  // both halves given explicitly
   MMALIB_int128_t(int64_t l);             // low half given, high half is its sign extension
   MMALIB_int128_t();                      // value zero
   MMALIB_int128_t operator+ (const MMALIB_int128_t&) const;  // 128-bit addition
   MMALIB_int128_t operator>> (const int8_t&) const;          // right shift
};

// Build a value from explicit high and low halves.
inline MMALIB_int128_t::MMALIB_int128_t(int64_t h, int64_t l)
   : hi(h), lo(l)
{
}

// Build a value from a 64-bit integer; the high half replicates its sign bit.
inline MMALIB_int128_t::MMALIB_int128_t(int64_t l)
   : hi((l < 0) ? (int64_t)-1 : (int64_t)0), lo(l)
{
}

// Build the value zero.
inline MMALIB_int128_t::MMALIB_int128_t()
   : hi(0), lo(0)
{
}
149 
150 static inline void MMALIB_UTIL_shiftRight128(
151  uint64_t *rh , // result
152  uint64_t *rl ,
153  uint64_t ah , // operand
154  uint64_t al ,
155  int32_t sh , // shift amount
156  int32_t s ) // signed
157 {
158  uint64_t h ;
159  uint64_t l ;
160  int32_t i;
161 
162  h = ah ;
163  l = al ;
164  for(i = 0 ; i < sh ; i++ ) {
165  l = (uint64_t) __shift_right(l, (uint32_t)1) | (uint64_t) __shift_left(h, (uint32_t)63) ;
166  h = (uint64_t) __shift_right(h, (uint32_t)1) | ((s!=0)?(h&0x8000000000000000ULL):0ULL) ;
167  }
168 
169  *rh = h ;
170  *rl = l ;
171 }
172 
/**
 * Add two 128-bit values, each given as a (high, low) pair of 64-bit words.
 *
 * @param rh  receives the high 64 bits of the sum
 * @param rl  receives the low 64 bits of the sum
 * @param ah  left operand, high word
 * @param al  left operand, low word
 * @param bh  right operand, high word
 * @param bl  right operand, low word
 *
 * The sum is taken modulo 2^128; a carry out of the high word is discarded.
 */
static inline void MMALIB_UTIL_Add128(
   uint64_t *rh,
   uint64_t *rl,
   uint64_t  ah,
   uint64_t  al,
   uint64_t  bh,
   uint64_t  bl)
{
   const uint64_t lowSum = al + bl;
   // unsigned addition wrapped exactly when the sum is smaller than an addend
   const uint64_t carry  = (lowSum < al) ? 1ULL : 0ULL;

   *rl = lowSum;
   *rh = ah + bh + carry;
}
220 
221 // define overloaded + (plus) operator
222 inline MMALIB_int128_t MMALIB_int128_t::operator+ (const MMALIB_int128_t& b) const
223 {
224  MMALIB_int128_t result;
225 
226  MMALIB_UTIL_Add128((uint64_t *)&(result.hi), (uint64_t *)&(result.lo), this->hi, this->lo, b.hi, b.lo);
227 
228  return result;
229 }
230 
231 // define overloaded >> (bit shift right) operator
232 inline MMALIB_int128_t MMALIB_int128_t::operator>> (const int8_t& shift) const
233 {
234  MMALIB_int128_t result;
235 
236  MMALIB_UTIL_shiftRight128((uint64_t *)&result.hi, (uint64_t *)&result.lo, this->hi, this->lo, (int32_t)shift, 1);
237  return result;
238 }
239 
240 /*******************************************************************************
241  *
242  * We need special utility to do negation because range of values is from
243  * -2^(bit-width-1) to 2^(bit-width-1)-1. For example, with int16_t, the
244  * range is from -32768 to 32767 - that is, -0x8000 to 0x7FFF. Now, if we want
245  * to evaluate negation of -32768 and we try simply -(-32768) and store the
246  * result in int16_t, we would get -32768 itself. Instead, we want to get 32767.
247  *
248  ******************************************************************************/
249 
// Saturating negation for int16_t: the most negative value maps to 32767,
// every other value maps to its arithmetic negation.
static inline int16_t MMALIB_UTIL_negate(int16_t a)
{
   if (a == -32768) {
      return 32767;
   }
   return (int16_t)(-a);
}
257 
// Saturating negation for int32_t: the most negative value maps to
// 2147483647, every other value maps to its arithmetic negation.
static inline int32_t MMALIB_UTIL_negate(int32_t a)
{
   const int32_t mostNegative = -2147483647 - 1;

   if (a == mostNegative) {
      return 2147483647;
   }
   return -a;
}
265 
266 /*******************************************************************************
267  *
268  * Inline multiply with higher bit-width output type
269  *
270  ******************************************************************************/
271 
// unsigned 8-bit x unsigned 8-bit product, returned as int32_t
static inline int32_t MMALIB_UTIL_mult(uint8_t a, uint8_t b)
{
   const int32_t lhs = (int32_t)a;
   const int32_t rhs = (int32_t)b;

   return lhs * rhs;
}
276 
// signed 8-bit x signed 8-bit product, returned as int32_t
static inline int32_t MMALIB_UTIL_mult(int8_t a, int8_t b)
{
   const int32_t lhs = (int32_t)a;
   const int32_t rhs = (int32_t)b;

   return lhs * rhs;
}
281 
// unsigned 8-bit x signed 8-bit product, returned as int32_t
static inline int32_t MMALIB_UTIL_mult(uint8_t a, int8_t b)
{
   const int32_t lhs = (int32_t)a;
   const int32_t rhs = (int32_t)b;

   return lhs * rhs;
}
286 
// signed 8-bit x unsigned 8-bit product, returned as int32_t
static inline int32_t MMALIB_UTIL_mult(int8_t a, uint8_t b)
{
   const int32_t lhs = (int32_t)a;
   const int32_t rhs = (int32_t)b;

   return lhs * rhs;
}
291 
292 
// unsigned 16-bit x unsigned 16-bit product, returned as int64_t.
// The operands are widened to 64 bits before multiplying: the largest
// product, 65535 * 65535, does not fit in a 32-bit signed integer.
static inline int64_t MMALIB_UTIL_mult(uint16_t a, uint16_t b)
{
   return (int64_t)a * (int64_t)b;
}
297 
// signed 16-bit x signed 16-bit product, returned as int64_t
static inline int64_t MMALIB_UTIL_mult(int16_t a, int16_t b)
{
   const int64_t lhs = (int64_t)a;
   const int64_t rhs = (int64_t)b;

   return lhs * rhs;
}
302 
// unsigned 16-bit x signed 16-bit product, returned as int64_t
static inline int64_t MMALIB_UTIL_mult(uint16_t a, int16_t b)
{
   const int64_t lhs = (int64_t)a;
   const int64_t rhs = (int64_t)b;

   return lhs * rhs;
}
307 
// signed 16-bit x unsigned 16-bit product, returned as int64_t
static inline int64_t MMALIB_UTIL_mult(int16_t a, uint16_t b)
{
   const int64_t lhs = (int64_t)a;
   const int64_t rhs = (int64_t)b;

   return lhs * rhs;
}
312 
313 
314 // for floats when performing multiplication no need to promote or demote data type
316 {
317  return a * b;
318 }
319 
320 
321 static inline MMALIB_int128_t MMALIB_UTIL_mult(uint32_t a, uint32_t b)
322 {
323  MMALIB_int128_t result(0,0);
324 
325  result.lo = (int64_t)a * (int64_t)b ;
326  // sign extend the product
327  result.hi = (int64_t)0ULL ;
328 
329  return result;
330 }
331 
332 static inline MMALIB_int128_t MMALIB_UTIL_mult(int32_t a, int32_t b)
333 {
334  MMALIB_int128_t result(0,0);
335 
336  result.lo = (int64_t)a * (int64_t)b ;
337  // sign extend the product
338  result.hi = (((uint64_t)result.lo & 0x8000000000000000ULL) != 0LL)?(int64_t)0xffffffffffffffffULL:(int64_t)0ULL ;
339 
340  return result;
341 }
342 
343 static inline MMALIB_int128_t MMALIB_UTIL_mult(uint32_t a, int32_t b)
344 {
345  MMALIB_int128_t result(0,0);
346 
347  result.lo = (int64_t)a * (int64_t)b ;
348  // sign extend the product
349  result.hi = (((uint64_t)result.lo & 0x8000000000000000ULL) != 0LL)?(int64_t)0xffffffffffffffffULL:(int64_t)0ULL ;
350 
351  return result;
352 }
353 
354 static inline MMALIB_int128_t MMALIB_UTIL_mult(int32_t a, uint32_t b)
355 {
356  MMALIB_int128_t result(0,0);
357 
358  result.lo = (int64_t)a * (int64_t)b ;
359  // sign extend the product
360  result.hi = (((uint64_t)result.lo & 0x8000000000000000ULL) != 0LL)?(int64_t)0xffffffffffffffffULL:(int64_t)0ULL ;
361 
362  return result;
363 }
364 
365 /*******************************************************************************
366  *
367  * Inline saturate with ReLU operation
368  *
369  ******************************************************************************/
370 
// ReLU + saturation to int8_t: negative inputs become 0, inputs above 0x7F
// become 0x7F, everything else is stored unchanged in *y.
static inline void MMALIB_UTIL_saturate_relu(int32_t x, int8_t *y)
{
   int32_t clamped = x;

   if (clamped < 0) {
      clamped = 0;
   }
   if (clamped > 0x7F) {
      clamped = 0x7F;
   }
   *y = (int8_t)clamped;
}
383 
// ReLU + saturation to uint8_t: negative inputs become 0, inputs above 0xFF
// become 0xFF, everything else is stored unchanged in *y.
static inline void MMALIB_UTIL_saturate_relu(int32_t x, uint8_t *y)
{
   int32_t clamped = x;

   if (clamped < 0) {
      clamped = 0;
   }
   if (clamped > 0xFF) {
      clamped = 0xFF;
   }
   *y = (uint8_t)clamped;
}
396 
// Saturation of an unsigned 32-bit value to uint8_t: inputs above 0xFF become
// 0xFF. No lower clamp is needed for an unsigned input.
static inline void MMALIB_UTIL_saturate_relu(uint32_t x, uint8_t *y)
{
   *y = (x > 0xFF) ? (uint8_t)0xFF : (uint8_t)x;
}
407 
// ReLU + saturation to int16_t: negative inputs become 0, inputs above 0x7FFF
// become 0x7FFF, everything else is stored unchanged in *y.
static inline void MMALIB_UTIL_saturate_relu(int64_t x, int16_t *y)
{
   int64_t clamped = x;

   if (clamped < 0) {
      clamped = 0;
   }
   if (clamped > 0x7FFF) {
      clamped = 0x7FFF;
   }
   *y = (int16_t)clamped;
}
420 
// ReLU + saturation to uint16_t: negative inputs become 0, inputs above
// 0xFFFF become 0xFFFF, everything else is stored unchanged in *y.
static inline void MMALIB_UTIL_saturate_relu(int64_t x, uint16_t *y)
{
   int64_t clamped = x;

   if (clamped < 0) {
      clamped = 0;
   }
   if (clamped > 0xFFFF) {
      clamped = 0xFFFF;
   }
   *y = (uint16_t)clamped;
}
433 
// Saturation of an unsigned 64-bit value to uint16_t: inputs above 0xFFFF
// become 0xFFFF. No lower clamp is needed for an unsigned input.
static inline void MMALIB_UTIL_saturate_relu(uint64_t x, uint16_t *y)
{
   *y = (x > 0xFFFF) ? (uint16_t)0xFFFF : (uint16_t)x;
}
444 
445 /*******************************************************************************
446  *
447  * Inline shift, round and ReLU operation
448  *
449  ******************************************************************************/
450 
451 template <typename dataType, typename returnType>
452 static inline returnType MMALIB_UTIL_shiftRoundAndReLU(dataType inVal, uint8_t shift){
453  returnType result;
454 
455  if(shift == 0){
456  // remove the rounding, which doesn't make sense with no shift but causes C code problems
457  MMALIB_UTIL_saturate_relu(inVal, &result);
458  } else {
459  // round and shift
460  // Method requires right shift of signed integers be an arithmetic shift, but right
461  // shift >> on signed integer types is implementation dependent on whether the shift is
462  // arithmetic or logical. There's no simple way in C to specify the shift type as arithmetic.
463  // Instead, we use the __shift_right intrinsic, which is defined to be arithmetic shift.
464  dataType temp;
465  temp = __shift_right(inVal, (shift - 1)) + 1;
466  temp = __shift_right(temp, 1);
467  MMALIB_UTIL_saturate_relu(temp, &result);
468  }
469 
470  return result;
471 }
472 
473 template int8_t MMALIB_UTIL_shiftRoundAndReLU<int32_t, int8_t> (int32_t inVal, uint8_t shift);
474 template int16_t MMALIB_UTIL_shiftRoundAndReLU<int64_t, int16_t> (int64_t inVal, uint8_t shift);
475 // added for unsigned C matrix values inside MMA
476 //template uint8_t MMALIB_UTIL_shiftRoundAndReLU<uint32_t, uint8_t> (uint32_t inVal, uint8_t shift);
477 //template uint16_t MMALIB_UTIL_shiftRoundAndReLU<uint64_t, uint16_t> (uint64_t inVal, uint8_t shift);
478 
479 template <>
480 inline uint8_t MMALIB_UTIL_shiftRoundAndReLU<int32_t, uint8_t>(int32_t inVal, uint8_t shift){
481  uint8_t result;
482 
483  if(shift == 0){
484  // remove the rounding, which doesn't make sense with no shift but causes C code problems
485  MMALIB_UTIL_saturate_relu(inVal, &result);
486  } else {
487  // round and shift
488  // Method requires right shift of signed integers be an arithmetic shift, but right
489  // shift >> on signed integer types is implementation dependent on whether the shift is
490  // arithmetic or logical. There's no simple way in C to specify the shift type as arithmetic.
491  // Instead, we use the __shift_right intrinsic, which is defined to be arithmetic shift.
492  int32_t temp;
493  temp = __shift_right( inVal, (shift - 1) ) + 1;
494  temp = __shift_right(temp, 1);
495  MMALIB_UTIL_saturate_relu(temp, &result);
496 
497  }
498 
499  return result;
500 }
501 
502 template <>
503 inline uint8_t MMALIB_UTIL_shiftRoundAndReLU<uint32_t, uint8_t>(uint32_t inVal, uint8_t shift){
504  uint8_t result;
505 
506  if(shift == 0){
507  // remove the rounding, which doesn't make sense with no shift but causes C code problems
508  MMALIB_UTIL_saturate_relu(inVal, &result);
509  } else {
510  uint32_t temp;
511  //Subtracting two unsigned values of the same size will result in an unsigned value.
512  //If the first operand is less than the second the result will be arithmetically in correct.
513  //But if the size of the unsigned types is less than that of an unsigned int, C/C++ will promote the types to
514  //signed int before subtracting resulting in an correct result. In either case,
515  //there is no indication of an error.
516  uint32_t shift32_t = (uint32_t) shift;
517  temp = (inVal >> (shift32_t - (uint32_t)1) ) + 1;
518  temp = temp >> 1;
519  MMALIB_UTIL_saturate_relu(temp, &result);
520  }
521 
522  return result;
523 }
524 
525 template <>
526 inline uint16_t MMALIB_UTIL_shiftRoundAndReLU<int64_t, uint16_t>(int64_t inVal, uint8_t shift){
527  uint16_t result;
528 
529  if(shift == 0){
530  // remove the rounding, which doesn't make sense with no shift but causes C code problems
531  MMALIB_UTIL_saturate_relu(inVal, &result);
532  } else {
533  // round and shift
534  // Method requires right shift of signed integers be an arithmetic shift, but right
535  // shift >> on signed integer types is implementation dependent on whether the shift is
536  // arithmetic or logical. There's no simple way in C to specify the shift type as arithmetic.
537  // Instead, we use the __shift_right intrinsic, which is defined to be arithmetic shift.
538  int64_t temp;
539  temp = __shift_right( inVal, (shift - 1) ) + 1;
540  temp = __shift_right(temp, 1);
541  MMALIB_UTIL_saturate_relu(temp, &result);
542  }
543 
544  return result;
545 }
546 
547 template <>
548 inline uint16_t MMALIB_UTIL_shiftRoundAndReLU<uint64_t, uint16_t>(uint64_t inVal, uint8_t shift){
549  uint16_t result;
550 
551  if(shift == 0){
552  // remove the rounding, which doesn't make sense with no shift but causes C code problems
553  MMALIB_UTIL_saturate_relu(inVal, &result);
554  } else {
555  uint64_t temp;
556  uint32_t shift32_t = (uint32_t) shift;
557  temp = (inVal >> (shift32_t - (uint32_t)1) ) + 1;
558  temp = (temp >> 1);
559  MMALIB_UTIL_saturate_relu(temp, &result);
560  }
561 
562  return result;
563 }
564 
565 
566 /*******************************************************************************
567  *
568  * Inline saturate operation
569  *
570  ******************************************************************************/
571 
// Saturate a signed 32-bit value into the int8_t range [-0x80, 0x7F].
static inline int8_t MMALIB_UTIL_saturate(int32_t x)
{
   int32_t clamped = x;

   if (clamped > 0x7F) {
      clamped = 0x7F;
   }
   if (clamped < -0x80) {
      clamped = -0x80;
   }
   return (int8_t)clamped;
}
584 
// Saturate an unsigned 32-bit value into the uint8_t range [0, 0xFF].
static inline uint8_t MMALIB_UTIL_saturate(uint32_t x)
{
   return (x > 0xFF) ? (uint8_t)0xFF : (uint8_t)x;
}
595 
// Saturate a signed 64-bit value into the int16_t range [-0x8000, 0x7FFF].
static inline int16_t MMALIB_UTIL_saturate(int64_t x)
{
   int64_t clamped = x;

   if (clamped > 0x7FFF) {
      clamped = 0x7FFF;
   }
   if (clamped < -0x8000) {
      clamped = -0x8000;
   }
   return (int16_t)clamped;
}
608 
// Saturate an unsigned 64-bit value into the uint16_t range [0, 0xFFFF].
static inline uint16_t MMALIB_UTIL_saturate(uint64_t x)
{
   return (x > 0xFFFF) ? (uint16_t)0xFFFF : (uint16_t)x;
}
619 
/**
 * Saturate a signed 128-bit value, given as (xh, xl), into int32_t.
 *
 * @param xh  upper 64 bits of the two's-complement value
 * @param xl  lower 64 bits of the two's-complement value
 * @return    the value itself when it lies in [INT32_MIN, INT32_MAX],
 *            otherwise the nearest of those two limits
 *
 * A value fits in 32 bits exactly when every bit above bit 30 equals the
 * sign bit, i.e. all of xh and the top 33 bits of xl are uniformly 0
 * (non-negative) or uniformly 1 (negative).
 */
static inline int32_t MMALIB_UTIL_saturate(int64_t xh, int64_t xl)
{
   const uint64_t signBit = 0x8000000000000000ULL;
   const uint64_t allOnes = 0xFFFFFFFFFFFFFFFFULL;
   const uint64_t upper33 = 0xFFFFFFFF80000000ULL;
   const uint64_t hiBits  = (uint64_t)xh;
   const uint64_t loBits  = (uint64_t)xl;
   int32_t retVal;

   if ((hiBits & signBit) != 0ULL) {
      // negative: in range only if all upper bits are ones
      if ((hiBits != allOnes) || ((loBits & upper33) != upper33)) {
         retVal = (-2147483647 - 1);
      } else {
         retVal = (int32_t)xl;
      }
   } else if ((hiBits != 0ULL) || ((loBits & upper33) != 0ULL)) {
      // non-negative and too large: any set bit in xh or in the top 33 bits
      // of xl puts the value above INT32_MAX
      retVal = 2147483647;
   } else {
      retVal = (int32_t)xl;
   }
   return retVal;
}
639 
640 static inline int32_t MMALIB_UTIL_saturate(MMALIB_int128_t x)
641 {
642  return MMALIB_UTIL_saturate(x.hi, x.lo);
643 }
644 
645 /*******************************************************************************
646  *
647  * Inline shift and round operation
648  *
649  ******************************************************************************/
650 
// Right-shift inVal by `shift` bits with round-half-up, then saturate into
// returnType via the MMALIB_UTIL_saturate overload selected by dataType.
//
//   inVal : accumulator value to scale down
//   shift : number of bits to shift; 0 means saturate only, no rounding
template <typename dataType, typename returnType>
inline returnType MMALIB_UTIL_shiftAndRound(dataType inVal, uint8_t shift){
   returnType result;

   if(shift == 0){
      // no rounding: it makes no sense without a shift
      result = MMALIB_UTIL_saturate(inVal);
   } else {
      // Round and shift. ((v + 1) >> 1) is computed as (v >> 1) + (v & 1):
      // same value, but the addition cannot overflow when v is the largest
      // representable dataType value.
      const dataType preShifted = __shift_right(inVal, (shift - 1));
      const dataType roundBit   = preShifted & (dataType)1;
      const dataType rounded    = __shift_right(preShifted, 1) + roundBit;
      result = MMALIB_UTIL_saturate(rounded);
   }

   return result;
}
668 
669 /*******************************************************************************
670  *
671  * Specialized shift and round operation for floating points
672  * Basically just returns input as it is
673  *
674  ******************************************************************************/
675 template <> inline MMALIB_F32 MMALIB_UTIL_shiftAndRound <MMALIB_F32, MMALIB_F32> (MMALIB_F32 inVal, uint8_t shift)
676 {
677  // DEBUG
678  // MMALIB_DEBUGPRINTFN(1, "Using empty shift and round function fot floats...%s", '\n');
679  return inVal;
680 }
681 
682 template MMALIB_F32 MMALIB_UTIL_shiftAndRound<MMALIB_F32, MMALIB_F32> (MMALIB_F32 inVal, uint8_t shift);
683 template int8_t MMALIB_UTIL_shiftAndRound<int32_t, int8_t> (int32_t inVal, uint8_t shift);
684 template int16_t MMALIB_UTIL_shiftAndRound<int64_t, int16_t> (int64_t inVal, uint8_t shift);
685 
686 
687 template <>
688 inline uint8_t MMALIB_UTIL_shiftAndRound(uint32_t inVal, uint8_t shift){
689  uint8_t result;
690 
691  if(shift == 0){
692  // remove the rounding, which doesn't make sense with no shift but causes C code problems
693  result = MMALIB_UTIL_saturate(inVal);
694  } else {
695  // round and shift
696  uint32_t temp;
697  uint32_t shift32_t = (uint32_t) shift;
698  temp = (inVal >> (shift32_t - (uint32_t)1) ) + 1;
699  temp = (temp >> 1);
700  result = MMALIB_UTIL_saturate(temp);
701  }
702 
703  return result;
704 }
705 
706 template <>
707 inline uint16_t MMALIB_UTIL_shiftAndRound(uint64_t inVal, uint8_t shift){
708  uint16_t result;
709 
710  if(shift == 0){
711  // remove the rounding, which doesn't make sense with no shift but causes C code problems
712  result = MMALIB_UTIL_saturate(inVal);
713  } else {
714  // round and shift
715  uint64_t temp;
716  uint32_t shift32_t = (uint32_t) shift;
717  temp = (inVal >> (shift32_t - (uint32_t)1) ) + 1;
718  temp = (temp >> 1);
719  result = MMALIB_UTIL_saturate(temp);
720  }
721 
722  return result;
723 }
724 
725 // MISRA-C prohibits using >> on signed integers because it is implementation dependent on whether
726 // that shift is arithmetic or logical. However, for MMALIB_int128_t, this code implements the shift in software
727 // and ensures that it is arithmetic. To avoid the MISRA-C violation, we use the function version of the shift
728 // rather than the >> operator.
729 template <>
730 inline int32_t MMALIB_UTIL_shiftAndRound<MMALIB_int128_t, int32_t>(MMALIB_int128_t inVal, uint8_t shift){
731  int32_t result;
732 
733  if(shift == 0){
734  // remove the rounding, which doesn't make sense with no shift but causes C code problems
735  result = MMALIB_UTIL_saturate(inVal);
736  } else {
737  // round and shift
738  //result = MMALIB_UTIL_saturate(((inVal >> ((uint8_t)(shift - 1))) + 1) >> 1);
739  MMALIB_int128_t temp;
740  // temp = inVal >> (shift - 1)
741  MMALIB_UTIL_shiftRight128((uint64_t *)&temp.hi, (uint64_t *)&temp.lo, inVal.hi, inVal.lo, (int32_t)(shift - 1), 1);
742  temp = temp + 1;
743  // temp = temp >> 1
744  MMALIB_UTIL_shiftRight128((uint64_t *)&temp.hi, (uint64_t *)&temp.lo, temp.hi, temp.lo, 1, 1);
745  result = MMALIB_UTIL_saturate(temp);
746  }
747 
748  return result;
749 }
750 
751 
752 /*******************************************************************************
753  *
754  * Convert a double-precision floating point number to a saturated signed
754  * integer of the requested return type (instantiated for 16 and 32 bits)
755  *
756  ******************************************************************************/
757 
758 template <typename returnType>
759 static inline returnType MMALIB_UTIL_typeConv_i64f_oxX(MMALIB_D64 x)
760 {
761  int64_t xLocal, maxValue;
762  returnType returnValue;
763 
764  /* Set maxValue to the maximumum possible value for the returnType */
765 
766  // original code
767  // maxValue = (1 << (sizeof(returnType)*8-2)) - 1;
768  // maxValue += (1 << (sizeof(returnType)*8-2));
769  maxValue = ((int64_t)( (uint32_t)1 << ((uint32_t)(sizeof(returnType)*8-2)))) - 1;
770  maxValue += (int64_t)( (uint32_t)1 << ((uint32_t)(sizeof(returnType)*8-2)));
771 
772  xLocal = (int64_t)floor(0.5 + x); /* Explicit rounding to integer */
773  if (xLocal >= maxValue) {
774  returnValue = (returnType)maxValue;
775  } else if (xLocal <= -maxValue-1) {
776  returnValue = (returnType)(-maxValue-1);
777  } else {
778  returnValue = (returnType)xLocal;
779  }
780  return returnValue;
781 }
782 
783 template int16_t MMALIB_UTIL_typeConv_i64f_oxX<int16_t>(MMALIB_D64 x);
784 template int32_t MMALIB_UTIL_typeConv_i64f_oxX<int32_t>(MMALIB_D64 x);
785 
786 
787 /*******************************************************************************
788  *
789  * Evaluate cos function, and apply appropriate scale factor
790  *
791  ******************************************************************************/
792 
793 template <typename returnType>
794 static returnType MMALIB_UTIL_cos_i64f_oxX(MMALIB_D64 x,
795  MMALIB_D64 scaleFactor)
796 {
797  return MMALIB_UTIL_typeConv_i64f_oxX<returnType>(scaleFactor*cos(x));
798 }
799 
800 template int16_t MMALIB_UTIL_cos_i64f_oxX<int16_t>(MMALIB_D64 x, MMALIB_D64 scaleFactor);
801 template int32_t MMALIB_UTIL_cos_i64f_oxX<int32_t>(MMALIB_D64 x, MMALIB_D64 scaleFactor);
802 
803 
804 /*******************************************************************************
805  *
806  * Evaluate sin function, and apply appropriate scale factor
807  *
808  ******************************************************************************/
809 
810 template <typename returnType>
811 static inline returnType MMALIB_UTIL_sin_i64f_oxX(MMALIB_D64 x,
812  MMALIB_D64 scaleFactor)
813 {
814  return MMALIB_UTIL_typeConv_i64f_oxX<returnType>(scaleFactor*sin(x));
815 }
816 
817 template int16_t MMALIB_UTIL_sin_i64f_oxX<int16_t>(MMALIB_D64 x, MMALIB_D64 scaleFactor);
818 template int32_t MMALIB_UTIL_sin_i64f_oxX<int32_t>(MMALIB_D64 x, MMALIB_D64 scaleFactor);
819 
820 
821 /*******************************************************************************
822  *
823  * ILUT
824  *
825  ******************************************************************************/
826 
827 // "base" template covers the 8-bit variants
// Table lookup: the low 8 bits of inVal select one entry of the table at
// pLutValues, which is read as an array of dataType.
template <typename dataType>
inline dataType MMALIB_UTIL_iLUT(dataType inVal, const void *pLutValues){
   const dataType *table = (const dataType*)pLutValues;
   const uint8_t   slot  = (uint8_t)((uint8_t)inVal & (uint8_t)0xffU);

   return table[slot];
}
839 
840 template <>
841 inline int16_t MMALIB_UTIL_iLUT<int16_t>(int16_t inVal, const void *pLutValues){
842  const int16_t *iLUT = (const int16_t*)pLutValues;
843  uint16_t mask = 0x00ff;
844 
845  int16_t result;
846  uint8_t index = (int16_t) ((uint16_t)inVal & mask);
847  result = iLUT[index];
848 
849  return result;
850 }
851 
852 template <>
853 inline uint16_t MMALIB_UTIL_iLUT<uint16_t>(uint16_t inVal, const void *pLutValues){
854  const uint16_t *iLUT = (const uint16_t*)pLutValues;
855  uint16_t mask = 0x00ff;
856 
857  uint16_t result;
858  uint8_t index = inVal & mask;
859  result = iLUT[index];
860 
861  return result;
862 }
863 
864 #endif
865 
866 #endif
File to hold buffer parameter related info for MMALIB.
File to hold common structure, enums, macros and functions for MMALIB.
uint64_t end_count
static void MMALIB_UTIL_mult(int64_t *ph, int64_t *pl, int64_t a, int64_t b)
uint64_t overhead
uint64_t beg_count
float MMALIB_F32
Single precision floating point.
Definition: MMALIB_types.h:149
double MMALIB_D64
Double precision floating point.
Definition: MMALIB_types.h:148