Performance data was obtained on the J721E EVM. EVM warm cycles were obtained by profiling the kernel's compute code execution after a cold run of the same code. Please refer to the kernel's documentation for more information about the parameters in the tables shown below.
DSPLIB kernels
This section contains tables that depict the expected performance numbers for the DSPLIB kernels.
DSPLIB Vector Operations
Kernel | Data Type | Data Size | EVM Cycles | Cycles/Sample
|
DSPLIB_add | float | 256 | 90 | 0.35
|
| float | 512 | 99 | 0.19
|
| float | 1024 | 131 | 0.13
|
| float | 2048 | 195 | 0.10
|
| float | 10240 | 721 | 0.07
|
| double | 256 | 106 | 0.41
|
| double | 512 | 131 | 0.26
|
| double | 1024 | 195 | 0.19
|
| double | 2048 | 337 | 0.16
|
| double | 10240 | 1361 | 0.13
|
| int32_t | 256 | 88 | 0.34
|
| int32_t | 512 | 97 | 0.19
|
| int32_t | 1024 | 129 | 0.13
|
| uint32_t | 2048 | 200 | 0.10
|
| uint32_t | 10240 | 719 | 0.07
|
| int16_t | 256 | 80 | 0.31
|
| int16_t | 512 | 81 | 0.16
|
| int16_t | 1024 | 97 | 0.09
|
| uint16_t | 2048 | 136 | 0.07
|
| uint16_t | 10240 | 399 | 0.04
|
| int8_t | 256 | 76 | 0.30
|
| int8_t | 512 | 73 | 0.14
|
| int8_t | 1024 | 81 | 0.08
|
| uint8_t | 2048 | 104 | 0.05
|
| uint8_t | 10240 | 225 | 0.02
|
| | | |
|
DSPLIB_addConstant | float | 256 | 106 | 0.41
|
| float | 512 | 118 | 0.23
|
| float | 1024 | 150 | 0.15
|
| float | 2048 | 214 | 0.10
|
| float | 10240 | 741 | 0.07
|
| double | 256 | 125 | 0.49
|
| double | 512 | 150 | 0.29
|
| double | 1024 | 214 | 0.21
|
| double | 2048 | 357 | 0.17
|
| double | 10240 | 1381 | 0.13
|
| int32_t | 256 | 104 | 0.41
|
| int32_t | 512 | 116 | 0.23
|
| int32_t | 1024 | 148 | 0.14
|
| uint32_t | 2048 | 219 | 0.11
|
| uint32_t | 10240 | 739 | 0.07
|
| int16_t | 256 | 96 | 0.38
|
| int16_t | 512 | 97 | 0.19
|
| int16_t | 1024 | 116 | 0.11
|
| uint16_t | 2048 | 155 | 0.08
|
| uint16_t | 10240 | 419 | 0.04
|
| int8_t | 256 | 92 | 0.36
|
| int8_t | 512 | 89 | 0.17
|
| int8_t | 1024 | 97 | 0.09
|
| uint8_t | 2048 | 123 | 0.06
|
| uint8_t | 10240 | 244 | 0.02
|
| | | |
|
DSPLIB_sub | float | 256 | 90 | 0.35
|
| float | 512 | 106 | 0.21
|
| float | 1024 | 131 | 0.13
|
| float | 2048 | 195 | 0.10
|
| float | 10240 | 721 | 0.07
|
| double | 256 | 106 | 0.41
|
| double | 512 | 131 | 0.26
|
| double | 1024 | 195 | 0.19
|
| double | 2048 | 337 | 0.16
|
| double | 10240 | 1361 | 0.13
|
| int32_t | 256 | 88 | 0.34
|
| int32_t | 512 | 97 | 0.19
|
| int32_t | 1024 | 129 | 0.13
|
| uint32_t | 2048 | 200 | 0.10
|
| uint32_t | 10240 | 719 | 0.07
|
| int16_t | 256 | 80 | 0.31
|
| int16_t | 512 | 81 | 0.16
|
| int16_t | 1024 | 97 | 0.09
|
| uint16_t | 2048 | 136 | 0.07
|
| uint16_t | 10240 | 399 | 0.04
|
| int8_t | 256 | 76 | 0.30
|
| int8_t | 512 | 73 | 0.14
|
| int8_t | 1024 | 81 | 0.08
|
| uint8_t | 2048 | 104 | 0.05
|
| uint8_t | 10240 | 225 | 0.02
|
| | | |
|
DSPLIB_subConstant | float | 256 | 106 | 0.41
|
| float | 512 | 118 | 0.23
|
| float | 1024 | 150 | 0.15
|
| float | 2048 | 214 | 0.10
|
| float | 10240 | 741 | 0.07
|
| double | 256 | 125 | 0.49
|
| double | 512 | 150 | 0.29
|
| double | 1024 | 214 | 0.21
|
| double | 2048 | 357 | 0.17
|
| double | 10240 | 1381 | 0.13
|
| int32_t | 256 | 104 | 0.41
|
| int32_t | 512 | 116 | 0.23
|
| int32_t | 1024 | 148 | 0.14
|
| uint32_t | 2048 | 219 | 0.11
|
| uint32_t | 10240 | 739 | 0.07
|
| int16_t | 256 | 96 | 0.38
|
| int16_t | 512 | 97 | 0.19
|
| int16_t | 1024 | 116 | 0.11
|
| uint16_t | 2048 | 155 | 0.08
|
| uint16_t | 10240 | 419 | 0.04
|
| int8_t | 256 | 92 | 0.36
|
| int8_t | 512 | 89 | 0.17
|
| int8_t | 1024 | 97 | 0.09
|
| uint8_t | 2048 | 123 | 0.06
|
| uint8_t | 10240 | 244 | 0.02
|
| | | |
|
DSPLIB_mul | float | 256 | 91 | 0.36
|
| float | 512 | 107 | 0.21
|
| float | 1024 | 139 | 0.14
|
| float | 2048 | 203 | 0.10
|
| float | 10240 | 722 | 0.07
|
| double | 256 | 107 | 0.42
|
| double | 512 | 132 | 0.26
|
| double | 1024 | 196 | 0.19
|
| double | 2048 | 338 | 0.17
|
| double | 10240 | 1362 | 0.13
|
| int32_t | 256 | 91 | 0.36
|
| int32_t | 512 | 100 | 0.20
|
| int32_t | 1024 | 132 | 0.13
|
| uint32_t | 2048 | 203 | 0.10
|
| uint32_t | 10240 | 722 | 0.07
|
| int16_t | 256 | 83 | 0.32
|
| int16_t | 512 | 91 | 0.18
|
| int16_t | 1024 | 107 | 0.10
|
| uint16_t | 2048 | 132 | 0.06
|
| uint16_t | 10240 | 402 | 0.04
|
| int8_t | 256 | 79 | 0.31
|
| int8_t | 512 | 76 | 0.15
|
| int8_t | 1024 | 84 | 0.08
|
| uint8_t | 2048 | 107 | 0.05
|
| uint8_t | 10240 | 228 | 0.02
|
| | | |
|
DSPLIB_mulConstant | float | 256 | 107 | 0.42
|
| float | 512 | 119 | 0.23
|
| float | 1024 | 151 | 0.15
|
| float | 2048 | 215 | 0.10
|
| float | 10240 | 742 | 0.07
|
| double | 256 | 126 | 0.49
|
| double | 512 | 151 | 0.29
|
| double | 1024 | 215 | 0.21
|
| double | 2048 | 358 | 0.17
|
| double | 10240 | 1382 | 0.13
|
| int32_t | 256 | 107 | 0.42
|
| int32_t | 512 | 119 | 0.23
|
| int32_t | 1024 | 151 | 0.15
|
| uint32_t | 2048 | 222 | 0.11
|
| uint32_t | 10240 | 742 | 0.07
|
| int16_t | 256 | 99 | 0.39
|
| int16_t | 512 | 100 | 0.20
|
| int16_t | 1024 | 119 | 0.12
|
| uint16_t | 2048 | 158 | 0.08
|
| uint16_t | 10240 | 422 | 0.04
|
| int8_t | 256 | 95 | 0.37
|
| int8_t | 512 | 92 | 0.18
|
| int8_t | 1024 | 100 | 0.10
|
| uint8_t | 2048 | 126 | 0.06
|
| uint8_t | 10240 | 247 | 0.02
|
| | | |
|
DSPLIB_sqr | float | 256 | 81 | 0.32
|
| float | 512 | 104 | 0.20
|
| float | 1024 | 136 | 0.13
|
| float | 2048 | 200 | 0.10
|
| float | 10240 | 719 | 0.07
|
| double | 256 | 104 | 0.41
|
| double | 512 | 129 | 0.25
|
| double | 1024 | 193 | 0.19
|
| double | 2048 | 335 | 0.16
|
| double | 10240 | 1359 | 0.13
|
| int32_t | 256 | 88 | 0.34
|
| int32_t | 512 | 97 | 0.19
|
| int32_t | 1024 | 129 | 0.13
|
| uint32_t | 2048 | 200 | 0.10
|
| uint32_t | 10240 | 719 | 0.07
|
| int16_t | 256 | 80 | 0.31
|
| int16_t | 512 | 81 | 0.16
|
| int16_t | 1024 | 97 | 0.09
|
| uint16_t | 2048 | 136 | 0.07
|
| uint16_t | 10240 | 399 | 0.04
|
| int8_t | 256 | 76 | 0.30
|
| int8_t | 512 | 73 | 0.14
|
| int8_t | 1024 | 81 | 0.08
|
| uint8_t | 2048 | 104 | 0.05
|
| uint8_t | 10240 | 225 | 0.02
|
| | | |
|
DSPLIB_max | float | 256 | 293 | 1.14
|
| float | 512 | 307 | 0.60
|
| float | 1024 | 348 | 0.34
|
| float | 2048 | 386 | 0.19
|
| float | 10240 | 654 | 0.06
|
| double | 256 | 295 | 1.15
|
| double | 512 | 336 | 0.66
|
| double | 1024 | 374 | 0.37
|
| double | 2048 | 444 | 0.22
|
| double | 10240 | 975 | 0.10
|
| int32_t | 256 | 276 | 1.08
|
| int32_t | 512 | 290 | 0.57
|
| int32_t | 1024 | 331 | 0.32
|
| uint32_t | 2048 | 369 | 0.18
|
| uint32_t | 10240 | 637 | 0.06
|
| int16_t | 256 | 223 | 0.87
|
| int16_t | 512 | 233 | 0.46
|
| int16_t | 1024 | 247 | 0.24
|
| uint16_t | 2048 | 288 | 0.14
|
| uint16_t | 10240 | 428 | 0.04
|
| int8_t | 256 | 217 | 0.85
|
| int8_t | 512 | 222 | 0.43
|
| int8_t | 1024 | 232 | 0.23
|
| uint8_t | 2048 | 246 | 0.12
|
| uint8_t | 10240 | 341 | 0.03
|
| | | |
|
DSPLIB_maxIndex | float | 256 | 1855 | 7.25
|
| float | 512 | 1913 | 3.74
|
| float | 1024 | 1901 | 1.86
|
| float | 2048 | 2009 | 0.98
|
| float | 10240 | 2493 | 0.24
|
| int32_t | 256 | 1864 | 7.28
|
| int32_t | 512 | 1878 | 3.67
|
| int32_t | 1024 | 1912 | 1.87
|
| uint32_t | 2048 | 1978 | 0.97
|
| uint32_t | 10240 | 2536 | 0.25
|
| int16_t | 256 | 1934 | 7.55
|
| int16_t | 512 | 1957 | 3.82
|
| int16_t | 1024 | 1975 | 1.93
|
| uint16_t | 2048 | 1990 | 0.97
|
| uint16_t | 10240 | 2248 | 0.22
|
| int8_t | 256 | 2219 | 8.67
|
| int8_t | 512 | 2670 | 5.21
|
| int8_t | 1024 | 3549 | 3.47
|
| uint8_t | 2048 | 5255 | 2.57
|
| uint8_t | 10240 | 19224 | 1.88
|
| | | |
|
DSPLIB_maxEvery | float | 256 | 82 | 0.32
|
| float | 512 | 98 | 0.19
|
| float | 1024 | 130 | 0.13
|
| float | 2048 | 194 | 0.09
|
| float | 10240 | 720 | 0.07
|
| double | 256 | 105 | 0.41
|
| double | 512 | 130 | 0.25
|
| double | 1024 | 194 | 0.19
|
| double | 2048 | 336 | 0.16
|
| double | 10240 | 1360 | 0.13
|
| int32_t | 256 | 88 | 0.34
|
| int32_t | 512 | 97 | 0.19
|
| int32_t | 1024 | 129 | 0.13
|
| uint32_t | 2048 | 200 | 0.10
|
| uint32_t | 10240 | 719 | 0.07
|
| int16_t | 256 | 80 | 0.31
|
| int16_t | 512 | 81 | 0.16
|
| int16_t | 1024 | 97 | 0.09
|
| uint16_t | 2048 | 136 | 0.07
|
| uint16_t | 10240 | 399 | 0.04
|
| int8_t | 256 | 76 | 0.30
|
| int8_t | 512 | 73 | 0.14
|
| int8_t | 1024 | 81 | 0.08
|
| uint8_t | 2048 | 104 | 0.05
|
| uint8_t | 10240 | 225 | 0.02
|
| | | |
|
DSPLIB_min | float | 256 | 293 | 1.14
|
| float | 512 | 307 | 0.60
|
| float | 1024 | 348 | 0.34
|
| float | 2048 | 386 | 0.19
|
| float | 10240 | 654 | 0.06
|
| double | 256 | 295 | 1.15
|
| double | 512 | 336 | 0.66
|
| double | 1024 | 374 | 0.37
|
| double | 2048 | 444 | 0.22
|
| double | 10240 | 975 | 0.10
|
| int32_t | 256 | 276 | 1.08
|
| int32_t | 512 | 290 | 0.57
|
| int32_t | 1024 | 331 | 0.32
|
| uint32_t | 2048 | 369 | 0.18
|
| uint32_t | 10240 | 637 | 0.06
|
| int16_t | 256 | 223 | 0.87
|
| int16_t | 512 | 233 | 0.46
|
| int16_t | 1024 | 247 | 0.24
|
| uint16_t | 2048 | 288 | 0.14
|
| uint16_t | 10240 | 428 | 0.04
|
| int8_t | 256 | 217 | 0.85
|
| int8_t | 512 | 222 | 0.43
|
| int8_t | 1024 | 232 | 0.23
|
| uint8_t | 2048 | 246 | 0.12
|
| uint8_t | 10240 | 341 | 0.03
|
| | | |
|
DSPLIB_minIndex | float | 256 | 1856 | 7.25
|
| float | 512 | 1862 | 3.64
|
| float | 1024 | 1892 | 1.85
|
| float | 2048 | 1958 | 0.96
|
| float | 10240 | 2484 | 0.24
|
| int32_t | 256 | 1863 | 7.28
|
| int32_t | 512 | 1915 | 3.74
|
| int32_t | 1024 | 1911 | 1.87
|
| uint32_t | 2048 | 2020 | 0.99
|
| uint32_t | 10240 | 2544 | 0.25
|
| int16_t | 256 | 1906 | 7.45
|
| int16_t | 512 | 1909 | 3.73
|
| int16_t | 1024 | 1960 | 1.91
|
| uint16_t | 2048 | 1965 | 0.96
|
| uint16_t | 10240 | 2215 | 0.22
|
| int8_t | 256 | 2070 | 8.09
|
| int8_t | 512 | 2523 | 4.93
|
| int8_t | 1024 | 3410 | 3.33
|
| uint8_t | 2048 | 5216 | 2.55
|
| uint8_t | 10240 | 18958 | 1.85
|
| | | |
|
DSPLIB_minEvery | float | 256 | 89 | 0.35
|
| float | 512 | 98 | 0.19
|
| float | 1024 | 130 | 0.13
|
| float | 2048 | 194 | 0.09
|
| float | 10240 | 720 | 0.07
|
| double | 256 | 98 | 0.38
|
| double | 512 | 130 | 0.25
|
| double | 1024 | 194 | 0.19
|
| double | 2048 | 336 | 0.16
|
| double | 10240 | 1360 | 0.13
|
| int32_t | 256 | 88 | 0.34
|
| int32_t | 512 | 97 | 0.19
|
| int32_t | 1024 | 129 | 0.13
|
| uint32_t | 2048 | 200 | 0.10
|
| uint32_t | 10240 | 719 | 0.07
|
| int16_t | 256 | 73 | 0.29
|
| int16_t | 512 | 81 | 0.16
|
| int16_t | 1024 | 97 | 0.09
|
| uint16_t | 2048 | 136 | 0.07
|
| uint16_t | 10240 | 399 | 0.04
|
| int8_t | 256 | 76 | 0.30
|
| int8_t | 512 | 73 | 0.14
|
| int8_t | 1024 | 81 | 0.08
|
| uint8_t | 2048 | 104 | 0.05
|
| uint8_t | 10240 | 225 | 0.02
|
DSPLIB_fir
Datatype | Data Size | Output Size | Filter Size | EVM Cycles | Taps/Cycle
|
float | 2048 | 1025 | 1024 | 41635 | 25.21
|
float | 1151 | 1024 | 128 | 4387 | 29.88
|
float | 1087 | 1024 | 64 | 2339 | 28.02
|
DSPLIB_cascadebiquad
Datatype | Data Size | Num Channels | Num Stages | EVM Cycles | Cycles/Biquad
|
float | 512 | 32 | 3 | 7529 | 0.15
|
float | 128 | 32 | 7 | 4775 | 0.17
|
DSPLIB_w_vec
Data Type | Data Size | Weight Style | EVM Cycles | Cycles/Sample
|
float | 256 | Scalar | 91 | 0.36
|
float | 512 | Scalar | 107 | 0.21
|
float | 1024 | Scalar | 139 | 0.14
|
float | 2048 | Scalar | 203 | 0.1
|
float | 10240 | Scalar | 732 | 0.07
|
int32_t | 256 | Scalar | 89 | 0.35
|
int32_t | 512 | Scalar | 105 | 0.21
|
int32_t | 1024 | Scalar | 137 | 0.13
|
uint32_t | 2048 | Scalar | 201 | 0.1
|
uint32_t | 10240 | Scalar | 730 | 0.07
|
int16_t | 256 | Scalar | 81 | 0.32
|
int16_t | 512 | Scalar | 89 | 0.17
|
int16_t | 1024 | Scalar | 105 | 0.1
|
uint16_t | 2048 | Scalar | 137 | 0.07
|
uint16_t | 10240 | Scalar | 410 | 0.04
|
int8_t | 256 | Scalar | 75 | 0.29
|
int8_t | 512 | Scalar | 81 | 0.16
|
int8_t | 1024 | Scalar | 89 | 0.09
|
uint8_t | 2048 | Scalar | 105 | 0.05
|
uint8_t | 10240 | Scalar | 233 | 0.02
|
double | 256 | Scalar | 107 | 0.42
|
double | 512 | Scalar | 139 | 0.27
|
double | 1024 | Scalar | 203 | 0.2
|
double | 2048 | Scalar | 348 | 0.17
|
double | 10240 | Scalar | 1372 | 0.13
|
float | 256 | Vector | 112 | 0.44
|
float | 512 | Vector | 142 | 0.28
|
float | 1024 | Vector | 214 | 0.21
|
float | 2048 | Vector | 344 | 0.17
|
float | 10240 | Vector | 1377 | 0.13
|
int32_t | 256 | Vector | 109 | 0.43
|
int32_t | 512 | Vector | 140 | 0.27
|
int32_t | 1024 | Vector | 209 | 0.2
|
uint32_t | 2048 | Vector | 364 | 0.18
|
uint32_t | 10240 | Vector | 1375 | 0.13
|
int16_t | 256 | Vector | 90 | 0.35
|
int16_t | 512 | Vector | 109 | 0.21
|
int16_t | 1024 | Vector | 140 | 0.14
|
uint16_t | 2048 | Vector | 236 | 0.12
|
uint16_t | 10240 | Vector | 735 | 0.07
|
int8_t | 256 | Vector | 81 | 0.32
|
int8_t | 512 | Vector | 90 | 0.18
|
int8_t | 1024 | Vector | 109 | 0.11
|
uint8_t | 2048 | Vector | 140 | 0.07
|
uint8_t | 10240 | Vector | 402 | 0.04
|
double | 256 | Vector | 142 | 0.55
|
double | 512 | Vector | 214 | 0.42
|
double | 1024 | Vector | 364 | 0.36
|
double | 2048 | Vector | 614 | 0.3
|
double | 10240 | Vector | 2684 | 0.26 |
DSPLIB_bexp
Datatype | Data Size | EVM Cycles | Cycles/Sample |
uint8_t | 256 | 243 | 0.949 |
uint8_t | 495 | 190 | 0.384 |
uint8_t | 4096 | 231 | 0.056 |
uint8_t | 6912 | 333 | 0.048 |
int8_t | 256 | 202 | 0.789 |
int8_t | 511 | 142 | 0.278 |
int8_t | 4096 | 241 | 0.059 |
int8_t | 6410 | 278 | 0.043 |
uint16_t | 256 | 279 | 1.09 |
uint16_t | 508 | 250 | 0.492 |
uint16_t | 4096 | 427 | 0.104 |
uint16_t | 6530 | 530 | 0.081 |
int16_t | 256 | 246 | 0.961 |
int16_t | 510 | 202 | 0.396 |
int16_t | 4096 | 324 | 0.079 |
int16_t | 8012 | 532 | 0.066 |
uint32_t | 256 | 248 | 0.969 |
uint32_t | 476 | 165 | 0.347 |
uint32_t | 4096 | 486 | 0.119 |
uint32_t | 7250 | 649 | 0.09 |
int32_t | 256 | 249 | 0.973 |
int32_t | 496 | 207 | 0.417 |
int32_t | 4096 | 537 | 0.131 |
int32_t | 7846 | 916 | 0.117 |
int64_t | 256 | 193 | 0.754 |
int64_t | 506 | 230 | 0.455 |
int64_t | 4096 | 870 | 0.212 |
int64_t | 8151 | 1221 | 0.15 |
uint64_t | 256 | 213 | 0.832 |
uint64_t | 449 | 280 | 0.624 |
uint64_t | 4096 | 873 | 0.213 |
uint64_t | 5435 | 730 | 0.134 |
DSPLIB_blk_eswap
Datatype | Data Size | EVM Cycles | Cycles/Sample |
int16_t | 256 | 184 | 0.719 |
int16_t | 1024 | 225 | 0.22 |
int16_t | 8194 | 461 | 0.056 |
int16_t | 131074 | 4309 | 0.033 |
int32_t | 256 | 156 | 0.609 |
int32_t | 1024 | 230 | 0.225 |
int32_t | 8194 | 671 | 0.082 |
int32_t | 65537 | 4275 | 0.065 |
int64_t | 256 | 187 | 0.73 |
int64_t | 1026 | 319 | 0.311 |
int64_t | 8194 | 1237 | 0.151 |
int64_t | 32768 | 4305 | 0.131 |
uint16_t | 256 | 125 | 0.488 |
uint16_t | 1024 | 149 | 0.146 |
uint16_t | 8194 | 435 | 0.053 |
uint16_t | 131074 | 4298 | 0.033 |
uint32_t | 256 | 151 | 0.59 |
uint32_t | 1026 | 223 | 0.217 |
uint32_t | 8193 | 654 | 0.08 |
uint32_t | 65538 | 4265 | 0.065 |
uint64_t | 258 | 215 | 0.833 |
uint64_t | 1024 | 323 | 0.315 |
uint64_t | 8192 | 1198 | 0.146 |
uint64_t | 32768 | 4268 | 0.13 |
float | 256 | 188 | 0.734 |
float | 1025 | 284 | 0.277 |
float | 8192 | 725 | 0.089 |
float | 65538 | 4344 | 0.066 |
double | 256 | 250 | 0.977 |
double | 1025 | 366 | 0.357 |
double | 8192 | 1238 | 0.151 |
double | 32768 | 4311 | 0.132 |
DSPLIB_blk_move
Datatype | Data Size | EVM Cycles | Cycles/Sample |
uint8_t | 256 | 179 | 0.699 |
uint8_t | 2050 | 172 | 0.084 |
uint8_t | 8194 | 276 | 0.034 |
uint8_t | 131073 | 2125 | 0.016 |
int8_t | 256 | 162 | 0.633 |
int8_t | 2048 | 187 | 0.091 |
int8_t | 8194 | 298 | 0.036 |
int8_t | 262146 | 4173 | 0.016 |
uint16_t | 256 | 138 | 0.539 |
uint16_t | 2048 | 222 | 0.108 |
uint16_t | 8193 | 375 | 0.046 |
uint16_t | 65537 | 2125 | 0.032 |
int16_t | 256 | 100 | 0.391 |
int16_t | 2048 | 178 | 0.087 |
int16_t | 8194 | 373 | 0.046 |
int16_t | 131074 | 4173 | 0.032 |
uint32_t | 256 | 148 | 0.578 |
uint32_t | 2049 | 231 | 0.113 |
uint32_t | 8194 | 629 | 0.077 |
uint32_t | 32769 | 2125 | 0.064 |
int32_t | 256 | 153 | 0.598 |
int32_t | 2049 | 271 | 0.132 |
int32_t | 8192 | 667 | 0.081 |
int32_t | 65536 | 4172 | 0.064 |
uint64_t | 258 | 130 | 0.504 |
uint64_t | 2049 | 373 | 0.182 |
uint64_t | 8194 | 1112 | 0.136 |
uint64_t | 16385 | 2125 | 0.127 |
int64_t | 256 | 132 | 0.516 |
int64_t | 2049 | 373 | 0.182 |
int64_t | 8194 | 1112 | 0.136 |
int64_t | 32770 | 4173 | 0.127 |
float | 256 | 147 | 0.574 |
float | 2049 | 275 | 0.134 |
float | 8192 | 671 | 0.082 |
float | 65538 | 4173 | 0.064 |
double | 258 | 171 | 0.663 |
double | 2049 | 417 | 0.204 |
double | 8193 | 1152 | 0.141 |
double | 32770 | 4173 | 0.127 |
DSPLIB_fltoq15
Input Datatype | Output Datatype | Data Size | EVM Cycles | Cycles/Sample |
float | int16_t | 287 | 317 | 1.105 |
float | int16_t | 2048 | 478 | 0.233 |
float | int16_t | 16384 | 1461 | 0.089 |
float | int16_t | 51200 | 1809 | 0.035 |
double | int16_t | 287 | 322 | 1.122 |
double | int16_t | 2048 | 653 | 0.319 |
double | int16_t | 16384 | 2890 | 0.176 |
double | int16_t | 51200 | 6557 | 0.128 |
DSPLIB_q15tofl
Input Datatype | Output Datatype | Data Size | EVM Cycles | Cycles/Sample |
int16_t | float | 256 | 110 | 0.43 |
int16_t | float | 1024 | 165 | 0.161 |
int16_t | float | 8192 | 589 | 0.072 |
int16_t | float | 32768 | 2125 | 0.065 |
int16_t | double | 256 | 144 | 0.563 |
int16_t | double | 1024 | 245 | 0.239 |
int16_t | double | 8192 | 1137 | 0.139 |
int16_t | double | 32768 | 4209 | 0.128 |
DSPLIB_minerror
Datatype | Data Size | EVM Cycles | Cycles/Sample |
int8_t | 256 | 953 | 3.72 |
int16_t | 256 | 1020 | 3.98 |
int32_t | 256 | 1154 | 4.5 |
int64_t | 256 | 1984 | 7.75 |
uint8_t | 256 | 995 | 3.88 |
uint16_t | 256 | 996 | 3.89 |
uint32_t | 256 | 1168 | 4.56 |
uint64_t | 256 | 1992 | 7.78 |
float | 256 | 979 | 3.82 |
double | 256 | 1740 | 6.67 |
DSPLIB_matMul
C = A×B, where C, A, and B are M×N, M×K, and K×N matrices, respectively
Datatype | M | N | K | EVM Cycles | GFLOPs |
float | 128 | 256 | 128 | 145238 | 57.76 |
float | 64 | 512 | 64 | 73108 | 57.37 |
float | 64 | 256 | 64 | 36831 | 56.94 |
|
DSPLIB_matMul_fixed
C = A×B, where C, A, and B are M×N, M×K, and K×N matrices, respectively
Datatype | M | N | K | EVM Cycles | MACs / Cycle |
int8_t | 16 | 16 | 16 | 322 | 12.72 |
int8_t | 32 | 32 | 32 | 845 | 38.78 |
int8_t | 64 | 64 | 64 | 5388 | 48.65 |
int8_t | 128 | 128 | 128 | 38593 | 54.34 |
int8_t | 256 | 256 | 256 | 295551 | 56.77 |
int16_t | 16 | 16 | 16 | 389 | 10.53 |
int16_t | 32 | 32 | 32 | 973 | 33.68 |
int16_t | 64 | 64 | 64 | 5740 | 45.67 |
int16_t | 128 | 128 | 128 | 39143 | 53.58 |
int16_t | 256 | 256 | 256 | 288071 | 58.24 |
DSPLIB_matTrans
Datatype | M | N | EVM Cycles | Cycles/Sample |
int8_t | 16 | 512 | 938 | 0.115 |
int8_t | 128 | 128 | 690 | 0.042 |
int8_t | 512 | 512 | 8384 | 0.032 |
int8_t | 1000 | 256 | 8480 | 0.033 |
int16_t | 16 | 512 | 876 | 0.107 |
int16_t | 128 | 128 | 626 | 0.038 |
int16_t | 800 | 64 | 1812 | 0.035 |
int16_t | 1000 | 128 | 4357 | 0.034 |
int32_t | 16 | 512 | 1440 | 0.176 |
int32_t | 128 | 128 | 1409 | 0.086 |
int32_t | 256 | 256 | 4843 | 0.074 |
int32_t | 800 | 64 | 3457 | 0.068 |
int64_t | 16 | 512 | 1218 | 0.149 |
int64_t | 128 | 128 | 2285 | 0.139 |
int64_t | 224 | 224 | 6766 | 0.135 |
int64_t | 800 | 64 | 6839 | 0.134 |
uint8_t | 16 | 512 | 932 | 0.114 |
uint8_t | 128 | 128 | 685 | 0.042 |
uint8_t | 512 | 512 | 8382 | 0.032 |
uint8_t | 1000 | 256 | 8481 | 0.033 |
uint16_t | 16 | 512 | 870 | 0.106 |
uint16_t | 128 | 128 | 673 | 0.041 |
uint16_t | 256 | 256 | 2704 | 0.041 |
uint16_t | 1000 | 128 | 4445 | 0.035 |
uint32_t | 16 | 512 | 1444 | 0.176 |
uint32_t | 128 | 128 | 1414 | 0.086 |
uint32_t | 256 | 256 | 4795 | 0.073 |
uint32_t | 800 | 64 | 3414 | 0.067 |
uint64_t | 16 | 512 | 1219 | 0.149 |
uint64_t | 128 | 128 | 2285 | 0.139 |
uint64_t | 224 | 224 | 6766 | 0.135 |
uint64_t | 800 | 64 | 6839 | 0.134 |
float | 16 | 512 | 1385 | 0.169 |
float | 128 | 128 | 1423 | 0.087 |
float | 256 | 256 | 4860 | 0.074 |
float | 800 | 64 | 3474 | 0.068 |
double | 16 | 512 | 1214 | 0.148 |
double | 128 | 128 | 2288 | 0.14 |
double | 224 | 224 | 6709 | 0.134 |
double | 800 | 64 | 6743 | 0.132 |
DSPLIB_mat_submat_copy
If dir = 0, copies from X(M,N) to Y(nRows,nCols) starting from the index (stRow,stCol). If dir = 1, copies from Y(nRows,nCols) to X(M,N) into the index (stRow,stCol).
Datatype | M | N | nRows | nCols | stRow | stCol | dir | EVM Cycles | Cycles/Sample |
int8_t | 64 | 64 | 64 | 64 | 0 | 0 | 0 | 144 | 2.25 |
int8_t | 128 | 128 | 82 | 112 | 4 | 5 | 1 | 281 | 1.71 |
int8_t | 256 | 256 | 128 | 196 | 5 | 4 | 0 | 654 | 1.28 |
int8_t | 512 | 512 | 512 | 512 | 0 | 0 | 0 | 4668 | 1.14 |
int16_t | 32 | 32 | 32 | 32 | 0 | 0 | 0 | 114 | 3.56 |
int16_t | 64 | 64 | 24 | 48 | 2 | 3 | 0 | 140 | 2.92 |
int16_t | 128 | 128 | 112 | 82 | 5 | 4 | 1 | 469 | 1.4 |
int16_t | 256 | 256 | 256 | 256 | 0 | 0 | 0 | 2348 | 1.15 |
int32_t | 32 | 32 | 32 | 32 | 0 | 0 | 0 | 148 | 2.31 |
int32_t | 64 | 64 | 48 | 24 | 3 | 2 | 1 | 240 | 2.5 |
int32_t | 128 | 128 | 128 | 128 | 0 | 0 | 0 | 1123 | 1.1 |
int32_t | 256 | 256 | 196 | 128 | 4 | 5 | 0 | 1805 | 1.15 |
int64_t | 16 | 16 | 16 | 16 | 0 | 0 | 0 | 116 | 3.63 |
int64_t | 32 | 32 | 15 | 24 | 1 | 1 | 1 | 168 | 3.73 |
int64_t | 64 | 64 | 24 | 48 | 2 | 3 | 0 | 230 | 1.6 |
int64_t | 128 | 128 | 128 | 128 | 0 | 0 | 0 | 2610 | 1.27 |
uint8_t | 64 | 64 | 64 | 64 | 0 | 0 | 0 | 144 | 2.25 |
uint8_t | 128 | 128 | 82 | 112 | 4 | 5 | 1 | 280 | 1.71 |
uint8_t | 256 | 256 | 128 | 196 | 5 | 4 | 0 | 654 | 1.28 |
uint8_t | 512 | 512 | 512 | 512 | 0 | 0 | 0 | 4668 | 1.14 |
uint16_t | 32 | 32 | 32 | 32 | 0 | 0 | 0 | 114 | 3.56 |
uint16_t | 64 | 64 | 24 | 48 | 2 | 3 | 0 | 140 | 2.92 |
uint16_t | 128 | 128 | 112 | 82 | 5 | 4 | 1 | 466 | 1.39 |
uint16_t | 256 | 256 | 256 | 256 | 0 | 0 | 0 | 2348 | 1.15 |
uint32_t | 32 | 32 | 32 | 32 | 0 | 0 | 0 | 148 | 2.31 |
uint32_t | 64 | 64 | 48 | 24 | 3 | 2 | 1 | 243 | 2.53 |
uint32_t | 128 | 128 | 128 | 128 | 0 | 0 | 0 | 1128 | 1.1 |
uint32_t | 256 | 256 | 196 | 128 | 4 | 5 | 0 | 1823 | 1.16 |
uint64_t | 16 | 16 | 16 | 16 | 0 | 0 | 0 | 116 | 3.63 |
uint64_t | 32 | 32 | 15 | 24 | 1 | 1 | 1 | 168 | 3.73 |
uint64_t | 64 | 64 | 24 | 48 | 2 | 3 | 0 | 230 | 1.6 |
uint64_t | 128 | 128 | 128 | 128 | 0 | 0 | 0 | 2821 | 1.38 |
float | 32 | 32 | 32 | 32 | 0 | 0 | 0 | 148 | 2.31 |
float | 64 | 64 | 48 | 24 | 3 | 2 | 1 | 240 | 2.5 |
float | 128 | 128 | 82 | 112 | 4 | 5 | 0 | 674 | 1.17 |
float | 256 | 256 | 256 | 256 | 0 | 0 | 0 | 5748 | 1.4 |
double | 16 | 16 | 16 | 16 | 0 | 0 | 0 | 116 | 3.63 |
double | 32 | 32 | 24 | 15 | 1 | 1 | 0 | 142 | 2.96 |
double | 64 | 64 | 48 | 24 | 3 | 2 | 1 | 276 | 1.92 |
double | 128 | 128 | 128 | 128 | 0 | 0 | 0 | 2821 | 1.38 |
DSPLIB_cholesky
Datatype | M | N | enableTest | EVM Cycles | Cycles/Sample |
float | 16 | 16 | 1 | 2553 | 9.97 |
float | 32 | 32 | 1 | 5561 | 5.43 |
float | 64 | 64 | 1 | 16461 | 4.02 |
float | 80 | 80 | 1 | 24129 | 3.77 |
float | 96 | 96 | 1 | 32478 | 3.52 |
float | 128 | 128 | 1 | 55524 | 3.39 |
float | 16 | 16 | 0 | 2312 | 9.03 |
float | 32 | 32 | 0 | 5177 | 5.06 |
float | 64 | 64 | 0 | 15405 | 3.76 |
float | 80 | 80 | 0 | 22351 | 3.49 |
float | 96 | 96 | 0 | 30267 | 3.28 |
float | 128 | 128 | 0 | 52978 | 3.23 |
DSPLIB_cholesky_inplace
Datatype | M | N | enableTest | EVM Cycles | Cycles/Sample |
float | 16 | 16 | 1 | 2622 | 10.24 |
float | 32 | 32 | 1 | 5390 | 5.26 |
float | 64 | 64 | 1 | 16316 | 3.98 |
float | 80 | 80 | 1 | 24138 | 3.77 |
float | 96 | 96 | 1 | 32933 | 3.57 |
float | 128 | 128 | 1 | 56674 | 3.46 |
float | 16 | 16 | 0 | 2450 | 9.57 |
float | 32 | 32 | 0 | 5190 | 5.07 |
float | 64 | 64 | 0 | 15983 | 3.9 |
float | 80 | 80 | 0 | 23615 | 3.69 |
float | 96 | 96 | 0 | 32343 | 3.51 |
float | 128 | 128 | 0 | 55851 | 3.41 |
DSPLIB_cholesky_solver
Datatype | M | N | EVM Cycles | Cycles/Sample |
float | 16 | 16 | 1019 | 3.98 |
float | 32 | 32 | 1906 | 1.86 |
float | 64 | 64 | 3971 | 0.97 |
float | 80 | 80 | 5231 | 0.82 |
float | 96 | 96 | 6303 | 0.68 |
float | 128 | 128 | 8913 | 0.54 |
DSPLIB_svd
Datatype | M | N | reducedForm | EVM Cycles | Cycles/Sample |
float | 16 | 16 | 0 | 142972 | 558.48 |
float | 32 | 32 | 0 | 373077 | 364.33 |
float | 64 | 64 | 0 | 1095520 | 267.46 |
float | 128 | 128 | 0 | 3816672 | 232.95 |
float | 64 | 128 | 0 | 1499955 | 183.1 |
float | 128 | 64 | 0 | 1536130 | 187.52 |
float | 16 | 16 | 1 | 141115 | 551.23 |
float | 32 | 32 | 1 | 377786 | 368.93 |
float | 64 | 64 | 1 | 1090114 | 266.14 |
float | 128 | 128 | 1 | 3928235 | 239.76 |
float | 64 | 128 | 1 | 1468546 | 179.27 |
float | 128 | 64 | 1 | 1473453 | 179.86 |
DSPLIB_qrd
Datatype | M | N | EVM Cycles | Cycles/Sample |
float | 16 | 16 | 13209 | 51.6 |
float | 32 | 32 | 30946 | 30.22 |
float | 64 | 64 | 124650 | 30.43 |
float | 80 | 80 | 158725 | 24.8 |
float | 96 | 96 | 222337 | 24.13 |
float | 128 | 128 | 416886 | 25.44 |
DSPLIB_qrd_solver
Datatype | M | N | EVM Cycles | Cycles/Sample |
float | 16 | 16 | 950 | 3.71 |
float | 32 | 32 | 1613 | 1.58 |
float | 64 | 64 | 3251 | 0.79 |
float | 80 | 80 | 4310 | 0.67 |
float | 96 | 96 | 5211 | 0.57 |
float | 128 | 128 | 7386 | 0.45 |
DSPLIB_qrd_inverse
DSPLIB_lud
Datatype | M | N | EVM Cycles | Cycles/Sample |
float | 16 | 16 | 10386 | 40.57 |
float | 32 | 32 | 22598 | 22.07 |
float | 64 | 64 | 57235 | 13.97 |
float | 80 | 80 | 82056 | 12.82 |
float | 96 | 96 | 110141 | 11.95 |
float | 128 | 128 | 179610 | 10.96 |
DSPLIB_lud_solver
Datatype | M | N | EVM Cycles | Cycles/Sample |
float | 16 | 16 | 1660 | 6.48 |
float | 32 | 32 | 2907 | 2.84 |
float | 64 | 64 | 5830 | 1.42 |
float | 80 | 80 | 7774 | 1.21 |
float | 96 | 96 | 9291 | 1.01 |
float | 128 | 128 | 13118 | 0.8 |
DSPLIB_lud_inverse
Datatype | M | N | EVM Cycles | Cycles/Sample |
float | 16 | 16 | 21867 | 85.42 |
float | 32 | 32 | 57645 | 56.29 |
float | 64 | 64 | 230389 | 56.25 |
float | 80 | 80 | 410418 | 64.13 |
float | 96 | 96 | 641253 | 69.58 |
float | 128 | 128 | 1332306 | 81.32 |
DSPLIB_dotp_sqr
Data Type | Data Size | EVM Cycles | Cycles/Sample
|
float | 256 | 141 | 0.55
|
float | 512 | 157 | 0.31
|
float | 1024 | 189 | 0.18
|
float | 2048 | 253 | 0.12
|
float | 32768 | 2187 | 0.07
|
double | 256 | 183 | 0.71
|
double | 512 | 247 | 0.48
|
double | 1024 | 375 | 0.37
|
double | 2048 | 631 | 0.31
|
double | 16384 | 4229 | 0.26
|
int32_t | 256 | 106 | 0.41
|
int32_t | 512 | 138 | 0.27
|
int32_t | 1024 | 202 | 0.2
|
int32_t | 2048 | 330 | 0.16
|
int32_t | 32768 | 4184 | 0.13
|
uint32_t | 256 | 106 | 0.41
|
uint32_t | 512 | 138 | 0.27
|
uint32_t | 1024 | 202 | 0.2
|
uint32_t | 2048 | 330 | 0.16
|
uint32_t | 32768 | 4184 | 0.13
|
int16_t | 256 | 81 | 0.32
|
int16_t | 512 | 89 | 0.17
|
int16_t | 1024 | 105 | 0.1
|
int16_t | 2048 | 137 | 0.07
|
int16_t | 32768 | 1111 | 0.03
|
uint16_t | 256 | 81 | 0.32
|
uint16_t | 512 | 89 | 0.17
|
uint16_t | 1024 | 105 | 0.1
|
uint16_t | 2048 | 137 | 0.07
|
uint16_t | 32768 | 1111 | 0.03
|
int8_t | 256 | 80 | 0.31
|
int8_t | 512 | 88 | 0.17
|
int8_t | 1024 | 104 | 0.1
|
int8_t | 2048 | 136 | 0.07
|
int8_t | 32768 | 1110 | 0.03
|
uint8_t | 256 | 77 | 0.3
|
uint8_t | 512 | 81 | 0.16
|
uint8_t | 1024 | 89 | 0.09
|
uint8_t | 2048 | 105 | 0.05
|
uint8_t | 32768 | 599 | 0.02 |
DSPLIB_dotprod
Data Type | Data Size | EVM Cycles | Cycles/Sample
|
float | 256 | 138 | 0.54
|
float | 512 | 154 | 0.3
|
float | 1024 | 200 | 0.2
|
float | 2048 | 250 | 0.12
|
float | 32768 | 2184 | 0.07
|
double | 256 | 147 | 0.57
|
double | 512 | 179 | 0.35
|
double | 1024 | 243 | 0.24
|
double | 2048 | 371 | 0.18
|
double | 32768 | 2177 | 0.07
|
int32_t | 256 | 89 | 0.35
|
int32_t | 512 | 105 | 0.21
|
int32_t | 1024 | 137 | 0.13
|
int32_t | 2048 | 201 | 0.1
|
int32_t | 32768 | 2135 | 0.07
|
uint32_t | 256 | 89 | 0.35
|
uint32_t | 512 | 105 | 0.21
|
uint32_t | 1024 | 137 | 0.13
|
uint32_t | 2048 | 201 | 0.1
|
uint32_t | 32768 | 2135 | 0.07
|
int16_t | 256 | 80 | 0.31
|
int16_t | 512 | 88 | 0.17
|
int16_t | 1024 | 104 | 0.1
|
int16_t | 2048 | 136 | 0.07
|
int16_t | 32768 | 1110 | 0.03
|
uint16_t | 256 | 80 | 0.31
|
uint16_t | 512 | 88 | 0.17
|
uint16_t | 1024 | 104 | 0.1
|
uint16_t | 2048 | 136 | 0.07
|
uint16_t | 32768 | 1110 | 0.03
|
int8_t | 256 | 79 | 0.31
|
int8_t | 512 | 87 | 0.17
|
int8_t | 1024 | 103 | 0.1
|
int8_t | 2048 | 135 | 0.07
|
int8_t | 32768 | 1109 | 0.03
|
uint8_t | 256 | 76 | 0.3
|
uint8_t | 512 | 80 | 0.16
|
uint8_t | 1024 | 88 | 0.09
|
uint8_t | 2048 | 104 | 0.05
|
uint8_t | 32768 | 598 | 0.02 |
DSPLIB_negate
Data Type | Data Size | EVM Cycles | Cycles/Sample
|
int32_t | 1024 | 125 | 0.12
|
int32_t | 2048 | 197 | 0.1
|
int32_t | 10240 | 716 | 0.07
|
int8_t | 1024 | 77 | 0.07
|
int8_t | 2048 | 93 | 0.04
|
int8_t | 10240 | 221 | 0.01
|
int16_t | 1024 | 93 | 0.08
|
int16_t | 2048 | 125 | 0.05
|
int16_t | 10240 | 396 | 0.01 |
DSPLIB_recip
Data Type | Data Size | Store Style | EVM Cycles | Cycles/Sample
|
float | 256 | Linear | 102 | 0.4
|
float | 512 | Linear | 134 | 0.26
|
float | 1024 | Linear | 198 | 0.19
|
float | 2048 | Linear | 326 | 0.16
|
float | 10240 | Linear | 1368 | 0.13
|
double | 256 | Linear | 144 | 0.56
|
double | 512 | Linear | 201 | 0.39
|
double | 1024 | Linear | 329 | 0.32
|
double | 2048 | Linear | 600 | 0.29
|
double | 10240 | Linear | 2648 | 0.26
|
int16_t | 256 | Horizontal Stack | 269 | 1.05
|
int16_t | 512 | Horizontal Stack | 422 | 0.82
|
int16_t | 1024 | Horizontal Stack | 742 | 0.72
|
int16_t | 2048 | Horizontal Stack | 1382 | 0.67
|
int16_t | 10240 | Horizontal Stack | 6515 | 0.64
|
int16_t | 256 | Interleaved | 265 | 1.04
|
int16_t | 512 | Interleaved | 425 | 0.83
|
int16_t | 1024 | Interleaved | 745 | 0.73
|
int16_t | 2048 | Interleaved | 1385 | 0.68
|
int16_t | 10240 | Interleaved | 6523 | 0.64 |
DSPLIB_sqrAdd
Data Type | Data Size | EVM Cycles | Cycles/Sample
|
float | 256 | 132 | 0.52
|
float | 512 | 140 | 0.27
|
float | 1024 | 157 | 0.15
|
float | 2048 | 188 | 0.09
|
float | 32768 | 1162 | 0.04
|
double | 256 | 149 | 0.58
|
double | 512 | 181 | 0.35
|
double | 1024 | 245 | 0.24
|
double | 2048 | 373 | 0.18
|
double | 32768 | 4227 | 0.13
|
int32_t | 256 | 87 | 0.34
|
int32_t | 512 | 103 | 0.2
|
int32_t | 1024 | 135 | 0.13
|
int32_t | 2048 | 199 | 0.1
|
int32_t | 32768 | 2133 | 0.07
|
uint32_t | 256 | 87 | 0.34
|
uint32_t | 512 | 103 | 0.2
|
uint32_t | 1024 | 135 | 0.13
|
uint32_t | 2048 | 199 | 0.1
|
uint32_t | 32768 | 2133 | 0.07
|
int16_t | 256 | 75 | 0.29
|
int16_t | 512 | 79 | 0.15
|
int16_t | 1024 | 87 | 0.08
|
int16_t | 2048 | 103 | 0.05
|
int16_t | 32768 | 597 | 0.02
|
uint16_t | 256 | 75 | 0.29
|
uint16_t | 512 | 79 | 0.15
|
uint16_t | 1024 | 87 | 0.08
|
uint16_t | 2048 | 103 | 0.05
|
uint16_t | 32768 | 597 | 0.02
|
int8_t | 256 | 76 | 0.3
|
int8_t | 512 | 82 | 0.16
|
int8_t | 1024 | 89 | 0.09
|
int8_t | 2048 | 103 | 0.05
|
int8_t | 32768 | 597 | 0.02
|
uint8_t | 256 | 72 | 0.28
|
uint8_t | 512 | 75 | 0.15
|
uint8_t | 1024 | 79 | 0.08
|
uint8_t | 2048 | 87 | 0.04
|
uint8_t | 32768 | 341 | 0.01 |