FFTLIB User Guide
c7120/FFTLIB_configurations.cpp
Go to the documentation of this file.
1 /*******************************************************************************
2  **+--------------------------------------------------------------------------+**
3  **| **** |**
4  **| **** |**
5  **| ******o*** |**
6  **| ********_///_**** |**
7  **| ***** /_//_/ **** |**
8  **| ** ** (__/ **** |**
9  **| ********* |**
10  **| **** |**
11  **| *** |**
12  **| |**
13  **| Copyright (c) 2007-2012 Texas Instruments Incorporated |**
14  **| ALL RIGHTS RESERVED |**
15  **| |**
16  **| Permission to use, copy, modify, or distribute this software, |**
17  **| whether in part or in whole, for any purpose is forbidden without |**
18  **| a signed licensing agreement and NDA from Texas Instruments |**
19  **| Incorporated (TI). |**
20  **| |**
21  **| TI makes no representation or warranties with respect to the |**
22  **| performance of this computer program, and specifically disclaims |**
23  **| any responsibility for any damages, special or consequential, |**
24  **| connected with the use of this program. |**
25  **| |**
26  **+--------------------------------------------------------------------------+**
27  *******************************************************************************/
28 
29 #include "../FFTLIB_types.h"
30 
31 /* -------------------------------------------------------------------------- */
32 /* MISRAC Rule 4.9(DEFINE.FUNC) Deviation: The advisory is not being */
33 /* addressed so as not to lose portability across different platforms. */
34 /* -------------------------------------------------------------------------- */
/* ASSIGN(param, value) produces one entry of a struct initializer list. */
/* With WIN32 defined it expands to the bare value, i.e. a positional */
/* entry, so entries must be listed in field declaration order. Otherwise */
/* it expands to the designated form ".param = value". */
35 #ifdef WIN32
36 #define ASSIGN(param, value) value
37 #else
38 #define ASSIGN(param, value) .param = value
39 #endif
40 
/* Constant __HWA_CONFIG_REG_v1 initializer for the 32-bit case. */
/* As set below: A_ATYPE is INT32, B_BTYPE is SIZE32, C_ATYPE is SA, */
/* C_BTYPE and C_HWLDTYPE are INT32, X_XTYPE is INT32 and X_CTYPE is INT128. */
/* The period fields that are non-zero all use FFTLIB_MMA_SIZE_32_BIT. */
/* X_SAT and X_RE are 1; X_SHIFT, X_ReLU and X_PSAT are 0. */
/* Presumably "i32s_i32s_o32s" means signed 32-bit inputs and output; */
/* confirm against the callers. */
/* Do not reorder entries: with WIN32 defined ASSIGN() expands to the bare */
/* value, so this becomes a positional initializer. */
41 const __HWA_CONFIG_REG_v1 configRegisterStruct_i32s_i32s_o32s =
42 {
43  /* -------------------------------------------------------------------- */
44  /* MISRAC Rule 10.3(ETYPE.ASSIGN.2012) Deviation: The data types of */
45  /* fields and the enum values are set by compiler according to the */
46  /* hardware specification, and are used as is. */
47  /* -------------------------------------------------------------------- */
48  ASSIGN(A_ATYPE , __MMA_A_CONFIG_ATYPE_INT32),
49  ASSIGN(A_RSVD1 , 0),
50  ASSIGN(A_ALUTEN , __MMA_A_CONFIG_NOLUT),
51  ASSIGN(A_RSVD2 , 0),
52  // begin new
53  ASSIGN(A_ARF_CTRL , __MMA_A_CONFIG_ARF_DISABLE), // disable A register file
54  ASSIGN(A_ARF_BASE , 0), /* base set to 0; the A register file is disabled by A_ARF_CTRL above */
55  ASSIGN(A_RSVD3 , 0),
56  ASSIGN(A_ARF_SIZE , 64), /* ARF array size for read and write operations */
57  ASSIGN(A_RSVD4 , 0),
58  // end new
59  ASSIGN(B_BSWPER , FFTLIB_MMA_SIZE_32_BIT), // 32 bits
60  ASSIGN(B_BRSTPER , FFTLIB_MMA_SIZE_32_BIT), // 8 bits
61  ASSIGN(B_BTYPE , __MMA_B_CONFIG_SIZE32), // 2 bits
62  // begin new
63  ASSIGN(B_LDBOPT , __MMA_LDBOPT_MMA1), /* Control for enhanced B operand row loading */
64  ASSIGN(B_B4EXP , __MMA_B_CONFIG_B4EXP_MMA1), /* B operand expansion control to conv 4-bit ops to 8-bit ops */
65  // end new
66  ASSIGN(B_RSVD1 , 0),
67  ASSIGN(B_ORDER , __MMA_B_CONFIG_ROW), // 1 bit
68  ASSIGN(B_RSVD2 , 0),
69  ASSIGN(B_BSTART , 0), // 1 bit
70  // begin new
71  ASSIGN(B_BCNT1_ENABLE, 0),/* Enable bit for option B row write row cntr for B bank 1. */
72  // end new
73  ASSIGN(B_RSVD3 , 0),
74  ASSIGN(B_BOFFSET , 0), // 8 bits
75  ASSIGN(B_RSVD4 , 0),
76
77  ASSIGN(C_ATYPE , __MMA_C_CONFIG_ATYPE_SA),
78  // begin new
79  ASSIGN(C_ARF_BASE , 0), /* ARF read pointer base value when ARG_C7 is cleared */
80  ASSIGN(C_ARF_C7 , 1), /* ARF read addresses are supplied by the host C7 processor as an argument to the HWAOP or HWAOPXFER instructions. */
81  // end new
82  ASSIGN(C_BTYPE , __MMA_C_CONFIG_BTYPE_INT32),
83  ASSIGN(C_RSVD2 , 0),
84  ASSIGN(C_OPERATION0 , __MMA_C_CONFIG_MUL),
85  // begin new 2
    /* NOTE(review): the C_LOP0 entry ends with a stray line-continuation */
    /* character. It only splices the following comment line onto the entry, */
    /* so it has no effect, but it should be removed. */
86  ASSIGN(C_LOP0 , __MMA_C_CONFIG_LOP_C),\
87  // end new 2
88  ASSIGN(C_RSVD3 , 0),
89  ASSIGN(C_OPERATION1 , __MMA_C_CONFIG_MULPLUS),
90  // begin new 2
91  ASSIGN(C_LOP1 , __MMA_C_CONFIG_LOP_C),
92  // end new 2
93  // begin new
94  ASSIGN(C_BIASORDER , __MMA_C_CONFIG_BIAS_ORDER_COLUMN),
95  // end new
96  ASSIGN(C_RSVD4 , 0),
97  // begin new
98  //ASSIGN(C_HWLDDST , __MMA_C_CONFIG_HWLDDST_X4_0),
99  ASSIGN(C_HWLDDST , __MMA_C_CONFIG_HWLDDST_X4_0),
100  // end new
101  ASSIGN(C_RSVD5 , 0),
102  ASSIGN(C_HWLDTYPE , __MMA_C_CONFIG_HWLDTYPE_INT32),
103  ASSIGN(C_RSVD6 , 0),
104
105  ASSIGN(C_OPSTART , __MMA_C_CONFIG_OPSTART_OPERATION0), // No enum in MMA spec? Initial C operand selections
106  ASSIGN(C_BSTART , 0x0), // Initial B bank selection for reading B matrix data
107  ASSIGN(C_CRSTART , 0x0), // Initial C bank selection for reading operands
108  ASSIGN(C_CWSTART , 0x0), // Initial C bank selection for writing computation results
109  ASSIGN(C_CLSTART , 0x0), // Initial C bank selection for writing operands from HWALD*
110  ASSIGN(C_RSVD7 , 0),
111  ASSIGN(C_CROFFSET , 0x0), // 6-bits C row read offset
112  ASSIGN(C_RSVD8 , 0),
113  ASSIGN(C_CWOFFSET , 0x0), // C row write offset for computations
114  ASSIGN(C_RSVD9 , 0),
115  ASSIGN(C_CLOFFSET , 0x0), // C row write offset for HWALD* instructions
116  ASSIGN(C_RSVD10 , 0),
117  ASSIGN(C_CLSWPER , 0), // C bank switch period for HWALD* instruction writes
118  ASSIGN(C_CLRSTPER , 0), // C write row offset reset period for HWALD*
119  ASSIGN(C_OP1PER , 0), // Operation 1 period
120  ASSIGN(C_OP0PER , FFTLIB_MMA_SIZE_32_BIT), // Operation 0 period
121  ASSIGN(C_BSWPER , FFTLIB_MMA_SIZE_32_BIT), // B bank switch period
122  ASSIGN(C_CRSWPER , 0), // C bank switch period for read instructions
123  ASSIGN(C_CWSWPER , 0), // C bank switch period for computation writes
124  ASSIGN(C_CRRSTPER , FFTLIB_MMA_SIZE_32_BIT), // C read row offset reset period
125  ASSIGN(C_CWRSTPER , FFTLIB_MMA_SIZE_32_BIT), // C write row offset reset period for computations
126
127  // begin new
128  //ASSIGN(X_ReLU , 0x0), // Enable Rectified Linear Units non-linearity after optional saturation
129  //ASSIGN(X_RSVD1 , 0),
130  //ASSIGN(X_SAT , 0x1), // Enable saturation in the transfer buffer element type after optional rounding
131  //ASSIGN(X_RSVD2 , 0),
132  //ASSIGN(X_RE , 0x1), // Enable routing via 1/2 LSB addition after shifting
133  ASSIGN(X_ReLU, 0), /* Optional non-linearity. */
134  // begin new 2
135  ASSIGN(X_PSAT, 0),
136  // end new 2
137  ASSIGN(X_SAT_MIN_5_0, 0),
138  ASSIGN(X_SAT, 1), // Enable saturation in the transfer buffer element type after optional rounding
139  ASSIGN(X_SAT_MIN_12_6, 0),
140  ASSIGN(X_RE, 0x1), // Enable rounding via 1/2 LSB addition after shifting
141  ASSIGN(X_SAT_MIN_15_13, 0),
142  ASSIGN(X_RANGE, __MMA_X_CONFIG_RANGE_DISABLE_NOINIT), /* Min/Max range accumulation control on C matrix reads by X FSM */
143  ASSIGN(X_SCALE_SHIFT_CTRL, __MMA_X_CONFIG_SCALE_SHIFT_CTRL_DISABLE),
144  // end new
145  ASSIGN(X_RSVD3 , 0),
146  ASSIGN(X_SHIFT , 0), // 7 bits. Right shift amount, signed or unsigned depending on CTYPE field
147  // begin new
148  //ASSIGN(X_RSVD4 , 0),
149  ASSIGN(X_VPACKN, __MMA_X_CONFIG_VPACKN_DISABLE), /* 4-bit packing control */
150  // end new
151  ASSIGN(X_XTYPE , __MMA_X_CONFIG_XTYPE_INT32), // Transfer buffer element type. Not all combinations of CTYPE and XTYPE are supported
152  // begin new
153  //ASSIGN(X_RSVD5 , 0),
154  ASSIGN(X_SAT_MAX_3_0, 0),
155  // end new
156  ASSIGN(X_CTYPE , __MMA_X_CONFIG_CTYPE_INT128), // C matrix element type. This must be consistent with the B FSM setting
157  // begin new
158  // ASSIGN(X_RSVD6 , 0),
159  ASSIGN(X_SAT_MAX_8_4, 0),
160  // end new
161  ASSIGN(X_CSWPER , FFTLIB_MMA_SIZE_32_BIT), // C read bank switch period
162  ASSIGN(X_CRRSTPER , FFTLIB_MMA_SIZE_32_BIT), // C read row offset reset period
163  ASSIGN(X_COFFSET , 0x0), // C matrix row read address offset
164  ASSIGN(X_CSTART , 0x0), // Initial C bank selection
165  // begin new
166  //ASSIGN(X_RSVD7 , 0x0), // Reserved
167  ASSIGN(X_SAT_MAX_15_9, 0),
168  // end new
169
170  ASSIGN(RSVD , 0),
171  ASSIGN(PARITYCTRL , __MMA_NORMAL)
172 };
173 
174 /*********************************
175  * Typical 16-bit configurations *
176  *********************************/
177 
/* Constant __HWA_CONFIG_REG_v1 initializer for a 16-bit case. */
/* As set below: A_ATYPE is INT16, B_BTYPE is SIZE16, C_ATYPE is SA, */
/* C_BTYPE and C_HWLDTYPE are INT16, X_XTYPE is INT16 and X_CTYPE is INT64. */
/* The period fields that are non-zero all use FFTLIB_MMA_SIZE_16_BIT. */
/* X_SAT and X_RE are 1; X_SHIFT, X_ReLU and X_PSAT are 0. */
/* Presumably "i16s_i16s_o16s" means signed 16-bit inputs and output; */
/* confirm against the callers. */
/* Do not reorder entries: with WIN32 defined ASSIGN() expands to the bare */
/* value, so this becomes a positional initializer. */
178 const __HWA_CONFIG_REG_v1 configRegisterStruct_i16s_i16s_o16s =
179 {
180  ASSIGN(A_ATYPE , __MMA_A_CONFIG_ATYPE_INT16),
181  ASSIGN(A_RSVD1 , 0),
182  ASSIGN(A_ALUTEN , __MMA_A_CONFIG_NOLUT),
183  ASSIGN(A_RSVD2 , 0),
184  // begin new
185  ASSIGN(A_ARF_CTRL , __MMA_A_CONFIG_ARF_DISABLE), // disable A register file
186  ASSIGN(A_ARF_BASE , 0), /* base set to 0; the A register file is disabled by A_ARF_CTRL above */
187  ASSIGN(A_RSVD3 , 0),
188  ASSIGN(A_ARF_SIZE , 64), /* ARF array size for read and write operations */
189  ASSIGN(A_RSVD4 , 0),
190  // end new
191  ASSIGN(B_BSWPER , FFTLIB_MMA_SIZE_16_BIT), // 32 bits
192  ASSIGN(B_BRSTPER , FFTLIB_MMA_SIZE_16_BIT), // 8 bits
193  ASSIGN(B_BTYPE , __MMA_B_CONFIG_SIZE16), // 2 bits
194  // begin new
195  ASSIGN(B_LDBOPT , __MMA_LDBOPT_MMA1), /* Control for enhanced B operand row loading */
196  ASSIGN(B_B4EXP , __MMA_B_CONFIG_B4EXP_MMA1), /* B operand expansion control to conv 4-bit ops to 8-bit ops */
197  // end new
198  ASSIGN(B_RSVD1 , 0),
199  ASSIGN(B_ORDER , __MMA_B_CONFIG_ROW), // 1 bit
200  ASSIGN(B_RSVD2 , 0),
201  ASSIGN(B_BSTART , 0), // 1 bit
202  // begin new
203  ASSIGN(B_BCNT1_ENABLE, 0),/* Enable bit for option B row write row cntr for B bank 1. */
204  // end new
205  ASSIGN(B_RSVD3 , 0),
206  ASSIGN(B_BOFFSET , 0), // 8 bits
207  ASSIGN(B_RSVD4 , 0),
208
209  ASSIGN(C_ATYPE , __MMA_C_CONFIG_ATYPE_SA),
210  // begin new
211  ASSIGN(C_ARF_BASE , 0), /* ARF read pointer base value when ARG_C7 is cleared */
212  ASSIGN(C_ARF_C7 , 1), /* ARF read addresses are supplied by the host C7 processor as an argument to the HWAOP or HWAOPXFER instructions. */
213  // end new
214
215  ASSIGN(C_BTYPE , __MMA_C_CONFIG_BTYPE_INT16),
216  ASSIGN(C_RSVD2 , 0),
217  ASSIGN(C_OPERATION0 , __MMA_C_CONFIG_MUL),
218  // begin new 2
    /* NOTE(review): the C_LOP0 entry ends with a stray line-continuation */
    /* character. It only splices the following comment line onto the entry, */
    /* so it has no effect, but it should be removed. */
219  ASSIGN(C_LOP0 , __MMA_C_CONFIG_LOP_C),\
220  // end new 2
221  ASSIGN(C_RSVD3 , 0),
222  ASSIGN(C_OPERATION1 , __MMA_C_CONFIG_MULPLUS),
223  // begin new 2
224  ASSIGN(C_LOP1 , __MMA_C_CONFIG_LOP_C),
225  // end new 2
226  // begin new
227  ASSIGN(C_BIASORDER , __MMA_C_CONFIG_BIAS_ORDER_COLUMN),
228  // end new
229  ASSIGN(C_RSVD4 , 0),
230  // begin new
231  //ASSIGN(C_HWLDDST , __MMA_C_CONFIG_HWLDDST_X4_0),
232  ASSIGN(C_HWLDDST, __MMA_C_CONFIG_HWLDDST_X4_0),
233  // end new
234  ASSIGN(C_RSVD5 , 0),
235  ASSIGN(C_HWLDTYPE , __MMA_C_CONFIG_HWLDTYPE_INT16),
236  ASSIGN(C_RSVD6 , 0),
237
238  ASSIGN(C_OPSTART , __MMA_C_CONFIG_OPSTART_OPERATION0), // No enum in MMA spec? Initial C operand selections
239  ASSIGN(C_BSTART , 0x0), // Initial B bank selection for reading B matrix data
240  ASSIGN(C_CRSTART , 0x0), // Initial C bank selection for reading operands
241  ASSIGN(C_CWSTART , 0x0), // Initial C bank selection for writing computation results
242  ASSIGN(C_CLSTART , 0x0), // Initial C bank selection for writing operands from HWALD*
243  ASSIGN(C_RSVD7 , 0),
244  ASSIGN(C_CROFFSET , 0x0), // 6-bits C row read offset
245  ASSIGN(C_RSVD8 , 0),
246  ASSIGN(C_CWOFFSET , 0x0), // C row write offset for computations
247  ASSIGN(C_RSVD9 , 0),
248  ASSIGN(C_CLOFFSET , 0x0), // C row write offset for HWALD* instructions
249  ASSIGN(C_RSVD10 , 0),
250  ASSIGN(C_CLSWPER , 0), // C bank switch period for HWALD* instruction writes
251  ASSIGN(C_CLRSTPER , 0), // C write row offset reset period for HWALD*
252  ASSIGN(C_OP1PER , 0), // Operation 1 period
253  ASSIGN(C_OP0PER , FFTLIB_MMA_SIZE_16_BIT), // Operation 0 period
254  ASSIGN(C_BSWPER , FFTLIB_MMA_SIZE_16_BIT), // B bank switch period
255  ASSIGN(C_CRSWPER , 0), // C bank switch period for read instructions
256  ASSIGN(C_CWSWPER , 0), // C bank switch period for computation writes
257  ASSIGN(C_CRRSTPER , FFTLIB_MMA_SIZE_16_BIT), // C read row offset reset period
258  ASSIGN(C_CWRSTPER , FFTLIB_MMA_SIZE_16_BIT), // C write row offset reset period for computations
259
260  // begin new
261  // ASSIGN(X_ReLU , 0x0), // Enable Rectified Linear Units non-linearity after optional saturation
262  // ASSIGN(X_RSVD1 , 0),
263  // ASSIGN(X_SAT , 0x1), // Enable saturation in the transfer buffer element type after optional rounding
264  // ASSIGN(X_RSVD2 , 0),
265  // ASSIGN(X_RE , 0x1), // Enable routing via 1/2 LSB addition after shifting
266  ASSIGN(X_ReLU, 0), /* Optional non-linearity. */
267  // begin new 2
268  ASSIGN(X_PSAT, 0),
269  // end new 2
270  ASSIGN(X_SAT_MIN_5_0, 0),
271  ASSIGN(X_SAT, 1), // Enable saturation in the transfer buffer element type after optional rounding
272  ASSIGN(X_SAT_MIN_12_6, 0),
273  ASSIGN(X_RE, 0x1), // Enable rounding via 1/2 LSB addition after shifting
274  ASSIGN(X_SAT_MIN_15_13, 0),
275  ASSIGN(X_RANGE, __MMA_X_CONFIG_RANGE_DISABLE_NOINIT), /* Min/Max range accumulation control on C matrix reads by X FSM */
276  ASSIGN(X_SCALE_SHIFT_CTRL, __MMA_X_CONFIG_SCALE_SHIFT_CTRL_DISABLE),
277  // end new
278  ASSIGN(X_RSVD3 , 0),
279  ASSIGN(X_SHIFT , 0), // 7 bits. Right shift amount, signed or unsigned depending on CTYPE field.
280  // begin new
281  //ASSIGN(X_RSVD4 , 0),
282  ASSIGN(X_VPACKN, __MMA_X_CONFIG_VPACKN_DISABLE),
283  // end new
284  ASSIGN(X_XTYPE , __MMA_X_CONFIG_XTYPE_INT16), // Transfer buffer element type. Not all combinations of CTYPE and XTYPE are supported
285  // begin new
286  //ASSIGN(X_RSVD5 , 0),
287  ASSIGN(X_SAT_MAX_3_0, 0),
288  // end new
289  ASSIGN(X_CTYPE , __MMA_X_CONFIG_CTYPE_INT64), // C matrix element type. This must be consistent with the B FSM setting
290  // begin new
291  // ASSIGN(X_RSVD6 , 0),
292  ASSIGN(X_SAT_MAX_8_4, 0),
293  // end new
294  ASSIGN(X_CSWPER , FFTLIB_MMA_SIZE_16_BIT), // C read bank switch period
295  ASSIGN(X_CRRSTPER , FFTLIB_MMA_SIZE_16_BIT), // C read row offset reset period
296  ASSIGN(X_COFFSET , 0x0), // C matrix row read address offset
297  ASSIGN(X_CSTART , 0x0), // Initial C bank selection
298  // begin new
299  //ASSIGN(X_RSVD7 , 0x0), // Reserved
300  ASSIGN(X_SAT_MAX_15_9, 0),
301  // end new
302
303  ASSIGN(RSVD , 0),
304  ASSIGN(PARITYCTRL , __MMA_NORMAL)
305 };
306 
307 
/* Constant __HWA_CONFIG_REG_v1 initializer for a 16-bit case with an */
/* unsigned transfer-buffer type. */
/* As set below: A_ATYPE is INT16, B_BTYPE is SIZE16, C_ATYPE is SA, */
/* C_BTYPE and C_HWLDTYPE are INT16, X_XTYPE is UINT16 and X_CTYPE is INT64. */
/* The period fields that are non-zero all use FFTLIB_MMA_SIZE_16_BIT. */
/* X_SAT and X_RE are 1; X_SHIFT, X_ReLU and X_PSAT are 0. */
/* Presumably "i16s_i16s_o16u" means signed 16-bit inputs and unsigned */
/* 16-bit output; confirm against the callers. */
/* Do not reorder entries: with WIN32 defined ASSIGN() expands to the bare */
/* value, so this becomes a positional initializer. */
308 const __HWA_CONFIG_REG_v1 configRegisterStruct_i16s_i16s_o16u =
309 {
310  ASSIGN(A_ATYPE , __MMA_A_CONFIG_ATYPE_INT16),
311  ASSIGN(A_RSVD1 , 0),
312  ASSIGN(A_ALUTEN , __MMA_A_CONFIG_NOLUT),
313  ASSIGN(A_RSVD2 , 0),
314  // begin new
315  ASSIGN(A_ARF_CTRL , __MMA_A_CONFIG_ARF_DISABLE), // disable A register file
316  ASSIGN(A_ARF_BASE , 0), /* base set to 0; the A register file is disabled by A_ARF_CTRL above */
317  ASSIGN(A_RSVD3 , 0),
318  ASSIGN(A_ARF_SIZE , 64), /* ARF array size for read and write operations */
319  ASSIGN(A_RSVD4 , 0),
320  // end new
321  ASSIGN(B_BSWPER , FFTLIB_MMA_SIZE_16_BIT), // 32 bits
322  ASSIGN(B_BRSTPER , FFTLIB_MMA_SIZE_16_BIT), // 8 bits
323  ASSIGN(B_BTYPE , __MMA_B_CONFIG_SIZE16), // 2 bits
324  // begin new
325  ASSIGN(B_LDBOPT , __MMA_LDBOPT_MMA1), /* Control for enhanced B operand row loading */
326  ASSIGN(B_B4EXP , __MMA_B_CONFIG_B4EXP_MMA1), /* B operand expansion control to conv 4-bit ops to 8-bit ops */
327  // end new
328  ASSIGN(B_RSVD1 , 0),
329  ASSIGN(B_ORDER , __MMA_B_CONFIG_ROW), // 1 bit
330  ASSIGN(B_RSVD2 , 0),
331  ASSIGN(B_BSTART , 0), // 1 bit
332  // begin new
333  ASSIGN(B_BCNT1_ENABLE, 0),/* Enable bit for option B row write row cntr for B bank 1. */
334  // end new
335  ASSIGN(B_RSVD3 , 0),
336  ASSIGN(B_BOFFSET , 0), // 8 bits
337  ASSIGN(B_RSVD4 , 0),
338
339  ASSIGN(C_ATYPE , __MMA_C_CONFIG_ATYPE_SA),
340  // begin new
341  ASSIGN(C_ARF_BASE , 0), /* ARF read pointer base value when ARG_C7 is cleared */
342  ASSIGN(C_ARF_C7 , 1), /* ARF read addresses are supplied by the host C7 processor as an argument to the HWAOP or HWAOPXFER instructions. */
343  // end new
344
345  ASSIGN(C_BTYPE , __MMA_C_CONFIG_BTYPE_INT16),
346  ASSIGN(C_RSVD2 , 0),
347  ASSIGN(C_OPERATION0 , __MMA_C_CONFIG_MUL),
348  // begin new 2
    /* NOTE(review): the C_LOP0 entry ends with a stray line-continuation */
    /* character. It only splices the following comment line onto the entry, */
    /* so it has no effect, but it should be removed. */
349  ASSIGN(C_LOP0 , __MMA_C_CONFIG_LOP_C),\
350  // end new 2
351  ASSIGN(C_RSVD3 , 0),
352  ASSIGN(C_OPERATION1 , __MMA_C_CONFIG_MULPLUS),
353  // begin new 2
354  ASSIGN(C_LOP1 , __MMA_C_CONFIG_LOP_C),
355  // end new 2
356  // begin new
357  ASSIGN(C_BIASORDER , __MMA_C_CONFIG_BIAS_ORDER_COLUMN),
358  // end new
359  ASSIGN(C_RSVD4 , 0),
360  // begin new
361  //ASSIGN(C_HWLDDST , __MMA_C_CONFIG_HWLDDST_X4_0),
362  ASSIGN(C_HWLDDST, __MMA_C_CONFIG_HWLDDST_X4_0),
363  // end new
364  ASSIGN(C_RSVD5 , 0),
365  ASSIGN(C_HWLDTYPE , __MMA_C_CONFIG_HWLDTYPE_INT16),
366  ASSIGN(C_RSVD6 , 0),
367
368  ASSIGN(C_OPSTART , __MMA_C_CONFIG_OPSTART_OPERATION0), // No enum in MMA spec? Initial C operand selections
369  ASSIGN(C_BSTART , 0x0), // Initial B bank selection for reading B matrix data
370  ASSIGN(C_CRSTART , 0x0), // Initial C bank selection for reading operands
371  ASSIGN(C_CWSTART , 0x0), // Initial C bank selection for writing computation results
372  ASSIGN(C_CLSTART , 0x0), // Initial C bank selection for writing operands from HWALD*
373  ASSIGN(C_RSVD7 , 0),
374  ASSIGN(C_CROFFSET , 0x0), // 6-bits C row read offset
375  ASSIGN(C_RSVD8 , 0),
376  ASSIGN(C_CWOFFSET , 0x0), // C row write offset for computations
377  ASSIGN(C_RSVD9 , 0),
378  ASSIGN(C_CLOFFSET , 0x0), // C row write offset for HWALD* instructions
379  ASSIGN(C_RSVD10 , 0),
380  ASSIGN(C_CLSWPER , 0), // C bank switch period for HWALD* instruction writes
381  ASSIGN(C_CLRSTPER , 0), // C write row offset reset period for HWALD*
382  ASSIGN(C_OP1PER , 0), // Operation 1 period
383  ASSIGN(C_OP0PER , FFTLIB_MMA_SIZE_16_BIT), // Operation 0 period
384  ASSIGN(C_BSWPER , FFTLIB_MMA_SIZE_16_BIT), // B bank switch period
385  ASSIGN(C_CRSWPER , 0), // C bank switch period for read instructions
386  ASSIGN(C_CWSWPER , 0), // C bank switch period for computation writes
387  ASSIGN(C_CRRSTPER , FFTLIB_MMA_SIZE_16_BIT), // C read row offset reset period
388  ASSIGN(C_CWRSTPER , FFTLIB_MMA_SIZE_16_BIT), // C write row offset reset period for computations
389
390  // begin new
391  // ASSIGN(X_ReLU , 0x1), // Enable Rectified Linear Units non-linearity after optional saturation
392  // ASSIGN(X_RSVD1 , 0),
393  // ASSIGN(X_SAT , 0x1), // Enable saturation in the transfer buffer element type after optional rounding
394  // ASSIGN(X_RSVD2 , 0),
395  // ASSIGN(X_RE , 0x1), // Enable routing via 1/2 LSB addition after shifting
396  ASSIGN(X_ReLU, 0), /* Optional non-linearity. NOTE(review): the superseded entry above used 0x1 for this UINT16-output variant; confirm that 0 is intended. */
397  // begin new 2
398  ASSIGN(X_PSAT, 0),
399  // end new 2
400  ASSIGN(X_SAT_MIN_5_0, 0),
401  ASSIGN(X_SAT, 1), // Enable saturation in the transfer buffer element type after optional rounding
402  ASSIGN(X_SAT_MIN_12_6, 0),
403  ASSIGN(X_RE, 0x1), // Enable rounding via 1/2 LSB addition after shifting
404  ASSIGN(X_SAT_MIN_15_13, 0),
405  ASSIGN(X_RANGE, __MMA_X_CONFIG_RANGE_DISABLE_NOINIT), /* Min/Max range accumulation control on C matrix reads by X FSM */
406  ASSIGN(X_SCALE_SHIFT_CTRL, __MMA_X_CONFIG_SCALE_SHIFT_CTRL_DISABLE),
407  // end new
408  ASSIGN(X_RSVD3 , 0),
409  ASSIGN(X_SHIFT , 0), // 7 bits. Right shift amount, signed or unsigned depending on CTYPE field.
410  // begin new
411  //ASSIGN(X_RSVD4 , 0),
412  ASSIGN(X_VPACKN, __MMA_X_CONFIG_VPACKN_DISABLE),
413  // end new
414  ASSIGN(X_XTYPE , __MMA_X_CONFIG_XTYPE_UINT16), // Transfer buffer element type. Not all combinations of CTYPE and XTYPE are supported
415  // begin new
416  //ASSIGN(X_RSVD5 , 0),
417  ASSIGN(X_SAT_MAX_3_0, 0),
418  // end new
419  ASSIGN(X_CTYPE , __MMA_X_CONFIG_CTYPE_INT64), // C matrix element type. This must be consistent with the B FSM setting
420  // begin new
421  // ASSIGN(X_RSVD6 , 0),
422  ASSIGN(X_SAT_MAX_8_4, 0),
423  // end new
424  ASSIGN(X_CSWPER , FFTLIB_MMA_SIZE_16_BIT), // C read bank switch period
425  ASSIGN(X_CRRSTPER , FFTLIB_MMA_SIZE_16_BIT), // C read row offset reset period
426  ASSIGN(X_COFFSET , 0x0), // C matrix row read address offset
427  ASSIGN(X_CSTART , 0x0), // Initial C bank selection
428  // begin new
429  //ASSIGN(X_RSVD7 , 0x0), // Reserved
430  ASSIGN(X_SAT_MAX_15_9, 0),
431  // end new
432
433  ASSIGN(RSVD , 0),
434  ASSIGN(PARITYCTRL , __MMA_NORMAL)
435 };
436 
/* Constant __HWA_CONFIG_REG_v1 initializer for a 16-bit case with an */
/* unsigned A operand. */
/* As set below: A_ATYPE is UINT16, B_BTYPE is SIZE16, C_ATYPE is UA, */
/* C_BTYPE and C_HWLDTYPE are INT16, X_XTYPE is INT16 and X_CTYPE is INT64. */
/* The period fields that are non-zero all use FFTLIB_MMA_SIZE_16_BIT. */
/* X_SAT and X_RE are 1; X_SHIFT, X_ReLU and X_PSAT are 0. */
/* Presumably "i16u_i16s_o16s" means unsigned/signed 16-bit inputs and */
/* signed 16-bit output; confirm against the callers. */
/* Do not reorder entries: with WIN32 defined ASSIGN() expands to the bare */
/* value, so this becomes a positional initializer. */
437 const __HWA_CONFIG_REG_v1 configRegisterStruct_i16u_i16s_o16s =
438 {
439  ASSIGN(A_ATYPE , __MMA_A_CONFIG_ATYPE_UINT16),
440  ASSIGN(A_RSVD1 , 0),
441  ASSIGN(A_ALUTEN , __MMA_A_CONFIG_NOLUT),
442  ASSIGN(A_RSVD2 , 0),
443  // begin new
444  ASSIGN(A_ARF_CTRL , __MMA_A_CONFIG_ARF_DISABLE), // disable A register file
445  ASSIGN(A_ARF_BASE , 0), /* base set to 0; the A register file is disabled by A_ARF_CTRL above */
446  ASSIGN(A_RSVD3 , 0),
447  ASSIGN(A_ARF_SIZE , 64), /* ARF array size for read and write operations */
448  ASSIGN(A_RSVD4 , 0),
449  // end new
450  ASSIGN(B_BSWPER , FFTLIB_MMA_SIZE_16_BIT), // 32 bits
451  ASSIGN(B_BRSTPER , FFTLIB_MMA_SIZE_16_BIT), // 8 bits
452  ASSIGN(B_BTYPE , __MMA_B_CONFIG_SIZE16), // 2 bits
453  // begin new
454  ASSIGN(B_LDBOPT , __MMA_LDBOPT_MMA1), /* Control for enhanced B operand row loading */
455  ASSIGN(B_B4EXP , __MMA_B_CONFIG_B4EXP_MMA1), /* B operand expansion control to conv 4-bit ops to 8-bit ops */
456  // end new
457  ASSIGN(B_RSVD1 , 0),
458  ASSIGN(B_ORDER , __MMA_B_CONFIG_ROW), // 1 bit
459  ASSIGN(B_RSVD2 , 0),
460  ASSIGN(B_BSTART , 0), // 1 bit
461  // begin new
462  ASSIGN(B_BCNT1_ENABLE, 0),/* Enable bit for option B row write row cntr for B bank 1. */
463  // end new
464  ASSIGN(B_RSVD3 , 0),
465  ASSIGN(B_BOFFSET , 0), // 8 bits
466  ASSIGN(B_RSVD4 , 0),
467
468  ASSIGN(C_ATYPE , __MMA_C_CONFIG_ATYPE_UA),
469  // begin new
470  ASSIGN(C_ARF_BASE , 0), /* ARF read pointer base value when ARG_C7 is cleared */
471  ASSIGN(C_ARF_C7 , 1), /* ARF read addresses are supplied by the host C7 processor as an argument to the HWAOP or HWAOPXFER instructions. */
472  // end new
473  ASSIGN(C_BTYPE , __MMA_C_CONFIG_BTYPE_INT16),
474  ASSIGN(C_RSVD2 , 0),
475  ASSIGN(C_OPERATION0 , __MMA_C_CONFIG_MUL),
476  // begin new 2
    /* NOTE(review): the C_LOP0 entry ends with a stray line-continuation */
    /* character. It only splices the following comment line onto the entry, */
    /* so it has no effect, but it should be removed. */
477  ASSIGN(C_LOP0 , __MMA_C_CONFIG_LOP_C),\
478  // end new 2
479  ASSIGN(C_RSVD3 , 0),
480  ASSIGN(C_OPERATION1 , __MMA_C_CONFIG_MULPLUS),
481  // begin new 2
482  ASSIGN(C_LOP1 , __MMA_C_CONFIG_LOP_C),
483  // end new 2
484  // begin new
485  ASSIGN(C_BIASORDER , __MMA_C_CONFIG_BIAS_ORDER_COLUMN),
486  // end new
487  ASSIGN(C_RSVD4 , 0),
488  // begin new
489  //ASSIGN(C_HWLDDST , __MMA_C_CONFIG_HWLDDST_X4_0),
490  ASSIGN(C_HWLDDST, __MMA_C_CONFIG_HWLDDST_X4_0),
491  // end new
492  ASSIGN(C_RSVD5 , 0),
493  ASSIGN(C_HWLDTYPE , __MMA_C_CONFIG_HWLDTYPE_INT16),
494  ASSIGN(C_RSVD6 , 0),
495
496  ASSIGN(C_OPSTART , __MMA_C_CONFIG_OPSTART_OPERATION0), // No enum in MMA spec? Initial C operand selections
497  ASSIGN(C_BSTART , 0x0), // Initial B bank selection for reading B matrix data
498  ASSIGN(C_CRSTART , 0x0), // Initial C bank selection for reading operands
499  ASSIGN(C_CWSTART , 0x0), // Initial C bank selection for writing computation results
500  ASSIGN(C_CLSTART , 0x0), // Initial C bank selection for writing operands from HWALD*
501  ASSIGN(C_RSVD7 , 0),
502  ASSIGN(C_CROFFSET , 0x0), // 6-bits C row read offset
503  ASSIGN(C_RSVD8 , 0),
504  ASSIGN(C_CWOFFSET , 0x0), // C row write offset for computations
505  ASSIGN(C_RSVD9 , 0),
506  ASSIGN(C_CLOFFSET , 0x0), // C row write offset for HWALD* instructions
507  ASSIGN(C_RSVD10 , 0),
508  ASSIGN(C_CLSWPER , 0), // C bank switch period for HWALD* instruction writes
509  ASSIGN(C_CLRSTPER , 0), // C write row offset reset period for HWALD*
510  ASSIGN(C_OP1PER , 0), // Operation 1 period
511  ASSIGN(C_OP0PER , FFTLIB_MMA_SIZE_16_BIT), // Operation 0 period
512  ASSIGN(C_BSWPER , FFTLIB_MMA_SIZE_16_BIT), // B bank switch period
513  ASSIGN(C_CRSWPER , 0), // C bank switch period for read instructions
514  ASSIGN(C_CWSWPER , 0), // C bank switch period for computation writes
515  ASSIGN(C_CRRSTPER , FFTLIB_MMA_SIZE_16_BIT), // C read row offset reset period
516  ASSIGN(C_CWRSTPER , FFTLIB_MMA_SIZE_16_BIT), // C write row offset reset period for computations
517
518  //begin new
519  // ASSIGN(X_ReLU , 0x0), // Enable Rectified Linear Units non-linearity after optional saturation
520  // ASSIGN(X_RSVD1 , 0),
521  // ASSIGN(X_SAT , 0x1), // Enable saturation in the transfer buffer element type after optional rounding
522  // ASSIGN(X_RSVD2 , 0),
523  // ASSIGN(X_RE , 0x1), // Enable routing via 1/2 LSB addition after shifting
524  ASSIGN(X_ReLU, 0), /* Optional non-linearity. */
525  // begin new 2
526  ASSIGN(X_PSAT, 0),
527  // end new 2
528  ASSIGN(X_SAT_MIN_5_0, 0),
529  ASSIGN(X_SAT, 1), // Enable saturation in the transfer buffer element type after optional rounding
530  ASSIGN(X_SAT_MIN_12_6, 0),
531  ASSIGN(X_RE, 0x1), // Enable rounding via 1/2 LSB addition after shifting
532  ASSIGN(X_SAT_MIN_15_13, 0),
533  ASSIGN(X_RANGE, __MMA_X_CONFIG_RANGE_DISABLE_NOINIT), /* Min/Max range accumulation control on C matrix reads by X FSM */
534  ASSIGN(X_SCALE_SHIFT_CTRL, __MMA_X_CONFIG_SCALE_SHIFT_CTRL_DISABLE),
535  // end new
536  ASSIGN(X_RSVD3 , 0),
537  ASSIGN(X_SHIFT , 0), // 7 bits. Right shift amount, signed or unsigned depending on CTYPE field.
538  // begin new
539  //ASSIGN(X_RSVD4 , 0),
540  ASSIGN(X_VPACKN, __MMA_X_CONFIG_VPACKN_DISABLE),
541  // end new
542  ASSIGN(X_XTYPE , __MMA_X_CONFIG_XTYPE_INT16), // Transfer buffer element type. Not all combinations of CTYPE and XTYPE are supported
543  // begin new
544  //ASSIGN(X_RSVD5 , 0),
545  ASSIGN(X_SAT_MAX_3_0, 0),
546  // end new
547  ASSIGN(X_CTYPE , __MMA_X_CONFIG_CTYPE_INT64), // C matrix element type. This must be consistent with the B FSM setting
548  // begin new
549  // ASSIGN(X_RSVD6 , 0),
550  ASSIGN(X_SAT_MAX_8_4, 0),
551  // end new
552  ASSIGN(X_CSWPER , FFTLIB_MMA_SIZE_16_BIT), // C read bank switch period
553  ASSIGN(X_CRRSTPER , FFTLIB_MMA_SIZE_16_BIT), // C read row offset reset period
554  ASSIGN(X_COFFSET , 0x0), // C matrix row read address offset
555  ASSIGN(X_CSTART , 0x0), // Initial C bank selection
556  // begin new
557  //ASSIGN(X_RSVD7 , 0x0), // Reserved
558  ASSIGN(X_SAT_MAX_15_9, 0),
559  // end new
560
561  ASSIGN(RSVD , 0),
562  ASSIGN(PARITYCTRL , __MMA_NORMAL)
563 };
564 
/* Constant __HWA_CONFIG_REG_v1 initializer for a 16-bit case with an */
/* unsigned A operand and an unsigned transfer-buffer type. */
/* As set below: A_ATYPE is UINT16, B_BTYPE is SIZE16, C_ATYPE is UA, */
/* C_BTYPE and C_HWLDTYPE are INT16, X_XTYPE is UINT16 and X_CTYPE is INT64. */
/* The period fields that are non-zero all use FFTLIB_MMA_SIZE_16_BIT. */
/* X_SAT and X_RE are 1; X_SHIFT, X_ReLU and X_PSAT are 0. */
/* Presumably "i16u_i16s_o16u" means unsigned/signed 16-bit inputs and */
/* unsigned 16-bit output; confirm against the callers. */
/* Do not reorder entries: with WIN32 defined ASSIGN() expands to the bare */
/* value, so this becomes a positional initializer. */
565 const __HWA_CONFIG_REG_v1 configRegisterStruct_i16u_i16s_o16u =
566 {
567  ASSIGN(A_ATYPE , __MMA_A_CONFIG_ATYPE_UINT16),
568  ASSIGN(A_RSVD1 , 0),
569  ASSIGN(A_ALUTEN , __MMA_A_CONFIG_NOLUT),
570  ASSIGN(A_RSVD2 , 0),
571  // begin new
572  ASSIGN(A_ARF_CTRL , __MMA_A_CONFIG_ARF_DISABLE), // disable A register file
573  ASSIGN(A_ARF_BASE , 0), /* base set to 0; the A register file is disabled by A_ARF_CTRL above */
574  ASSIGN(A_RSVD3 , 0),
575  ASSIGN(A_ARF_SIZE , 64), /* ARF array size for read and write operations */
576  ASSIGN(A_RSVD4 , 0),
577  // end new
578  ASSIGN(B_BSWPER , FFTLIB_MMA_SIZE_16_BIT), // 32 bits
579  ASSIGN(B_BRSTPER , FFTLIB_MMA_SIZE_16_BIT), // 8 bits
580  ASSIGN(B_BTYPE , __MMA_B_CONFIG_SIZE16), // 2 bits
581  // begin new
582  ASSIGN(B_LDBOPT , __MMA_LDBOPT_MMA1), /* Control for enhanced B operand row loading */
583  ASSIGN(B_B4EXP , __MMA_B_CONFIG_B4EXP_MMA1), /* B operand expansion control to conv 4-bit ops to 8-bit ops */
584  // end new
585  ASSIGN(B_RSVD1 , 0),
586  ASSIGN(B_ORDER , __MMA_B_CONFIG_ROW), // 1 bit
587  ASSIGN(B_RSVD2 , 0),
588  ASSIGN(B_BSTART , 0), // 1 bit
589  // begin new
590  ASSIGN(B_BCNT1_ENABLE, 0),/* Enable bit for option B row write row cntr for B bank 1. */
591  // end new
592  ASSIGN(B_RSVD3 , 0),
593  ASSIGN(B_BOFFSET , 0), // 8 bits
594  ASSIGN(B_RSVD4 , 0),
595
596  ASSIGN(C_ATYPE , __MMA_C_CONFIG_ATYPE_UA),
597  // begin new
598  ASSIGN(C_ARF_BASE , 0), /* ARF read pointer base value when ARG_C7 is cleared */
599  ASSIGN(C_ARF_C7 , 1), /* ARF read addresses are supplied by the host C7 processor as an argument to the HWAOP or HWAOPXFER instructions. */
600  // end new
601  ASSIGN(C_BTYPE , __MMA_C_CONFIG_BTYPE_INT16),
602  ASSIGN(C_RSVD2 , 0),
603  ASSIGN(C_OPERATION0 , __MMA_C_CONFIG_MUL),
604  // begin new 2
    /* NOTE(review): the C_LOP0 entry ends with a stray line-continuation */
    /* character. It only splices the following comment line onto the entry, */
    /* so it has no effect, but it should be removed. */
605  ASSIGN(C_LOP0 , __MMA_C_CONFIG_LOP_C),\
606  // end new 2
607  ASSIGN(C_RSVD3 , 0),
608  ASSIGN(C_OPERATION1 , __MMA_C_CONFIG_MULPLUS),
609  // begin new 2
610  ASSIGN(C_LOP1 , __MMA_C_CONFIG_LOP_C),
611  // end new 2
612  // begin new
613  ASSIGN(C_BIASORDER , __MMA_C_CONFIG_BIAS_ORDER_COLUMN),
614  // end new
615  ASSIGN(C_RSVD4 , 0),
616  // begin new
617  //ASSIGN(C_HWLDDST , __MMA_C_CONFIG_HWLDDST_X4_0),
618  ASSIGN(C_HWLDDST, __MMA_C_CONFIG_HWLDDST_X4_0),
619  // end new
620  ASSIGN(C_RSVD5 , 0),
621  ASSIGN(C_HWLDTYPE , __MMA_C_CONFIG_HWLDTYPE_INT16),
622  ASSIGN(C_RSVD6 , 0),
623  ASSIGN(C_OPSTART , __MMA_C_CONFIG_OPSTART_OPERATION0), // No enum in MMA spec? Initial C operand selections
624  ASSIGN(C_BSTART , 0x0), // Initial B bank selection for reading B matrix data
625  ASSIGN(C_CRSTART , 0x0), // Initial C bank selection for reading operands
626  ASSIGN(C_CWSTART , 0x0), // Initial C bank selection for writing computation results
627  ASSIGN(C_CLSTART , 0x0), // Initial C bank selection for writing operands from HWALD*
628  ASSIGN(C_RSVD7 , 0),
629  ASSIGN(C_CROFFSET , 0x0), // 6-bits C row read offset
630  ASSIGN(C_RSVD8 , 0),
631  ASSIGN(C_CWOFFSET , 0x0), // C row write offset for computations
632  ASSIGN(C_RSVD9 , 0),
633  ASSIGN(C_CLOFFSET , 0x0), // C row write offset for HWALD* instructions
634  ASSIGN(C_RSVD10 , 0),
635  ASSIGN(C_CLSWPER , 0), // C bank switch period for HWALD* instruction writes
636  ASSIGN(C_CLRSTPER , 0), // C write row offset reset period for HWALD*
637  ASSIGN(C_OP1PER , 0), // Operation 1 period
638  ASSIGN(C_OP0PER , FFTLIB_MMA_SIZE_16_BIT), // Operation 0 period
639  ASSIGN(C_BSWPER , FFTLIB_MMA_SIZE_16_BIT), // B bank switch period
640  ASSIGN(C_CRSWPER , 0), // C bank switch period for read instructions
641  ASSIGN(C_CWSWPER , 0), // C bank switch period for computation writes
642  ASSIGN(C_CRRSTPER , FFTLIB_MMA_SIZE_16_BIT), // C read row offset reset period
643  ASSIGN(C_CWRSTPER , FFTLIB_MMA_SIZE_16_BIT), // C write row offset reset period for computations
644
645  // begin new
646  // ASSIGN(X_ReLU , 0x1), // Enable Rectified Linear Units non-linearity after optional saturation
647  // ASSIGN(X_RSVD1 , 0),
648  // ASSIGN(X_SAT , 0x1), // Enable saturation in the transfer buffer element type after optional rounding
649  // ASSIGN(X_RSVD2 , 0),
650  // ASSIGN(X_RE , 0x1), // Enable routing via 1/2 LSB addition after shifting
651  ASSIGN(X_ReLU, 0), /* Optional non-linearity. NOTE(review): the superseded entry above used 0x1 for this UINT16-output variant; confirm that 0 is intended. */
652  // begin new 2
653  ASSIGN(X_PSAT, 0),
654  // end new 2
655  ASSIGN(X_SAT_MIN_5_0, 0),
656  ASSIGN(X_SAT, 1), // Enable saturation in the transfer buffer element type after optional rounding
657  ASSIGN(X_SAT_MIN_12_6, 0),
658  ASSIGN(X_RE, 0x1), // Enable rounding via 1/2 LSB addition after shifting
659  ASSIGN(X_SAT_MIN_15_13, 0),
660  ASSIGN(X_RANGE, __MMA_X_CONFIG_RANGE_DISABLE_NOINIT), /* Min/Max range accumulation control on C matrix reads by X FSM */
661  ASSIGN(X_SCALE_SHIFT_CTRL, __MMA_X_CONFIG_SCALE_SHIFT_CTRL_DISABLE),
662  // end new
663  ASSIGN(X_RSVD3 , 0),
664  ASSIGN(X_SHIFT , 0), // 7 bits. Right shift amount, signed or unsigned depending on CTYPE field.
665  // begin new
666  //ASSIGN(X_RSVD4 , 0),
667  ASSIGN(X_VPACKN, __MMA_X_CONFIG_VPACKN_DISABLE),
668  // end new
669  ASSIGN(X_XTYPE , __MMA_X_CONFIG_XTYPE_UINT16), // Transfer buffer element type. Not all combinations of CTYPE and XTYPE are supported
670  // begin new
671  //ASSIGN(X_RSVD5 , 0),
672  ASSIGN(X_SAT_MAX_3_0, 0),
673  // end new
674  ASSIGN(X_CTYPE , __MMA_X_CONFIG_CTYPE_INT64), // C matrix element type. This must be consistent with the B FSM setting
675  // begin new
676  // ASSIGN(X_RSVD6 , 0),
677  ASSIGN(X_SAT_MAX_8_4, 0),
678  // end new
679  ASSIGN(X_CSWPER , FFTLIB_MMA_SIZE_16_BIT), // C read bank switch period
680  ASSIGN(X_CRRSTPER , FFTLIB_MMA_SIZE_16_BIT), // C read row offset reset period
681  ASSIGN(X_COFFSET , 0x0), // C matrix row read address offset
682  ASSIGN(X_CSTART , 0x0), // Initial C bank selection
683  // begin new
684  //ASSIGN(X_RSVD7 , 0x0), // Reserved
685  ASSIGN(X_SAT_MAX_15_9, 0),
686  // end new
687
688  ASSIGN(RSVD , 0),
689  ASSIGN(PARITYCTRL , __MMA_NORMAL)
690 };
691 
692 
693 /********************************
694  * Typical 8-bit configurations *
695  ********************************/
696 
697 const __HWA_CONFIG_REG_v1 configRegisterStruct_i8s_i8s_o8s =
698 {
699  ASSIGN(A_ATYPE , __MMA_A_CONFIG_ATYPE_INT8),
700  ASSIGN(A_RSVD1 , 0),
701  ASSIGN(A_ALUTEN , __MMA_A_CONFIG_NOLUT),
702  ASSIGN(A_RSVD2 , 0),
703  // begin new
704  ASSIGN(A_ARF_CTRL , __MMA_A_CONFIG_ARF_DISABLE), // disable A register file
705  ASSIGN(A_ARF_BASE , 0), /* disable A register file */
706  ASSIGN(A_RSVD3 , 0),
707  ASSIGN(A_ARF_SIZE , 64), /* ARF array size for read and write operations */
708  ASSIGN(A_RSVD4 , 0),
709  // end new
710  ASSIGN(B_BSWPER , FFTLIB_MMA_SIZE_8_BIT), // 32 bits
711  ASSIGN(B_BRSTPER , FFTLIB_MMA_SIZE_8_BIT), // 8 bits
712  ASSIGN(B_BTYPE , __MMA_B_CONFIG_SIZE8), // 2 bits
713  // begin new
714  ASSIGN(B_LDBOPT , __MMA_LDBOPT_MMA1), /* Control for enhanced B operand row loading */
715  ASSIGN(B_B4EXP , __MMA_B_CONFIG_B4EXP_MMA1), /* B operand expansion control to conv 4-bit ops to 8-bit ops */
716  // end new
717  ASSIGN(B_RSVD1 , 0),
718  ASSIGN(B_ORDER , __MMA_B_CONFIG_ROW), // 1 bit
719  ASSIGN(B_RSVD2 , 0),
720  ASSIGN(B_BSTART , 0), // 1 bits
721  // begin new
722  ASSIGN(B_BCNT1_ENABLE, 0),/* Enable bit for option B row write row cntr for B bank 1. */
723  // end new
724  ASSIGN(B_RSVD3 , 0),
725  ASSIGN(B_BOFFSET , 0), // 8 bits
726  ASSIGN(B_RSVD4 , 0),
727 
728  ASSIGN(C_ATYPE , __MMA_C_CONFIG_ATYPE_SA),
729  // begin new
730  ASSIGN(C_ARF_BASE , 0), /* ARF read pointer base value when ARG_C7 is cleared */
731  ASSIGN(C_ARF_C7 , 1), /* ARF read addresses are supplied by the host C7 processor as an argument to the HWAOP or HWAOPXFER instructions. */
732  // end new
733  ASSIGN(C_BTYPE , __MMA_C_CONFIG_BTYPE_INT8),
734  ASSIGN(C_RSVD2 , 0),
735  ASSIGN(C_OPERATION0 , __MMA_C_CONFIG_MUL),
736  // begin new 2
737  ASSIGN(C_LOP0 , __MMA_C_CONFIG_LOP_C),\
738  // end new 2
739  ASSIGN(C_RSVD3 , 0),
740  ASSIGN(C_OPERATION1 , __MMA_C_CONFIG_MULPLUS),
741  // begin new 2
742  ASSIGN(C_LOP1 , __MMA_C_CONFIG_LOP_C),
743  // end new 2
744  // begin new
745  ASSIGN(C_BIASORDER , __MMA_C_CONFIG_BIAS_ORDER_COLUMN),
746  // end new
747  ASSIGN(C_RSVD4 , 0),
748  // begin new
749  //ASSIGN(C_HWLDDST , __MMA_C_CONFIG_HWLDDST_X4_0),
750  ASSIGN(C_HWLDDST, __MMA_C_CONFIG_HWLDDST_X4_0),
751  // end new
752  ASSIGN(C_RSVD5 , 0),
753  ASSIGN(C_HWLDTYPE , __MMA_C_CONFIG_HWLDTYPE_INT8),
754  ASSIGN(C_RSVD6 , 0),
755 
756  ASSIGN(C_OPSTART , __MMA_C_CONFIG_OPSTART_OPERATION0), // No enum in MMA spec? Initial C operand selections
757  ASSIGN(C_BSTART , 0x0), // Initial B bank selection for reading B matrix data
758  ASSIGN(C_CRSTART , 0x0), // Initial C bank selection for reading operands
759  ASSIGN(C_CWSTART , 0x0), // Initial C bank selection for writing computation results
760  ASSIGN(C_CLSTART , 0x0), // Initial C bank selection for writing operands from HWALD*
761  ASSIGN(C_RSVD7 , 0),
762  ASSIGN(C_CROFFSET , 0x0), // 6-bits C row read offset
763  ASSIGN(C_RSVD8 , 0),
764  ASSIGN(C_CWOFFSET , 0x0), // C row write offset for computations
765  ASSIGN(C_RSVD9 , 0),
766  ASSIGN(C_CLOFFSET , 0x0), // C row write offset for HWALD* instructions
767  ASSIGN(C_RSVD10 , 0),
768  ASSIGN(C_CLSWPER , 0), // C bank switch period for HWALD* instruction writes
769  ASSIGN(C_CLRSTPER , 0), // C write row offset reset period for HWALD*
770  ASSIGN(C_OP1PER , 0), // Operation 1 period
771  ASSIGN(C_OP0PER , FFTLIB_MMA_SIZE_8_BIT), // Operation 0 period
772  ASSIGN(C_BSWPER , FFTLIB_MMA_SIZE_8_BIT), // B bank switch period
773  ASSIGN(C_CRSWPER , 0), // C bank switch period for read instructions
774  ASSIGN(C_CWSWPER , 0), // C bank switch period for computation writes
775  ASSIGN(C_CRRSTPER , FFTLIB_MMA_SIZE_8_BIT), // C read row offset reset period
776  ASSIGN(C_CWRSTPER , FFTLIB_MMA_SIZE_8_BIT), // C write row offset reset period for computations
777 
778  // begin new
779  // ASSIGN(X_ReLU , 0x0), // Enable Rectified Linear Units non-linearity after optional saturation
780  // ASSIGN(X_RSVD1 , 0),
781  // ASSIGN(X_SAT , 0x1), // Enable saturation in the transfer buffer element type after optional rounding
782  // ASSIGN(X_RSVD2 , 0),
783  // ASSIGN(X_RE , 0x1), // Enable routing via 1/2 LSB addition after shifting
784  ASSIGN(X_ReLU, 0), /* Optional non-linearity. */
785  // begin new 2
786  ASSIGN(X_PSAT, 0),
787  // end new 2
788  ASSIGN(X_SAT_MIN_5_0, 0),
789  ASSIGN(X_SAT, 1), // Enable saturation in the transfer buffer element type after optional rounding
790  ASSIGN(X_SAT_MIN_12_6, 0),
791  ASSIGN(X_RE, 0x1), // Enable routing via 1/2 LSB addition after shifting
792  ASSIGN(X_SAT_MIN_15_13, 0),
793  ASSIGN(X_RANGE, __MMA_X_CONFIG_RANGE_DISABLE_NOINIT), /* Min/Max range accumulation control on C matrix reads by X FSM */
794  ASSIGN(X_SCALE_SHIFT_CTRL, __MMA_X_CONFIG_SCALE_SHIFT_CTRL_DISABLE),
795  // end new
796  ASSIGN(X_RSVD3 , 0),
797  ASSIGN(X_SHIFT , 0), // 7 bits Right shift amount), signed or unsigned depending on CTYPE fieldASSIGN(
798  // begin new
799  //ASSIGN(X_RSVD4 , 0),
800  ASSIGN(X_VPACKN, __MMA_X_CONFIG_VPACKN_DISABLE),
801  // end new
802  ASSIGN(X_XTYPE , __MMA_X_CONFIG_XTYPE_INT8), // Transfer buffer element typeASSIGN( Not all combinations of CTYPE and XTYPE are supported
803  // begin new
804  //ASSIGN(X_RSVD5 , 0),
805  ASSIGN(X_SAT_MAX_3_0, 0),
806  // end new
807  ASSIGN(X_CTYPE , __MMA_X_CONFIG_CTYPE_INT32), // C matrix element typeASSIGN( This must be consistent with the B FSM setting
808  // begin new
809  // ASSIGN(X_RSVD6 , 0),
810  ASSIGN(X_SAT_MAX_8_4, 0),
811  // end new
812  ASSIGN(X_CSWPER , FFTLIB_MMA_SIZE_8_BIT), // C read bank switch period
813  ASSIGN(X_CRRSTPER , FFTLIB_MMA_SIZE_8_BIT), // C read row offset reset period
814  ASSIGN(X_COFFSET , 0x0), // C matrix row read address offset
815  ASSIGN(X_CSTART , 0x0), // Initial C bank selection
816  // begin new
817  //ASSIGN(X_RSVD7 , 0x0), // Reserved
818  ASSIGN(X_SAT_MAX_15_9, 0),
819  // end new
820 
821  ASSIGN(RSVD , 0),
822  ASSIGN(PARITYCTRL , __MMA_NORMAL)
823 };
824 
825 
826 const __HWA_CONFIG_REG_v1 configRegisterStruct_i8s_i8s_o8u =
827 {
828  ASSIGN(A_ATYPE , __MMA_A_CONFIG_ATYPE_INT8),
829  ASSIGN(A_RSVD1 , 0),
830  ASSIGN(A_ALUTEN , __MMA_A_CONFIG_NOLUT),
831  ASSIGN(A_RSVD2 , 0),
832  // begin new
833  ASSIGN(A_ARF_CTRL , __MMA_A_CONFIG_ARF_DISABLE), // disable A register file
834  ASSIGN(A_ARF_BASE , 0), /* disable A register file */
835  ASSIGN(A_RSVD3 , 0),
836  ASSIGN(A_ARF_SIZE , 64), /* ARF array size for read and write operations */
837  ASSIGN(A_RSVD4 , 0),
838  // end new
839  ASSIGN(B_BSWPER , FFTLIB_MMA_SIZE_8_BIT), // 32 bits
840  ASSIGN(B_BRSTPER , FFTLIB_MMA_SIZE_8_BIT), // 8 bits
841  ASSIGN(B_BTYPE , __MMA_B_CONFIG_SIZE8), // 2 bits
842  // begin new
843  ASSIGN(B_LDBOPT , __MMA_LDBOPT_MMA1), /* Control for enhanced B operand row loading */
844  ASSIGN(B_B4EXP , __MMA_B_CONFIG_B4EXP_MMA1), /* B operand expansion control to conv 4-bit ops to 8-bit ops */
845  // end new
846  ASSIGN(B_RSVD1 , 0),
847  ASSIGN(B_ORDER , __MMA_B_CONFIG_ROW), // 1 bit
848  ASSIGN(B_RSVD2 , 0),
849  ASSIGN(B_BSTART , 0), // 1 bits
850  // begin new
851  ASSIGN(B_BCNT1_ENABLE, 0),/* Enable bit for option B row write row cntr for B bank 1. */
852  // end new
853  ASSIGN(B_RSVD3 , 0),
854  ASSIGN(B_BOFFSET , 0), // 8 bits
855  ASSIGN(B_RSVD4 , 0),
856 
857  ASSIGN(C_ATYPE , __MMA_C_CONFIG_ATYPE_SA),
858  // begin new
859  ASSIGN(C_ARF_BASE , 0), /* ARF read pointer base value when ARG_C7 is cleared */
860  ASSIGN(C_ARF_C7 , 1), /* ARF read addresses are supplied by the host C7 processor as an argument to the HWAOP or HWAOPXFER instructions. */
861  // end new
862  ASSIGN(C_BTYPE , __MMA_C_CONFIG_BTYPE_INT8),
863  ASSIGN(C_RSVD2 , 0),
864  ASSIGN(C_OPERATION0 , __MMA_C_CONFIG_MUL),
865  // begin new 2
866  ASSIGN(C_LOP0 , __MMA_C_CONFIG_LOP_C),\
867  // end new 2
868  ASSIGN(C_RSVD3 , 0),
869  ASSIGN(C_OPERATION1 , __MMA_C_CONFIG_MULPLUS),
870  // begin new 2
871  ASSIGN(C_LOP1 , __MMA_C_CONFIG_LOP_C),
872  // end new 2
873  // begin new
874  ASSIGN(C_BIASORDER , __MMA_C_CONFIG_BIAS_ORDER_COLUMN),
875  // end new
876  ASSIGN(C_RSVD4 , 0),
877  // begin new
878  //ASSIGN(C_HWLDDST , __MMA_C_CONFIG_HWLDDST_X4_0),
879  ASSIGN(C_HWLDDST, __MMA_C_CONFIG_HWLDDST_X4_0),
880  // end new
881  ASSIGN(C_RSVD5 , 0),
882  ASSIGN(C_HWLDTYPE , __MMA_C_CONFIG_HWLDTYPE_INT8),
883  ASSIGN(C_RSVD6 , 0),
884 
885  ASSIGN(C_OPSTART , __MMA_C_CONFIG_OPSTART_OPERATION0), // No enum in MMA spec? Initial C operand selections
886  ASSIGN(C_BSTART , 0x0), // Initial B bank selection for reading B matrix data
887  ASSIGN(C_CRSTART , 0x0), // Initial C bank selection for reading operands
888  ASSIGN(C_CWSTART , 0x0), // Initial C bank selection for writing computation results
889  ASSIGN(C_CLSTART , 0x0), // Initial C bank selection for writing operands from HWALD*
890  ASSIGN(C_RSVD7 , 0),
891  ASSIGN(C_CROFFSET , 0x0), // 6-bits C row read offset
892  ASSIGN(C_RSVD8 , 0),
893  ASSIGN(C_CWOFFSET , 0x0), // C row write offset for computations
894  ASSIGN(C_RSVD9 , 0),
895  ASSIGN(C_CLOFFSET , 0x0), // C row write offset for HWALD* instructions
896  ASSIGN(C_RSVD10 , 0),
897  ASSIGN(C_CLSWPER , 0), // C bank switch period for HWALD* instruction writes
898  ASSIGN(C_CLRSTPER , 0), // C write row offset reset period for HWALD*
899  ASSIGN(C_OP1PER , 0), // Operation 1 period
900  ASSIGN(C_OP0PER , FFTLIB_MMA_SIZE_8_BIT), // Operation 0 period
901  ASSIGN(C_BSWPER , FFTLIB_MMA_SIZE_8_BIT), // B bank switch period
902  ASSIGN(C_CRSWPER , 0), // C bank switch period for read instructions
903  ASSIGN(C_CWSWPER , 0), // C bank switch period for computation writes
904  ASSIGN(C_CRRSTPER , FFTLIB_MMA_SIZE_8_BIT), // C read row offset reset period
905  ASSIGN(C_CWRSTPER , FFTLIB_MMA_SIZE_8_BIT), // C write row offset reset period for computations
906 
907  // begin new
908  // ASSIGN(X_ReLU , 0x1), // Enable Rectified Linear Units non-linearity after optional saturation
909  // ASSIGN(X_RSVD1 , 0),
910  // ASSIGN(X_SAT , 0x1), // Enable saturation in the transfer buffer element type after optional rounding
911  // ASSIGN(X_RSVD2 , 0),
912  // ASSIGN(X_RE , 0x1), // Enable routing via 1/2 LSB addition after shifting
913  ASSIGN(X_ReLU, 0), /* Optional non-linearity. */
914  // begin new 2
915  ASSIGN(X_PSAT, 0),
916  // end new 2
917  ASSIGN(X_SAT_MIN_5_0, 0),
918  ASSIGN(X_SAT, 1), // Enable saturation in the transfer buffer element type after optional rounding
919  ASSIGN(X_SAT_MIN_12_6, 0),
920  ASSIGN(X_RE, 0x1), // Enable routing via 1/2 LSB addition after shifting
921  ASSIGN(X_SAT_MIN_15_13, 0),
922  ASSIGN(X_RANGE, __MMA_X_CONFIG_RANGE_DISABLE_NOINIT), /* Min/Max range accumulation control on C matrix reads by X FSM */
923  ASSIGN(X_SCALE_SHIFT_CTRL, __MMA_X_CONFIG_SCALE_SHIFT_CTRL_DISABLE),
924  // end new
925  ASSIGN(X_RSVD3 , 0),
926  ASSIGN(X_SHIFT , 0), // 7 bits Right shift amount), signed or unsigned depending on CTYPE fieldASSIGN(
927  // begin new
928  //ASSIGN(X_RSVD4 , 0),
929  ASSIGN(X_VPACKN, __MMA_X_CONFIG_VPACKN_DISABLE),
930  // end new
931  ASSIGN(X_XTYPE , __MMA_X_CONFIG_XTYPE_UINT8), // Transfer buffer element typeASSIGN( Not all combinations of CTYPE and XTYPE are supported
932  // begin new
933  //ASSIGN(X_RSVD5 , 0),
934  ASSIGN(X_SAT_MAX_3_0, 0),
935  // end new
936  ASSIGN(X_CTYPE , __MMA_X_CONFIG_CTYPE_INT32), // C matrix element typeASSIGN( This must be consistent with the B FSM setting
937  // begin new
938  // ASSIGN(X_RSVD6 , 0),
939  ASSIGN(X_SAT_MAX_8_4, 0),
940  // end new
941  ASSIGN(X_CSWPER , FFTLIB_MMA_SIZE_8_BIT), // C read bank switch period
942  ASSIGN(X_CRRSTPER , FFTLIB_MMA_SIZE_8_BIT), // C read row offset reset period
943  ASSIGN(X_COFFSET , 0x0), // C matrix row read address offset
944  ASSIGN(X_CSTART , 0x0), // Initial C bank selection
945  // begin new
946  //ASSIGN(X_RSVD7 , 0x0), // Reserved
947  ASSIGN(X_SAT_MAX_15_9, 0),
948  // end new
949 
950  ASSIGN(RSVD , 0),
951  ASSIGN(PARITYCTRL , __MMA_NORMAL)
952 };
953 
954 
955 const __HWA_CONFIG_REG_v1 configRegisterStruct_i8u_i8s_o8s =
956 {
957  ASSIGN(A_ATYPE , __MMA_A_CONFIG_ATYPE_UINT8),
958  ASSIGN(A_RSVD1 , 0),
959  ASSIGN(A_ALUTEN , __MMA_A_CONFIG_NOLUT),
960  ASSIGN(A_RSVD2 , 0),
961  // begin new
962  ASSIGN(A_ARF_CTRL , __MMA_A_CONFIG_ARF_DISABLE), // disable A register file
963  ASSIGN(A_ARF_BASE , 0), /* disable A register file */
964  ASSIGN(A_RSVD3 , 0),
965  ASSIGN(A_ARF_SIZE , 64), /* ARF array size for read and write operations */
966  ASSIGN(A_RSVD4 , 0),
967  // end new
968  ASSIGN(B_BSWPER , FFTLIB_MMA_SIZE_8_BIT), // 32 bits
969  ASSIGN(B_BRSTPER , FFTLIB_MMA_SIZE_8_BIT), // 8 bits
970  ASSIGN(B_BTYPE , __MMA_B_CONFIG_SIZE8), // 2 bits
971  // begin new
972  ASSIGN(B_LDBOPT , __MMA_LDBOPT_MMA1), /* Control for enhanced B operand row loading */
973  ASSIGN(B_B4EXP , __MMA_B_CONFIG_B4EXP_MMA1), /* B operand expansion control to conv 4-bit ops to 8-bit ops */
974  // end new
975  ASSIGN(B_RSVD1 , 0),
976  ASSIGN(B_ORDER , __MMA_B_CONFIG_ROW), // 1 bit
977  ASSIGN(B_RSVD2 , 0),
978  ASSIGN(B_BSTART , 0), // 1 bits
979  // begin new
980  ASSIGN(B_BCNT1_ENABLE, 0),/* Enable bit for option B row write row cntr for B bank 1. */
981  // end new
982  ASSIGN(B_RSVD3 , 0),
983  ASSIGN(B_BOFFSET , 0), // 8 bits
984  ASSIGN(B_RSVD4 , 0),
985 
986  ASSIGN(C_ATYPE , __MMA_C_CONFIG_ATYPE_UA),
987  // begin new
988  ASSIGN(C_ARF_BASE , 0), /* ARF read pointer base value when ARG_C7 is cleared */
989  ASSIGN(C_ARF_C7 , 1), /* ARF read addresses are supplied by the host C7 processor as an argument to the HWAOP or HWAOPXFER instructions. */
990  // end new
991  ASSIGN(C_BTYPE , __MMA_C_CONFIG_BTYPE_INT8),
992  ASSIGN(C_RSVD2 , 0),
993  ASSIGN(C_OPERATION0 , __MMA_C_CONFIG_MUL),
994  // begin new 2
995  ASSIGN(C_LOP0 , __MMA_C_CONFIG_LOP_C),\
996  // end new 2
997  ASSIGN(C_RSVD3 , 0),
998  ASSIGN(C_OPERATION1 , __MMA_C_CONFIG_MULPLUS),
999  // begin new 2
1000  ASSIGN(C_LOP1 , __MMA_C_CONFIG_LOP_C),
1001  // end new 2
1002  // begin new
1003  ASSIGN(C_BIASORDER , __MMA_C_CONFIG_BIAS_ORDER_COLUMN),
1004  // end new
1005  ASSIGN(C_RSVD4 , 0),
1006  // begin new
1007  //ASSIGN(C_HWLDDST , __MMA_C_CONFIG_HWLDDST_X4_0),
1008  ASSIGN(C_HWLDDST, __MMA_C_CONFIG_HWLDDST_X4_0),
1009  // end new
1010  ASSIGN(C_RSVD5 , 0),
1011  ASSIGN(C_HWLDTYPE , __MMA_C_CONFIG_HWLDTYPE_INT8),
1012  ASSIGN(C_RSVD6 , 0),
1013 
1014  ASSIGN(C_OPSTART , __MMA_C_CONFIG_OPSTART_OPERATION0), // No enum in MMA spec? Initial C operand selections
1015  ASSIGN(C_BSTART , 0x0), // Initial B bank selection for reading B matrix data
1016  ASSIGN(C_CRSTART , 0x0), // Initial C bank selection for reading operands
1017  ASSIGN(C_CWSTART , 0x0), // Initial C bank selection for writing computation results
1018  ASSIGN(C_CLSTART , 0x0), // Initial C bank selection for writing operands from HWALD*
1019  ASSIGN(C_RSVD7 , 0),
1020  ASSIGN(C_CROFFSET , 0x0), // 6-bits C row read offset
1021  ASSIGN(C_RSVD8 , 0),
1022  ASSIGN(C_CWOFFSET , 0x0), // C row write offset for computations
1023  ASSIGN(C_RSVD9 , 0),
1024  ASSIGN(C_CLOFFSET , 0x0), // C row write offset for HWALD* instructions
1025  ASSIGN(C_RSVD10 , 0),
1026  ASSIGN(C_CLSWPER , 0), // C bank switch period for HWALD* instruction writes
1027  ASSIGN(C_CLRSTPER , 0), // C write row offset reset period for HWALD*
1028  ASSIGN(C_OP1PER , 0), // Operation 1 period
1029  ASSIGN(C_OP0PER , FFTLIB_MMA_SIZE_8_BIT), // Operation 0 period
1030  ASSIGN(C_BSWPER , FFTLIB_MMA_SIZE_8_BIT), // B bank switch period
1031  ASSIGN(C_CRSWPER , 0), // C bank switch period for read instructions
1032  ASSIGN(C_CWSWPER , 0), // C bank switch period for computation writes
1033  ASSIGN(C_CRRSTPER , FFTLIB_MMA_SIZE_8_BIT), // C read row offset reset period
1034  ASSIGN(C_CWRSTPER , FFTLIB_MMA_SIZE_8_BIT), // C write row offset reset period for computations
1035 
1036  // begin new
1037  // ASSIGN(X_ReLU , 0x0), // Enable Rectified Linear Units non-linearity after optional saturation
1038  // ASSIGN(X_RSVD1 , 0),
1039  // ASSIGN(X_SAT , 0x1), // Enable saturation in the transfer buffer element type after optional rounding
1040  // ASSIGN(X_RSVD2 , 0),
1041  // ASSIGN(X_RE , 0x1), // Enable routing via 1/2 LSB addition after shifting
1042  ASSIGN(X_ReLU, 0), /* Optional non-linearity. */
1043  // begin new 2
1044  ASSIGN(X_PSAT, 0),
1045  // end new 2
1046  ASSIGN(X_SAT_MIN_5_0, 0),
1047  ASSIGN(X_SAT, 1), // Enable saturation in the transfer buffer element type after optional rounding
1048  ASSIGN(X_SAT_MIN_12_6, 0),
1049  ASSIGN(X_RE, 0x1), // Enable routing via 1/2 LSB addition after shifting
1050  ASSIGN(X_SAT_MIN_15_13, 0),
1051  ASSIGN(X_RANGE, __MMA_X_CONFIG_RANGE_DISABLE_NOINIT), /* Min/Max range accumulation control on C matrix reads by X FSM */
1052  ASSIGN(X_SCALE_SHIFT_CTRL, __MMA_X_CONFIG_SCALE_SHIFT_CTRL_DISABLE),
1053  // end new
1054  ASSIGN(X_RSVD3 , 0),
1055  ASSIGN(X_SHIFT , 0), // 7 bits Right shift amount), signed or unsigned depending on CTYPE fieldASSIGN(
1056  // begin new
1057  //ASSIGN(X_RSVD4 , 0),
1058  ASSIGN(X_VPACKN, __MMA_X_CONFIG_VPACKN_DISABLE),
1059  // end new
1060  ASSIGN(X_XTYPE , __MMA_X_CONFIG_XTYPE_INT8), // Transfer buffer element typeASSIGN( Not all combinations of CTYPE and XTYPE are supported
1061  // begin new
1062  //ASSIGN(X_RSVD5 , 0),
1063  ASSIGN(X_SAT_MAX_3_0, 0),
1064  // end new
1065  ASSIGN(X_CTYPE , __MMA_X_CONFIG_CTYPE_INT32), // C matrix element typeASSIGN( This must be consistent with the B FSM setting
1066  // begin new
1067  // ASSIGN(X_RSVD6 , 0),
1068  ASSIGN(X_SAT_MAX_8_4, 0),
1069  // end new
1070  ASSIGN(X_CSWPER , FFTLIB_MMA_SIZE_8_BIT), // C read bank switch period
1071  ASSIGN(X_CRRSTPER , FFTLIB_MMA_SIZE_8_BIT), // C read row offset reset period
1072  ASSIGN(X_COFFSET , 0x0), // C matrix row read address offset
1073  ASSIGN(X_CSTART , 0x0), // Initial C bank selection
1074  // begin new
1075  //ASSIGN(X_RSVD7 , 0x0), // Reserved
1076  ASSIGN(X_SAT_MAX_15_9, 0),
1077  // end new
1078 
1079  ASSIGN(RSVD , 0),
1080  ASSIGN(PARITYCTRL , __MMA_NORMAL)
1081 };
1082 
1083 const __HWA_CONFIG_REG_v1 configRegisterStruct_i8u_i8s_o8u =
1084 {
1085  ASSIGN(A_ATYPE , __MMA_A_CONFIG_ATYPE_UINT8),
1086  ASSIGN(A_RSVD1 , 0),
1087  ASSIGN(A_ALUTEN , __MMA_A_CONFIG_NOLUT),
1088  ASSIGN(A_RSVD2 , 0),
1089  // begin new
1090  ASSIGN(A_ARF_CTRL , __MMA_A_CONFIG_ARF_DISABLE), // disable A register file
1091  ASSIGN(A_ARF_BASE , 0), /* disable A register file */
1092  ASSIGN(A_RSVD3 , 0),
1093  ASSIGN(A_ARF_SIZE , 64), /* ARF array size for read and write operations */
1094  ASSIGN(A_RSVD4 , 0),
1095  // end new
1096  ASSIGN(B_BSWPER , FFTLIB_MMA_SIZE_8_BIT), // 32 bits
1097  ASSIGN(B_BRSTPER , FFTLIB_MMA_SIZE_8_BIT), // 8 bits
1098  ASSIGN(B_BTYPE , __MMA_B_CONFIG_SIZE8), // 2 bits
1099  // begin new
1100  ASSIGN(B_LDBOPT , __MMA_LDBOPT_MMA1), /* Control for enhanced B operand row loading */
1101  ASSIGN(B_B4EXP , __MMA_B_CONFIG_B4EXP_MMA1), /* B operand expansion control to conv 4-bit ops to 8-bit ops */
1102  // end new
1103  ASSIGN(B_RSVD1 , 0),
1104  ASSIGN(B_ORDER , __MMA_B_CONFIG_ROW), // 1 bit
1105  ASSIGN(B_RSVD2 , 0),
1106  ASSIGN(B_BSTART , 0), // 1 bits
1107  // begin new
1108  ASSIGN(B_BCNT1_ENABLE, 0),/* Enable bit for option B row write row cntr for B bank 1. */
1109  // end new
1110  ASSIGN(B_RSVD3 , 0),
1111  ASSIGN(B_BOFFSET , 0), // 8 bits
1112  ASSIGN(B_RSVD4 , 0),
1113 
1114  ASSIGN(C_ATYPE , __MMA_C_CONFIG_ATYPE_UA),
1115  // begin new
1116  ASSIGN(C_ARF_BASE , 0), /* ARF read pointer base value when ARG_C7 is cleared */
1117  ASSIGN(C_ARF_C7 , 1), /* ARF read addresses are supplied by the host C7 processor as an argument to the HWAOP or HWAOPXFER instructions. */
1118  // end new
1119  ASSIGN(C_BTYPE , __MMA_C_CONFIG_BTYPE_INT8),
1120  ASSIGN(C_RSVD2 , 0),
1121  ASSIGN(C_OPERATION0 , __MMA_C_CONFIG_MUL),
1122  // begin new 2
1123  ASSIGN(C_LOP0 , __MMA_C_CONFIG_LOP_C),\
1124  // end new 2
1125  ASSIGN(C_RSVD3 , 0),
1126  ASSIGN(C_OPERATION1 , __MMA_C_CONFIG_MULPLUS),
1127  // begin new 2
1128  ASSIGN(C_LOP1 , __MMA_C_CONFIG_LOP_C),
1129  // end new 2
1130  // begin new
1131  ASSIGN(C_BIASORDER , __MMA_C_CONFIG_BIAS_ORDER_COLUMN),
1132  // end new
1133  ASSIGN(C_RSVD4 , 0),
1134  // begin new
1135  //ASSIGN(C_HWLDDST , __MMA_C_CONFIG_HWLDDST_X4_0),
1136  ASSIGN(C_HWLDDST, __MMA_C_CONFIG_HWLDDST_X4_0),
1137  // end new
1138  ASSIGN(C_RSVD5 , 0),
1139  ASSIGN(C_HWLDTYPE , __MMA_C_CONFIG_HWLDTYPE_INT8),
1140  ASSIGN(C_RSVD6 , 0),
1141  ASSIGN(C_OPSTART , __MMA_C_CONFIG_OPSTART_OPERATION0), // No enum in MMA spec? Initial C operand selections
1142  ASSIGN(C_BSTART , 0x0), // Initial B bank selection for reading B matrix data
1143  ASSIGN(C_CRSTART , 0x0), // Initial C bank selection for reading operands
1144  ASSIGN(C_CWSTART , 0x0), // Initial C bank selection for writing computation results
1145  ASSIGN(C_CLSTART , 0x0), // Initial C bank selection for writing operands from HWALD*
1146  ASSIGN(C_RSVD7 , 0),
1147  ASSIGN(C_CROFFSET , 0x0), // 6-bits C row read offset
1148  ASSIGN(C_RSVD8 , 0),
1149  ASSIGN(C_CWOFFSET , 0x0), // C row write offset for computations
1150  ASSIGN(C_RSVD9 , 0),
1151  ASSIGN(C_CLOFFSET , 0x0), // C row write offset for HWALD* instructions
1152  ASSIGN(C_RSVD10 , 0),
1153  ASSIGN(C_CLSWPER , 0), // C bank switch period for HWALD* instruction writes
1154  ASSIGN(C_CLRSTPER , 0), // C write row offset reset period for HWALD*
1155  ASSIGN(C_OP1PER , 0), // Operation 1 period
1156  ASSIGN(C_OP0PER , FFTLIB_MMA_SIZE_8_BIT), // Operation 0 period
1157  ASSIGN(C_BSWPER , FFTLIB_MMA_SIZE_8_BIT), // B bank switch period
1158  ASSIGN(C_CRSWPER , 0), // C bank switch period for read instructions
1159  ASSIGN(C_CWSWPER , 0), // C bank switch period for computation writes
1160  ASSIGN(C_CRRSTPER , FFTLIB_MMA_SIZE_8_BIT), // C read row offset reset period
1161  ASSIGN(C_CWRSTPER , FFTLIB_MMA_SIZE_8_BIT), // C write row offset reset period for computations
1162 
1163  // begin new
1164  // ASSIGN(X_ReLU , 0x1), // Enable Rectified Linear Units non-linearity after optional saturation
1165  // ASSIGN(X_RSVD1 , 0),
1166  // ASSIGN(X_SAT , 0x1), // Enable saturation in the transfer buffer element type after optional rounding
1167  // ASSIGN(X_RSVD2 , 0),
1168  // ASSIGN(X_RE , 0x1), // Enable routing via 1/2 LSB addition after shifting
1169  ASSIGN(X_ReLU, 0), /* Optional non-linearity. */
1170  // begin new 2
1171  ASSIGN(X_PSAT, 0),
1172  // end new 2
1173  ASSIGN(X_SAT_MIN_5_0, 0),
1174  ASSIGN(X_SAT, 1), // Enable saturation in the transfer buffer element type after optional rounding
1175  ASSIGN(X_SAT_MIN_12_6, 0),
1176  ASSIGN(X_RE, 0x1), // Enable routing via 1/2 LSB addition after shifting
1177  ASSIGN(X_SAT_MIN_15_13, 0),
1178  ASSIGN(X_RANGE, __MMA_X_CONFIG_RANGE_DISABLE_NOINIT), /* Min/Max range accumulation control on C matrix reads by X FSM */
1179  ASSIGN(X_SCALE_SHIFT_CTRL, __MMA_X_CONFIG_SCALE_SHIFT_CTRL_DISABLE),
1180  // end new
1181  ASSIGN(X_RSVD3 , 0),
1182  ASSIGN(X_SHIFT , 0), // 7 bits Right shift amount), signed or unsigned depending on CTYPE fieldASSIGN(
1183  // begin new
1184  //ASSIGN(X_RSVD4 , 0),
1185  ASSIGN(X_VPACKN, __MMA_X_CONFIG_VPACKN_DISABLE),
1186  // end new
1187  ASSIGN(X_XTYPE , __MMA_X_CONFIG_XTYPE_UINT8), // Transfer buffer element typeASSIGN( Not all combinations of CTYPE and XTYPE are supported
1188  // begin new
1189  //ASSIGN(X_RSVD5 , 0),
1190  ASSIGN(X_SAT_MAX_3_0, 0),
1191  // end new
1192  ASSIGN(X_CTYPE , __MMA_X_CONFIG_CTYPE_INT32), // C matrix element typeASSIGN( This must be consistent with the B FSM setting
1193  // begin new
1194  // ASSIGN(X_RSVD6 , 0),
1195  ASSIGN(X_SAT_MAX_8_4, 0),
1196  // end new
1197  ASSIGN(X_CSWPER , FFTLIB_MMA_SIZE_8_BIT), // C read bank switch period
1198  ASSIGN(X_CRRSTPER , FFTLIB_MMA_SIZE_8_BIT), // C read row offset reset period
1199  ASSIGN(X_COFFSET , 0x0), // C matrix row read address offset
1200  ASSIGN(X_CSTART , 0x0), // Initial C bank selection
1201  // begin new
1202  //ASSIGN(X_RSVD7 , 0x0), // Reserved
1203  ASSIGN(X_SAT_MAX_15_9, 0),
1204  // end new
1205 
1206  ASSIGN(RSVD , 0),
1207  ASSIGN(PARITYCTRL , __MMA_NORMAL)
1208 };
1209 
1210 /* -------------------------------------------------------------------------- */
1211 /* MISRAC Rule 4.9(DEFINE.FUNC) Deviation: The advisory is not being */
1212 /* addressed so as not to lose portability across different platforms. */
1213 /* -------------------------------------------------------------------------- */
/* ASSIGN(param, value) builds one entry of an aggregate initializer:        */
/*   - WIN32 defined : expands to `value` only, so entries are positional and */
/*                     `param` is ignored by the compiler.                    */
/*   - otherwise     : expands to `.param = value` (designated initializer).  */
/* This repeats the definition that already appears near the top of this     */
/* file; the two must stay token-for-token identical.                         */
1214 #ifdef WIN32
1215 #define ASSIGN(param, value) value
1216 #else
1217 #define ASSIGN(param, value) .param = value
1218 #endif
/* -------------------------------------------------------------------------- */
/* offsetRegStruct_zeros                                                      */
/* Constant __HWA_OFFSET_REG image in which all 64 offsetN entries and all 16 */
/* A_LUT_VAL_N entries are 0.                                                 */
/* Entries are listed as four offsets followed by one A_LUT_VAL_N. Under      */
/* WIN32, ASSIGN() expands to the bare value, so this list is positional and  */
/* its order must not change.                                                 */
/* -------------------------------------------------------------------------- */
1219 const __HWA_OFFSET_REG offsetRegStruct_zeros =
1220 {
1221  ASSIGN(offset0 , 0),
1222  ASSIGN(offset1 , 0),
1223  ASSIGN(offset2 , 0),
1224  ASSIGN(offset3 , 0),
1225  ASSIGN(A_LUT_VAL_0 , 0),
1226  ASSIGN(offset4 , 0),
1227  ASSIGN(offset5 , 0),
1228  ASSIGN(offset6 , 0),
1229  ASSIGN(offset7 , 0),
1230  ASSIGN(A_LUT_VAL_1 , 0),
1231  ASSIGN(offset8 , 0),
1232  ASSIGN(offset9 , 0),
1233  ASSIGN(offset10 , 0),
1234  ASSIGN(offset11 , 0),
1235  ASSIGN(A_LUT_VAL_2 , 0),
1236  ASSIGN(offset12 , 0),
1237  ASSIGN(offset13 , 0),
1238  ASSIGN(offset14 , 0),
1239  ASSIGN(offset15 , 0),
1240  ASSIGN(A_LUT_VAL_3 , 0),
1241  ASSIGN(offset16 , 0),
1242  ASSIGN(offset17 , 0),
1243  ASSIGN(offset18 , 0),
1244  ASSIGN(offset19 , 0),
1245  ASSIGN(A_LUT_VAL_4 , 0),
1246  ASSIGN(offset20 , 0),
1247  ASSIGN(offset21 , 0),
1248  ASSIGN(offset22 , 0),
1249  ASSIGN(offset23 , 0),
1250  ASSIGN(A_LUT_VAL_5 , 0),
1251  ASSIGN(offset24 , 0),
1252  ASSIGN(offset25 , 0),
1253  ASSIGN(offset26 , 0),
1254  ASSIGN(offset27 , 0),
1255  ASSIGN(A_LUT_VAL_6 , 0),
1256  ASSIGN(offset28 , 0),
1257  ASSIGN(offset29 , 0),
1258  ASSIGN(offset30 , 0),
1259  ASSIGN(offset31 , 0),
1260  ASSIGN(A_LUT_VAL_7 , 0),
1261  ASSIGN(offset32 , 0),
1262  ASSIGN(offset33 , 0),
1263  ASSIGN(offset34 , 0),
1264  ASSIGN(offset35 , 0),
1265  ASSIGN(A_LUT_VAL_8 , 0),
1266  ASSIGN(offset36 , 0),
1267  ASSIGN(offset37 , 0),
1268  ASSIGN(offset38 , 0),
1269  ASSIGN(offset39 , 0),
1270  ASSIGN(A_LUT_VAL_9 , 0),
1271  ASSIGN(offset40 , 0),
1272  ASSIGN(offset41 , 0),
1273  ASSIGN(offset42 , 0),
1274  ASSIGN(offset43 , 0),
1275  ASSIGN(A_LUT_VAL_10 , 0),
1276  ASSIGN(offset44 , 0),
1277  ASSIGN(offset45 , 0),
1278  ASSIGN(offset46 , 0),
1279  ASSIGN(offset47 , 0),
1280  ASSIGN(A_LUT_VAL_11 , 0),
1281  ASSIGN(offset48 , 0),
1282  ASSIGN(offset49 , 0),
1283  ASSIGN(offset50 , 0),
1284  ASSIGN(offset51 , 0),
1285  ASSIGN(A_LUT_VAL_12 , 0),
1286  ASSIGN(offset52 , 0),
1287  ASSIGN(offset53 , 0),
1288  ASSIGN(offset54 , 0),
1289  ASSIGN(offset55 , 0),
1290  ASSIGN(A_LUT_VAL_13 , 0),
1291  ASSIGN(offset56 , 0),
1292  ASSIGN(offset57 , 0),
1293  ASSIGN(offset58 , 0),
1294  ASSIGN(offset59 , 0),
1295  ASSIGN(A_LUT_VAL_14 , 0),
1296  ASSIGN(offset60 , 0),
1297  ASSIGN(offset61 , 0),
1298  ASSIGN(offset62 , 0),
1299  ASSIGN(offset63 , 0),
1300  ASSIGN(A_LUT_VAL_15 , 0)
1301 };
1302 
/* -------------------------------------------------------------------------- */
/* offsetRegStruct_diagonal_32bit                                             */
/* Constant __HWA_OFFSET_REG image where only every fourth offset is used:    */
/* offset(4*k) = k for k = 0..15 (offset0 = 0, offset4 = 1, ... offset60 =    */
/* 15). All remaining offsets and every A_LUT_VAL_N are 0.                    */
/* NOTE(review): presumably one offset per 32-bit element (four offset slots  */
/* each) - confirm against the MMA offset-register specification.             */
/* Under WIN32, ASSIGN() expands to the bare value, so this list is           */
/* positional and its order must not change.                                  */
/* -------------------------------------------------------------------------- */
1303 const __HWA_OFFSET_REG offsetRegStruct_diagonal_32bit =
1304 {
1305  ASSIGN(offset0 , 0),
1306  ASSIGN(offset1 , 0),
1307  ASSIGN(offset2 , 0),
1308  ASSIGN(offset3 , 0),
1309  ASSIGN(A_LUT_VAL_0 , 0),
1310  ASSIGN(offset4 , 1),
1311  ASSIGN(offset5 , 0),
1312  ASSIGN(offset6 , 0),
1313  ASSIGN(offset7 , 0),
1314  ASSIGN(A_LUT_VAL_1 , 0),
1315  ASSIGN(offset8 , 2),
1316  ASSIGN(offset9 , 0),
1317  ASSIGN(offset10 , 0),
1318  ASSIGN(offset11 , 0),
1319  ASSIGN(A_LUT_VAL_2 , 0),
1320  ASSIGN(offset12 , 3),
1321  ASSIGN(offset13 , 0),
1322  ASSIGN(offset14 , 0),
1323  ASSIGN(offset15 , 0),
1324  ASSIGN(A_LUT_VAL_3 , 0),
1325  ASSIGN(offset16 , 4),
1326  ASSIGN(offset17 , 0),
1327  ASSIGN(offset18 , 0),
1328  ASSIGN(offset19 , 0),
1329  ASSIGN(A_LUT_VAL_4 , 0),
1330  ASSIGN(offset20 , 5),
1331  ASSIGN(offset21 , 0),
1332  ASSIGN(offset22 , 0),
1333  ASSIGN(offset23 , 0),
1334  ASSIGN(A_LUT_VAL_5 , 0),
1335  ASSIGN(offset24 , 6),
1336  ASSIGN(offset25 , 0),
1337  ASSIGN(offset26 , 0),
1338  ASSIGN(offset27 , 0),
1339  ASSIGN(A_LUT_VAL_6 , 0),
1340  ASSIGN(offset28 , 7),
1341  ASSIGN(offset29 , 0),
1342  ASSIGN(offset30 , 0),
1343  ASSIGN(offset31 , 0),
1344  ASSIGN(A_LUT_VAL_7 , 0),
1345  ASSIGN(offset32 , 8),
1346  ASSIGN(offset33 , 0),
1347  ASSIGN(offset34 , 0),
1348  ASSIGN(offset35 , 0),
1349  ASSIGN(A_LUT_VAL_8 , 0),
1350  ASSIGN(offset36 , 9),
1351  ASSIGN(offset37 , 0),
1352  ASSIGN(offset38 , 0),
1353  ASSIGN(offset39 , 0),
1354  ASSIGN(A_LUT_VAL_9 , 0),
1355  ASSIGN(offset40 , 10),
1356  ASSIGN(offset41 , 0),
1357  ASSIGN(offset42 , 0),
1358  ASSIGN(offset43 , 0),
1359  ASSIGN(A_LUT_VAL_10 , 0),
1360  ASSIGN(offset44 , 11),
1361  ASSIGN(offset45 , 0),
1362  ASSIGN(offset46 , 0),
1363  ASSIGN(offset47 , 0),
1364  ASSIGN(A_LUT_VAL_11 , 0),
1365  ASSIGN(offset48 , 12),
1366  ASSIGN(offset49 , 0),
1367  ASSIGN(offset50 , 0),
1368  ASSIGN(offset51 , 0),
1369  ASSIGN(A_LUT_VAL_12 , 0),
1370  ASSIGN(offset52 , 13),
1371  ASSIGN(offset53 , 0),
1372  ASSIGN(offset54 , 0),
1373  ASSIGN(offset55 , 0),
1374  ASSIGN(A_LUT_VAL_13 , 0),
1375  ASSIGN(offset56 , 14),
1376  ASSIGN(offset57 , 0),
1377  ASSIGN(offset58 , 0),
1378  ASSIGN(offset59 , 0),
1379  ASSIGN(A_LUT_VAL_14 , 0),
1380  ASSIGN(offset60 , 15),
1381  ASSIGN(offset61 , 0),
1382  ASSIGN(offset62 , 0),
1383  ASSIGN(offset63 , 0),
1384  ASSIGN(A_LUT_VAL_15 , 0)
1385 };
1386 
/* -------------------------------------------------------------------------- */
/* offsetRegStruct_diagonal_16bit                                             */
/* Constant __HWA_OFFSET_REG image where only the even-numbered offsets are   */
/* used: offset(2*k) = k for k = 0..31 (offset0 = 0, offset2 = 1, ...         */
/* offset62 = 31). Odd-numbered offsets and every A_LUT_VAL_N are 0.          */
/* NOTE(review): presumably one offset per 16-bit element (two offset slots   */
/* each) - confirm against the MMA offset-register specification.             */
/* Under WIN32, ASSIGN() expands to the bare value, so this list is           */
/* positional and its order must not change.                                  */
/* -------------------------------------------------------------------------- */
1387 const __HWA_OFFSET_REG offsetRegStruct_diagonal_16bit =
1388 {
1389  ASSIGN(offset0 , 0),
1390  ASSIGN(offset1 , 0),
1391  ASSIGN(offset2 , 1),
1392  ASSIGN(offset3 , 0),
1393  ASSIGN(A_LUT_VAL_0 , 0),
1394  ASSIGN(offset4 , 2),
1395  ASSIGN(offset5 , 0),
1396  ASSIGN(offset6 , 3),
1397  ASSIGN(offset7 , 0),
1398  ASSIGN(A_LUT_VAL_1 , 0),
1399  ASSIGN(offset8 , 4),
1400  ASSIGN(offset9 , 0),
1401  ASSIGN(offset10 , 5),
1402  ASSIGN(offset11 , 0),
1403  ASSIGN(A_LUT_VAL_2 , 0),
1404  ASSIGN(offset12 , 6),
1405  ASSIGN(offset13 , 0),
1406  ASSIGN(offset14 , 7),
1407  ASSIGN(offset15 , 0),
1408  ASSIGN(A_LUT_VAL_3 , 0),
1409  ASSIGN(offset16 , 8),
1410  ASSIGN(offset17 , 0),
1411  ASSIGN(offset18 , 9),
1412  ASSIGN(offset19 , 0),
1413  ASSIGN(A_LUT_VAL_4 , 0),
1414  ASSIGN(offset20 , 10),
1415  ASSIGN(offset21 , 0),
1416  ASSIGN(offset22 , 11),
1417  ASSIGN(offset23 , 0),
1418  ASSIGN(A_LUT_VAL_5 , 0),
1419  ASSIGN(offset24 , 12),
1420  ASSIGN(offset25 , 0),
1421  ASSIGN(offset26 , 13),
1422  ASSIGN(offset27 , 0),
1423  ASSIGN(A_LUT_VAL_6 , 0),
1424  ASSIGN(offset28 , 14),
1425  ASSIGN(offset29 , 0),
1426  ASSIGN(offset30 , 15),
1427  ASSIGN(offset31 , 0),
1428  ASSIGN(A_LUT_VAL_7 , 0),
1429  ASSIGN(offset32 , 16),
1430  ASSIGN(offset33 , 0),
1431  ASSIGN(offset34 , 17),
1432  ASSIGN(offset35 , 0),
1433  ASSIGN(A_LUT_VAL_8 , 0),
1434  ASSIGN(offset36 , 18),
1435  ASSIGN(offset37 , 0),
1436  ASSIGN(offset38 , 19),
1437  ASSIGN(offset39 , 0),
1438  ASSIGN(A_LUT_VAL_9 , 0),
1439  ASSIGN(offset40 , 20),
1440  ASSIGN(offset41 , 0),
1441  ASSIGN(offset42 , 21),
1442  ASSIGN(offset43 , 0),
1443  ASSIGN(A_LUT_VAL_10 , 0),
1444  ASSIGN(offset44 , 22),
1445  ASSIGN(offset45 , 0),
1446  ASSIGN(offset46 , 23),
1447  ASSIGN(offset47 , 0),
1448  ASSIGN(A_LUT_VAL_11 , 0),
1449  ASSIGN(offset48 , 24),
1450  ASSIGN(offset49 , 0),
1451  ASSIGN(offset50 , 25),
1452  ASSIGN(offset51 , 0),
1453  ASSIGN(A_LUT_VAL_12 , 0),
1454  ASSIGN(offset52 , 26),
1455  ASSIGN(offset53 , 0),
1456  ASSIGN(offset54 , 27),
1457  ASSIGN(offset55 , 0),
1458  ASSIGN(A_LUT_VAL_13 , 0),
1459  ASSIGN(offset56 , 28),
1460  ASSIGN(offset57 , 0),
1461  ASSIGN(offset58 , 29),
1462  ASSIGN(offset59 , 0),
1463  ASSIGN(A_LUT_VAL_14 , 0),
1464  ASSIGN(offset60 , 30),
1465  ASSIGN(offset61 , 0),
1466  ASSIGN(offset62 , 31),
1467  ASSIGN(offset63 , 0),
1468  ASSIGN(A_LUT_VAL_15 , 0)
1469 };
1470 
/* -------------------------------------------------------------------------- */
/* offsetRegStruct_diagonal_8bit: __HWA_OFFSET_REG initializer in which every */
/* offsetN field is set to its own index N (offset0 = 0 ... offset63 = 63)    */
/* and every A_LUT_VAL_0 ... A_LUT_VAL_15 field is set to 0.                  */
/* Entries are listed as four offsets followed by one A_LUT_VAL. Under WIN32  */
/* ASSIGN expands to the bare value (positional initialization), so the       */
/* entries must stay in this order (presumably the declaration order of       */
/* __HWA_OFFSET_REG - confirm against its definition before reordering).      */
/* -------------------------------------------------------------------------- */
1471 const __HWA_OFFSET_REG offsetRegStruct_diagonal_8bit =
1472 {
1473  ASSIGN(offset0 , 0),
1474  ASSIGN(offset1 , 1),
1475  ASSIGN(offset2 , 2),
1476  ASSIGN(offset3 , 3),
1477  ASSIGN(A_LUT_VAL_0 , 0),
1478  ASSIGN(offset4 , 4),
1479  ASSIGN(offset5 , 5),
1480  ASSIGN(offset6 , 6),
1481  ASSIGN(offset7 , 7),
1482  ASSIGN(A_LUT_VAL_1 , 0),
1483  ASSIGN(offset8 , 8),
1484  ASSIGN(offset9 , 9),
1485  ASSIGN(offset10 , 10),
1486  ASSIGN(offset11 , 11),
1487  ASSIGN(A_LUT_VAL_2 , 0),
1488  ASSIGN(offset12 , 12),
1489  ASSIGN(offset13 , 13),
1490  ASSIGN(offset14 , 14),
1491  ASSIGN(offset15 , 15),
1492  ASSIGN(A_LUT_VAL_3 , 0),
1493  ASSIGN(offset16 , 16),
1494  ASSIGN(offset17 , 17),
1495  ASSIGN(offset18 , 18),
1496  ASSIGN(offset19 , 19),
1497  ASSIGN(A_LUT_VAL_4 , 0),
1498  ASSIGN(offset20 , 20),
1499  ASSIGN(offset21 , 21),
1500  ASSIGN(offset22 , 22),
1501  ASSIGN(offset23 , 23),
1502  ASSIGN(A_LUT_VAL_5 , 0),
1503  ASSIGN(offset24 , 24),
1504  ASSIGN(offset25 , 25),
1505  ASSIGN(offset26 , 26),
1506  ASSIGN(offset27 , 27),
1507  ASSIGN(A_LUT_VAL_6 , 0),
1508  ASSIGN(offset28 , 28),
1509  ASSIGN(offset29 , 29),
1510  ASSIGN(offset30 , 30),
1511  ASSIGN(offset31 , 31),
1512  ASSIGN(A_LUT_VAL_7 , 0),
1513  ASSIGN(offset32 , 32),
1514  ASSIGN(offset33 , 33),
1515  ASSIGN(offset34 , 34),
1516  ASSIGN(offset35 , 35),
1517  ASSIGN(A_LUT_VAL_8 , 0),
1518  ASSIGN(offset36 , 36),
1519  ASSIGN(offset37 , 37),
1520  ASSIGN(offset38 , 38),
1521  ASSIGN(offset39 , 39),
1522  ASSIGN(A_LUT_VAL_9 , 0),
1523  ASSIGN(offset40 , 40),
1524  ASSIGN(offset41 , 41),
1525  ASSIGN(offset42 , 42),
1526  ASSIGN(offset43 , 43),
1527  ASSIGN(A_LUT_VAL_10 , 0),
1528  ASSIGN(offset44 , 44),
1529  ASSIGN(offset45 , 45),
1530  ASSIGN(offset46 , 46),
1531  ASSIGN(offset47 , 47),
1532  ASSIGN(A_LUT_VAL_11 , 0),
1533  ASSIGN(offset48 , 48),
1534  ASSIGN(offset49 , 49),
1535  ASSIGN(offset50 , 50),
1536  ASSIGN(offset51 , 51),
1537  ASSIGN(A_LUT_VAL_12 , 0),
1538  ASSIGN(offset52 , 52),
1539  ASSIGN(offset53 , 53),
1540  ASSIGN(offset54 , 54),
1541  ASSIGN(offset55 , 55),
1542  ASSIGN(A_LUT_VAL_13 , 0),
1543  ASSIGN(offset56 , 56),
1544  ASSIGN(offset57 , 57),
1545  ASSIGN(offset58 , 58),
1546  ASSIGN(offset59 , 59),
1547  ASSIGN(A_LUT_VAL_14 , 0),
1548  ASSIGN(offset60 , 60),
1549  ASSIGN(offset61 , 61),
1550  ASSIGN(offset62 , 62),
1551  ASSIGN(offset63 , 63),
1552  ASSIGN(A_LUT_VAL_15 , 0)
1553 };
1554 
1555 /* -------------------------------------------------------------------------- */
1556 /* MISRAC Rule 8.2(UNMATCHED.PARAMS) Deviation: This is the compiler */
1557 /* recommended way to initialize a vector. */
1558 /* -------------------------------------------------------------------------- */
1559 #if defined(_HOST_BUILD)
1560 // permutation register values scale and shift (host emulation build): vector No_k lists the byte indices 0..k-1 four times in its first 4*k lanes; every remaining lane is 0
1561 const c7x::uchar_vec FFTLIB_vperm_convolve_col_scale_No_1 = c7x::uchar_vec(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1562 const c7x::uchar_vec FFTLIB_vperm_convolve_col_scale_No_2 = c7x::uchar_vec(0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1563 const c7x::uchar_vec FFTLIB_vperm_convolve_col_scale_No_3 = c7x::uchar_vec(0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1564 const c7x::uchar_vec FFTLIB_vperm_convolve_col_scale_No_4 = c7x::uchar_vec(0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1565 const c7x::uchar_vec FFTLIB_vperm_convolve_col_scale_No_5 = c7x::uchar_vec(0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1566 const c7x::uchar_vec FFTLIB_vperm_convolve_col_scale_No_6 = c7x::uchar_vec(0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1567 const c7x::uchar_vec FFTLIB_vperm_convolve_col_scale_No_7 = c7x::uchar_vec(0, 1, 2, 3, 4, 5, 6, 0, 1, 2, 3, 4, 5, 6, 0, 1, 2, 3, 4, 5, 6, 0, 1, 2, 3, 4, 5, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1568 
1569 // permutation register values for 32-bit bias (host emulation build): for No_k the index run 0..4k-1 is repeated four times (16*k entries); biasVec0 takes the first 64 entries, biasVec1 takes the overflow followed by zeros, and biasVec1 is built from a single 0 when nothing overflows (k <= 4)
1570 const c7x::uchar_vec FFTLIB_vperm_convolve_col_biasVec0_32bit_No_1 = c7x::uchar_vec(0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1571 const c7x::uchar_vec FFTLIB_vperm_convolve_col_biasVec1_32bit_No_1 = c7x::uchar_vec(0);
1572 
1573 const c7x::uchar_vec FFTLIB_vperm_convolve_col_biasVec0_32bit_No_2 = c7x::uchar_vec(0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1574 const c7x::uchar_vec FFTLIB_vperm_convolve_col_biasVec1_32bit_No_2 = c7x::uchar_vec(0);
1575 
1576 const c7x::uchar_vec FFTLIB_vperm_convolve_col_biasVec0_32bit_No_3 = c7x::uchar_vec(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1577 const c7x::uchar_vec FFTLIB_vperm_convolve_col_biasVec1_32bit_No_3 = c7x::uchar_vec(0);
1578 
1579 const c7x::uchar_vec FFTLIB_vperm_convolve_col_biasVec0_32bit_No_4 = c7x::uchar_vec(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
1580 const c7x::uchar_vec FFTLIB_vperm_convolve_col_biasVec1_32bit_No_4 = c7x::uchar_vec(0);
1581 
1582 const c7x::uchar_vec FFTLIB_vperm_convolve_col_biasVec0_32bit_No_5 = c7x::uchar_vec(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 0, 1, 2, 3);
1583 const c7x::uchar_vec FFTLIB_vperm_convolve_col_biasVec1_32bit_No_5 = c7x::uchar_vec(4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1584 
1585 const c7x::uchar_vec FFTLIB_vperm_convolve_col_biasVec0_32bit_No_6 = c7x::uchar_vec(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
1586 const c7x::uchar_vec FFTLIB_vperm_convolve_col_biasVec1_32bit_No_6 = c7x::uchar_vec(16, 17, 18, 19, 20, 21, 22, 23, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1587 
1588 const c7x::uchar_vec FFTLIB_vperm_convolve_col_biasVec0_32bit_No_7 = c7x::uchar_vec(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 0, 1, 2, 3, 4, 5, 6, 7);
1589 const c7x::uchar_vec FFTLIB_vperm_convolve_col_biasVec1_32bit_No_7 = c7x::uchar_vec(8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1590 
1591 // permutation register values for 64-bit bias (host emulation build): for No_k the index run 0..8k-1 is repeated four times (32*k entries); biasVec0 takes the first 64 entries, biasVec1 takes the overflow followed by zeros, and biasVec1 is built from a single 0 when nothing overflows (k <= 2)
1592 const c7x::uchar_vec FFTLIB_vperm_convolve_col_biasVec0_64bit_No_1 = c7x::uchar_vec(0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1593 const c7x::uchar_vec FFTLIB_vperm_convolve_col_biasVec1_64bit_No_1 = c7x::uchar_vec(0);
1594 
1595 const c7x::uchar_vec FFTLIB_vperm_convolve_col_biasVec0_64bit_No_2 = c7x::uchar_vec(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
1596 const c7x::uchar_vec FFTLIB_vperm_convolve_col_biasVec1_64bit_No_2 = c7x::uchar_vec(0);
1597 
1598 const c7x::uchar_vec FFTLIB_vperm_convolve_col_biasVec0_64bit_No_3 = c7x::uchar_vec(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
1599 const c7x::uchar_vec FFTLIB_vperm_convolve_col_biasVec1_64bit_No_3 = c7x::uchar_vec(16, 17, 18, 19, 20, 21, 22, 23, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1600 
1601 // permutation register values scale and shift, split groups case (host emulation build): vector No_k lists the byte indices 0..2k-1 twice in its first 4*k lanes; every remaining lane is 0
1602 const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_scale_No_1 = c7x::uchar_vec(0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1603 const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_scale_No_2 = c7x::uchar_vec(0, 1, 2, 3, 0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1604 const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_scale_No_3 = c7x::uchar_vec(0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1605 const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_scale_No_4 = c7x::uchar_vec(0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1606 const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_scale_No_5 = c7x::uchar_vec(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1607 const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_scale_No_6 = c7x::uchar_vec(0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1608 const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_scale_No_7 = c7x::uchar_vec(0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1609 
1610 
1611 // permutation register values for 32-bit bias, split groups case (host emulation build): for No_k the index run 0..8k-1 is repeated twice (16*k entries); biasVec0 takes the first 64 entries, biasVec1 takes the overflow followed by zeros, and biasVec1 is built from a single 0 when nothing overflows (k <= 4)
1612 const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_biasVec0_32bit_No_1 = c7x::uchar_vec( 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1613 const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_biasVec1_32bit_No_1 = c7x::uchar_vec( 0);
1614 
1615 const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_biasVec0_32bit_No_2 = c7x::uchar_vec( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1616 const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_biasVec1_32bit_No_2 = c7x::uchar_vec( 0);
1617 
1618 const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_biasVec0_32bit_No_3 = c7x::uchar_vec( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1619 const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_biasVec1_32bit_No_3 = c7x::uchar_vec( 0);
1620 
1621 const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_biasVec0_32bit_No_4 = c7x::uchar_vec( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
1622 const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_biasVec1_32bit_No_4 = c7x::uchar_vec( 0);
1623 
1624 const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_biasVec0_32bit_No_5 = c7x::uchar_vec( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23);
1625 const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_biasVec1_32bit_No_5 = c7x::uchar_vec(24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1626 
1627 const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_biasVec0_32bit_No_6 = c7x::uchar_vec( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
1628 const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_biasVec1_32bit_No_6 = c7x::uchar_vec(16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1629 
1630 const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_biasVec0_32bit_No_7 = c7x::uchar_vec( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 0, 1, 2, 3, 4, 5, 6, 7);
1631 const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_biasVec1_32bit_No_7 = c7x::uchar_vec( 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1632 
1633 // permutation register values for 64-bit bias, split groups case (host emulation build): for No_k the index run 0..16k-1 is repeated twice (32*k entries); biasVec0 takes the first 64 entries, biasVec1 takes the overflow followed by zeros, and biasVec1 is built from a single 0 when nothing overflows (k <= 2). Listing lines 1634, 1637, 1640 and 1641 were missing here; they are restored with the same lane values as the target-build definitions of the same names.
1634 const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_biasVec0_64bit_No_1 = c7x::uchar_vec(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1635 const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_biasVec1_64bit_No_1 = c7x::uchar_vec(0);
1636 
1637 const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_biasVec0_64bit_No_2 = c7x::uchar_vec(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
1638 const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_biasVec1_64bit_No_2 = c7x::uchar_vec(0);
1639 
1640 const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_biasVec0_64bit_No_3 = c7x::uchar_vec(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
1641 const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_biasVec1_64bit_No_3 = c7x::uchar_vec(16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1642 
1643 
1644 #else // target (not host emulation) build
1645 
1646 // permutation register values scale and shift (target build): vector No_k lists the byte indices 0..k-1 four times in its first 4*k lanes; every remaining lane is 0
1647 const c7x::uchar_vec FFTLIB_vperm_convolve_col_scale_No_1 = (const c7x::uchar_vec)(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1648 const c7x::uchar_vec FFTLIB_vperm_convolve_col_scale_No_2 = (const c7x::uchar_vec)(0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1649 const c7x::uchar_vec FFTLIB_vperm_convolve_col_scale_No_3 = (const c7x::uchar_vec)(0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1650 const c7x::uchar_vec FFTLIB_vperm_convolve_col_scale_No_4 = (const c7x::uchar_vec)(0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1651 const c7x::uchar_vec FFTLIB_vperm_convolve_col_scale_No_5 = (const c7x::uchar_vec)(0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1652 const c7x::uchar_vec FFTLIB_vperm_convolve_col_scale_No_6 = (const c7x::uchar_vec)(0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1653 const c7x::uchar_vec FFTLIB_vperm_convolve_col_scale_No_7 = (const c7x::uchar_vec)(0, 1, 2, 3, 4, 5, 6, 0, 1, 2, 3, 4, 5, 6, 0, 1, 2, 3, 4, 5, 6, 0, 1, 2, 3, 4, 5, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1654 
1655 // permutation register values for 32-bit bias (target build): for No_k the index run 0..4k-1 is repeated four times (16*k entries); biasVec0 takes the first 64 entries, biasVec1 takes the overflow followed by zeros, and biasVec1 is built from a single 0 when nothing overflows (k <= 4)
1656 const c7x::uchar_vec FFTLIB_vperm_convolve_col_biasVec0_32bit_No_1 = (const c7x::uchar_vec)(0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1657 const c7x::uchar_vec FFTLIB_vperm_convolve_col_biasVec1_32bit_No_1 = (const c7x::uchar_vec)(0);
1658 
1659 const c7x::uchar_vec FFTLIB_vperm_convolve_col_biasVec0_32bit_No_2 = (const c7x::uchar_vec)(0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1660 const c7x::uchar_vec FFTLIB_vperm_convolve_col_biasVec1_32bit_No_2 = (const c7x::uchar_vec)(0);
1661 
1662 const c7x::uchar_vec FFTLIB_vperm_convolve_col_biasVec0_32bit_No_3 = (const c7x::uchar_vec)(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1663 const c7x::uchar_vec FFTLIB_vperm_convolve_col_biasVec1_32bit_No_3 = (const c7x::uchar_vec)(0);
1664 
1665 const c7x::uchar_vec FFTLIB_vperm_convolve_col_biasVec0_32bit_No_4 = (const c7x::uchar_vec)(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
1666 const c7x::uchar_vec FFTLIB_vperm_convolve_col_biasVec1_32bit_No_4 = (const c7x::uchar_vec)(0);
1667 
1668 const c7x::uchar_vec FFTLIB_vperm_convolve_col_biasVec0_32bit_No_5 = (const c7x::uchar_vec)(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 0, 1, 2, 3);
1669 const c7x::uchar_vec FFTLIB_vperm_convolve_col_biasVec1_32bit_No_5 = (const c7x::uchar_vec)(4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1670 
1671 const c7x::uchar_vec FFTLIB_vperm_convolve_col_biasVec0_32bit_No_6 = (const c7x::uchar_vec)(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
1672 const c7x::uchar_vec FFTLIB_vperm_convolve_col_biasVec1_32bit_No_6 = (const c7x::uchar_vec)(16, 17, 18, 19, 20, 21, 22, 23, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1673 
1674 const c7x::uchar_vec FFTLIB_vperm_convolve_col_biasVec0_32bit_No_7 = (const c7x::uchar_vec)(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 0, 1, 2, 3, 4, 5, 6, 7);
1675 const c7x::uchar_vec FFTLIB_vperm_convolve_col_biasVec1_32bit_No_7 = (const c7x::uchar_vec)(8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1676 
1677 // permutation register values for 64-bit bias (target build): for No_k the index run 0..8k-1 is repeated four times (32*k entries); biasVec0 takes the first 64 entries, biasVec1 takes the overflow followed by zeros, and biasVec1 is built from a single 0 when nothing overflows (k <= 2)
1678 const c7x::uchar_vec FFTLIB_vperm_convolve_col_biasVec0_64bit_No_1 = (const c7x::uchar_vec)(0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1679 const c7x::uchar_vec FFTLIB_vperm_convolve_col_biasVec1_64bit_No_1 = (const c7x::uchar_vec)(0);
1680 
1681 const c7x::uchar_vec FFTLIB_vperm_convolve_col_biasVec0_64bit_No_2 = (const c7x::uchar_vec)(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
1682 const c7x::uchar_vec FFTLIB_vperm_convolve_col_biasVec1_64bit_No_2 = (const c7x::uchar_vec)(0);
1683 
1684 const c7x::uchar_vec FFTLIB_vperm_convolve_col_biasVec0_64bit_No_3 = (const c7x::uchar_vec)(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
1685 const c7x::uchar_vec FFTLIB_vperm_convolve_col_biasVec1_64bit_No_3 = (const c7x::uchar_vec)(16, 17, 18, 19, 20, 21, 22, 23, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1686 
1687 
1688 // permutation register values scale and shift, split groups case (target build): vector No_k lists the byte indices 0..2k-1 twice in its first 4*k lanes; every remaining lane is 0
1689 const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_scale_No_1 = (const c7x::uchar_vec)(0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1690 const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_scale_No_2 = (const c7x::uchar_vec)(0, 1, 2, 3, 0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1691 const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_scale_No_3 = (const c7x::uchar_vec)(0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1692 const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_scale_No_4 = (const c7x::uchar_vec)(0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1693 const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_scale_No_5 = (const c7x::uchar_vec)(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1694 const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_scale_No_6 = (const c7x::uchar_vec)(0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1695 const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_scale_No_7 = (const c7x::uchar_vec)(0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1696 
1697 
1698 // permutation register values for 32-bit bias, split groups case (target build): for No_k the index run 0..8k-1 is repeated twice (16*k entries); biasVec0 takes the first 64 entries, biasVec1 takes the overflow followed by zeros, and biasVec1 is built from a single 0 when nothing overflows (k <= 4)
1699 const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_biasVec0_32bit_No_1 = (const c7x::uchar_vec)( 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1700 const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_biasVec1_32bit_No_1 = (const c7x::uchar_vec)( 0);
1701 
1702 const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_biasVec0_32bit_No_2 = (const c7x::uchar_vec)( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1703 const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_biasVec1_32bit_No_2 = (const c7x::uchar_vec)( 0);
1704 
1705 const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_biasVec0_32bit_No_3 = (const c7x::uchar_vec)( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1706 const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_biasVec1_32bit_No_3 = (const c7x::uchar_vec)( 0);
1707 
1708 const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_biasVec0_32bit_No_4 = (const c7x::uchar_vec)( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
1709 const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_biasVec1_32bit_No_4 = (const c7x::uchar_vec)( 0);
1710 
1711 const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_biasVec0_32bit_No_5 = (const c7x::uchar_vec)( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23);
1712 const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_biasVec1_32bit_No_5 = (const c7x::uchar_vec)(24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1713 
1714 const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_biasVec0_32bit_No_6 = (const c7x::uchar_vec)( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
1715 const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_biasVec1_32bit_No_6 = (const c7x::uchar_vec)(16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1716 
1717 const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_biasVec0_32bit_No_7 = (const c7x::uchar_vec)( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 0, 1, 2, 3, 4, 5, 6, 7);
1718 const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_biasVec1_32bit_No_7 = (const c7x::uchar_vec)( 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1719 
1720 // permutation register values for 64-bit bias, split groups case (target build): for No_k the index run 0..16k-1 is repeated twice (32*k entries); biasVec0 takes the first 64 entries, biasVec1 takes the overflow followed by zeros, and biasVec1 is built from a single 0 when nothing overflows (k <= 2)
1721 const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_biasVec0_64bit_No_1 = (const c7x::uchar_vec)( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1722 const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_biasVec1_64bit_No_1 = (const c7x::uchar_vec)( 0);
1723 
1724 const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_biasVec0_64bit_No_2 = (const c7x::uchar_vec)( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
1725 const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_biasVec1_64bit_No_2 = (const c7x::uchar_vec)( 0);
1726 
1727 const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_biasVec0_64bit_No_3 = (const c7x::uchar_vec)( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
1728 const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_biasVec1_64bit_No_3 = (const c7x::uchar_vec)(16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1729 
1730 #endif // _HOST_BUILD
1731 
1732 
1733 
1734 
1735 /* ======================================================================== */
1736 /* End of file: FFTLIB_configurations.cpp */
1737 /* ======================================================================== */
#define FFTLIB_MMA_SIZE_16_BIT
type is 16-bit integers
#define FFTLIB_MMA_SIZE_8_BIT
MMA size as a function of precision.
#define FFTLIB_MMA_SIZE_32_BIT
type is 32-bit integers
const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_scale_No_1
const c7x::uchar_vec FFTLIB_vperm_convolve_col_biasVec1_64bit_No_3
const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_biasVec1_64bit_No_3
const __HWA_OFFSET_REG offsetRegStruct_zeros
const c7x::uchar_vec FFTLIB_vperm_convolve_col_biasVec1_64bit_No_1
const c7x::uchar_vec FFTLIB_vperm_convolve_col_biasVec1_32bit_No_7
const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_biasVec1_32bit_No_3
const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_biasVec0_64bit_No_1
const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_biasVec1_32bit_No_2
const __HWA_CONFIG_REG_v1 configRegisterStruct_i16u_i16s_o16u
const c7x::uchar_vec FFTLIB_vperm_convolve_col_scale_No_5
const __HWA_OFFSET_REG offsetRegStruct_diagonal_16bit
const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_biasVec1_32bit_No_6
const __HWA_CONFIG_REG_v1 configRegisterStruct_i16s_i16s_o16u
const c7x::uchar_vec FFTLIB_vperm_convolve_col_scale_No_4
const c7x::uchar_vec FFTLIB_vperm_convolve_col_biasVec1_32bit_No_1
const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_biasVec0_32bit_No_7
const c7x::uchar_vec FFTLIB_vperm_convolve_col_biasVec0_64bit_No_3
const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_scale_No_3
const c7x::uchar_vec FFTLIB_vperm_convolve_col_scale_No_2
const c7x::uchar_vec FFTLIB_vperm_convolve_col_biasVec0_32bit_No_6
const __HWA_CONFIG_REG_v1 configRegisterStruct_i8u_i8s_o8s
const c7x::uchar_vec FFTLIB_vperm_convolve_col_biasVec0_64bit_No_2
const c7x::uchar_vec FFTLIB_vperm_convolve_col_scale_No_6
const __HWA_OFFSET_REG offsetRegStruct_diagonal_8bit
const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_scale_No_7
const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_biasVec1_32bit_No_4
const c7x::uchar_vec FFTLIB_vperm_convolve_col_biasVec0_64bit_No_1
#define ASSIGN(param, value)
const c7x::uchar_vec FFTLIB_vperm_convolve_col_scale_No_7
const c7x::uchar_vec FFTLIB_vperm_convolve_col_biasVec0_32bit_No_1
const c7x::uchar_vec FFTLIB_vperm_convolve_col_biasVec0_32bit_No_7
const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_scale_No_6
const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_biasVec0_32bit_No_1
const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_biasVec0_32bit_No_3
const __HWA_CONFIG_REG_v1 configRegisterStruct_i32s_i32s_o32s
const c7x::uchar_vec FFTLIB_vperm_convolve_col_biasVec0_32bit_No_3
const __HWA_CONFIG_REG_v1 configRegisterStruct_i8s_i8s_o8u
const __HWA_CONFIG_REG_v1 configRegisterStruct_i16s_i16s_o16s
const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_biasVec0_32bit_No_5
const c7x::uchar_vec FFTLIB_vperm_convolve_col_biasVec1_32bit_No_4
const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_scale_No_5
const c7x::uchar_vec FFTLIB_vperm_convolve_col_biasVec0_32bit_No_4
const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_biasVec1_32bit_No_7
const c7x::uchar_vec FFTLIB_vperm_convolve_col_biasVec1_32bit_No_3
const c7x::uchar_vec FFTLIB_vperm_convolve_col_biasVec1_64bit_No_2
const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_scale_No_4
const c7x::uchar_vec FFTLIB_vperm_convolve_col_biasVec1_32bit_No_6
const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_biasVec0_32bit_No_6
const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_scale_No_2
const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_biasVec0_32bit_No_4
const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_biasVec0_32bit_No_2
const c7x::uchar_vec FFTLIB_vperm_convolve_col_scale_No_3
const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_biasVec1_64bit_No_1
const c7x::uchar_vec FFTLIB_vperm_convolve_col_biasVec0_32bit_No_2
const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_biasVec0_64bit_No_2
const __HWA_OFFSET_REG offsetRegStruct_diagonal_32bit
const c7x::uchar_vec FFTLIB_vperm_convolve_col_biasVec1_32bit_No_2
const c7x::uchar_vec FFTLIB_vperm_convolve_col_scale_No_1
const __HWA_CONFIG_REG_v1 configRegisterStruct_i8u_i8s_o8u
const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_biasVec1_32bit_No_5
const c7x::uchar_vec FFTLIB_vperm_convolve_col_biasVec1_32bit_No_5
const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_biasVec1_32bit_No_1
const c7x::uchar_vec FFTLIB_vperm_convolve_col_biasVec0_32bit_No_5
const __HWA_CONFIG_REG_v1 configRegisterStruct_i16u_i16s_o16s
const __HWA_CONFIG_REG_v1 configRegisterStruct_i8s_i8s_o8s
const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_biasVec1_64bit_No_2
const c7x::uchar_vec FFTLIB_vperm_convolve_col_splitGroups_biasVec0_64bit_No_3