40 #include "../common/c71/DSPLIB_inlines.h"
/*
 * Offsets of the stored streaming-engine (SE) and streaming-address-generator
 * (SA) parameter templates. Each slot is SE_PARAM_SIZE wide (SE_PARAM_SIZE is
 * defined outside this file) and the slots are laid out back to back:
 *
 *   SE0 | SE1 | SA0 | SA1
 *
 * Every offset is the previous slot's offset plus SE_PARAM_SIZE, so no two
 * slots overlap. SE1 must advance by SE_PARAM_SIZE, not SE_PARAM_BASE (which
 * is 0); otherwise SE1 aliases SE0 and SA0/SA1 land one slot too low.
 *
 * NOTE(review): presumably these are byte offsets into
 * pKerPrivArgs->bufPblock -- confirm against the load/store sites.
 */
#define SE_PARAM_BASE (0x0000)
#define SE_SE0_PARAM_OFFSET (SE_PARAM_BASE)
#define SE_SE1_PARAM_OFFSET (SE_SE0_PARAM_OFFSET + SE_PARAM_SIZE)
#define SE_SA0_PARAM_OFFSET (SE_SE1_PARAM_OFFSET + SE_PARAM_SIZE)
#define SE_SA1_PARAM_OFFSET (SE_SA0_PARAM_OFFSET + SE_PARAM_SIZE)
56 #define DSPLIB_MATMUL_UNROLL_FACTOR (16)
57 #define DSPLIB_MATMUL_SE_UNROLL_FACTOR (8)
59 template <
typename dataType>
67 __SE_TEMPLATE_v1 se0Params;
68 __SA_TEMPLATE_v1 sa0Params;
69 __SA_TEMPLATE_v1 sa1Params;
71 __SE_ELETYPE SE_ELETYPE;
72 __SE_VECLEN SE_VECLEN;
73 __SA_VECLEN SA_VECLEN;
77 uint8_t *pBlock = pKerPrivArgs->
bufPblock;
79 int32_t M = pKerPrivArgs->
M;
80 int32_t K = pKerPrivArgs->
K;
81 int32_t N = pKerPrivArgs->
N;
86 typedef typename c7x::make_full_vector<dataType>::type vec;
88 int32_t elementCount = c7x::element_count_of<vec>::value;
89 SE_VECLEN = c7x::se_veclen<vec>::value;
90 SA_VECLEN = c7x::sa_veclen<vec>::value;
91 SE_ELETYPE = c7x::se_eletype<vec>::value;
94 pKerPrivArgs->
NBlocks = NBlocks;
100 sa0Params = __gen_SA_TEMPLATE_v1();
101 sa0Params.VECLEN = SA_VECLEN;
102 sa0Params.DIMFMT = __SA_DIMFMT_4D;
107 sa0Params.ICNT2 = NBlocks;
110 sa0Params.DIM3 = strideIn0;
116 se0Params = __gen_SE_TEMPLATE_v1();
117 se0Params.ELETYPE = SE_ELETYPE;
118 se0Params.VECLEN = SE_VECLEN;
119 se0Params.DIMFMT = __SE_DIMFMT_5D;
121 se0Params.ICNT0 = elementCount;
123 se0Params.DIM1 = (int32_t) ((uint32_t) elementCount << (uint32_t) 1);
125 se0Params.DIM2 = strideIn1;
126 se0Params.ICNT3 = NBlocks;
135 sa1Params = __gen_SA_TEMPLATE_v1();
136 sa1Params.VECLEN = SA_VECLEN;
137 sa1Params.DIMFMT = __SA_DIMFMT_4D;
139 sa1Params.ICNT0 = elementCount;
141 sa1Params.DIM1 = elementCount;
142 sa1Params.ICNT2 = NBlocks;
145 sa1Params.DIM3 = strideOut;
// Writes one vector `out` to the output using streaming address generator 1
// and a predicated vector store.
//
// Template parameters:
//   T   - type of the output base pointer passed as `pOut`.
//   vec - vector type being stored.
//
// Parameters:
//   tmp  - predicate scratch; its incoming value is overwritten before use.
//   addr - address scratch; its incoming value is overwritten before use.
//   pOut - output base handed to strm_agen<1, vec>::get_adv().
//   out  - vector value to store.
//
// NOTE(review): the function's opening and closing braces are not present in
// this excerpt of the listing.
165 template <
typename T,
typename vec>
static inline void writeOutSA1(__vpred tmp, vec *addr, T pOut, vec out)
// Debug-only dump of the value about to be stored.
168 DSPLIB_debugPrintVector(out);
// Fetch the store predicate from address generator 1. It is read before
// get_adv() below -- presumably because get_adv() advances the generator;
// confirm against the c7x streaming API.
169 tmp = c7x::strm_agen<1, vec>::get_vpred();
// Obtain the store address for this vector from address generator 1.
170 addr = c7x::strm_agen<1, vec>::get_adv(pOut);
// Predicated store of `out` to `addr` under predicate `tmp`.
171 __vstore_pred(tmp, addr, out);
174 template <
typename dataType>
182 int32_t M = pKerPrivArgs->
M;
183 int32_t K = pKerPrivArgs->
K;
184 int32_t NBlocks = pKerPrivArgs->
NBlocks;
186 __SE_TEMPLATE_v1 se0Params;
187 __SE_TEMPLATE_v1 se1Params;
188 __SA_TEMPLATE_v1 sa0Params;
189 __SA_TEMPLATE_v1 sa1Params;
191 #if DSPLIB_DEBUGPRINT
192 printf(
"Enter DSPLIB_matMul_exec_ci\n");
195 typedef typename c7x::make_full_vector<dataType>::type vec;
197 int32_t elementCount = c7x::element_count_of<vec>::value;
199 uint8_t *pBlock = pKerPrivArgs->
bufPblock;
208 __SE0_OPEN(pIn1, se0Params);
209 __SE1_OPEN(((dataType *) pIn1 + elementCount), se1Params);
212 __SA0_OPEN(sa0Params);
213 __SA1_OPEN(sa1Params);
218 vec r00, r01, r03, r02, r04, r05, r06, r07;
219 vec r08, r09, r0a, r0b, r0c, r0d, r0e, r0f;
225 for (int32_t n = 0; n < M * NBlocks; n++) {
245 for (int32_t k = 0; k < K; k++) {
247 dataType *addrA = (c7x::strm_agen<0, dataType>::get_adv(pIn0));
248 a = __vload_dup(addrA);
251 DSPLIB_debugPrintVector(a);
253 b = c7x::strm_eng<0, vec>::get_adv();
256 b = c7x::strm_eng<1, vec>::get_adv();
259 b = c7x::strm_eng<0, vec>::get_adv();
261 DSPLIB_debugPrintVector(b);
264 DSPLIB_debugPrintVector(r02);
266 b = c7x::strm_eng<1, vec>::get_adv();
269 b = c7x::strm_eng<0, vec>::get_adv();
272 b = c7x::strm_eng<1, vec>::get_adv();
275 b = c7x::strm_eng<0, vec>::get_adv();
278 b = c7x::strm_eng<1, vec>::get_adv();
281 b = c7x::strm_eng<0, vec>::get_adv();
284 b = c7x::strm_eng<1, vec>::get_adv();
287 b = c7x::strm_eng<0, vec>::get_adv();
290 b = c7x::strm_eng<1, vec>::get_adv();
293 b = c7x::strm_eng<0, vec>::get_adv();
296 b = c7x::strm_eng<1, vec>::get_adv();
299 b = c7x::strm_eng<0, vec>::get_adv();
302 b = c7x::strm_eng<1, vec>::get_adv();
334 void *restrict pOut);
338 void *restrict pOut);
#define DSPLIB_MATMUL_SE_UNROLL_FACTOR
template DSPLIB_STATUS DSPLIB_matMul_N_unroll_exec_ci< float >(DSPLIB_kernelHandle handle, void *restrict pIn0, void *restrict pIn1, void *restrict pOut)
#define SE_SE0_PARAM_OFFSET
#define SE_SA1_PARAM_OFFSET
static void writeOutSA1(__vpred tmp, vec *addr, T pOut, vec out)
template DSPLIB_STATUS DSPLIB_matMul_N_unroll_exec_ci< double >(DSPLIB_kernelHandle handle, void *restrict pIn0, void *restrict pIn1, void *restrict pOut)
DSPLIB_STATUS DSPLIB_matMul_N_unroll_init_ci(DSPLIB_kernelHandle handle, const DSPLIB_bufParams2D_t *bufParamsIn0, const DSPLIB_bufParams2D_t *bufParamsIn1, const DSPLIB_bufParams2D_t *bufParamsOut, const DSPLIB_matMul_InitArgs *pKerInitArgs)
#define SE_SE1_PARAM_OFFSET
template DSPLIB_STATUS DSPLIB_matMul_N_unroll_init_ci< double >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams2D_t *bufParamsIn0, const DSPLIB_bufParams2D_t *bufParamsIn1, const DSPLIB_bufParams2D_t *bufParamsOut, const DSPLIB_matMul_InitArgs *pKerInitArgs)
DSPLIB_STATUS DSPLIB_matMul_N_unroll_exec_ci(DSPLIB_kernelHandle handle, void *restrict pIn0, void *restrict pIn1, void *restrict pOut)
#define DSPLIB_MATMUL_UNROLL_FACTOR
template DSPLIB_STATUS DSPLIB_matMul_N_unroll_init_ci< float >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams2D_t *bufParamsIn0, const DSPLIB_bufParams2D_t *bufParamsIn1, const DSPLIB_bufParams2D_t *bufParamsOut, const DSPLIB_matMul_InitArgs *pKerInitArgs)
#define SE_SA0_PARAM_OFFSET
Header file for kernel's internal use. For the kernel's interface, please see DSPLIB_matMul.
DSPLIB_STATUS_NAME
The enumeration of all status codes.
void * DSPLIB_kernelHandle
Handle type for DSPLIB operations.
A structure for a 2 dimensional buffer descriptor.
Structure containing the parameters to initialize the kernel.
Structure that is reserved for internal use by the kernel.
uint8_t bufPblock[DSPLIB_MATMUL_IXX_IXX_OXX_PBLOCK_SIZE]
int32_t strideIn1Elements
int32_t strideIn0Elements
int32_t strideOutElements