// Offsets of the saved SE0 and SA0 parameter templates: SE0 sits at the base
// offset and SA0 follows SE_PARAM_SIZE later (SE_PARAM_SIZE is defined elsewhere).
// NOTE(review): presumably these are byte offsets into bufPblock; the code that
// applies them is not visible in this excerpt - confirm.
54 #define SE_PARAM_BASE (0x0000)
55 #define SE_SE0_PARAM_OFFSET (SE_PARAM_BASE)
56 #define SE_SA0_PARAM_OFFSET (SE_SE0_PARAM_OFFSET + SE_PARAM_SIZE)
// Fragment of the 64-bit init template (appears to be matTrans_init_64bit from
// this file's symbol index; the signature line and braces are not part of this
// excerpt). Fills an SE0 template from the input stride and an SA0 template
// from the output stride, using the matrix dimensions held in pKerPrivArgs.
61 template <
typename dataType>
69 uint8_t *pBlock = pKerPrivArgs->
bufPblock;
70 uint32_t widthIn = pKerPrivArgs->
widthIn;
71 uint32_t heightIn = pKerPrivArgs->
heightIn;
// stride_y is documented as a byte stride; it is divided by dataSize below to
// obtain a stride in elements.
72 int32_t strideIn = bufParamsIn->
stride_y;
73 int32_t strideOut = bufParamsOut->
stride_y;
74 int32_t dataSize =
sizeof(dataType);
75 __SE_TEMPLATE_v1 se0Params = __gen_SE_TEMPLATE_v1();
76 __SA_TEMPLATE_v1 sa0Params = __gen_SA_TEMPLATE_v1();
// Full-width vector type for dataType, its element count, and the matching
// SE/SA element-type and vector-length settings.
77 typedef typename c7x::make_full_vector<dataType>::type vec;
78 int32_t eleCount = c7x::element_count_of<vec>::value;
79 __SE_ELETYPE SE_ELETYPE = c7x::se_eletype<vec>::value;
80 __SE_VECLEN SE_VECLEN = c7x::se_veclen<vec>::value;
81 __SA_VECLEN SA_VECLEN = c7x::sa_veclen<vec>::value;
// Outer iteration count: ceil(heightIn / (2 * eleCount)).
82 int32_t iter = (heightIn + (eleCount * 2) - 1) / (eleCount * 2);
// SE0: ICNT0 spans widthIn elements, ICNT1 is min(heightIn, eleCount) steps of
// one input row (stride in elements), and ICNT2/DIM2 repeat that every
// 2 * eleCount input rows.
84 se0Params.ICNT0 = widthIn;
85 se0Params.ICNT1 = (heightIn > (uint32_t) eleCount) ? eleCount : heightIn;
86 se0Params.DIM1 = strideIn / dataSize;
87 se0Params.ICNT2 = iter;
88 se0Params.DIM2 = (strideIn / dataSize) * eleCount * 2;
89 se0Params.DIMFMT = __SE_DIMFMT_3D;
// This variant selects the 64-bit transpose mode.
90 se0Params.TRANSPOSE = __SE_TRANSPOSE_64BIT;
91 se0Params.ELETYPE = SE_ELETYPE;
92 se0Params.VECLEN = SE_VECLEN;
// SA0: ICNT0 = 2 * eleCount elements, ICNT1 = widthIn steps of one output row
// (stride in elements), ICNT2 = iter steps of 2 * eleCount elements.
94 sa0Params.ICNT0 = eleCount * 2;
95 sa0Params.ICNT1 = widthIn;
96 sa0Params.DIM1 = strideOut / dataSize;
97 sa0Params.ICNT2 = iter;
98 sa0Params.DIM2 = eleCount * 2;
99 sa0Params.VECLEN = SA_VECLEN;
100 sa0Params.DIMFMT = __SA_DIMFMT_3D;
// NOTE(review): DECDIM1 tied to DIM2 with width heightIn presumably bounds the
// generated addresses/predicates to heightIn elements per output row so the
// last partial tile is masked - confirm against the SA documentation.
101 sa0Params.DECDIM1 = __SA_DECDIM_DIM2;
102 sa0Params.DECDIM1_WIDTH = heightIn;
// Fragment of the 32-bit init template (appears to be matTrans_init_32bit from
// this file's symbol index; the signature line and braces are not part of this
// excerpt). Same SE0/SA0 setup as the 64-bit variant, except that the 32-bit
// transpose mode is selected.
111 template <
typename dataType>
119 uint8_t *pBlock = pKerPrivArgs->
bufPblock;
120 uint32_t widthIn = pKerPrivArgs->
widthIn;
121 uint32_t heightIn = pKerPrivArgs->
heightIn;
// stride_y is documented as a byte stride; it is divided by dataSize below to
// obtain a stride in elements.
122 int32_t strideIn = bufParamsIn->
stride_y;
123 int32_t strideOut = bufParamsOut->
stride_y;
124 int32_t dataSize =
sizeof(dataType);
125 __SE_TEMPLATE_v1 se0Params = __gen_SE_TEMPLATE_v1();
126 __SA_TEMPLATE_v1 sa0Params = __gen_SA_TEMPLATE_v1();
// Full-width vector type for dataType, its element count, and the matching
// SE/SA element-type and vector-length settings.
127 typedef typename c7x::make_full_vector<dataType>::type vec;
128 int32_t eleCount = c7x::element_count_of<vec>::value;
129 __SE_ELETYPE SE_ELETYPE = c7x::se_eletype<vec>::value;
130 __SE_VECLEN SE_VECLEN = c7x::se_veclen<vec>::value;
131 __SA_VECLEN SA_VECLEN = c7x::sa_veclen<vec>::value;
// Outer iteration count: ceil(heightIn / (2 * eleCount)).
132 int32_t iter = (heightIn + (eleCount * 2) - 1) / (eleCount * 2);
// SE0: 32-bit transpose mode; ICNT0 spans widthIn elements, ICNT1 is
// min(heightIn, eleCount) steps of one input row (stride in elements), and
// ICNT2/DIM2 repeat that every 2 * eleCount input rows.
134 se0Params.TRANSPOSE = __SE_TRANSPOSE_32BIT;
135 se0Params.ICNT0 = widthIn;
136 se0Params.ICNT1 = (heightIn > (uint32_t) eleCount) ? eleCount : heightIn;
137 se0Params.DIM1 = strideIn / dataSize;
138 se0Params.ICNT2 = iter;
139 se0Params.DIM2 = (strideIn / dataSize) * eleCount * 2;
140 se0Params.DIMFMT = __SE_DIMFMT_3D;
141 se0Params.ELETYPE = SE_ELETYPE;
142 se0Params.VECLEN = SE_VECLEN;
// SA0: ICNT0 = 2 * eleCount elements, ICNT1 = widthIn steps of one output row
// (stride in elements), ICNT2 = iter steps of 2 * eleCount elements.
144 sa0Params.ICNT0 = eleCount * 2;
145 sa0Params.ICNT1 = widthIn;
146 sa0Params.DIM1 = strideOut / dataSize;
147 sa0Params.ICNT2 = iter;
148 sa0Params.DIM2 = eleCount * 2;
149 sa0Params.VECLEN = SA_VECLEN;
150 sa0Params.DIMFMT = __SA_DIMFMT_3D;
// NOTE(review): DECDIM1 tied to DIM2 with width heightIn presumably bounds the
// generated addresses/predicates to heightIn elements per output row so the
// last partial tile is masked - confirm against the SA documentation.
151 sa0Params.DECDIM1 = __SA_DECDIM_DIM2;
152 sa0Params.DECDIM1_WIDTH = heightIn;
// Fragment of the 16-bit init template (appears to be matTrans_init_16bit from
// this file's symbol index; the signature line and braces are not part of this
// excerpt). Compared with the 32/64-bit variants, the tile is eleCount rows per
// outer iteration and each SE pass covers at most eleCount / 2 rows.
161 template <
typename dataType>
169 uint8_t *pBlock = pKerPrivArgs->
bufPblock;
170 uint32_t widthIn = pKerPrivArgs->
widthIn;
171 uint32_t heightIn = pKerPrivArgs->
heightIn;
// stride_y is documented as a byte stride; it is divided by dataSize below to
// obtain a stride in elements.
172 int32_t strideIn = bufParamsIn->
stride_y;
173 int32_t strideOut = bufParamsOut->
stride_y;
174 int32_t dataSize =
sizeof(dataType);
175 __SE_TEMPLATE_v1 se0Params = __gen_SE_TEMPLATE_v1();
176 __SA_TEMPLATE_v1 sa0Params = __gen_SA_TEMPLATE_v1();
// Full-width vector type for dataType, its element count, and the matching
// SE/SA element-type and vector-length settings.
177 typedef typename c7x::make_full_vector<dataType>::type vec;
178 int32_t eleCount = c7x::element_count_of<vec>::value;
179 __SE_ELETYPE SE_ELETYPE = c7x::se_eletype<vec>::value;
180 __SE_VECLEN SE_VECLEN = c7x::se_veclen<vec>::value;
181 __SA_VECLEN SA_VECLEN = c7x::sa_veclen<vec>::value;
// Outer iteration count: ceil(heightIn / eleCount).
182 int32_t iter = (heightIn + eleCount - 1) / eleCount;
// SE0: the 32-bit transpose mode is used even though this is the 16-bit
// variant. ICNT1 is min(heightIn, eleCount / 2) steps of one input row, and
// ICNT2/DIM2 repeat that every eleCount input rows.
184 se0Params.TRANSPOSE = __SE_TRANSPOSE_32BIT;
185 se0Params.ICNT0 = widthIn;
186 se0Params.ICNT1 = (heightIn > (uint32_t) (eleCount / 2)) ? (eleCount / 2) : heightIn;
187 se0Params.DIM1 = strideIn / dataSize;
188 se0Params.ICNT2 = iter;
189 se0Params.DIM2 = (strideIn / dataSize) * eleCount;
190 se0Params.DIMFMT = __SE_DIMFMT_3D;
191 se0Params.ELETYPE = SE_ELETYPE;
192 se0Params.VECLEN = SE_VECLEN;
// SA0: ICNT0 = eleCount elements, ICNT1 = widthIn steps of one output row
// (stride in elements), ICNT2 = iter steps of eleCount elements.
194 sa0Params.ICNT0 = eleCount;
195 sa0Params.ICNT1 = widthIn;
196 sa0Params.DIM1 = strideOut / dataSize;
197 sa0Params.ICNT2 = iter;
198 sa0Params.DIM2 = eleCount;
199 sa0Params.VECLEN = SA_VECLEN;
200 sa0Params.DIMFMT = __SA_DIMFMT_3D;
// NOTE(review): DECDIM1 tied to DIM2 with width heightIn presumably bounds the
// generated addresses/predicates to heightIn elements per output row so the
// last partial tile is masked - confirm against the SA documentation.
201 sa0Params.DECDIM1 = __SA_DECDIM_DIM2;
202 sa0Params.DECDIM1_WIDTH = heightIn;
// Fragment of the 8-bit init template (appears to be matTrans_init_8bit from
// this file's symbol index; the signature line, braces, the #else/#endif of the
// conditional below and the declaration of iter are not part of this excerpt).
// Fills the SE0 and SA0 templates using an output tile of outEleCount elements.
211 template <
typename dataType>
219 uint8_t *pBlock = pKerPrivArgs->
bufPblock;
220 uint32_t widthIn = pKerPrivArgs->
widthIn;
221 uint32_t heightIn = pKerPrivArgs->
heightIn;
// stride_y is documented as a byte stride.
222 int32_t strideIn = bufParamsIn->
stride_y;
223 int32_t strideOut = bufParamsOut->
stride_y;
224 int32_t dataSize =
sizeof(dataType);
226 __SE_TEMPLATE_v1 se0Params = __gen_SE_TEMPLATE_v1();
227 __SA_TEMPLATE_v1 sa0Params = __gen_SA_TEMPLATE_v1();
// Full-width vector type for dataType, its element count, and the matching
// SE/SA element-type and vector-length settings.
228 typedef typename c7x::make_full_vector<dataType>::type vec;
229 int32_t eleCount = c7x::element_count_of<vec>::value;
230 __SE_ELETYPE SE_ELETYPE = c7x::se_eletype<vec>::value;
231 __SE_VECLEN SE_VECLEN = c7x::se_veclen<vec>::value;
232 __SA_VECLEN SA_VECLEN = c7x::sa_veclen<vec>::value;
// Output tile size: half a vector on 512-bit targets, otherwise a full vector.
// The two declarations below are alternatives of one #if/#else; the #else and
// #endif lines are missing from this excerpt.
234 #if __C7X_VEC_SIZE_BITS__ == 512
235 int32_t outEleCount = eleCount / 2;
237 int32_t outEleCount = eleCount;
// Outer iteration count: ceil(heightIn / outEleCount).
240 iter = (heightIn + outEleCount - 1) / outEleCount;
// SE0: 32-bit transpose mode. The stride is used without dividing by dataSize;
// the visible instantiations use int8_t/uint8_t, where the two are equal.
// NOTE(review): ICNT1 is a fixed 16 here, whereas the other init variants clamp
// it to heightIn - confirm this is intended for inputs with fewer than 16 rows.
242 se0Params.TRANSPOSE = __SE_TRANSPOSE_32BIT;
243 se0Params.ICNT0 = widthIn;
244 se0Params.ICNT1 = 16;
245 se0Params.DIM1 = strideIn;
246 se0Params.ICNT2 = iter;
247 se0Params.DIM2 = strideIn * outEleCount;
248 se0Params.DIMFMT = __SE_DIMFMT_3D;
249 se0Params.ELETYPE = SE_ELETYPE;
250 se0Params.VECLEN = SE_VECLEN;
// SA0: ICNT0 = outEleCount elements, ICNT1 = widthIn steps of one output row
// (stride in elements), ICNT2 = iter steps of outEleCount elements.
252 sa0Params.ICNT0 = outEleCount;
253 sa0Params.ICNT1 = widthIn;
254 sa0Params.DIM1 = strideOut / dataSize;
255 sa0Params.ICNT2 = iter;
256 sa0Params.DIM2 = outEleCount;
257 sa0Params.VECLEN = SA_VECLEN;
258 sa0Params.DIMFMT = __SA_DIMFMT_3D;
// NOTE(review): DECDIM1 tied to DIM2 with width heightIn presumably bounds the
// generated addresses/predicates to heightIn elements per output row so the
// last partial tile is masked - confirm against the SA documentation.
259 sa0Params.DECDIM1 = __SA_DECDIM_DIM2;
260 sa0Params.DECDIM1_WIDTH = heightIn;
// One dispatch call per element type; presumably each line is the body of the
// matching DSPLIB_matTrans_init_ci<T> specialization (signatures and braces are
// not part of this excerpt). The init template is chosen by element width:
// 8-bit, 16-bit, 32-bit (including float) or 64-bit (including double).
277 matTrans_init_32bit<float>(handle, bufParamsIn, bufParamsOut);
292 matTrans_init_64bit<double>(handle, bufParamsIn, bufParamsOut);
307 matTrans_init_8bit<int8_t>(handle, bufParamsIn, bufParamsOut);
322 matTrans_init_8bit<uint8_t>(handle, bufParamsIn, bufParamsOut);
337 matTrans_init_16bit<int16_t>(handle, bufParamsIn, bufParamsOut);
352 matTrans_init_16bit<uint16_t>(handle, bufParamsIn, bufParamsOut);
367 matTrans_init_32bit<int32_t>(handle, bufParamsIn, bufParamsOut);
382 matTrans_init_32bit<uint32_t>(handle, bufParamsIn, bufParamsOut);
397 matTrans_init_64bit<int64_t>(handle, bufParamsIn, bufParamsOut);
412 matTrans_init_64bit<uint64_t>(handle, bufParamsIn, bufParamsOut);
// Fragment of the compute template shared by 32-bit and 64-bit element types
// (appears to be matTrans_compute_64_32bit from this file's symbol index; the
// signature line, braces, and the lines that fill se0Params/sa0Params and
// declare iter are not part of this excerpt).
// Each loop iteration fetches one vector from SE0 and one from SE1 and writes
// both to the output through SA0 with predicated stores.
422 template <
typename dataType>
428 uint32_t widthIn = pKerPrivArgs->
widthIn;
429 int32_t strideIn = pKerPrivArgs->
strideIn;
430 uint32_t dataSize =
sizeof(dataType);
// NOTE(review): declared without initializers here; presumably loaded from
// pBlock on lines missing from this excerpt - confirm.
431 __SE_TEMPLATE_v1 se0Params;
432 __SA_TEMPLATE_v1 sa0Params;
433 uint8_t *pBlock = pKerPrivArgs->
bufPblock;
437 dataType *restrict pInLocal = (dataType *) pIn;
438 dataType *restrict pOutLocal = (dataType *) pOut;
439 typedef typename c7x::make_full_vector<dataType>::type vec;
440 int32_t eleCount = c7x::element_count_of<vec>::value;
// One loop trip per (outer iteration, input column) pair.
441 int32_t loopCount = iter * widthIn;
443 DSPLIB_DEBUGPRINTFN(0,
"pInLocal: %p pOutLocal: %p loopCount: %d\n", pInLocal, pOutLocal, loopCount);
// Both engines use the same template; SE1 starts eleCount input rows after SE0
// (strideIn / dataSize is the row stride in elements).
445 __SE0_OPEN(pInLocal, se0Params);
446 __SE1_OPEN(pInLocal + ((strideIn / dataSize) * eleCount), se0Params);
447 __SA0_OPEN(sa0Params);
449 for (int32_t i = 0; i < loopCount; i++) {
450 vec loadVec1 = c7x::strm_eng<0, vec>::get_adv();
451 vec loadVec2 = c7x::strm_eng<1, vec>::get_adv();
// First store: SE0 data, at the next SA0 address and under its predicate.
453 __vpred predTemp = c7x::strm_agen<0, vec>::get_vpred();
454 vec *pStoreVec = c7x::strm_agen<0, vec>::get_adv(pOutLocal);
455 __vstore_pred(predTemp, pStoreVec, loadVec1);
// Second store: SE1 data, at the following SA0 address.
457 predTemp = c7x::strm_agen<0, vec>::get_vpred();
458 pStoreVec = c7x::strm_agen<0, vec>::get_adv(pOutLocal);
459 __vstore_pred(predTemp, pStoreVec, loadVec2);
// Fragment of the 16-bit compute template (appears to be matTrans_compute_16bit
// from this file's symbol index; the signature line, braces, and the lines that
// fill se0Params/sa0Params and declare iter are not part of this excerpt).
// Each loop iteration fetches one vector from each stream engine, recombines
// them with __pack_consec_low / __pack_consec_high, and stores the two results
// through SA0 with predicated stores.
468 template <
typename dataType>
474 uint32_t widthIn = pKerPrivArgs->
widthIn;
475 int32_t strideIn = pKerPrivArgs->
strideIn;
476 uint32_t dataSize =
sizeof(dataType);
// NOTE(review): declared without initializers here; presumably loaded from
// pBlock on lines missing from this excerpt - confirm.
477 __SE_TEMPLATE_v1 se0Params;
478 __SA_TEMPLATE_v1 sa0Params;
479 uint8_t *pBlock = pKerPrivArgs->
bufPblock;
483 dataType *restrict pInLocal = (dataType *) pIn;
484 dataType *restrict pOutLocal = (dataType *) pOut;
485 typedef typename c7x::make_full_vector<dataType>::type vec;
486 int32_t eleCount = c7x::element_count_of<vec>::value;
// Half as many trips as iter * widthIn: each trip issues two stores.
487 int32_t loopCount = iter * widthIn / 2;
489 DSPLIB_DEBUGPRINTFN(0,
"pInLocal: %p pOutLocal: %p loopCount: %d\n", pInLocal, pOutLocal, loopCount);
// Both engines use the same template; SE1 starts eleCount / 2 input rows after
// SE0 (strideIn / dataSize is the row stride in elements).
491 __SE0_OPEN(pInLocal, se0Params);
492 __SA0_OPEN(sa0Params);
493 __SE1_OPEN(pInLocal + ((strideIn / dataSize) * (eleCount / 2)), se0Params);
495 for (
int i = 0; i < loopCount; i++) {
496 vec loadVec1 = c7x::strm_eng<0, vec>::get_adv();
497 vec loadVec2 = c7x::strm_eng<1, vec>::get_adv();
// NOTE(review): going by the variable names, the low/high pack presumably
// separates the even- and odd-indexed elements of the two loads - confirm
// against the intrinsic documentation.
499 vec checkEven = __pack_consec_low(loadVec2, loadVec1);
500 __vpred predTemp = c7x::strm_agen<0, vec>::get_vpred();
501 vec *pStoreVec = c7x::strm_agen<0, vec>::get_adv(pOutLocal);
502 __vstore_pred(predTemp, pStoreVec, checkEven);
504 vec checkOdd = __pack_consec_high(loadVec2, loadVec1);
505 predTemp = c7x::strm_agen<0, vec>::get_vpred();
506 pStoreVec = c7x::strm_agen<0, vec>::get_adv(pOutLocal);
507 __vstore_pred(predTemp, pStoreVec, checkOdd);
// Fragment of the 8-bit compute template (appears to be matTrans_compute_8bit
// from this file's symbol index; the signature line, closing braces, the #endif,
// and the lines that fill se0Params/sa0Params and declare iter are not part of
// this excerpt).
// Two implementations are selected by __C7X_VEC_SIZE_BITS__: 256 and 512. Each
// loop iteration issues four predicated stores through SA0.
516 template <
typename dataType>
522 uint32_t widthIn = pKerPrivArgs->
widthIn;
523 int32_t strideIn = pKerPrivArgs->
strideIn;
524 uint32_t dataSize =
sizeof(dataType);
// NOTE(review): declared without initializers here; presumably loaded from
// pBlock on lines missing from this excerpt - confirm.
525 __SE_TEMPLATE_v1 se0Params;
526 __SA_TEMPLATE_v1 sa0Params;
527 uint8_t *pBlock = pKerPrivArgs->
bufPblock;
531 dataType *restrict pInLocal = (dataType *) pIn;
532 dataType *restrict pOutLocal = (dataType *) pOut;
533 typedef typename c7x::make_full_vector<dataType>::type vec;
534 int32_t eleCount = c7x::element_count_of<vec>::value;
// A quarter as many trips as iter * widthIn: each trip issues four stores.
535 int32_t loopCount = iter * widthIn / 4;
537 DSPLIB_DEBUGPRINTFN(0,
"pInLocal: %p pOutLocal: %p loopCount: %d\n", pInLocal, pOutLocal, loopCount);
539 __SE0_OPEN(pInLocal, se0Params);
540 __SA0_OPEN(sa0Params);
// 256-bit vectors: SE1 starts eleCount / 2 input rows after SE0, and each trip
// reads two vectors from each engine.
541 #if (__C7X_VEC_SIZE_BITS__ == 256)
542 __SE1_OPEN(pInLocal + ((strideIn / dataSize) * (eleCount / 2)), se0Params);
543 for (
int i = 0; i < loopCount; i++) {
545 vec loadVec1 = c7x::strm_eng<0, vec>::get_adv();
546 vec loadVec2 = c7x::strm_eng<0, vec>::get_adv();
548 vec loadVec3 = c7x::strm_eng<1, vec>::get_adv();
549 vec loadVec4 = c7x::strm_eng<1, vec>::get_adv();
// Recombine each engine's pair of loads with the low and high pack intrinsics.
551 vec iEven1 = __pack_consec_low(loadVec2, loadVec1);
552 vec iOdd1 = __pack_consec_high(loadVec2, loadVec1);
553 vec iEven2 = __pack_consec_low(loadVec4, loadVec3);
554 vec iOdd2 = __pack_consec_high(loadVec4, loadVec3);
// Four stores in the order: packl(even), packl(odd), packh(even), packh(odd).
// Each merges the SE0-derived and SE1-derived vectors, viewed as short vectors,
// into one char vector.
556 __vpred predTemp = c7x::strm_agen<0, vec>::get_vpred();
557 c7x::char_vec *pStoreVec = c7x::strm_agen<0, c7x::char_vec>::get_adv(pOutLocal);
558 __vstore_pred_packl_2src(predTemp, pStoreVec, c7x::as_short_vec(iEven1), c7x::as_short_vec(iEven2));
560 predTemp = c7x::strm_agen<0, vec>::get_vpred();
561 pStoreVec = c7x::strm_agen<0, c7x::char_vec>::get_adv(pOutLocal);
562 __vstore_pred_packl_2src(predTemp, pStoreVec, c7x::as_short_vec(iOdd1), c7x::as_short_vec(iOdd2));
564 predTemp = c7x::strm_agen<0, vec>::get_vpred();
565 pStoreVec = c7x::strm_agen<0, c7x::char_vec>::get_adv(pOutLocal);
566 __vstore_pred_packh_2src(predTemp, pStoreVec, c7x::as_short_vec(iEven1), c7x::as_short_vec(iEven2));
568 predTemp = c7x::strm_agen<0, vec>::get_vpred();
569 pStoreVec = c7x::strm_agen<0, c7x::char_vec>::get_adv(pOutLocal);
570 __vstore_pred_packh_2src(predTemp, pStoreVec, c7x::as_short_vec(iOdd1), c7x::as_short_vec(iOdd2));
// 512-bit vectors: SE1 starts eleCount / 4 input rows after SE0, each trip reads
// one vector from each engine, and every store writes a half-width char vector.
573 #elif (__C7X_VEC_SIZE_BITS__ == 512)
574 typedef typename c7x::char_hvec vecOut;
575 __SE1_OPEN(pInLocal + ((strideIn / dataSize) * eleCount / 4), se0Params);
576 for (
int i = 0; i < loopCount; i++) {
577 vec loadVec1 = c7x::strm_eng<0, vec>::get_adv();
578 vec loadVec2 = c7x::strm_eng<1, vec>::get_adv();
580 vec iEven = __pack_consec_low(loadVec2, loadVec1);
581 vec iOdd = __pack_consec_high(loadVec2, loadVec1);
// Four stores in the order: packl(even), packl(odd), packh(even), packh(odd).
// The predicate is requested for short_vec while the address is advanced for
// the half-width char vector.
583 __vpred predTemp = c7x::strm_agen<0, c7x::short_vec>::get_vpred();
584 vecOut *pStoreVec = c7x::strm_agen<0, vecOut>::get_adv(pOutLocal);
585 __vstore_pred_packl(predTemp, pStoreVec, c7x::as_short_vec(iEven));
587 predTemp = c7x::strm_agen<0, c7x::short_vec>::get_vpred();
588 pStoreVec = c7x::strm_agen<0, vecOut>::get_adv(pOutLocal);
589 __vstore_pred_packl(predTemp, pStoreVec, c7x::as_short_vec(iOdd));
591 predTemp = c7x::strm_agen<0, c7x::short_vec>::get_vpred();
592 pStoreVec = c7x::strm_agen<0, vecOut>::get_adv(pOutLocal);
593 __vstore_pred_packh(predTemp, pStoreVec, c7x::as_short_vec(iEven));
595 predTemp = c7x::strm_agen<0, c7x::short_vec>::get_vpred();
596 pStoreVec = c7x::strm_agen<0, vecOut>::get_adv(pOutLocal);
597 __vstore_pred_packh(predTemp, pStoreVec, c7x::as_short_vec(iOdd));
// One dispatch call per element type; presumably each line is the body of the
// matching DSPLIB_matTrans_exec_ci<T> specialization (signatures and braces are
// not part of this excerpt). 32-bit and 64-bit types, including float and
// double, share matTrans_compute_64_32bit; 8-bit and 16-bit types have their
// own compute templates.
613 matTrans_compute_64_32bit<float>(handle, pIn, pOut);
625 matTrans_compute_64_32bit<double>(handle, pIn, pOut);
637 matTrans_compute_8bit<int8_t>(handle, pIn, pOut);
649 matTrans_compute_8bit<uint8_t>(handle, pIn, pOut);
661 matTrans_compute_16bit<int16_t>(handle, pIn, pOut);
673 matTrans_compute_16bit<uint16_t>(handle, pIn, pOut);
685 matTrans_compute_64_32bit<int32_t>(handle, pIn, pOut);
697 matTrans_compute_64_32bit<uint32_t>(handle, pIn, pOut);
709 matTrans_compute_64_32bit<int64_t>(handle, pIn, pOut);
721 matTrans_compute_64_32bit<uint64_t>(handle, pIn, pOut);
DSPLIB_STATUS DSPLIB_matTrans_init_ci< int64_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams2D_t *bufParamsIn, const DSPLIB_bufParams2D_t *bufParamsOut, const DSPLIB_matTransInitArgs *pKerInitArgs)
DSPLIB_STATUS DSPLIB_matTrans_exec_ci< uint16_t >(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
DSPLIB_STATUS DSPLIB_matTrans_init_ci< double >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams2D_t *bufParamsIn, const DSPLIB_bufParams2D_t *bufParamsOut, const DSPLIB_matTransInitArgs *pKerInitArgs)
DSPLIB_STATUS DSPLIB_matTrans_exec_ci< uint8_t >(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
#define SE_SE0_PARAM_OFFSET
void matTrans_compute_64_32bit(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
void matTrans_init_8bit(DSPLIB_kernelHandle handle, const DSPLIB_bufParams2D_t *bufParamsIn, const DSPLIB_bufParams2D_t *bufParamsOut)
void matTrans_init_64bit(DSPLIB_kernelHandle handle, const DSPLIB_bufParams2D_t *bufParamsIn, const DSPLIB_bufParams2D_t *bufParamsOut)
DSPLIB_STATUS DSPLIB_matTrans_init_ci< uint16_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams2D_t *bufParamsIn, const DSPLIB_bufParams2D_t *bufParamsOut, const DSPLIB_matTransInitArgs *pKerInitArgs)
DSPLIB_STATUS DSPLIB_matTrans_init_ci< uint64_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams2D_t *bufParamsIn, const DSPLIB_bufParams2D_t *bufParamsOut, const DSPLIB_matTransInitArgs *pKerInitArgs)
void matTrans_compute_8bit(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
void matTrans_compute_16bit(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
DSPLIB_STATUS DSPLIB_matTrans_exec_ci< uint32_t >(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
DSPLIB_STATUS DSPLIB_matTrans_init_ci< uint32_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams2D_t *bufParamsIn, const DSPLIB_bufParams2D_t *bufParamsOut, const DSPLIB_matTransInitArgs *pKerInitArgs)
void matTrans_init_16bit(DSPLIB_kernelHandle handle, const DSPLIB_bufParams2D_t *bufParamsIn, const DSPLIB_bufParams2D_t *bufParamsOut)
DSPLIB_STATUS DSPLIB_matTrans_init_ci< int32_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams2D_t *bufParamsIn, const DSPLIB_bufParams2D_t *bufParamsOut, const DSPLIB_matTransInitArgs *pKerInitArgs)
void matTrans_init_32bit(DSPLIB_kernelHandle handle, const DSPLIB_bufParams2D_t *bufParamsIn, const DSPLIB_bufParams2D_t *bufParamsOut)
DSPLIB_STATUS DSPLIB_matTrans_exec_ci< int8_t >(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
DSPLIB_STATUS DSPLIB_matTrans_init_ci< int8_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams2D_t *bufParamsIn, const DSPLIB_bufParams2D_t *bufParamsOut, const DSPLIB_matTransInitArgs *pKerInitArgs)
DSPLIB_STATUS DSPLIB_matTrans_init_ci< int16_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams2D_t *bufParamsIn, const DSPLIB_bufParams2D_t *bufParamsOut, const DSPLIB_matTransInitArgs *pKerInitArgs)
DSPLIB_STATUS DSPLIB_matTrans_init_ci< float >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams2D_t *bufParamsIn, const DSPLIB_bufParams2D_t *bufParamsOut, const DSPLIB_matTransInitArgs *pKerInitArgs)
DSPLIB_STATUS DSPLIB_matTrans_exec_ci< int16_t >(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
DSPLIB_STATUS DSPLIB_matTrans_exec_ci< int64_t >(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
DSPLIB_STATUS DSPLIB_matTrans_exec_ci< int32_t >(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
DSPLIB_STATUS DSPLIB_matTrans_init_ci< uint8_t >(DSPLIB_kernelHandle handle, const DSPLIB_bufParams2D_t *bufParamsIn, const DSPLIB_bufParams2D_t *bufParamsOut, const DSPLIB_matTransInitArgs *pKerInitArgs)
DSPLIB_STATUS DSPLIB_matTrans_exec_ci< float >(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
DSPLIB_STATUS DSPLIB_matTrans_exec_ci< uint64_t >(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
#define SE_SA0_PARAM_OFFSET
DSPLIB_STATUS DSPLIB_matTrans_exec_ci< double >(DSPLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut)
Header file for kernel's internal use. For the kernel's interface, please see DSPLIB_mat_trans.
#define DSPLIB_DEBUGPRINTFN(N, fmt,...)
DSPLIB_STATUS_NAME
The enumeration of all status codes.
void * DSPLIB_kernelHandle
Handle type for DSPLIB operations.
A structure for a 2 dimensional buffer descriptor.
int32_t stride_y
Stride in Y dimension in bytes.
Structure containing the parameters to initialize the kernel.
Structure that is reserved for internal use by the kernel.
uint8_t bufPblock[DSPLIB_MAT_TRANS_IXX_IXX_OXX_PBLOCK_SIZE]
Buffer to save SE & SA configuration parameters
uint32_t heightIn
Height of input data matrix
int32_t strideIn
Stride between rows of input data matrix
uint32_t widthIn
Size of input buffer for different batches DSPLIB_matTrans_init that will be retrieved and used by DS...