37 #ifndef COMMON_FFTLIB_UTIL_SE0ALOADSE1BLOADCOMPUTECSA0CSTORE_H_
38 #define COMMON_FFTLIB_UTIL_SE0ALOADSE1BLOADCOMPUTECSA0CSTORE_H_ 1
// Load / compute / store loop: every iteration takes one vector from
// strm_eng<0> and one from strm_eng<1>, passes both to the __HWA* intrinsics,
// and writes the received vector out through strm_agen<0>.
// NOTE(review): `r` and `VB0` are used below but are declared outside these
// lines.
//
// Tells the compiler that numRows is positive (presumably so the loop can be
// assumed to execute at least once -- confirm against the compiler docs).
72 _nassert(numRows > 0);
73 for(r = 0; r < numRows; r++){
// Fetch the next vector from each of the two stream engines; get_adv
// presumably also advances the stream position -- TODO confirm.
74 __mma_vec valA = c7x::strm_eng<0, __mma_vec>::get_adv();
75 __mma_vec valB = c7x::strm_eng<1, __mma_vec>::get_adv();
// Hand both vectors to the hardware accelerator in a single call
// (the name suggests "load A and B" -- verify).
76 __HWALDAB(valA, valB);
// Issue the accelerator operation selected by __MMA_A_LDA.
79 __HWAOPXFER(__MMA_A_LDA);
// Read one result vector back; the argument 0 presumably selects the
// result row/index -- TODO confirm.
81 VB0 = __HWARCV((uint32_t)0);
// Take the store predicate from strm_agen<0> *before* get_adv() is called
// on the same generator on the next line.
85 __vpred tmp = c7x::strm_agen<0, __mma_vec>::get_vpred();
// Ask strm_agen<0> for the destination pointer derived from CbackDestination.
86 __mma_vec * VB1 = c7x::strm_agen<0, __mma_vec>::get_adv(CbackDestination);
// Predicated store of the received vector to that destination.
88 __vstore_pred(tmp, VB1, VB0);
// Template variant of the load / compute / store loop. UNROLL_TIMES is a
// compile-time integer that is forwarded to FFTLIB_UNROLL below.
100 template <
int32_t UNROLL_TIMES>
// FFTLIB_UNROLL is a project macro applied to the loop that follows;
// presumably it expands to an unroll pragma with the given factor -- TODO
// confirm against its definition.
// NOTE(review): `r` and `VB0` are used below but are declared outside these
// lines.
106 FFTLIB_UNROLL(UNROLL_TIMES)
107 for(r = 0; r < numRows; r++){
// Fetch the next vector from each of the two stream engines; get_adv
// presumably also advances the stream position -- TODO confirm.
108 __mma_vec valA = c7x::strm_eng<0, __mma_vec>::get_adv();
109 __mma_vec valB = c7x::strm_eng<1, __mma_vec>::get_adv();
// Hand both vectors to the hardware accelerator in a single call
// (the name suggests "load A and B" -- verify).
110 __HWALDAB(valA, valB);
// Issue the accelerator operation selected by __MMA_A_LDA.
113 __HWAOPXFER(__MMA_A_LDA);
// Read one result vector back; the argument 0 presumably selects the
// result row/index -- TODO confirm.
115 VB0 = __HWARCV((uint32_t)0);
// Take the store predicate from strm_agen<0> *before* get_adv() is called
// on the same generator on the next line.
119 __vpred tmp = c7x::strm_agen<0, __mma_vec>::get_vpred();
// Ask strm_agen<0> for the destination pointer derived from CbackDestination.
120 __mma_vec * VB1 = c7x::strm_agen<0, __mma_vec>::get_adv(CbackDestination);
// Predicated store of the received vector to that destination.
122 __vstore_pred(tmp, VB1, VB0);
// Signature of the plain (non-template) load / compute / store routine.
//   numRows          - signed 32-bit count; presumably the trip count of the
//                      row loop -- verify against the body.
//   CbackDestination - byte pointer to the output area; presumably the base
//                      address handed to strm_agen<0>::get_adv -- verify.
static void FFTLIB_UTIL_SE0AloadSE1BloadComputeCSA0Cstore(int32_t numRows, uint8_t *CbackDestination)
// Signature of the "_unroll" variant of the load / compute / store routine;
// it takes the same two parameters as the plain variant.
//   numRows          - signed 32-bit count; presumably the trip count of the
//                      row loop -- verify against the body.
//   CbackDestination - byte pointer to the output area; presumably the base
//                      address handed to strm_agen<0>::get_adv -- verify.
static void FFTLIB_UTIL_SE0AloadSE1BloadComputeCSA0Cstore_unroll(int32_t numRows, uint8_t *CbackDestination)