48 #define SE_PARAM_BASE (0x0000)
49 #define SE0_PARAM_OFFSET (SE_PARAM_BASE)
52 static inline double sqrtdp(
double a)
63 p1 = oneP5 - d0 * p0 * half;
77 template <u
int32_t dTypeIn, u
int32_t dTypeOut>
85 __SE_TEMPLATE_v1 se0Params = __gen_SE_TEMPLATE_v1();
91 size_t width = pKerPrivArgs->
width;
92 size_t height = pKerPrivArgs->
height;
97 elemCount = c7x::element_count_of<c7x::uchar_qvec>::value;
100 elemCount = c7x::element_count_of<c7x::ushort_qvec>::value;
103 size_t wBlocks = VXLIB_ceilingDiv(width, elemCount);
104 size_t numBlocks = height * wBlocks;
107 uint8_t *pBlock = pKerPrivArgs->
bufPblock;
109 if (strideIn == width) {
111 se0Params.DIMFMT = __SE_DIMFMT_1D;
112 se0Params.ICNT0 = width * height;
117 se0Params.DIMFMT = __SE_DIMFMT_3D;
118 se0Params.ICNT0 = elemCount;
120 se0Params.DIM1 = strideIn;
121 se0Params.ICNT1 = height;
123 se0Params.DIM2 = elemCount;
124 se0Params.ICNT2 = VXLIB_ceilingDiv(width, elemCount);
126 se0Params.DECDIM1 = __SE_DECDIM_DIM2;
127 se0Params.DECDIM1_WIDTH = width;
128 se0Params.DECDIM1SD = __SE_DECDIMSD_DIM0;
131 se0Params.PROMOTE = __SE_PROMOTE_4X_ZEROEXT;
134 se0Params.ELETYPE = c7x::se_eletype<c7x::uchar_vec>::value;
135 se0Params.VECLEN = c7x::se_veclen<c7x::uchar_qvec>::value;
139 se0Params.ELETYPE = c7x::se_eletype<c7x::ushort_vec>::value;
140 se0Params.VECLEN = c7x::se_veclen<c7x::ushort_qvec>::value;
175 template <
typename dTypeIn,
typename dTypeOut>
178 void *restrict pOut0,
179 void *restrict pOut1,
180 void *restrict pPixelsProcessed,
181 void *restrict pCurrentSum,
182 void *restrict pCurrentSqSum)
192 __SE_TEMPLATE_v1 se0Params;
195 typedef typename std::conditional<std::is_same<dTypeIn, uint8_t>::value, uint32_t, uint64_t>::type dTypeAcc;
197 typedef typename std::conditional<std::is_same<dTypeIn, uint8_t>::value, c7x::uint_vec, c7x::ulong_vec>::type
201 dTypeIn *restrict pInLocal = (dTypeIn *) pIn;
202 dTypeOut *restrict pOut0Local = (dTypeOut *) pOut0;
203 dTypeOut *restrict pOut1Local = (dTypeOut *) pOut1;
205 uint32_t *restrict pPixelsProcessedLocal = (uint32_t *) pPixelsProcessed;
206 dTypeAcc *restrict pCurrentSumLocal = (dTypeAcc *) pCurrentSum;
207 dTypeAcc *restrict pCurrentSqSumLocal = (dTypeAcc *) pCurrentSqSum;
210 printf(
"Enter VXLIB_meanStdDev_exec_ci\n");
214 uint8_t *pBlock = pKerPrivArgs->
bufPblock;
215 size_t numBlocks = pKerPrivArgs->
numBlocks;
216 size_t elemCount = 0;
219 elemCount = c7x::element_count_of<c7x::uchar_qvec>::value;
222 elemCount = c7x::element_count_of<c7x::ushort_qvec>::value;
225 size_t width = pKerPrivArgs->
width;
226 size_t height = pKerPrivArgs->
height;
229 if (stride == width){
230 numBlocks = VXLIB_ceilingDiv((height * width), elemCount);
237 __SE0_OPEN(pInLocal, se0Params);
241 dTypeAccVec acc, acc_sq;
243 acc = (dTypeAccVec) (0);
244 acc_sq = (dTypeAccVec) (0);
246 uint32_t N = width * height;
248 for (int32_t counter = 0; counter < (int32_t)numBlocks; counter++) {
251 a = c7x::strm_eng<0, dTypeAccVec>::get_adv();
261 dTypeAcc sum = __horizontal_add(acc);
264 dTypeAcc sum_sq = __horizontal_add(acc_sq);
267 *pPixelsProcessedLocal += N;
270 *pCurrentSumLocal += sum;
273 *pCurrentSqSumLocal += sum_sq;
277 double sum_f = (double) *pCurrentSumLocal;
278 double sum_sq_f = (double) *pCurrentSqSumLocal;
280 *pOut0Local = (dTypeOut) (sum_f / (*pPixelsProcessedLocal));
283 double variance = (sum_sq_f - ((sum_f * sum_f) / (*pPixelsProcessedLocal))) / (*pPixelsProcessedLocal);
285 *pOut1Local = (dTypeOut)
sqrtdp(variance);
301 void *restrict pOut0,
302 void *restrict pOut1,
303 void *restrict pPixelsProcessed,
304 void *restrict pCurrentSum,
305 void *restrict pCurrentSqSum);
309 void *restrict pOut0,
310 void *restrict pOut1,
311 void *restrict pPixelsProcessed,
312 void *restrict pCurrentSum,
313 void *restrict pCurrentSqSum);
template VXLIB_STATUS VXLIB_meanStdDev_init_ci< VXLIB_MEANSTDDEV_DTYPE_I8U_O32F >(VXLIB_kernelHandle handle, const VXLIB_bufParams2D_t *bufParamsIn, const VXLIB_meanStdDev_InitArgs *pKerInitArgs)
static double sqrtdp(double a)
template VXLIB_STATUS VXLIB_meanStdDev_exec_ci< VXLIB_MEANSTDDEV_TYPENAME_I8U_O32F >(VXLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut0, void *restrict pOut1, void *restrict pPixelsProcessed, void *restrict pCurrentSum, void *restrict pCurrentSqSum)
VXLIB_STATUS VXLIB_meanStdDev_exec_ci(VXLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut0, void *restrict pOut1, void *restrict pPixelsProcessed, void *restrict pCurrentSum, void *restrict pCurrentSqSum)
This function is the main execution function for the C7x implementation of the kernel....
template VXLIB_STATUS VXLIB_meanStdDev_exec_ci< VXLIB_MEANSTDDEV_TYPENAME_I16U_O32F >(VXLIB_kernelHandle handle, void *restrict pIn, void *restrict pOut0, void *restrict pOut1, void *restrict pPixelsProcessed, void *restrict pCurrentSum, void *restrict pCurrentSqSum)
VXLIB_STATUS VXLIB_meanStdDev_init_ci(VXLIB_kernelHandle handle, const VXLIB_bufParams2D_t *bufParamsIn, const VXLIB_meanStdDev_InitArgs *pKerInitArgs)
This function is the initialization function for the C7x implementation of the kernel....
template VXLIB_STATUS VXLIB_meanStdDev_init_ci< VXLIB_MEANSTDDEV_DTYPE_I16U_O32F >(VXLIB_kernelHandle handle, const VXLIB_bufParams2D_t *bufParamsIn, const VXLIB_meanStdDev_InitArgs *pKerInitArgs)
Header file for kernel's internal use. For the kernel's interface, please see VXLIB_meanStdDev.
#define VXLIB_MEANSTDDEV_I8U_O32F_TEMPLATE(dTypeIn)
void * VXLIB_kernelHandle
Handle type for VXLIB operations.
VXLIB_STATUS_NAME
The enumeration of all status codes.
A structure for a 2 dimensional buffer descriptor.
Structure containing the parameters to initialize the kernel.
Structure that is reserved for internal use by the kernel.
uint8_t bufPblock[VXLIB_MEANSTDDEV_IXX_IXX_OXX_PBLOCK_SIZE]
Array to hold SE/SA params.
size_t height
Height of image
size_t strideInElements
Stride of input in elements.
size_t width
Width of image
size_t numBlocks
Number of blocks to be processed after SIMD-ification.