119 #include "../../../common/xdais_types.h" 120 #include "sTIDL_IOBufDesc.h" 121 #include "tivx_tidl_utils.h" 123 #include "itidl_ti.h" 124 #include "vx_tutorial_tidl.h" 125 #include "test_engine/test_utils.h" 127 #define MAX(_a,_b) (((_a) > (_b)) ? (_a) : (_b)) 129 #define CFG_FILE_NAME "tivx/tidl/tidl_infer.cfg" 131 #define NUM_EVE_CPU (obj->num_eve_cores) 132 #define NUM_DSP_CPU 2 133 #define MAX_NUM_THREADS 4 136 char tidl_params_file_path[VX_TUTORIAL_MAX_FILE_PATH];
137 char tidl_network_file_path[VX_TUTORIAL_MAX_FILE_PATH];
138 char input_file_path[VX_TUTORIAL_MAX_FILE_PATH];
139 char output_file_path[VX_TUTORIAL_MAX_FILE_PATH];
140 uint32_t operation_mode;
141 uint32_t processing_core_mode;
142 uint32_t num_eve_cores;
143 } VxTutorialTidl_CfgObj;
145 VxTutorialTidl_CfgObj gCfgObj;
147 static vx_status parse_cfg_file(VxTutorialTidl_CfgObj *obj,
char *cfg_file_name);
153 #ifdef HOST_EMULATION 161 void vx_tutorial_tidl()
174 vx_tensor input_tensors[MAX_NUM_THREADS][VX_TUTORIAL_MAX_TENSORS];
175 vx_tensor output_tensors1[MAX_NUM_THREADS][VX_TUTORIAL_MAX_TENSORS];
176 vx_tensor output_tensors2[MAX_NUM_THREADS][VX_TUTORIAL_MAX_TENSORS];
178 vx_perf_t perf_graph, perf_node1, perf_node2;
179 int32_t i, threadIdx;
180 int32_t quantHistoryBoot, quantHistory, quantMargin;
184 char filePath[MAXPATHLENGTH];
185 const char *targetCore1[MAX_NUM_THREADS];
186 const char *targetCore2[MAX_NUM_THREADS];
187 vx_enum targetCpuId1[MAX_NUM_THREADS];
188 vx_enum targetCpuId2[MAX_NUM_THREADS];
192 VxTutorialTidl_CfgObj *obj = &gCfgObj;
194 vx_graph graph[MAX_NUM_THREADS] = {0};
195 vx_node node1[MAX_NUM_THREADS] = {0};
196 vx_node node2[MAX_NUM_THREADS] = {0};
200 uint32_t num_input_tensors = 0;
201 uint32_t num_output_tensors1 = 0;
202 uint32_t num_output_tensors2 = 0;
204 uint32_t maxNumThreads= 1;
206 printf(
" vx_tutorial_tidl: Tutorial Started !!! \n");
209 VX_TUTORIAL_ASSERT_VALID_REF(context);
213 sizeFilePath = snprintf(filePath, MAXPATHLENGTH,
"%s/%s", ct_get_test_file_path(), CFG_FILE_NAME);
215 if (sizeFilePath > MAXPATHLENGTH) {
216 printf(
"Error: path of config gile too long to fit in string\n");
220 printf(
" Reading config file %s ...\n", filePath);
222 status= parse_cfg_file(obj, filePath);
228 printf(
" Reading network file %s ...\n", obj->tidl_network_file_path);
230 network = vx_tidl_utils_readNetwork(context, &obj->tidl_network_file_path[0]);
231 VX_TUTORIAL_ASSERT_VALID_REF(network)
240 #ifdef HOST_EMULATION 246 if (obj->processing_core_mode== 2) {
247 obj->processing_core_mode= 1;
251 if (obj->processing_core_mode== 0){
258 int32_t numLayersGroup= vx_tidl_utils_countLayersGroup(network, layersGroupCount);
260 if (numLayersGroup== 1) {
261 if (layersGroupCount[1]!=0) {
263 maxNumThreads= NUM_EVE_CPU;
264 targetCore1[0]= TIVX_TARGET_EVE1;targetCore1[1]= TIVX_TARGET_EVE2;targetCore1[2]= TIVX_TARGET_EVE3;targetCore1[3]= TIVX_TARGET_EVE4;
267 else if (layersGroupCount[2]!=0) {
269 maxNumThreads= NUM_DSP_CPU;
274 printf(
" Invalid layer group ID detected, exiting ...\n");
277 for (threadIdx= 0; threadIdx < maxNumThreads; threadIdx++) {
278 targetCore2[threadIdx]= NULL;
282 else if (numLayersGroup== 2) {
283 maxNumThreads= NUM_EVE_CPU;
284 targetCore1[0]= TIVX_TARGET_EVE1;targetCore1[1]= TIVX_TARGET_EVE2;targetCore1[2]= TIVX_TARGET_EVE3;targetCore1[3]= TIVX_TARGET_EVE4;
290 printf(
" Invalid number of groups of layers, exiting ...\n");
295 else if (obj->processing_core_mode== 1) {
296 maxNumThreads= NUM_EVE_CPU;
297 targetCore1[0]= TIVX_TARGET_EVE1;targetCore1[1]= TIVX_TARGET_EVE2;targetCore1[2]= TIVX_TARGET_EVE3;targetCore1[3]= TIVX_TARGET_EVE4;
299 for (threadIdx= 0; threadIdx < maxNumThreads; threadIdx++) {
300 targetCore2[threadIdx]= NULL;
304 else if (obj->processing_core_mode== 2) {
305 maxNumThreads= NUM_DSP_CPU;
308 for (threadIdx= 0; threadIdx < maxNumThreads; threadIdx++) {
309 targetCore2[threadIdx]= NULL;
314 printf(
"Invalid processing core mode, exiting ...\n");
319 if (obj->processing_core_mode!= 0) {
320 vx_tidl_utils_updateLayersGroup(network, targetCpuId1[0]);
323 config1 = vx_tidl_utils_getConfig(context, network, &num_input_tensors, &num_output_tensors1, targetCpuId1[0]);
327 num_output_tensors2= 0;
331 int32_t num_interm_tensors= num_output_tensors1;
333 config2 = vx_tidl_utils_getConfig(context, network, &num_output_tensors1, &num_output_tensors2, targetCpuId2[0]);
335 if (num_interm_tensors != num_output_tensors1) {
336 printf(
"Number of output tensors from first group of layers not equal to the number of input tensors from second group of layers. Exiting ...\n");
341 printf(
" Reading network params file %s ...\n", obj->tidl_params_file_path);
343 status= vx_tidl_utils_readParams(network, &obj->tidl_params_file_path[0]);
347 VX_TUTORIAL_ASSERT_VALID_REF(kernel1)
351 VX_TUTORIAL_ASSERT_VALID_REF(kernel2)
355 for (threadIdx= 0; threadIdx < maxNumThreads; threadIdx++) {
357 printf(
"\nThread #%d: Create graph ... \n", threadIdx+1);
361 VX_TUTORIAL_ASSERT_VALID_REF(graph[threadIdx])
363 printf(
"Thread #%d: Create input and output tensors for node 1 ... \n", threadIdx+1);
366 status= createInputTensors(context, config1, &input_tensors[threadIdx][0]);
370 status= createOutputTensor(context, config1, &output_tensors1[threadIdx][0]);
383 quantHistoryBoot= 20;
387 printf(
"Thread #%d: Create node 1 ... \n", threadIdx+1);
389 createParams1[threadIdx]= vx_tidl_utils_setCreateParams(context, quantHistoryBoot, quantHistory, quantMargin);
390 inArgs1[threadIdx]= vx_tidl_utils_setInArgs(context);
391 outArgs1[threadIdx]= vx_tidl_utils_setOutArgs(context);
399 node1[threadIdx] =
tivxTIDLNode(graph[threadIdx], kernel1,
401 &input_tensors[threadIdx][0],
402 &output_tensors1[threadIdx][0]
404 VX_TUTORIAL_ASSERT_VALID_REF(node1[threadIdx])
408 #ifdef HOST_EMULATION 411 gTidlNodeCpuId[2*threadIdx]= targetCpuId1[threadIdx];
415 printf(
"Thread #%d: Create output tensors for node 2 ... \n", threadIdx+1);
418 status= createOutputTensor(context, config2, &output_tensors2[threadIdx][0]);
421 printf(
"Thread #%d: Create node 2 ... \n", threadIdx+1);
423 createParams2[threadIdx]= vx_tidl_utils_setCreateParams(context, quantHistoryBoot, quantHistory, quantMargin);
424 inArgs2[threadIdx]= vx_tidl_utils_setInArgs(context);
425 outArgs2[threadIdx]= vx_tidl_utils_setOutArgs(context);
433 node2[threadIdx] =
tivxTIDLNode(graph[threadIdx], kernel2,
435 &output_tensors1[threadIdx][0],
436 &output_tensors2[threadIdx][0]
438 VX_TUTORIAL_ASSERT_VALID_REF(node2[threadIdx])
442 #ifdef HOST_EMULATION 445 gTidlNodeCpuId[2*threadIdx+1]= targetCpuId2[threadIdx];
453 for (threadIdx= 0; threadIdx < maxNumThreads; threadIdx++) {
454 printf(
"Thread #%d: Verify graph ... \n", threadIdx+1);
465 for (threadIdx= 0; threadIdx < maxNumThreads; threadIdx++) {
468 status= readInput(context, config1, &input_tensors[threadIdx][0], &obj->input_file_path[0], obj->operation_mode);
481 #ifdef SEQUENTIAL_SCHEDULE 482 for (threadIdx= 0; threadIdx < maxNumThreads; threadIdx++) {
483 printf(
"Thread #%d: Execute graph ... \n",threadIdx + 1);
489 for (threadIdx= 0; threadIdx < maxNumThreads; threadIdx++) {
490 printf(
"Thread #%d: Start graph ... \n",threadIdx + 1);
499 for (threadIdx= 0; threadIdx < maxNumThreads; threadIdx++) {
500 printf(
"Thread #%d: Wait for graph ... \n",threadIdx + 1);
509 for (threadIdx= 0; threadIdx < maxNumThreads; threadIdx++) {
511 printf(
"\nThread #%d: Results\n", threadIdx+1);
512 printf(
"---------------------\n");
516 real_output_tensors= &output_tensors1[threadIdx][0];
520 real_output_tensors= &output_tensors2[threadIdx][0];
524 displayOutput(NULL, (
vx_df_image)NULL, NULL, 0, 0, 0, realConfig, real_output_tensors, &obj->output_file_path[0], obj->operation_mode);
529 for (threadIdx= 0; threadIdx < maxNumThreads; threadIdx++) {
531 printf(
"\n---- Thread #%d: Node 1 (%s) Execution time: %4.6f ms\n", threadIdx+1, targetCore1[threadIdx], perf_node1.
min/1000000.0);
533 if(node2[threadIdx] != 0) {
535 printf(
"---- Thread #%d: Node 2 (%s) Execution time: %4.6f ms\n", threadIdx+1, targetCore2[threadIdx], perf_node2.
min/1000000.0);
539 printf(
"---- Thread #%d: Total Graph Execution time: %4.6f ms\n", threadIdx + 1, perf_graph.
min/1000000.0);
542 #ifdef SEQUENTIAL_SCHEDULE 543 printf(
"\nExecution time of all the threads running sequentially: %4.6f ms\n", exe_time/1000.0);
545 printf(
"\nExecution time of all the threads running in parallel: %4.6f ms\n", exe_time/1000.0);
548 for (threadIdx= 0; threadIdx < maxNumThreads; threadIdx++) {
561 if (node2[threadIdx] !=0 ){
591 for (threadIdx= 0; threadIdx < maxNumThreads; threadIdx++) {
597 for (i= 0; i < num_input_tensors; i++) {
603 for (i= 0; i < num_output_tensors1; i++) {
609 for (i= 0; i < num_output_tensors2; i++) {
627 printf(
"\n vx_tutorial_tidl: Tutorial Done !!! \n");
/*
 * parse_cfg_file() - read the tutorial configuration file and populate *obj.
 *
 * NOTE(review): this listing was recovered from a partial extraction; the
 * numbers at the start of each line are leftover original line numbers, and
 * several statements (the fopen() error return, fclose(), loop braces and the
 * function's closing brace/return) are missing from view. Comments below
 * describe only what the visible code shows.
 *
 * Recognized "key value" entries: tidl_params_file_path,
 * tidl_network_file_path, input_file_path, output_file_path, operation_mode,
 * processing_core_mode, num_eve_cores. File-path values are prefixed with
 * ct_get_test_file_path() before being stored in *obj.
 */
636 static vx_status parse_cfg_file(VxTutorialTidl_CfgObj *obj,
char *cfg_file_name)
638 FILE *fp = fopen(cfg_file_name,
"r");
642 char filePath[MAXPATHLENGTH];
/* Defaults applied before parsing; overridden below if the cfg provides them. */
646 obj->processing_core_mode= 0;
647 obj->num_eve_cores= 1;
/* fopen() failure diagnostics (the surrounding if/return is not visible here). */
651 printf(
"# ERROR: Unable to open config file [%s]\n", cfg_file_name);
652 #ifdef HOST_EMULATION 653 printf(
"# ERROR: Please make sure that the environment variable VX_TEST_DATA_PATH is set to .../conformance_tests/test_data\n");
/* Parse one line at a time. */
659 while(fgets(line_str,
sizeof(line_str), fp)!=NULL)
/* Lines containing '#' are treated as comments (skip logic not visible). */
663 if (strchr(line_str,
'#'))
/* NOTE(review): separator string 's' is declared off-view; strtok() mutates
 * line_str and keeps static state (not reentrant) — strtok_r would be safer. */
669 token = strtok(line_str, s);
671 if(strcmp(token,
"tidl_params_file_path")==0)
673 token = strtok(NULL, s);
/* Strip the trailing newline left by fgets(). NOTE(review): this indexes
 * token[strlen(token)-1] without checking for an empty token — verify input. */
674 token[strlen(token)-1]=0;
675 sizeFilePath = snprintf(filePath, MAXPATHLENGTH,
"%s/%s", ct_get_test_file_path(), token);
/* NOTE(review): snprintf truncates when the return value >= MAXPATHLENGTH;
 * the '>' comparison misses the exact-fit truncation case — confirm intent. */
676 if (sizeFilePath > MAXPATHLENGTH) {
677 printf(
"Error in parse_cfg_file, path too long to fit in string\n");
680 strcpy(obj->tidl_params_file_path, filePath);
684 if(strcmp(token,
"tidl_network_file_path")==0)
686 token = strtok(NULL, s);
687 token[strlen(token)-1]=0;
688 sizeFilePath = snprintf(filePath, MAXPATHLENGTH,
"%s/%s", ct_get_test_file_path(), token);
689 if (sizeFilePath > MAXPATHLENGTH) {
690 printf(
"Error in parse_cfg_file, path too long to fit in string\n");
693 strcpy(obj->tidl_network_file_path, filePath);
697 if(strcmp(token,
"input_file_path")==0)
699 token = strtok(NULL, s);
700 token[strlen(token)-1]=0;
701 sizeFilePath = snprintf(filePath, MAXPATHLENGTH,
"%s/%s", ct_get_test_file_path(), token);
702 if (sizeFilePath > MAXPATHLENGTH) {
703 printf(
"Error in parse_cfg_file, path too long to fit in string\n");
706 strcpy(obj->input_file_path, filePath);
710 if(strcmp(token,
"output_file_path")==0)
712 token = strtok(NULL, s);
713 token[strlen(token)-1]=0;
714 sizeFilePath = snprintf(filePath, MAXPATHLENGTH,
"%s/%s", ct_get_test_file_path(), token);
715 if (sizeFilePath > MAXPATHLENGTH) {
716 printf(
"Error in parse_cfg_file, path too long to fit in string\n");
719 strcpy(obj->output_file_path, filePath);
/* Integer options. NOTE(review): atoi() returns 0 on malformed input with no
 * error indication; strtol() would allow validation — confirm upstream. */
723 if(strcmp(token,
"operation_mode")==0)
725 token = strtok(NULL, s);
726 obj->operation_mode = atoi(token);
729 if(strcmp(token,
"processing_core_mode")==0)
731 token = strtok(NULL, s);
732 obj->processing_core_mode = atoi(token);
735 if(strcmp(token,
"num_eve_cores")==0)
737 token = strtok(NULL, s);
738 obj->num_eve_cores = atoi(token);
750 vx_size input_sizes[VX_TUTORIAL_MAX_TENSOR_DIMS];
752 sTIDL_IOBufDesc_t *ioBufDesc;
759 if (ioBufDesc->numInputBuf < VX_TUTORIAL_MAX_TENSORS) {
761 for(
id = 0;
id < ioBufDesc->numInputBuf;
id++) {
762 input_sizes[0] = ioBufDesc->inWidth[id] + ioBufDesc->inPadL[id] + ioBufDesc->inPadR[id];
763 input_sizes[1] = ioBufDesc->inHeight[id] + ioBufDesc->inPadT[id] + ioBufDesc->inPadB[id];
764 input_sizes[2] = ioBufDesc->inNumChannels[id];
781 vx_size output_sizes[VX_TUTORIAL_MAX_TENSOR_DIMS];
784 sTIDL_IOBufDesc_t *ioBufDesc;
790 if (ioBufDesc->numOutputBuf < VX_TUTORIAL_MAX_TENSORS) {
792 for(
id = 0;
id < ioBufDesc->numOutputBuf;
id++) {
793 output_sizes[0] = ioBufDesc->outWidth[id] + ioBufDesc->outPadL[id] + ioBufDesc->outPadR[id];
794 output_sizes[1] = ioBufDesc->outHeight[id] + ioBufDesc->outPadT[id] + ioBufDesc->outPadB[id];
795 output_sizes[2] = ioBufDesc->outNumChannels[id];
/*
 * readDataS8() - fill an int8 buffer from an open file, reading one row of
 * 'width' bytes at a time for 'n' planes of 'height' rows each, writing rows
 * 'pitch' bytes apart so destination padding is preserved.
 *
 * NOTE(review): the extraction dropped part of the parameter list (original
 * lines 812-817: presumably chOffset and return-status declarations) and the
 * function's tail, so error handling past the short-read check is not visible.
 */
810 static vx_status readDataS8(FILE *fp, int8_t *ptr, int32_t n,
811 int32_t width, int32_t height, int32_t pitch,
818 for(i0 = 0; i0 < n; i0++)
820 for(i1 = 0; i1 < height; i1++)
/* Read one row directly into its padded destination position. */
822 readSize= fread(&ptr[i0*chOffset + i1*pitch], 1, width, fp);
/* A short read means the file is smaller than the expected tensor data. */
823 if (readSize != width) {
840 int8_t *input_buffer = NULL;
846 vx_size start[VX_TUTORIAL_MAX_TENSOR_DIMS];
847 vx_size input_strides[VX_TUTORIAL_MAX_TENSOR_DIMS];
848 vx_size input_sizes[VX_TUTORIAL_MAX_TENSOR_DIMS];
850 sTIDL_IOBufDesc_t *ioBufDesc;
854 fp= fopen(input_file,
"rb");
858 printf(
"# ERROR: Unable to open input file [%s]\n", input_file);
865 for(
id = 0;
id < ioBufDesc->numInputBuf;
id++)
867 input_sizes[0] = ioBufDesc->inWidth[id] + ioBufDesc->inPadL[id] + ioBufDesc->inPadR[id];
868 input_sizes[1] = ioBufDesc->inHeight[id] + ioBufDesc->inPadT[id] + ioBufDesc->inPadB[id];
869 input_sizes[2] = ioBufDesc->inNumChannels[id];
871 start[0] = start[1] = start[2] = 0;
873 input_strides[0] = 1;
874 input_strides[1] = input_sizes[0];
875 input_strides[2] = input_sizes[1] * input_strides[1];
883 &input_buffer[(ioBufDesc->inPadT[
id] * input_strides[1]) + ioBufDesc->inPadL[
id]],
884 ioBufDesc->inNumChannels[
id],
885 ioBufDesc->inWidth[
id],
886 ioBufDesc->inHeight[
id],
910 vx_size output_sizes[VX_TUTORIAL_MAX_TENSOR_DIMS];
916 sTIDL_IOBufDesc_t *ioBufDesc;
921 for(
id = 0;
id < 1;
id++)
923 output_sizes[0] = ioBufDesc->outWidth[id] + ioBufDesc->outPadL[id] + ioBufDesc->outPadR[id];
924 output_sizes[1] = ioBufDesc->outHeight[id] + ioBufDesc->outPadT[id] + ioBufDesc->outPadB[id];
925 output_sizes[2] = ioBufDesc->outNumChannels[id];
935 vx_size output_strides[VX_TUTORIAL_MAX_TENSOR_DIMS];
936 vx_size start[VX_TUTORIAL_MAX_TENSOR_DIMS];
938 start[0] = start[1] = start[2] = start[3] = 0;
940 output_strides[0] = 1;
941 output_strides[1] = output_sizes[0];
942 output_strides[2] = output_sizes[1] * output_strides[1];
946 if (operation_mode == 0)
952 pOut = (uint8_t *)output_buffer + (ioBufDesc->outPadT[
id] * output_sizes[0]) + ioBufDesc->outPadL[id];
954 for(i = 0; i < 5; i++)
957 classid[i] = 0xFFFFFFFF;
959 for(j = 0; j < output_sizes[0]; j++)
961 if(pOut[j] > score[i])
968 pOut[classid[i]] = 0;
971 printf(
"\nImage classification Top-5 results: \n");
973 for(i = 0; i < 5; i++)
975 printf(
" %s, class-id: %d, score: %u\n", (
char *)&imgnet_labels[classid[i]], classid[i], score[i]);
979 if (operation_mode== 1)
993 ODLayerObjInfo *pObjInfo;
998 pOut = (uint8_t *)output_buffer + (ioBufDesc->outPadT[
id] * output_sizes[0]) + ioBufDesc->outPadL[id];
1000 pObjInfo = (ODLayerObjInfo *)pOut;
1002 printf(
"\nObjId|label|score| xmin| ymin| xmax| ymax|\n");
1003 printf(
"------------------------------------------\n");
1004 for(i = 0; i < numObjs; i++)
1006 ODLayerObjInfo * pObj = pObjInfo + i;
1007 if ((int32_t)(pObj->objId)!=-1) {
1008 printf(
"%5d|%5d|%5.2f|%5.2f|%5.2f|%5.2f|%5.2f|\n", (int32_t)pObj->objId, (uint32_t)pObj->label, pObj->score, pObj->xmin, pObj->ymin, pObj->xmax, pObj->ymax);
1020 printf(
"\nNumber of detected objects: %d\n\n", i);
vx_kernel tivxAddKernelTIDL(vx_context context, uint32_t num_input_tensors, uint32_t num_output_tensors)
Used by the application to create the TIDL kernel from the given context.
VX_DIRECTIVE_ENABLE_PERFORMANCE
VX_API_ENTRY vx_node VX_API_CALL tivxTIDLNode(vx_graph graph, vx_kernel kernel, vx_reference appParams[], vx_tensor input_tensors[], vx_tensor output_tensors[])
[Graph] Creates a TIDL Node.
Interface to TI extension APIs.
VX_API_ENTRY vx_status VX_API_CALL vxReleaseTensor(vx_tensor *tensor)
Releases a reference to a tensor data object. The object may not be garbage collected until its total...
vx_status VX_API_CALL vxQueryGraph(vx_graph graph, vx_enum attribute, void *ptr, vx_size size)
vx_status VX_API_CALL vxProcessGraph(vx_graph graph)
vx_status VX_API_CALL vxReleaseContext(vx_context *context)
vx_status VX_API_CALL vxRemoveKernel(vx_kernel kernel)
struct _vx_context * vx_context
vx_status VX_API_CALL vxSetNodeTarget(vx_node node, vx_enum target_enum, const char *target_string)
struct _vx_reference * vx_reference
#define TIVX_TARGET_DSP2
Name for DSP target class, instance 2.
vx_status VX_API_CALL vxWaitGraph(vx_graph graph)
#define TIVX_TARGET_DSP1
Name for DSP target class, instance 1.
struct _vx_user_data_object * vx_user_data_object
The User Data Object. User Data Object is a strongly-typed container for other data structures.
vx_status VX_API_CALL vxReleaseGraph(vx_graph *graph)
struct _vx_kernel * vx_kernel
vx_status VX_API_CALL vxQueryNode(vx_node node, vx_enum attribute, void *ptr, vx_size size)
VX_API_ENTRY vx_status VX_API_CALL vxMapUserDataObject(vx_user_data_object user_data_object, vx_size offset, vx_size size, vx_map_id *map_id, void **ptr, vx_enum usage, vx_enum mem_type, vx_uint32 flags)
Allows the application to get direct access to a subset of the user data object.
vx_graph VX_API_CALL vxCreateGraph(vx_context context)
VX_API_ENTRY vx_status VX_API_CALL vxUnmapUserDataObject(vx_user_data_object user_data_object, vx_map_id map_id)
Unmap and commit potential changes to a user data object subset that was previously mapped....
VX_API_ENTRY vx_status VX_API_CALL vxReleaseUserDataObject(vx_user_data_object *user_data_object)
Releases a reference of a User data object. The object may not be garbage collected until its total r...
VX_API_ENTRY vx_status VX_API_CALL tivxUnmapTensorPatch(vx_tensor tensor, vx_map_id map_id)
Unmap and commit potential changes to a tensor object patch that were previously mapped....
vx_status VX_API_CALL vxDirective(vx_reference reference, vx_enum directive)
vx_status VX_API_CALL vxVerifyGraph(vx_graph graph)
vx_status VX_API_CALL vxScheduleGraph(vx_graph graph)
vx_context VX_API_CALL vxCreateContext()
VX_API_ENTRY vx_tensor VX_API_CALL vxCreateTensor(vx_context context, vx_size number_of_dims, const vx_size *dims, vx_enum data_type, vx_int8 fixed_point_position)
Creates an opaque reference to a tensor data buffer.
struct _vx_graph * vx_graph
vx_status VX_API_CALL vxReleaseNode(vx_node *node)
VX_API_ENTRY vx_status VX_API_CALL tivxMapTensorPatch(vx_tensor tensor, vx_size number_of_dims, const vx_size *view_start, const vx_size *view_end, vx_map_id *map_id, vx_size *stride, void **ptr, vx_enum usage, vx_enum mem_type)
Allows the application to get direct access to a patch of tensor object.
struct _vx_node * vx_node
struct _vx_tensor * vx_tensor
The multidimensional data object (Tensor).
vx_status VX_API_CALL vxGetStatus(vx_reference reference)
tivx_cpu_id_e
CPU ID for supported CPUs.