# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
""" Run inference using DLR or TVM Runtime """
import os
import sys
from platform import processor
from utils import supported_platform
def run_model(artifacts_folder: str, input_dict, use_dlr: bool):
""" Run model with given input using DLR or TVM Runtime.
Parameters
----------
artifacts_folder:
Folder containing compilation artifacts.
input_dict:
Dictionary of input name (str) to input data (numpy.ndarray).
use_dlr:
If True, use DLR. If False, use the TVM runtime directly.
    Returns
    -------
results: List of result tensors.
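
    Example
    -------
    >>> import numpy as np  # illustrative call; folder path and input name are hypothetical
    >>> outputs = run_model("artifacts/my_model", {"input": np.zeros((1, 3, 224, 224), "float32")}, use_dlr=True)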
"""
if use_dlr:
from dlr import DLRModel
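        # DLRModel loads the compiled artifacts; run() takes a dict of input name -> ndarray and returns a list of output arrays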
module = DLRModel(artifacts_folder)
results = module.run(input_dict)
        # optionally report TIDL performance stats; enable by setting TIDL_RT_PERFSTATS=1 in the environment
if os.environ.get("TIDL_RT_PERFSTATS"):
perf_data = module.get_TI_benchmark_data()
print(perf_data)
else:
import tvm
from tvm.contrib import graph_executor as runtime
        with open(artifacts_folder + "/deploy_graph.json") as f:
            loaded_json = f.read()
        loaded_lib = tvm.runtime.load_module(artifacts_folder + "/deploy_lib.so")
        with open(artifacts_folder + "/deploy_param.params", "rb") as f:
            loaded_params = bytearray(f.read())
# create a runtime executor module
module = runtime.create(loaded_json, loaded_lib, tvm.cpu())
# load params into the module
module.load_params(loaded_params)
# feed input data
for key, value in input_dict.items():
module.set_input(key, value)
# run
module.run()
# get output
results = []
for i in range(module.get_num_outputs()):
results.append(module.get_output(i).asnumpy())
        # optionally report TIDL performance stats; enable by setting TIDL_RT_PERFSTATS=1 in the environment
if os.environ.get("TIDL_RT_PERFSTATS"):
import ctypes
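            # probe tidl_get_custom_data_0, _1, ... exported by the compiled library until a name is missing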
for c in range(16):
try:
func = loaded_lib.get_function(f'tidl_get_custom_data_{c}')
                except AttributeError:
break
vec_void = func()
                # vec_void points to a C++ std::vector<uint64_t>; hack into its memory layout (first field assumed to be the data pointer)
pf_ptr = ctypes.cast(vec_void, ctypes.POINTER(ctypes.POINTER(ctypes.c_ulonglong)))
pf = pf_ptr[0]
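                # pf[0..5] are assumed to be start/end timestamps for copy-in, processing, and copy-out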
print(f"tidl_{c}: cp_in {pf[1]-pf[0]} process {pf[3]-pf[2]} cp_out {pf[5]-pf[4]}")
print(artifacts_folder + ": inference execution finished")
return results
def infer_model(model_name, platform, is_target, is_dlr, w_tidl, w_c7x, batch_size=0):
""" Run model model inference for a single (platform, target, tidl, c7x) config """
from prepostproc import get_test_inputs, check_test_results
from utils import get_artifacts_folder
artifacts_folder = get_artifacts_folder(model_name, platform, is_target, w_tidl, w_c7x,
batch_size)
if not os.path.exists(artifacts_folder):
raise Exception(f"{artifacts_folder} does not exist for inference")
print(f"Running inference with deployable module in {artifacts_folder} ...")
input_list = get_test_inputs(model_name, batch_size)
res = run_model(artifacts_folder, input_list[0], is_dlr)
passed = check_test_results(model_name, res, artifacts_folder)
print("Pass" if passed else "Fail")
return passed
def parse_args():
import argparse
parser = argparse.ArgumentParser()
parser.add_argument('model_name', nargs='?')
    parser.add_argument('--platform', action='store',
                        default="J7",
                        help='Platform the model was compiled for (J7, J721S2)')
parser.add_argument('--dlr', action='store_true',
default=True,
help="Use DLR runtime for inference")
parser.add_argument('--tvm', action='store_false',
dest='dlr',
help="Use TVM runtime for inference")
parser.add_argument('--tidl', action='store_true',
default=True,
help="Enable TIDL offload")
parser.add_argument('--notidl', action='store_false',
dest="tidl",
help="Disable TIDL offload")
parser.add_argument('--c7x', action='store_true',
default=False,
help="Enable C7x code generation")
parser.add_argument('--noc7x', action='store_false',
dest="c7x",
help="Disable C7x code generation")
parser.add_argument('--batch_size', action='store',
default=0, type=int,
                        help='Override the default batch size in the model; 0 means no override')
args = parser.parse_args()
    assert args.model_name is not None, "Please specify a model name"
    assert supported_platform(args.platform), f"Platform {args.platform} is not supported"
return args
if __name__ == "__main__":
args = parse_args()
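    # an aarch64 processor indicates we are running on the target device; otherwise treat this as the host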
is_target = (processor() == "aarch64")
ret = False
try:
ret = infer_model(args.model_name, args.platform, is_target, args.dlr, args.tidl, args.c7x,
args.batch_size)
except Exception as ex:
print(ex)
ret = False
print(f"infer_model {'succeed' if ret else 'fail'}ed: {args.model_name} {args.platform} "
f"{'target' if is_target else 'host'} {'dlr' if args.dlr else 'tvm'} "
f"{'tidl' if args.tidl else 'notidl'} {'c7x' if args.c7x else 'noc7x'}"
f"{(' bs'+str(args.batch_size)) if args.batch_size != 0 else ''}")
sys.exit(0 if ret else 1)