import json
import os
import time

import numpy as np
import onnxruntime as ort

from helpers.app_settings import get_app_settings


def profile_onnx_model(
    path_to_onnx: str, num_runs: int = 100, warmup_runs: int = 5
) -> None:
    """
    Profiles an ONNX model by running inference multiple times and collecting
    performance data.

    Prints session initialization time, the provider used, and the average
    inference time (excluding warm-up runs), then parses the ONNX Runtime JSON
    trace to show the most expensive operation.

    Parameters:
        path_to_onnx (str): Path to the ONNX model file.
        num_runs (int): Total number of inference runs, including warm-ups.
        warmup_runs (int): Number of initial warm-up runs excluded from timing.
    """
    # Session setup
    options = ort.SessionOptions()
    options.enable_profiling = True
    options.add_session_config_entry("session.enable_quant_qdq_cleanup", "1")
    options.add_session_config_entry("ep.dynamic.workload_type", "Efficient")

    # Try the GPU first, then fall back to the CPU. Start the clock before the
    # try block so init time is measured on the fallback path as well.
    start_time = time.time()
    try:
        session = ort.InferenceSession(
            path_to_onnx, sess_options=options, providers=["CUDAExecutionProvider"]
        )
        print("Running on the GPU")
    except Exception:
        session = ort.InferenceSession(
            path_to_onnx, sess_options=options, providers=["CPUExecutionProvider"]
        )
        print("Could not find GPU, running on CPU")
    end_time = time.time()

    print(f"[Timing] Model load + session init time: {end_time - start_time:.4f} sec")
    print("Session providers:", session.get_providers())

    # Prepare a dummy input, substituting 1 for any dynamic (symbolic or
    # non-positive) dimensions in the model's input shape.
    input_name = session.get_inputs()[0].name
    input_shape = [
        dim if isinstance(dim, int) and dim > 0 else 1
        for dim in session.get_inputs()[0].shape
    ]
    input_data = np.random.randn(*input_shape).astype(np.float32)

    # Time multiple inferences, skipping the warm-up runs.
    times = []
    for i in range(num_runs):
        t0 = time.time()
        session.run(None, {input_name: input_data})
        t1 = time.time()
        if i >= warmup_runs:
            times.append(t1 - t0)

    avg_time = sum(times) / len(times)
    print(
        f"[Timing] Avg inference time (excluding {warmup_runs} warm-ups): "
        f"{avg_time:.6f} sec"
    )

    # End profiling and parse the resulting JSON trace.
    profile_file = session.end_profiling()
    print(f"[Output] Profiling trace saved to: {profile_file}")

    try:
        with open(profile_file, "r") as f:
            trace = json.load(f)
        nodes = [e for e in trace if e.get("cat") == "Node"]
        print(f"[Profile] Number of nodes executed: {len(nodes)}")
        if nodes:
            top = max(nodes, key=lambda x: x.get("dur", 0))
            # Trace durations are in microseconds, so divide by 1e3 for ms.
            print(
                f"[Profile] Most expensive op: {top['name']}: "
                f"{top['dur'] / 1e3:.3f} ms"
            )
    except Exception as e:
        print(f"[Warning] Failed to parse profiling JSON: {e}")


if __name__ == "__main__":
    settings = get_app_settings()
    output_path = os.path.join("onnx_files", "inference_recognition_model.onnx")
    profile_onnx_model(output_path)