modrec-workflow/onnx_scripts/profile_onnx.py
Liyu Xiao f967009dbf
All checks were successful
RIA Hub Workflow Demo / ria-demo (push) Successful in 1m53s
naming Changes
2025-06-16 10:20:29 -04:00

42 lines
1.6 KiB
Python

import onnxruntime as ort
import numpy as np
from helpers.app_settings import get_app_settings
import os
def profile_onnx_model(path_to_onnx: str, num_runs: int = 100) -> str:
    """Profile an ONNX model using onnxruntime's built-in profiler.

    Runs ``num_runs`` inferences with random input data on the CPU
    execution provider, then ends profiling and reports where the JSON
    trace was written.

    Args:
        path_to_onnx: Path to the ``.onnx`` model file.
        num_runs: Number of inference runs to aggregate in the profile
            (default 100).

    Returns:
        Path to the JSON profiling trace written by onnxruntime.
    """
    # Set up session options with profiling enabled.
    options = ort.SessionOptions()
    options.enable_profiling = True
    # Enables cleanup of QuantizeLinear/DequantizeLinear node pairs
    # (optional optimization).
    options.add_session_config_entry("session.enable_quant_qdq_cleanup", "1")
    # Low scheduling priority so profiling does not starve other workloads.
    options.add_session_config_entry("ep.dynamic.workload_type", "Efficient")

    # Create inference session on CPU.
    session = ort.InferenceSession(
        path_to_onnx, sess_options=options, providers=["CPUExecutionProvider"]
    )
    print("Session providers:", session.get_providers())

    # Build dummy input from the model's first input; dynamic dims
    # (None / symbolic strings / non-positive) are replaced with 1.
    model_input = session.get_inputs()[0]
    input_name = model_input.name
    input_shape = [
        dim if isinstance(dim, int) and dim > 0 else 1 for dim in model_input.shape
    ]
    # NOTE(review): assumes the model's first input is float32 — confirm
    # against the exported model if inference fails.
    input_data = np.random.randn(*input_shape).astype(np.float32)

    # Run inference repeatedly so the profile aggregates per-node timings.
    for _ in range(num_runs):
        session.run(None, {input_name: input_data})

    # End profiling and return the trace path (fix: previously only printed,
    # so callers had no programmatic way to find the trace).
    profile_file = session.end_profiling()
    print(f"Profiling saved to: {profile_file}")
    return profile_file
if __name__ == "__main__":
settings = get_app_settings()
output_path = os.path.join("onnx_files", "inference_recognition_model.onnx")
profile_onnx_model(output_path)