From 3a32a83c3406399984ed79de6ff9e3dbcb9b73d6 Mon Sep 17 00:00:00 2001
From: Liyu Xiao
Date: Mon, 26 May 2025 10:28:11 -0400
Subject: [PATCH] Add profiling for the ONNX model

---
 .gitignore                      |  3 +-
 .riahub/workflows/workflow.yaml | 18 ++++++++--
 .../convert_to_onnx.py          |  0
 onnx_scripts/profile_onnx.py    | 42 ++++++++++++++++++++
 requirements.txt                |  1 +
 5 files changed, 60 insertions(+), 4 deletions(-)
 rename convert_to_onnx.py => onnx_scripts/convert_to_onnx.py (100%)
 create mode 100644 onnx_scripts/profile_onnx.py

diff --git a/.gitignore b/.gitignore
index ff2718f..7d082af 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,4 +4,5 @@ __pycache__/
 *.ckpt
 *.ipynb
-*.onnx
\ No newline at end of file
+*.onnx
+*.json
\ No newline at end of file

diff --git a/.riahub/workflows/workflow.yaml b/.riahub/workflows/workflow.yaml
index e1cac93..3563044 100644
--- a/.riahub/workflows/workflow.yaml
+++ b/.riahub/workflows/workflow.yaml
@@ -67,20 +67,32 @@ jobs:
     - name: 3. Convert to ONNX file
       run: |
-        PYTHONPATH=. python convert_to_onnx.py
+        PYTHONPATH=. python onnx_scripts/convert_to_onnx.py
         echo "building inference app"
-
+
     - name: Upload ONNX file
       uses: actions/upload-artifact@v3
       with:
         name: ria-demo-onnx
         path: onnx_files/inference_recognition_model.onnx
+
+    - name: 4. Profile ONNX model
+      run: |
+        PYTHONPATH=. python onnx_scripts/profile_onnx.py
+
+    - name: Upload JSON profiling data
+      uses: actions/upload-artifact@v3
+      with:
+        name: profile-data
+        path: '**/onnxruntime_profile_*.json'
 
-    - name: 4. Convert to ORT file
+    - name: 5. Convert to ORT file
       run: |
         python -m onnxruntime.tools.convert_onnx_models_to_ort \
           --input /onnx_files/inference_recognition_model.onnx \
           --output /ort_files/inference_recognition_model.ort \
+          --optimization_style Fixed \
+          --target_platform amd64
 
     - name: Upload ORT file
       uses: actions/upload-artifact@v3
       with:

diff --git a/convert_to_onnx.py b/onnx_scripts/convert_to_onnx.py
similarity index 100%
rename from convert_to_onnx.py
rename to onnx_scripts/convert_to_onnx.py

diff --git a/onnx_scripts/profile_onnx.py b/onnx_scripts/profile_onnx.py
new file mode 100644
index 0000000..f4f265d
--- /dev/null
+++ b/onnx_scripts/profile_onnx.py
@@ -0,0 +1,42 @@
+import onnxruntime as ort
+import numpy as np
+from helpers.app_settings import get_app_settings
+from onnx_files import ONNX_DIR
+import os
+
+def profile_onnx_model(path_to_onnx: str, num_runs: int = 100):
+    # Set up session options
+    options = ort.SessionOptions()
+    options.enable_profiling = True
+
+    # Enable cleanup of QuantizeLinear/DequantizeLinear node pairs (optional optimization)
+    options.add_session_config_entry("session.enable_quant_qdq_cleanup", "1")
+
+    # Set workload type for efficiency (low scheduling priority)
+    options.add_session_config_entry("ep.dynamic.workload_type", "Efficient")
+
+    # Create inference session on CPU
+    session = ort.InferenceSession(path_to_onnx, sess_options=options, providers=["CPUExecutionProvider"])
+    print("Session providers:", session.get_providers())
+
+    # Get model input details
+    input_name = session.get_inputs()[0].name
+    input_shape = session.get_inputs()[0].shape
+
+    # Generate dummy input data.
+    # If the model has a dynamic dimension (None), replace it with a fixed size (e.g. batch 1).
+    input_shape = [dim if isinstance(dim, int) and dim > 0 else 1 for dim in input_shape]
+    input_data = np.random.randn(*input_shape).astype(np.float32)
+
+    # Run inference multiple times to collect profiling data
+    for _ in range(num_runs):
+        session.run(None, {input_name: input_data})
+
+    # End profiling and get the profile file path
+    profile_file = session.end_profiling()
+    print(f"Profiling saved to: {profile_file}")
+
+if __name__ == "__main__":
+    settings = get_app_settings()
+    output_path = os.path.join(ONNX_DIR, f"{settings.inference.onnx_model_filename}.onnx")
+    profile_onnx_model(output_path)

diff --git a/requirements.txt b/requirements.txt
index 124f65c..71322d3 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -8,4 +8,5 @@ scikit_learn
 timm
 torch
 onnx
+onnxruntime
 ./wheel/utils-0.1.2.dev0-py3-none-any.whl
\ No newline at end of file
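---
Note on inspecting the uploaded profile-data artifact: ONNX Runtime's profiler
writes Chrome-trace-style JSON (a list of events with "cat", "name", "dur" in
microseconds, and "args.op_name" on per-node events). A minimal sketch for
summarizing the most expensive operator types is below; the summarize_profile
helper is illustrative only and not part of this patch.

    import json
    import sys
    from collections import defaultdict

    def summarize_profile(path: str, top_n: int = 10) -> None:
        # Load the trace events written by session.end_profiling().
        with open(path) as f:
            events = json.load(f)

        # Accumulate total kernel time per operator type ("Node" events).
        totals = defaultdict(int)
        for event in events:
            if event.get("cat") == "Node":
                op = event.get("args", {}).get("op_name", event.get("name", "?"))
                totals[op] += event.get("dur", 0)  # "dur" is in microseconds

        # Print the top_n most expensive operator types, slowest first.
        for op, dur_us in sorted(totals.items(), key=lambda kv: -kv[1])[:top_n]:
            print(f"{op:<24} {dur_us / 1000:.2f} ms")

    if __name__ == "__main__":
        summarize_profile(sys.argv[1])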