From 3a32a83c3406399984ed79de6ff9e3dbcb9b73d6 Mon Sep 17 00:00:00 2001
From: Liyu Xiao
Date: Mon, 26 May 2025 10:28:11 -0400
Subject: [PATCH] Add profiling for the ONNX model

---
 .gitignore                      |  3 +-
 .riahub/workflows/workflow.yaml | 18 ++++++++--
 .../convert_to_onnx.py          |  0
 onnx_scripts/profile_onnx.py    | 42 ++++++++++++++++++++
 requirements.txt                |  1 +
 5 files changed, 60 insertions(+), 4 deletions(-)
 rename convert_to_onnx.py => onnx_scripts/convert_to_onnx.py (100%)
 create mode 100644 onnx_scripts/profile_onnx.py

diff --git a/.gitignore b/.gitignore
index ff2718f..7d082af 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,4 +4,5 @@ __pycache__/
 *.ckpt
 *.ipynb
-*.onnx
\ No newline at end of file
+*.onnx
+*.json
\ No newline at end of file

diff --git a/.riahub/workflows/workflow.yaml b/.riahub/workflows/workflow.yaml
index e1cac93..3563044 100644
--- a/.riahub/workflows/workflow.yaml
+++ b/.riahub/workflows/workflow.yaml
@@ -67,20 +67,32 @@ jobs:
     - name: 3. Convert to ONNX file
       run: |
-        PYTHONPATH=. python convert_to_onnx.py
+        PYTHONPATH=. python onnx_scripts/convert_to_onnx.py
         echo "building inference app"
-
+
     - name: Upload ONNX file
       uses: actions/upload-artifact@v3
       with:
         name: ria-demo-onnx
         path: onnx_files/inference_recognition_model.onnx
+
+    - name: 4. Profile ONNX model
+      run: |
+        PYTHONPATH=. python onnx_scripts/profile_onnx.py
+
+    - name: Upload JSON profiling data
+      uses: actions/upload-artifact@v3
+      with:
+        name: profile-data
+        path: '**/onnxruntime_profile_*.json'
 
-    - name: 4. Convert to ORT file
+    - name: 5. Convert to ORT file
       run: |
         python -m onnxruntime.tools.convert_onnx_models_to_ort \
           --input /onnx_files/inference_recognition_model.onnx \
           --output /ort_files/inference_recognition_model.ort \
+          --optimization_style Fixed \
+          --target_platform amd64
 
     - name: Upload ORT file
       uses: actions/upload-artifact@v3
       with:

diff --git a/convert_to_onnx.py b/onnx_scripts/convert_to_onnx.py
similarity index 100%
rename from convert_to_onnx.py
rename to onnx_scripts/convert_to_onnx.py

diff --git a/onnx_scripts/profile_onnx.py b/onnx_scripts/profile_onnx.py
new file mode 100644
index 0000000..f4f265d
--- /dev/null
+++ b/onnx_scripts/profile_onnx.py
@@ -0,0 +1,42 @@
+import onnxruntime as ort
+import numpy as np
+from helpers.app_settings import get_app_settings
+from onnx_files import ONNX_DIR
+import os
+
+def profile_onnx_model(path_to_onnx: str, num_runs: int = 100):
+    # Set up session options
+    options = ort.SessionOptions()
+    options.enable_profiling = True
+
+    # Enable cleanup of QuantizeLinear/DequantizeLinear node pairs (optional optimization)
+    options.add_session_config_entry("session.enable_quant_qdq_cleanup", "1")
+
+    # Set workload type for efficiency (low scheduling priority)
+    options.add_session_config_entry("ep.dynamic.workload_type", "Efficient")
+
+    # Create inference session on CPU
+    session = ort.InferenceSession(path_to_onnx, sess_options=options, providers=["CPUExecutionProvider"])
+    print("Session providers:", session.get_providers())
+
+    # Get model input details
+    input_name = session.get_inputs()[0].name
+    input_shape = session.get_inputs()[0].shape
+
+    # Generate dummy input data.
+    # If the model has a dynamic dimension (None), replace it with a fixed size (e.g. batch 1).
+    input_shape = [dim if isinstance(dim, int) and dim > 0 else 1 for dim in input_shape]
+    input_data = np.random.randn(*input_shape).astype(np.float32)
+
+    # Run inference multiple times to collect profiling data
+    for _ in range(num_runs):
+        session.run(None, {input_name: input_data})
+
+    # End profiling and get the profile file path
+    profile_file = session.end_profiling()
+    print(f"Profiling saved to: {profile_file}")
+
+if __name__ == "__main__":
+    settings = get_app_settings()
+    output_path = os.path.join(ONNX_DIR, f"{settings.inference.onnx_model_filename}.onnx")
+    profile_onnx_model(output_path)

diff --git a/requirements.txt b/requirements.txt
index 124f65c..71322d3 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -8,4 +8,5 @@ scikit_learn
 timm
 torch
 onnx
+onnxruntime
 ./wheel/utils-0.1.2.dev0-py3-none-any.whl
\ No newline at end of file
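---
Note on inspecting the uploaded profile-data artifact: ONNX Runtime's profiler
writes Chrome-trace-style JSON (a list of events with "cat", "name", "dur" in
microseconds, and "args.op_name" on per-node events). A minimal sketch for
summarizing the most expensive operator types is below; the summarize_profile
helper is illustrative only and not part of this patch.

    import json
    import sys
    from collections import defaultdict

    def summarize_profile(path: str, top_n: int = 10) -> None:
        # Load the trace events written by session.end_profiling().
        with open(path) as f:
            events = json.load(f)

        # Accumulate total kernel time per operator type ("Node" events).
        totals = defaultdict(int)
        for event in events:
            if event.get("cat") == "Node":
                op = event.get("args", {}).get("op_name", event.get("name", "?"))
                totals[op] += event.get("dur", 0)  # "dur" is in microseconds

        # Print the top_n most expensive operator types, slowest first.
        for op, dur_us in sorted(totals.items(), key=lambda kv: -kv[1])[:top_n]:
            print(f"{op:<24} {dur_us / 1000:.2f} ms")

    if __name__ == "__main__":
        summarize_profile(sys.argv[1])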