forked from qoherent/modrec-workflow
added profiling for the ONNX model
parent b3d17f804c
commit 3a32a83c34
.gitignore (vendored) | 3

@@ -4,4 +4,5 @@
 __pycache__/
 *.ckpt
 *.ipynb
 *.onnx
+*.json
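(The new *.json pattern presumably keeps the profiler's onnxruntime_profile_*.json output, uploaded as a CI artifact below, out of version control.)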
GitHub Actions workflow file:

@@ -67,20 +67,32 @@ jobs:
       - name: 3. Convert to ONNX file
         run: |
-          PYTHONPATH=. python convert_to_onnx.py
+          PYTHONPATH=. python onnx_scripts/convert_to_onnx.py
           echo "building inference app"
 
       - name: Upload ONNX file
         uses: actions/upload-artifact@v3
         with:
           name: ria-demo-onnx
           path: onnx_files/inference_recognition_model.onnx
 
+      - name: 4. Profile ONNX model
+        run: |
+          PYTHONPATH=. python onnx_scripts/profile_onnx.py
+
+      - name: Upload JSON profiling data
+        uses: actions/upload-artifact@v3
+        with:
+          name: profile-data
+          path: '**/onnxruntime_profile_*.json'
+
       - name: 4. Convert to ORT file
         run: |
           python -m onnxruntime.tools.convert_onnx_models_to_ort \
             --input /onnx_files/inference_recognition_model.onnx \
             --output /ort_files/inference_recognition_model.ort \
             --optimization_style Fixed \
             --target_platform amd64
 
       - name: Upload ORT file
         uses: actions/upload-artifact@v3
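For reference, ONNX Runtime loads an ORT-format model through the same InferenceSession API as a .onnx file, so the artifact produced by the "Convert to ORT file" step can be smoke-tested locally. A minimal sketch, reusing the output path from the workflow and assuming a float32 input as in profile_onnx.py:

    import onnxruntime as ort
    import numpy as np

    # An ORT-format (.ort) model loads exactly like a .onnx model;
    # onnxruntime detects the format from the file itself.
    session = ort.InferenceSession(
        "ort_files/inference_recognition_model.ort",  # path produced by the workflow step
        providers=["CPUExecutionProvider"],
    )

    # Build a dummy input, substituting 1 for any dynamic dimension
    inp = session.get_inputs()[0]
    shape = [d if isinstance(d, int) and d > 0 else 1 for d in inp.shape]
    outputs = session.run(None, {inp.name: np.random.randn(*shape).astype(np.float32)})
    print("output shapes:", [o.shape for o in outputs])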
onnx_scripts/profile_onnx.py | 42 (new file)
@@ -0,0 +1,42 @@
+import onnxruntime as ort
+import numpy as np
+from helpers.app_settings import get_app_settings
+from onnx_files import ONNX_DIR
+import os
+
+def profile_onnx_model(path_to_onnx: str, num_runs: int = 100):
+    # Set up session options
+    options = ort.SessionOptions()
+    options.enable_profiling = True
+
+    # Enables cleanup of QuantizeLinear/DequantizeLinear node pairs (optional optimization)
+    options.add_session_config_entry("session.enable_quant_qdq_cleanup", "1")
+
+    # Set workload type for efficiency (low scheduling priority)
+    options.add_session_config_entry("ep.dynamic.workload_type", "Efficient")
+
+    # Create inference session on CPU
+    session = ort.InferenceSession(path_to_onnx, sess_options=options, providers=["CPUExecutionProvider"])
+    print("Session providers:", session.get_providers())
+
+    # Get model input details
+    input_name = session.get_inputs()[0].name
+    input_shape = session.get_inputs()[0].shape
+
+    # Generate dummy input data
+    # If model expects dynamic shape (None), replace with fixed size (e.g. batch 1)
+    input_shape = [dim if isinstance(dim, int) and dim > 0 else 1 for dim in input_shape]
+    input_data = np.random.randn(*input_shape).astype(np.float32)
+
+    # Run inference multiple times to collect profiling data
+    for _ in range(num_runs):
+        session.run(None, {input_name: input_data})
+
+    # End profiling and get profile file path
+    profile_file = session.end_profiling()
+    print(f"Profiling saved to: {profile_file}")
+
+if __name__ == "__main__":
+    settings = get_app_settings()
+    output_path = os.path.join(ONNX_DIR, f"{settings.inference.onnx_model_filename}.onnx")
+    profile_onnx_model(output_path)
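The file written by end_profiling() is a Chrome-trace-style JSON list of events (onnxruntime_profile_<timestamp>.json), which is what the '**/onnxruntime_profile_*.json' glob in the workflow picks up. A minimal sketch of summarizing that artifact offline, assuming the standard ONNX Runtime event fields ("cat", "dur" in microseconds, "args.op_name"); the summarize_profile helper and the filename are illustrative, not part of the repo:

    import json
    from collections import defaultdict

    def summarize_profile(path: str, top_k: int = 10) -> None:
        # Load the trace events emitted by session.end_profiling()
        with open(path) as f:
            events = json.load(f)

        # Sum kernel time per operator type across all profiled runs
        totals = defaultdict(float)
        for event in events:
            if event.get("cat") == "Node":
                op = event.get("args", {}).get("op_name", event.get("name", "?"))
                totals[op] += event.get("dur", 0)

        for op, us in sorted(totals.items(), key=lambda kv: -kv[1])[:top_k]:
            print(f"{op:<24} {us / 1000:.3f} ms total")

    summarize_profile("onnxruntime_profile_example.json")  # illustrative filename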
Requirements file:

@@ -8,4 +8,5 @@ scikit_learn
 timm
 torch
 onnx
+onnxruntime
 ./wheel/utils-0.1.2.dev0-py3-none-any.whl
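With onnx and onnxruntime listed in the requirements, the profiling step can also be reproduced locally with the same command the workflow runs: PYTHONPATH=. python onnx_scripts/profile_onnx.py.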