Added profiling for the ONNX model

Liyu Xiao 2025-05-26 10:28:11 -04:00
parent b3d17f804c
commit 3a32a83c34
5 changed files with 59 additions and 3 deletions

.gitignore

@@ -5,3 +5,4 @@ __pycache__/
 *.ckpt
 *.ipynb
 *.onnx
+*.json

(CI workflow file; path not shown in the capture)

@@ -67,7 +67,7 @@ jobs:
       - name: 3. Convert to ONNX file
         run: |
-          PYTHONPATH=. python convert_to_onnx.py
+          PYTHONPATH=. python onnx_scripts/convert_to_onnx.py
           echo "building inference app"
 
       - name: Upload ONNX file
@@ -76,11 +76,23 @@ jobs:
           name: ria-demo-onnx
           path: onnx_files/inference_recognition_model.onnx
 
+      - name: 4. Profile ONNX model
+        run: |
+          PYTHONPATH=. python onnx_scripts/profile_onnx.py
+
+      - name: Upload JSON profiling data
+        uses: actions/upload-artifact@v3
+        with:
+          name: profile-data
+          path: '**/onnxruntime_profile_*.json'
+
       - name: 4. Convert to ORT file
         run: |
           python -m onnxruntime.tools.convert_onnx_models_to_ort \
             --input /onnx_files/inference_recognition_model.onnx \
             --output /ort_files/inference_recognition_model.ort \
+            --optimization_style Fixed \
+            --target_platform amd64
 
       - name: Upload ORT file
         uses: actions/upload-artifact@v3
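
For reference, the ORT-format file produced by step 4 loads through the same InferenceSession API as the .onnx file. A minimal sketch, assuming the ort_files/ path from the workflow above and a single float32 input (the dynamic-dimension handling mirrors the profiling script below):

import numpy as np
import onnxruntime as ort

# ORT-format models load through the same InferenceSession API as .onnx models.
session = ort.InferenceSession("ort_files/inference_recognition_model.ort")

# Query input metadata instead of hard-coding a name or shape;
# replace any dynamic dimension with 1.
inp = session.get_inputs()[0]
shape = [d if isinstance(d, int) and d > 0 else 1 for d in inp.shape]
dummy = np.random.randn(*shape).astype(np.float32)

outputs = session.run(None, {inp.name: dummy})
print([o.shape for o in outputs])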

onnx_scripts/profile_onnx.py

@@ -0,0 +1,42 @@
+import os
+
+import numpy as np
+import onnxruntime as ort
+
+from helpers.app_settings import get_app_settings
+from onnx_files import ONNX_DIR
+
+
+def profile_onnx_model(path_to_onnx: str, num_runs: int = 100):
+    # Set up session options with profiling enabled
+    options = ort.SessionOptions()
+    options.enable_profiling = True
+
+    # Enable cleanup of QuantizeLinear/DequantizeLinear node pairs (optional optimization)
+    options.add_session_config_entry("session.enable_quant_qdq_cleanup", "1")
+
+    # Set workload type for efficiency (low scheduling priority)
+    options.add_session_config_entry("ep.dynamic.workload_type", "Efficient")
+
+    # Create inference session on CPU
+    session = ort.InferenceSession(
+        path_to_onnx, sess_options=options, providers=["CPUExecutionProvider"]
+    )
+    print("Session providers:", session.get_providers())
+
+    # Get model input details
+    input_name = session.get_inputs()[0].name
+    input_shape = session.get_inputs()[0].shape
+
+    # Generate dummy input data; if the model expects a dynamic shape
+    # (None or a symbolic dim), replace it with a fixed size (e.g. batch 1)
+    input_shape = [dim if isinstance(dim, int) and dim > 0 else 1 for dim in input_shape]
+    input_data = np.random.randn(*input_shape).astype(np.float32)
+
+    # Run inference multiple times to collect profiling data
+    for _ in range(num_runs):
+        session.run(None, {input_name: input_data})
+
+    # End profiling and get the profile file path
+    profile_file = session.end_profiling()
+    print(f"Profiling saved to: {profile_file}")
+
+
+if __name__ == "__main__":
+    settings = get_app_settings()
+    output_path = os.path.join(ONNX_DIR, f"{settings.inference.onnx_model_filename}.onnx")
+    profile_onnx_model(output_path)
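
The profiler writes a Chrome-trace-style JSON file (onnxruntime_profile_*.json, which is what the new .gitignore entry and artifact-upload glob match). A minimal sketch of summarizing per-operator time from that trace, assuming the standard ONNX Runtime trace fields ("cat", "dur", "args.op_name") and a hypothetical filename:

import json
from collections import defaultdict

# Hypothetical profile filename; the real one is printed by end_profiling().
with open("onnxruntime_profile_2025-05-26_10-28-11.json") as f:
    events = json.load(f)

# Sum durations (microseconds) per operator type across node-level events.
totals = defaultdict(int)
for e in events:
    if e.get("cat") == "Node":
        op = e.get("args", {}).get("op_name", "unknown")
        totals[op] += e.get("dur", 0)

for op, us in sorted(totals.items(), key=lambda kv: kv[1], reverse=True):
    print(f"{op:<20} {us / 1000:.2f} ms")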

requirements.txt

@@ -8,4 +8,5 @@ scikit_learn
 timm
 torch
 onnx
+onnxruntime
 ./wheel/utils-0.1.2.dev0-py3-none-any.whl