From a9c9d72bb1093a7aa5cab7f283dd6a39dfedaee9 Mon Sep 17 00:00:00 2001
From: Roman Pope <popenov.r@gmail.com>
Date: Thu, 28 May 2026 07:17:52 -0400
Subject: [PATCH] Canary: trigger fresh WavesFM training (proven-green path)

---
 .riahub/workflows/train.yaml | 387 +++++++++++++++++++++++++----------
 1 file changed, 281 insertions(+), 106 deletions(-)

diff --git a/.riahub/workflows/train.yaml b/.riahub/workflows/train.yaml
index 3ac9ee1..925d3dd 100644
--- a/.riahub/workflows/train.yaml
+++ b/.riahub/workflows/train.yaml
@@ -1,4 +1,4 @@
-name: QMB Training
+name: WavesFM Fine-Tuning
 on:
   push:
     branches: [ "main" ]
@@ -14,22 +14,28 @@ permissions:
   actions: read
 
 jobs:
-  QMB-Training:
-    runs-on: "whitehorse-p40-qmb"
+  WavesFM-Training:
+    runs-on: "ubuntu-latest"
     env:
-      RIAHUB_BASE_URL: ${{ vars.RIAHUB_BASE_URL || secrets.RIAHUB_BASE_URL || '' }}
-      QMB_OUTPUT_ROOT: "/opt/qmb/outputs"
-      QMB_TASK_REPO_ROOT: "/opt/qmb/task_repos"
+      WAVESFM_TASK: "rml"
+      WAVESFM_EPOCHS: "1"
+      WAVESFM_BATCH_SIZE: "8"
+      WAVESFM_OUTPUT_DIR: "/opt/wavesfm/output"
+      # Single source of truth for the cloned WavesFM repo location.
+      # Step scripts read it as the shell variable $WAVESFM_REPO_DIR. To
+      # relocate (e.g. /home/runner/wavesfm), change ONLY this value — every
+      # downstream step uses the env var, no hard-coded paths.
+      WAVESFM_REPO_DIR: "/opt/wavesfm/repo"
+      WAVESFM_ADAPTED_DATA: "/opt/wavesfm/adapted_data.h5"
+      # Override model-download base URL to internal LAN IP
+      # (external riahub.ai LFS endpoint is unreachable from runners).
+      RIAHUB_BASE_URL: "http://192.168.0.170:3000"
     steps:
       - name: Display basic runner info
         run: |
           echo "Runner OS: ${{ runner.os }}"
           echo "Runner Architecture: ${{ runner.arch }}"
 
-      - name: Print CPU information
-        run: |
-          lscpu
-
       - name: Print GPU information
         run: |
           if command -v nvidia-smi &> /dev/null; then
@@ -39,9 +45,9 @@ jobs:
           fi
 
 
-      - name: Checkout Training Dataset
+      - name: "Download Model (qoherent/wavesfm-base/wavesfm-v1p0.pth)"
         shell: bash
-        timeout-minutes: 10
+        timeout-minutes: 4
         env:
           RIAHUB_USER: ${{ secrets.QMBDEMO_USER }}
           RIAHUB_TOKEN: ${{ secrets.QMBDEMO_TOKEN }}
@@ -69,8 +75,16 @@ jobs:
 
           AUTH_HEADER=""
           if [[ -n "${RIAHUB_USER:-}" && -n "${RIAHUB_TOKEN:-}" ]]; then
-            AUTH_HEADER=$(printf 'Authorization: basic %s' "$(printf '%s:%s' "$RIAHUB_USER" "$RIAHUB_TOKEN" | base64 | tr -d '\n')")
+            AUTH_HEADER=$(printf 'Authorization: basic %s' \
+              "$(printf '%s:%s' "$RIAHUB_USER" "$RIAHUB_TOKEN" | base64 | tr -d '\n')")
           fi
+          # ``sudo env GIT_TERMINAL_PROMPT=0`` propagates the env var across
+          # sudo's default ``env_reset`` boundary; a bare ``sudo git`` would
+          # see an empty env on most distros' default sudoers, so the
+          # step-level ``env:`` block's GIT_TERMINAL_PROMPT=0 would NOT
+          # actually reach git child processes. Without it, git falls back
+          # to opening ``/dev/tty`` (the PTY allocated by act_runner) and
+          # prompting for credentials on a 401, hanging until timeout.
           git_auth() {
             if [[ -n "$AUTH_HEADER" ]]; then
               sudo env GIT_TERMINAL_PROMPT=0 git -c "http.extraheader=$AUTH_HEADER" "$@"
@@ -78,9 +92,14 @@ jobs:
               sudo env GIT_TERMINAL_PROMPT=0 git "$@"
             fi
           }
-          REPO_PATH='/qoherent/icc-28.git'
-          DEST_ROOT='/opt/qmb/riahub/dataset/qoherent/icc-28/main'
-          sudo mkdir -p "$(dirname "$DEST_ROOT")"
+
+          REPO_PATH='/qoherent/wavesfm-base.git'
+          REL_PATH='wavesfm-v1p0.pth'
+          REF='48787da4d310e9f939d9a0abe92f2a6cb13fbca7'
+          DEST_PATH='/opt/qmb/riahub/model/wavesfm-v1p0.pth'
+          TMP_DIR=$(mktemp -d)
+          cleanup() { sudo rm -rf "$TMP_DIR"; }
+          trap cleanup EXIT
           if ! command -v git-lfs >/dev/null 2>&1; then
             sudo apt-get update -y
             sudo apt-get install -y git-lfs
@@ -90,49 +109,80 @@ jobs:
           for base in "${BASE_CANDIDATES[@]}"; do
             base="${base%/}"
             REPO_URL="${base}${REPO_PATH}"
-            echo "Fetching dataset from $REPO_URL"
-            sudo rm -rf "$DEST_ROOT"
-            sudo mkdir -p "$DEST_ROOT"
-            sudo git -C "$DEST_ROOT" init || continue
-            sudo git -C "$DEST_ROOT" remote add origin "$REPO_URL" || continue
-            sudo git -C "$DEST_ROOT" sparse-checkout init --no-cone || continue
-            sudo git -C "$DEST_ROOT" sparse-checkout set --no-cone -- \
-              'datasets/icc28-train_v1.0.0.h5' || continue
-            if ! git_auth -C "$DEST_ROOT" fetch --depth=1 origin '1d9083f05d0538110f09e710865b078eba30964b'; then
+            echo "Fetching model from $REPO_URL"
+            sudo rm -rf "$TMP_DIR"
+            sudo mkdir -p "$TMP_DIR"
+            sudo git -C "$TMP_DIR" init || continue
+            sudo git -C "$TMP_DIR" remote add origin "$REPO_URL" || continue
+            # NOT running ``git lfs install --local`` on purpose: it would
+            # register the smudge/clean filter in the repo config, which
+            # then fires during ``git checkout FETCH_HEAD`` and tries to
+            # download every LFS object via its OWN credential helper
+            # subprocess. That subprocess does NOT inherit
+            # ``-c http.extraheader=`` or env vars set by the parent via
+            # ``sudo``, so on an internal/private repo it gets a 401 and
+            # hangs on /dev/tty waiting for a username (the same prompt
+            # hang ``git_auth`` guards against). With no smudge filter
+            # active, checkout writes LFS pointer files verbatim; the
+            # explicit ``git lfs fetch --include=...`` below downloads the
+            # objects WITH the auth header, and ``git lfs checkout`` then
+            # materializes them. (Explicit LFS commands do NOT require
+            # ``lfs install``.)
+            sudo git -C "$TMP_DIR" sparse-checkout init --no-cone || continue
+            sudo git -C "$TMP_DIR" sparse-checkout set --no-cone -- "$REL_PATH" || continue
+            if ! git_auth -C "$TMP_DIR" fetch --depth=1 origin "$REF"; then
               continue
             fi
-            if ! sudo env GIT_TERMINAL_PROMPT=0 GIT_LFS_SKIP_SMUDGE=1 git -C "$DEST_ROOT" -c advice.detachedHead=false checkout FETCH_HEAD; then
+            # ``GIT_LFS_SKIP_SMUDGE=1`` disables the LFS smudge filter for
+            # this checkout. The smudge filter is installed SYSTEM-WIDE on
+            # the runner (``/etc/gitconfig`` filter.lfs.smudge) by the
+            # ``apt-get install git-lfs`` step above, so skipping
+            # ``git lfs install --local`` is NOT sufficient to keep it
+            # from firing. Without this env var, checkout invokes
+            # ``git-lfs filter-process`` which spawns its own
+            # ``git credential fill`` to authenticate LFS-object downloads,
+            # and that subprocess does NOT inherit our auth header — it
+            # hangs on /dev/tty waiting for a username on internal/
+            # private repos. With smudge skipped, checkout writes LFS
+            # pointer files verbatim; the explicit ``git lfs fetch``
+            # below materializes them with proper auth.
+            # ``GIT_TERMINAL_PROMPT=0`` is belt-and-suspenders for any
+            # other auth path that could open /dev/tty.
+            if ! sudo env GIT_TERMINAL_PROMPT=0 GIT_LFS_SKIP_SMUDGE=1 \
+                git -C "$TMP_DIR" -c advice.detachedHead=false checkout FETCH_HEAD; then
               continue
             fi
-            if ! git_auth -C "$DEST_ROOT" lfs fetch origin --include='datasets/icc28-train_v1.0.0.h5' --exclude=""; then
-              echo "LFS fetch failed for candidate $base, trying next" >&2
+            if ! git_auth -C "$TMP_DIR" lfs fetch origin --include="$REL_PATH" --exclude=""; then
               continue
             fi
-            if ! sudo git -C "$DEST_ROOT" lfs checkout; then
-              echo "LFS checkout failed for candidate $base, trying next" >&2
+            if ! sudo git -C "$TMP_DIR" lfs checkout; then
               continue
             fi
-            POINTER_FOUND=0
-            _LFS_REL_PATH='datasets/icc28-train_v1.0.0.h5'
-            if [[ "$(sudo head -c 9 "$DEST_ROOT/$_LFS_REL_PATH" 2>/dev/null || true)" == "version h" ]]; then
-              echo "LFS materialization left a pointer at $DEST_ROOT/$_LFS_REL_PATH for candidate $base, trying next" >&2
-              POINTER_FOUND=1
+            sudo mkdir -p "$(dirname "$DEST_PATH")"
+            if ! sudo cp -f "$TMP_DIR/$REL_PATH" "$DEST_PATH"; then
+              continue
             fi
-            if [[ "$POINTER_FOUND" -ne 0 ]]; then
+            # Reject LFS pointer files (~120-byte ASCII starting with
+            # ``version https://git-lfs.github.com/spec/v1``). Shipping a
+            # pointer to the training job would crash torch.load far from
+            # the root cause.
+            if [ "$(sudo head -c 9 "$DEST_PATH" 2>/dev/null || true)" = 'version h' ]; then
+              echo "ERROR: $DEST_PATH is an LFS pointer, not actual content" >&2
+              echo "  (LFS materialization failed for $REPO_URL)" >&2
               continue
             fi
             MATERIALIZED=1
             break
           done
           if [[ "$MATERIALIZED" -ne 1 ]]; then
-            echo "Failed to materialize dataset using base URL candidates derived from: $BASE_URL_SOURCE" >&2
+            echo "Failed to materialize model file using base URL candidates derived from: $BASE_URL_SOURCE" >&2
             if [[ -z "$AUTH_HEADER" ]]; then
-              echo "  (no credentials configured: set QMBDEMO_USER and QMBDEMO_TOKEN repo secrets for internal/private repos)" >&2
+              echo "  (set QMBDEMO_USER+QMBDEMO_TOKEN repo secrets for internal/private repos)" >&2
             fi
             exit 1
           fi
 
-      - name: Checkout Validation Dataset
+      - name: "Checkout Dataset (qoherent/icc-28/datasets/icc28-test_v1.0.0.h5)"
         shell: bash
         timeout-minutes: 10
         env:
@@ -162,8 +212,16 @@ jobs:
 
           AUTH_HEADER=""
           if [[ -n "${RIAHUB_USER:-}" && -n "${RIAHUB_TOKEN:-}" ]]; then
-            AUTH_HEADER=$(printf 'Authorization: basic %s' "$(printf '%s:%s' "$RIAHUB_USER" "$RIAHUB_TOKEN" | base64 | tr -d '\n')")
+            AUTH_HEADER=$(printf 'Authorization: basic %s' \
+              "$(printf '%s:%s' "$RIAHUB_USER" "$RIAHUB_TOKEN" | base64 | tr -d '\n')")
           fi
+          # ``sudo env GIT_TERMINAL_PROMPT=0`` propagates the env var across
+          # sudo's default ``env_reset`` boundary; a bare ``sudo git`` would
+          # see an empty env on most distros' default sudoers, so the
+          # step-level ``env:`` block's GIT_TERMINAL_PROMPT=0 would NOT
+          # actually reach git child processes. Without it, git falls back
+          # to opening ``/dev/tty`` (the PTY allocated by act_runner) and
+          # prompting for credentials on a 401, hanging until timeout.
           git_auth() {
             if [[ -n "$AUTH_HEADER" ]]; then
               sudo env GIT_TERMINAL_PROMPT=0 git -c "http.extraheader=$AUTH_HEADER" "$@"
@@ -171,8 +229,9 @@ jobs:
               sudo env GIT_TERMINAL_PROMPT=0 git "$@"
             fi
           }
+
           REPO_PATH='/qoherent/icc-28.git'
-          DEST_ROOT='/opt/qmb/riahub/dataset/qoherent/icc-28/main'
+          DEST_ROOT='/opt/qmb/riahub/dataset/qoherent/icc-28/b307499b8c7150e10537d8f2f17fe108f0bc73db'
           sudo mkdir -p "$(dirname "$DEST_ROOT")"
           if ! command -v git-lfs >/dev/null 2>&1; then
             sudo apt-get update -y
@@ -188,13 +247,28 @@ jobs:
             sudo mkdir -p "$DEST_ROOT"
             sudo git -C "$DEST_ROOT" init || continue
             sudo git -C "$DEST_ROOT" remote add origin "$REPO_URL" || continue
+            # See the "Download Model" step above for the rationale on
+            # skipping ``git lfs install --local`` — short version: the
+            # smudge filter it would register tries its own credential
+            # lookup during ``git checkout FETCH_HEAD`` and hangs on
+            # /dev/tty (until the step timeout) when the repo is
+            # internal/private. We rely on the explicit ``git lfs fetch``
+            # (with auth) + ``git lfs checkout`` (local) pair below instead.
             sudo git -C "$DEST_ROOT" sparse-checkout init --no-cone || continue
             sudo git -C "$DEST_ROOT" sparse-checkout set --no-cone -- \
               'datasets/icc28-test_v1.0.0.h5' || continue
             if ! git_auth -C "$DEST_ROOT" fetch --depth=1 origin 'b307499b8c7150e10537d8f2f17fe108f0bc73db'; then
               continue
             fi
-            if ! sudo env GIT_TERMINAL_PROMPT=0 GIT_LFS_SKIP_SMUDGE=1 git -C "$DEST_ROOT" -c advice.detachedHead=false checkout FETCH_HEAD; then
+            # See the "Download Model" step above for the rationale on
+            # ``GIT_LFS_SKIP_SMUDGE=1`` — short version: the runner has
+            # the LFS smudge filter installed system-wide
+            # (``/etc/gitconfig``), so checkout fires it and the filter's
+            # credential helper hangs on /dev/tty for internal repos.
+            # Skipping smudge here lets the explicit ``git lfs fetch`` +
+            # ``git lfs checkout`` below materialize the file with auth.
+            if ! sudo env GIT_TERMINAL_PROMPT=0 GIT_LFS_SKIP_SMUDGE=1 \
+                git -C "$DEST_ROOT" -c advice.detachedHead=false checkout FETCH_HEAD; then
               continue
             fi
             if ! git_auth -C "$DEST_ROOT" lfs fetch origin --include='datasets/icc28-test_v1.0.0.h5' --exclude=""; then
@@ -220,42 +294,94 @@ jobs:
           if [[ "$MATERIALIZED" -ne 1 ]]; then
             echo "Failed to materialize dataset using base URL candidates derived from: $BASE_URL_SOURCE" >&2
             if [[ -z "$AUTH_HEADER" ]]; then
-              echo "  (no credentials configured: set QMBDEMO_USER and QMBDEMO_TOKEN repo secrets for internal/private repos)" >&2
+              echo "  (set QMBDEMO_USER+QMBDEMO_TOKEN repo secrets for internal/private repos)" >&2
             fi
             exit 1
           fi
 
-      - name: Checkout configs
+      - name: Clone WavesFM
+        shell: bash
+        run: |
+          set -euo pipefail
+          mkdir -p "$(dirname "$WAVESFM_REPO_DIR")"
+          rm -rf "$WAVESFM_REPO_DIR"
+          git init "$WAVESFM_REPO_DIR"
+          cd "$WAVESFM_REPO_DIR"
+          git remote add origin https://github.com/AhmedTarek62/wavesfm.git
+          git fetch --depth 1 origin 483831732e32190b7018181b4f2cef93d755cef9
+          git checkout FETCH_HEAD
+          # CPU-runner compatibility patch. WavesFM upstream
+          # main_finetune.py hardcodes a CUDA device in two places:
+          #   line ~87: `add_argument("--device", default="cuda")`
+          #   line ~267: `torch.amp.GradScaler(device="cuda")`
+          # The Train step overrides the first by passing `--device`
+          # explicitly. The second is rewritten below so the scaler is
+          # disabled when `--device cpu` is passed (the Install
+          # dependencies step installs the CPU-only torch wheel). The
+          # grep guard keeps the log honest: "Patched" is printed only if
+          # the literal was found; a miss (or a re-run) is reported as such.
+          if grep -q 'torch\.amp\.GradScaler(device="cuda")' main_finetune.py 2>/dev/null; then
+            sed -i 's|torch\.amp\.GradScaler(device="cuda")|torch.cuda.amp.GradScaler(enabled=(args.device != "cpu"))|' main_finetune.py
+            echo "Patched main_finetune.py GradScaler for CPU/GPU device parity."
+          else
+            echo "main_finetune.py GradScaler literal not found; patch not applied." >&2
+          fi
+
+      - name: Checkout adapter and model config
         uses: actions/checkout@v5
         with:
-          sparse-checkout: .riahub/train_configs
-
-      - name: Copy configs into qmb folder
-        run: |
-          mkdir -p /opt/qmb/configs/
-          sudo cp -r ${{ github.workspace }}/.riahub/train_configs/* /opt/qmb/configs/
-
-
-      - name: List QMB project contents
-        run: |
-          ls -lha /opt/qmb
-          ls -lh /opt/qmb/wheel
-
-      - name: List Downloaded RIA Hub contents
-        run: |
-          ls -lh /opt/qmb/riahub || true
-          ls -lh /opt/qmb/riahub/model || true
-          ls -lh /opt/qmb/riahub/dataset || true
+          sparse-checkout: |
+            scripts/adapt_dataset.py
+            .riahub/train_configs/model
 
       - name: Setup Python
         uses: actions/setup-python@v6
         with:
-          python-version: "3.13"
+          python-version: "3.12"
 
-      - name: Install Python dependencies
+      - name: Install dependencies
         run: |
           set -euo pipefail
-          uv pip install --system --index-url https://pypi.org/simple --upgrade /opt/qmb/wheel/*.whl
+          # Use ``python -m pip`` rather than ``pip`` directly: actions/setup-python
+          # always puts ``python`` on PATH but the ``pip`` shim isn't guaranteed
+          # on every distro / venv layout. ``python -m pip`` always works
+          # against the active interpreter.
+          PIP="python -m pip"
+          # The pinned wavesfm SHA controls repo layout. Three common shapes:
+          #   - Pure script-only repo: requirements.txt only -> install
+          #     requirements (current pinned SHA shape)
+          #   - Python package: setup.py / setup.cfg, OR pyproject.toml
+          #     with a real [build-system] section -> editable install
+          #   - Poetry-only / Hatch-only / tool-only pyproject.toml WITHOUT
+          #     [build-system] -> `pip install -e .` would error; we install
+          #     requirements.txt if available and skip the editable install
+          # Order matters: install requirements.txt first if present (it's
+          # the most explicit dep list); editable install layered on top
+          # only when the repo is genuinely installable (has setup.py /
+          # setup.cfg, or pyproject.toml with [build-system]).
+          cd "$WAVESFM_REPO_DIR"
+          # Pre-install CPU torch + numpy<2 to make requirements.txt see them already-satisfied (saves ~600MB).
+          # torch 2.2.2 has the NumPy 1.x ABI and crashes if numpy 2.x is installed.
+          $PIP install --index-url https://download.pytorch.org/whl/cpu --extra-index-url https://pypi.org/simple "numpy<2" "torch==2.2.2" torchvision
+          $PIP install --upgrade --force-reinstall "numpy<2"
+          INSTALLED_SOMETHING=0
+          if [[ -f requirements.txt ]]; then
+            $PIP install -r requirements.txt
+            INSTALLED_SOMETHING=1
+          fi
+          if [[ -f setup.py ]] || [[ -f setup.cfg ]] || \
+             ( [[ -f pyproject.toml ]] && grep -q "^\[build-system\]" pyproject.toml ); then
+            $PIP install -e .
+            INSTALLED_SOMETHING=1
+          fi
+          if [[ "$INSTALLED_SOMETHING" -eq 0 ]]; then
+            echo "ERROR: $WAVESFM_REPO_DIR has no installable Python metadata" >&2
+            echo "  expected: requirements.txt, setup.py, setup.cfg, or pyproject.toml with [build-system]" >&2
+            exit 1
+          fi
+          $PIP install h5py scipy
+          # Force numpy<2 again (requirements.txt may have bumped it via transitive deps).
+          $PIP install --upgrade --force-reinstall "numpy<2"
           TORCH_INDEX_URL="https://download.pytorch.org/whl/cpu"
           TORCH_REASON="no NVIDIA GPU detected"
           if command -v nvidia-smi &> /dev/null; then
@@ -275,52 +401,101 @@ jobs:
             fi
           fi
           echo "Installing PyTorch from ${TORCH_INDEX_URL} (${TORCH_REASON})."
-          uv pip install --system --index-url "$TORCH_INDEX_URL" --upgrade --force-reinstall torch torchvision
-          uv pip install --system --index-url https://pypi.org/simple --upgrade "onnxscript>=0.7.0" "onnx-ir>=0.2.1" onnx onnxruntime timm
+          # torch pre-installed at step head; force-reinstall disabled to avoid 755MB redownload
+          echo "Skipping torch force-reinstall ($TORCH_INDEX_URL)"
 
-      - name: Run Training Script
-        run: |
-          cd /opt/qmb
-          export PYTHONPATH="$QMB_TASK_REPO_ROOT:${PYTHONPATH:-}"
-          #source .venv/bin/activate
-          qmb train --config /opt/qmb/configs/train.yaml
-
-      - name: Collect training artifacts
-        if: always()
+      - name: Find and adapt dataset
+        shell: bash
         run: |
           set -euo pipefail
-          ARTIFACT_DIR="${{ github.workspace }}/.riahub/artifacts/training"
-          rm -rf "$ARTIFACT_DIR"
-          mkdir -p "$ARTIFACT_DIR"
-          if [[ -d "$QMB_OUTPUT_ROOT" ]]; then
-            while IFS= read -r -d '' file; do
-              rel="${file#${QMB_OUTPUT_ROOT}/}"
-              if [[ "$rel" == "$file" ]]; then
-                rel="$(basename "$file")"
-              fi
-              mkdir -p "$ARTIFACT_DIR/$(dirname "$rel")"
-              cp "$file" "$ARTIFACT_DIR/$rel"
-            done < <(
-              find "$QMB_OUTPUT_ROOT" -type f \( \
-                -path "*/checkpoints/best.pt" -o \
-                -path "*/checkpoints/best.ckpt" -o \
-                -name "*.onnx" -o \
-                -path "*/evaluation/*/confusion_matrix.png" -o \
-                -path "*/evaluation/*/parameter_sweeps/*.png" \
-              \) -print0
-            )
-          else
-            echo "QMB output root not found: $QMB_OUTPUT_ROOT"
+          # Find .h5 files but EXCLUDE LFS pointer files. The dataset
+          # checkout's sparse-checkout pattern is supposed to limit the
+          # working tree to the target file, but sparse-checkout in
+          # ``init+fetch+checkout`` mode (vs the previous
+          # ``clone --no-checkout``) doesn't always activate cleanly,
+          # leaving OTHER LFS-tracked files (e.g. sibling datasets in
+          # the same repo) as unmaterialized 120-180 byte pointer files
+          # in the working tree. The training adapter must skip those —
+          # otherwise ``find ... -name '*.h5'`` counts pointer files as
+          # real datasets and the "expected exactly one" check trips.
+          # The same ``head -c 9`` LFS-pointer test used elsewhere works
+          # here: real HDF5 files start with the HDF5 magic byte
+          # 0x89, never with the ASCII string "version h".
+          H5_CANDIDATES=()
+          while IFS= read -r f; do
+            # Strip NUL bytes (binary HDF5 headers may contain them) so bash does not warn here.
+            if [[ "$(head -c 9 "$f" 2>/dev/null | tr -d '\0' || true)" != "version h" ]]; then
+              H5_CANDIDATES+=("$f")
+            fi
+          done < <(find /opt/qmb/riahub/dataset -name '*.h5' -type f)
+          if [[ ${#H5_CANDIDATES[@]} -eq 0 ]]; then
+            echo "ERROR: No materialized .h5 dataset file found in /opt/qmb/riahub/dataset/" >&2
+            echo "  (any .h5 files present are LFS pointers — LFS materialization may have failed)" >&2
+            exit 1
           fi
-          echo "Collected training artifacts:"
-          find "$ARTIFACT_DIR" -type f -print | sort || true
+          if [[ ${#H5_CANDIDATES[@]} -gt 1 ]]; then
+            echo "ERROR: Multiple materialized .h5 files found (${#H5_CANDIDATES[@]}); expected exactly one:" >&2
+            printf '  %s\n' "${H5_CANDIDATES[@]}" >&2
+            exit 1
+          fi
+          INPUT_H5="${H5_CANDIDATES[0]}"
+          echo "Adapting: $INPUT_H5"
+          python "${{ github.workspace }}/scripts/adapt_dataset.py" "$INPUT_H5" "$WAVESFM_ADAPTED_DATA"
 
-      - name: ⬆️ Upload training artifacts
-        if: always()
+      - name: Verify adapted dataset
+        run: |
+          # Print the adapted file's sample/label shapes and dtypes plus the first class labels.
+          python -c "
+          import h5py, json, os
+          with h5py.File(os.environ['WAVESFM_ADAPTED_DATA'], 'r') as f:
+              print('sample:', f['sample'].shape, f['sample'].dtype)
+              print('label:', f['label'].shape, f['label'].dtype)
+              labels = json.loads(f.attrs['labels'])
+              print('classes:', len(labels), labels[:5])
+          "
+
+      - name: Train WavesFM (Linear Probe)
+        shell: bash
+        env:
+          PYTHONUNBUFFERED: "1"
+        run: |
+          set -euo pipefail
+          # Pick the runtime device from what the INSTALLED torch build can
+          # actually use, not from the mere presence of nvidia-smi: the
+          # Install dependencies step installs the CPU-only torch wheel, so
+          # on a GPU host `--device cuda` would hard-fail with
+          # ``Torch not compiled with CUDA enabled`` at model.to(device).
+          # Paired with the Clone WavesFM step's GradScaler patch above.
+          DEVICE="cpu"
+          if python -c 'import sys, torch; sys.exit(0 if torch.cuda.is_available() else 1)' >/dev/null 2>&1; then
+            DEVICE="cuda"
+          fi
+          echo "Training device: $DEVICE"
+          cd "$WAVESFM_REPO_DIR"
+          python -u main_finetune.py \
+            --task "${{ env.WAVESFM_TASK }}" \
+            --device "$DEVICE" \
+            --train-data "$WAVESFM_ADAPTED_DATA" \
+            --finetune /opt/qmb/riahub/model/wavesfm-v1p0.pth \
+            --model vit_multi_small \
+            --use-conditional-ln \
+            --class-weights \
+            --warmup-epochs 5 \
+            --val-split 0.2 \
+            --epochs "${{ env.WAVESFM_EPOCHS }}" \
+            --batch-size "${{ env.WAVESFM_BATCH_SIZE }}" \
+            --blr 1e-3 \
+            --freeze-encoder \
+            --output-dir "${{ env.WAVESFM_OUTPUT_DIR }}"
+
+      # Runs even after failures so log.txt survives. v3 matches TRAIN_TEMPLATE; v4 upgrade is deferred/P2.
+      - name: Upload training artifacts
+        if: always()
         uses: actions/upload-artifact@v3
         with:
-          name: training-artifacts
-          path: ${{ github.workspace }}/.riahub/artifacts/training
+          name: wavesfm-training-artifacts
+          path: |
+            ${{ env.WAVESFM_OUTPUT_DIR }}/best.pth
+            ${{ env.WAVESFM_OUTPUT_DIR }}/log.txt
           if-no-files-found: warn
-
-# committed at 2026-05-28T09:37:23.399702+00:00
+# committed at 2026-05-28T11:17:49.563043+00:00