From ceb2c3fc569c01170bbccbe9cdcc18cbc85c7081 Mon Sep 17 00:00:00 2001
From: ash <ash@qoherent.ai>
Date: Thu, 28 May 2026 06:47:45 -0400
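Subject: [PATCH] Training run - 2026-05-28 06:47:45

Add .riahub/train_configs/model/model.yaml (source wavesfm, task rml,
10 epochs, batch size 2048) and update .riahub/workflows/train.yaml
to match. Notable workflow changes:

- Pin the runner to ubuntu-24.04 instead of ubuntu-latest.
- Raise WAVESFM_EPOCHS from 3 to 10 and WAVESFM_BATCH_SIZE from 16
  to 2048.
- Remove the job-level RIAHUB_BASE_URL environment variable.
- Rename the dataset checkout step and strip its explanatory comments.
- Patch main_finetune.py to torch.amp.GradScaler(device=args.device,
  enabled=...) rather than torch.cuda.amp.GradScaler(enabled=...).
- Drop the CPU torch==2.2.2 pre-install and the numpy<2 pins, and
  force-reinstall torch and torchvision from the selected PyTorch
  index at the end of the dependency step.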

---
 .riahub/train_configs/model/model.yaml |  5 +++
 .riahub/workflows/train.yaml           | 60 +++++---------------------
 2 files changed, 16 insertions(+), 49 deletions(-)
 create mode 100644 .riahub/train_configs/model/model.yaml

diff --git a/.riahub/train_configs/model/model.yaml b/.riahub/train_configs/model/model.yaml
new file mode 100644
index 0000000..4183b59
--- /dev/null
+++ b/.riahub/train_configs/model/model.yaml
@@ -0,0 +1,5 @@
+source: wavesfm
+name: WavesFM Linear Probe
+task: rml
+epochs: 10
+batch_size: 2048
diff --git a/.riahub/workflows/train.yaml b/.riahub/workflows/train.yaml
index af8db83..e1982d1 100644
--- a/.riahub/workflows/train.yaml
+++ b/.riahub/workflows/train.yaml
@@ -15,11 +15,11 @@ permissions:
 
 jobs:
   WavesFM-Training:
-    runs-on: "ubuntu-latest"
+    runs-on: "ubuntu-24.04"
     env:
       WAVESFM_TASK: "rml"
-      WAVESFM_EPOCHS: "3"
-      WAVESFM_BATCH_SIZE: "16"
+      WAVESFM_EPOCHS: "10"
+      WAVESFM_BATCH_SIZE: "2048"
       WAVESFM_OUTPUT_DIR: "/opt/wavesfm/output"
       # Single source of truth for the cloned WavesFM repo location.
       # Referenced as ${{ env.WAVESFM_REPO_DIR }} in steps. To relocate
@@ -27,7 +27,6 @@ jobs:
       # downstream step uses the env var, no hard-coded paths.
       WAVESFM_REPO_DIR: "/opt/wavesfm/repo"
       WAVESFM_ADAPTED_DATA: "/opt/wavesfm/adapted_data.h5"
-      RIAHUB_BASE_URL: "http://192.168.0.170:3000"
     steps:
       - name: Display basic runner info
         run: |
@@ -42,7 +41,6 @@ jobs:
             echo "No NVIDIA GPU available."
           fi
 
-
       - name: "Download Model (qoherent/wavesfm-base/wavesfm-v1p0.pth)"
         shell: bash
         timeout-minutes: 4
@@ -179,8 +177,7 @@ jobs:
             fi
             exit 1
           fi
-
-      - name: "Checkout Dataset (qoherent/icc-demo/icc_canary_2026_05_28-v1.0.0.h5)"
+      - name: Checkout Training Dataset
         shell: bash
         timeout-minutes: 10
         env:
@@ -210,16 +207,8 @@ jobs:
 
           AUTH_HEADER=""
           if [[ -n "${RIAHUB_USER:-}" && -n "${RIAHUB_TOKEN:-}" ]]; then
-            AUTH_HEADER=$(printf 'Authorization: basic %s' \
-              "$(printf '%s:%s' "$RIAHUB_USER" "$RIAHUB_TOKEN" | base64 | tr -d '\n')")
+            AUTH_HEADER=$(printf 'Authorization: basic %s' "$(printf '%s:%s' "$RIAHUB_USER" "$RIAHUB_TOKEN" | base64 | tr -d '\n')")
           fi
-          # ``sudo env GIT_TERMINAL_PROMPT=0`` propagates the env var across
-          # sudo's default ``env_reset`` boundary; a bare ``sudo git`` would
-          # see an empty env on most distros' default sudoers, so the
-          # step-level ``env:`` block's GIT_TERMINAL_PROMPT=0 would NOT
-          # actually reach git child processes. Without it, git falls back
-          # to opening ``/dev/tty`` (the PTY allocated by act_runner) and
-          # prompting for credentials on a 401, hanging until timeout.
           git_auth() {
             if [[ -n "$AUTH_HEADER" ]]; then
               sudo env GIT_TERMINAL_PROMPT=0 git -c "http.extraheader=$AUTH_HEADER" "$@"
@@ -227,9 +216,8 @@ jobs:
               sudo env GIT_TERMINAL_PROMPT=0 git "$@"
             fi
           }
-
           REPO_PATH='/qoherent/icc-demo.git'
-          DEST_ROOT='/opt/qmb/riahub/dataset/qoherent/icc-demo/9f87fa9fe2badd314ad81379064e236ea494e89d'
+          DEST_ROOT='/opt/qmb/riahub/dataset/qoherent/icc-demo/9f87fa9fe2badd314ad81379064e236ea494e89d'
           sudo mkdir -p "$(dirname "$DEST_ROOT")"
           if ! command -v git-lfs >/dev/null 2>&1; then
             sudo apt-get update -y
@@ -245,28 +233,13 @@ jobs:
             sudo mkdir -p "$DEST_ROOT"
             sudo git -C "$DEST_ROOT" init || continue
             sudo git -C "$DEST_ROOT" remote add origin "$REPO_URL" || continue
-            # See ``_render_model_checkout`` for the rationale on skipping
-            # ``git lfs install --local`` — short version: the smudge
-            # filter it would register tries its own credential lookup
-            # during ``git checkout FETCH_HEAD`` and hangs forever on
-            # /dev/tty when the repo is internal/private. We rely on
-            # the explicit ``git lfs fetch`` (with auth) +
-            # ``git lfs checkout`` (local) pair below instead.
             sudo git -C "$DEST_ROOT" sparse-checkout init --no-cone || continue
             sudo git -C "$DEST_ROOT" sparse-checkout set --no-cone -- \
               'icc_canary_2026_05_28-v1.0.0.h5' || continue
             if ! git_auth -C "$DEST_ROOT" fetch --depth=1 origin '9f87fa9fe2badd314ad81379064e236ea494e89d'; then
               continue
             fi
-            # See ``_render_model_checkout`` for the rationale on
-            # ``GIT_LFS_SKIP_SMUDGE=1`` — short version: the runner has
-            # the LFS smudge filter installed system-wide
-            # (``/etc/gitconfig``), so checkout fires it and the filter's
-            # credential helper hangs on /dev/tty for internal repos.
-            # Skipping smudge here lets the explicit ``git lfs fetch``
-            # below handle materialization with proper auth.
-            if ! sudo env GIT_TERMINAL_PROMPT=0 GIT_LFS_SKIP_SMUDGE=1 \
-                git -C "$DEST_ROOT" -c advice.detachedHead=false checkout FETCH_HEAD; then
+            if ! sudo env GIT_TERMINAL_PROMPT=0 GIT_LFS_SKIP_SMUDGE=1 git -C "$DEST_ROOT" -c advice.detachedHead=false checkout FETCH_HEAD; then
               continue
             fi
             if ! git_auth -C "$DEST_ROOT" lfs fetch origin --include='icc_canary_2026_05_28-v1.0.0.h5' --exclude=""; then
@@ -292,7 +265,7 @@ jobs:
           if [[ "$MATERIALIZED" -ne 1 ]]; then
             echo "Failed to materialize dataset using base URL candidates derived from: $BASE_URL_SOURCE" >&2
             if [[ -z "$AUTH_HEADER" ]]; then
-              echo "  (set QMBDEMO_USER+QMBDEMO_TOKEN repo secrets for internal/private repos)" >&2
+              echo "  (no credentials configured: set QMBDEMO_USER and QMBDEMO_TOKEN repo secrets for internal/private repos)" >&2
             fi
             exit 1
           fi
@@ -321,7 +294,7 @@ jobs:
           # `--device cpu` from the Train step actually takes effect.
           # No-op if the line already uses args.device (idempotent).
           if [[ -f main_finetune.py ]]; then
-            sed -i 's|torch\.amp\.GradScaler(device="cuda")|torch.cuda.amp.GradScaler(enabled=(args.device != "cpu"))|' main_finetune.py
+            sed -i 's|torch\.amp\.GradScaler(device="cuda")|torch.amp.GradScaler(device=args.device, enabled=(args.device != "cpu"))|' main_finetune.py
             echo "Patched main_finetune.py GradScaler for CPU/GPU device parity."
           fi
 
@@ -358,12 +331,6 @@ jobs:
           # only when the repo is genuinely installable (has setup.py /
           # setup.cfg, or pyproject.toml with [build-system]).
           cd "$WAVESFM_REPO_DIR"
-          # FAST-PATH: install CPU torch from pytorch.org/whl/cpu FIRST (~200MB).
-          # This makes torch==X already-satisfied so requirements.txt does not
-          # pull the 755MB manylinux wheel with bundled CUDA from PyPI default.
-          $PIP install --index-url https://download.pytorch.org/whl/cpu --extra-index-url https://pypi.org/simple "numpy<2" "torch==2.2.2" torchvision
-          # Also ensure numpy<2 is pinned for the requirements.txt install below
-          $PIP install --upgrade --force-reinstall "numpy<2"
           INSTALLED_SOMETHING=0
           if [[ -f requirements.txt ]]; then
             $PIP install -r requirements.txt
@@ -380,10 +347,6 @@ jobs:
             exit 1
           fi
           $PIP install h5py scipy
-          # After requirements.txt, force numpy back to <2 (torch 2.2.2 has
-          # NumPy 1.x ABI; transitive deps in requirements.txt would
-          # otherwise leave numpy 2.x in place and crash at runtime).
-          $PIP install --upgrade --force-reinstall "numpy<2"
           TORCH_INDEX_URL="https://download.pytorch.org/whl/cpu"
           TORCH_REASON="no NVIDIA GPU detected"
           if command -v nvidia-smi &> /dev/null; then
@@ -403,8 +366,7 @@ jobs:
             fi
           fi
           echo "Installing PyTorch from ${TORCH_INDEX_URL} (${TORCH_REASON})."
-          # torch was pre-installed at the top of this step; no force-reinstall needed.
-          echo "Skipping torch force-reinstall (already installed at step head): $TORCH_INDEX_URL"
+          $PIP install --index-url "$TORCH_INDEX_URL" --upgrade --force-reinstall torch torchvision
 
       - name: Find and adapt dataset
         shell: bash
@@ -500,4 +462,4 @@ jobs:
             ${{ env.WAVESFM_OUTPUT_DIR }}/best.pth
             ${{ env.WAVESFM_OUTPUT_DIR }}/log.txt
           if-no-files-found: warn
-# committed at 2026-05-28T06:39:26.910514+00:00
+# committed at 2026-05-28T10:47:45.318818+00:00