Re-render workflow with dataset download_items + dawson-standard CPU runner

This commit is contained in:
P Roman Pope 2026-05-28 02:36:43 -04:00
parent c5ed26372d
commit 0844b20ee4

View File

@ -15,7 +15,7 @@ permissions:
jobs:
WavesFM-Training:
runs-on: "ubuntu-latest"
runs-on: "dawson-standard"
env:
WAVESFM_TASK: "rml"
WAVESFM_EPOCHS: "3"
@ -27,9 +27,7 @@ jobs:
# downstream step uses the env var, no hard-coded paths.
WAVESFM_REPO_DIR: "/opt/wavesfm/repo"
WAVESFM_ADAPTED_DATA: "/opt/wavesfm/adapted_data.h5"
# Override the model-download base URL to logan's internal LAN IP.
# External riahub.ai LFS batch endpoint is unreachable from runner;
# internal IP works directly.
# Override model-download base URL to internal LAN IP (external proxy LFS unreachable from runner)
RIAHUB_BASE_URL: "http://192.168.0.170:3000"
steps:
- name: Display basic runner info
@ -183,6 +181,123 @@ jobs:
exit 1
fi
# Step: fetch the dataset file icc_canary_2026_05_28-v1.0.0.h5 from the
# qoherent/icc-demo repository onto the runner.
- name: "Checkout Dataset (qoherent/icc-demo/icc_canary_2026_05_28-v1.0.0.h5)"
shell: bash
# Bound the whole step so a stalled transfer cannot hold the job open.
timeout-minutes: 10
env:
# Credentials come from repository secrets; the script below only builds an
# Authorization header when both values are non-empty.
RIAHUB_USER: ${{ secrets.QMBDEMO_USER }}
RIAHUB_TOKEN: ${{ secrets.QMBDEMO_TOKEN }}
# Tell git never to prompt interactively for credentials.
GIT_TERMINAL_PROMPT: "0"
run: |
# Abort on command errors, unset variables, and failures inside pipelines.
set -euo pipefail
# Use RIAHUB_BASE_URL when it is set and non-empty; otherwise fall back to
# the default below.
DEFAULT_BASE_URL='https://riahub.ai'
BASE_URL_SOURCE=${RIAHUB_BASE_URL:-$DEFAULT_BASE_URL}
# Drop one trailing slash so later path concatenation does not double it.
BASE_URL_SOURCE="${BASE_URL_SOURCE%/}"
# Print the base URLs to try, one per line, in priority order.
#   $1 - base URL, with or without an http:// or https:// scheme.
# A URL that already carries a scheme is printed as given, followed by the
# same address under the other scheme. A bare host is printed as https
# first, then http.
build_base_candidates() {
  local given="$1"
  case "$given" in
    http://*)
      printf '%s\n' "$given" "https://${given#http://}"
      ;;
    https://*)
      printf '%s\n' "$given" "http://${given#https://}"
      ;;
    *)
      printf '%s\n' "https://$given" "http://$given"
      ;;
  esac
}
# Build the HTTP Basic Authorization header once, and only when both
# credentials are present. An empty AUTH_HEADER means git runs without
# an extra header.
if [[ -z "${RIAHUB_USER:-}" || -z "${RIAHUB_TOKEN:-}" ]]; then
  AUTH_HEADER=""
else
  AUTH_HEADER=$(printf 'Authorization: basic %s' \
    "$(printf '%s:%s' "$RIAHUB_USER" "$RIAHUB_TOKEN" | base64 | tr -d '\n')")
fi
# Run git under sudo with the given arguments, attaching AUTH_HEADER as
# ``http.extraheader`` when it is non-empty.
#
# GIT_TERMINAL_PROMPT=0 is passed through ``sudo env`` on purpose: sudo's
# default ``env_reset`` clears the environment on most distros' default
# sudoers, so the value from the step-level ``env:`` block would NOT reach
# git when invoked as a bare ``sudo git``. Without the variable, git falls
# back to opening ``/dev/tty`` (the PTY allocated by act_runner) and
# prompts for credentials on a 401, hanging until the step times out.
git_auth() {
  if [[ -z "$AUTH_HEADER" ]]; then
    sudo env GIT_TERMINAL_PROMPT=0 git "$@"
    return
  fi
  sudo env GIT_TERMINAL_PROMPT=0 git -c "http.extraheader=$AUTH_HEADER" "$@"
}
# Materialize one LFS-tracked dataset file at a pinned commit, trying each
# candidate base URL in turn until one yields real file content. Exits 1
# when no candidate succeeds.
REPO_PATH='/qoherent/icc-demo.git'
DEST_ROOT='/opt/qmb/riahub/dataset/qoherent/icc-demo/9f87fa9fe2badd314ad81379064e236ea494e89d'
# Repository-relative path of the dataset file. Defined once and reused for
# the sparse-checkout pattern, the LFS include filter, and the final
# on-disk verification so the three can never drift apart.
_LFS_REL_PATH='icc_canary_2026_05_28-v1.0.0.h5'
sudo mkdir -p "$(dirname "$DEST_ROOT")"
# Install git-lfs only when the runner image does not already provide it.
if ! command -v git-lfs >/dev/null 2>&1; then
  sudo apt-get update -y
  sudo apt-get install -y git-lfs
fi
mapfile -t BASE_CANDIDATES < <(build_base_candidates "$BASE_URL_SOURCE")
MATERIALIZED=0
for base in "${BASE_CANDIDATES[@]}"; do
  base="${base%/}"
  REPO_URL="${base}${REPO_PATH}"
  echo "Fetching dataset from $REPO_URL"
  # Start every attempt from an empty directory so a failed candidate
  # cannot leave partial state behind for the next one.
  sudo rm -rf "$DEST_ROOT"
  sudo mkdir -p "$DEST_ROOT"
  sudo git -C "$DEST_ROOT" init || continue
  sudo git -C "$DEST_ROOT" remote add origin "$REPO_URL" || continue
  # See ``_render_model_checkout`` for the rationale on skipping
  # ``git lfs install --local`` — short version: the smudge
  # filter it would register tries its own credential lookup
  # during ``git checkout FETCH_HEAD`` and hangs forever on
  # /dev/tty when the repo is internal/private. We rely on
  # the explicit ``git lfs fetch`` (with auth) +
  # ``git lfs checkout`` (local) pair below instead.
  sudo git -C "$DEST_ROOT" sparse-checkout init --no-cone || continue
  sudo git -C "$DEST_ROOT" sparse-checkout set --no-cone -- \
    "$_LFS_REL_PATH" || continue
  if ! git_auth -C "$DEST_ROOT" fetch --depth=1 origin '9f87fa9fe2badd314ad81379064e236ea494e89d'; then
    continue
  fi
  # See ``_render_model_checkout`` for the rationale on
  # ``GIT_LFS_SKIP_SMUDGE=1`` — short version: the runner has
  # the LFS smudge filter installed system-wide
  # (``/etc/gitconfig``), so checkout fires it and the filter's
  # credential helper hangs on /dev/tty for internal repos.
  # Skipping smudge here lets the explicit ``git lfs fetch``
  # below handle materialization with proper auth.
  if ! sudo env GIT_TERMINAL_PROMPT=0 GIT_LFS_SKIP_SMUDGE=1 \
    git -C "$DEST_ROOT" -c advice.detachedHead=false checkout FETCH_HEAD; then
    continue
  fi
  if ! git_auth -C "$DEST_ROOT" lfs fetch origin --include="$_LFS_REL_PATH" --exclude=""; then
    echo "LFS fetch failed for candidate $base, trying next" >&2
    continue
  fi
  if ! sudo git -C "$DEST_ROOT" lfs checkout; then
    echo "LFS checkout failed for candidate $base, trying next" >&2
    continue
  fi
  # Exit codes alone do not prove the file exists: when the path is not
  # present in the fetched commit, the git commands above can still all
  # succeed and leave an empty working tree. Require a non-empty file
  # before declaring success.
  if ! sudo test -s "$DEST_ROOT/$_LFS_REL_PATH"; then
    echo "Dataset file missing or empty at $DEST_ROOT/$_LFS_REL_PATH for candidate $base, trying next" >&2
    continue
  fi
  # A file that still begins with "version h" is an unresolved LFS pointer
  # rather than the real payload, so this candidate did not deliver data.
  if [[ "$(sudo head -c 9 "$DEST_ROOT/$_LFS_REL_PATH" 2>/dev/null || true)" == "version h" ]]; then
    echo "LFS materialization left a pointer at $DEST_ROOT/$_LFS_REL_PATH for candidate $base, trying next" >&2
    continue
  fi
  MATERIALIZED=1
  break
done
if [[ "$MATERIALIZED" -ne 1 ]]; then
  echo "Failed to materialize dataset using base URL candidates derived from: $BASE_URL_SOURCE" >&2
  # Missing credentials are the most likely cause for private repos, so
  # point the operator at the secrets that feed AUTH_HEADER.
  if [[ -z "$AUTH_HEADER" ]]; then
    echo "  (set QMBDEMO_USER+QMBDEMO_TOKEN repo secrets for internal/private repos)" >&2
  fi
  exit 1
fi
- name: Clone WavesFM
shell: bash
run: |
@ -375,4 +490,4 @@ jobs:
${{ env.WAVESFM_OUTPUT_DIR }}/best.pth
${{ env.WAVESFM_OUTPUT_DIR }}/log.txt
if-no-files-found: warn
# re-triggered after org transfer at 2026-05-28T05:53:45+00:00
# committed at 2026-05-28T06:36:42.903115+00:00