Training run - 2026-05-28 07:01:57

This commit is contained in:
A ash 2026-05-28 07:01:57 -04:00
parent ceb2c3fc56
commit 3ef7c1669b

View File

@ -15,7 +15,7 @@ permissions:
jobs:
WavesFM-Training:
runs-on: "ubuntu-24.04"
runs-on: "ubuntu-latest-2080"
env:
WAVESFM_TASK: "rml"
WAVESFM_EPOCHS: "10"
@ -27,6 +27,9 @@ jobs:
# downstream step uses the env var, no hard-coded paths.
WAVESFM_REPO_DIR: "/opt/wavesfm/repo"
WAVESFM_ADAPTED_DATA: "/opt/wavesfm/adapted_data.h5"
# Override the model-download base URL with the internal LAN address
# (the external riahub.ai LFS endpoint is unreachable from the runners).
RIAHUB_BASE_URL: "http://192.168.0.170:3000"
steps:
- name: Display basic runner info
run: |
@ -294,7 +297,7 @@ jobs:
# `--device cpu` from the Train step actually takes effect.
# No-op if main_finetune.py no longer contains the hard-coded GradScaler(device="cuda") call (idempotent).
if [[ -f main_finetune.py ]]; then
sed -i 's|torch\.amp\.GradScaler(device="cuda")|torch.amp.GradScaler(device=args.device, enabled=(args.device != "cpu"))|' main_finetune.py
sed -i 's|torch\.amp\.GradScaler(device="cuda")|torch.cuda.amp.GradScaler(enabled=(args.device != "cpu"))|' main_finetune.py
echo "Patched main_finetune.py GradScaler for CPU/GPU device parity."
fi
@ -331,6 +334,10 @@ jobs:
# only when the repo is genuinely installable (has setup.py /
# setup.cfg, or pyproject.toml with [build-system]).
cd "$WAVESFM_REPO_DIR"
# Pre-install the CPU build of torch and numpy<2 so that requirements.txt finds them already satisfied (saves ~600MB).
# torch 2.2.2 is built against the NumPy 1.x ABI and crashes if numpy 2.x is installed.
$PIP install --index-url https://download.pytorch.org/whl/cpu --extra-index-url https://pypi.org/simple "numpy<2" "torch==2.2.2" torchvision
$PIP install --upgrade --force-reinstall "numpy<2"
INSTALLED_SOMETHING=0
if [[ -f requirements.txt ]]; then
$PIP install -r requirements.txt
@ -347,6 +354,8 @@ jobs:
exit 1
fi
$PIP install h5py scipy
# Force numpy<2 again (requirements.txt may have bumped it via transitive deps).
$PIP install --upgrade --force-reinstall "numpy<2"
TORCH_INDEX_URL="https://download.pytorch.org/whl/cpu"
TORCH_REASON="no NVIDIA GPU detected"
if command -v nvidia-smi &> /dev/null; then
@ -366,7 +375,8 @@ jobs:
fi
fi
echo "Installing PyTorch from ${TORCH_INDEX_URL} (${TORCH_REASON})."
$PIP install --index-url "$TORCH_INDEX_URL" --upgrade --force-reinstall torch torchvision
# torch is already pre-installed at the start of this step; the force-reinstall is disabled to avoid a 755MB re-download.
echo "Skipping torch force-reinstall ($TORCH_INDEX_URL)"
- name: Find and adapt dataset
shell: bash
@ -462,4 +472,4 @@ jobs:
${{ env.WAVESFM_OUTPUT_DIR }}/best.pth
${{ env.WAVESFM_OUTPUT_DIR }}/log.txt
if-no-files-found: warn
# committed at 2026-05-28T10:47:45.318818+00:00
# committed at 2026-05-28T11:01:57.936314+00:00