Training run - 2026-05-28 07:01:57
Some checks failed
WavesFM Fine-Tuning / WavesFM-Training (push) Failing after 1s

This commit is contained in:
A ash 2026-05-28 07:01:57 -04:00
parent ceb2c3fc56
commit 3ef7c1669b

View File

@@ -15,7 +15,7 @@ permissions:
jobs: jobs:
WavesFM-Training: WavesFM-Training:
runs-on: "ubuntu-24.04" runs-on: "ubuntu-latest-2080"
env: env:
WAVESFM_TASK: "rml" WAVESFM_TASK: "rml"
WAVESFM_EPOCHS: "10" WAVESFM_EPOCHS: "10"
@@ -27,6 +27,9 @@ jobs:
# downstream step uses the env var, no hard-coded paths. # downstream step uses the env var, no hard-coded paths.
WAVESFM_REPO_DIR: "/opt/wavesfm/repo" WAVESFM_REPO_DIR: "/opt/wavesfm/repo"
WAVESFM_ADAPTED_DATA: "/opt/wavesfm/adapted_data.h5" WAVESFM_ADAPTED_DATA: "/opt/wavesfm/adapted_data.h5"
# Override model-download base URL to internal LAN IP
# (external riahub.ai LFS endpoint is unreachable from runners).
RIAHUB_BASE_URL: "http://192.168.0.170:3000"
steps: steps:
- name: Display basic runner info - name: Display basic runner info
run: | run: |
@@ -294,7 +297,7 @@ jobs:
# `--device cpu` from the Train step actually takes effect. # `--device cpu` from the Train step actually takes effect.
# No-op if the line already uses args.device (idempotent). # No-op if the line already uses args.device (idempotent).
if [[ -f main_finetune.py ]]; then if [[ -f main_finetune.py ]]; then
sed -i 's|torch\.amp\.GradScaler(device="cuda")|torch.amp.GradScaler(device=args.device, enabled=(args.device != "cpu"))|' main_finetune.py sed -i 's|torch\.amp\.GradScaler(device="cuda")|torch.cuda.amp.GradScaler(enabled=(args.device != "cpu"))|' main_finetune.py
echo "Patched main_finetune.py GradScaler for CPU/GPU device parity." echo "Patched main_finetune.py GradScaler for CPU/GPU device parity."
fi fi
@@ -331,6 +334,10 @@ jobs:
# only when the repo is genuinely installable (has setup.py / # only when the repo is genuinely installable (has setup.py /
# setup.cfg, or pyproject.toml with [build-system]). # setup.cfg, or pyproject.toml with [build-system]).
cd "$WAVESFM_REPO_DIR" cd "$WAVESFM_REPO_DIR"
# Pre-install CPU torch + numpy<2 to make requirements.txt see them already-satisfied (saves ~600MB).
# torch 2.2.2 has the NumPy 1.x ABI and crashes if numpy 2.x is installed.
$PIP install --index-url https://download.pytorch.org/whl/cpu --extra-index-url https://pypi.org/simple "numpy<2" "torch==2.2.2" torchvision
$PIP install --upgrade --force-reinstall "numpy<2"
INSTALLED_SOMETHING=0 INSTALLED_SOMETHING=0
if [[ -f requirements.txt ]]; then if [[ -f requirements.txt ]]; then
$PIP install -r requirements.txt $PIP install -r requirements.txt
@@ -347,6 +354,8 @@ jobs:
exit 1 exit 1
fi fi
$PIP install h5py scipy $PIP install h5py scipy
# Force numpy<2 again (requirements.txt may have bumped it via transitive deps).
$PIP install --upgrade --force-reinstall "numpy<2"
TORCH_INDEX_URL="https://download.pytorch.org/whl/cpu" TORCH_INDEX_URL="https://download.pytorch.org/whl/cpu"
TORCH_REASON="no NVIDIA GPU detected" TORCH_REASON="no NVIDIA GPU detected"
if command -v nvidia-smi &> /dev/null; then if command -v nvidia-smi &> /dev/null; then
@@ -366,7 +375,8 @@ jobs:
fi fi
fi fi
echo "Installing PyTorch from ${TORCH_INDEX_URL} (${TORCH_REASON})." echo "Installing PyTorch from ${TORCH_INDEX_URL} (${TORCH_REASON})."
$PIP install --index-url "$TORCH_INDEX_URL" --upgrade --force-reinstall torch torchvision # torch pre-installed at step head; force-reinstall disabled to avoid 755MB redownload
echo "Skipping torch force-reinstall ($TORCH_INDEX_URL)"
- name: Find and adapt dataset - name: Find and adapt dataset
shell: bash shell: bash
@@ -462,4 +472,4 @@ jobs:
${{ env.WAVESFM_OUTPUT_DIR }}/best.pth ${{ env.WAVESFM_OUTPUT_DIR }}/best.pth
${{ env.WAVESFM_OUTPUT_DIR }}/log.txt ${{ env.WAVESFM_OUTPUT_DIR }}/log.txt
if-no-files-found: warn if-no-files-found: warn
# committed at 2026-05-28T10:47:45.318818+00:00 # committed at 2026-05-28T11:01:57.936314+00:00