Training run - 2026-05-28 07:01:57
Some checks failed
WavesFM Fine-Tuning / WavesFM-Training (push) Failing after 1s
Some checks failed
WavesFM Fine-Tuning / WavesFM-Training (push) Failing after 1s
This commit is contained in:
parent
ceb2c3fc56
commit
3ef7c1669b
|
|
@ -15,7 +15,7 @@ permissions:
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
WavesFM-Training:
|
WavesFM-Training:
|
||||||
runs-on: "ubuntu-24.04"
|
runs-on: "ubuntu-latest-2080"
|
||||||
env:
|
env:
|
||||||
WAVESFM_TASK: "rml"
|
WAVESFM_TASK: "rml"
|
||||||
WAVESFM_EPOCHS: "10"
|
WAVESFM_EPOCHS: "10"
|
||||||
|
|
@ -27,6 +27,9 @@ jobs:
|
||||||
# downstream step uses the env var, no hard-coded paths.
|
# downstream step uses the env var, no hard-coded paths.
|
||||||
WAVESFM_REPO_DIR: "/opt/wavesfm/repo"
|
WAVESFM_REPO_DIR: "/opt/wavesfm/repo"
|
||||||
WAVESFM_ADAPTED_DATA: "/opt/wavesfm/adapted_data.h5"
|
WAVESFM_ADAPTED_DATA: "/opt/wavesfm/adapted_data.h5"
|
||||||
|
# Override model-download base URL to internal LAN IP
|
||||||
|
# (external riahub.ai LFS endpoint is unreachable from runners).
|
||||||
|
RIAHUB_BASE_URL: "http://192.168.0.170:3000"
|
||||||
steps:
|
steps:
|
||||||
- name: Display basic runner info
|
- name: Display basic runner info
|
||||||
run: |
|
run: |
|
||||||
|
|
@ -294,7 +297,7 @@ jobs:
|
||||||
# `--device cpu` from the Train step actually takes effect.
|
# `--device cpu` from the Train step actually takes effect.
|
||||||
# No-op if the line already uses args.device (idempotent).
|
# No-op if the line already uses args.device (idempotent).
|
||||||
if [[ -f main_finetune.py ]]; then
|
if [[ -f main_finetune.py ]]; then
|
||||||
sed -i 's|torch\.amp\.GradScaler(device="cuda")|torch.amp.GradScaler(device=args.device, enabled=(args.device != "cpu"))|' main_finetune.py
|
sed -i 's|torch\.amp\.GradScaler(device="cuda")|torch.cuda.amp.GradScaler(enabled=(args.device != "cpu"))|' main_finetune.py
|
||||||
echo "Patched main_finetune.py GradScaler for CPU/GPU device parity."
|
echo "Patched main_finetune.py GradScaler for CPU/GPU device parity."
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|
@ -331,6 +334,10 @@ jobs:
|
||||||
# only when the repo is genuinely installable (has setup.py /
|
# only when the repo is genuinely installable (has setup.py /
|
||||||
# setup.cfg, or pyproject.toml with [build-system]).
|
# setup.cfg, or pyproject.toml with [build-system]).
|
||||||
cd "$WAVESFM_REPO_DIR"
|
cd "$WAVESFM_REPO_DIR"
|
||||||
|
# Pre-install CPU torch and numpy<2 so that requirements.txt finds them already satisfied (saves a ~600MB download).
|
||||||
|
# torch 2.2.2 is built against the NumPy 1.x ABI and crashes if numpy 2.x is installed.
|
||||||
|
$PIP install --index-url https://download.pytorch.org/whl/cpu --extra-index-url https://pypi.org/simple "numpy<2" "torch==2.2.2" torchvision
|
||||||
|
$PIP install --upgrade --force-reinstall "numpy<2"
|
||||||
INSTALLED_SOMETHING=0
|
INSTALLED_SOMETHING=0
|
||||||
if [[ -f requirements.txt ]]; then
|
if [[ -f requirements.txt ]]; then
|
||||||
$PIP install -r requirements.txt
|
$PIP install -r requirements.txt
|
||||||
|
|
@ -347,6 +354,8 @@ jobs:
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
$PIP install h5py scipy
|
$PIP install h5py scipy
|
||||||
|
# Force numpy<2 again (requirements.txt may have bumped it via transitive deps).
|
||||||
|
$PIP install --upgrade --force-reinstall "numpy<2"
|
||||||
TORCH_INDEX_URL="https://download.pytorch.org/whl/cpu"
|
TORCH_INDEX_URL="https://download.pytorch.org/whl/cpu"
|
||||||
TORCH_REASON="no NVIDIA GPU detected"
|
TORCH_REASON="no NVIDIA GPU detected"
|
||||||
if command -v nvidia-smi &> /dev/null; then
|
if command -v nvidia-smi &> /dev/null; then
|
||||||
|
|
@ -366,7 +375,8 @@ jobs:
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
echo "Installing PyTorch from ${TORCH_INDEX_URL} (${TORCH_REASON})."
|
echo "Installing PyTorch from ${TORCH_INDEX_URL} (${TORCH_REASON})."
|
||||||
$PIP install --index-url "$TORCH_INDEX_URL" --upgrade --force-reinstall torch torchvision
|
# torch is already pre-installed at the start of this step; the force-reinstall is disabled to avoid a 755MB re-download.
|
||||||
|
echo "Skipping torch force-reinstall ($TORCH_INDEX_URL)"
|
||||||
|
|
||||||
- name: Find and adapt dataset
|
- name: Find and adapt dataset
|
||||||
shell: bash
|
shell: bash
|
||||||
|
|
@ -462,4 +472,4 @@ jobs:
|
||||||
${{ env.WAVESFM_OUTPUT_DIR }}/best.pth
|
${{ env.WAVESFM_OUTPUT_DIR }}/best.pth
|
||||||
${{ env.WAVESFM_OUTPUT_DIR }}/log.txt
|
${{ env.WAVESFM_OUTPUT_DIR }}/log.txt
|
||||||
if-no-files-found: warn
|
if-no-files-found: warn
|
||||||
# committed at 2026-05-28T10:47:45.318818+00:00
|
# committed at 2026-05-28T11:01:57.936314+00:00
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user