updated example reference

This commit is contained in:
ben 2026-05-20 13:04:10 -04:00
parent 065271fb67
commit 1c954dcb3f
12 changed files with 294 additions and 63 deletions

3
.gitattributes vendored
View File

@ -1,2 +1,5 @@
*.ckpt filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.sigmf-data filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.hdf5 filter=lfs diff=lfs merge=lfs -text

View File

@ -15,11 +15,17 @@ permissions:
jobs:
QMB-Training:
runs-on: "hades-4090"
# Replace with the name of your registered Gitea Actions runner.
# Find registered runners under Settings → Runners in your RIA Hub instance.
runs-on: "your-runner-name"
env:
# RIAHUB_BASE_URL is the base URL of your RIA Hub instance,
# e.g. https://riahub.example.com — set as a repository variable or secret.
RIAHUB_BASE_URL: ${{ vars.RIAHUB_BASE_URL || secrets.RIAHUB_BASE_URL || '' }}
QMB_OUTPUT_ROOT: "/opt/qmb/outputs"
QMB_TASK_REPO_ROOT: "/opt/qmb/task_repos"
steps:
- name: Display basic runner info
run: |
@ -27,8 +33,7 @@ jobs:
echo "Runner Architecture: ${{ runner.arch }}"
- name: Print CPU information
run: |
lscpu
run: lscpu
- name: Print GPU information
run: |
@ -38,11 +43,15 @@ jobs:
echo "No NVIDIA GPU available."
fi
- name: Checkout Datasets (lswersk/library-test)
# Checks out the dataset repository that contains your .h5 training files.
# Replace YOUR_ORG/YOUR_DATASET_REPO with the actual repository path on your
# RIA Hub instance (e.g. "my-team/rf-datasets").
# Replace the sparse-checkout paths (SPARSE_PATHS) and the commit hash / branch name
# (FETCH_REF) in the script below with values that match your dataset layout.
- name: Checkout Datasets
env:
RIAHUB_USER: ${{ secrets.QMBDEMO_USER }}
RIAHUB_TOKEN: ${{ secrets.QMBDEMO_TOKEN }}
RIAHUB_USER: ${{ secrets.DATASET_REPO_USER }}
RIAHUB_TOKEN: ${{ secrets.DATASET_REPO_TOKEN }}
run: |
set -euo pipefail
DEFAULT_BASE_URL="http://localhost:3000"
@ -64,8 +73,21 @@ jobs:
echo "http://$raw"
}
REPO_PATH="/lswersk/library-test.git"
DEST_ROOT="/opt/qmb/riahub/dataset/lswersk/library-test/main"
# --- Edit below: set your dataset repository path and file layout ---
REPO_PATH="/YOUR_ORG/YOUR_DATASET_REPO.git"
DEST_ROOT="/opt/qmb/riahub/dataset/YOUR_ORG/YOUR_DATASET_REPO/main"
# Sparse-checkout paths: list the .h5 files you need for training
SPARSE_PATHS=(
"datasets/train.h5"
"datasets/val.h5"
"datasets/test.h5"
)
# Pin to a specific commit for reproducibility, or use a branch ref:
# FETCH_REF="main" (branch — always latest)
# FETCH_REF="<40-char-sha>" (pinned commit — recommended for training)
FETCH_REF="main"
# --- End edit ---
sudo mkdir -p "$(dirname "$DEST_ROOT")"
mapfile -t BASE_CANDIDATES < <(build_base_candidates "$BASE_URL_SOURCE")
CLONED=0
@ -81,22 +103,19 @@ jobs:
fi
done
if [[ "$CLONED" -ne 1 ]]; then
echo "Failed to clone dataset repo using base URL candidates derived from: $BASE_URL_SOURCE" >&2
echo "Failed to clone dataset repo." >&2
exit 1
fi
if ! command -v git-lfs >/dev/null 2>&1; then
sudo apt-get update -y
sudo apt-get install -y git-lfs
sudo apt-get update -y && sudo apt-get install -y git-lfs
fi
sudo git -C "$DEST_ROOT" lfs install --local || true
sudo git -C "$DEST_ROOT" sparse-checkout init --no-cone
sudo git -C "$DEST_ROOT" sparse-checkout set --no-cone -- \
"datasets/tiny-pluto/test.h5" \
"datasets/tiny-pluto/train.h5" \
"datasets/tiny-pluto/val.h5"
sudo git -C "$DEST_ROOT" fetch --depth=1 origin "e4bd5193c5bb09aa23afd18e138840befefa59cd"
sudo git -C "$DEST_ROOT" sparse-checkout set --no-cone -- "${SPARSE_PATHS[@]}"
sudo git -C "$DEST_ROOT" fetch --depth=1 origin "$FETCH_REF"
sudo git -C "$DEST_ROOT" -c advice.detachedHead=false checkout FETCH_HEAD
sudo git -C "$DEST_ROOT" lfs fetch origin --include="datasets/tiny-pluto/test.h5,datasets/tiny-pluto/train.h5,datasets/tiny-pluto/val.h5" --exclude="" || true
sudo git -C "$DEST_ROOT" lfs fetch origin \
--include="$(IFS=,; echo "${SPARSE_PATHS[*]}")" --exclude="" || true
sudo git -C "$DEST_ROOT" lfs checkout || true
sudo git -C "$DEST_ROOT" remote remove origin || true
sudo git -C "$DEST_ROOT" config --local --unset-all http.extraheader || true
@ -111,16 +130,14 @@ jobs:
mkdir -p /opt/qmb/configs/
sudo cp -r ${{ github.workspace }}/.riahub/train_configs/* /opt/qmb/configs/
- name: List QMB project contents
run: |
ls -lha /opt/qmb
ls -lh /opt/qmb/wheel
- name: List Downloaded RIA Hub contents
- name: List downloaded dataset contents
run: |
ls -lh /opt/qmb/riahub || true
ls -lh /opt/qmb/riahub/model || true
ls -lh /opt/qmb/riahub/dataset || true
- name: Setup Python
@ -151,14 +168,15 @@ jobs:
fi
fi
echo "Installing PyTorch from ${TORCH_INDEX_URL} (${TORCH_REASON})."
uv pip install --system --index-url "$TORCH_INDEX_URL" --upgrade --force-reinstall torch torchvision
uv pip install --system --index-url https://pypi.org/simple --upgrade "onnxscript>=0.7.0" "onnx-ir>=0.2.1" onnx onnxruntime timm
uv pip install --system --index-url "$TORCH_INDEX_URL" \
--upgrade --force-reinstall torch torchvision
uv pip install --system --index-url https://pypi.org/simple \
--upgrade "onnxscript>=0.7.0" "onnx-ir>=0.2.1" onnx onnxruntime timm
- name: Run Training Script
run: |
cd /opt/qmb
export PYTHONPATH="$QMB_TASK_REPO_ROOT:${PYTHONPATH:-}"
#source .venv/bin/activate
qmb train --config /opt/qmb/configs/train.yaml
- name: Collect training artifacts
@ -171,9 +189,7 @@ jobs:
if [[ -d "$QMB_OUTPUT_ROOT" ]]; then
while IFS= read -r -d '' file; do
rel="${file#${QMB_OUTPUT_ROOT}/}"
if [[ "$rel" == "$file" ]]; then
rel="$(basename "$file")"
fi
[[ "$rel" == "$file" ]] && rel="$(basename "$file")"
mkdir -p "$ARTIFACT_DIR/$(dirname "$rel")"
cp "$file" "$ARTIFACT_DIR/$rel"
done < <(
@ -191,12 +207,10 @@ jobs:
echo "Collected training artifacts:"
find "$ARTIFACT_DIR" -type f -print | sort || true
- name: ⬆️ Upload training artifacts
- name: Upload training artifacts
if: always()
uses: actions/upload-artifact@v3
with:
name: training-artifacts
path: ${{ github.workspace }}/.riahub/artifacts/training
if-no-files-found: warn
# committed at 2026-04-24T00:04:24.549781+00:00

BIN
Datasets/example_radio_dataset.h5 (Stored with Git LFS) Normal file

Binary file not shown.

BIN
Datasets/example_synthetic_dataset.h5 (Stored with Git LFS) Normal file

Binary file not shown.

View File

@ -23,31 +23,34 @@ RIA Hub is a collaborative platform for RF and machine learning workflows. It co
```
RIA_Example/
├── recordings/
│ └── example_iq_recording.h5 # Raw IQ capture (input to Curator)
├── Recordings/
│ ├── bpsk_915MHz/
│ │ ├── device_A_bpsk.sigmf-data # Binary IQ samples (cf32_le, tracked via Git LFS)
│ │ └── device_A_bpsk.sigmf-meta # JSON metadata: sample rate, frequency, labels
│ ├── qpsk_915MHz/
│ │ ├── device_A_qpsk.sigmf-data
│ │ └── device_A_qpsk.sigmf-meta
│ └── qam16_2400MHz/
│ ├── device_B_qam16.sigmf-data
│ └── device_B_qam16.sigmf-meta
├── datasets/
│ ├── example_radio_dataset.h5 # Curated radio dataset (Curator output / Model Trainer input)
│ └── example_synthetic_dataset.h5 # Synthetically generated dataset (Generator output)
├── Datasets/
│ ├── example_radio_dataset.h5 # Curated from recordings above (Curator output / Model Trainer input)
│ └── example_synthetic_dataset.h5 # Synthetically generated (Generator output)
├── models/
│ ├── example_model.ckpt # PyTorch Module (Model Trainer input / output)
├── Models/
│ ├── example_model.ckpt # PyTorch checkpoint (Model Trainer output)
│ └── example_model.onnx # Exported ONNX model (Screens / Application Packager input)
├── applications/
│ └── example_application.json # Application Composer output, can be built in RIA Screens (Application Packager)
├── curator-configs/
│ └── example_curator_config.json # Reference configuration for the Curator tool
├── .ria/
│ ├── train.yaml # Example Model Trainer workflow (committed to .ria/)
│ ├── example_application.yaml # Example Application Composer Build workflow (committed to .ria/)
└── curator-configs/
└── example_curator_config.json # Example curation configuration for the Curator tool
└── .ria/
└── train.yaml # Example Model Trainer workflow (Gitea Actions)
```
All binary files (`.sigmf-data`, `.h5` / `.hdf5`, `.ckpt`, `.onnx`) are tracked with Git LFS. Clone with Git LFS installed to get the actual file content; without it you will only get small LFS pointer files.
---
## Tool Walkthroughs
@ -56,17 +59,17 @@ RIA_Example/
The Library is a cross-repository browser for all RF and ML assets on the platform. It automatically discovers files pushed to any repository you have access to.
**To explore the example recording:**
1. Import `recordings/example_iq_recording.h5` into any repository via **New Repository → Upload Files** or by pushing via Git LFS.
**To explore the example recordings:**
1. Push this repository to your RIA Hub instance. The `.sigmf-data` files in `Recordings/` are tracked via Git LFS (the matching `.sigmf-meta` files are plain JSON), and each pair is indexed automatically on push.
2. Navigate to **Library** in the top navigation bar.
3. Select the **Recordings** tab. Your file will appear with metadata and a spectrogram thumbnail.
4. Click the file to open the detail view — you can inspect signal properties, view the spectrogram, and copy the file to another repository.
3. Select the **Recordings** tab. Each `.sigmf-data`/`.sigmf-meta` pair appears as a row with metadata and a spectrogram thumbnail.
4. Click any row to open the recording inspector — view the spectrogram, constellation, PSD, and time series tabs.
**Supported asset types in the Library:**
| Type | Extension | Description |
|------|-----------|-------------|
| Recording | `.h5` / `.hdf5` | Raw IQ capture files |
| Recording | `.sigmf-data` + `.sigmf-meta` | SigMF IQ capture pairs — preferred format for the Library and Curator |
| Radio Dataset | `.h5` / `.hdf5` | Labelled, curated training datasets |
| PyTorch Module | `.py` | PyTorch model definitions with a nn.Module class |
| PyTorch State Dict | `.pt` / `.pth` | Model weights / state dictionaries |
@ -79,14 +82,14 @@ The Library is a cross-repository browser for all RF and ML assets on the platfo
The Curator takes raw IQ recordings and produces a labelled, ready-to-train HDF5 dataset. It applies a configurable DSP pipeline: slicing, quality filtering, and optional augmentation.
**Example files:** `recordings/example_iq_recording.h5`, `curator-configs/example_curator_config.json`
**Expected output:** `datasets/example_radio_dataset.h5`
**Example files:** `Recordings/` (three SigMF pairs), `curator-configs/example_curator_config.json`
**Expected output:** `Datasets/example_radio_dataset.h5`
**Steps:**
1. Upload `example_iq_recording.h5` to a repository (the Curator reads from the Library).
1. Push the recordings to a repository — the Curator reads from the Library once files are indexed.
2. Go to **Dataset Manager → Curator**.
3. Select your recording from the Library panel on the left.
4. Configure the pipeline using the settings below, or load `example_curator_config.json` as a reference:
3. Select your recordings from the Library panel on the left.
4. Configure the pipeline using the settings below, or use `curator-configs/example_curator_config.json` as a reference:
- **Data type:** `IQ`
- **Slicer:** `simple` — slice length `1024`
- **Qualifier:** `rms` — minimum threshold `0.01` (filters out silent/noise-only slices)
@ -286,16 +289,34 @@ Projects group your datasets, models, training runs, and deployed applications i
## File Format Reference
### SigMF Recording (`.sigmf-data` + `.sigmf-meta`)
Each recording is a pair of files committed together:
| File | Contains |
|------|---------|
| `.sigmf-data` | Raw IQ samples as interleaved float32 little-endian (`cf32_le`) — I[0], Q[0], I[1], Q[1], … |
| `.sigmf-meta` | JSON metadata: `global` (sample rate, frequency, hardware, RIA-specific fields), `captures` (capture start info), `annotations` (time/frequency regions with labels) |
The `annotations[].core:label` field is what the Curator reads to assign class labels when slicing a recording into a dataset. See `Recordings/bpsk_915MHz/device_A_bpsk.sigmf-meta` for an annotated example.
---
### HDF5 Radio Dataset (`.h5`)
Curated and generated datasets share a common HDF5 layout:
```
dataset.h5
├── data/ # IQ samples, shape [N, slice_length, 2] (float32)
├── labels/ # Integer class labels, shape [N]
├── metadata/ # Recording metadata carried through from source
└── attrs # Dataset-level attributes: name, version, radio_task, backend
├── data # IQ samples, shape (N, 2, L) float32
│ # N = number of slices
│ # 2 = I channel (index 0) and Q channel (index 1)
│ # L = slice length in samples (e.g. 1024)
├── metadata/
│ ├── metadata # Per-slice structured array: rec_id, label, modulation,
│ │ # snr_db, center_freq, sample_rate, device_id, split, ...
│ └── about # Dataset-level: author, date_of_data_generation, dataset_version
└── attrs # File-level: license, source_type, collection metadata
```
@ -332,6 +353,6 @@ dataset.h5
## Getting Help
- Full platform documentation is available in the **Docs** section of RIA Hub.
- Full platform documentation: [RIA Hub Docs](https://qoherent.github.io/riahub-docs/) — covers every tool with step-by-step guides and format references.
- Open an issue in this repository if an example file is missing, broken, or out of date.
- For tool-specific questions, use the in-app help panels (the `?` icon on each tool page).

BIN
Recordings/bpsk_915MHz/device_A_bpsk.sigmf-data (Stored with Git LFS) Normal file

Binary file not shown.

View File

@ -0,0 +1,45 @@
{
"global": {
"core:datatype": "cf32_le",
"core:extensions": [
{
"name": "ria",
"optional": true,
"version": "0.1.4"
}
],
"core:num_channels": 1,
"core:offset": 0,
"core:recorder": "RIA",
"core:sample_rate": 1000000.0,
"core:sha512": "f1dadadeaa718c9f4a47e787341d5b390d47837751b77244866ff8823d433fd5d7ae133a2892264288b26eafb045cc90a18b2377e3a40d7e8a15843f61e22830",
"core:version": "1.8.0",
"ria:campaign": "example_campaign_001",
"ria:capture_timestamp": 1737000000.0,
"ria:center_frequency": 915000000.0,
"ria:device_id": "synthetic_device_A",
"ria:gain": 30.0,
"ria:rec_id": "9f230648a7fb04a8db994d738e6aae88739a53f7fbe86e0629e157748a99e364",
"ria:sample_rate": 1000000.0,
"ria:source": "synthetic",
"ria:step_duration_s": 0.0655,
"ria:step_label": "device_A_bpsk",
"ria:tx_modulation": "BPSK",
"ria:tx_snr_db": 20.0
},
"captures": [
{
"core:datetime": "2026-01-15T10:00:00.000000Z",
"core:frequency": 915000000.0,
"core:sample_start": 0
}
],
"annotations": [
{
"core:sample_start": 0,
"core:sample_count": 65536,
"core:label": "BPSK",
"core:comment": "Synthetic BPSK signal, SNR=20.0 dB"
}
]
}

BIN
Recordings/qam16_2400MHz/device_B_qam16.sigmf-data (Stored with Git LFS) Normal file

Binary file not shown.

View File

@ -0,0 +1,45 @@
{
"global": {
"core:datatype": "cf32_le",
"core:extensions": [
{
"name": "ria",
"optional": true,
"version": "0.1.4"
}
],
"core:num_channels": 1,
"core:offset": 0,
"core:recorder": "RIA",
"core:sample_rate": 2000000.0,
"core:sha512": "4228666244c7f0cc0e20b110114fd9c9a31d538f33c0d6f6d13e43d1173c918977ec28a3ac51dd104785a70e5dda56f761fafc4dfd0efd346f0f93294354c8df",
"core:version": "1.8.0",
"ria:campaign": "example_campaign_001",
"ria:capture_timestamp": 1737000131.072,
"ria:center_frequency": 2400000000.0,
"ria:device_id": "synthetic_device_B",
"ria:gain": 30.0,
"ria:rec_id": "bfa5f2e8d475115a5c5b966b5c9a1db7a5b394768dc0d8d05140f846074356ea",
"ria:sample_rate": 2000000.0,
"ria:source": "synthetic",
"ria:step_duration_s": 0.0328,
"ria:step_label": "device_B_qam16",
"ria:tx_modulation": "QAM16",
"ria:tx_snr_db": 25.0
},
"captures": [
{
"core:datetime": "2026-01-15T10:00:00.000000Z",
"core:frequency": 2400000000.0,
"core:sample_start": 0
}
],
"annotations": [
{
"core:sample_start": 0,
"core:sample_count": 65536,
"core:label": "QAM16",
"core:comment": "Synthetic QAM16 signal, SNR=25.0 dB"
}
]
}

BIN
Recordings/qpsk_915MHz/device_A_qpsk.sigmf-data (Stored with Git LFS) Normal file

Binary file not shown.

View File

@ -0,0 +1,45 @@
{
"global": {
"core:datatype": "cf32_le",
"core:extensions": [
{
"name": "ria",
"optional": true,
"version": "0.1.4"
}
],
"core:num_channels": 1,
"core:offset": 0,
"core:recorder": "RIA",
"core:sample_rate": 1000000.0,
"core:sha512": "d36cc62b2b7e68e0341e2a4d5dfcc2c7f58b284fae6d160942fd247ae506fad882a23054f4dd4ca0cf1d2c7686d8582d557b39c25936299834275fc6214e36b4",
"core:version": "1.8.0",
"ria:campaign": "example_campaign_001",
"ria:capture_timestamp": 1737000065.536,
"ria:center_frequency": 915000000.0,
"ria:device_id": "synthetic_device_A",
"ria:gain": 30.0,
"ria:rec_id": "b54aaa3d7cfe66ba22d251fd1da1c70b9bc7a53c14a8958b707b57bd540d90e7",
"ria:sample_rate": 1000000.0,
"ria:source": "synthetic",
"ria:step_duration_s": 0.0655,
"ria:step_label": "device_A_qpsk",
"ria:tx_modulation": "QPSK",
"ria:tx_snr_db": 18.0
},
"captures": [
{
"core:datetime": "2026-01-15T10:00:00.000000Z",
"core:frequency": 915000000.0,
"core:sample_start": 0
}
],
"annotations": [
{
"core:sample_start": 0,
"core:sample_count": 65536,
"core:label": "QPSK",
"core:comment": "Synthetic QPSK signal, SNR=18.0 dB"
}
]
}

View File

@ -0,0 +1,43 @@
{
"_comment": "Example Curator configuration for a QPSK / BPSK / QAM16 classification dataset. Load this as a reference when setting up a new curation run in Dataset Manager → Curator.",
"slicer_config": {
"type": "simple",
"slice_length": 1024
},
"qualifier_config": {
"type": "rms",
"threshold": 0.01,
"auto_threshold": true
},
"metadata_config": {
"keys": [
"ria:tx_modulation",
"ria:tx_snr_db",
"ria:center_frequency",
"ria:sample_rate",
"ria:device_id",
"ria:campaign"
],
"rename_mappings": {
"label": "ria:tx_modulation",
"snr_db": "ria:tx_snr_db",
"center_freq": "ria:center_frequency",
"sample_rate": "ria:sample_rate",
"device_id": "ria:device_id",
"campaign": "ria:campaign"
}
},
"overwrite": false,
"_notes": {
"slicer_types": ["simple", "random", "overlap"],
"qualifier_types": ["rms", "snr", "energy", "bandwidth", "quantization", "learned"],
"split_ratios": "Set train/val/test proportions in the UI — default is 0.7 / 0.15 / 0.15",
"augmentation": "Configure augmentation policy in the UI; not captured in this file",
"label_source": "core:label from SigMF annotations takes priority over recording-level metadata keys"
}
}