updated example reference

2026-05-20 13:04:10 -04:00 · 2026-05-20 13:04:10 -04:00 · 1c954dcb3f
commit 1c954dcb3f
parent 065271fb67
12 changed files with 294 additions and 63 deletions
--- a/.gitattributes
+++ b/.gitattributes
@ -1,2 +1,5 @@
 *.ckpt filter=lfs diff=lfs merge=lfs -text
 *.onnx filter=lfs diff=lfs merge=lfs -text
+*.sigmf-data filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.hdf5 filter=lfs diff=lfs merge=lfs -text
--- a/.ria/train.yaml
+++ b/.ria/train.yaml
@ -15,11 +15,17 @@ permissions:

 jobs:
  QMB-Training:
-    runs-on: "hades-4090"
+    # Replace with the name of your registered Gitea Actions runner.
+    # Find registered runners under Settings → Runners in your RIA Hub instance.
+    runs-on: "your-runner-name"
+
    env:
+      # RIAHUB_BASE_URL is the base URL of your RIA Hub instance,
+      # e.g. https://riahub.example.com — set as a repository variable or secret.
      RIAHUB_BASE_URL: ${{ vars.RIAHUB_BASE_URL || secrets.RIAHUB_BASE_URL || '' }}
      QMB_OUTPUT_ROOT: "/opt/qmb/outputs"
      QMB_TASK_REPO_ROOT: "/opt/qmb/task_repos"
+
    steps:
      - name: Display basic runner info
        run: |
@ -27,8 +33,7 @@ jobs:
          echo "Runner Architecture: ${{ runner.arch }}"

      - name: Print CPU information
-        run: |
-          lscpu
+        run: lscpu

      - name: Print GPU information
        run: |
@ -38,11 +43,15 @@ jobs:
            echo "No NVIDIA GPU available."
          fi

-
-      - name: Checkout Datasets (lswersk/library-test)
+      # Checks out the dataset repository that contains your .h5 training files.
+      # Replace YOUR_ORG/YOUR_DATASET_REPO with the actual repository path on your
+      # RIA Hub instance (e.g. "my-team/rf-datasets").
+      # Replace the sparse-checkout paths and the commit hash / branch name with
+      # values that match your dataset layout.
+      - name: Checkout Datasets
        env:
-          RIAHUB_USER: ${{ secrets.QMBDEMO_USER }}
-          RIAHUB_TOKEN: ${{ secrets.QMBDEMO_TOKEN }}
+          RIAHUB_USER: ${{ secrets.DATASET_REPO_USER }}
+          RIAHUB_TOKEN: ${{ secrets.DATASET_REPO_TOKEN }}
        run: |
          set -euo pipefail
          DEFAULT_BASE_URL="http://localhost:3000"
@ -64,8 +73,21 @@ jobs:
            echo "http://$raw"
          }

-          REPO_PATH="/lswersk/library-test.git"
-          DEST_ROOT="/opt/qmb/riahub/dataset/lswersk/library-test/main"
+          # --- Edit below: set your dataset repository path and file layout ---
+          REPO_PATH="/YOUR_ORG/YOUR_DATASET_REPO.git"
+          DEST_ROOT="/opt/qmb/riahub/dataset/YOUR_ORG/YOUR_DATASET_REPO/main"
+          # Sparse-checkout paths: list the .h5 files you need for training
+          SPARSE_PATHS=(
+            "datasets/train.h5"
+            "datasets/val.h5"
+            "datasets/test.h5"
+          )
+          # Pin to a specific commit for reproducibility, or use a branch ref:
+          #   FETCH_REF="main"  (branch — always latest)
+          #   FETCH_REF="<40-char-sha>"  (pinned commit — recommended for training)
+          FETCH_REF="main"
+          # --- End edit ---
+
          sudo mkdir -p "$(dirname "$DEST_ROOT")"
          mapfile -t BASE_CANDIDATES < <(build_base_candidates "$BASE_URL_SOURCE")
          CLONED=0
@ -81,22 +103,19 @@ jobs:
            fi
          done
          if [[ "$CLONED" -ne 1 ]]; then
-            echo "Failed to clone dataset repo using base URL candidates derived from: $BASE_URL_SOURCE" >&2
+            echo "Failed to clone dataset repo." >&2
            exit 1
          fi
          if ! command -v git-lfs >/dev/null 2>&1; then
-            sudo apt-get update -y
-            sudo apt-get install -y git-lfs
+            sudo apt-get update -y && sudo apt-get install -y git-lfs
          fi
          sudo git -C "$DEST_ROOT" lfs install --local || true
          sudo git -C "$DEST_ROOT" sparse-checkout init --no-cone
-          sudo git -C "$DEST_ROOT" sparse-checkout set --no-cone -- \
-            "datasets/tiny-pluto/test.h5" \
-            "datasets/tiny-pluto/train.h5" \
-            "datasets/tiny-pluto/val.h5"
-          sudo git -C "$DEST_ROOT" fetch --depth=1 origin "e4bd5193c5bb09aa23afd18e138840befefa59cd"
+          sudo git -C "$DEST_ROOT" sparse-checkout set --no-cone -- "${SPARSE_PATHS[@]}"
+          sudo git -C "$DEST_ROOT" fetch --depth=1 origin "$FETCH_REF"
          sudo git -C "$DEST_ROOT" -c advice.detachedHead=false checkout FETCH_HEAD
-          sudo git -C "$DEST_ROOT" lfs fetch origin --include="datasets/tiny-pluto/test.h5,datasets/tiny-pluto/train.h5,datasets/tiny-pluto/val.h5" --exclude="" || true
+          sudo git -C "$DEST_ROOT" lfs fetch origin \
+            --include="$(IFS=,; echo "${SPARSE_PATHS[*]}")" --exclude="" || true
          sudo git -C "$DEST_ROOT" lfs checkout || true
          sudo git -C "$DEST_ROOT" remote remove origin || true
          sudo git -C "$DEST_ROOT" config --local --unset-all http.extraheader || true
@ -111,16 +130,14 @@ jobs:
          mkdir -p /opt/qmb/configs/
          sudo cp -r ${{ github.workspace }}/.riahub/train_configs/* /opt/qmb/configs/

-
      - name: List QMB project contents
        run: |
          ls -lha /opt/qmb
          ls -lh /opt/qmb/wheel

-      - name: List Downloaded RIA Hub contents
+      - name: List downloaded dataset contents
        run: |
          ls -lh /opt/qmb/riahub || true
-          ls -lh /opt/qmb/riahub/model || true
          ls -lh /opt/qmb/riahub/dataset || true

      - name: Setup Python
@ -151,14 +168,15 @@ jobs:
            fi
          fi
          echo "Installing PyTorch from ${TORCH_INDEX_URL} (${TORCH_REASON})."
-          uv pip install --system --index-url "$TORCH_INDEX_URL" --upgrade --force-reinstall torch torchvision
-          uv pip install --system --index-url https://pypi.org/simple --upgrade "onnxscript>=0.7.0" "onnx-ir>=0.2.1" onnx onnxruntime timm
+          uv pip install --system --index-url "$TORCH_INDEX_URL" \
+            --upgrade --force-reinstall torch torchvision
+          uv pip install --system --index-url https://pypi.org/simple \
+            --upgrade "onnxscript>=0.7.0" "onnx-ir>=0.2.1" onnx onnxruntime timm

      - name: Run Training Script
        run: |
          cd /opt/qmb
          export PYTHONPATH="$QMB_TASK_REPO_ROOT:${PYTHONPATH:-}"
-          #source .venv/bin/activate
          qmb train --config /opt/qmb/configs/train.yaml

      - name: Collect training artifacts
@ -171,9 +189,7 @@ jobs:
          if [[ -d "$QMB_OUTPUT_ROOT" ]]; then
            while IFS= read -r -d '' file; do
              rel="${file#${QMB_OUTPUT_ROOT}/}"
-              if [[ "$rel" == "$file" ]]; then
-                rel="$(basename "$file")"
-              fi
+              [[ "$rel" == "$file" ]] && rel="$(basename "$file")"
              mkdir -p "$ARTIFACT_DIR/$(dirname "$rel")"
              cp "$file" "$ARTIFACT_DIR/$rel"
            done < <(
@ -191,12 +207,10 @@ jobs:
          echo "Collected training artifacts:"
          find "$ARTIFACT_DIR" -type f -print | sort || true

-      - name: ⬆️ Upload training artifacts
+      - name: Upload training artifacts
        if: always()
        uses: actions/upload-artifact@v3
        with:
          name: training-artifacts
          path: ${{ github.workspace }}/.riahub/artifacts/training
          if-no-files-found: warn
-
-# committed at 2026-04-24T00:04:24.549781+00:00
--- a/Datasets/example_radio_dataset.h5
+++ b/Datasets/example_radio_dataset.h5
--- a/Datasets/example_synthetic_dataset.h5
+++ b/Datasets/example_synthetic_dataset.h5
--- a/README.md
+++ b/README.md
@ -23,31 +23,34 @@ RIA Hub is a collaborative platform for RF and machine learning workflows. It co
 ```
 RIA_Example/
 │
-├── recordings/
-│   └── example_iq_recording.h5          # Raw IQ capture (input to Curator)
+├── Recordings/
+│   ├── bpsk_915MHz/
+│   │   ├── device_A_bpsk.sigmf-data      # Binary IQ samples (cf32_le, tracked via Git LFS)
+│   │   └── device_A_bpsk.sigmf-meta      # JSON metadata: sample rate, frequency, labels
+│   ├── qpsk_915MHz/
+│   │   ├── device_A_qpsk.sigmf-data
+│   │   └── device_A_qpsk.sigmf-meta
+│   └── qam16_2400MHz/
+│       ├── device_B_qam16.sigmf-data
+│       └── device_B_qam16.sigmf-meta
 │
-├── datasets/
-│   ├── example_radio_dataset.h5          # Curated radio dataset (Curator output / Model Trainer input)
-│   └── example_synthetic_dataset.h5      # Synthetically generated dataset (Generator output)
+├── Datasets/
+│   ├── example_radio_dataset.h5          # Curated from recordings above (Curator output / Model Trainer input)
+│   └── example_synthetic_dataset.h5      # Synthetically generated (Generator output)
 │
-├── models/
-│   ├── example_model.ckpt                  # PyTorch Module (Model Trainer input / output)
+├── Models/
+│   ├── example_model.ckpt                # PyTorch checkpoint (Model Trainer output)
 │   └── example_model.onnx               # Exported ONNX model (Screens / Application Packager input)
 │
-├── applications/
-│   └── example_application.json         # Application Composer output, can be built in RIA Screens (Application Packager)
+├── curator-configs/
+│   └── example_curator_config.json       # Reference configuration for the Curator tool
 │
-
-│
-├── .ria/
-│   ├── train.yaml                        # Example Model Trainer workflow (committed to .ria/)
-│   ├── example_application.yaml          # Example Application Composer Build workflow (committed to .ria/)
-
-│
-└── curator-configs/
-    └── example_curator_config.json       # Example curation configuration for the Curator tool
+└── .ria/
+    └── train.yaml                        # Example Model Trainer workflow (Gitea Actions)
 ```

+All binary files (`.sigmf-data`, `.h5`/`.hdf5`, `.ckpt`, `.onnx`) are tracked with Git LFS. Clone with Git LFS installed to get the actual file content instead of LFS pointer files.
+
 ---

 ## Tool Walkthroughs
@ -56,17 +59,17 @@ RIA_Example/

 The Library is a cross-repository browser for all RF and ML assets on the platform. It automatically discovers files pushed to any repository you have access to.

-**To explore the example recording:**
-1. Import `recordings/example_iq_recording.h5` into any repository via **New Repository → Upload Files** or by pushing via Git LFS.
+**To explore the example recordings:**
+1. Push this repository (or clone it into your RIA Hub instance). The SigMF file pairs in `Recordings/` are tracked via Git LFS and will be indexed automatically on push.
 2. Navigate to **Library** in the top navigation bar.
-3. Select the **Recordings** tab. Your file will appear with metadata and a spectrogram thumbnail.
-4. Click the file to open the detail view — you can inspect signal properties, view the spectrogram, and copy the file to another repository.
+3. Select the **Recordings** tab. Each `.sigmf-data`/`.sigmf-meta` pair appears as a row with metadata and a spectrogram thumbnail.
+4. Click any row to open the recording inspector — view the spectrogram, constellation, PSD, and time series tabs.

 **Supported asset types in the Library:**

 | Type | Extension | Description |
 |------|-----------|-------------|
-| Recording | `.h5` / `.hdf5` | Raw IQ capture files |
+| Recording | `.sigmf-data` + `.sigmf-meta` | SigMF IQ capture pairs — preferred format for the Library and Curator |
 | Radio Dataset | `.h5` / `.hdf5` | Labelled, curated training datasets |
 | PyTorch Module | `.py` | PyTorch model definitions with a nn.Module class |
 | PyTorch State Dict | `.pt` / `.pth` | Model weights / state dictionaries |
@ -79,14 +82,14 @@ The Library is a cross-repository browser for all RF and ML assets on the platfo

 The Curator takes raw IQ recordings and produces a labelled, ready-to-train HDF5 dataset. It applies a configurable DSP pipeline: slicing, quality filtering, and optional augmentation.

-**Example files:** `recordings/example_iq_recording.h5`, `curator-configs/example_curator_config.json`  
-**Expected output:** `datasets/example_radio_dataset.h5`
+**Example files:** `Recordings/` (three SigMF pairs), `curator-configs/example_curator_config.json`  
+**Expected output:** `Datasets/example_radio_dataset.h5`

 **Steps:**
-1. Upload `example_iq_recording.h5` to a repository (the Curator reads from the Library).
+1. Push the recordings to a repository — the Curator reads from the Library once files are indexed.
 2. Go to **Dataset Manager → Curator**.
-3. Select your recording from the Library panel on the left.
-4. Configure the pipeline using the settings below, or load `example_curator_config.json` as a reference:
+3. Select your recordings from the Library panel on the left.
+4. Configure the pipeline using the settings below, or use `curator-configs/example_curator_config.json` as a reference:
   - **Data type:** `IQ`
   - **Slicer:** `simple` — slice length `1024`
   - **Qualifier:** `rms` — minimum threshold `0.01` (filters out silent/noise-only slices)
@ -286,16 +289,34 @@ Projects group your datasets, models, training runs, and deployed applications i

 ## File Format Reference

+### SigMF Recording (`.sigmf-data` + `.sigmf-meta`)
+
+Each recording is a pair of files committed together:
+
+| File | Contains |
+|------|---------|
+| `.sigmf-data` | Raw IQ samples as interleaved float32 little-endian (`cf32_le`) — I[0], Q[0], I[1], Q[1], … |
+| `.sigmf-meta` | JSON metadata: `global` (sample rate, frequency, hardware, RIA-specific fields), `captures` (capture start info), `annotations` (time/frequency regions with labels) |
+
+The `annotations[].core:label` field is what the Curator reads to assign class labels when slicing a recording into a dataset. See `Recordings/bpsk_915MHz/device_A_bpsk.sigmf-meta` for an annotated example.
+
+---
+
 ### HDF5 Radio Dataset (`.h5`)

 Curated and generated datasets share a common HDF5 layout:

 ```
 dataset.h5
-├── data/           # IQ samples, shape [N, slice_length, 2] (float32)
-├── labels/         # Integer class labels, shape [N]
-├── metadata/       # Recording metadata carried through from source
-└── attrs           # Dataset-level attributes: name, version, radio_task, backend
+├── data                        # IQ samples, shape (N, 2, L) float32
+│                               #   N = number of slices
+│                               #   2 = I channel (index 0) and Q channel (index 1)
+│                               #   L = slice length in samples (e.g. 1024)
+├── metadata/
+│   ├── metadata                # Per-slice structured array: rec_id, label, modulation,
+│   │                           #   snr_db, center_freq, sample_rate, device_id, split, ...
+│   └── about                   # Dataset-level: author, date_of_data_generation, dataset_version
+└── attrs                       # File-level: license, source_type, collection metadata
 ```


@ -332,6 +353,6 @@ dataset.h5

 ## Getting Help

- Full platform documentation is available in the **Docs** section of RIA Hub.
+- Full platform documentation: [RIA Hub Docs](https://qoherent.github.io/riahub-docs/) — covers every tool with step-by-step guides and format references.
 - Open an issue in this repository if an example file is missing, broken, or out of date.
 - For tool-specific questions, use the in-app help panels (the `?` icon on each tool page).
--- a/Recordings/bpsk_915MHz/device_A_bpsk.sigmf-data
+++ b/Recordings/bpsk_915MHz/device_A_bpsk.sigmf-data
--- a/Recordings/bpsk_915MHz/device_A_bpsk.sigmf-meta
+++ b/Recordings/bpsk_915MHz/device_A_bpsk.sigmf-meta
@ -0,0 +1,45 @@
+{
+  "global": {
+    "core:datatype": "cf32_le",
+    "core:extensions": [
+      {
+        "name": "ria",
+        "optional": true,
+        "version": "0.1.4"
+      }
+    ],
+    "core:num_channels": 1,
+    "core:offset": 0,
+    "core:recorder": "RIA",
+    "core:sample_rate": 1000000.0,
+    "core:sha512": "f1dadadeaa718c9f4a47e787341d5b390d47837751b77244866ff8823d433fd5d7ae133a2892264288b26eafb045cc90a18b2377e3a40d7e8a15843f61e22830",
+    "core:version": "1.8.0",
+    "ria:campaign": "example_campaign_001",
+    "ria:capture_timestamp": 1737000000.0,
+    "ria:center_frequency": 915000000.0,
+    "ria:device_id": "synthetic_device_A",
+    "ria:gain": 30.0,
+    "ria:rec_id": "9f230648a7fb04a8db994d738e6aae88739a53f7fbe86e0629e157748a99e364",
+    "ria:sample_rate": 1000000.0,
+    "ria:source": "synthetic",
+    "ria:step_duration_s": 0.0655,
+    "ria:step_label": "device_A_bpsk",
+    "ria:tx_modulation": "BPSK",
+    "ria:tx_snr_db": 20.0
+  },
+  "captures": [
+    {
+      "core:datetime": "2026-01-15T10:00:00.000000Z",
+      "core:frequency": 915000000.0,
+      "core:sample_start": 0
+    }
+  ],
+  "annotations": [
+    {
+      "core:sample_start": 0,
+      "core:sample_count": 65536,
+      "core:label": "BPSK",
+      "core:comment": "Synthetic BPSK signal, SNR=20.0 dB"
+    }
+  ]
+}
--- a/Recordings/qam16_2400MHz/device_B_qam16.sigmf-data
+++ b/Recordings/qam16_2400MHz/device_B_qam16.sigmf-data
--- a/Recordings/qam16_2400MHz/device_B_qam16.sigmf-meta
+++ b/Recordings/qam16_2400MHz/device_B_qam16.sigmf-meta
@ -0,0 +1,45 @@
+{
+  "global": {
+    "core:datatype": "cf32_le",
+    "core:extensions": [
+      {
+        "name": "ria",
+        "optional": true,
+        "version": "0.1.4"
+      }
+    ],
+    "core:num_channels": 1,
+    "core:offset": 0,
+    "core:recorder": "RIA",
+    "core:sample_rate": 2000000.0,
+    "core:sha512": "4228666244c7f0cc0e20b110114fd9c9a31d538f33c0d6f6d13e43d1173c918977ec28a3ac51dd104785a70e5dda56f761fafc4dfd0efd346f0f93294354c8df",
+    "core:version": "1.8.0",
+    "ria:campaign": "example_campaign_001",
+    "ria:capture_timestamp": 1737000131.072,
+    "ria:center_frequency": 2400000000.0,
+    "ria:device_id": "synthetic_device_B",
+    "ria:gain": 30.0,
+    "ria:rec_id": "bfa5f2e8d475115a5c5b966b5c9a1db7a5b394768dc0d8d05140f846074356ea",
+    "ria:sample_rate": 2000000.0,
+    "ria:source": "synthetic",
+    "ria:step_duration_s": 0.0328,
+    "ria:step_label": "device_B_qam16",
+    "ria:tx_modulation": "QAM16",
+    "ria:tx_snr_db": 25.0
+  },
+  "captures": [
+    {
+      "core:datetime": "2026-01-15T10:00:00.000000Z",
+      "core:frequency": 2400000000.0,
+      "core:sample_start": 0
+    }
+  ],
+  "annotations": [
+    {
+      "core:sample_start": 0,
+      "core:sample_count": 65536,
+      "core:label": "QAM16",
+      "core:comment": "Synthetic QAM16 signal, SNR=25.0 dB"
+    }
+  ]
+}
--- a/Recordings/qpsk_915MHz/device_A_qpsk.sigmf-data
+++ b/Recordings/qpsk_915MHz/device_A_qpsk.sigmf-data
--- a/Recordings/qpsk_915MHz/device_A_qpsk.sigmf-meta
+++ b/Recordings/qpsk_915MHz/device_A_qpsk.sigmf-meta
@ -0,0 +1,45 @@
+{
+  "global": {
+    "core:datatype": "cf32_le",
+    "core:extensions": [
+      {
+        "name": "ria",
+        "optional": true,
+        "version": "0.1.4"
+      }
+    ],
+    "core:num_channels": 1,
+    "core:offset": 0,
+    "core:recorder": "RIA",
+    "core:sample_rate": 1000000.0,
+    "core:sha512": "d36cc62b2b7e68e0341e2a4d5dfcc2c7f58b284fae6d160942fd247ae506fad882a23054f4dd4ca0cf1d2c7686d8582d557b39c25936299834275fc6214e36b4",
+    "core:version": "1.8.0",
+    "ria:campaign": "example_campaign_001",
+    "ria:capture_timestamp": 1737000065.536,
+    "ria:center_frequency": 915000000.0,
+    "ria:device_id": "synthetic_device_A",
+    "ria:gain": 30.0,
+    "ria:rec_id": "b54aaa3d7cfe66ba22d251fd1da1c70b9bc7a53c14a8958b707b57bd540d90e7",
+    "ria:sample_rate": 1000000.0,
+    "ria:source": "synthetic",
+    "ria:step_duration_s": 0.0655,
+    "ria:step_label": "device_A_qpsk",
+    "ria:tx_modulation": "QPSK",
+    "ria:tx_snr_db": 18.0
+  },
+  "captures": [
+    {
+      "core:datetime": "2026-01-15T10:00:00.000000Z",
+      "core:frequency": 915000000.0,
+      "core:sample_start": 0
+    }
+  ],
+  "annotations": [
+    {
+      "core:sample_start": 0,
+      "core:sample_count": 65536,
+      "core:label": "QPSK",
+      "core:comment": "Synthetic QPSK signal, SNR=18.0 dB"
+    }
+  ]
+}
--- a/curator-configs/example_curator_config.json
+++ b/curator-configs/example_curator_config.json
@ -0,0 +1,43 @@
+{
+  "_comment": "Example Curator configuration for a QPSK / BPSK / QAM16 classification dataset. Load this as a reference when setting up a new curation run in Dataset Manager → Curator.",
+
+  "slicer_config": {
+    "type": "simple",
+    "slice_length": 1024
+  },
+
+  "qualifier_config": {
+    "type": "rms",
+    "threshold": 0.01,
+    "auto_threshold": true
+  },
+
+  "metadata_config": {
+    "keys": [
+      "ria:tx_modulation",
+      "ria:tx_snr_db",
+      "ria:center_frequency",
+      "ria:sample_rate",
+      "ria:device_id",
+      "ria:campaign"
+    ],
+    "rename_mappings": {
+      "label":       "ria:tx_modulation",
+      "snr_db":      "ria:tx_snr_db",
+      "center_freq": "ria:center_frequency",
+      "sample_rate": "ria:sample_rate",
+      "device_id":   "ria:device_id",
+      "campaign":    "ria:campaign"
+    }
+  },
+
+  "overwrite": false,
+
+  "_notes": {
+    "slicer_types": ["simple", "random", "overlap"],
+    "qualifier_types": ["rms", "snr", "energy", "bandwidth", "quantization", "learned"],
+    "split_ratios": "Set train/val/test proportions in the UI — default is 0.7 / 0.15 / 0.15",
+    "augmentation": "Configure augmentation policy in the UI; not captured in this file",
+    "label_source": "core:label from SigMF annotations takes priority over recording-level metadata keys"
+  }
+}