LFS'd the recordings folder, added data_gen.py (generates modulated .npy recordings) and produce_dataset.py (builds an .h5 dataset file under the data folder), modified the workflow file to run the script on every push/PR, and uploads the dataset as a workflow artifact

This commit is contained in:
Liyu Xiao 2025-05-15 10:47:54 -04:00
parent 8f28f5db0f
commit 12b920c88d
33 changed files with 114 additions and 1 deletion

1
.gitattributes vendored Normal file

@@ -0,0 +1 @@
recordings/** filter=lfs diff=lfs merge=lfs -text


@@ -37,7 +37,7 @@ jobs:
      - name: 1. Build Dataset
        run: |
          echo "building dataset"
-          # Placeholder: implement conversion from raw .npy recordings → train/val sets
+          python produce_dataset.py

BIN
data/dataset.h5 Normal file

Binary file not shown.

69
data_gen.py Normal file

@@ -0,0 +1,69 @@
from utils.data import Recording
import numpy as np
from utils.signal import block_generator

mods = {
"bpsk": {"num_bits_per_symbol": 1, "constellation_type": "psk"},
"qpsk": {"num_bits_per_symbol": 2, "constellation_type": "psk"},
"qam16": {"num_bits_per_symbol": 4, "constellation_type": "qam"},
"qam64": {"num_bits_per_symbol": 6, "constellation_type": "qam"},
}


def generate_modulated_signals():
for modulation in ["bpsk", "qpsk", "qam16", "qam64"]:
        for snr in np.arange(-6, 13, 3):  # SNR sweep: -6 to 12 dB in 3 dB steps
recording_length = 1024
beta = 0.3 # the rolloff factor, can be changed to add variety
sps = 4 # samples per symbol, or the relative bandwidth of the digital signal. Can also be changed.
# blocks don't directly take the string 'qpsk' so we use the dict 'mods' to get parameters
constellation_type = mods[modulation]["constellation_type"]
num_bits_per_symbol = mods[modulation]["num_bits_per_symbol"]
# construct the digital modulation blocks with these parameters
# we have bit source -> mapper -> upsampling -> pulse shaping
bit_source = block_generator.RandomBinarySource()
mapper = block_generator.Mapper(
constellation_type=constellation_type,
num_bits_per_symbol=num_bits_per_symbol,
)
upsampler = block_generator.Upsampling(factor=sps)
pulse_shaping_filter = block_generator.RaisedCosineFilter(
upsampling_factor=sps, beta=beta
)
pulse_shaping_filter.connect_input([upsampler])
upsampler.connect_input([mapper])
mapper.connect_input([bit_source])
modulation_recording = pulse_shaping_filter.record(
num_samples=recording_length
)
            # add noise: measure the modulated signal's power, then generate AWGN
            # sized by the target SNR (dB -> linear power ratio, with the noise
            # power split across the real and imaginary components)
            signal_power = np.mean(np.abs(modulation_recording.data[0]) ** 2)
            awgn_source = block_generator.AWGNSource(
                variance=(signal_power / 2) * (10 ** (-snr / 10))
            )
noise = awgn_source.record(num_samples=recording_length)
samples_with_noise = modulation_recording.data + noise.data
output_recording = Recording(data=samples_with_noise)
# add metadata for ML later
output_recording.add_to_metadata(key="modulation", value=modulation)
output_recording.add_to_metadata(key="snr", value=int(snr))
output_recording.add_to_metadata(key="beta", value=beta)
output_recording.add_to_metadata(key="sps", value=sps)
# view if you want
# output_recording.view()
# save to file
output_recording.to_npy() # optionally add path and filename parameters


if __name__ == "__main__":
    generate_modulated_signals()
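
A quick sanity check of the noise calibration above (an illustrative sketch, not part of the commit): with the SNR given in dB, the AWGN power should come out to signal_power * 10**(-snr/10), split evenly between the real and imaginary components. All names and numbers below are made up for the check.

# sanity-check the AWGN calibration used in data_gen.py (illustrative sketch)
import numpy as np

snr_db = 6
signal_power = 1.0  # assume a unit-power modulated signal for the check

# SNR (dB) -> linear power ratio; noise power = signal_power / 10**(snr/10)
noise_power = signal_power * 10 ** (-snr_db / 10)

# complex AWGN: half of the noise power in each of the real/imaginary parts
rng = np.random.default_rng(0)
scale = np.sqrt(noise_power / 2)
noise = rng.normal(scale=scale, size=100_000) + 1j * rng.normal(scale=scale, size=100_000)

measured_power = np.mean(np.abs(noise) ** 2)
print(f"target noise power: {noise_power:.4f}, measured: {measured_power:.4f}")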

43
produce_dataset.py Normal file

@@ -0,0 +1,43 @@
import os

import h5py
import numpy as np


def generate_dataset(path_to_recordings, output_path, dataset_name="data"):
"""
Generates a dataset from a folder of .npy files and saves it to an HDF5 file
Parameters:
path_to_recordings (str): Path to the folder containing .npy files
output_path (str): Path to the output HDF5 file
dataset_name (str): Name of the dataset in the HDF5 file (default: "data")
Returns:
dset (h5py.Dataset): The created dataset object
"""
parent = os.path.dirname(output_path)
if parent:
os.makedirs(parent, exist_ok=True)
    # we assume the recordings are in .npy format; sort for a deterministic order
    files = sorted(f for f in os.listdir(path_to_recordings) if f.endswith(".npy"))
    if not files:
        raise ValueError("No .npy files found in the specified directory.")

    # use the first recording to fix the per-item shape and dtype
    sample = np.load(os.path.join(path_to_recordings, files[0]))
    shape = sample.shape
    dtype = sample.dtype
    with h5py.File(output_path, "w") as hf:
        dset = hf.create_dataset(
            dataset_name, shape=(len(files),) + shape, dtype=dtype, compression="gzip"
        )
        for idx, fname in enumerate(files):
            data = np.load(os.path.join(path_to_recordings, fname))
            dset[idx, ...] = data

    # the h5py dataset handle is invalid once the file is closed, so return the path
    return output_path


if __name__ == "__main__":
    print(generate_dataset("recordings", "data/dataset.h5"))
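
For reference, a minimal read-back of the generated file (a sketch; it assumes the default dataset name "data" used by produce_dataset.py and that data/dataset.h5 already exists):

import h5py

with h5py.File("data/dataset.h5", "r") as hf:
    dset = hf["data"]
    print(dset.shape, dset.dtype)  # (num_recordings, ...per-recording shape)
    first = dset[0]  # slicing reads a single recording into memory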

BIN: 28 binary recording files (the LFS-tracked recordings; binary files not shown)