diff --git a/.riahub/workflows/workflow.yaml b/.riahub/workflows/workflow.yaml
index 7af8cfe..109b9a6 100644
--- a/.riahub/workflows/workflow.yaml
+++ b/.riahub/workflows/workflow.yaml
@@ -40,7 +40,7 @@ jobs:
       - name: 1. Generate Recordings
         run: |
           mkdir -p data/recordings
-          PYTHONPATH=. python scripts/dataset_building/data_gen.py
+          PYTHONPATH=. python scripts/dataset_building/data_gen.py --output-dir data/recordings
           echo "recordings produced successfully"
 
       - name: Upload Recordings
diff --git a/conf/app.yaml b/conf/app.yaml
index a637988..52adf38 100644
--- a/conf/app.yaml
+++ b/conf/app.yaml
@@ -3,9 +3,6 @@ general:
   run_mode: prod
 
 dataset:
-  #where to read the recordings from to produce the data set
-  input_dir: data/recordings
-
   #number of slices you want to split each recording into
   num_slices: 8
 
@@ -19,8 +16,6 @@ dataset:
   #multiple modulations to contain in the dataset
   modulation_types: [bpsk, qpsk, qam16, qam64]
 
-  #where to output the datasets
-  output_dir: data/dataset
 
 training:
   #number of training samples being processed together before model updates its weights
@@ -40,5 +35,6 @@ inference:
   num_classes: 4
 
+
 app:
   build_dir: dist
\ No newline at end of file
diff --git a/helpers/app_settings.py b/helpers/app_settings.py
index cd91d8d..8890619 100644
--- a/helpers/app_settings.py
+++ b/helpers/app_settings.py
@@ -12,13 +12,11 @@ class GeneralConfig:
 
 @dataclass
 class DataSetConfig:
-    input_dir: str
     num_slices: int
     train_split: float
    seed: int
     modulation_types: list
     val_split: float
-    output_dir: str
 
 
 @dataclass
diff --git a/scripts/dataset_building/data_gen.py b/scripts/dataset_building/data_gen.py
index 7307701..27d09b7 100644
--- a/scripts/dataset_building/data_gen.py
+++ b/scripts/dataset_building/data_gen.py
@@ -1,6 +1,7 @@
 from utils.data import Recording
 import numpy as np
 from utils.signal import block_generator
+import argparse
 
 mods = {
     "bpsk": {"num_bits_per_symbol": 1, "constellation_type": "psk"},
@@ -10,7 +11,7 @@ mods = {
 }
 
 
-def generate_modulated_signals():
+def generate_modulated_signals(output_dir):
     for modulation in ["bpsk", "qpsk", "qam16", "qam64"]:
         for snr in np.arange(-6, 13, 3):
             for i in range(100):
@@ -66,4 +67,11 @@
 
 
 if __name__ == "__main__":
-    generate_modulated_signals()
+    p = argparse.ArgumentParser(description="Generate modulated signal .npy files")
+    p.add_argument(
+        "--output-dir",
+        default=".",
+        help="Folder where .npy files will be saved",
+    )
+    args = p.parse_args()
+    generate_modulated_signals(args.output_dir)
diff --git a/scripts/dataset_building/produce_dataset.py b/scripts/dataset_building/produce_dataset.py
index 2ff76e9..c42fbd2 100644
--- a/scripts/dataset_building/produce_dataset.py
+++ b/scripts/dataset_building/produce_dataset.py
@@ -98,18 +98,18 @@ def generate_datasets(cfg):
         dset (h5py.Dataset): The created dataset object
     """
 
-    parent = os.path.dirname(cfg.output_dir)
-    if not parent:
-        os.makedirs(cfg.output_dir, exist_ok=True)
+    # ensure the output directory exists; the old parent-dir check skipped
+    # creation whenever the path contained a separator
+    os.makedirs("data/dataset", exist_ok=True)
 
     # we assume the recordings are in .npy format
-    files = os.listdir(cfg.input_dir)
+    files = os.listdir("data/recordings")
     if not files:
         raise ValueError("No files found in the specified directory.")
 
     records = []
     for fname in files:
-        rec = from_npy(os.path.join(cfg.input_dir, fname))
+        rec = from_npy(os.path.join("data/recordings", fname))
         data = rec.data
 
         # here data is a numpy array with the shape (1, N)
@@ -125,8 +125,8 @@
     train_records, val_records = split(records,
                                        cfg.train_split, cfg.seed)
 
-    train_path = os.path.join(cfg.output_dir, "train.h5")
-    val_path = os.path.join(cfg.output_dir, "val.h5")
+    train_path = os.path.join("data/dataset", "train.h5")
+    val_path = os.path.join("data/dataset", "val.h5")
 
     write_hdf5_file(train_records, train_path, "training_data")
     write_hdf5_file(val_records, val_path, "validation_data")
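
Reviewer note: the hunks above give generate_modulated_signals an output_dir parameter, but the loop body that actually writes the .npy files is outside the visible context, so make sure it joins filenames onto output_dir rather than writing into the working directory. A minimal sketch of the assumed save step; the save_recording helper and its filename scheme are illustrations, not code from this repo:

    import os
    import numpy as np

    def save_recording(data, output_dir, modulation, snr, index):
        # Hypothetical helper: the diff does not show how the loop body
        # names or writes its .npy files, so the naming scheme below is
        # an assumption.
        os.makedirs(output_dir, exist_ok=True)
        fname = f"{modulation}_{int(snr)}dB_{index:03d}.npy"
        np.save(os.path.join(output_dir, fname), data)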
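
After both scripts run, the outputs can be sanity-checked using only what this diff establishes: the hardcoded data/dataset folder and the training_data/validation_data dataset names passed to write_hdf5_file. A quick check, assuming h5py is installed:

    import h5py

    # Paths and dataset names taken from produce_dataset.py above.
    for path, name in [("data/dataset/train.h5", "training_data"),
                       ("data/dataset/val.h5", "validation_data")]:
        with h5py.File(path, "r") as f:
            print(f"{path}: {name} shape = {f[name].shape}")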