forked from qoherent/modrec-workflow

updated so now the data gen works

parent b49fa4c2b7
commit e4542e76c4
@@ -40,7 +40,7 @@ jobs:
       - name: 1. Generate Recordings
         run: |
           mkdir -p data/recordings
-          PYTHONPATH=. python scripts/dataset_building/data_gen.py
+          PYTHONPATH=. python scripts/generate_modulated_signals.py --output-dir data/recordings
           echo "recordings produced successfully"

       - name: Upload Recordings
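(Note: PYTHONPATH=. adds the repository root to Python's module search path, which is what lets scripts/generate_modulated_signals.py resolve the utils.data and utils.signal imports shown further down.)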
@@ -3,9 +3,6 @@ general:
   run_mode: prod

 dataset:
-  #where to read the recordings from to produce the data set
-  input_dir: data/recordings
-
   #number of slices you want to split each recording into
   num_slices: 8

@@ -19,8 +16,6 @@ dataset:
   #multiple modulations to contain in the dataset
   modulation_types: [bpsk, qpsk, qam16, qam64]

-  #where to output the datasets
-  output_dir: data/dataset

 training:
   #number of training samples being processed together before model updates its weights
@@ -40,5 +35,6 @@ inference:
   num_classes: 4

+

 app:
   build_dir: dist
@@ -12,13 +12,11 @@ class GeneralConfig:

 @dataclass
 class DataSetConfig:
-    input_dir: str
     num_slices: int
     train_split: float
     seed: int
     modulation_types: list
     val_split: float
-    output_dir: str


 @dataclass
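For reference, a minimal sketch of how the trimmed dataset: section could be mapped onto DataSetConfig, assuming PyYAML and a file named config.yaml (both are assumptions; the repo's actual config loader is not part of this diff):

from dataclasses import dataclass
import yaml  # PyYAML; assumed dependency

@dataclass
class DataSetConfig:
    num_slices: int
    train_split: float
    seed: int
    modulation_types: list
    val_split: float

with open("config.yaml") as f:  # hypothetical filename
    raw = yaml.safe_load(f)
# assumes the YAML keys under dataset: match the field names exactly
cfg = DataSetConfig(**raw["dataset"])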
@@ -1,6 +1,7 @@
 from utils.data import Recording
 import numpy as np
 from utils.signal import block_generator
+import argparse

 mods = {
     "bpsk": {"num_bits_per_symbol": 1, "constellation_type": "psk"},
@@ -10,7 +11,7 @@ mods = {
 }


-def generate_modulated_signals():
+def generate_modulated_signals(output_dir):
     for modulation in ["bpsk", "qpsk", "qam16", "qam64"]:
         for snr in np.arange(-6, 13, 3):
             for i in range(100):
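The loop body sits outside this hunk; judging by the workflow step, it writes one .npy file per (modulation, snr, i) combination into output_dir. A hypothetical sketch of just that saving step (the helper name and file-naming scheme are illustrative, not the repo's code):

import os
import numpy as np

def save_recording(output_dir, modulation, snr, index, iq_samples):
    # e.g. "qam16_snr-3_042.npy"; the actual naming convention is an assumption
    fname = f"{modulation}_snr{int(snr)}_{index:03d}.npy"
    np.save(os.path.join(output_dir, fname), iq_samples)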
@@ -66,4 +67,11 @@ def generate_modulated_signals():


 if __name__ == "__main__":
-    generate_modulated_signals()
+    p = argparse.ArgumentParser(description="Generate modulated signal .npy files")
+    p.add_argument(
+        "--output-dir",
+        default=".",
+        help="Folder where .npy files will be saved"
+    )
+    args = p.parse_args()
+    generate_modulated_signals(args.output_dir)
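With this entry point, the invocation matches the updated workflow step: PYTHONPATH=. python scripts/generate_modulated_signals.py --output-dir data/recordings. Left unset, --output-dir defaults to the current directory.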
@@ -98,18 +98,18 @@ def generate_datasets(cfg):
         dset (h5py.Dataset): The created dataset object
     """

-    parent = os.path.dirname(cfg.output_dir)
+    parent = os.path.dirname("data/dataset")
     if not parent:
-        os.makedirs(cfg.output_dir, exist_ok=True)
+        os.makedirs("data/dataset", exist_ok=True)

     # we assume the recordings are in .npy format
-    files = os.listdir(cfg.input_dir)
+    files = os.listdir("data/recordings")
     if not files:
         raise ValueError("No files found in the specified directory.")

     records = []
     for fname in files:
-        rec = from_npy(os.path.join(cfg.input_dir, fname))
+        rec = from_npy(os.path.join("data/recordings", fname))

         data = rec.data  # here data is a numpy array with the shape (1, N)

@@ -125,8 +125,8 @@ def generate_datasets(cfg):

     train_records, val_records = split(records, cfg.train_split, cfg.seed)

-    train_path = os.path.join(cfg.output_dir, "train.h5")
-    val_path = os.path.join(cfg.output_dir, "val.h5")
+    train_path = os.path.join("data/dataset", "train.h5")
+    val_path = os.path.join("data/dataset", "val.h5")

     write_hdf5_file(train_records, train_path, "training_data")
     write_hdf5_file(val_records, val_path, "validation_data")
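The split() helper used above is not shown in this diff; a plausible minimal implementation is a seeded shuffle followed by a ratio cut (a sketch under that assumption, not the repo's actual code):

import random

def split(records, train_split, seed):
    # seeded shuffle keeps the train/val partition reproducible across runs
    rng = random.Random(seed)
    shuffled = list(records)
    rng.shuffle(shuffled)
    cut = int(len(shuffled) * train_split)
    return shuffled[:cut], shuffled[cut:]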