forked from qoherent/modrec-workflow

commit e4542e76c4 (parent b49fa4c2b7)

    updated so now the data gen works
@@ -40,7 +40,7 @@ jobs:
       - name: 1. Generate Recordings
         run: |
           mkdir -p data/recordings
-          PYTHONPATH=. python scripts/dataset_building/data_gen.py
+          PYTHONPATH=. python scripts/generate_modulated_signals.py --output-dir data/recordings
           echo "recordings produced successfully"

       - name: Upload Recordings
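The generation step now invokes scripts/generate_modulated_signals.py with an explicit output folder. The same step can be reproduced locally from the repository root (PYTHONPATH=. keeps the repository root on the import path so the script can resolve its utils.* imports):

    mkdir -p data/recordings
    PYTHONPATH=. python scripts/generate_modulated_signals.py --output-dir data/recordings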
@@ -3,9 +3,6 @@ general:
   run_mode: prod

 dataset:
-  #where to read the recordings from to produce the data set
-  input_dir: data/recordings
-
   #number of slices you want to split each recording into
   num_slices: 8

@@ -19,8 +16,6 @@ dataset:
   #multiple modulations to contain in the dataset
   modulation_types: [bpsk, qpsk, qam16, qam64]

-  #where to output the datasets
-  output_dir: data/dataset

 training:
   #number of training samples being processed together before model updates its weights
@@ -40,5 +35,6 @@ inference:
   num_classes: 4


+
 app:
   build_dir: dist
@@ -12,13 +12,11 @@ class GeneralConfig:

 @dataclass
 class DataSetConfig:
-    input_dir: str
     num_slices: int
     train_split: float
     seed: int
     modulation_types: list
     val_split: float
-    output_dir: str


 @dataclass
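With input_dir and output_dir dropped from both the YAML and the dataclass, DataSetConfig now carries only the slicing and split parameters. A minimal sketch of loading the trimmed section, assuming a PyYAML-based loader and that the YAML keys match the field names exactly (the repository's actual loader is not shown in this diff and may differ):

    import yaml
    from dataclasses import dataclass

    @dataclass
    class DataSetConfig:
        num_slices: int
        train_split: float
        seed: int
        modulation_types: list
        val_split: float

    # hypothetical helper; the repo's own config loading may differ
    def load_dataset_config(path):
        with open(path) as f:
            raw = yaml.safe_load(f)
        return DataSetConfig(**raw["dataset"])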
@@ -1,6 +1,7 @@
 from utils.data import Recording
 import numpy as np
 from utils.signal import block_generator
+import argparse

 mods = {
     "bpsk": {"num_bits_per_symbol": 1, "constellation_type": "psk"},
@@ -10,7 +11,7 @@ mods = {
 }


-def generate_modulated_signals():
+def generate_modulated_signals(output_dir):
     for modulation in ["bpsk", "qpsk", "qam16", "qam64"]:
         for snr in np.arange(-6, 13, 3):
             for i in range(100):
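With these ranges the script sweeps 4 modulations × 7 SNR points (np.arange(-6, 13, 3) yields -6, -3, 0, 3, 6, 9, 12) × 100 iterations, i.e. 2800 recordings per run. The loop body sits outside this hunk; a sketch of how output_dir would plausibly reach the save call (the synthesis step and file naming here are illustrative, not the script's actual code):

    import os
    import numpy as np

    def generate_modulated_signals(output_dir):
        os.makedirs(output_dir, exist_ok=True)  # tolerate an existing folder
        for modulation in ["bpsk", "qpsk", "qam16", "qam64"]:
            for snr in np.arange(-6, 13, 3):
                for i in range(100):
                    # placeholder for the actual modulation + noise synthesis
                    iq = np.zeros((1, 1024), dtype=np.complex64)
                    fname = f"{modulation}_snr{int(snr)}_{i}.npy"  # illustrative naming
                    np.save(os.path.join(output_dir, fname), iq)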
@@ -66,4 +67,11 @@ def generate_modulated_signals():


 if __name__ == "__main__":
-    generate_modulated_signals()
+    p = argparse.ArgumentParser(description="Generate modulated signal .npy files")
+    p.add_argument(
+        "--output-dir",
+        default=".",
+        help="Folder where .npy files will be saved"
+    )
+    args = p.parse_args()
+    generate_modulated_signals(args.output_dir)
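Invocation now matches the workflow step: PYTHONPATH=. python scripts/generate_modulated_signals.py --output-dir data/recordings. Run without the flag, the files land in the current directory (default=".").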
@@ -98,18 +98,18 @@ def generate_datasets(cfg):
         dset (h5py.Dataset): The created dataset object
     """

-    parent = os.path.dirname(cfg.output_dir)
+    parent = os.path.dirname("data/dataset")
     if not parent:
-        os.makedirs(cfg.output_dir, exist_ok=True)
+        os.makedirs("data/dataset", exist_ok=True)

     # we assume the recordings are in .npy format
-    files = os.listdir(cfg.input_dir)
+    files = os.listdir("data/recordings")
     if not files:
         raise ValueError("No files found in the specified directory.")

     records = []
     for fname in files:
-        rec = from_npy(os.path.join(cfg.input_dir, fname))
+        rec = from_npy(os.path.join("data/recordings", fname))

         data = rec.data  # here data is a numpy array with the shape (1, N)

@@ -125,8 +125,8 @@ def generate_datasets(cfg):

     train_records, val_records = split(records, cfg.train_split, cfg.seed)

-    train_path = os.path.join(cfg.output_dir, "train.h5")
-    val_path = os.path.join(cfg.output_dir, "val.h5")
+    train_path = os.path.join("data/dataset", "train.h5")
+    val_path = os.path.join("data/dataset", "val.h5")

     write_hdf5_file(train_records, train_path, "training_data")
     write_hdf5_file(val_records, val_path, "validation_data")
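One behavioral note on the hardcoded paths: os.path.dirname("data/dataset") evaluates to "data", which is truthy, so the "if not parent:" branch never fires and data/dataset is never created here; the folder must already exist (for example, created by an earlier step) before the writes above can succeed. Separately, the docstring references h5py.Dataset, so write_hdf5_file presumably wraps h5py. A minimal sketch of such a writer, assuming each record reduces to a uniformly shaped array (the real helper's layout and metadata handling are not shown in this diff):

    import h5py
    import numpy as np

    # hypothetical stand-in for the repository's write_hdf5_file helper
    def write_hdf5_file(records, path, dataset_name):
        data = np.stack([np.asarray(r) for r in records])  # assumes uniform shapes
        with h5py.File(path, "w") as f:
            f.create_dataset(dataset_name, data=data)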