From 91bab4acbd9adbb27cc4b033abc9463921dbd999 Mon Sep 17 00:00:00 2001 From: Liyu Xiao Date: Wed, 18 Jun 2025 13:44:29 -0400 Subject: [PATCH] fixed plot.py --- scripts/dataset_building/data_gen.py | 2 +- scripts/dataset_building/produce_dataset.py | 5 ++- scripts/dataset_building/split_dataset.py | 18 ++++---- scripts/onnx/convert_to_onnx.py | 12 ++--- scripts/onnx/profile_onnx.py | 13 ++++-- scripts/training/plot_data.py | 49 ++++++++++----------- scripts/training/train.py | 2 +- 7 files changed, 52 insertions(+), 49 deletions(-) diff --git a/scripts/dataset_building/data_gen.py b/scripts/dataset_building/data_gen.py index 43ccbb6..0fac831 100644 --- a/scripts/dataset_building/data_gen.py +++ b/scripts/dataset_building/data_gen.py @@ -15,7 +15,7 @@ def generate_modulated_signals(output_dir: str) -> None: adds AWGN noise, and saves the resulting samples as .npy files to the given output directory. The function uses modulation parameters defined in app.yaml and supports modulation types like - PSK and QAM through configurable constellation settings. The generated recordings are tagged + PSK and QAM through configurable constellation settings. The generated recordings are tagged with metadata such as modulation type, SNR, roll-off factor (beta), and samples-per-symbol (sps). Parameters: diff --git a/scripts/dataset_building/produce_dataset.py b/scripts/dataset_building/produce_dataset.py index c0156b0..b2e65c2 100644 --- a/scripts/dataset_building/produce_dataset.py +++ b/scripts/dataset_building/produce_dataset.py @@ -144,7 +144,9 @@ def main(): print("📦 Generating training and validation datasets...") print(f" ➤ Slicing each recording into {dataset_cfg.num_slices} snippets") - print(f" ➤ Train/Val split: {int(dataset_cfg.train_split * 100)}% / {int((1 - dataset_cfg.train_split) * 100)}%") + print( + f" ➤ Train/Val split: {int(dataset_cfg.train_split * 100)}% / {int((1 - dataset_cfg.train_split) * 100)}%" + ) print(f" ➤ Output directory: data/dataset\n") train_path, val_path = generate_datasets(dataset_cfg) @@ -160,6 +162,5 @@ def main(): print(f" 🔸 Validation samples saved to: {val_path} ({num_val} samples)") - if __name__ == "__main__": main() diff --git a/scripts/dataset_building/split_dataset.py b/scripts/dataset_building/split_dataset.py index 93c30ee..732c84b 100644 --- a/scripts/dataset_building/split_dataset.py +++ b/scripts/dataset_building/split_dataset.py @@ -3,12 +3,15 @@ from collections import defaultdict from typing import List, Tuple, Dict import numpy as np + def split( dataset: List[Tuple[np.ndarray, Dict[str, any]]], train_frac: float, seed: int, - label_key: str = "modulation" - ) -> Tuple[List[Tuple[np.ndarray, Dict[str, any]]], List[Tuple[np.ndarray, Dict[str, any]]]]: + label_key: str = "modulation", +) -> Tuple[ + List[Tuple[np.ndarray, Dict[str, any]]], List[Tuple[np.ndarray, Dict[str, any]]] +]: """ Splits a dataset of modulated IQ signal recordings into training and validation subsets. @@ -61,9 +64,8 @@ def split( def split_recording( - recording_list: List[Tuple[np.ndarray, Dict[str, any]]], - num_snippets: int - ) -> List[Tuple[np.ndarray, Dict[str, any]]]: + recording_list: List[Tuple[np.ndarray, Dict[str, any]]], num_snippets: int +) -> List[Tuple[np.ndarray, Dict[str, any]]]: """ Splits each full recording into a specified number of smaller snippets. @@ -75,13 +77,13 @@ def split_recording( array into `num_snippets` contiguous chunks of shape (2, N // num_snippets). Parameters: - recording_list (List[Tuple[np.ndarray, dict]]): + recording_list (List[Tuple[np.ndarray, dict]]): List of (data, metadata) tuples to be split. - num_snippets (int): + num_snippets (int): Number of equal-length segments to divide each recording into. Returns: - List[Tuple[np.ndarray, dict]]: + List[Tuple[np.ndarray, dict]]: A flat list containing all resulting (snippet, metadata) pairs. """ snippet_list = [] diff --git a/scripts/onnx/convert_to_onnx.py b/scripts/onnx/convert_to_onnx.py index 3dfe61a..7a3d209 100644 --- a/scripts/onnx/convert_to_onnx.py +++ b/scripts/onnx/convert_to_onnx.py @@ -7,11 +7,7 @@ from scripts.training.mobilenetv3 import mobilenetv3, RFClassifier from helpers.app_settings import get_app_settings - -def convert_to_onnx( - ckpt_path: str, - fp16: bool=False - ) -> None : +def convert_to_onnx(ckpt_path: str, fp16: bool = False) -> None: """ Convert a PyTorch model to ONNX format. @@ -35,11 +31,9 @@ def convert_to_onnx( in_chans=in_channels, ) ) - + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - checkpoint = torch.load( - ckpt_path, weights_only=True, map_location=device - ) + checkpoint = torch.load(ckpt_path, weights_only=True, map_location=device) model.load_state_dict(checkpoint["state_dict"]) if fp16: diff --git a/scripts/onnx/profile_onnx.py b/scripts/onnx/profile_onnx.py index 1966e8b..7f6ff04 100644 --- a/scripts/onnx/profile_onnx.py +++ b/scripts/onnx/profile_onnx.py @@ -5,7 +5,10 @@ import os import time import json -def profile_onnx_model(path_to_onnx: str, num_runs: int = 100, warmup_runs: int = 5) -> None: + +def profile_onnx_model( + path_to_onnx: str, num_runs: int = 100, warmup_runs: int = 5 +) -> None: """ Profiles an ONNX model by running inference multiple times and collecting performance data. @@ -58,7 +61,9 @@ def profile_onnx_model(path_to_onnx: str, num_runs: int = 100, warmup_runs: int times.append(t1 - t0) avg_time = sum(times) / len(times) - print(f"[Timing] Avg inference time (excluding {warmup_runs} warm-ups): {avg_time:.6f} sec") + print( + f"[Timing] Avg inference time (excluding {warmup_runs} warm-ups): {avg_time:.6f} sec" + ) # End profiling & parse JSON profile_file = session.end_profiling() @@ -71,7 +76,9 @@ def profile_onnx_model(path_to_onnx: str, num_runs: int = 100, warmup_runs: int print(f"[Profile] Number of nodes executed: {len(nodes)}") if nodes: top = max(nodes, key=lambda x: x.get("dur", 0)) - print(f"[Profile] Most expensive op: {top['name']} — {top['dur'] / 1e6:.3f} ms") + print( + f"[Profile] Most expensive op: {top['name']} — {top['dur'] / 1e6:.3f} ms" + ) except Exception as e: print(f"[Warning] Failed to parse profiling JSON: {e}") diff --git a/scripts/training/plot_data.py b/scripts/training/plot_data.py index 1d7f810..3579970 100644 --- a/scripts/training/plot_data.py +++ b/scripts/training/plot_data.py @@ -1,35 +1,33 @@ import os import torch import numpy as np -import h5py from sklearn.metrics import classification_report +import matplotlib from matplotlib import pyplot as plt from scripts.training.mobilenetv3 import mobilenetv3, RFClassifier from helpers.app_settings import get_app_settings -from cm_plotter import plot_confusion_matrix +from cm_plotter import plot_confusion_matrix +from scripts.training.modulation_dataset import ModulationH5Dataset -def load_validation_data(h5_path:str ="data/datasets/val.h5"): - """ - Loads validation data from an HDF5 file. +def load_validation_data(): + val_dataset = ModulationH5Dataset( + "data/dataset/val.h5", + label_name="modulation", + data_key="validation_data" + ) + + X = np.stack([x.numpy() for x, _ in val_dataset]) # shape: (N, C, L) + y = np.array([y.item() for _, y in val_dataset]) # shape: (N,) + class_names = list(val_dataset.label_encoder.classes_) - Returns: - X_val: np.ndarray of shape (N, C, L) - y_val: np.ndarray of shape (N,) - class_names: list of class names - """ - with h5py.File(h5_path, "r") as f: - X = f["X"][:] # shape: (N, C, L) - y = f["y"][:] # shape: (N,) - if "class_names" in f: - class_names = [s.decode("utf-8") for s in f["class_names"][:]] - else: - class_names = [str(i) for i in np.unique(y)] return X, y, class_names -def build_model_from_ckpt(ckpt_path: str, in_channels: int, num_classes: int) -> torch.nn.Module: +def build_model_from_ckpt( + ckpt_path: str, in_channels: int, num_classes: int +) -> torch.nn.Module: """ Build and return a PyTorch model loaded from a checkpoint. """ @@ -37,13 +35,11 @@ def build_model_from_ckpt(ckpt_path: str, in_channels: int, num_classes: int) -> model=mobilenetv3( model_size="mobilenetv3_small_050", num_classes=num_classes, - in_chans=in_channels + in_chans=in_channels, ) ) device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - checkpoint = torch.load( - ckpt_path, weights_only=True, map_location=device - ) + checkpoint = torch.load(ckpt_path, weights_only=True, map_location=device) model.load_state_dict(checkpoint["state_dict"]) model.eval() return model @@ -54,13 +50,16 @@ def evaluate_checkpoint(ckpt_path: str): Loads the model from checkpoint and evaluates it on a validation set. Prints classification metrics and plots a confusion matrix. """ + # Load validation data X_val, y_true, class_names = load_validation_data() num_classes = len(class_names) in_channels = X_val.shape[1] # Load model - model = build_model_from_ckpt(ckpt_path, in_channels=in_channels, num_classes=num_classes) + model = build_model_from_ckpt( + ckpt_path, in_channels=in_channels, num_classes=num_classes + ) # Inference y_pred = [] @@ -73,7 +72,7 @@ def evaluate_checkpoint(ckpt_path: str): # Print classification report print("\nClassification Report:") - print(classification_report(y_true, y_pred, target_names=class_names)) + print(classification_report(y_true, y_pred, target_names=class_names, zero_division=0)) # Plot confusion matrix plot_confusion_matrix( @@ -81,7 +80,7 @@ def evaluate_checkpoint(ckpt_path: str): y_pred=np.array(y_pred), classes=class_names, normalize=True, - title="Normalized Confusion Matrix" + title="Normalized Confusion Matrix", ) plt.show() diff --git a/scripts/training/train.py b/scripts/training/train.py index aa334ce..5d334ec 100644 --- a/scripts/training/train.py +++ b/scripts/training/train.py @@ -131,7 +131,7 @@ def train_model(): trainer = L.Trainer( max_epochs=epochs, callbacks=[checkpoint_callback, CustomProgressBar()], - accelerator="gpu", + accelerator="cpu", devices=1, benchmark=True, precision="16-mixed",