Populating initial source code from RIA Utils project
parent 25e39d3544
commit d8a7dc16be
src/ria_toolkit/adt/__init__.py (new file, 8 lines)
@@ -0,0 +1,8 @@
"""
The Data package contains abstract data types tailored for radio machine learning, such as ``Recording``, as well
as the abstract interfaces for the radio dataset and radio dataset builder framework.
"""

__all__ = ["Annotation", "Recording"]
from .annotation import Annotation
from .recording import Recording
src/ria_toolkit/adt/annotation.py (new file, 128 lines)
@@ -0,0 +1,128 @@
from __future__ import annotations

import json
from typing import Any, Optional

from sigmf import SigMFFile


class Annotation:
    """Signal annotations are labels or additional information associated with specific data points or segments within
    a signal. These annotations could be used for tasks like supervised learning, where the goal is to train a model
    to recognize patterns or characteristics in the signal associated with these annotations.

    Annotations can be used to label interesting points in your recording.

    :param sample_start: The index of the starting sample of the annotation.
    :type sample_start: int
    :param sample_count: The number of samples in the annotation.
    :type sample_count: int
    :param freq_lower_edge: The lower frequency of the annotation.
    :type freq_lower_edge: float
    :param freq_upper_edge: The upper frequency of the annotation.
    :type freq_upper_edge: float
    :param label: The label that will be displayed with the bounding box in compatible viewers, including IQEngine.
        Defaults to an empty string.
    :type label: str, optional
    :param comment: A human-readable comment. Defaults to an empty string.
    :type comment: str, optional
    :param detail: A dictionary of user-defined annotation-specific metadata. Defaults to None.
    :type detail: dict, optional
    """

    def __init__(
        self,
        sample_start: int,
        sample_count: int,
        freq_lower_edge: float,
        freq_upper_edge: float,
        label: Optional[str] = "",
        comment: Optional[str] = "",
        detail: Optional[dict] = None,
    ):
        """Initialize a new Annotation instance."""
        self.sample_start = int(sample_start)
        self.sample_count = int(sample_count)
        self.freq_lower_edge = float(freq_lower_edge)
        self.freq_upper_edge = float(freq_upper_edge)
        self.label = str(label)
        self.comment = str(comment)

        if detail is None:
            self.detail = {}
        elif not _is_jsonable(detail):
            raise ValueError(f"Detail object is not JSON serializable: {detail}")
        else:
            self.detail = detail

    def is_valid(self) -> bool:
        """
        Check that the annotation sample count is > 0 and that freq_lower_edge < freq_upper_edge.

        :returns: True if valid, False if not.
        """

        return self.sample_count > 0 and self.freq_lower_edge < self.freq_upper_edge

    def overlap(self, other):
        """
        Quantify how much the bounding box in this annotation overlaps with another annotation.

        :param other: The other annotation.
        :type other: Annotation

        :returns: The area of the overlap in samples*frequency, or 0 if they do not overlap."""

        sample_overlap_start = max(self.sample_start, other.sample_start)
        sample_overlap_end = min(self.sample_start + self.sample_count, other.sample_start + other.sample_count)

        freq_overlap_start = max(self.freq_lower_edge, other.freq_lower_edge)
        freq_overlap_end = min(self.freq_upper_edge, other.freq_upper_edge)

        if freq_overlap_start >= freq_overlap_end or sample_overlap_start >= sample_overlap_end:
            return 0
        else:
            return (sample_overlap_end - sample_overlap_start) * (freq_overlap_end - freq_overlap_start)

    def area(self):
        """
        The 'area' of the bounding box, samples*frequency.
        Useful to quantify annotation size.

        :returns: sample length multiplied by bandwidth."""

        return self.sample_count * (self.freq_upper_edge - self.freq_lower_edge)

    def __eq__(self, other: Annotation) -> bool:
        return self.__dict__ == other.__dict__

    def to_sigmf_format(self):
        """
        Returns a JSON dictionary representing this annotation, formatted to be saved in a .sigmf-meta file.
        """

        annotation_dict = {SigMFFile.START_INDEX_KEY: self.sample_start, SigMFFile.LENGTH_INDEX_KEY: self.sample_count}

        annotation_dict["metadata"] = {
            SigMFFile.LABEL_KEY: self.label,
            SigMFFile.COMMENT_KEY: self.comment,
            SigMFFile.FHI_KEY: self.freq_upper_edge,
            SigMFFile.FLO_KEY: self.freq_lower_edge,
            "ria:detail": self.detail,
        }

        if _is_jsonable(annotation_dict):
            return annotation_dict
        else:
            raise ValueError("Annotation dictionary was not JSON serializable.")


def _is_jsonable(x: Any) -> bool:
    """
    :return: True if x is JSON serializable, False otherwise.
    """
    try:
        json.dumps(x)
        return True
    except (TypeError, OverflowError):
        return False
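
A quick sketch of how the class above might be used (an illustration, not part of the commit). It assumes the package laid out in this diff imports as ``ria_toolkit.adt`` and that the ``sigmf`` dependency is installed:

from ria_toolkit.adt import Annotation  # import path assumed from the file layout in this commit

# Two annotations over the same recording, offset in time and frequency.
a = Annotation(sample_start=0, sample_count=1000, freq_lower_edge=-10e3, freq_upper_edge=10e3, label="sig-a")
b = Annotation(sample_start=500, sample_count=1000, freq_lower_edge=0.0, freq_upper_edge=20e3, label="sig-b")

assert a.is_valid() and b.is_valid()
print(a.area())             # 1000 samples * 20 kHz of bandwidth = 20000000.0
print(a.overlap(b))         # 500 samples * 10 kHz of shared bandwidth = 5000000.0
print(a.to_sigmf_format())  # SigMF-style keys plus the "ria:detail" metadata field
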
src/ria_toolkit/adt/datasets/__init__.py (new file, 12 lines)
@@ -0,0 +1,12 @@
"""
The Radio Dataset Subpackage defines the abstract interfaces and framework components for the management of machine
learning datasets tailored for radio signal processing.
"""

__all__ = ["RadioDataset", "IQDataset", "SpectDataset", "DatasetBuilder", "split", "random_split"]

from .dataset_builder import DatasetBuilder
from .iq_dataset import IQDataset
from .radio_dataset import RadioDataset
from .spect_dataset import SpectDataset
from .split import random_split, split
src/ria_toolkit/adt/datasets/dataset_builder.py (new file, 137 lines)
@@ -0,0 +1,137 @@
"""
A `DatasetBuilder` is a creator class that manages the download, preparation, and creation of radio datasets.
"""

from abc import ABC, abstractmethod
from typing import Any, Optional

from packaging.version import Version

from utils._utils.abstract_attribute import abstract_attribute
from utils.data.datasets.license.dataset_license import DatasetLicense
from utils.data.datasets.radio_dataset import RadioDataset


class DatasetBuilder(ABC):
    """Abstract interface for radio dataset builders. These builders produce radio datasets for common and project
    datasets related to radio science.

    This class should not be instantiated directly. Instead, subclass it to define specific builders for different
    datasets.
    """

    _url: str = abstract_attribute()
    _SHA256: str  # SHA256 checksum.
    _name: str = abstract_attribute()
    _author: str = abstract_attribute()
    _license: DatasetLicense = abstract_attribute()
    _version: Version = abstract_attribute()
    _latest_version: Version = None

    def __init__(self):
        super().__init__()

    @property
    def name(self) -> str:
        """
        :return: The name of the dataset.
        :type: str
        """
        return self._name

    @property
    def author(self) -> str:
        """
        :return: The author of the dataset.
        :type: str
        """
        return self._author

    @property
    def url(self) -> str:
        """
        :return: The URL where the dataset was accessed.
        :type: str
        """
        return self._url

    @property
    def sha256(self) -> Optional[str]:
        """
        :return: The SHA256 checksum, or None if not set.
        :type: str
        """
        return self._SHA256

    @property
    def md5(self) -> Optional[str]:
        """
        :return: The MD5 checksum, or None if not set.
        :type: str
        """
        return self._MD5

    @property
    def version(self) -> Version:
        """
        :return: The version identifier of the dataset.
        :type: Version Identifier
        """
        return self._version

    @property
    def latest_version(self) -> Optional[Version]:
        """
        :return: The version identifier of the latest available version of the dataset, or None if not set.
        :type: Version Identifier or None
        """
        return self._latest_version

    @property
    def license(self) -> DatasetLicense:
        """
        :return: The dataset license information.
        :type: DatasetLicense
        """
        return self._license

    @property
    def info(self) -> dict[str, Any]:
        """
        :return: Information about the dataset, including the name, author, and version of the dataset.
        :rtype: dict
        """
        # TODO: We should increase the amount of information that's included here. See the information included in
        # tfds.core.DatasetInfo for more: https://www.tensorflow.org/datasets/api_docs/python/tfds/core/DatasetInfo.
        return {
            "name": self.name,
            "author": self.author,
            "url": self.url,
            "sha256": self.sha256,
            "md5": self.md5,
            "version": self.version,
            "license": self.license,
            "latest_version": self.latest_version,
        }

    @abstractmethod
    def download_and_prepare(self) -> None:
        """Download and prepare the dataset for use as an HDF5 source file.

        Once an HDF5 source file has been prepared, the downloaded files are deleted.
        """
        pass

    @abstractmethod
    def as_dataset(self, backend: str) -> RadioDataset:
        """A factory method to manage the creation of radio datasets.

        :param backend: Backend framework to use ("pytorch" or "tensorflow").
        :type backend: str

        Note: Depending on your installation, not all backends may be available.

        :return: A new RadioDataset based on the signal representation and specified backend.
        :type: RadioDataset
        """
        pass
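
To illustrate the interface above, here is a minimal, hypothetical subclass sketch (not part of the commit). The builder name, author, URL, and checksum are placeholders, and the import paths follow the ones used inside this commit:

from packaging.version import Version

from utils.data.datasets.dataset_builder import DatasetBuilder
from utils.data.datasets.license import CC_BY
from utils.data.datasets.radio_dataset import RadioDataset


class ToyNoiseBuilder(DatasetBuilder):
    _url = "https://example.com/toy-noise.tar.gz"  # placeholder URL
    _SHA256 = None                                 # checksum not known in this sketch
    _name = "toy-noise"
    _author = "Example Author"                     # placeholder author
    _license = CC_BY
    _version = Version("1.0.0")

    def download_and_prepare(self) -> None:
        # Fetch the archive at _url, verify it, and write an HDF5 source file.
        ...

    def as_dataset(self, backend: str) -> RadioDataset:
        # Return an IQDataset/SpectDataset implementation for the requested backend.
        ...
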
src/ria_toolkit/adt/datasets/h5helpers.py (new file, 221 lines)
@@ -0,0 +1,221 @@
import os

import h5py
import numpy as np


def copy_dataset_entry_by_index(
    source: str | os.PathLike, destination: str | os.PathLike, dataset_path: str, idx: int
) -> None:
    """
    Copies an entry from a dataset based on an index from the source HDF5 file to the destination HDF5 file.

    :param source: The name of the original HDF5 file.
    :type source: str
    :param destination: The name of the new HDF5 file.
    :type destination: str
    :param dataset_path: The path of the dataset from the root of the file.
    :type dataset_path: str
    :param idx: The index of the specified example.
    :type idx: int

    :return: None
    """
    # TODO: Generalize so that source and destination can be file objects or strings
    with h5py.File(source, "r") as original_file, h5py.File(destination, "a") as new_file:
        original_ds = original_file[dataset_path]

        entry = original_ds[idx]
        new_ds = new_file[dataset_path]
        new_ds.resize(new_ds.shape[0] + 1, axis=0)
        new_ds[-1] = entry


def copy_over_example(source: str | os.PathLike, destination: str | os.PathLike, idx: int) -> None:
    """
    Copies over an example and its corresponding metadata located at the given index to a new file.
    It appends the new example to the end of the new file.

    :param source: The name of the original HDF5 file.
    :type source: str or os.PathLike
    :param destination: The name of the new HDF5 file.
    :type destination: str or os.PathLike
    :param idx: The index of the example within the dataset.
    :type idx: int

    :return: None
    """

    with h5py.File(source, "r") as original_file, h5py.File(destination, "a") as new_file:
        ds, md = original_file["data"], original_file["metadata/metadata"]

        new_ds, new_md = new_file["data"], new_file["metadata/metadata"]

        new_ds.resize(new_ds.shape[0] + 1, axis=0)
        new_md.resize(new_md.shape[0] + 1, axis=0)

        new_ds[-1], new_md[-1] = ds[idx], md[idx]


def append_entry_inplace(source: str | os.PathLike, dataset_path: str, entry: np.ndarray) -> None:
    """
    Appends an entry to the specified dataset of the source HDF5 file. This operation is done in place.

    :param source: The name of the source HDF5 file.
    :type source: str or os.PathLike
    :param dataset_path: The path of the dataset from the root of the file.
    :type dataset_path: str
    :param entry: The entry that is being copied.
    :type entry: np.ndarray

    :return: None
    """
    # TODO: Generalize so that source can be file object or string
    with h5py.File(source, "a") as new_file:
        new_ds = new_file[dataset_path]
        new_ds.resize(new_ds.shape[0] + 1, axis=0)
        new_ds[-1] = entry


def duplicate_entry_inplace(source: str | os.PathLike, dataset_path: str, idx: int) -> None:
    """
    Appends the entry at the given index to the end of the dataset. This operation is done in place.

    :param source: The name of the source HDF5 file.
    :type source: str or os.PathLike
    :param dataset_path: The path of the dataset from the root of the file. This dataset is usually
        'data' or 'metadata/metadata'.
    :type dataset_path: str
    :param idx: The index of the example within the dataset.
    :type idx: int

    :return: None
    """
    # This function appends to the dataset, so upon dataset creation, chunks must be True and maxshape must be None
    with h5py.File(source, "a") as f:
        ds = f[dataset_path]
        entry = ds[idx]
        ds.resize(ds.shape[0] + 1, axis=0)
        ds[-1] = entry


def copy_file(original_source: str | os.PathLike, new_source: str | os.PathLike) -> None:
    """Copies the contents of a source HDF5 file to a new HDF5 file.

    :param original_source: The name of the original HDF5 source file.
    :type original_source: str or os.PathLike
    :param new_source: The copy of the HDF5 source file.
    :type new_source: str or os.PathLike

    :return: None
    """
    original_file = h5py.File(original_source, "r")

    with h5py.File(new_source, "w") as new_file:
        for key in original_file.keys():
            original_file.copy(key, new_file)

    original_file.close()


def make_empty_clone(original_source: str | os.PathLike, new_source: str | os.PathLike, example_length: int) -> None:
    """Creates a new HDF5 file with the same structure, but leaves the data and metadata datasets empty for
    subsequent operations.

    :param original_source: The name of the original HDF5 source file.
    :type original_source: str or os.PathLike
    :param new_source: The name of the new HDF5 source file.
    :type new_source: str or os.PathLike
    :param example_length: The desired length of an example in the new file.
    :type example_length: int

    :return: None
    """

    with h5py.File(new_source, "w") as new_file, h5py.File(original_source, "r") as original_file:
        for key in original_file.keys():
            if key == "data":
                ds = original_file["data"]
                channels = ds.shape[1]
                new_file.create_dataset(
                    "data",
                    shape=(0, channels, example_length),
                    chunks=True,
                    maxshape=(None, None, None),
                    dtype=original_file["data"].dtype,
                )
            elif key == "metadata":
                new_metadata_group = new_file.create_group("metadata")
                new_metadata_group.create_dataset(
                    "metadata",
                    shape=(0,),
                    chunks=True,
                    maxshape=(None,),
                    dtype=original_file["metadata/metadata"].dtype,
                )
            else:
                original_file.copy(key, new_file)


def delete_example_inplace(source: str | os.PathLike, idx: int) -> None:
    """Deletes an example and its corresponding metadata located at the given index.
    This deletion is done by creating a temporary dataset and copying all contents
    to the temporary dataset except for the example at idx. This operation is done in place.

    :param source: The name of the source HDF5 file.
    :type source: str or os.PathLike
    :param idx: The index of the example and metadata to be deleted.
    :type idx: int

    :return: None
    """

    with h5py.File(source, "a") as f:
        ds, md = f["data"], f["metadata/metadata"]
        m, c, n = ds.shape
        assert 0 <= idx <= m - 1
        assert len(ds) == len(md)

        new_ds = f.create_dataset(
            "data.temp",
            shape=(m - 1, c, n),
            chunks=True,
            dtype=ds.dtype,
            maxshape=(None, None, None),  # Required to allow future mutations which expand the shape
        )
        new_md = f.create_dataset(
            "metadata/metadata.temp", shape=len(md) - 1, chunks=True, dtype=md.dtype, maxshape=(None,)
        )

        for row in range(idx):
            new_ds[row], new_md[row] = ds[row], md[row]

        for row in range(idx + 1, len(md)):
            new_ds[row - 1], new_md[row - 1] = ds[row], md[row]

        del f["data"]
        del f["metadata/metadata"]

        f.move("data.temp", "data")
        f.move("metadata/metadata.temp", "metadata/metadata")


def overwrite_file(source: str | os.PathLike, new_data: np.ndarray) -> None:
    """
    Overwrites data in an HDF5 file with new data.

    :param source: The copy of the HDF5 source file.
    :type source: str or os.PathLike
    :param new_data: The updated copy of the data that should be stored.
    :type new_data: np.ndarray

    :return: None
    """

    # TODO: Might need to pass in dataset_path instead of dataset_name depending on file structure
    # Update copy to include augmented data

    with h5py.File(source, "r+") as f:
        ds_name = tuple(f.keys())[0]
        del f[ds_name]
        f.create_dataset(ds_name, data=new_data)
        f.close()
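
A small end-to-end sketch of the helpers above (an illustration, not part of the commit). It builds a toy source file with the ``data`` and ``metadata/metadata`` layout these functions expect, then duplicates and deletes an entry; the file name is a placeholder and the import path follows the one used inside this commit:

import h5py
import numpy as np

from utils.data.datasets.h5helpers import delete_example_inplace, duplicate_entry_inplace

src = "toy_source.hdf5"
with h5py.File(src, "w") as f:
    # 4 examples, 1 channel, 16 samples each; chunked and resizable, as the helpers require.
    f.create_dataset("data", data=np.zeros((4, 1, 16), dtype=np.complex64),
                     chunks=True, maxshape=(None, None, None))
    f.create_group("metadata").create_dataset("metadata", data=np.arange(4),
                                              chunks=True, maxshape=(None,))

duplicate_entry_inplace(source=src, dataset_path="data", idx=0)               # "data" grows to 5 rows
duplicate_entry_inplace(source=src, dataset_path="metadata/metadata", idx=0)  # metadata grows to 5 rows
delete_example_inplace(source=src, idx=2)                                     # both shrink back to 4 rows

with h5py.File(src, "r") as f:
    print(f["data"].shape, f["metadata/metadata"].shape)  # (4, 1, 16) (4,)
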
src/ria_toolkit/adt/datasets/iq_dataset.py (new file, 210 lines)
@@ -0,0 +1,210 @@
from __future__ import annotations

import os
from abc import ABC
from typing import Optional

import h5py
import numpy as np

from utils.data.datasets.h5helpers import (
    append_entry_inplace,
    copy_dataset_entry_by_index,
)
from utils.data.datasets.radio_dataset import RadioDataset


class IQDataset(RadioDataset, ABC):
    """An ``IQDataset`` is a ``RadioDataset`` tailored for machine learning tasks that involve processing
    radiofrequency (RF) signals represented as In-phase (I) and Quadrature (Q) samples.

    For machine learning tasks that involve processing spectrograms, please use
    utils.data.datasets.SpectDataset instead.

    This is an abstract interface defining common properties and behaviour of IQDatasets. Therefore, this class
    should not be instantiated directly. Instead, it is subclassed to define custom interfaces for specific machine
    learning backends.

    :param source: Path to the dataset source file. For more information on dataset source files
        and their format, see :doc:`radio_datasets`.
    :type source: str or os.PathLike
    """

    def __init__(self, source: str | os.PathLike):
        """Create a new IQDataset."""
        super().__init__(source=source)

    @property
    def shape(self) -> tuple[int]:
        """IQ datasets are M x C x N, where M is the number of examples, C is the number of channels, and N is the
        length of the signals.

        :return: The shape of the dataset. The elements of the shape tuple give the lengths of the corresponding
            dataset dimensions.
        :type: tuple of ints
        """
        return super().shape

    def trim_examples(
        self, trim_length: int, keep: Optional[str] = "start", inplace: Optional[bool] = False
    ) -> IQDataset | None:
        """Trims all examples in a dataset to a desired length.

        :param trim_length: The desired length of the trimmed examples.
        :type trim_length: int
        :param keep: Specifies the part of the example to keep. Defaults to "start".
            The options are:
            - "start"
            - "end"
            - "middle"
            - "random"
        :type keep: str, optional
        :param inplace: If True, the operation modifies the existing source file directly and returns None.
            If False, the operation creates a new dataset object and corresponding source file, leaving the original
            dataset unchanged. Default is False.
        :type inplace: bool

        :raises ValueError: If trim_length is greater than or equal to the length of the examples.
        :raises ValueError: If the value of keep is not recognized.
        :raises ValueError: If the specified trim length is invalid for the middle index.

        :return: The dataset that is composed of shorter examples.
        :rtype: IQDataset

        **Examples:**

        >>> from ria.dataset_manager.builders import AWGN_Builder
        >>> builder = AWGN_Builder()
        >>> builder.download_and_prepare()
        >>> ds = builder.as_dataset()
        >>> ds.shape
        (5, 1, 3)
        >>> new_ds = ds.trim_examples(2)
        >>> new_ds.shape
        (5, 1, 2)
        """

        keep = keep.lower()

        channels, example_length = np.shape(self[0])

        if trim_length >= example_length:
            raise ValueError(f"Trim length must be less than {example_length}")

        if keep not in {"start", "end", "middle", "random"}:
            raise ValueError('keep must be "start", "end", "middle", or "random"')

        start = None
        if keep == "middle":
            start = int(example_length / 2)
            if start + trim_length > example_length:
                raise ValueError(f"Trim length of {trim_length} is invalid for middle index of: {start} ")

        elif keep == "random":
            start = np.random.randint(0, example_length - trim_length + 1)

        if not inplace:
            ds = self._create_next_dataset(example_length=trim_length)

        with h5py.File(self.source, "a") as f:
            data = f["data"]
            for idx in range(len(self)):

                trimmed_example = generate_trimmed_example(
                    example=data[idx],
                    keep=keep,
                    trim_length=trim_length,
                    start=start,
                )

                if not inplace:
                    append_entry_inplace(source=ds.source, dataset_path="data", entry=trimmed_example)
                    copy_dataset_entry_by_index(
                        source=self.source, destination=ds.source, dataset_path="metadata/metadata", idx=idx
                    )

                else:
                    trimmed_example = np.pad(
                        trimmed_example, ((0, 0), (0, example_length - trim_length)), "constant", constant_values=0
                    )
                    data[idx] = trimmed_example

            if not inplace:
                return ds
            else:
                data.resize(trim_length, axis=2)

    def split_examples(
        self, split_factor: Optional[int] = None, example_length: Optional[int] = None, inplace: Optional[bool] = False
    ) -> IQDataset | None:
        """If the current example length is not evenly divisible by the provided example_length, excess samples are
        discarded. Excess samples are always discarded from the end of each example. If the split factor results in
        non-integer example lengths for the new example chunks, it rounds down.

        Requires either split_factor or example_length to be specified, but not both. If both are provided,
        split factor will be used by default, and a warning will be raised.

        :param split_factor: the number of new example chunks produced from each original example, defaults to None.
        :type split_factor: int, optional
        :param example_length: the example length of the new example chunks, defaults to None.
        :type example_length: int, optional
        :param inplace: If True, the operation modifies the existing source file directly and returns None.
            If False, the operation creates a new dataset object and corresponding source file, leaving the original
            dataset unchanged. Default is False.
        :type inplace: bool, optional

        :return: A dataset with more examples that are shorter.
        :rtype: IQDataset

        **Examples:**

        If the dataset has 100 examples of length 1024 and the split factor is 2, the resulting dataset
        will have 200 examples of length 512. No samples have been discarded.

        If the dataset has 100 examples of length 1024 and the example length is 100, the resulting dataset
        will have 1000 examples of length 100. The remaining 24 samples from each example have been discarded.
        """

        if split_factor is not None and example_length is not None:
            # Raise warning and use split factor
            raise Warning("split_factor and example_length should not both be specified.")

        if not inplace:
            # ds = self.create_new_dataset(example_length=example_length)
            pass

        raise NotImplementedError


def generate_trimmed_example(
    example: np.ndarray, keep: str, trim_length: int, start: Optional[int] = None
) -> np.ndarray:
    """Takes in an IQ example as input and returns a trimmed example.

    :param example: The example to be trimmed.
    :type example: np.ndarray
    :param keep: The position the trimming occurs from.
    :type keep: str
    :param trim_length: The desired length of the trimmed example.
    :type trim_length: int
    :param start: The starting index if keep = "middle" or "random".
    :type start: int, optional

    :return: The trimmed example.
    :rtype: np.ndarray
    """

    if keep == "start":
        return example[:, :trim_length]

    elif keep == "end":
        return example[:, -trim_length:]

    elif keep == "middle":
        return example[:, start : start + trim_length]

    else:
        return example[:, start : start + trim_length]
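
A quick check of ``generate_trimmed_example`` on a toy 1 x 8 example (an illustration, not part of the commit; the import path follows the one used inside this commit):

import numpy as np

from utils.data.datasets.iq_dataset import generate_trimmed_example

example = np.arange(8).reshape(1, 8)  # one channel, eight samples

print(generate_trimmed_example(example, keep="start", trim_length=4))            # [[0 1 2 3]]
print(generate_trimmed_example(example, keep="end", trim_length=4))              # [[4 5 6 7]]
print(generate_trimmed_example(example, keep="middle", trim_length=3, start=4))  # [[4 5 6]]
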
src/ria_toolkit/adt/datasets/license/__init__.py (new file, 211 lines)
@@ -0,0 +1,211 @@
"""
This package contains the ``DatasetLicense`` class and a set of off-the-shelf instances for several common
license types.

Common license types for datasets courtesy of the University of Calgary:
`Common license types for datasets and what they mean <https://libanswers.ucalgary.ca/faq/200582>`_

.. note::

    License descriptions are provided for informational purposes only and should not be construed as legal advice.
    For legal guidance, please refer to official license documentation and consult with legal professionals
    specializing in software and dataset licensing.

.. note::

    When licensing datasets, it's recommended to use licenses specifically designed for data, rather than using
    software licenses such as MIT, Apache, or GPL.

"""

__all__ = [
    "DatasetLicense",
    "PUBLIC_DOMAIN",
    "CC_0",
    "CC_BY",
    "CC_BY_NC",
    "CC_BY_NC_ND",
    "CC_BY_NC_SA",
    "CC_BY_ND",
    "CC_BY_SA",
    "ODC_BY",
    "ODC_PDDL",
    "ODC_ODbL",
    "RESTRICTED",
]

from .dataset_license import DatasetLicense

PUBLIC_DOMAIN = DatasetLicense(
    name="Public Domain (No License)",
    identifier=None,
    description="Technically not a license, the public domain mark relinquishes all rights to a dataset and "
    "dedicates the dataset to the public domain.",
    licence="https://creativecommons.org/public-domain/pdm/",
)
"""
`Public Domain <https://creativecommons.org/public-domain/pdm/>`_: Technically not a license, the public domain mark
relinquishes all rights to a dataset and dedicates the dataset to the public domain.
"""


CC_0 = DatasetLicense(
    name="Creative Commons Public Domain Dedication",
    identifier="CC0-1.0",
    description="This Creative Commons license is like a public domain dedication. The copyright holder "
    "surrenders rights in a dataset using this license.",
    licence="https://creativecommons.org/publicdomain/zero/1.0/",
)
"""
`Creative Commons Public Domain Dedication <https://creativecommons.org/publicdomain/zero/1.0/>`_: This Creative
Commons license is like a public domain dedication. The copyright holder surrenders rights in a dataset using this
license.
"""


ODC_PDDL = DatasetLicense(
    name="Open Data Commons Public Domain Dedication and License",
    identifier="PDDL-1.0",
    description="This license is one of the Open Data Commons licenses and is like a public domain dedication. "
    "The copyright holder surrenders rights in a dataset using this license.",
    licence="https://opendatacommons.org/licenses/pddl/",
)
"""
`Open Data Commons Public Domain Dedication and License <https://opendatacommons.org/licenses/pddl/>`_: This license
is one of the Open Data Commons licenses and is like a public domain dedication. The copyright holder surrenders rights
in a dataset using this license.
"""


CC_BY = DatasetLicense(
    name="Creative Commons Attribution 4.0 International",
    identifier="CC-BY-4.0",
    description="This license is one of the open Creative Commons licenses and allows users to share and adapt "
    "the dataset so long as they give credit to the copyright holder.",
    licence="https://creativecommons.org/licenses/by/4.0/",
)
"""
`Creative Commons Attribution 4.0 International <https://creativecommons.org/licenses/by/4.0/>`_: This license is one
of the open Creative Commons licenses and allows users to share and adapt the dataset so long as they give credit to
the copyright holder.
"""


ODC_BY = DatasetLicense(
    name="Open Data Commons Attribution License",
    identifier="ODC-By-1.0",
    description="This license is one of the Open Data Commons licenses and allows users to share and adapt the "
    "dataset as long as they give credit to the copyright holder.",
    licence="https://opendatacommons.org/licenses/by/",
)
"""
`Open Data Commons Attribution License <https://opendatacommons.org/licenses/by/>`_: This license is one of the Open
Data Commons licenses and allows users to share and adapt the dataset as long as they give credit to the copyright
holder.
"""


CC_BY_SA = DatasetLicense(
    name="Creative Commons Attribution-ShareAlike 4.0 International",
    identifier="CC-BY-SA-4.0",
    description="This license is one of the open Creative Commons licenses and allows users to share and adapt "
    "the dataset as long as they give credit to the copyright holder and distribute any additions, "
    "transformations or changes to the dataset under this same license.",
    licence="https://creativecommons.org/licenses/by-sa/4.0/",
)
"""
`Creative Commons Attribution-ShareAlike 4.0 International <https://creativecommons.org/licenses/by-sa/4.0/>`_: This
license is one of the open Creative Commons licenses and allows users to share and adapt the dataset as long as they
give credit to the copyright holder and distribute any additions, transformations or changes to the dataset under
this same license.
"""


ODC_ODbL = DatasetLicense(
    name="Open Data Commons Open Database License",
    identifier="ODbL-1.0",
    description="This license is one of the Open Data Commons licenses and allows users to share and adapt the "
    "dataset as long as they give credit to the copyright holder and distribute any additions, "
    "transformations or changes to the dataset.",
    licence="https://opendatacommons.org/licenses/odbl/",
)
"""
`Open Data Commons Open Database License <https://opendatacommons.org/licenses/odbl/>`_: This license is one of the
Open Data Commons licenses and allows users to share and adapt the dataset as long as they give credit to the copyright
holder and distribute any additions, transformations or changes to the dataset.
"""


CC_BY_NC = DatasetLicense(
    name="Creative Commons Attribution-NonCommercial 4.0 International",
    identifier="CC-BY-NC-4.0",
    description="This license is one of the Creative Commons licenses and allows users to share and adapt the "
    "dataset if they give credit to the copyright holder and do not use the dataset for any "
    "commercial purposes.",
    licence="https://creativecommons.org/licenses/by-nc/4.0/",
)
"""
`Creative Commons Attribution-NonCommercial 4.0 International <https://creativecommons.org/licenses/by-nc/4.0/>`_: This
license is one of the Creative Commons licenses and allows users to share and adapt the dataset if they give credit to
the copyright holder and do not use the dataset for any commercial purposes.
"""


CC_BY_ND = DatasetLicense(
    name="Creative Commons Attribution-NoDerivatives 4.0 International",
    identifier="CC-BY-ND-4.0",
    description="This license is one of the Creative Commons licenses and allows users to share the dataset if "
    "they give credit to the copyright holder, but they cannot make any additions, transformations or "
    "changes to the dataset under this license.",
    licence="https://creativecommons.org/licenses/by-nd/4.0/",
)
"""
`Creative Commons Attribution-NoDerivatives 4.0 International <https://creativecommons.org/licenses/by-nd/4.0/>`_: This
license is one of the Creative Commons licenses and allows users to share the dataset if they give credit to the
copyright holder, but they cannot make any additions, transformations or changes to the dataset under this license.
"""


CC_BY_NC_SA = DatasetLicense(
    name="Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International",
    identifier="CC-BY-NC-SA-4.0",
    description="This license is one of the Creative Commons licenses and allows users to share the dataset only "
    "if they (1) give credit to the copyright holder, (2) do not use the dataset for any commercial "
    "purposes, and (3) distribute any additions, transformations or changes to the dataset under this "
    "same license.",
    licence="https://creativecommons.org/licenses/by-nc-sa/4.0/",
)
"""
`Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International
<https://creativecommons.org/licenses/by-nc-sa/4.0/>`_: This license is one of the Creative Commons licenses and allows
users to share the dataset only if they (1) give credit to the copyright holder, (2) do not use the dataset for any
commercial purposes, and (3) distribute any additions, transformations or changes to the dataset under this same
license.
"""


CC_BY_NC_ND = DatasetLicense(
    name="Creative Commons Attribution-NonCommercial-NoDerivatives 4.0 International",
    identifier="CC-BY-NC-ND-4.0",
    description="This license is one of the Creative Commons licenses and allows users to use only your "
    "unmodified dataset if they give credit to the copyright holder and do not share it for "
    "commercial purposes. Users cannot make any additions, transformations or changes to the dataset "
    "under this license.",
    licence="https://creativecommons.org/licenses/by-nc-nd/4.0/",
)
"""
`Creative Commons Attribution-NonCommercial-NoDerivatives 4.0 International
<https://creativecommons.org/licenses/by-nc-nd/4.0/>`_: This license is one of the Creative Commons licenses and allows
users to use only your unmodified dataset if they give credit to the copyright holder and do not share it for
commercial purposes. Users cannot make any additions, transformations or changes to the dataset under this license.
"""


RESTRICTED = DatasetLicense(
    name="Restricted (All Rights Reserved)",
    identifier="Restricted",
    description="All rights reserved. No permissions granted for use, modification, or distribution of the dataset.",
    licence="Restricted (All Rights Reserved)",
)
"""
Restricted (All Rights Reserved): No permissions granted for use, modification, or distribution of the dataset.
"""
src/ria_toolkit/adt/datasets/license/dataset_license.py (new file, 13 lines)
@@ -0,0 +1,13 @@
from dataclasses import dataclass


@dataclass
class DatasetLicense:
    """
    Represents a dataset license.
    """

    name: str  #: The name or title of the license.
    identifier: str | None  #: SPDX short identifier, or None if one does not exist.
    description: str  #: A description of the license.
    licence: str  #: Full license text or URL if the license is available online.
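
As a sketch of how the dataclass above is used (an illustration, not part of the commit), a project-specific license object can be declared alongside the predefined constants in the license package; the name, description, and URL below are placeholders, and the import path follows the one used inside this commit:

from utils.data.datasets.license.dataset_license import DatasetLicense

INTERNAL_ONLY = DatasetLicense(
    name="Internal Use Only",
    identifier=None,  # no SPDX identifier exists for this placeholder license
    description="Dataset may be used within the organization only; no redistribution.",
    licence="https://example.com/internal-data-license",  # placeholder URL
)

print(INTERNAL_ONLY.name, INTERNAL_ONLY.identifier)  # Internal Use Only None
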
src/ria_toolkit/adt/datasets/radio_dataset.py (new file, 1081 lines)
File diff suppressed because it is too large.
src/ria_toolkit/adt/datasets/spect_dataset.py (new file, 57 lines)
@@ -0,0 +1,57 @@
from __future__ import annotations

import os
from abc import ABC

from utils.data.datasets.radio_dataset import RadioDataset


class SpectDataset(RadioDataset, ABC):
    """A ``SpectDataset`` is a ``RadioDataset`` tailored for machine learning tasks that involve processing
    radiofrequency (RF) signals represented as spectrograms. This class is integrated with vision frameworks,
    allowing you to leverage models and techniques from the field of computer vision for analyzing and processing
    radio signal spectrograms.

    For machine learning tasks that involve processing IQ samples, please use
    utils.data.datasets.IQDataset instead.

    This is an abstract interface defining common properties and behaviour of SpectDatasets. Therefore, this class
    should not be instantiated directly. Instead, it is subclassed to define custom interfaces for specific machine
    learning backends.

    :param source: Path to the dataset source file. For more information on dataset source files
        and their format, see :doc:`radio_datasets`.
    :type source: str or os.PathLike
    """

    def __init__(self, source: str | os.PathLike):
        """Create a new SpectDataset."""
        super().__init__(source=source)

    @property
    def shape(self) -> tuple[int]:
        """Spectrogram datasets are M x C x H x W, where M is the number of examples, C is the number of image
        channels, H is the height of the spectrogram, and W is the width of the spectrogram.

        :return: The shape of the dataset. The elements of the shape tuple give the lengths of the corresponding
            dataset dimensions.
        :type: tuple of ints
        """
        return super().shape

    def default_augmentations(self) -> list[callable]:
        """Returns the list of default augmentations for spectrogram datasets.

        .. todo:: This method is not yet implemented.

        :return: A list of default augmentations.
        :rtype: list[callable]
        """
        # Consider the following list of default augmentations:
        # #. horizontal_flip
        # #. vertical_flip
        # #. sharpen
        # #. darken
        # #. lighten
        # #. linear_rotate
        raise NotImplementedError
|
317
src/ria_toolkit/adt/datasets/split.py
Normal file
317
src/ria_toolkit/adt/datasets/split.py
Normal file
|
@ -0,0 +1,317 @@
|
||||||
|
import math
|
||||||
|
import os
|
||||||
|
from collections import Counter
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
from numpy.random import Generator
|
||||||
|
|
||||||
|
from utils.data.datasets import RadioDataset
|
||||||
|
from utils.data.datasets.h5helpers import copy_over_example, make_empty_clone
|
||||||
|
|
||||||
|
|
||||||
|
def split(dataset: RadioDataset, lengths: list[int | float]) -> list[RadioDataset]:
|
||||||
|
"""Split a radio dataset into non-overlapping new datasets of given lengths.
|
||||||
|
|
||||||
|
Recordings are long-form tapes, which can be obtained either from a software-defined radio (SDR) or generated
|
||||||
|
synthetically. Then, radio datasets are curated from collections of recordings by segmenting these
|
||||||
|
longer-form tapes into shorter units called slices.
|
||||||
|
|
||||||
|
For each slice in the dataset, the metadata should include the unique ID of the recording from which the example
|
||||||
|
was cut ('rec_id'). To avoid leakage, all examples with the same 'rec_id' are assigned only to one of the new
|
||||||
|
datasets. This ensures, for example, that slices cut from the same recording do not appear in both the training
|
||||||
|
and test datasets.
|
||||||
|
|
||||||
|
This restriction makes it challenging to generate datasets with the exact lengths specified. To get as close as
|
||||||
|
possible, this method uses a greedy algorithm, which assigns the recordings with the most slices first, working
|
||||||
|
down to those with the fewest. This may not always provide a perfect split, but it works well in most practical
|
||||||
|
cases.
|
||||||
|
|
||||||
|
This function is deterministic, meaning it will always produce the same split. For a random split, see
|
||||||
|
utils.data.datasets.random_split.
|
||||||
|
|
||||||
|
:param dataset: Dataset to be split.
|
||||||
|
:type dataset: RadioDataset
|
||||||
|
:param: lengths: Lengths or fractions of splits to be produced. If given a list of fractions, the list should
|
||||||
|
sum up to 1. The lengths will be computed automatically as ``floor(frac * len(dataset))`` for each fraction
|
||||||
|
provided, and any remainders will be distributed in round-robin fashion.
|
||||||
|
:type lengths: list of ints (lengths) or floats (fractions)
|
||||||
|
|
||||||
|
:return: List of radio datasets. The number of returned datasets will correspond to the length of the provided
|
||||||
|
'lengths' list.
|
||||||
|
:rtype: list of RadioDataset
|
||||||
|
|
||||||
|
**Examples:**
|
||||||
|
|
||||||
|
>>> import random
|
||||||
|
>>> import string
|
||||||
|
>>> import numpy as np
|
||||||
|
>>> import pandas as pd
|
||||||
|
>>> from utils.data.datasets import split
|
||||||
|
|
||||||
|
First, let's generate some random data:
|
||||||
|
|
||||||
|
>>> shape = (24, 1, 1024) # 24 examples, each of length 1024
|
||||||
|
>>> real_part, imag_part = np.random.randint(0, 12, size=shape), np.random.randint(0, 79, size=shape)
|
||||||
|
>>> data = real_part + 1j * imag_part
|
||||||
|
|
||||||
|
Then, a list of recording IDs. Let's pretend this data was cut from 4 separate recordings:
|
||||||
|
|
||||||
|
>>> rec_id_options = [''.join(random.choices(string.ascii_lowercase + string.digits, k=256)) for _ in range(4)]
|
||||||
|
>>> rec_id = [np.random.choice(rec_id_options) for _ in range(shape[0])]
|
||||||
|
|
||||||
|
Using this data and metadata, let's initialize a dataset:
|
||||||
|
|
||||||
|
>>> metadata = pd.DataFrame(data={"rec_id": rec_id}).to_records(index=False)
|
||||||
|
>>> fid = os.path.join(os.getcwd(), "source_file.hdf5")
|
||||||
|
>>> ds = RadioDataset(source=fid)
|
||||||
|
|
||||||
|
Finally, let's do an 80/20 train-test split:
|
||||||
|
|
||||||
|
>>> train_ds, test_ds = split(ds, lengths=[0.8, 0.2])
|
||||||
|
"""
|
||||||
|
if not isinstance(dataset, RadioDataset):
|
||||||
|
raise ValueError(f"'dataset' must be RadioDataset or one of its subclasses, got {type(dataset)}.")
|
||||||
|
|
||||||
|
lengths_ = _validate_lengths(dataset=dataset, lengths=lengths)
|
||||||
|
|
||||||
|
if "rec_id" not in dataset.metadata or not isinstance(dataset.metadata["rec_id"][0], str):
|
||||||
|
raise ValueError("Dataset missing string field 'rec_id'.")
|
||||||
|
|
||||||
|
rec_ids = dict(Counter(dataset.metadata["rec_id"]))
|
||||||
|
|
||||||
|
if len(rec_ids) < len(lengths_):
|
||||||
|
raise ValueError(f"Not enough Recordings IDs in the dataset for a {len(lengths_)}-way split.")
|
||||||
|
|
||||||
|
# Sort the rec_ids in descending order by frequency.
|
||||||
|
ids, freqs = list(rec_ids.keys()), list(rec_ids.values())
|
||||||
|
sorted_indices = np.flip(np.argsort(freqs))
|
||||||
|
sorted_rec_ids = [ids[x] for x in sorted_indices]
|
||||||
|
sorted_freqs = [freqs[x] for x in sorted_indices]
|
||||||
|
|
||||||
|
# Preallocate keys, which we'll use to track which recordings are assigned to which subsets.
|
||||||
|
split_key_ids = [[] for _ in range(len(lengths_))]
|
||||||
|
split_key_freqs = [[] for _ in range(len(lengths_))]
|
||||||
|
|
||||||
|
for i in range(len(rec_ids)):
|
||||||
|
# Find the subset whose current length is farthest from its target length.
|
||||||
|
current_lengths = [sum(subkey) for subkey in split_key_freqs]
|
||||||
|
diffs = [lengths_[j] - current_lengths[j] for j in range(len(lengths_))]
|
||||||
|
index = np.argmax(diffs)
|
||||||
|
|
||||||
|
# Add the 'rec_id' with the highest frequency to the subset farthest from its target.
|
||||||
|
split_key_freqs[index].append(sorted_freqs[i])
|
||||||
|
split_key_ids[index].append(sorted_rec_ids[i])
|
||||||
|
|
||||||
|
_validate_sublists(list_of_lists=split_key_ids, ids=ids)
|
||||||
|
|
||||||
|
return _split_datasets(dataset=dataset, key=split_key_ids)
|
||||||
|
|
||||||
|
|
||||||
|
def random_split(
|
||||||
|
dataset: RadioDataset, lengths: list[int | float], generator: Optional[Generator] = None
|
||||||
|
) -> list[RadioDataset]:
|
||||||
|
"""Randomly split a radio dataset into non-overlapping new datasets of given lengths.
|
||||||
|
|
||||||
|
Recordings are long-form tapes, which can be obtained either from a software-defined radio (SDR) or generated
|
||||||
|
synthetically. Then, radio datasets are curated from collections of recordings by segmenting these
|
||||||
|
longer-form tapes into shorter units called slices.
|
||||||
|
|
||||||
|
For each slice in the dataset, the metadata should include the unique recording ID ('rec_id') of the recording
|
||||||
|
from which the example was cut. To avoid leakage, all examples with the same 'rec_id' are assigned only to one of
|
||||||
|
the new datasets. This ensures, for example, that slices cut from the same recording do not appear in both the
|
||||||
|
training and test datasets.
|
||||||
|
|
||||||
|
This restriction makes it unlikely that a random split will produce datasets with the exact lengths specified.
|
||||||
|
If it is important to ensure the closest possible split, consider using utils.data.datasets.split instead.
|
||||||
|
|
||||||
|
:param dataset: Dataset to be split.
|
||||||
|
:type dataset: RadioDataset
|
||||||
|
:param: lengths: Lengths or fractions of splits to be produced. If given a list of fractions, the list should
|
||||||
|
sum up to 1. The lengths will be computed automatically as ``floor(frac * len(dataset))`` for each fraction
|
||||||
|
provided, and any remainders will be distributed in round-robin fashion.
|
||||||
|
:type lengths: list of ints (lengths) or floats (fractions)
|
||||||
|
|
||||||
|
:param generator: Random generator. Defaults to None.
|
||||||
|
:type generator: NumPy Generator Object, optional.
|
||||||
|
|
||||||
|
:return: List of radio datasets. The number of returned datasets will correspond to the length of the provided
|
||||||
|
'lengths' list.
|
||||||
|
:rtype: list of RadioDataset
|
||||||
|
|
||||||
|
See Also:
|
||||||
|
utils.data.datasets.split: Usage is the same as for ``random_split()``.
|
||||||
|
"""
|
||||||
|
if not isinstance(dataset, RadioDataset):
|
||||||
|
raise ValueError(f"'dataset' must be RadioDataset or one of its subclasses, got {type(dataset)}.")
|
||||||
|
|
||||||
|
lengths_ = _validate_lengths(dataset=dataset, lengths=lengths)
|
||||||
|
|
||||||
|
if generator is None:
|
||||||
|
rng = np.random.default_rng(np.random.randint(0, np.iinfo(np.int32).max))
|
||||||
|
else:
|
||||||
|
rng = generator
|
||||||
|
|
||||||
|
if "rec_id" not in dataset.metadata or not isinstance(dataset.metadata["rec_id"][0], str):
|
||||||
|
raise ValueError("Dataset missing string field 'rec_id'.")
|
||||||
|
|
||||||
|
rec_ids = dict(Counter(dataset.metadata["rec_id"]))
|
||||||
|
|
||||||
|
if len(rec_ids) < len(lengths_):
|
||||||
|
raise ValueError(f"Not enough Recordings IDs in the dataset for a {len(lengths_)}-way split.")
|
||||||
|
|
||||||
|
ids, freqs = list(rec_ids.keys()), list(rec_ids.values())
|
||||||
|
sorted_indices = np.flip(np.argsort(freqs))
|
||||||
|
sorted_rec_ids = [ids[x] for x in sorted_indices]
|
||||||
|
sorted_freqs = [freqs[x] for x in sorted_indices]
|
||||||
|
|
||||||
|
# Preallocate keys, which we'll use to track which recordings are assigned to which subsets.
|
||||||
|
n = len(lengths_)
|
||||||
|
split_key_ids = [[] for _ in range(n)]
|
||||||
|
split_key_freqs = [[] for _ in range(n)]
|
||||||
|
|
||||||
|
# Taking from the bottom (least frequent), assign one recording to each subset. This is important to ensure we
|
||||||
|
# don't end up with any empty subsets, and serves to help randomize the results.
|
||||||
|
top_rec_ids, bottom_rec_ids = sorted_rec_ids[:-n], sorted_rec_ids[-n:]
|
||||||
|
top_freqs, bottom_freqs = sorted_freqs[:-n], sorted_freqs[-n:]
|
||||||
|
bottom_indices = rng.permutation(x=np.asarray(range(n)))
|
||||||
|
|
||||||
|
for i in range(n):
|
||||||
|
split_key_freqs[i].append(bottom_freqs[bottom_indices[i]])
|
||||||
|
split_key_ids[i].append(bottom_rec_ids[bottom_indices[i]])
|
||||||
|
|
||||||
|
for i in range(len(top_rec_ids)):
|
||||||
|
# Find the subset whose current length is farthest from its target length.
|
||||||
|
current_lengths = np.array([sum(subkey) for subkey in split_key_freqs])
|
||||||
|
diffs = np.array([lengths_[j] - current_lengths[j] for j in range(n)])
|
||||||
|
|
||||||
|
# Use the normalized diffs as probabilities. This results in a higher probability for larger diffs.
|
||||||
|
diffs = np.asarray([0 if d < 0 else d for d in diffs]) # Don't add to full or overfull subsets.
|
||||||
|
probabilities = diffs / sum(diffs)
|
||||||
|
|
||||||
|
index = rng.choice(range(n), p=probabilities)
|
||||||
|
|
||||||
|
# Add the 'rec_id' with the highest frequency to the chosen subset.
|
||||||
|
split_key_freqs[index].append(top_freqs[i])
|
||||||
|
split_key_ids[index].append(top_rec_ids[i])
|
||||||
|
|
||||||
|
_validate_sublists(list_of_lists=split_key_ids, ids=ids)
|
||||||
|
|
||||||
|
return _split_datasets(dataset=dataset, key=split_key_ids, generator=rng)
|
||||||
|
|
||||||
|
|
||||||
|
def _validate_lengths(dataset: RadioDataset, lengths: list[int | float]) -> list[int]:
|
||||||
|
"""Validate lengths. If lengths are fractions of splits, lengths will be computed automatically.
|
||||||
|
|
||||||
|
:param dataset: Dataset to be split.
|
||||||
|
:type dataset: RadioDataset
|
||||||
|
:param lengths: Lengths or fractions of splits to be produced.
|
||||||
|
:type lengths: list of ints (lengths) or floats (fractions)
|
||||||
|
|
||||||
|
:return: List of lengths to be produced.
|
||||||
|
:rtype: list of ints
|
||||||
|
"""
|
||||||
|
if not isinstance(lengths, list):
|
||||||
|
raise ValueError(f"'lengths' must be a list of ints or a list of floats, got {type(lengths)}.")
|
||||||
|
|
||||||
|
if len(lengths) < 2:
|
||||||
|
raise ValueError("'lengths' list must contain at least 2 elements.")
|
||||||
|
|
||||||
|
if not all(isinstance(sub, type(lengths[0])) for sub in lengths[1:]):
|
||||||
|
raise ValueError("All elements of 'lengths' must be of the same type.")
|
||||||
|
|
||||||
|
if sum(lengths) == len(dataset):
|
||||||
|
return [int(i) for i in lengths]
|
||||||
|
|
||||||
|
elif math.isclose(sum(lengths), 1, abs_tol=1e-9):
|
||||||
|
# Fractions of splits, which add to 1.
|
||||||
|
lengths_ = [math.floor(f * len(dataset)) for f in lengths]
|
||||||
|
|
||||||
|
# Distribute remainders in round-robin fashion to the lengths until there are no remainders left.
|
||||||
|
i = 0
|
||||||
|
while len(dataset) > sum(lengths_):
|
||||||
|
lengths_[i] = lengths_[i] + 1
|
||||||
|
i = i + 1
|
||||||
|
|
||||||
|
return lengths_
|
||||||
|
|
||||||
|
else:
|
||||||
|
raise ValueError("'lengths' must sum to either the length of 'dataset' or 1.")
|
||||||
|
|
||||||
|
|
||||||
|
def _validate_sublists(list_of_lists: list[list[str]], ids: list[str]) -> None:
|
||||||
|
"""Ensure that each ID is present in one and only one sublist."""
|
||||||
|
all_elements = [item for sublist in list_of_lists for item in sublist]
|
||||||
|
|
||||||
|
assert len(all_elements) == len(set(all_elements)) and sorted(set(ids)) == sorted(set(all_elements))
|
||||||
|
|
||||||
|
|
||||||
|
def _generate_split_source_filenames(
|
||||||
|
parent_dataset: RadioDataset, n_new_datasets: int, generator: Generator
|
||||||
|
) -> list[str]:
|
||||||
|
"""Generate source filenames for each new dataset.
|
||||||
|
|
||||||
|
Examples:
|
||||||
|
|
||||||
|
.../file_name.hdf5 -> [
|
||||||
|
.../file_name.split66ce07f-0.hdf5,
|
||||||
|
.../file_name.split66ce07f-1.hdf5,
|
||||||
|
.../file_name.split66ce07f-2.hdf5
|
||||||
|
]
|
||||||
|
|
||||||
|
.../file_name.002.hdf5 -> [
|
||||||
|
.../file_name.002.split156afd7-0.hdf5,
|
||||||
|
.../file_name.002.split156afd7-1.hdf5,
|
||||||
|
.../file_name.002.split156afd7-2.hdf5
|
||||||
|
]
|
||||||
|
"""
|
||||||
|
parent_file_name = str(parent_dataset.source)
|
||||||
|
parent_base_name = os.path.splitext(parent_file_name)[0]
|
||||||
|
|
||||||
|
random_tag = generator.bytes(length=4).hex()[:7]
|
||||||
|
|
||||||
|
return [f"{parent_base_name}.split{random_tag}-{i}.hdf5" for i in range(n_new_datasets)]
|
||||||
|
|
||||||
|
|
||||||
|
def _split_datasets(
|
||||||
|
dataset: RadioDataset, key: list[list[str]], generator: Optional[Generator] = None
|
||||||
|
) -> list[RadioDataset]:
|
||||||
|
"""Once we know how we'd like to split up the dataset (i.e., which slices are to be included in which new
|
||||||
|
dataset), this helper function does the actual split.
|
||||||
|
|
||||||
|
:param dataset: Dataset to be split.
|
||||||
|
:type dataset: RadioDataset
|
||||||
|
:param key: A key indicating which slices are to be included in which dataset. This is a list of lists, where
|
||||||
|
each sublist contains the recording IDs of the slices to be included in the corresponding subset.
|
||||||
|
:type key: A list of lists
|
||||||
|
|
||||||
|
:param generator: Random generator. Defaults to None.
|
||||||
|
:type generator: NumPy Generator Object, optional.
|
||||||
|
|
||||||
|
:return: Non-overlapping datasets
|
||||||
|
:rtype: list of RadioDataset
|
||||||
|
"""
|
||||||
|
if generator is None:
|
||||||
|
rng = np.random.default_rng(np.random.randint(0, np.iinfo(np.int32).max))
|
||||||
|
else:
|
||||||
|
rng = generator
|
||||||
|
|
||||||
|
new_source_filenames = _generate_split_source_filenames(
|
||||||
|
parent_dataset=dataset, n_new_datasets=len(key), generator=rng
|
||||||
|
)
|
||||||
|
|
||||||
|
for new_source in new_source_filenames:
|
||||||
|
make_empty_clone(original_source=dataset.source, new_source=new_source, example_length=len(dataset.data[0, 0]))
|
||||||
|
|
||||||
|
new_datasets = [dataset.__class__(source=new_source) for new_source in new_source_filenames]
|
||||||
|
|
||||||
|
rec_ids = list(dataset.metadata["rec_id"])
|
||||||
|
|
||||||
|
for i, sublist in enumerate(key):
|
||||||
|
for rec_id in sublist:
|
||||||
|
# The examples at these indices are part of the corresponding new dataset.
|
||||||
|
indices = [index for index, value in enumerate(rec_ids) if value == rec_id]
|
||||||
|
for idx in indices:
|
||||||
|
copy_over_example(source=dataset.source, destination=new_datasets[i].source, idx=idx)
|
||||||
|
|
||||||
|
return new_datasets
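# Hedged end-to-end sketch; 'ds' and the function name 'split_dataset' are illustrative
# placeholders, not names defined in this excerpt:
#     train, test = split_dataset(dataset=ds, lengths=[0.8, 0.2])
#     assert len(train) + len(test) == len(ds)
#     assert set(train.metadata["rec_id"]).isdisjoint(set(test.metadata["rec_id"]))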
|
763
src/ria_toolkit/adt/recording.py
Normal file
763
src/ria_toolkit/adt/recording.py
Normal file
|
@ -0,0 +1,763 @@
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import copy
|
||||||
|
import datetime
|
||||||
|
import hashlib
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import time
|
||||||
|
import warnings
|
||||||
|
from typing import Any, Iterator, Optional
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
from numpy.typing import ArrayLike
|
||||||
|
from quantiphy import Quantity
|
||||||
|
|
||||||
|
from utils.data.annotation import Annotation
|
||||||
|
|
||||||
|
PROTECTED_KEYS = ["rec_id", "timestamp"]
|
||||||
|
|
||||||
|
|
||||||
|
class Recording:
|
||||||
|
"""Tape of complex IQ (in-phase and quadrature) samples with associated metadata and annotations.
|
||||||
|
|
||||||
|
Recording data is a complex array of shape C x N, where C is the number of channels
|
||||||
|
and N is the number of samples in each channel.
|
||||||
|
|
||||||
|
Metadata is stored in a dictionary of key value pairs,
|
||||||
|
to include information such as sample_rate and center_frequency.
|
||||||
|
|
||||||
|
Annotations are a list of :ref:`Annotation <utils.data.Annotation>`,
|
||||||
|
defining bounding boxes in time and frequency with labels and metadata.
|
||||||
|
|
||||||
|
Here, signal data is represented as a NumPy array. This class is then extended in the RIA Backends to provide
|
||||||
|
support for different data structures, such as Tensors.
|
||||||
|
|
||||||
|
Recordings are long-form tapes that can be obtained either from a software-defined radio (SDR) or generated
|
||||||
|
synthetically. Then, machine learning datasets are curated from collections of recordings by segmenting these
|
||||||
|
longer-form tapes into shorter units called slices.
|
||||||
|
|
||||||
|
All recordings are assigned a unique 64-character recording ID, ``rec_id``. If this field is missing from the
|
||||||
|
provided metadata, a new ID will be generated upon object instantiation.
|
||||||
|
|
||||||
|
:param data: Signal data as a tape of IQ samples, as a C x N complex array, where C is the number of
|
||||||
|
channels and N is the number of samples in the signal. If data is a one-dimensional array of complex samples with
|
||||||
|
length N, it will be reshaped to a two-dimensional array with dimensions 1 x N.
|
||||||
|
:type data: array_like
|
||||||
|
|
||||||
|
:param metadata: Additional information associated with the recording.
|
||||||
|
:type metadata: dict, optional
|
||||||
|
:param annotations: A collection of ``Annotation`` objects defining bounding boxes.
|
||||||
|
:type annotations: list of Annotations, optional
|
||||||
|
|
||||||
|
:param dtype: Explicitly specify the data-type of the complex samples. Must be a complex NumPy type, such as
|
||||||
|
``np.complex64`` or ``np.complex128``. Default is None, in which case the type is determined implicitly. If
|
||||||
|
``data`` is a NumPy array, the Recording will use the dtype of ``data`` directly without any conversion.
|
||||||
|
:type dtype: numpy dtype object, optional
|
||||||
|
:param timestamp: The timestamp when the recording data was generated. If provided, it should be a float or integer
|
||||||
|
representing the time in seconds since epoch (e.g., ``time.time()``). Only used if the `timestamp` field is not
|
||||||
|
present in the provided metadata.
|
||||||
|
:type timestamp: float or int, optional
|
||||||
|
|
||||||
|
:raises ValueError: If data is not complex 1xN or CxN.
|
||||||
|
:raises ValueError: If metadata is not a python dict.
|
||||||
|
:raises ValueError: If metadata is not json serializable.
|
||||||
|
:raises ValueError: If annotations is not a list of valid annotation objects.
|
||||||
|
|
||||||
|
**Examples:**
|
||||||
|
|
||||||
|
>>> import numpy
|
||||||
|
>>> from utils.data import Recording, Annotation
|
||||||
|
|
||||||
|
>>> # Create an array of complex samples, just 1s in this case.
|
||||||
|
>>> samples = numpy.ones(10000, dtype=numpy.complex64)
|
||||||
|
|
||||||
|
>>> # Create a dictionary of relevant metadata.
|
||||||
|
>>> sample_rate = 1e6
|
||||||
|
>>> center_frequency = 2.44e9
|
||||||
|
>>> metadata = {
|
||||||
|
... "sample_rate": sample_rate,
|
||||||
|
... "center_frequency": center_frequency,
|
||||||
|
... "author": "me",
|
||||||
|
... }
|
||||||
|
|
||||||
|
>>> # Create an annotation for the annotations list.
|
||||||
|
>>> annotations = [
|
||||||
|
... Annotation(
|
||||||
|
... sample_start=0,
|
||||||
|
... sample_count=1000,
|
||||||
|
... freq_lower_edge=center_frequency - (sample_rate / 2),
|
||||||
|
... freq_upper_edge=center_frequency + (sample_rate / 2),
|
||||||
|
... label="example",
|
||||||
|
... )
|
||||||
|
... ]
|
||||||
|
|
||||||
|
>>> # Store samples, metadata, and annotations together in a convenient object.
|
||||||
|
>>> recording = Recording(data=samples, metadata=metadata, annotations=annotations)
|
||||||
|
>>> print(recording.metadata)
|
||||||
|
{'sample_rate': 1000000.0, 'center_frequency': 2440000000.0, 'author': 'me'}
|
||||||
|
>>> print(recording.annotations[0].label)
|
||||||
|
example
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__( # noqa C901
|
||||||
|
self,
|
||||||
|
data: ArrayLike | list[list],
|
||||||
|
metadata: Optional[dict[str, Any]] = None,
|
||||||
|
dtype: Optional[np.dtype] = None,
|
||||||
|
timestamp: Optional[float | int] = None,
|
||||||
|
annotations: Optional[list[Annotation]] = None,
|
||||||
|
):
|
||||||
|
|
||||||
|
data_arr = np.asarray(data)
|
||||||
|
|
||||||
|
if np.iscomplexobj(data_arr):
|
||||||
|
# Expect C x N
|
||||||
|
if data_arr.ndim == 1:
|
||||||
|
self._data = np.expand_dims(data_arr, axis=0) # N -> 1 x N
|
||||||
|
elif data_arr.ndim == 2:
|
||||||
|
self._data = data_arr
|
||||||
|
else:
|
||||||
|
raise ValueError("Complex data must be C x N.")
|
||||||
|
|
||||||
|
else:
|
||||||
|
raise ValueError("Input data must be complex.")
|
||||||
|
|
||||||
|
if dtype is not None:
|
||||||
|
self._data = self._data.astype(dtype)
|
||||||
|
|
||||||
|
assert np.iscomplexobj(self._data)
|
||||||
|
|
||||||
|
if metadata is None:
|
||||||
|
self._metadata = {}
|
||||||
|
elif isinstance(metadata, dict):
|
||||||
|
self._metadata = metadata
|
||||||
|
else:
|
||||||
|
raise ValueError(f"Metadata must be a python dict, but was {type(metadata)}.")
|
||||||
|
|
||||||
|
if not _is_jsonable(metadata):
|
||||||
|
raise ValueError("Value must be JSON serializable.")
|
||||||
|
|
||||||
|
if "timestamp" not in self.metadata:
|
||||||
|
if timestamp is not None:
|
||||||
|
if not isinstance(timestamp, (int, float)):
|
||||||
|
raise ValueError(f"timestamp must be int or float, not {type(timestamp)}")
|
||||||
|
self._metadata["timestamp"] = timestamp
|
||||||
|
else:
|
||||||
|
self._metadata["timestamp"] = time.time()
|
||||||
|
else:
|
||||||
|
if not isinstance(self._metadata["timestamp"], (int, float)):
|
||||||
|
raise ValueError("timestamp must be int or float, not ", type(self._metadata["timestamp"]))
|
||||||
|
|
||||||
|
if "rec_id" not in self.metadata:
|
||||||
|
self._metadata["rec_id"] = generate_recording_id(data=self.data, timestamp=self._metadata["timestamp"])
|
||||||
|
|
||||||
|
if annotations is None:
|
||||||
|
self._annotations = []
|
||||||
|
elif isinstance(annotations, list):
|
||||||
|
self._annotations = annotations
|
||||||
|
else:
|
||||||
|
raise ValueError("Annotations must be a list or None.")
|
||||||
|
|
||||||
|
if not all(isinstance(annotation, Annotation) for annotation in self._annotations):
|
||||||
|
raise ValueError("All elements in self._annotations must be of type Annotation.")
|
||||||
|
|
||||||
|
self._index = 0
|
||||||
|
|
||||||
|
@property
|
||||||
|
def data(self) -> np.ndarray:
|
||||||
|
"""
|
||||||
|
:return: Recording data, as a complex array.
|
||||||
|
:type: np.ndarray
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
For recordings with more than 1,024 samples, this property returns a read-only view of the data.
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
To access specific samples, consider indexing the object directly with ``rec[c, n]``.
|
||||||
|
"""
|
||||||
|
if self._data.size > 1024:
|
||||||
|
# Returning a read-only view prevents mutation at a distance while maintaining performance.
|
||||||
|
v = self._data.view()
|
||||||
|
v.setflags(write=False)
|
||||||
|
return v
|
||||||
|
else:
|
||||||
|
return self._data.copy()
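# Illustrative sketch of the copy-vs-view behaviour documented above (sizes hypothetical):
#     small = Recording(data=np.ones(10, dtype=np.complex64))
#     small.data[0, 0] = 0    # mutates only the returned copy; 'small' is unchanged
#     large = Recording(data=np.ones(2048, dtype=np.complex64))
#     large.data[0, 0] = 0    # raises ValueError because the returned view is read-only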
|
||||||
|
|
||||||
|
@property
|
||||||
|
def metadata(self) -> dict:
|
||||||
|
"""
|
||||||
|
:return: Dictionary of recording metadata.
|
||||||
|
:type: dict
|
||||||
|
"""
|
||||||
|
return self._metadata.copy()
|
||||||
|
|
||||||
|
@property
|
||||||
|
def annotations(self) -> list[Annotation]:
|
||||||
|
"""
|
||||||
|
:return: List of recording annotations
|
||||||
|
:type: list of Annotation objects
|
||||||
|
"""
|
||||||
|
return self._annotations.copy()
|
||||||
|
|
||||||
|
@property
|
||||||
|
def shape(self) -> tuple[int]:
|
||||||
|
"""
|
||||||
|
:return: The shape of the data array.
|
||||||
|
:type: tuple of ints
|
||||||
|
"""
|
||||||
|
return np.shape(self.data)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def n_chan(self) -> int:
|
||||||
|
"""
|
||||||
|
:return: The number of channels in the recording.
|
||||||
|
:type: int
|
||||||
|
"""
|
||||||
|
return self.shape[0]
|
||||||
|
|
||||||
|
@property
|
||||||
|
def rec_id(self) -> str:
|
||||||
|
"""
|
||||||
|
:return: Recording ID.
|
||||||
|
:type: str
|
||||||
|
"""
|
||||||
|
return self.metadata["rec_id"]
|
||||||
|
|
||||||
|
@property
|
||||||
|
def dtype(self) -> str:
|
||||||
|
"""
|
||||||
|
:return: Data-type of the data array's elements.
|
||||||
|
:type: numpy dtype object
|
||||||
|
"""
|
||||||
|
return self.data.dtype
|
||||||
|
|
||||||
|
@property
|
||||||
|
def timestamp(self) -> float | int:
|
||||||
|
"""
|
||||||
|
:return: Recording timestamp (time in seconds since epoch).
|
||||||
|
:type: float or int
|
||||||
|
"""
|
||||||
|
return self.metadata["timestamp"]
|
||||||
|
|
||||||
|
@property
|
||||||
|
def sample_rate(self) -> float | None:
|
||||||
|
"""
|
||||||
|
:return: Sample rate of the recording, or None if 'sample_rate' is not in metadata.
|
||||||
|
:type: float or None
|
||||||
|
"""
|
||||||
|
return self.metadata.get("sample_rate")
|
||||||
|
|
||||||
|
@sample_rate.setter
|
||||||
|
def sample_rate(self, sample_rate: float | int) -> None:
|
||||||
|
"""Set the sample rate of the recording.
|
||||||
|
|
||||||
|
:param sample_rate: The sample rate of the recording.
|
||||||
|
:type sample_rate: float or int
|
||||||
|
|
||||||
|
:return: None
|
||||||
|
"""
|
||||||
|
self.add_to_metadata(key="sample_rate", value=sample_rate)
|
||||||
|
|
||||||
|
def astype(self, dtype: np.dtype) -> Recording:
|
||||||
|
"""Copy of the recording, data cast to a specified type.
|
||||||
|
|
||||||
|
.. todo: This method is not yet implemented.
|
||||||
|
|
||||||
|
:param dtype: Data-type to which the array is cast. Must be a complex scalar type, such as ``np.complex64`` or
|
||||||
|
``np.complex128``.
|
||||||
|
:type dtype: NumPy data type, optional
|
||||||
|
|
||||||
|
.. note: Casting to a data type with less precision can risk losing data by truncating or rounding values,
|
||||||
|
potentially resulting in a loss of accuracy and significant information.
|
||||||
|
|
||||||
|
:return: A new recording with the same metadata and data, with dtype.
|
||||||
|
|
||||||
|
TODO: Add example usage.
|
||||||
|
"""
|
||||||
|
# Rather than check for a valid datatype, let's cast and check the result. This makes it easier to provide
|
||||||
|
# cross-platform support where the types are aliased across platforms.
|
||||||
|
with warnings.catch_warnings():
|
||||||
|
warnings.simplefilter("ignore") # Casting may generate user warnings. E.g., complex -> real
|
||||||
|
data = self.data.astype(dtype)
|
||||||
|
|
||||||
|
if np.iscomplexobj(data):
|
||||||
|
return Recording(data=data, metadata=self.metadata, annotations=self.annotations)
|
||||||
|
else:
|
||||||
|
raise ValueError("dtype must be a complex number scalar type.")
|
||||||
|
|
||||||
|
def add_to_metadata(self, key: str, value: Any) -> None:
|
||||||
|
"""Add a new key-value pair to the recording metadata.
|
||||||
|
|
||||||
|
:param key: New metadata key, must be snake_case.
|
||||||
|
:type key: str
|
||||||
|
:param value: Corresponding metadata value.
|
||||||
|
:type value: any
|
||||||
|
|
||||||
|
:raises ValueError: If key is already in metadata or if key is not a valid metadata key.
|
||||||
|
:raises ValueError: If value is not JSON serializable.
|
||||||
|
|
||||||
|
:return: None.
|
||||||
|
|
||||||
|
**Examples:**
|
||||||
|
|
||||||
|
Create a recording and add metadata:
|
||||||
|
|
||||||
|
>>> import numpy
|
||||||
|
>>> from utils.data import Recording
|
||||||
|
>>>
|
||||||
|
>>> samples = numpy.ones(10000, dtype=numpy.complex64)
|
||||||
|
>>> metadata = {
|
||||||
|
>>> "sample_rate": 1e6,
|
||||||
|
>>> "center_frequency": 2.44e9,
|
||||||
|
>>> }
|
||||||
|
>>>
|
||||||
|
>>> recording = Recording(data=samples, metadata=metadata)
|
||||||
|
>>> print(recording.metadata)
|
||||||
|
{'sample_rate': 1000000.0,
|
||||||
|
'center_frequency': 2440000000.0,
|
||||||
|
'timestamp': 17369...,
|
||||||
|
'rec_id': 'fda0f41...'}
|
||||||
|
>>>
|
||||||
|
>>> recording.add_to_metadata(key="author", value="me")
|
||||||
|
>>> print(recording.metadata)
|
||||||
|
{'sample_rate': 1000000.0,
|
||||||
|
'center_frequency': 2440000000.0,
|
||||||
|
'author': 'me',
|
||||||
|
'timestamp': 17369...,
|
||||||
|
'rec_id': 'fda0f41...'}
|
||||||
|
"""
|
||||||
|
if key in self.metadata:
|
||||||
|
raise ValueError(
|
||||||
|
f"Key {key} already in metadata. Use Recording.update_metadata() to modify existing fields."
|
||||||
|
)
|
||||||
|
|
||||||
|
if not _is_valid_metadata_key(key):
|
||||||
|
raise ValueError(f"Invalid metadata key: {key}.")
|
||||||
|
|
||||||
|
if not _is_jsonable(value):
|
||||||
|
raise ValueError("Value must be JSON serializable.")
|
||||||
|
|
||||||
|
self._metadata[key] = value
|
||||||
|
|
||||||
|
def update_metadata(self, key: str, value: Any) -> None:
|
||||||
|
"""Update the value of an existing metadata key,
|
||||||
|
or add the key-value pair if it does not already exist.
|
||||||
|
|
||||||
|
:param key: Existing metadata key.
|
||||||
|
:type key: str
|
||||||
|
:param value: New value to enter at key.
|
||||||
|
:type value: any
|
||||||
|
|
||||||
|
:raises ValueError: If value is not JSON serializable
|
||||||
|
:raises ValueError: If key is protected.
|
||||||
|
|
||||||
|
:return: None.
|
||||||
|
|
||||||
|
**Examples:**
|
||||||
|
|
||||||
|
Create a recording and update metadata:
|
||||||
|
|
||||||
|
>>> import numpy
|
||||||
|
>>> from utils.data import Recording
|
||||||
|
|
||||||
|
>>> samples = numpy.ones(10000, dtype=numpy.complex64)
|
||||||
|
>>> metadata = {
|
||||||
|
>>> "sample_rate": 1e6,
|
||||||
|
>>> "center_frequency": 2.44e9,
|
||||||
|
>>> "author": "me"
|
||||||
|
>>> }
|
||||||
|
|
||||||
|
>>> recording = Recording(data=samples, metadata=metadata)
|
||||||
|
>>> print(recording.metadata)
|
||||||
|
{'sample_rate': 1000000.0,
|
||||||
|
'center_frequency': 2440000000.0,
|
||||||
|
'author': "me",
|
||||||
|
'timestamp': 17369...
|
||||||
|
'rec_id': 'fda0f41...'}
|
||||||
|
|
||||||
|
>>> recording.update_metadata(key="author", value=you")
|
||||||
|
>>> print(recording.metadata)
|
||||||
|
{'sample_rate': 1000000.0,
|
||||||
|
'center_frequency': 2440000000.0,
|
||||||
|
'author': "you",
|
||||||
|
'timestamp': 17369...
|
||||||
|
'rec_id': 'fda0f41...'}
|
||||||
|
"""
|
||||||
|
if key not in self.metadata:
|
||||||
|
self.add_to_metadata(key=key, value=value)
return
|
||||||
|
|
||||||
|
if not _is_jsonable(value):
|
||||||
|
raise ValueError("Value must be JSON serializable.")
|
||||||
|
|
||||||
|
if key in PROTECTED_KEYS: # Check protected keys.
|
||||||
|
raise ValueError(f"Key {key} is protected and cannot be modified or removed.")
|
||||||
|
|
||||||
|
else:
|
||||||
|
self._metadata[key] = value
|
||||||
|
|
||||||
|
def remove_from_metadata(self, key: str):
|
||||||
|
"""
|
||||||
|
Remove a key from the recording metadata.
|
||||||
|
Does not remove key if it is protected.
|
||||||
|
|
||||||
|
:param key: The key to remove.
|
||||||
|
:type key: str
|
||||||
|
|
||||||
|
:raises ValueError: If key is protected.
|
||||||
|
|
||||||
|
:return: None.
|
||||||
|
|
||||||
|
**Examples:**
|
||||||
|
|
||||||
|
Create a recording, add a metadata key, then remove it:
|
||||||
|
|
||||||
|
>>> import numpy
|
||||||
|
>>> from utils.data import Recording
|
||||||
|
|
||||||
|
>>> samples = numpy.ones(10000, dtype=numpy.complex64)
|
||||||
|
>>> metadata = {
|
||||||
|
... "sample_rate": 1e6,
|
||||||
|
... "center_frequency": 2.44e9,
|
||||||
|
... }
|
||||||
|
|
||||||
|
>>> recording = Recording(data=samples, metadata=metadata)
|
||||||
|
>>> print(recording.metadata)
|
||||||
|
{'sample_rate': 1000000.0,
|
||||||
|
'center_frequency': 2440000000.0,
|
||||||
|
'timestamp': 17369..., # Example value
|
||||||
|
'rec_id': 'fda0f41...'} # Example value
|
||||||
|
|
||||||
|
>>> recording.add_to_metadata(key="author", value="me")
|
||||||
|
>>> print(recording.metadata)
|
||||||
|
{'sample_rate': 1000000.0,
|
||||||
|
'center_frequency': 2440000000.0,
|
||||||
|
'author': 'me',
|
||||||
|
'timestamp': 17369..., # Example value
|
||||||
|
'rec_id': 'fda0f41...'} # Example value
|
||||||
|
"""
|
||||||
|
if key not in PROTECTED_KEYS:
|
||||||
|
self._metadata.pop(key)
|
||||||
|
else:
|
||||||
|
raise ValueError(f"Key {key} is protected and cannot be modified or removed.")
|
||||||
|
|
||||||
|
def view(self, output_path: Optional[str] = "images/signal.png", **kwargs) -> None:
|
||||||
|
"""Create a plot of various signal visualizations as a PNG image.
|
||||||
|
|
||||||
|
:param output_path: The output image path. Defaults to "images/signal.png".
|
||||||
|
:type output_path: str, optional
|
||||||
|
:param kwargs: Keyword arguments passed on to utils.view.view_sig.
|
||||||
|
:type: dict of keyword arguments
|
||||||
|
|
||||||
|
**Examples:**
|
||||||
|
|
||||||
|
Create a recording and view it as a plot in a .png image:
|
||||||
|
|
||||||
|
>>> import numpy
|
||||||
|
>>> from utils.data import Recording
|
||||||
|
|
||||||
|
>>> samples = numpy.ones(10000, dtype=numpy.complex64)
|
||||||
|
>>> metadata = {
|
||||||
|
>>> "sample_rate": 1e6,
|
||||||
|
>>> "center_frequency": 2.44e9,
|
||||||
|
>>> }
|
||||||
|
|
||||||
|
>>> recording = Recording(data=samples, metadata=metadata)
|
||||||
|
>>> recording.view()
|
||||||
|
"""
|
||||||
|
from utils.view import view_sig
|
||||||
|
|
||||||
|
view_sig(recording=self, output_path=output_path, **kwargs)
|
||||||
|
|
||||||
|
def to_sigmf(self, filename: Optional[str] = None, path: Optional[os.PathLike | str] = None) -> None:
|
||||||
|
"""Write recording to a set of SigMF files.
|
||||||
|
|
||||||
|
The SigMF io format is defined by the `SigMF Specification Project <https://github.com/sigmf/SigMF>`_
|
||||||
|
|
||||||
|
:param recording: The recording to be written to file.
|
||||||
|
:type recording: utils.data.Recording
|
||||||
|
:param filename: The name of the file where the recording is to be saved. Defaults to auto generated filename.
|
||||||
|
:type filename: os.PathLike or str, optional
|
||||||
|
:param path: The directory path to where the recording is to be saved. Defaults to recordings/.
|
||||||
|
:type path: os.PathLike or str, optional
|
||||||
|
|
||||||
|
:raises IOError: If there is an issue encountered during the file writing process.
|
||||||
|
|
||||||
|
:return: None
|
||||||
|
|
||||||
|
**Examples:**
|
||||||
|
|
||||||
|
Create a recording and write it to a set of SigMF files:
|
||||||
|
|
||||||
|
>>> import numpy
|
||||||
|
>>> from utils.data import Recording
|
||||||
|
|
||||||
|
>>> samples = numpy.ones(10000, dtype=numpy.complex64)
|
||||||
|
>>> metadata = {
|
||||||
|
... "sample_rate": 1e6,
|
||||||
|
... "center_frequency": 2.44e9,
|
||||||
|
... }
|
||||||
|
|
||||||
|
>>> recording = Recording(data=samples, metadata=metadata)
|
||||||
|
>>> recording.view()
|
||||||
|
"""
|
||||||
|
from utils.io.recording import to_sigmf
|
||||||
|
|
||||||
|
to_sigmf(filename=filename, path=path, recording=self)
|
||||||
|
|
||||||
|
def to_npy(self, filename: Optional[str] = None, path: Optional[os.PathLike | str] = None) -> str:
|
||||||
|
"""Write recording to ``.npy`` binary file.
|
||||||
|
|
||||||
|
:param filename: The name of the file where the recording is to be saved. Defaults to auto generated filename.
|
||||||
|
:type filename: os.PathLike or str, optional
|
||||||
|
:param path: The directory path to where the recording is to be saved. Defaults to recordings/.
|
||||||
|
:type path: os.PathLike or str, optional
|
||||||
|
|
||||||
|
:raises IOError: If there is an issue encountered during the file writing process.
|
||||||
|
|
||||||
|
:return: Path where the file was saved.
|
||||||
|
:rtype: str
|
||||||
|
|
||||||
|
**Examples:**
|
||||||
|
|
||||||
|
Create a recording and save it to a .npy file:
|
||||||
|
|
||||||
|
>>> import numpy
|
||||||
|
>>> from utils.data import Recording
|
||||||
|
|
||||||
|
>>> samples = numpy.ones(10000, dtype=numpy.complex64)
|
||||||
|
>>> metadata = {
|
||||||
|
>>> "sample_rate": 1e6,
|
||||||
|
>>> "center_frequency": 2.44e9,
|
||||||
|
>>> }
|
||||||
|
|
||||||
|
>>> recording = Recording(data=samples, metadata=metadata)
|
||||||
|
>>> recording.to_npy()
|
||||||
|
"""
|
||||||
|
from utils.io.recording import to_npy
|
||||||
|
|
||||||
|
return to_npy(recording=self, filename=filename, path=path)
|
||||||
|
|
||||||
|
def trim(self, num_samples: int, start_sample: Optional[int] = 0) -> Recording:
|
||||||
|
"""Trim Recording samples to a desired length, shifting annotations to maintain alignment.
|
||||||
|
|
||||||
|
:param start_sample: The start index of the desired trimmed recording. Defaults to 0.
|
||||||
|
:type start_sample: int, optional
|
||||||
|
:param num_samples: The number of samples that the output trimmed recording will have.
|
||||||
|
:type num_samples: int
|
||||||
|
:raises IndexError: If start_sample + num_samples is greater than the length of the recording.
|
||||||
|
:raises IndexError: If sample_start < 0 or num_samples < 0.
|
||||||
|
|
||||||
|
:return: The trimmed Recording.
|
||||||
|
:rtype: Recording
|
||||||
|
|
||||||
|
**Examples:**
|
||||||
|
|
||||||
|
Create a recording and trim it:
|
||||||
|
|
||||||
|
>>> import numpy
|
||||||
|
>>> from utils.data import Recording
|
||||||
|
|
||||||
|
>>> samples = numpy.ones(10000, dtype=numpy.complex64)
|
||||||
|
>>> metadata = {
|
||||||
|
... "sample_rate": 1e6,
|
||||||
|
... "center_frequency": 2.44e9,
|
||||||
|
... }
|
||||||
|
|
||||||
|
>>> recording = Recording(data=samples, metadata=metadata)
|
||||||
|
>>> print(len(recording))
|
||||||
|
10000
|
||||||
|
|
||||||
|
>>> trimmed_recording = recording.trim(start_sample=1000, num_samples=1000)
|
||||||
|
>>> print(len(trimmed_recording))
|
||||||
|
1000
|
||||||
|
"""
|
||||||
|
|
||||||
|
if start_sample < 0:
|
||||||
|
raise IndexError("start_sample cannot be < 0.")
|
||||||
|
elif start_sample + num_samples > len(self):
|
||||||
|
raise IndexError(
|
||||||
|
f"start_sample {start_sample} + num_samples {num_samples} > recording length {len(self)}."
|
||||||
|
)
|
||||||
|
|
||||||
|
end_sample = start_sample + num_samples
|
||||||
|
|
||||||
|
data = self.data[:, start_sample:end_sample]
|
||||||
|
|
||||||
|
new_annotations = copy.deepcopy(self.annotations)
|
||||||
|
for annotation in new_annotations:
|
||||||
|
# trim annotation if it goes outside the trim boundaries
|
||||||
|
if annotation.sample_start < start_sample:
|
||||||
|
annotation.sample_count = annotation.sample_count - (start_sample - annotation.sample_start)
|
||||||
|
annotation.sample_start = start_sample
|
||||||
|
|
||||||
|
if annotation.sample_start + annotation.sample_count > end_sample:
|
||||||
|
annotation.sample_count = end_sample - annotation.sample_start
|
||||||
|
|
||||||
|
# shift annotation to align with the new start point
|
||||||
|
annotation.sample_start = annotation.sample_start - start_sample
|
||||||
|
|
||||||
|
return Recording(data=data, metadata=self.metadata, annotations=new_annotations)
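# Illustrative sketch of the annotation handling above (numbers hypothetical): an annotation
# covering samples 1500-2500 of the original recording, after trim(start_sample=1000,
# num_samples=1000), ends up with sample_start=500 and sample_count clipped to 500, so it
# finishes exactly at the new recording boundary.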
|
||||||
|
|
||||||
|
def normalize(self) -> Recording:
|
||||||
|
"""Scale the recording data, relative to its maximum value, so that the magnitude of the maximum sample is 1.
|
||||||
|
|
||||||
|
:return: Recording where the maximum sample amplitude is 1.
|
||||||
|
:rtype: Recording
|
||||||
|
|
||||||
|
**Examples:**
|
||||||
|
|
||||||
|
Create a recording with maximum amplitude 0.5 and normalize to a maximum amplitude of 1:
|
||||||
|
|
||||||
|
>>> import numpy
|
||||||
|
>>> from utils.data import Recording
|
||||||
|
|
||||||
|
>>> samples = numpy.ones(10000, dtype=numpy.complex64) * 0.5
|
||||||
|
>>> metadata = {
|
||||||
|
... "sample_rate": 1e6,
|
||||||
|
... "center_frequency": 2.44e9,
|
||||||
|
... }
|
||||||
|
|
||||||
|
>>> recording = Recording(data=samples, metadata=metadata)
|
||||||
|
>>> print(numpy.max(numpy.abs(recording.data)))
|
||||||
|
0.5
|
||||||
|
|
||||||
|
>>> normalized_recording = recording.normalize()
|
||||||
|
>>> print(numpy.max(numpy.abs(normalized_recording.data)))
|
||||||
|
1
|
||||||
|
"""
|
||||||
|
scaled_data = self.data / np.max(abs(self.data))
|
||||||
|
return Recording(data=scaled_data, metadata=self.metadata, annotations=self.annotations)
|
||||||
|
|
||||||
|
def generate_filename(self, tag: Optional[str] = "rec"):
|
||||||
|
"""Generate a filename from metadata.
|
||||||
|
|
||||||
|
:param tag: The string at the beginning of the generated filename. Default is "rec".
|
||||||
|
:type tag: str, optional
|
||||||
|
|
||||||
|
:return: A filename without an extension.
|
||||||
|
:rtype: str
|
||||||
|
"""
|
||||||
|
# TODO: This method should be refactored to use the first 7 characters of the 'rec_id' field.
|
||||||
|
|
||||||
|
tag = tag + "_"
|
||||||
|
source = self.metadata.get("source", "")
|
||||||
|
if source != "":
|
||||||
|
source = source + "_"
|
||||||
|
|
||||||
|
# converts 1000 to 1k for example
|
||||||
|
center_frequency = str(Quantity(self.metadata.get("center_frequency", 0)))
|
||||||
|
if center_frequency != "0":
|
||||||
|
num = center_frequency[:-1]
|
||||||
|
suffix = center_frequency[-1]
|
||||||
|
num = int(np.round(float(num)))
|
||||||
|
else:
|
||||||
|
num = 0
|
||||||
|
suffix = ""
|
||||||
|
center_frequency = str(num) + suffix + "Hz_"
|
||||||
|
|
||||||
|
timestamp = int(self.timestamp)
|
||||||
|
timestamp = datetime.datetime.fromtimestamp(timestamp).strftime("%Y-%m-%d_%H-%M-%S") + "_"
|
||||||
|
|
||||||
|
# Add first seven characters of rec_id for uniqueness
|
||||||
|
rec_id = self.rec_id[0:7]
|
||||||
|
return tag + source + center_frequency + timestamp + rec_id
|
||||||
|
|
||||||
|
def __len__(self) -> int:
|
||||||
|
"""The length of a recording is defined by the number of complex samples in each channel of the recording."""
|
||||||
|
return self.shape[1]
|
||||||
|
|
||||||
|
def __eq__(self, other: Recording) -> bool:
|
||||||
|
"""Two Recordings are equal if all data, metadata, and annotations are the same."""
|
||||||
|
|
||||||
|
# counter used to allow for differently ordered annotation lists
|
||||||
|
return (
|
||||||
|
np.array_equal(self.data, other.data)
|
||||||
|
and self.metadata == other.metadata
|
||||||
|
and self.annotations == other.annotations
|
||||||
|
)
|
||||||
|
|
||||||
|
def __ne__(self, other: Recording) -> bool:
|
||||||
|
"""Two Recordings are equal if all data, and metadata, and annotations are the same."""
|
||||||
|
return not self.__eq__(other=other)
|
||||||
|
|
||||||
|
def __iter__(self) -> Iterator:
|
||||||
|
self._index = 0
|
||||||
|
return self
|
||||||
|
|
||||||
|
def __next__(self) -> np.ndarray:
|
||||||
|
if self._index < self.n_chan:
|
||||||
|
to_ret = self.data[self._index]
|
||||||
|
self._index += 1
|
||||||
|
return to_ret
|
||||||
|
else:
|
||||||
|
raise StopIteration
|
||||||
|
|
||||||
|
def __getitem__(self, key: int | tuple[int] | slice) -> np.ndarray | np.complexfloating:
|
||||||
|
"""If key is an integer, tuple of integers, or a slice, return the corresponding samples.
|
||||||
|
|
||||||
|
For arrays with 1,024 or fewer samples, return a copy of the recording data. For larger arrays, return a
|
||||||
|
read-only view. This prevents mutation at a distance while maintaining performance.
|
||||||
|
"""
|
||||||
|
if isinstance(key, (int, tuple, slice)):
|
||||||
|
v = self._data[key]
|
||||||
|
if isinstance(v, np.complexfloating):
|
||||||
|
return v
|
||||||
|
elif v.size > 1024:
|
||||||
|
v.setflags(write=False) # Make view read-only.
|
||||||
|
return v
|
||||||
|
else:
|
||||||
|
return v.copy()
|
||||||
|
|
||||||
|
else:
|
||||||
|
raise ValueError(f"Key must be an integer, tuple, or slice but was {type(key)}.")
|
||||||
|
|
||||||
|
def __setitem__(self, *args, **kwargs) -> None:
|
||||||
|
"""Raise an error if an attempt is made to assign to the recording."""
|
||||||
|
raise ValueError("Assignment to Recording is not allowed.")
|
||||||
|
|
||||||
|
|
||||||
|
def generate_recording_id(data: np.ndarray, timestamp: Optional[float | int] = None) -> str:
|
||||||
|
"""Generate unique 64-character recording ID. The recording ID is generated by hashing the recording data with
|
||||||
|
the timestamp at which the recording data was generated. If no timestamp is provided, the current time is used.
|
||||||
|
|
||||||
|
:param data: Tape of IQ samples, as a NumPy array.
|
||||||
|
:type data: np.ndarray
|
||||||
|
:param timestamp: Unix timestamp in seconds. Defaults to None.
|
||||||
|
:type timestamp: float or int, optional
|
||||||
|
|
||||||
|
:return: 64-character hexadecimal SHA-256 hash, to be used as the recording ID.
|
||||||
|
:rtype: str
|
||||||
|
"""
|
||||||
|
if timestamp is None:
|
||||||
|
timestamp = time.time()
|
||||||
|
|
||||||
|
byte_sequence = data.tobytes() + str(timestamp).encode("utf-8")
|
||||||
|
sha256_hash = hashlib.sha256(byte_sequence)
|
||||||
|
|
||||||
|
return sha256_hash.hexdigest()
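# Minimal determinism sketch (values hypothetical):
#     samples = np.ones(8, dtype=np.complex64)
#     a = generate_recording_id(data=samples, timestamp=1700000000.0)
#     b = generate_recording_id(data=samples, timestamp=1700000000.0)
#     assert a == b and len(a) == 64    # same inputs give the same 64-character hex digest
#     assert a != generate_recording_id(data=samples, timestamp=1700000001.0)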
|
||||||
|
|
||||||
|
|
||||||
|
def _is_jsonable(x: Any) -> bool:
|
||||||
|
"""
|
||||||
|
:return: True if x is JSON serializable, False otherwise.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
json.dumps(x)
|
||||||
|
return True
|
||||||
|
except (TypeError, OverflowError):
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def _is_valid_metadata_key(key: Any) -> bool:
|
||||||
|
"""
|
||||||
|
:return: True if key is a valid metadata key, False otherwise.
|
||||||
|
"""
|
||||||
|
if isinstance(key, str) and key.islower() and re.match(pattern=r"^[a-z_]+$", string=key) is not None:
|
||||||
|
return True
|
||||||
|
|
||||||
|
else:
|
||||||
|
return False
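# Quick illustration of the key rules above (lowercase letters and underscores only):
#     _is_valid_metadata_key("sample_rate")  ->  True
#     _is_valid_metadata_key("SampleRate")   ->  False   # uppercase rejected
#     _is_valid_metadata_key("snr_db_2")     ->  False   # digits fail ^[a-z_]+$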
|
22
src/ria_toolkit/io/__init__.py
Normal file
22
src/ria_toolkit/io/__init__.py
Normal file
|
@ -0,0 +1,22 @@
|
||||||
|
"""
|
||||||
|
The IO package contains utilities for input and output operations, such as loading and saving recordings to and from
|
||||||
|
file.
|
||||||
|
"""
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
# Common:
|
||||||
|
"exists",
|
||||||
|
"copy",
|
||||||
|
"move",
|
||||||
|
"validate",
|
||||||
|
# Recording:
|
||||||
|
"save_recording",
|
||||||
|
"load_recording",
|
||||||
|
"to_sigmf",
|
||||||
|
"from_sigmf",
|
||||||
|
"to_npy",
|
||||||
|
"from_npy",
|
||||||
|
]
|
||||||
|
|
||||||
|
from .common import copy, exists, move, validate
|
||||||
|
from .recording import from_npy, from_sigmf, load_recording, to_npy, to_sigmf
|
331
src/ria_toolkit/io/recording.py
Normal file
331
src/ria_toolkit/io/recording.py
Normal file
|
@ -0,0 +1,331 @@
|
||||||
|
"""
|
||||||
|
Utilities for input/output operations on the utils.data.Recording object.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import datetime as dt
|
||||||
|
import os
|
||||||
|
from datetime import timezone
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import sigmf
|
||||||
|
from sigmf import SigMFFile, sigmffile
|
||||||
|
from sigmf.utils import get_data_type_str
|
||||||
|
|
||||||
|
from utils.data import Annotation
|
||||||
|
from utils.data.recording import Recording
|
||||||
|
|
||||||
|
|
||||||
|
def load_rec(file: os.PathLike) -> Recording:
|
||||||
|
"""Load a recording from file.
|
||||||
|
|
||||||
|
:param file: The directory path to the file(s) to load, **with** the file extension.
|
||||||
|
To load from SigMF, the file extension must be one of *sigmf*, *sigmf-data*, or *sigmf-meta*;
|
||||||
|
either way, both the SigMF data and meta files must be present for a successful read.
|
||||||
|
:type file: os.PathLike
|
||||||
|
|
||||||
|
:raises IOError: If there is an issue encountered during the file reading process.
|
||||||
|
|
||||||
|
:raises ValueError: If the inferred file extension is not supported.
|
||||||
|
|
||||||
|
:return: The recording, as initialized from file(s).
|
||||||
|
:rtype: utils.data.Recording
|
||||||
|
"""
|
||||||
|
_, extension = os.path.splitext(file)
|
||||||
|
extension = extension.lstrip(".")
|
||||||
|
|
||||||
|
if extension.lower() in ["sigmf", "sigmf-data", "sigmf-meta"]:
|
||||||
|
return from_sigmf(file=file)
|
||||||
|
|
||||||
|
elif extension.lower() == "npy":
|
||||||
|
return from_npy(file=file)
|
||||||
|
|
||||||
|
else:
|
||||||
|
raise ValueError(f"File extension {extension} not supported.")
|
||||||
|
|
||||||
|
|
||||||
|
SIGMF_KEY_CONVERSION = {
|
||||||
|
SigMFFile.AUTHOR_KEY: "author",
|
||||||
|
SigMFFile.COLLECTION_KEY: "sigmf:collection",
|
||||||
|
SigMFFile.DATASET_KEY: "sigmf:dataset",
|
||||||
|
SigMFFile.DATATYPE_KEY: "datatype",
|
||||||
|
SigMFFile.DATA_DOI_KEY: "data_doi",
|
||||||
|
SigMFFile.DESCRIPTION_KEY: "description",
|
||||||
|
SigMFFile.EXTENSIONS_KEY: "sigmf:extensions",
|
||||||
|
SigMFFile.GEOLOCATION_KEY: "geolocation",
|
||||||
|
SigMFFile.HASH_KEY: "sigmf:hash",
|
||||||
|
SigMFFile.HW_KEY: "sdr",
|
||||||
|
SigMFFile.LICENSE_KEY: "license",
|
||||||
|
SigMFFile.META_DOI_KEY: "metadata",
|
||||||
|
SigMFFile.METADATA_ONLY_KEY: "sigmf:metadata_only",
|
||||||
|
SigMFFile.NUM_CHANNELS_KEY: "sigmf:num_channels",
|
||||||
|
SigMFFile.RECORDER_KEY: "source_software",
|
||||||
|
SigMFFile.SAMPLE_RATE_KEY: "sample_rate",
|
||||||
|
SigMFFile.START_OFFSET_KEY: "sigmf:start_offset",
|
||||||
|
SigMFFile.TRAILING_BYTES_KEY: "sigmf:trailing_bytes",
|
||||||
|
SigMFFile.VERSION_KEY: "sigmf:version",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def convert_to_serializable(obj):
|
||||||
|
"""
|
||||||
|
Recursively convert a JSON-compatible structure into a fully JSON-serializable one.
|
||||||
|
Handles cases like NumPy data types, nested dicts, lists, and sets.
|
||||||
|
"""
|
||||||
|
if isinstance(obj, np.integer):
|
||||||
|
return int(obj) # Convert NumPy int to Python int
|
||||||
|
elif isinstance(obj, np.floating):
|
||||||
|
return float(obj) # Convert NumPy float to Python float
|
||||||
|
elif isinstance(obj, np.ndarray):
|
||||||
|
return obj.tolist() # Convert NumPy array to list
|
||||||
|
elif isinstance(obj, (list, tuple)):
|
||||||
|
return [convert_to_serializable(item) for item in obj] # Process list or tuple
|
||||||
|
elif isinstance(obj, dict):
|
||||||
|
return {key: convert_to_serializable(value) for key, value in obj.items()} # Process dict
|
||||||
|
elif isinstance(obj, set):
|
||||||
|
return list(obj) # Convert set to list
|
||||||
|
elif obj in [float("inf"), float("-inf"), None]: # Handle infinity or None
|
||||||
|
return None
|
||||||
|
elif isinstance(obj, (str, int, float, bool)) or obj is None:
|
||||||
|
return obj # Base case: already serializable
|
||||||
|
else:
|
||||||
|
raise TypeError(f"Value of type {type(obj)} is not JSON serializable: {obj}")
|
||||||
|
|
||||||
|
|
||||||
|
def to_sigmf(recording: Recording, filename: Optional[str] = None, path: Optional[os.PathLike | str] = None) -> None:
|
||||||
|
"""Write recording to a set of SigMF files.
|
||||||
|
|
||||||
|
The SigMF io format is defined by the `SigMF Specification Project <https://github.com/sigmf/SigMF>`_
|
||||||
|
|
||||||
|
:param recording: The recording to be written to file.
|
||||||
|
:type recording: utils.data.Recording
|
||||||
|
:param filename: The name of the file where the recording is to be saved. Defaults to auto generated filename.
|
||||||
|
:type filename: os.PathLike or str, optional
|
||||||
|
:param path: The directory path to where the recording is to be saved. Defaults to recordings/.
|
||||||
|
:type path: os.PathLike or str, optional
|
||||||
|
|
||||||
|
:raises IOError: If there is an issue encountered during the file writing process.
|
||||||
|
|
||||||
|
:return: None
|
||||||
|
|
||||||
|
**Examples:**
|
||||||
|
|
||||||
|
>>> from utils.sdr import Synth
|
||||||
|
>>> from utils.data import Recording
|
||||||
|
>>> from utils.io import to_sigmf
|
||||||
|
>>> sdr = Synth()
|
||||||
|
>>> rec = sdr.record(center_frequency=2.4e9, sample_rate=20e6)
|
||||||
|
>>> to_sigmf(recording=rec, file="sample_recording")
|
||||||
|
"""
|
||||||
|
|
||||||
|
if filename is not None:
|
||||||
|
filename, _ = os.path.splitext(filename)
|
||||||
|
else:
|
||||||
|
filename = recording.generate_filename()
|
||||||
|
|
||||||
|
if path is None:
|
||||||
|
path = "recordings"
|
||||||
|
|
||||||
|
if not os.path.exists(path):
|
||||||
|
os.makedirs(path)
|
||||||
|
|
||||||
|
multichannel_samples = recording.data
|
||||||
|
metadata = recording.metadata
|
||||||
|
annotations = recording.annotations
|
||||||
|
|
||||||
|
if multichannel_samples.shape[0] > 1:
|
||||||
|
raise NotImplementedError("SigMF File Saving Not Implemented for Multichannel Recordings")
|
||||||
|
else:
|
||||||
|
# extract single channel
|
||||||
|
samples = multichannel_samples[0]
|
||||||
|
|
||||||
|
data_file_path = os.path.join(path, f"{filename}.sigmf-data")
|
||||||
|
|
||||||
|
samples.tofile(data_file_path)
|
||||||
|
global_info = {
|
||||||
|
SigMFFile.DATATYPE_KEY: get_data_type_str(samples),
|
||||||
|
SigMFFile.VERSION_KEY: sigmf.__version__,
|
||||||
|
SigMFFile.RECORDER_KEY: "RIA",
|
||||||
|
}
|
||||||
|
|
||||||
|
converted_metadata = {
|
||||||
|
sigmf_key: metadata[metadata_key]
|
||||||
|
for sigmf_key, metadata_key in SIGMF_KEY_CONVERSION.items()
|
||||||
|
if metadata_key in metadata
|
||||||
|
}
|
||||||
|
|
||||||
|
# Merge dictionaries, giving priority to sigmf_meta
|
||||||
|
global_info = {**converted_metadata, **global_info}
|
||||||
|
|
||||||
|
ria_metadata = {f"ria:{key}": value for key, value in metadata.items()}
|
||||||
|
ria_metadata = convert_to_serializable(ria_metadata)
|
||||||
|
global_info.update(ria_metadata)
|
||||||
|
|
||||||
|
sigMF_metafile = SigMFFile(
|
||||||
|
data_file=data_file_path,
|
||||||
|
global_info=global_info,
|
||||||
|
)
|
||||||
|
|
||||||
|
for annotation_object in annotations:
|
||||||
|
annotation_dict = annotation_object.to_sigmf_format()
|
||||||
|
annotation_dict = convert_to_serializable(annotation_dict)
|
||||||
|
sigMF_metafile.add_annotation(
|
||||||
|
start_index=annotation_dict[SigMFFile.START_INDEX_KEY],
|
||||||
|
length=annotation_dict[SigMFFile.LENGTH_INDEX_KEY],
|
||||||
|
metadata=annotation_dict["metadata"],
|
||||||
|
)
|
||||||
|
|
||||||
|
sigMF_metafile.add_capture(
|
||||||
|
0,
|
||||||
|
metadata={
|
||||||
|
SigMFFile.FREQUENCY_KEY: metadata.get("center_frequency", 0),
|
||||||
|
SigMFFile.DATETIME_KEY: dt.datetime.fromtimestamp(float(metadata.get("timestamp", 0)), tz=timezone.utc)
|
||||||
|
.isoformat()
|
||||||
|
.replace("+00:00", "Z"),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
meta_dict = sigMF_metafile.ordered_metadata()
|
||||||
|
meta_dict["ria"] = metadata
|
||||||
|
|
||||||
|
sigMF_metafile.tofile(f"{os.path.join(path,filename)}.sigmf-meta")
|
||||||
|
|
||||||
|
|
||||||
|
def from_sigmf(file: os.PathLike | str) -> Recording:
|
||||||
|
"""Load a recording from a set of SigMF files.
|
||||||
|
|
||||||
|
:param file: The directory path to the SigMF recording files, without any file extension.
|
||||||
|
The recording will be initialized from ``file_name.sigmf-data`` and ``file_name.sigmf-meta``.
|
||||||
|
Both the data and meta files must be present for a successful read.
|
||||||
|
:type file: str or os.PathLike
|
||||||
|
|
||||||
|
:raises IOError: If there is an issue encountered during the file reading process.
|
||||||
|
|
||||||
|
:return: The recording, as initialized from the SigMF files.
|
||||||
|
:rtype: utils.data.Recording
|
||||||
|
"""
|
||||||
|
|
||||||
|
if len(file) > 11:
|
||||||
|
if file[-11:-5] != ".sigmf":
|
||||||
|
file = file + ".sigmf-data"
|
||||||
|
|
||||||
|
sigmf_file = sigmffile.fromfile(file)
|
||||||
|
|
||||||
|
data = sigmf_file.read_samples()
|
||||||
|
global_metadata = sigmf_file.get_global_info()
|
||||||
|
dict_annotations = sigmf_file.get_annotations()
|
||||||
|
|
||||||
|
processed_metadata = {}
|
||||||
|
for key, value in global_metadata.items():
|
||||||
|
# Process core keys
|
||||||
|
if key.startswith("core:"):
|
||||||
|
base_key = key[5:] # Remove 'core:' prefix
|
||||||
|
converted_key = SIGMF_KEY_CONVERSION.get(base_key, base_key)
|
||||||
|
# Process ria keys
|
||||||
|
elif key.startswith("ria:"):
|
||||||
|
converted_key = key[4:] # Remove 'ria:' prefix
|
||||||
|
else:
|
||||||
|
# Load non-core/ria keys as is
|
||||||
|
converted_key = key
|
||||||
|
|
||||||
|
processed_metadata[converted_key] = value
|
||||||
|
|
||||||
|
annotations = []
|
||||||
|
|
||||||
|
for ann in dict_annotations:
|
||||||
|
annotations.append(
|
||||||
|
Annotation(
|
||||||
|
sample_start=ann[SigMFFile.START_INDEX_KEY],
|
||||||
|
sample_count=ann[SigMFFile.LENGTH_INDEX_KEY],
|
||||||
|
freq_lower_edge=ann.get(SigMFFile.FLO_KEY, None),
|
||||||
|
freq_upper_edge=ann.get(SigMFFile.FHI_KEY, None),
|
||||||
|
label=ann.get(SigMFFile.LABEL_KEY, None),
|
||||||
|
comment=ann.get(SigMFFile.COMMENT_KEY, None),
|
||||||
|
detail=ann.get("ria:detail", None),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
output_recording = Recording(data=data, metadata=processed_metadata, annotations=annotations)
|
||||||
|
return output_recording
|
||||||
|
|
||||||
|
|
||||||
|
def to_npy(recording: Recording, filename: Optional[str] = None, path: Optional[os.PathLike | str] = None) -> str:
|
||||||
|
"""Write recording to ``.npy`` binary file.
|
||||||
|
|
||||||
|
:param recording: The recording to be written to file.
|
||||||
|
:type recording: utils.data.Recording
|
||||||
|
:param filename: The name of the file where the recording is to be saved. Defaults to auto generated filename.
|
||||||
|
:type filename: os.PathLike or str, optional
|
||||||
|
:param path: The directory path to where the recording is to be saved. Defaults to recordings/.
|
||||||
|
:type path: os.PathLike or str, optional
|
||||||
|
|
||||||
|
:raises IOError: If there is an issue encountered during the file writing process.
|
||||||
|
|
||||||
|
:return: Path where the file was saved.
|
||||||
|
:rtype: str
|
||||||
|
|
||||||
|
**Examples:**
|
||||||
|
|
||||||
|
>>> from utils.sdr import Synth
|
||||||
|
>>> from utils.data import Recording
|
||||||
|
>>> from utils.io import to_npy
|
||||||
|
>>> sdr = Synth()
|
||||||
|
>>> rec = sdr.record(center_frequency=2.4e9, sample_rate=20e6)
|
||||||
|
>>> to_npy(recording=rec, file="sample_recording.npy")
|
||||||
|
"""
|
||||||
|
if filename is not None:
|
||||||
|
filename, _ = os.path.splitext(filename)
|
||||||
|
else:
|
||||||
|
filename = recording.generate_filename()
|
||||||
|
filename = filename + ".npy"
|
||||||
|
|
||||||
|
if path is None:
|
||||||
|
path = "recordings"
|
||||||
|
|
||||||
|
if not os.path.exists(path):
|
||||||
|
os.makedirs(path)
|
||||||
|
fullpath = os.path.join(path, filename)
|
||||||
|
|
||||||
|
data = np.array(recording.data)
|
||||||
|
metadata = recording.metadata
|
||||||
|
annotations = recording.annotations
|
||||||
|
|
||||||
|
with open(file=fullpath, mode="wb") as f:
|
||||||
|
np.save(f, data)
|
||||||
|
np.save(f, metadata)
|
||||||
|
np.save(f, annotations)
|
||||||
|
|
||||||
|
# print(f"Saved recording to {os.getcwd()}/{fullpath}")
|
||||||
|
return str(fullpath)
|
||||||
|
|
||||||
|
|
||||||
|
def from_npy(file: os.PathLike | str) -> Recording:
|
||||||
|
"""Load a recording from a ``.npy`` binary file.
|
||||||
|
|
||||||
|
:param file: The directory path to the recording file, with or without the ``.npy`` file extension.
|
||||||
|
:type file: str or os.PathLike
|
||||||
|
|
||||||
|
:raises IOError: If there is an issue encountered during the file reading process.
|
||||||
|
|
||||||
|
:return: The recording, as initialized from the ``.npy`` file.
|
||||||
|
:rtype: utils.data.Recording
|
||||||
|
"""
|
||||||
|
|
||||||
|
filename, extension = os.path.splitext(file)
|
||||||
|
if extension != ".npy" and extension != "":
|
||||||
|
raise ValueError("Cannot use from_npy if file extension is not .npy")
|
||||||
|
|
||||||
|
# Rebuild with .npy extension.
|
||||||
|
filename = str(filename) + ".npy"
|
||||||
|
|
||||||
|
with open(file=filename, mode="rb") as f:
|
||||||
|
data = np.load(f, allow_pickle=True)
|
||||||
|
metadata = np.load(f, allow_pickle=True)
|
||||||
|
metadata = metadata.tolist()
|
||||||
|
try:
|
||||||
|
annotations = list(np.load(f, allow_pickle=True))
|
||||||
|
except EOFError:
|
||||||
|
annotations = []
|
||||||
|
|
||||||
|
recording = Recording(data=data, metadata=metadata, annotations=annotations)
|
||||||
|
return recording
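# Hedged round-trip sketch (paths and values are assumptions, not part of this module):
#     rec = Recording(data=np.ones(100, dtype=np.complex64), metadata={"sample_rate": 1e6})
#     saved_path = to_npy(recording=rec, filename="example_rec.npy", path="recordings")
#     assert from_npy(file=saved_path) == rec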
|
8
src/ria_toolkit/transforms/__init__.py
Normal file
8
src/ria_toolkit/transforms/__init__.py
Normal file
|
@ -0,0 +1,8 @@
|
||||||
|
"""
|
||||||
|
The transforms package houses a collection of functions to manipulate and transform radio data.
|
||||||
|
|
||||||
|
This package contains various functions that operate on NumPy arrays. These functions are utilized within the machine
|
||||||
|
learning backends to build transforms and functions that seamlessly integrate with those from the respective backend.
|
||||||
|
|
||||||
|
All the transforms in this package expect data in the complex 1xN format.
|
||||||
|
"""
|
717
src/ria_toolkit/transforms/iq_augmentations.py
Normal file
717
src/ria_toolkit/transforms/iq_augmentations.py
Normal file
|
@ -0,0 +1,717 @@
|
||||||
|
"""
|
||||||
|
This module comprises the functionals of various transforms designed to create new training examples by augmenting
|
||||||
|
existing examples or recordings using a variety of techniques. These transforms take an ArrayLike object as input
|
||||||
|
and return a corresponding numpy.ndarray with the impairment model applied;
|
||||||
|
we call the latter the impaired data.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
from numpy.typing import ArrayLike
|
||||||
|
|
||||||
|
from utils.data.recording import Recording
|
||||||
|
from utils.helpers.array_conversion import convert_to_2xn
|
||||||
|
|
||||||
|
# TODO: For round 2 of index generation, should j be at min 2 spots away from where it was to prevent adjacent patches.
|
||||||
|
|
||||||
|
# TODO: All the transforms with some randomness need to be refactored to use a random generator.
|
||||||
|
|
||||||
|
|
||||||
|
def generate_awgn(signal: ArrayLike | Recording, snr: Optional[float] = 1) -> np.ndarray | Recording:
|
||||||
|
"""Generates additive white gaussian noise (AWGN) relative to the signal-to-noise ratio (SNR) of the
|
||||||
|
provided `signal` array or `Recording`.
|
||||||
|
|
||||||
|
This function calculates the root mean squared (RMS) power of `signal` and then finds the RMS power of
|
||||||
|
the noise which matches the specified SNR. Then, the AWGN is generated after calculating the variance and
|
||||||
|
randomly calculating the amplitude and phase of the noise.
|
||||||
|
|
||||||
|
:param signal: Input IQ data as a complex CxN array or `Recording`, where C is the number of channels and N
|
||||||
|
is the length of the IQ examples.
|
||||||
|
:type signal: array_like or utils.data.Recording
|
||||||
|
:param snr: The signal-to-noise ratio in dB. Default is 1.
|
||||||
|
:type snr: float, optional
|
||||||
|
|
||||||
|
:raises ValueError: If `signal` is not CxN complex.
|
||||||
|
|
||||||
|
:return: A numpy array representing the generated noise which matches the SNR of `signal`. If `signal` is a
|
||||||
|
Recording, returns a Recording object with its `data` attribute containing the generated noise array.
|
||||||
|
:rtype: np.ndarray or utils.data.Recording
|
||||||
|
|
||||||
|
>>> rec = Recording(data=[[2 + 5j, 1 + 8j]])
|
||||||
|
>>> new_rec = generate_awgn(rec)
|
||||||
|
>>> new_rec.data
|
||||||
|
array([[2.15991777 + 0.69673915j, 0.2814541 - 0.12111976j]])
|
||||||
|
"""
|
||||||
|
|
||||||
|
if isinstance(signal, Recording):
|
||||||
|
data = signal.data
|
||||||
|
else:
|
||||||
|
data = np.asarray(signal)
|
||||||
|
|
||||||
|
if data.ndim == 2 and np.iscomplexobj(data):
|
||||||
|
c, n = data.shape
|
||||||
|
else:
|
||||||
|
raise ValueError("signal must be CxN complex.")
|
||||||
|
|
||||||
|
snr_linear = 10 ** (snr / 10)
|
||||||
|
|
||||||
|
# Calculate the RMS power of the signal to solve for the RMS power of the noise
|
||||||
|
signal_rms_power = np.sqrt(np.mean(np.abs(data) ** 2))
|
||||||
|
noise_rms_power = signal_rms_power / snr_linear
|
||||||
|
|
||||||
|
# Generate the AWGN noise which has the same shape as data
|
||||||
|
variance = noise_rms_power**2
|
||||||
|
magnitude = np.random.normal(loc=0, scale=np.sqrt(variance), size=(c, n))
|
||||||
|
phase = np.random.uniform(low=0, high=2 * np.pi, size=(c, n))
|
||||||
|
complex_awgn = magnitude * np.exp(1j * phase)
|
||||||
|
|
||||||
|
if isinstance(signal, Recording):
|
||||||
|
return Recording(data=complex_awgn, metadata=signal.metadata)
|
||||||
|
else:
|
||||||
|
return complex_awgn
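# Rough numerical check of the scaling above (statistical, so only approximate): the RMS
# amplitude of the generated noise comes out near signal_rms / 10 ** (snr / 10).
#     sig = np.exp(1j * np.linspace(0, 8 * np.pi, 100_000)).reshape(1, -1)  # unit-power tone
#     noise = generate_awgn(sig, snr=10)
#     rms = np.sqrt(np.mean(np.abs(noise) ** 2))   # roughly 0.1 for this unit-RMS signal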
|
||||||
|
|
||||||
|
|
||||||
|
def time_reversal(signal: ArrayLike | Recording) -> np.ndarray | Recording:
|
||||||
|
"""Reverses the order of the I (In-phase) and Q (Quadrature) data samples along the time axis of the provided
|
||||||
|
`signal` array or `Recording`.
|
||||||
|
|
||||||
|
:param signal: Input IQ data as a complex CxN array or `Recording`, where C is the number of channels and N
|
||||||
|
is the length of the IQ examples.
|
||||||
|
:type signal: array_like or utils.data.Recording
|
||||||
|
|
||||||
|
:raises ValueError: If `signal` is not CxN complex.
|
||||||
|
|
||||||
|
:return: A numpy array containing the reversed I and Q data samples if `signal` is an array.
|
||||||
|
If `signal` is a `Recording`, returns a `Recording` object with its `data` attribute containing the
|
||||||
|
reversed array.
|
||||||
|
:rtype: np.ndarray or utils.data.Recording
|
||||||
|
|
||||||
|
>>> rec = Recording(data=[[1+2j, 3+4j, 5+6j]])
|
||||||
|
>>> new_rec = time_reversal(rec)
|
||||||
|
>>> new_rec.data
|
||||||
|
array([[5+6j, 3+4j, 1+2j]])
|
||||||
|
"""
|
||||||
|
|
||||||
|
if isinstance(signal, Recording):
|
||||||
|
data = signal.data
|
||||||
|
else:
|
||||||
|
data = np.asarray(signal)
|
||||||
|
|
||||||
|
if data.ndim == 2 and np.iscomplexobj(data):
|
||||||
|
c, n = data.shape
|
||||||
|
else:
|
||||||
|
raise ValueError("signal must be CxN complex.")
|
||||||
|
|
||||||
|
if c == 1:
|
||||||
|
# If 1xN complex
|
||||||
|
reversed_data = np.squeeze(data)[::-1]
|
||||||
|
else:
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
|
if isinstance(signal, Recording):
|
||||||
|
return Recording(data=reversed_data, metadata=signal.metadata)
|
||||||
|
else:
|
||||||
|
return reversed_data.reshape(c, n)
|
||||||
|
|
||||||
|
|
||||||
|
def spectral_inversion(signal: ArrayLike | Recording) -> np.ndarray | Recording:
|
||||||
|
"""Negates the imaginary components (Q, Quadrature) of the data samples contained within the
|
||||||
|
provided `signal` array or `Recording`.
|
||||||
|
|
||||||
|
:param signal: Input IQ data as a complex CxN array or `Recording`, where C is the number of channels and N
|
||||||
|
is the length of the IQ examples.
|
||||||
|
:type signal: array_like or utils.data.Recording
|
||||||
|
|
||||||
|
:raises ValueError: If `signal` is not CxN complex.
|
||||||
|
|
||||||
|
:return: A numpy array containing the original I and negated Q data samples if `signal` is an array.
|
||||||
|
If `signal` is a `Recording`, returns a `Recording` object with its `data` attribute containing the
|
||||||
|
inverted array.
|
||||||
|
:rtype: np.ndarray or utils.data.Recording
|
||||||
|
|
||||||
|
>>> rec = Recording(data=[[0+45j, 2-10j]])
|
||||||
|
>>> new_rec = spectral_inversion(rec)
|
||||||
|
>>> new_rec.data
|
||||||
|
array([[0-45j, 2+10j]])
|
||||||
|
"""
|
||||||
|
|
||||||
|
if isinstance(signal, Recording):
|
||||||
|
data = signal.data
|
||||||
|
else:
|
||||||
|
data = np.asarray(signal)
|
||||||
|
|
||||||
|
if data.ndim == 2 and np.iscomplexobj(data):
|
||||||
|
c, n = data.shape
|
||||||
|
else:
|
||||||
|
raise ValueError("signal must be CxN complex.")
|
||||||
|
|
||||||
|
if c == 1:
|
||||||
|
new_data = np.squeeze(data).real - 1j * np.squeeze(data).imag
|
||||||
|
else:
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
|
if isinstance(signal, Recording):
|
||||||
|
return Recording(data=new_data, metadata=signal.metadata)
|
||||||
|
else:
|
||||||
|
return new_data.reshape(c, n)
|
||||||
|
|
||||||
|
|
||||||
|
def channel_swap(signal: ArrayLike | Recording) -> np.ndarray | Recording:
|
||||||
|
"""Switches the I (In-phase) with the and Q (Quadrature) data samples for each sample within the
|
||||||
|
provided `signal` array or `Recording`.
|
||||||
|
|
||||||
|
:param signal: Input IQ data as a complex CxN array or `Recording`, where C is the number of channels and N
|
||||||
|
is the length of the IQ examples.
|
||||||
|
:type signal: array_like or utils.data.Recording
|
||||||
|
|
||||||
|
:raises ValueError: If `signal` is not CxN complex.
|
||||||
|
|
||||||
|
:return: A numpy array containing the swapped I and Q data samples if `signal` is an array.
|
||||||
|
If `signal` is a `Recording`, returns a `Recording` object with its `data` attribute containing the
|
||||||
|
swapped array.
|
||||||
|
:rtype: np.ndarray or utils.data.Recording
|
||||||
|
|
||||||
|
>>> rec = Recording(data=[[10+20j, 7+35j]])
|
||||||
|
>>> new_rec = channel_swap(rec)
|
||||||
|
>>> new_rec.data
|
||||||
|
array([[20+10j, 35+7j]])
|
||||||
|
"""
|
||||||
|
|
||||||
|
if isinstance(signal, Recording):
|
||||||
|
data = signal.data
|
||||||
|
else:
|
||||||
|
data = np.asarray(signal)
|
||||||
|
|
||||||
|
if data.ndim == 2 and np.iscomplexobj(data):
|
||||||
|
c, n = data.shape
|
||||||
|
else:
|
||||||
|
raise ValueError("signal must be CxN complex.")
|
||||||
|
|
||||||
|
if c == 1:
|
||||||
|
swapped_data = np.squeeze(data).imag + 1j * np.squeeze(data).real
|
||||||
|
else:
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
|
if isinstance(signal, Recording):
|
||||||
|
return Recording(data=swapped_data, metadata=signal.metadata)
|
||||||
|
else:
|
||||||
|
return swapped_data.reshape(c, n)
|
||||||
|
|
||||||
|
|
||||||
|
def amplitude_reversal(signal: ArrayLike | Recording) -> np.ndarray | Recording:
|
||||||
|
"""Negates the amplitudes of both the I (In-phase) and Q (Quadrature) data samples contained within the
|
||||||
|
provided `signal` array or `Recording`.
|
||||||
|
|
||||||
|
:param signal: Input IQ data as a complex CxN array or `Recording`, where C is the number of channels and N
|
||||||
|
is the length of the IQ examples.
|
||||||
|
:type signal: array_like or utils.data.Recording
|
||||||
|
|
||||||
|
:raises ValueError: If `signal` is not CxN complex.
|
||||||
|
|
||||||
|
:return: A numpy array containing the negated I and Q data samples if `signal` is an array.
|
||||||
|
If `signal` is a `Recording`, returns a `Recording` object with its `data` attribute containing the
|
||||||
|
negated array.
|
||||||
|
:rtype: np.ndarray or utils.data.Recording
|
||||||
|
|
||||||
|
>>> rec = Recording(data=[[4-3j, -5-2j, -9+1j]])
|
||||||
|
>>> new_rec = amplitude_reversal(rec)
|
||||||
|
>>> new_rec.data
|
||||||
|
array([[-4+3j, 5+2j, 9-1j]])
|
||||||
|
"""
|
||||||
|
|
||||||
|
if isinstance(signal, Recording):
|
||||||
|
data = signal.data
|
||||||
|
else:
|
||||||
|
data = np.asarray(signal)
|
||||||
|
|
||||||
|
if data.ndim == 2 and np.iscomplexobj(data):
|
||||||
|
c, n = data.shape
|
||||||
|
else:
|
||||||
|
raise ValueError("signal must be CxN complex.")
|
||||||
|
|
||||||
|
if c == 1:
|
||||||
|
reversed_data = -1 * np.squeeze(data).real - 1j * np.squeeze(data).imag
|
||||||
|
else:
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
|
if isinstance(signal, Recording):
|
||||||
|
return Recording(data=reversed_data, metadata=signal.metadata)
|
||||||
|
else:
|
||||||
|
return reversed_data.reshape(c, n)
|
||||||
|
|
||||||
|
|
||||||
|
def drop_samples( # noqa: C901 # TODO: Simplify function
|
||||||
|
signal: ArrayLike | Recording, max_section_size: Optional[int] = 2, fill_type: Optional[str] = "zeros"
|
||||||
|
) -> np.ndarray | Recording:
|
||||||
|
"""Randomly drops IQ data samples contained within the provided `signal` array or `Recording`.
|
||||||
|
|
||||||
|
This function randomly selects sections of the signal and replaces the data samples in each selected
|
||||||
|
section with a value determined by the fill type.
|
||||||
|
|
||||||
|
:param signal: Input IQ data as a complex CxN array or `Recording`, where C is the number of channels and N
|
||||||
|
is the length of the IQ examples.
|
||||||
|
:type signal: array_like or utils.data.Recording
|
||||||
|
:param max_section_size: Maximum allowable size of the section to be dropped and replaced. Default is 2.
|
||||||
|
:type max_section_size: int, optional
|
||||||
|
:param fill_type: Fill option used to replace dropped section of data (back-fill, front-fill, mean, zeros).
|
||||||
|
Default is "zeros".
|
||||||
|
|
||||||
|
|
||||||
|
"back-fill": replace dropped section with the data sample occuring before the section.
|
||||||
|
|
||||||
|
"front-fill": replace dropped section with the data sample occuring after the section.
|
||||||
|
|
||||||
|
"mean": replace dropped section with mean of the entire signal.
|
||||||
|
|
||||||
|
"zeros": replace dropped section with constant value of 0+0j.
|
||||||
|
:type fill_type: str, optional
|
||||||
|
|
||||||
|
:raises ValueError: If `signal` is not CxN complex.
|
||||||
|
:raises ValueError: If `max_section_size` is less than 1 or greater than or equal to length of `signal`.
|
||||||
|
|
||||||
|
:return: A numpy array containing the I and Q data samples with replaced subsections if
|
||||||
|
`signal` is an array. If `signal` is a `Recording`, returns a `Recording` object with its `data`
|
||||||
|
attribute containing the array with dropped samples.
|
||||||
|
:rtype: np.ndarray or utils.data.Recording
|
||||||
|
|
||||||
|
>>> rec = Recording(data=[[2+5j, 1+8j, 6+4j, 3+7j, 4+9j]])
|
||||||
|
>>> new_rec = drop_samples(rec)
|
||||||
|
>>> new_rec.data
|
||||||
|
array([[2+5j, 0, 0, 0, 4+9j]])
|
||||||
|
"""
|
||||||
|
if isinstance(signal, Recording):
|
||||||
|
data = signal.data
|
||||||
|
else:
|
||||||
|
data = np.asarray(signal)
|
||||||
|
|
||||||
|
if data.ndim == 2 and np.iscomplexobj(data):
|
||||||
|
c, n = data.shape
|
||||||
|
else:
|
||||||
|
raise ValueError("signal must be CxN complex.")
|
||||||
|
|
||||||
|
if max_section_size < 1 or max_section_size >= n:
|
||||||
|
raise ValueError("max_section_size must be at least 1 and must be less than the length of signal.")
|
||||||
|
|
||||||
|
if c == 1:
|
||||||
|
data = np.squeeze(data)
|
||||||
|
|
||||||
|
if fill_type == "mean":
|
||||||
|
mean = np.mean(data)
|
||||||
|
|
||||||
|
i = -1
|
||||||
|
j = -1
|
||||||
|
|
||||||
|
# Pointers i and j point to exact positions
|
||||||
|
while i < n:
|
||||||
|
# Generate valid starting point so that at least 1 drop occurs
|
||||||
|
i = np.random.randint(j + 1, j + n - max_section_size + 2)
|
||||||
|
j = np.random.randint(i, i + max_section_size)
|
||||||
|
|
||||||
|
if j > n - 1: # Check that the full drop is within the dataset
|
||||||
|
break
|
||||||
|
|
||||||
|
# Generate fill based on fill_type
|
||||||
|
if fill_type == "back-fill":
|
||||||
|
fill = data[i - 1] if i > 0 else data[i]
|
||||||
|
elif fill_type == "front-fill":
|
||||||
|
fill = data[j + 1] if j < n - 1 else data[j]
|
||||||
|
elif fill_type == "mean":
|
||||||
|
fill = mean
|
||||||
|
elif fill_type == "zeros":
|
||||||
|
fill = 0 + 0j
|
||||||
|
else:
|
||||||
|
raise ValueError(f"fill_type {fill_type} not recognized.")
|
||||||
|
|
||||||
|
# Replaces dropped samples with fill values
|
||||||
|
data[i : j + 1] = fill
|
||||||
|
else:
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
|
if isinstance(signal, Recording):
|
||||||
|
return Recording(data=data, metadata=signal.metadata)
|
||||||
|
else:
|
||||||
|
return data.reshape(c, n)
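# Illustrative sketch (assumed usage, not part of the original module): how the
# fill_type options behave on a 1xN recording. The sections dropped are chosen
# at random, so these calls are only representative, not reproducible outputs.
#
# >>> rec = Recording(data=[[1+1j, 2+2j, 3+3j, 4+4j]])
# >>> drop_samples(rec, max_section_size=2, fill_type="back-fill")  # repeats the sample before each dropped run
# >>> drop_samples(rec, max_section_size=2, fill_type="mean")       # fills with the mean of the whole tape
# >>> drop_samples(rec, max_section_size=2, fill_type="zeros")      # fills with 0+0j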
|
||||||
|
|
||||||
|
|
||||||
|
def quantize_tape(
|
||||||
|
signal: ArrayLike | Recording, bin_number: Optional[int] = 4, rounding_type: Optional[str] = "floor"
|
||||||
|
) -> np.ndarray | Recording:
|
||||||
|
"""Quantizes the IQ data of the provided `signal` array or `Recording` by a few bits.
|
||||||
|
|
||||||
|
This function emulates an analog-to-digital converter (ADC) which is commonly seen in digital RF systems.
|
||||||
|
The relationship between the number of bins and number of bits is: log(# of bins) / log(2) = # of bits.
|
||||||
|
|
||||||
|
:param signal: Input IQ data as a complex CxN array or `Recording`, where C is the number of channels and N
|
||||||
|
is the length of the IQ examples.
|
||||||
|
:type signal: array_like or utils.data.Recording
|
||||||
|
:param bin_number: The number of bins the signal should be divided into. Default is 4.
|
||||||
|
:type bin_number: int, optional
|
||||||
|
:param rounding_type: The type of rounding applied during processing. Default is "floor".
|
||||||
|
|
||||||
|
"floor": rounds down to the lower bound of the bin.
|
||||||
|
|
||||||
|
"ceiling": rounds up to the upper bound of the bin.
|
||||||
|
:type rounding_type: str, optional
|
||||||
|
|
||||||
|
:raises ValueError: If `signal` is not CxN complex.
|
||||||
|
:raises UserWarning: If `rounding_type` is not "floor" or "ceiling", "floor" is selected by default.
|
||||||
|
|
||||||
|
:return: A numpy array containing the quantized I and Q data samples if `signal` is an array.
|
||||||
|
If `signal` is a `Recording`, returns a `Recording` object with its `data` attribute containing
|
||||||
|
the quantized array.
|
||||||
|
:rtype: np.ndarray or utils.data.Recording
|
||||||
|
|
||||||
|
>>> rec = Recording(data=[[1+1j, 4+4j, 1+2j, 1+4j]])
|
||||||
|
>>> new_rec = quantize_tape(rec)
|
||||||
|
>>> new_rec.data
|
||||||
|
array([[4+4j, 3+3j, 4+1j, 4+3j]])
|
||||||
|
"""
|
||||||
|
if isinstance(signal, Recording):
|
||||||
|
data = signal.data
|
||||||
|
else:
|
||||||
|
data = np.asarray(signal)
|
||||||
|
|
||||||
|
if data.ndim == 2 and np.iscomplexobj(data):
|
||||||
|
c, n = data.shape
|
||||||
|
else:
|
||||||
|
raise ValueError("signal must be CxN complex.")
|
||||||
|
|
||||||
|
if rounding_type not in {"ceiling", "floor"}:
|
||||||
|
raise UserWarning('rounding_type must be either "floor" or "ceiling", floor has been selected by default')
|
||||||
|
|
||||||
|
if c == 1:
|
||||||
|
iq_data = convert_to_2xn(data)
|
||||||
|
maximum, minimum = iq_data.max(), iq_data.min()
|
||||||
|
bin_edges = np.linspace(minimum, maximum, bin_number + 1)
|
||||||
|
indices = np.digitize(iq_data, bin_edges, right=True)
|
||||||
|
|
||||||
|
# If data falls outside the first bin, map it back into the first bin, data will not fall outside of last bin
|
||||||
|
indices[indices == 0] = 1
|
||||||
|
|
||||||
|
# Map the data points to the correct bins
|
||||||
|
if rounding_type == "ceiling":
|
||||||
|
modified_iq_data = bin_edges[indices]
|
||||||
|
else:
|
||||||
|
modified_iq_data = bin_edges[indices - 1]
|
||||||
|
|
||||||
|
new_data = modified_iq_data[0] + 1j * modified_iq_data[1]
|
||||||
|
else:
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
|
if isinstance(signal, Recording):
|
||||||
|
return Recording(data=new_data, metadata=signal.metadata)
|
||||||
|
else:
|
||||||
|
return new_data.reshape(c, n)
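# Illustrative note (worked example of the bins/bits relationship quoted in the
# docstring, not part of the original module): with bin_number = 4,
# log(4) / log(2) = 2, so the quantizer emulates a 2-bit ADC; bin_number = 256
# would correspond to an 8-bit ADC.
#
# >>> rec = Recording(data=[[0+0j, 1+1j, 2+2j, 3+3j]])
# >>> quantize_tape(rec, bin_number=4, rounding_type="floor")  # 2-bit style quantization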
|
||||||
|
|
||||||
|
|
||||||
|
def quantize_parts(
|
||||||
|
signal: ArrayLike | Recording,
|
||||||
|
max_section_size: Optional[int] = 2,
|
||||||
|
bin_number: Optional[int] = 4,
|
||||||
|
rounding_type: Optional[str] = "floor",
|
||||||
|
) -> np.ndarray | Recording:
|
||||||
|
"""Quantizes random parts of the IQ data within the provided `signal` array or `Recording` by a few bits.
|
||||||
|
|
||||||
|
This function emulates an analog-to-digital converter (ADC) which is commonly seen in digital RF systems.
|
||||||
|
The relationship between the number of bins and number of bits is: log(# of bins) / log(2) = # of bits.
|
||||||
|
|
||||||
|
:param signal: Input IQ data as a complex CxN array or `Recording`, where C is the number of channels and N
|
||||||
|
is the length of the IQ examples.
|
||||||
|
:type signal: array_like or utils.data.Recording
|
||||||
|
:param max_section_size: Maximum allowable size of the section to be quantized. Default is 2.
|
||||||
|
:type max_section_size: int, optional
|
||||||
|
:param bin_number: The number of bins the signal should be divided into. Default is 4.
|
||||||
|
:type bin_number: int, optional
|
||||||
|
:param rounding_type: Type of rounding applied during processing. Default is "floor".
|
||||||
|
|
||||||
|
"floor": rounds down to the lower bound of the bin.
|
||||||
|
|
||||||
|
"ceiling": rounds up to the upper bound of the bin.
|
||||||
|
:type rounding_type: str, optional
|
||||||
|
|
||||||
|
:raises ValueError: If `signal` is not CxN complex.
|
||||||
|
:raises UserWarning: If `rounding_type` is not "floor" or "ceiling", "floor" is selected by default.
|
||||||
|
|
||||||
|
:return: A numpy array containing the I and Q data samples with quantized subsections if `signal`
|
||||||
|
is an array. If `signal` is a `Recording`, returns a `Recording` object with its `data` attribute
|
||||||
|
containing the partially quantized array.
|
||||||
|
:rtype: np.ndarray or utils.data.Recording
|
||||||
|
|
||||||
|
>>> rec = Recording(data=[[2+5j, 1+8j, 6+4j, 3+7j, 4+9j]])
|
||||||
|
>>> new_rec = quantize_parts(rec)
|
||||||
|
>>> new_rec.data
|
||||||
|
array([[2+5j, 1+8j, 3.66666667+3.66666667j, 3+7j, 4+9j]])
|
||||||
|
"""
|
||||||
|
|
||||||
|
if isinstance(signal, Recording):
|
||||||
|
data = signal.data
|
||||||
|
else:
|
||||||
|
data = np.asarray(signal)
|
||||||
|
|
||||||
|
if data.ndim == 2 and np.iscomplexobj(data):
|
||||||
|
c, n = data.shape
|
||||||
|
else:
|
||||||
|
raise ValueError("signal must be CxN complex.")
|
||||||
|
|
||||||
|
if rounding_type not in {"ceiling", "floor"}:
|
||||||
|
raise UserWarning('rounding_type must be either "floor" or "ceiling", floor has been selected by default')
|
||||||
|
|
||||||
|
if c == 1:
|
||||||
|
iq_data = convert_to_2xn(data)
|
||||||
|
i_data, q_data = iq_data
|
||||||
|
maximum, minimum = iq_data.max(), iq_data.min()
|
||||||
|
bin_edges = np.linspace(minimum, maximum, bin_number + 1)
|
||||||
|
indices = np.digitize(iq_data, bin_edges, right=True)
|
||||||
|
|
||||||
|
# Map everything from bin 0 to bin 1
|
||||||
|
indices[indices == 0] = 1
|
||||||
|
|
||||||
|
i = -1
|
||||||
|
j = -1
|
||||||
|
|
||||||
|
# Pointers i and j point to exact positions
|
||||||
|
while i < n:
|
||||||
|
# Generate valid starting point so that at least 1 drop occurs
|
||||||
|
i = np.random.randint(j + 1, j + n - max_section_size + 2)
|
||||||
|
j = np.random.randint(i, i + max_section_size)
|
||||||
|
|
||||||
|
if j > n - 1: # Check that the full drop is within the dataset
|
||||||
|
break
|
||||||
|
|
||||||
|
if rounding_type == "ceiling":
|
||||||
|
i_data[i : j + 1] = bin_edges[indices[0][i : j + 1]]
|
||||||
|
q_data[i : j + 1] = bin_edges[indices[1][i : j + 1]]
|
||||||
|
else:
|
||||||
|
i_data[i : j + 1] = bin_edges[indices[0][i : j + 1] - 1]
|
||||||
|
q_data[i : j + 1] = bin_edges[indices[1][i : j + 1] - 1]
|
||||||
|
|
||||||
|
quantized_data = i_data + 1j * q_data
|
||||||
|
else:
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
|
if isinstance(signal, Recording):
|
||||||
|
return Recording(data=quantized_data, metadata=signal.metadata)
|
||||||
|
else:
|
||||||
|
return quantized_data.reshape(c, n)
|
||||||
|
|
||||||
|
|
||||||
|
def magnitude_rescale(
|
||||||
|
signal: ArrayLike | Recording,
|
||||||
|
starting_bounds: Optional[tuple] = None,
|
||||||
|
max_magnitude: Optional[int] = 1,
|
||||||
|
) -> np.ndarray | Recording:
|
||||||
|
"""Selects a random starting point from within the specified starting bounds and multiplies IQ data of the
|
||||||
|
provided `signal` array or `Recording` from that point onward by a random constant.
|
||||||
|
|
||||||
|
:param signal: Input IQ data as a complex CxN array or `Recording`, where C is the number of channels and N
|
||||||
|
is the length of the IQ examples.
|
||||||
|
:type signal: array_like or utils.data.Recording
|
||||||
|
:param starting_bounds: The bounds (inclusive) as indices in which the starting position of the rescaling occurs.
|
||||||
|
Default is None, in which case the bounds default to (random index, N-1).
|
||||||
|
:type starting_bounds: tuple, optional
|
||||||
|
:param max_magnitude: The maximum value of the constant that is used to rescale the data. Default is 1.
|
||||||
|
:type max_magnitude: int, optional
|
||||||
|
|
||||||
|
:raises ValueError: If `signal` is not CxN complex.
|
||||||
|
|
||||||
|
:return: A numpy array containing the I and Q data samples with the rescaled magnitude after the random
|
||||||
|
starting point if `signal` is an array. If `signal` is a `Recording`, returns a `Recording`
|
||||||
|
object with its `data` attribute containing the rescaled array.
|
||||||
|
:rtype: np.ndarray or utils.data.Recording
|
||||||
|
|
||||||
|
>>> rec = Recording(data=[[2+5j, 1+8j, 6+4j, 3+7j, 4+9j]])
|
||||||
|
>>> new_rec = magnitude_rescale(rec)
|
||||||
|
>>> new_rec.data
|
||||||
|
array([[2+5j, 1+8j, 6+4j, 3+7j, 3.03181761+6.82158963j]])
|
||||||
|
"""
|
||||||
|
|
||||||
|
if isinstance(signal, Recording):
|
||||||
|
data = signal.data
|
||||||
|
else:
|
||||||
|
data = np.asarray(signal)
|
||||||
|
|
||||||
|
if data.ndim == 2 and np.iscomplexobj(data):
|
||||||
|
c, n = data.shape
|
||||||
|
else:
|
||||||
|
raise ValueError("signal must be CxN complex.")
|
||||||
|
|
||||||
|
if starting_bounds is None:
|
||||||
|
starting_bounds = (np.random.randint(0, n), n - 1)
|
||||||
|
|
||||||
|
if starting_bounds[0] < 0 or starting_bounds[1] > n - 1:
|
||||||
|
raise ValueError("starting_bounds must be valid indices for the dataset.")
|
||||||
|
|
||||||
|
if c == 1:
|
||||||
|
data = np.squeeze(data)
|
||||||
|
starting_point = np.random.randint(starting_bounds[0], starting_bounds[1] + 1)
|
||||||
|
magnitude = np.random.rand() * max_magnitude
|
||||||
|
|
||||||
|
rescaled_section = data[starting_point:] * magnitude
|
||||||
|
rescaled_data = np.concatenate((data[:starting_point], rescaled_section))
|
||||||
|
else:
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
|
if isinstance(signal, Recording):
|
||||||
|
return Recording(data=rescaled_data, metadata=signal.metadata)
|
||||||
|
else:
|
||||||
|
return rescaled_data.reshape(c, n)
|
||||||
|
|
||||||
|
|
||||||
|
def cut_out( # noqa: C901 # TODO: Simplify function
|
||||||
|
signal: ArrayLike | Recording, max_section_size: Optional[int] = 3, fill_type: Optional[str] = "ones"
|
||||||
|
) -> np.ndarray | Recording:
|
||||||
|
"""Cuts out random sections of IQ data and replaces them with either 0s, 1s, or low, average, or high
|
||||||
|
signal-to-noise ratio (SNR) additive white Gaussian noise (AWGN) within the provided `signal` array or
|
||||||
|
`Recording`.
|
||||||
|
|
||||||
|
:param signal: Input IQ data as a complex CxN array or `Recording`, where C is the number of channels and N
|
||||||
|
is the length of the IQ examples.
|
||||||
|
:type signal: array_like or utils.data.Recording
|
||||||
|
:param max_section_size: Maximum allowable size of the section to be cut out and replaced. Default is 3.
|
||||||
|
:type max_section_size: int, optional
|
||||||
|
:param fill_type: Fill option used to replace cutout section of data (zeros, ones, low-snr, avg-snr, high-snr).
|
||||||
|
Default is "ones".
|
||||||
|
|
||||||
|
"zeros": replace cutout section with 0s.
|
||||||
|
|
||||||
|
"ones": replace cutout section with 1s.
|
||||||
|
|
||||||
|
"low-snr": replace cutout section with AWGN with an SNR of 0.5.
|
||||||
|
|
||||||
|
"avg-snr": replace cutout section with AWGN with an SNR of 1.
|
||||||
|
|
||||||
|
"high-snr": replace cutout section with AWGN with an SNR of 2.
|
||||||
|
:type fill_type: str, optional
|
||||||
|
|
||||||
|
:raises ValueError: If `signal` is not CxN complex.
|
||||||
|
:raises UserWarning: If fill_type is not "zeros", "ones", "low-snr", "avg-snr", or "high-snr", "ones" is selected
|
||||||
|
by default.
|
||||||
|
:raises ValueError: If `max_section_size` is less than 1 or greater than or equal to length of `signal`.
|
||||||
|
|
||||||
|
:return: A numpy array containing the I and Q data samples with random sections cut out and replaced according to
|
||||||
|
`fill_type` if `signal` is an array. If `signal` is a `Recording`, returns a `Recording` object
|
||||||
|
with its `data` attribute containing the cut out and replaced array.
|
||||||
|
:rtype: np.ndarray or utils.data.Recording
|
||||||
|
|
||||||
|
>>> rec = Recording(data=[[2+5j, 1+8j, 6+4j, 3+7j, 4+9j]])
|
||||||
|
>>> new_rec = cut_out(rec)
|
||||||
|
>>> new_rec.data
|
||||||
|
array([[2+5j, 1+8j, 1+1j, 1+1j, 1+1j]])
|
||||||
|
"""
|
||||||
|
if isinstance(signal, Recording):
|
||||||
|
data = signal.data
|
||||||
|
else:
|
||||||
|
data = np.asarray(signal)
|
||||||
|
|
||||||
|
if data.ndim == 2 and np.iscomplexobj(data):
|
||||||
|
c, n = data.shape
|
||||||
|
else:
|
||||||
|
raise ValueError("signal must be CxN complex.")
|
||||||
|
|
||||||
|
if fill_type not in {"zeros", "ones", "low-snr", "avg-snr", "high-snr"}:
|
||||||
|
raise UserWarning(
|
||||||
|
"""fill_type must be "zeros", "ones", "low-snr", "avg-snr", or "high-snr",
|
||||||
|
"ones" has been selected by default"""
|
||||||
|
)
|
||||||
|
|
||||||
|
if max_section_size < 1 or max_section_size >= n:
|
||||||
|
raise ValueError("max_section_size must be at least 1 and must be less than the length of signal.")
|
||||||
|
|
||||||
|
if c == 1:
|
||||||
|
data = np.squeeze(data)
|
||||||
|
|
||||||
|
i = -1
|
||||||
|
j = -1
|
||||||
|
|
||||||
|
# Pointers i and j point to exact positions
|
||||||
|
while i < n:
|
||||||
|
# Generate valid starting point so that at least 1 drop occurs
|
||||||
|
i = np.random.randint(j + 1, j + n - max_section_size + 2)
|
||||||
|
j = np.random.randint(i, i + max_section_size)
|
||||||
|
|
||||||
|
if j > n - 1: # Check that the full drop is within the dataset
|
||||||
|
break
|
||||||
|
|
||||||
|
# TODO: Check if we can collapse last three options which depends on what snr value the user enters
|
||||||
|
if fill_type == "zeros":
|
||||||
|
fill = 0 + 0j
|
||||||
|
elif fill_type == "ones":
|
||||||
|
fill = 1 + 1j
|
||||||
|
elif fill_type == "low-snr":
|
||||||
|
fill = generate_awgn([data[i : j + 1]], 0.5)
|
||||||
|
elif fill_type == "avg-snr":
|
||||||
|
fill = generate_awgn([data[i : j + 1]], 1)
|
||||||
|
else:
|
||||||
|
fill = generate_awgn([data[i : j + 1]], 2)
|
||||||
|
|
||||||
|
data[i : j + 1] = np.squeeze(fill)  # squeeze the 1xM AWGN fills so they match the 1-D slice
|
||||||
|
else:
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
|
if isinstance(signal, Recording):
|
||||||
|
return Recording(data=data, metadata=signal.metadata)
|
||||||
|
else:
|
||||||
|
return data.reshape(c, n)
|
||||||
|
|
||||||
|
|
||||||
|
def patch_shuffle(signal: ArrayLike | Recording, max_patch_size: Optional[int] = 3) -> np.ndarray | Recording:
|
||||||
|
"""Selects random patches of the IQ data and randomly shuffles the data samples within the specified patch of
|
||||||
|
the provided `signal` array or `Recording`.
|
||||||
|
|
||||||
|
:param signal: Input IQ data as a complex CxN array or `Recording`, where C is the number of channels and N
|
||||||
|
is the length of the IQ examples.
|
||||||
|
:type signal: array_like or utils.data.Recording
|
||||||
|
:param max_patch_size: Maximum allowable patch size of the data that can be shuffled. Default is 3.
|
||||||
|
:type max_patch_size: int, optional
|
||||||
|
|
||||||
|
:raises ValueError: If `signal` is not CxN complex.
|
||||||
|
:raises ValueError: If `max_patch_size` is less than or equal to 1 or greater than length of `signal`.
|
||||||
|
|
||||||
|
:return: A numpy array containing the I and Q data samples with randomly shuffled regions if `signal` is
|
||||||
|
an array. If `signal` is a `Recording`, returns a `Recording` object with its `data` attribute containing
|
||||||
|
the shuffled array.
|
||||||
|
:rtype: np.ndarray or utils.data.Recording
|
||||||
|
|
||||||
|
>>> rec = Recording(data=[[2+5j, 1+8j, 6+4j, 3+7j, 4+9j]])
|
||||||
|
>>> new_rec = patch_shuffle(rec)
|
||||||
|
>>> new_rec.data
|
||||||
|
array([[2+5j, 1+8j, 3+4j, 6+9j, 4+7j]])
|
||||||
|
"""
|
||||||
|
if isinstance(signal, Recording):
|
||||||
|
data = signal.data.copy() # Cannot shuffle read-only array.
|
||||||
|
else:
|
||||||
|
data = np.asarray(signal)
|
||||||
|
|
||||||
|
if data.ndim == 2 and np.iscomplexobj(data):
|
||||||
|
c, n = data.shape
|
||||||
|
else:
|
||||||
|
raise ValueError("signal must be CxN complex.")
|
||||||
|
|
||||||
|
if max_patch_size > n or max_patch_size <= 1:
|
||||||
|
raise ValueError("max_patch_size must be less than or equal to the length of signal and greater than 1.")
|
||||||
|
|
||||||
|
if c == 1:
|
||||||
|
data = np.squeeze(data)
|
||||||
|
|
||||||
|
i = -1
|
||||||
|
j = -1
|
||||||
|
|
||||||
|
# Pointers i and j point to exact positions
|
||||||
|
while i < n:
|
||||||
|
# Generate valid starting point so that at least 1 drop occurs
|
||||||
|
i = np.random.randint(j + 1, j + n - max_patch_size + 2)
|
||||||
|
j = np.random.randint(i, i + max_patch_size)
|
||||||
|
|
||||||
|
if j > n - 1: # Check that the full drop is within the dataset
|
||||||
|
break
|
||||||
|
|
||||||
|
np.random.shuffle(data.real[i : j + 1])
|
||||||
|
np.random.shuffle(data.imag[i : j + 1])
|
||||||
|
else:
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
|
if isinstance(signal, Recording):
|
||||||
|
return Recording(data=data, metadata=signal.metadata)
|
||||||
|
else:
|
||||||
|
return data.reshape(c, n)
|
365
src/ria_toolkit/transforms/iq_impairments.py
Normal file
365
src/ria_toolkit/transforms/iq_impairments.py
Normal file
|
@ -0,0 +1,365 @@
|
||||||
|
"""
|
||||||
|
This module comprises various transforms designed to represent signal impairments.
|
||||||
|
These transforms take a recording as input and return a corresponding recording with
|
||||||
|
the impairment model applied; we call the latter an impaired recording.
|
||||||
|
|
||||||
|
Signals travel through transmission media, which are not perfect. The imperfection
|
||||||
|
causes signal impairment, meaning that the signal at the beginning of the medium is
|
||||||
|
not the same as the signal at the end of the medium. What is sent is not what is received.
|
||||||
|
Three causes of impairment are attenuation, distortion, and noise.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
from numpy.typing import ArrayLike
|
||||||
|
from scipy.signal import resample_poly
|
||||||
|
|
||||||
|
from utils.data import Recording
|
||||||
|
from utils.transforms import iq_augmentations
|
||||||
|
|
||||||
|
|
||||||
|
def add_awgn_to_signal(signal: ArrayLike | Recording, snr: Optional[float] = 1) -> np.ndarray | Recording:
|
||||||
|
"""Generates additive white gaussian noise (AWGN) relative to the signal-to-noise ratio (SNR) of the
|
||||||
|
provided `signal` array or `Recording`.
|
||||||
|
|
||||||
|
This function calculates the root mean squared (RMS) power of `signal` and then finds the RMS power of the noise
|
||||||
|
which matches the specified SNR. The AWGN is then generated by computing the noise variance and drawing random
|
||||||
|
amplitudes and phases for each sample. Finally, the generated AWGN is added to the original signal and
|
||||||
|
returned.
|
||||||
|
|
||||||
|
:param signal: Input IQ data as a complex ``C x N`` array or `Recording`, where ``C`` is the number of channels
|
||||||
|
and ``N`` is the length of the IQ examples.
|
||||||
|
:type signal: array_like or utils.data.Recording
|
||||||
|
:param snr: The signal-to-noise ratio in dB. Default is 1.
|
||||||
|
:type snr: float, optional
|
||||||
|
|
||||||
|
:raises ValueError: If `signal` is not CxN complex.
|
||||||
|
|
||||||
|
:return: A numpy array which is the sum of the noise (which matches the SNR) and the original signal. If `signal`
|
||||||
|
is a `Recording`, returns a `Recording object` with its `data` attribute containing the noisy signal array.
|
||||||
|
:rtype: np.ndarray or utils.data.Recording
|
||||||
|
|
||||||
|
>>> rec = Recording(data=[[1+1j, 2+2j]])
|
||||||
|
>>> new_rec = add_awgn_to_signal(rec)
|
||||||
|
>>> new_rec.data
|
||||||
|
array([[0.83141973+0.32529242j, -1.00909846+2.39282713j]])
|
||||||
|
"""
|
||||||
|
|
||||||
|
if isinstance(signal, Recording):
|
||||||
|
data = signal.data
|
||||||
|
else:
|
||||||
|
data = np.asarray(signal)
|
||||||
|
|
||||||
|
if data.ndim != 2 or not np.iscomplexobj(data):
|
||||||
|
raise ValueError("signal must be CxN complex.")
|
||||||
|
|
||||||
|
noise = iq_augmentations.generate_awgn(signal=data, snr=snr)
|
||||||
|
print(f"noise is {noise}")
|
||||||
|
|
||||||
|
noisy_signal = data + noise
|
||||||
|
|
||||||
|
if isinstance(signal, Recording):
|
||||||
|
return Recording(data=noisy_signal, metadata=signal.metadata)
|
||||||
|
else:
|
||||||
|
return noisy_signal
|
||||||
|
|
||||||
|
|
||||||
|
def time_shift(signal: ArrayLike | Recording, shift: Optional[int] = 1) -> np.ndarray | Recording:
|
||||||
|
"""Apply a time shift to a signal.
|
||||||
|
|
||||||
|
After the time shift is applied, we fill any empty regions with zeros.
|
||||||
|
|
||||||
|
:param signal: Input IQ data as a complex CxN array or `Recording`, where C is the number of channels and N
|
||||||
|
is the length of the IQ examples.
|
||||||
|
:type signal: array_like or utils.data.Recording
|
||||||
|
:param shift: The number of indices to shift by. Default is 1.
|
||||||
|
:type shift: int, optional
|
||||||
|
|
||||||
|
:raises ValueError: If `signal` is not CxN complex.
|
||||||
|
:raises UserWarning: If `shift` is greater than length of `signal`.
|
||||||
|
|
||||||
|
:return: A numpy array which represents the time-shifted signal. If `signal` is a `Recording`,
|
||||||
|
returns a `Recording object` with its `data` attribute containing the time-shifted array.
|
||||||
|
:rtype: np.ndarray or utils.data.Recording
|
||||||
|
|
||||||
|
>>> rec = Recording(data=[[1+1j, 2+2j, 3+3j, 4+4j, 5+5j]])
|
||||||
|
>>> new_rec = time_shift(rec, -2)
|
||||||
|
>>> new_rec.data
|
||||||
|
array([[3+3j, 4+4j, 5+5j, 0+0j, 0+0j]])
|
||||||
|
"""
|
||||||
|
# TODO: Additional info needs to be added to docstring description
|
||||||
|
|
||||||
|
if isinstance(signal, Recording):
|
||||||
|
data = signal.data
|
||||||
|
else:
|
||||||
|
data = np.asarray(signal)
|
||||||
|
|
||||||
|
if data.ndim == 2 and np.iscomplexobj(data):
|
||||||
|
c, n = data.shape
|
||||||
|
else:
|
||||||
|
raise ValueError("signal must be CxN complex.")
|
||||||
|
|
||||||
|
if shift > n:
|
||||||
|
raise UserWarning("shift is greater than signal length")
|
||||||
|
|
||||||
|
shifted_data = np.zeros_like(data)
|
||||||
|
|
||||||
|
if c == 1:
|
||||||
|
# New iq array shifted left or right depending on sign of shift
|
||||||
|
# This also works when |shift| exceeds data.shape[1]
|
||||||
|
if shift >= 0:
|
||||||
|
# Shift to right
|
||||||
|
shifted_data[:, shift:] = data[:, : max(n - shift, 0)]  # handles shift == 0 and shift > n
|
||||||
|
|
||||||
|
else:
|
||||||
|
# Shift to the left
|
||||||
|
shifted_data[:, :shift] = data[:, -shift:]
|
||||||
|
else:
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
|
if isinstance(signal, Recording):
|
||||||
|
return Recording(data=shifted_data, metadata=signal.metadata)
|
||||||
|
else:
|
||||||
|
return shifted_data
|
||||||
|
|
||||||
|
|
||||||
|
def frequency_shift(signal: ArrayLike | Recording, shift: Optional[float] = 0.5) -> np.ndarray | Recording:
|
||||||
|
"""Apply a frequency shift to a signal.
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
The frequency shift is applied relative to the sample rate.
|
||||||
|
|
||||||
|
:param signal: Input IQ data as a complex CxN array or `Recording`, where C is the number of channels and N
|
||||||
|
is the length of the IQ examples.
|
||||||
|
:type signal: array_like or utils.data.Recording
|
||||||
|
:param shift: The frequency shift relative to the sample rate. Must be in the range ``[-0.5, 0.5]``.
|
||||||
|
Default is 0.5.
|
||||||
|
:type shift: float, optional
|
||||||
|
|
||||||
|
:raises ValueError: If the provided frequency shift is not in the range ``[-0.5, 0.5]``.
|
||||||
|
:raises ValueError: If `signal` is not CxN complex.
|
||||||
|
|
||||||
|
:return: A numpy array which represents the frequency-shifted signal. If `signal` is a `Recording`,
|
||||||
|
returns a `Recording object` with its `data` attribute containing the frequency-shifted array.
|
||||||
|
:rtype: np.ndarray or utils.data.Recording
|
||||||
|
|
||||||
|
>>> rec = Recording(data=[[1+1j, 2+2j, 3+3j, 4+4j]])
|
||||||
|
>>> new_rec = frequency_shift(rec, -0.4)
|
||||||
|
>>> new_rec.data
|
||||||
|
array([[1+1j, -0.44246348-2.79360449j, -1.92611857+3.78022053j, 5.04029404-2.56815809j]])
|
||||||
|
"""
|
||||||
|
# TODO: Additional info needs to be added to docstring description
|
||||||
|
|
||||||
|
if shift > 0.5 or shift < -0.5:
|
||||||
|
raise ValueError("Frequency shift must be in the range [-0.5, 0.5]")
|
||||||
|
|
||||||
|
if isinstance(signal, Recording):
|
||||||
|
data = signal.data
|
||||||
|
else:
|
||||||
|
data = np.asarray(signal)
|
||||||
|
|
||||||
|
if data.ndim == 2 and np.iscomplexobj(data):
|
||||||
|
c, n = data.shape
|
||||||
|
else:
|
||||||
|
raise ValueError("signal must be CxN complex.")
|
||||||
|
|
||||||
|
shifted_data = np.zeros_like(data)
|
||||||
|
|
||||||
|
if c == 1:
|
||||||
|
# Calculate the phase shift for the frequency shift
|
||||||
|
phase_shift_ = 2.0 * np.pi * shift * np.arange(n)
|
||||||
|
|
||||||
|
# Use trigonometric identities to apply the frequency shift
|
||||||
|
shifted_data.real = data.real * np.cos(phase_shift_) - data.imag * np.sin(phase_shift_)
|
||||||
|
shifted_data.imag = data.real * np.sin(phase_shift_) + data.imag * np.cos(phase_shift_)
|
||||||
|
else:
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
|
if isinstance(signal, Recording):
|
||||||
|
return Recording(data=shifted_data, metadata=signal.metadata)
|
||||||
|
else:
|
||||||
|
return shifted_data
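# Equivalence note (illustrative, not part of the original module): the two
# real-valued updates above are the trigonometric expansion of a single complex
# rotation, so for a 1xN complex array `data` the result equals
#
# >>> shifted = data * np.exp(2j * np.pi * shift * np.arange(data.shape[1]))
#
# which may be useful when verifying the transform.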
|
||||||
|
|
||||||
|
|
||||||
|
def phase_shift(signal: ArrayLike | Recording, phase: Optional[float] = np.pi) -> np.ndarray | Recording:
|
||||||
|
"""Apply a phase shift to a signal.
|
||||||
|
|
||||||
|
:param signal: Input IQ data as a complex CxN array or `Recording`, where C is the number of channels and N
|
||||||
|
is the length of the IQ examples.
|
||||||
|
:type signal: array_like or utils.data.Recording
|
||||||
|
:param phase: The phase angle by which to rotate the IQ samples, in radians. Must be in the range ``[-π, π]``.
|
||||||
|
Default is π.
|
||||||
|
:type phase: float, optional
|
||||||
|
|
||||||
|
:raises ValueError: If the provided phase rotation is not in the range ``[-π, π]``.
|
||||||
|
:raises ValueError: If `signal` is not CxN complex.
|
||||||
|
|
||||||
|
:return: A numpy array which represents the phase-shifted signal. If `signal` is a `Recording`,
|
||||||
|
returns a `Recording object` with its `data` attribute containing the phase-shifted array.
|
||||||
|
:rtype: np.ndarray or utils.data.Recording
|
||||||
|
|
||||||
|
>>> rec = Recording(data=[[1+1j, 2+2j, 3+3j, 4+4j]])
|
||||||
|
>>> new_rec = phase_shift(rec, np.pi/2)
|
||||||
|
>>> new_rec.data
|
||||||
|
array([[-1+1j, -2+2j, -3+3j, -4+4j]])
|
||||||
|
"""
|
||||||
|
# TODO: Additional info needs to be added to docstring description
|
||||||
|
|
||||||
|
if phase > np.pi or phase < -np.pi:
|
||||||
|
raise ValueError("Phase rotation must be in the range [-π, π]")
|
||||||
|
|
||||||
|
if isinstance(signal, Recording):
|
||||||
|
data = signal.data
|
||||||
|
else:
|
||||||
|
data = np.asarray(signal)
|
||||||
|
|
||||||
|
if data.ndim == 2 and np.iscomplexobj(data):
|
||||||
|
c, n = data.shape
|
||||||
|
else:
|
||||||
|
raise ValueError("signal must be CxN complex.")
|
||||||
|
|
||||||
|
if c == 1:
|
||||||
|
shifted_data = data * np.exp(1j * phase)
|
||||||
|
else:
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
|
if isinstance(signal, Recording):
|
||||||
|
return Recording(data=shifted_data, metadata=signal.metadata)
|
||||||
|
else:
|
||||||
|
return shifted_data
|
||||||
|
|
||||||
|
|
||||||
|
def iq_imbalance(
|
||||||
|
signal: ArrayLike | Recording,
|
||||||
|
amplitude_imbalance: Optional[float] = 1.5,
|
||||||
|
phase_imbalance: Optional[float] = np.pi,
|
||||||
|
dc_offset: Optional[float] = 1.5,
|
||||||
|
) -> np.ndarray | Recording:
|
||||||
|
"""Apply an IQ Imbalance to a signal.
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
Based on MathWorks' `I/Q Imbalance <https://www.mathworks.com/help/comm/ref/iqimbalance.html>`_.
|
||||||
|
|
||||||
|
:param signal: Input IQ data as a complex CxN array or `Recording`, where C is the number of channels and N
|
||||||
|
is the length of the IQ examples.
|
||||||
|
:type signal: array_like or utils.data.Recording
|
||||||
|
:param amplitude_imbalance: The IQ amplitude imbalance to apply, in dB. Default is 1.5.
|
||||||
|
:type amplitude_imbalance: float, optional
|
||||||
|
:param phase_imbalance: The IQ phase imbalance to apply, in radians. Default is π.
|
||||||
|
Must be in the range ``[-π, π]``.
|
||||||
|
:type phase_imbalance: float, optional
|
||||||
|
:param dc_offset: The IQ DC offset to apply, in dB. Default is 1.5.
|
||||||
|
:type dc_offset: float, optional
|
||||||
|
|
||||||
|
:raises ValueError: If the phase imbalance is not in the range ``[-π, π]``.
|
||||||
|
:raises ValueError: If `signal` is not CxN complex.
|
||||||
|
|
||||||
|
:return: A numpy array which is the original signal with an applied IQ imbalance. If `signal` is a `Recording`,
|
||||||
|
returns a `Recording object` with its `data` attribute containing the IQ imbalanced signal array.
|
||||||
|
:rtype: np.ndarray or utils.data.Recording
|
||||||
|
|
||||||
|
>>> rec = Recording(data=[[2+18j, -34+2j, 3+9j]])
|
||||||
|
>>> new_rec = iq_imbalance(rec, 1, np.pi, 2)
|
||||||
|
>>> new_rec.data
|
||||||
|
array([[-38.38613587-4.78555031j, -4.26512621+81.35435535j, -19.19306793-7.17832547j]])
|
||||||
|
"""
|
||||||
|
# TODO: Additional info needs to be added to docstring description
|
||||||
|
|
||||||
|
if phase_imbalance > np.pi or phase_imbalance < -np.pi:
|
||||||
|
raise ValueError("Phase imbalance must be in the range [-π, π].")
|
||||||
|
|
||||||
|
if isinstance(signal, Recording):
|
||||||
|
data = signal.data
|
||||||
|
else:
|
||||||
|
data = np.asarray(signal)
|
||||||
|
|
||||||
|
if data.ndim == 2 and np.iscomplexobj(data):
|
||||||
|
c, n = data.shape
|
||||||
|
else:
|
||||||
|
raise ValueError("signal must be CxN complex.")
|
||||||
|
|
||||||
|
if c == 1:
|
||||||
|
# Apply amplitude imbalance
|
||||||
|
data = (
|
||||||
|
10 ** (0.5 * amplitude_imbalance / 20.0) * data.real
|
||||||
|
+ 1j * 10 ** (-0.5 * amplitude_imbalance / 20.0) * data.imag
|
||||||
|
)
|
||||||
|
|
||||||
|
# Apply phase imbalance
|
||||||
|
data = (
|
||||||
|
np.exp(-1j * phase_imbalance / 2.0) * data.real
|
||||||
|
+ np.exp(1j * (np.pi / 2.0 + phase_imbalance / 2.0)) * data.imag
|
||||||
|
)
|
||||||
|
|
||||||
|
# Apply DC offset
|
||||||
|
imbalanced_data = data + (10 ** (dc_offset / 20.0) * data.real + 1j * 10 ** (dc_offset / 20.0) * data.imag)
|
||||||
|
else:
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
|
if isinstance(signal, Recording):
|
||||||
|
return Recording(data=imbalanced_data, metadata=signal.metadata)
|
||||||
|
else:
|
||||||
|
return imbalanced_data
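# Worked numbers (illustrative, not part of the original module): with the
# default amplitude_imbalance of 1.5 dB, the in-phase branch is scaled by
# 10 ** (0.75 / 20) ≈ 1.090 and the quadrature branch by 10 ** (-0.75 / 20) ≈ 0.917,
# i.e. roughly a ±9% amplitude mismatch between I and Q before the phase
# imbalance and DC offset terms are applied.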
|
||||||
|
|
||||||
|
|
||||||
|
def resample(signal: ArrayLike | Recording, up: Optional[int] = 4, down: Optional[int] = 2) -> np.ndarray | Recording:
|
||||||
|
"""Resample a signal using polyphase filtering.
|
||||||
|
|
||||||
|
Uses scipy.signal.resample_poly to upsample the signal by the
|
||||||
|
factor *up*, apply a zero-phase low-pass FIR filter, and downsample the
|
||||||
|
signal by the factor *down*.
|
||||||
|
|
||||||
|
:param signal: Input IQ data as a complex CxN array or `Recording`, where C is the number of channels and N
|
||||||
|
is the length of the IQ examples.
|
||||||
|
:type signal: array_like or utils.data.Recording
|
||||||
|
:param up: The upsampling factor. Default is 4.
|
||||||
|
:type up: int, optional
|
||||||
|
:param down: The downsampling factor. Default is 2.
|
||||||
|
:type down: int, optional
|
||||||
|
|
||||||
|
:raises ValueError: If `signal` is not CxN complex.
|
||||||
|
|
||||||
|
:return: A numpy array which represents the resampled signal. If `signal` is a `Recording`,
|
||||||
|
returns a `Recording object` with its `data` attribute containing the resampled array.
|
||||||
|
:rtype: np.ndarray or utils.data.Recording
|
||||||
|
|
||||||
|
>>> rec = Recording(data=[[1+1j, 2+2j]])
|
||||||
|
>>> new_rec = resample(rec, 2, 1)
|
||||||
|
>>> new_rec.data
|
||||||
|
array([[1.00051747+1.00051747j, 1.90020207+1.90020207j]])
|
||||||
|
"""
|
||||||
|
# TODO: Additional info needs to be added to docstring description
|
||||||
|
|
||||||
|
if isinstance(signal, Recording):
|
||||||
|
data = signal.data
|
||||||
|
else:
|
||||||
|
data = np.asarray(signal)
|
||||||
|
|
||||||
|
if data.ndim == 2 and np.iscomplexobj(data):
|
||||||
|
c, n = data.shape
|
||||||
|
else:
|
||||||
|
raise ValueError("signal must be CxN complex.")
|
||||||
|
|
||||||
|
if c == 1:
|
||||||
|
data = np.squeeze(data)
|
||||||
|
resampled_iqdata = resample_poly(x=data, up=up, down=down)
|
||||||
|
|
||||||
|
# Reshape array so that slicing operations work on resampled data
|
||||||
|
resampled_iqdata = np.reshape(resampled_iqdata, newshape=(1, len(resampled_iqdata)))
|
||||||
|
|
||||||
|
if resampled_iqdata.shape[1] > n:
|
||||||
|
resampled_iqdata = resampled_iqdata[:, :n]
|
||||||
|
|
||||||
|
else:
|
||||||
|
empty_array = np.zeros((c, n), dtype=resampled_iqdata.dtype)  # pad back out to the original length n
|
||||||
|
empty_array[:, : resampled_iqdata.shape[1]] = resampled_iqdata
resampled_iqdata = empty_array
|
||||||
|
else:
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
|
if isinstance(signal, Recording):
|
||||||
|
return Recording(data=resampled_iqdata, metadata=signal.metadata)
|
||||||
|
else:
|
||||||
|
return resampled_iqdata
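# Usage note (assumed behaviour based on the trimming/padding logic above, not
# part of the original module): the output is trimmed or zero-padded so it keeps
# the original length n, i.e. resampling changes the effective sample rate
# without changing the array shape.
#
# >>> rec = Recording(data=[[1+1j, 2+2j, 3+3j, 4+4j]])
# >>> resample(rec, up=1, down=2).data.shape  # still (1, 4); second half zero-padded
# (1, 4)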
|
9
src/ria_toolkit/utils/__init__.py
Normal file
9
src/ria_toolkit/utils/__init__.py
Normal file
|
@ -0,0 +1,9 @@
|
||||||
|
"""
|
||||||
|
The Helpers module contains assorted helper functions, including array conversion utilities.
|
||||||
|
"""
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"bytes_to_samples",
|
||||||
|
]
|
||||||
|
|
||||||
|
from .bytes_to_samples import bytes_to_samples
|
80
src/ria_toolkit/utils/array_conversion.py
Normal file
80
src/ria_toolkit/utils/array_conversion.py
Normal file
|
@ -0,0 +1,80 @@
|
||||||
|
"""
|
||||||
|
IQ data represents the in-phase (I) and quadrature (Q) components of a signal. There are two ways to represent
|
||||||
|
single-channel IQ signals:
|
||||||
|
|
||||||
|
#. **Complex 1xN Format:** In the complex 1xN format, the IQ data is represented as a 2D array of complex numbers with
|
||||||
|
shape 1xN. In this format, the real part of each complex number represents the in-phase component, while the
|
||||||
|
imaginary part represents the quadrature component.
|
||||||
|
#. **Real 2xN Format:** In the real 2xN format, the IQ data is represented as a 2D array of real numbers with shape
|
||||||
|
2xN. In this format, the first row contains the in-phase components, while the second row contains the quadrature
|
||||||
|
components.
|
||||||
|
|
||||||
|
This submodule provides functions to verify and convert between these two formats.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
from numpy.typing import ArrayLike
|
||||||
|
|
||||||
|
|
||||||
|
def convert_to_2xn(arr: np.ndarray) -> np.ndarray:
|
||||||
|
"""Convert arr to the real 2xN format. If arr is already real 2xN, then you'll get back a copy.
|
||||||
|
|
||||||
|
:param arr: Array of IQ samples, in the complex 1xN format.
|
||||||
|
:type arr: array_like
|
||||||
|
|
||||||
|
:return: The provided signal, in the real 2xN format.
|
||||||
|
:rtype: np.ndarray
|
||||||
|
"""
|
||||||
|
if is_1xn(arr):
|
||||||
|
return np.vstack((np.real(arr[0]), np.imag(arr[0])))
|
||||||
|
|
||||||
|
elif is_2xn(arr):
|
||||||
|
return np.copy(arr)
|
||||||
|
|
||||||
|
else:
|
||||||
|
raise ValueError("arr is neither complex 1xN nor real 2xN.")
|
||||||
|
|
||||||
|
|
||||||
|
def convert_to_1xn(arr: np.ndarray) -> np.ndarray:
|
||||||
|
"""Convert arr to the complex 1xN format. If arr is already complex 1xN, then you'll get back a copy.
|
||||||
|
|
||||||
|
:param arr: Array of IQ samples, in the real 2xN format.
|
||||||
|
:type arr: np.ndarray
|
||||||
|
|
||||||
|
:return: The provided signal, in the complex 1xN format.
|
||||||
|
:rtype: np.ndarray
|
||||||
|
"""
|
||||||
|
if is_2xn(arr):
|
||||||
|
return np.expand_dims(a=arr[0, :] + 1j * arr[1, :], axis=0)
|
||||||
|
|
||||||
|
elif is_1xn(arr):
|
||||||
|
return np.copy(arr)
|
||||||
|
|
||||||
|
else:
|
||||||
|
raise ValueError("arr is neither complex 1xN nor real 2xN.")
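# Round-trip sketch (illustrative, not part of the original module):
#
# >>> c1 = np.array([[1 + 2j, 3 + 4j]])       # complex 1xN
# >>> r2 = convert_to_2xn(c1)                 # real 2xN: [[1., 3.], [2., 4.]]
# >>> np.array_equal(convert_to_1xn(r2), c1)
# True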
|
||||||
|
|
||||||
|
|
||||||
|
def is_1xn(arr: ArrayLike) -> bool:
|
||||||
|
"""
|
||||||
|
:return: True if arr is complex 1xN, False otherwise.
|
||||||
|
:rtype: bool
|
||||||
|
"""
|
||||||
|
a = np.asarray(arr)
|
||||||
|
|
||||||
|
if a.ndim == 2 and a.shape[0] == 1 and np.iscomplexobj(a):
|
||||||
|
return True
|
||||||
|
else:
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def is_2xn(arr: ArrayLike) -> bool:
|
||||||
|
"""
|
||||||
|
:return: True if arr is real 2xN, False otherwise.
|
||||||
|
:rtype: bool
|
||||||
|
"""
|
||||||
|
a = np.asarray(arr)
|
||||||
|
|
||||||
|
if a.ndim == 2 and a.shape[0] == 2 and not np.iscomplexobj(a):
|
||||||
|
return True
|
||||||
|
else:
|
||||||
|
return False
|
18
src/ria_toolkit/utils/bytes_to_samples.py
Normal file
18
src/ria_toolkit/utils/bytes_to_samples.py
Normal file
|
@ -0,0 +1,18 @@
|
||||||
|
from numpy.typing import NDArray
|
||||||
|
|
||||||
|
|
||||||
|
def bytes_to_samples(data: bytes) -> NDArray:
|
||||||
|
"""Convert bytes to IQ samples, in the complex 1xN format.
|
||||||
|
|
||||||
|
:param data: Array of bytes
|
||||||
|
:type data: bytes
|
||||||
|
|
||||||
|
:return: Tape of IQ samples, as numpy complex type
|
||||||
|
:rtype: np.ndarray
|
||||||
|
"""
|
||||||
|
# samples = np.frombuffer(data, dtype=np.int16).astype(np.float32)
|
||||||
|
# samples /= 2048
|
||||||
|
# samples = samples[::2] + 1j * samples[1::2]
|
||||||
|
# # samples = samples.view(np.complex64)
|
||||||
|
# return samples
|
||||||
|
raise NotImplementedError
|
12
src/ria_toolkit/viz/__init__.py
Normal file
12
src/ria_toolkit/viz/__init__.py
Normal file
|
@ -0,0 +1,12 @@
|
||||||
|
"""
|
||||||
|
The package contains assorted plotting and report generation utilities to help visualize RIA components such as
|
||||||
|
recordings and radio datasets.
|
||||||
|
"""
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"view_annotations",
|
||||||
|
"view_channels",
|
||||||
|
"view_sig",
|
||||||
|
]
|
||||||
|
|
||||||
|
from .view_signal import view_annotations, view_channels, view_sig
|
192
src/ria_toolkit/viz/recording.py
Normal file
192
src/ria_toolkit/viz/recording.py
Normal file
|
@ -0,0 +1,192 @@
|
||||||
|
import numpy as np
|
||||||
|
import plotly.graph_objects as go
|
||||||
|
import scipy.signal as signal
|
||||||
|
from plotly.graph_objs import Figure
|
||||||
|
from scipy.fft import fft, fftshift
|
||||||
|
|
||||||
|
from utils.data import Recording
|
||||||
|
|
||||||
|
|
||||||
|
def spectrogram(rec: Recording, thumbnail: bool = False) -> Figure:
|
||||||
|
"""Create a spectrogram for the recording.
|
||||||
|
|
||||||
|
:param rec: Signal to plot.
|
||||||
|
:type rec: utils.data.Recording
|
||||||
|
:param thumbnail: Whether to return a small thumbnail version or full plot.
|
||||||
|
:type thumbnail: bool
|
||||||
|
|
||||||
|
:return: Spectrogram, as a Plotly figure.
|
||||||
|
"""
|
||||||
|
complex_signal = rec.data[0]
|
||||||
|
sample_rate = int(rec.metadata.get("sample_rate", 1))
|
||||||
|
plot_length = len(complex_signal)
|
||||||
|
|
||||||
|
# Determine FFT size
|
||||||
|
if plot_length < 2000:
|
||||||
|
fft_size = 64
|
||||||
|
elif plot_length < 10000:
|
||||||
|
fft_size = 256
|
||||||
|
elif plot_length < 1000000:
|
||||||
|
fft_size = 1024
|
||||||
|
else:
|
||||||
|
fft_size = 2048
|
||||||
|
|
||||||
|
frequencies, times, Sxx = signal.spectrogram(
|
||||||
|
complex_signal,
|
||||||
|
fs=sample_rate,
|
||||||
|
nfft=fft_size,
|
||||||
|
nperseg=fft_size,
|
||||||
|
noverlap=fft_size // 8,
|
||||||
|
scaling="density",
|
||||||
|
mode="complex",
|
||||||
|
return_onesided=False,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Convert complex values to amplitude and then to log scale for visualization
|
||||||
|
Sxx_magnitude = np.abs(Sxx)
|
||||||
|
Sxx_log = np.log10(Sxx_magnitude + 1e-6)
|
||||||
|
|
||||||
|
# Normalize spectrogram values between 0 and 1 for plotting
|
||||||
|
Sxx_log_shifted = Sxx_log - np.min(Sxx_log)
|
||||||
|
Sxx_log_norm = Sxx_log_shifted / np.max(Sxx_log_shifted)
|
||||||
|
|
||||||
|
# Shift frequency bins and spectrogram rows so frequencies run from negative to positive
|
||||||
|
frequencies_shifted = np.fft.fftshift(frequencies)
|
||||||
|
Sxx_shifted = np.fft.fftshift(Sxx_log_norm, axes=0)
|
||||||
|
|
||||||
|
fig = go.Figure(
|
||||||
|
data=go.Heatmap(
|
||||||
|
z=Sxx_shifted,
|
||||||
|
x=times,  # spectrogram times from scipy are already in seconds
|
||||||
|
y=frequencies_shifted,
|
||||||
|
colorscale="Viridis",
|
||||||
|
zmin=0,
|
||||||
|
zmax=1,
|
||||||
|
reversescale=False,
|
||||||
|
showscale=False,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
if thumbnail:
|
||||||
|
fig.update_xaxes(showticklabels=False)
|
||||||
|
fig.update_yaxes(showticklabels=False)
|
||||||
|
fig.update_layout(
|
||||||
|
template="plotly_dark",
|
||||||
|
width=200,
|
||||||
|
height=100,
|
||||||
|
margin=dict(l=5, r=5, t=5, b=5),
|
||||||
|
xaxis=dict(scaleanchor=None),
|
||||||
|
yaxis=dict(scaleanchor=None),
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
fig.update_layout(
|
||||||
|
title="Spectrogram",
|
||||||
|
xaxis_title="Time [s]",
|
||||||
|
yaxis_title="Frequency [Hz]",
|
||||||
|
template="plotly_dark",
|
||||||
|
height=300,
|
||||||
|
width=800,
|
||||||
|
)
|
||||||
|
|
||||||
|
return fig
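# Note (illustrative summary of the heuristic above, not part of the original
# module): the FFT size picks 64 points for recordings under 2,000 samples,
# 256 under 10,000, 1,024 under 1,000,000, and 2,048 otherwise, trading
# frequency resolution against the number of time slices in the heat map.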
|
||||||
|
|
||||||
|
|
||||||
|
def iq_time_series(rec: Recording) -> Figure:
|
||||||
|
"""Create a time series plot of the real and imaginary parts of signal.
|
||||||
|
|
||||||
|
:param rec: Signal to plot.
|
||||||
|
:type rec: utils.data.Recording
|
||||||
|
|
||||||
|
:return: Time series plot as a Plotly figure.
|
||||||
|
"""
|
||||||
|
complex_signal = rec.data[0]
|
||||||
|
sample_rate = int(rec.metadata.get("sample_rate", 1))
|
||||||
|
plot_length = len(complex_signal)
|
||||||
|
t = np.arange(0, plot_length, 1) / sample_rate
|
||||||
|
|
||||||
|
fig = go.Figure()
|
||||||
|
fig.add_trace(go.Scatter(x=t, y=complex_signal.real, mode="lines", name="I (In-phase)", line=dict(width=0.6)))
|
||||||
|
fig.add_trace(go.Scatter(x=t, y=complex_signal.imag, mode="lines", name="Q (Quadrature)", line=dict(width=0.6)))
|
||||||
|
|
||||||
|
fig.update_layout(
|
||||||
|
title="IQ Time Series",
|
||||||
|
xaxis_title="Time [s]",
|
||||||
|
yaxis_title="Amplitude",
|
||||||
|
template="plotly_dark",
|
||||||
|
height=300,
|
||||||
|
width=800,
|
||||||
|
showlegend=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
return fig
|
||||||
|
|
||||||
|
|
||||||
|
def frequency_spectrum(rec: Recording) -> Figure:
|
||||||
|
"""Create a frequency spectrum plot from the recording.
|
||||||
|
|
||||||
|
:param rec: Input signal to plot.
|
||||||
|
:type rec: utils.data.Recording
|
||||||
|
|
||||||
|
:return: Frequency spectrum as a Plotly figure.
|
||||||
|
"""
|
||||||
|
complex_signal = rec.data[0]
|
||||||
|
center_frequency = int(rec.metadata.get("center_frequency", 0))
|
||||||
|
sample_rate = int(rec.metadata.get("sample_rate", 1))
|
||||||
|
|
||||||
|
epsilon = 1e-10
|
||||||
|
spectrum = np.abs(fftshift(fft(complex_signal)))
|
||||||
|
freqs = np.linspace(-sample_rate / 2, sample_rate / 2, len(complex_signal)) + center_frequency
|
||||||
|
log_spectrum = np.log10(spectrum + epsilon)
|
||||||
|
scaled_log_spectrum = (log_spectrum - log_spectrum.min()) / (log_spectrum.max() - log_spectrum.min())
|
||||||
|
|
||||||
|
fig = go.Figure()
|
||||||
|
fig.add_trace(go.Scatter(x=freqs, y=scaled_log_spectrum, mode="lines", name="Spectrum", line=dict(width=0.4)))
|
||||||
|
|
||||||
|
fig.update_layout(
|
||||||
|
title="Frequency Spectrum",
|
||||||
|
xaxis_title="Frequency [Hz]",
|
||||||
|
yaxis_title="Magnitude",
|
||||||
|
yaxis_type="log",
|
||||||
|
template="plotly_dark",
|
||||||
|
height=300,
|
||||||
|
width=800,
|
||||||
|
showlegend=False,
|
||||||
|
)
|
||||||
|
|
||||||
|
return fig
|
||||||
|
|
||||||
|
|
||||||
|
def constellation(rec: Recording) -> Figure:
|
||||||
|
"""Create a constellation plot from the recording.
|
||||||
|
|
||||||
|
:param rec: Input signal to plot.
|
||||||
|
:type rec: utils.data.Recording
|
||||||
|
|
||||||
|
:return: Constellation as a Plotly figure.
|
||||||
|
"""
|
||||||
|
complex_signal = rec.data[0]
|
||||||
|
|
||||||
|
# Downsample the IQ samples to a target number of points
|
||||||
|
# This reduces the amount of data plotted, improving performance and interactivity
|
||||||
|
# without losing significant detail in the constellation visualization.
|
||||||
|
target_number_of_points = 5000
|
||||||
|
step = max(1, len(complex_signal) // target_number_of_points)
|
||||||
|
i_ds = complex_signal.real[::step]
|
||||||
|
q_ds = complex_signal.imag[::step]
|
||||||
|
|
||||||
|
fig = go.Figure()
|
||||||
|
fig.add_trace(go.Scatter(x=i_ds, y=q_ds, mode="lines", name="Constellation", line=dict(width=0.2)))
|
||||||
|
|
||||||
|
fig.update_layout(
|
||||||
|
title="Constellation",
|
||||||
|
xaxis_title="In-phase (I)",
|
||||||
|
yaxis_title="Quadrature (Q)",
|
||||||
|
template="plotly_dark",
|
||||||
|
height=400,
|
||||||
|
width=400,
|
||||||
|
showlegend=False,
|
||||||
|
xaxis=dict(range=[-1.1, 1.1]),
|
||||||
|
yaxis=dict(range=[-1.1, 1.1]),
|
||||||
|
)
|
||||||
|
|
||||||
|
return fig
|