Populating initial source code from RIA Utils project

This commit is contained in:
Michael Luciuk 2025-09-02 11:35:41 -04:00
parent 25e39d3544
commit d8a7dc16be
22 changed files with 4912 additions and 0 deletions

View File

@ -0,0 +1,8 @@
"""
The Data package contains abstract data types tailored for radio machine learning, such as ``Recording``, as well
as the abstract interfaces for the radio dataset and radio dataset builder framework.
"""
__all__ = ["Annotation", "Recording"]
from .annotation import Annotation
from .recording import Recording

View File

@ -0,0 +1,128 @@
from __future__ import annotations
import json
from typing import Any, Optional
from sigmf import SigMFFile
class Annotation:
    """Signal annotations are labels or additional information associated with specific data points or segments within
    a signal. These annotations could be used for tasks like supervised learning, where the goal is to train a model
    to recognize patterns or characteristics in the signal associated with these annotations.

    Annotations can be used to label interesting points in your recording.

    :param sample_start: The index of the starting sample of the annotation.
    :type sample_start: int
    :param sample_count: The number of samples the annotation spans; the annotation covers samples
        ``sample_start`` through ``sample_start + sample_count - 1``.
    :type sample_count: int
    :param freq_lower_edge: The lower frequency of the annotation.
    :type freq_lower_edge: float
    :param freq_upper_edge: The upper frequency of the annotation.
    :type freq_upper_edge: float
    :param label: The label that will be displayed with the bounding box in compatible viewers including IQEngine.
        Defaults to an empty string.
    :type label: str, optional
    :param comment: A human-readable comment. Defaults to an empty string.
    :type comment: str, optional
    :param detail: A dictionary of user defined annotation-specific metadata. Defaults to None.
    :type detail: dict, optional
    """

    def __init__(
        self,
        sample_start: int,
        sample_count: int,
        freq_lower_edge: float,
        freq_upper_edge: float,
        label: Optional[str] = "",
        comment: Optional[str] = "",
        detail: Optional[dict] = None,
    ):
        """Initialize a new Annotation instance.

        :raises ValueError: If ``detail`` is not JSON serializable.
        """
        self.sample_start = int(sample_start)
        self.sample_count = int(sample_count)
        self.freq_lower_edge = float(freq_lower_edge)
        self.freq_upper_edge = float(freq_upper_edge)
        # label/comment are typed Optional, so treat an explicit None like the default empty string
        # instead of storing the literal string "None" (which str(None) would produce).
        self.label = "" if label is None else str(label)
        self.comment = "" if comment is None else str(comment)
        if detail is None:
            self.detail = {}
        elif not _is_jsonable(detail):
            raise ValueError(f"Detail object is not json serializable: {detail}")
        else:
            self.detail = detail

    def is_valid(self) -> bool:
        """
        Check that the annotation sample count is > 0 and the freq_lower_edge < freq_upper_edge.

        :returns: True if valid, False if not.
        """
        return self.sample_count > 0 and self.freq_lower_edge < self.freq_upper_edge

    def overlap(self, other: Annotation) -> float:
        """
        Quantify how much the bounding box in this annotation overlaps with another annotation.

        :param other: The other annotation.
        :type other: Annotation
        :returns: The area of the overlap in samples*frequency, or 0 if they do not overlap."""
        sample_overlap_start = max(self.sample_start, other.sample_start)
        sample_overlap_end = min(self.sample_start + self.sample_count, other.sample_start + other.sample_count)
        freq_overlap_start = max(self.freq_lower_edge, other.freq_lower_edge)
        freq_overlap_end = min(self.freq_upper_edge, other.freq_upper_edge)
        if freq_overlap_start >= freq_overlap_end or sample_overlap_start >= sample_overlap_end:
            return 0
        else:
            return (sample_overlap_end - sample_overlap_start) * (freq_overlap_end - freq_overlap_start)

    def area(self) -> float:
        """
        The 'area' of the bounding box, samples*frequency.
        Useful to quantify annotation size.

        :returns: sample length multiplied by bandwidth."""
        return self.sample_count * (self.freq_upper_edge - self.freq_lower_edge)

    def __eq__(self, other: object) -> bool:
        # Returning NotImplemented (instead of unconditionally touching other.__dict__) lets
        # comparisons against non-Annotation objects fall back to Python's default semantics
        # rather than raising AttributeError.
        if not isinstance(other, Annotation):
            return NotImplemented
        return self.__dict__ == other.__dict__

    def to_sigmf_format(self) -> dict:
        """
        Returns a JSON dictionary representing this annotation formatted to be saved in a .sigmf-meta file.

        :raises ValueError: If the resulting dictionary is not JSON serializable.
        """
        annotation_dict = {SigMFFile.START_INDEX_KEY: self.sample_start, SigMFFile.LENGTH_INDEX_KEY: self.sample_count}
        annotation_dict["metadata"] = {
            SigMFFile.LABEL_KEY: self.label,
            SigMFFile.COMMENT_KEY: self.comment,
            SigMFFile.FHI_KEY: self.freq_upper_edge,
            SigMFFile.FLO_KEY: self.freq_lower_edge,
            "ria:detail": self.detail,
        }
        if _is_jsonable(annotation_dict):
            return annotation_dict
        else:
            raise ValueError("Annotation dictionary was not json serializable.")
def _is_jsonable(x: Any) -> bool:
"""
:return: True if x is JSON serializable, False otherwise.
"""
try:
json.dumps(x)
return True
except (TypeError, OverflowError):
return False

View File

@ -0,0 +1,12 @@
"""
The Radio Dataset Subpackage defines the abstract interfaces and framework components for the management of machine
learning datasets tailored for radio signal processing.
"""
__all__ = ["RadioDataset", "IQDataset", "SpectDataset", "DatasetBuilder", "split", "random_split"]
from .dataset_builder import DatasetBuilder
from .iq_dataset import IQDataset
from .radio_dataset import RadioDataset
from .spect_dataset import SpectDataset
from .split import random_split, split

View File

@ -0,0 +1,137 @@
"""
A `DatasetBuilder` is a creator class that manages the download, preparation, and creation of radio datasets.
"""
from abc import ABC, abstractmethod
from typing import Any, Optional
from packaging.version import Version
from utils._utils.abstract_attribute import abstract_attribute
from utils.data.datasets.license.dataset_license import DatasetLicense
from utils.data.datasets.radio_dataset import RadioDataset
class DatasetBuilder(ABC):
    """Abstract interface for radio dataset builders. These builders produce radio datasets for common and project
    datasets related to radio science.

    This class should not be instantiated directly. Instead, subclass it to define specific builders for different
    datasets.
    """

    _url: str = abstract_attribute()
    # The checksum attributes default to None so the sha256/md5 properties (documented as returning
    # "None if not set") do not raise AttributeError on builders that never assign them. Previously
    # _SHA256 was an annotation with no value and _MD5 was not declared at all, so both properties raised.
    _SHA256: Optional[str] = None  # SHA256 checksum.
    _MD5: Optional[str] = None  # MD5 checksum.
    _name: str = abstract_attribute()
    _author: str = abstract_attribute()
    _license: DatasetLicense = abstract_attribute()
    _version: Version = abstract_attribute()
    _latest_version: Optional[Version] = None

    def __init__(self):
        super().__init__()

    @property
    def name(self) -> str:
        """
        :return: The name of the dataset.
        :rtype: str
        """
        return self._name

    @property
    def author(self) -> str:
        """
        :return: The author of the dataset.
        :rtype: str
        """
        return self._author

    @property
    def url(self) -> str:
        """
        :return: The URL where the dataset was accessed.
        :rtype: str
        """
        return self._url

    @property
    def sha256(self) -> Optional[str]:
        """
        :return: The SHA256 checksum, or None if not set.
        :rtype: str or None
        """
        return self._SHA256

    @property
    def md5(self) -> Optional[str]:
        """
        :return: The MD5 checksum, or None if not set.
        :rtype: str or None
        """
        return self._MD5

    @property
    def version(self) -> Version:
        """
        :return: The version identifier of the dataset.
        :rtype: Version
        """
        return self._version

    @property
    def latest_version(self) -> Optional[Version]:
        """
        :return: The version identifier of the latest available version of the dataset, or None if not set.
        :rtype: Version or None
        """
        return self._latest_version

    @property
    def license(self) -> DatasetLicense:
        """
        :return: The dataset license information.
        :rtype: DatasetLicense
        """
        return self._license

    @property
    def info(self) -> dict[str, Any]:
        """
        :return: Information about the dataset including the name, author, and version of the dataset.
        :rtype: dict
        """
        # TODO: We should increase the amount of information that's included here. See the information included in
        #  tfds.core.DatasetInfo for more: https://www.tensorflow.org/datasets/api_docs/python/tfds/core/DatasetInfo.
        return {
            "name": self.name,
            "author": self.author,
            "url": self.url,
            "sha256": self.sha256,
            "md5": self.md5,
            "version": self.version,
            "license": self.license,
            "latest_version": self.latest_version,
        }

    @abstractmethod
    def download_and_prepare(self) -> None:
        """Download and prepare the dataset for use as an HDF5 source file.

        Once an HDF5 source file has been prepared, the downloaded files are deleted.
        """

    @abstractmethod
    def as_dataset(self, backend: str) -> RadioDataset:
        """A factory method to manage the creation of radio datasets.

        Note: Depending on your installation, not all backends may be available.

        :param backend: Backend framework to use ("pytorch" or "tensorflow").
        :type backend: str
        :return: A new RadioDataset based on the signal representation and specified backend.
        :rtype: RadioDataset
        """

View File

@ -0,0 +1,221 @@
import os
import h5py
import numpy as np
def copy_dataset_entry_by_index(
    source: str | os.PathLike, destination: str | os.PathLike, dataset_path: str, idx: int
) -> None:
    """
    Copies an entry from a dataset based on an index from the source HDF5 file to the destination HDF5 file.

    The copied entry is appended to the end of the destination dataset.

    :param source: The name of the original HDF5 file.
    :type source: str
    :param destination: The name of the new HDF5 file.
    :type destination: str
    :param dataset_path: The path of the dataset from the root of the file.
    :type dataset_path: str
    :param idx: The index of the specified example.
    :type idx: int
    :return: None
    """
    # TODO: Generalize so that source and destination can be file objects or strings
    with h5py.File(source, "r") as src_file, h5py.File(destination, "a") as dst_file:
        value = src_file[dataset_path][idx]
        target = dst_file[dataset_path]
        # Grow the destination by one row, then write the copied entry into the new last slot.
        target.resize(target.shape[0] + 1, axis=0)
        target[-1] = value
def copy_over_example(source: str | os.PathLike, destination: str | os.PathLike, idx: int) -> None:
    """
    Copies over an example and its corresponding metadata located at the given index to a new file.
    It appends the new example to the end of the new file.

    :param source: The name of the original HDF5 file.
    :type source: str or os.PathLike
    :param destination: The name of the new HDF5 file.
    :type destination: str or os.PathLike
    :param idx: The index of the example within the dataset.
    :type idx: int
    :return: None
    """
    with h5py.File(source, "r") as src, h5py.File(destination, "a") as dst:
        # The example and its metadata live in parallel datasets; append to both in lockstep.
        for path in ("data", "metadata/metadata"):
            target = dst[path]
            target.resize(target.shape[0] + 1, axis=0)
            target[-1] = src[path][idx]
def append_entry_inplace(source: str | os.PathLike, dataset_path: str, entry: np.ndarray) -> None:
    """
    Appends an entry to the specified dataset of the source HDF5 file. This operation is done inplace.

    :param source: The name of the source HDF5 file.
    :type source: str or os.PathLike
    :param dataset_path: The path of the dataset from the root of the file.
    :type dataset_path: str
    :param entry: The entry that is being copied.
    :type entry: np.ndarray
    :return: None
    """
    # TODO: Generalize so that source can be file object or string
    with h5py.File(source, "a") as handle:
        dataset = handle[dataset_path]
        new_length = dataset.shape[0] + 1
        # Grow along the example axis, then fill the freshly-created final slot.
        dataset.resize(new_length, axis=0)
        dataset[new_length - 1] = entry
def duplicate_entry_inplace(source: str | os.PathLike, dataset_path: str, idx: int) -> None:
    """
    Appends the entry at index to the end of the dataset. This operation is done inplace.

    :param source: The name of the source HDF5 file.
    :type source: str or os.PathLike
    :param dataset_path: The path of the dataset from the root of the file. This dataset is usually
        'data' or 'metadata/metadata'.
    :type dataset_path: str
    :param idx: The index of the example within the dataset.
    :type idx: int
    :return: None
    """
    # This function appends to dataset, so upon dataset creation, chunks has to = True and max_size has to = None
    with h5py.File(source, "a") as handle:
        dataset = handle[dataset_path]
        # Read the entry out *before* resizing, then write it into the new final slot.
        duplicate = dataset[idx]
        dataset.resize(dataset.shape[0] + 1, axis=0)
        dataset[-1] = duplicate
def copy_file(original_source: str | os.PathLike, new_source: str | os.PathLike) -> None:
    """Copies contents of source HDF5 file to a new HDF5 file.

    :param original_source: The name of the original HDF5 source file.
    :type original_source: str or os.PathLike
    :param new_source: The copy of the HDF5 source file.
    :type new_source: str or os.PathLike
    :return: None
    """
    # Open the source inside the context manager as well, so its handle is closed even when a copy
    # raises (the previous implementation only closed it on the success path).
    with h5py.File(original_source, "r") as original_file, h5py.File(new_source, "w") as new_file:
        for key in original_file.keys():
            original_file.copy(key, new_file)
def make_empty_clone(original_source: str | os.PathLike, new_source: str | os.PathLike, example_length: int) -> None:
    """Creates a new HDF5 file with the same structure but will leave metadata and dataset empty for operations.

    :param original_source: The name of the original HDF5 source file.
    :type original_source: str or os.PathLike
    :param new_source: The name of the new HDF5 source file.
    :type new_source: str or os.PathLike
    :param example_length: The desired length of an example in the new file.
    :type example_length: int
    :return: None
    """
    with h5py.File(new_source, "w") as clone, h5py.File(original_source, "r") as template:
        for key in template.keys():
            if key == "data":
                # Empty, resizable data set: original channel count and dtype, but the new example length.
                n_channels = template["data"].shape[1]
                clone.create_dataset(
                    "data",
                    shape=(0, n_channels, example_length),
                    chunks=True,
                    maxshape=(None, None, None),
                    dtype=template["data"].dtype,
                )
            elif key == "metadata":
                # Empty, resizable metadata dataset inside a fresh "metadata" group.
                clone.create_group("metadata").create_dataset(
                    "metadata",
                    shape=(0,),
                    chunks=True,
                    maxshape=(None,),
                    dtype=template["metadata/metadata"].dtype,
                )
            else:
                # Any other top-level object is copied over verbatim.
                template.copy(key, clone)
def delete_example_inplace(source: str | os.PathLike, idx: int) -> None:
    """Deletes an example and its corresponding metadata located at the given index.

    This deletion is done by creating a temporary dataset and copying all contents
    to the temporary dataset except for the example at idx. This operation is inplace.

    :param source: The name of the source HDF5 file.
    :type source: str or os.PathLike
    :param idx: The index of the example and metadata to be deleted.
    :type idx: int
    :raises IndexError: If idx is out of range for the data dataset.
    :raises ValueError: If the data and metadata datasets have different lengths.
    :return: None
    """
    with h5py.File(source, "a") as f:
        ds, md = f["data"], f["metadata/metadata"]
        m, c, n = ds.shape
        # Validate with real exceptions instead of asserts, which are silently stripped under `python -O`.
        if not 0 <= idx < m:
            raise IndexError(f"idx {idx} is out of range for a dataset with {m} examples")
        if len(ds) != len(md):
            raise ValueError("data and metadata/metadata datasets have different lengths")
        new_ds = f.create_dataset(
            "data.temp",
            shape=(m - 1, c, n),
            chunks=True,
            dtype=ds.dtype,
            maxshape=(None, None, None),  # Required to allow future mutations which expand the shape
        )
        new_md = f.create_dataset(
            "metadata/metadata.temp", shape=len(md) - 1, chunks=True, dtype=md.dtype, maxshape=(None,)
        )
        # Bulk slice copies instead of per-row loops: one HDF5 read/write per side of the deletion
        # point, rather than one per example.
        if idx > 0:
            new_ds[:idx], new_md[:idx] = ds[:idx], md[:idx]
        if idx < m - 1:
            new_ds[idx:], new_md[idx:] = ds[idx + 1 :], md[idx + 1 :]
        del f["data"]
        del f["metadata/metadata"]
        f.move("data.temp", "data")
        f.move("metadata/metadata.temp", "metadata/metadata")
def overwrite_file(source: str | os.PathLike, new_data: np.ndarray) -> None:
    """
    Overwrites data in an HDF5 file with new data.

    The first top-level dataset in the file is deleted and recreated from ``new_data``.

    :param source: The copy of the HDF5 source file.
    :type source: str or os.PathLike
    :param new_data: The updated copy of the data that should be stored.
    :type new_data: np.ndarray
    :return: None
    """
    # TODO: Might need to pass in dataset_path instead of dataset_name depending on file structure
    # Update copy to include augmented data
    with h5py.File(source, "r+") as f:
        # next(iter(...)) grabs the first key without materializing the whole key list.
        ds_name = next(iter(f.keys()))
        del f[ds_name]
        f.create_dataset(ds_name, data=new_data)
        # NOTE: the previous version called f.close() here; the context manager already closes the file.

View File

@ -0,0 +1,210 @@
from __future__ import annotations
import os
from abc import ABC
from typing import Optional
import h5py
import numpy as np
from utils.data.datasets.h5helpers import (
append_entry_inplace,
copy_dataset_entry_by_index,
)
from utils.data.datasets.radio_dataset import RadioDataset
class IQDataset(RadioDataset, ABC):
    """An ``IQDataset`` is a ``RadioDataset`` tailored for machine learning tasks that involve processing
    radiofrequency (RF) signals represented as In-phase (I) and Quadrature (Q) samples.

    For machine learning tasks that involve processing spectrograms, please use
    utils.data.datasets.SpectDataset instead.

    This is an abstract interface defining common properties and behaviour of IQDatasets. Therefore, this class
    should not be instantiated directly. Instead, it is subclassed to define custom interfaces for specific machine
    learning backends.

    :param source: Path to the dataset source file. For more information on dataset source files
        and their format, see :doc:`radio_datasets`.
    :type source: str or os.PathLike
    """

    def __init__(self, source: str | os.PathLike):
        """Create a new IQDataset."""
        super().__init__(source=source)

    @property
    def shape(self) -> tuple[int]:
        """IQ datasets are M x C x N, where M is the number of examples, C is the number of channels, N is the length
        of the signals.

        :return: The shape of the dataset. The elements of the shape tuple give the lengths of the corresponding
            dataset dimensions.
        :type: tuple of ints
        """
        return super().shape

    def trim_examples(
        self, trim_length: int, keep: Optional[str] = "start", inplace: Optional[bool] = False
    ) -> IQDataset | None:
        """Trims all examples in a dataset to a desired length.

        :param trim_length: The desired length of the trimmed examples.
        :type trim_length: int
        :param keep: Specifies the part of the example to keep. Defaults to "start".
            The options are:

            - "start"
            - "end"
            - "middle"
            - "random"
        :type keep: str, optional
        :param inplace: If True, the operation modifies the existing source file directly and returns None.
            If False, the operation creates a new dataset object and corresponding source file, leaving the original
            dataset unchanged. Default is False.
        :type inplace: bool
        :raises ValueError: If trim_length is greater than or equal to the length of the examples.
        :raises ValueError: If value of keep is not recognized.
        :raises ValueError: If specified trim length is invalid for middle index.
        :return: The dataset that is composed of shorter examples, or None when ``inplace`` is True.
        :rtype: IQDataset or None

        **Examples:**

        >>> from ria.dataset_manager.builders import AWGN_Builder
        >>> builder = AWGN_Builder()
        >>> builder.download_and_prepare()
        >>> ds = builder.as_dataset()
        >>> ds.shape
        (5, 1, 3)
        >>> new_ds = ds.trim_examples(2)
        >>> new_ds.shape
        (5, 1, 2)
        """
        keep = keep.lower()
        channels, example_length = np.shape(self[0])
        if trim_length >= example_length:
            raise ValueError(f"Trim length must be less than {example_length}")
        if keep not in {"start", "end", "middle", "random"}:
            raise ValueError('keep must be "start", "end", "middle", or "random"')
        start = None
        if keep == "middle":
            # NOTE(review): this anchors the slice *at* the midpoint rather than centering it around the
            # midpoint, so a trim_length longer than half the example is rejected below — confirm intended.
            start = int(example_length / 2)
            if start + trim_length > example_length:
                raise ValueError(f"Trim length of {trim_length} is invalid for middle index of: {start} ")
        elif keep == "random":
            start = np.random.randint(0, example_length - trim_length + 1)
        if not inplace:
            ds = self._create_next_dataset(example_length=trim_length)
        with h5py.File(self.source, "a") as f:
            data = f["data"]
            for idx in range(len(self)):
                trimmed_example = generate_trimmed_example(
                    example=data[idx],
                    keep=keep,
                    trim_length=trim_length,
                    start=start,
                )
                if not inplace:
                    # Build the new dataset example-by-example, carrying each example's metadata across unchanged.
                    append_entry_inplace(source=ds.source, dataset_path="data", entry=trimmed_example)
                    copy_dataset_entry_by_index(
                        source=self.source, destination=ds.source, dataset_path="metadata/metadata", idx=idx
                    )
                else:
                    # Zero-pad back to the original length so the row can be written in place; the dataset
                    # is resized down to trim_length once every example has been rewritten.
                    trimmed_example = np.pad(
                        trimmed_example, ((0, 0), (0, example_length - trim_length)), "constant", constant_values=0
                    )
                    data[idx] = trimmed_example
            if not inplace:
                return ds
            else:
                data.resize(trim_length, axis=2)

    def split_examples(
        self, split_factor: Optional[int] = None, example_length: Optional[int] = None, inplace: Optional[bool] = False
    ) -> IQDataset | None:
        """Split each example into several shorter examples.

        If the current example length is not evenly divisible by the provided example_length, excess samples are
        discarded. Excess examples are always at the end of the slice. If the split factor results in non-integer
        example lengths for the new example chunks, it rounds down.

        Requires either split_factor or example_length to be specified but not both. If both are provided,
        split factor will be used by default, and a warning will be raised.

        :param split_factor: the number of new example chunks produced from each original example, defaults to None.
        :type split_factor: int, optional
        :param example_length: the example length of the new example chunks, defaults to None.
        :type example_length: int, optional
        :param inplace: If True, the operation modifies the existing source file directly and returns None.
            If False, the operation creates a new dataset object and corresponding source file, leaving the original
            dataset unchanged. Default is False.
        :type inplace: bool, optional
        :raises NotImplementedError: Always; this method is not yet implemented.
        :return: A dataset with more examples that are shorter.
        :rtype: IQDataset

        **Examples:**

        If the dataset has 100 examples of length 1024 and the split factor is 2, the resulting dataset
        will have 200 examples of 512. No samples have been discarded.

        If the example dataset has 100 examples of length 1024 and the example length is 100, the resulting dataset
        will have 1000 examples of length 100. The remaining 24 samples from each example have been discarded.
        """
        if split_factor is not None and example_length is not None:
            # The docstring promises a *warning* with split_factor taking precedence; the previous code
            # raised the Warning as an exception, aborting instead of warning.
            import warnings

            warnings.warn("split_factor and example_length should not both be specified; using split_factor.")
        if not inplace:
            # ds = self.create_new_dataset(example_length=example_length)
            pass
        raise NotImplementedError
def generate_trimmed_example(
    example: np.ndarray, keep: str, trim_length: int, start: Optional[int] = None
) -> np.ndarray:
    """Takes in an IQ example as input and returns a trimmed example.

    :param example: The example to be trimmed (channels x samples).
    :type example: np.ndarray
    :param keep: The position the trimming occurs from ("start", "end", "middle", or "random").
    :type keep: str
    :param trim_length: The desired length of the trimmed example.
    :type trim_length: int
    :param start: The starting index if keep = "middle" or "random".
    :type start: int, optional
    :return: The trimmed example.
    :rtype: np.ndarray
    """
    if keep == "start":
        return example[:, :trim_length]
    if keep == "end":
        return example[:, -trim_length:]
    # "middle" and "random" both take trim_length samples beginning at the caller-provided start index.
    return example[:, start : start + trim_length]

View File

@ -0,0 +1,211 @@
"""
This package contains the ``DatasetLicense`` class and a bunch of off-the-shelf implementations for several common
license types.
Common license types for datasets courtesy of the University of Calgary:
`Common license types for datasets and what they mean <https://libanswers.ucalgary.ca/faq/200582>`_
.. note::
License descriptions are provided for informational purposes only and should not be construed as legal advice.
For legal guidance, please refer to official licence documentation and consult with legal professionals specializing
in software and dataset licensing.
.. note::
When licensing datasets, it's recommended to use licenses specifically designed for data, rather than using
software licenses such as MIT, Apache, or GPL.
"""
__all__ = [
"DatasetLicense",
"PUBLIC_DOMAIN",
"CC_0",
"CC_BY",
"CC_BY_NC",
"CC_BY_NC_ND",
"CC_BY_NC_SA",
"CC_BY_ND",
"CC_BY_SA",
"ODC_BY",
"ODC_PDDL",
"ODC_ODbL",
"RESTRICTED",
]
from .dataset_license import DatasetLicense
PUBLIC_DOMAIN = DatasetLicense(
name="Public Domain (No License)",
identifier=None,
description="Technically not a license, the public domain mark relinquishes all rights to a dataset and "
"dedicates the dataset to the public domain.",
licence="https://creativecommons.org/public-domain/pdm/",
)
"""
`Public Domain <https://creativecommons.org/public-domain/pdm/>`_: Technically not a license, the public domain mark
relinquishes all rights to a dataset and dedicates the dataset to the public domain.
"""
CC_0 = DatasetLicense(
name="Creative Commons Public Domain Dedication",
identifier="CC0-1.0",
description="A Creative Commons license and is like a public domain dedication. The copyright holder "
"surrenders rights in a dataset using this license.",
licence="https://creativecommons.org/publicdomain/zero/1.0/",
)
"""
`Creative Commons Public Domain Dedication <https://creativecommons.org/public-domain/pdm/>`_: A Creative Commons
license and is like a public domain dedication. The copyright holder surrenders rights in a dataset using this license.
"""
ODC_PDDL = DatasetLicense(
name="Open Data Commons Public Domain Dedication and License",
identifier="PDDL-1.0",
description="This license is one of the Open Data Commons licenses and is like a public domain dedication. "
"The copyright holder surrenders rights in a dataset using this license.",
licence="https://opendatacommons.org/licenses/pddl/",
)
"""
`Open Data Commons Public Domain Dedication and License <https://opendatacommons.org/licenses/pddl/>`_: This license
is one of the Open Data Commons licenses and is like a public domain dedication. The copyright holder surrenders rights
in a dataset using this license.
"""
CC_BY = DatasetLicense(
name="Creative Commons Attribution 4.0 International",
identifier="CC-BY-4.0",
description="This license is one of the open Creative Commons licenses and allows users to share and adapt "
"the dataset so long as they give credit to the copyright holder.",
licence="https://creativecommons.org/licenses/by/4.0/",
)
"""
`Creative Commons Attribution 4.0 International <https://creativecommons.org/licenses/by/4.0/>`_: This license is one
of the open Creative Commons licenses and allows users to share and adapt the dataset so long as they give credit to
the copyright holder.
"""
ODC_BY = DatasetLicense(
name="Open Data Commons Attribution License",
identifier="ODC-By-1.0",
description="This license is one of the Open Data Commons licenses and allows users to share and adapt the "
"dataset as long as they give credit to the copyright holder.",
licence="https://opendatacommons.org/licenses/by/",
)
"""
`Open Data Commons Attribution License <https://opendatacommons.org/licenses/by/>`_: This license is one of the Open
Data Commons licenses and allows users to share and adapt the dataset as long as they give credit to the copyright
holder.
"""
CC_BY_SA = DatasetLicense(
name="Creative Commons Attribution-ShareAlike 4.0 International",
identifier="CC-BY-SA-4.0",
description="This license is one of the open Creative Commons licenses and allows users to share and adapt "
"the dataset as long as they give credit to the copyright holder and distribute any additions, "
"transformations or changes to the dataset under this same license.",
licence="https://creativecommons.org/licenses/by-sa/4.0/",
)
"""
`Creative Commons Attribution-ShareAlike 4.0 International <https://creativecommons.org/licenses/by-sa/4.0/>`_: This
license is one of the open Creative Commons licenses and allows users to share and adapt the dataset as long as they
give credit to the copyright holder and distribute any additions, transformations or changes to the dataset under
this same license.
"""
ODC_ODbL = DatasetLicense(
name="Open Data Commons Open Database License",
identifier="ODbL-1.0",
description="This license is one of the Open Data Commons licenses and allows users to share and adapt the "
"dataset as long as they give credit to the copyright holder and distribute any additions, "
"transformation or changes to the dataset.",
licence="https://opendatacommons.org/licenses/odbl/",
)
"""
`Open Data Commons Open Database License <https://opendatacommons.org/licenses/odbl/>`_: This license is one of the
Open Data Commons licenses and allows users to share and adapt the dataset as long as they give credit to the copyright
holder and distribute any additions, transformation or changes to the dataset.
"""
CC_BY_NC = DatasetLicense(
name="Creative Commons Attribution-NonCommercial 4.0 International",
identifier="CC-BY-NC-4.0",
description="This license is one of the Creative Commons licenses and allows users to share and adapt the "
"dataset if they give credit to the copyright holder and do not use the dataset for any "
"commercial purposes.",
licence="https://creativecommons.org/licenses/by-nc/4.0/",
)
"""
`Creative Commons Attribution-NonCommercial 4.0 International <https://creativecommons.org/licenses/by-nc/4.0/>`_: This
license is one of the Creative Commons licenses and allows users to share and adapt the dataset if they give credit to
the copyright holder and do not use the dataset for any commercial purposes.
"""
CC_BY_ND = DatasetLicense(
name="Creative Commons Attribution-NoDerivatives 4.0 International",
identifier="CC-BY-ND-4.0",
description="This license is one of the Creative Commons licenses and allows users to share the dataset if "
"they give credit to copyright holder, but they cannot make any additions, transformations or "
"changes to the dataset under this license.",
licence="https://creativecommons.org/licenses/by-nd/4.0/",
)
"""
`Creative Commons Attribution-NoDerivatives 4.0 International <https://creativecommons.org/licenses/by-nd/4.0/>`_: This
license is one of the Creative Commons licenses and allows users to share the dataset if they give credit to copyright
holder, but they cannot make any additions, transformations or changes to the dataset under this license.
"""
CC_BY_NC_SA = DatasetLicense(
name="Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International",
identifier="CC-BY-NC-SA-4.0",
description="This license is one of the Creative Commons licenses and allows users to share the dataset only "
"if they (1) give credit to the copyright holder, (2) do not use the dataset for any commercial "
"purposes, and (3) distribute any additions, transformations or changes to the dataset under this "
"same license.",
licence="https://creativecommons.org/licenses/by-nc-sa/4.0/",
)
"""
`Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International
<https://creativecommons.org/licenses/by-nc-sa/4.0/>`_: This license is one of the Creative Commons licenses and allows
users to share the dataset only if they (1) give credit to the copyright holder, (2) do not use the dataset for any
commercial purposes, and (3) distribute any additions, transformations or changes to the dataset under this same
license.
"""
CC_BY_NC_ND = DatasetLicense(
name="Creative Commons Attribution-NonCommercial-NoDerivatives 4.0 International",
identifier="CC-BY-NC-ND-4.0",
description="This license is one of the Creative Commons licenses and allows users to use only your "
"unmodified dataset if they give credit to the copyright holder and do not share it for "
"commercial purposes. Users cannot make any additions, transformations or changes to the dataset"
"under this license.",
licence="https://creativecommons.org/licenses/by-nc-nd/4.0/",
)
"""
`Creative Commons Attribution-NonCommercial-NoDerivatives 4.0 International
<https://creativecommons.org/licenses/by-nc-nd/4.0/>`_: This license is one of the Creative Commons licenses and allows
users to use only your unmodified dataset if they give credit to the copyright holder and do not share it for
commercial purposes. Users cannot make any additions, transformations or changes to the dataset under this license.
"""
RESTRICTED = DatasetLicense(
name="Restricted (All Rights Reserved)",
identifier="Restricted",
description="All rights reserved. No permissions granted for use, modification, or distribution of the dataset.",
licence="Restricted (All Rights Reserved)",
)
"""
Restricted (All Rights Reserved): No permissions granted for use, modification, or distribution of the dataset.
"""

View File

@ -0,0 +1,13 @@
from dataclasses import dataclass
@dataclass
class DatasetLicense:
    """
    Represents a dataset license.

    A simple value object used by dataset builders to describe the terms under which a dataset is
    distributed; ready-made instances for common license types are provided alongside this class.
    """

    name: str  #: The name or title of the license.
    identifier: str | None  #: SPDX short identifier, or None if one does not exist.
    description: str  #: A description of the license.
    licence: str  #: Full license text or URL if the license is available online.

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,57 @@
from __future__ import annotations
import os
from abc import ABC
from utils.data.datasets.radio_dataset import RadioDataset
class SpectDataset(RadioDataset, ABC):
    """A ``SpectDataset`` is a ``RadioDataset`` tailored for machine learning tasks that involve processing
    radiofrequency (RF) signals represented as spectrograms. This class is integrated with vision frameworks,
    allowing you to leverage models and techniques from the field of computer vision for analyzing and processing
    radio signal spectrograms.

    For machine learning tasks that involve processing on IQ samples, please use
    utils.data.datasets.IQDataset instead.

    This is an abstract interface defining common properties and behaviour of SpectDatasets. Therefore, this class
    should not be instantiated directly. Instead, it is subclassed to define custom interfaces for specific machine
    learning backends.

    :param source: Path to the dataset source file. For more information on dataset source files
        and their format, see :doc:`radio_datasets`.
    :type source: str or os.PathLike
    """

    def __init__(self, source: str | os.PathLike):
        """Create a new SpectDataset."""
        super().__init__(source=source)

    @property
    def shape(self) -> tuple[int, ...]:
        """Spectrogram datasets are M x C x H x W, where M is the number of examples, C is the number of image
        channels, H is the height of the spectrogram, and W is the width of the spectrogram.

        :return: The shape of the dataset. The elements of the shape tuple give the lengths of the corresponding
            dataset dimensions.
        :type: tuple of ints
        """
        # Delegates to RadioDataset; this override exists to document the spectrogram-specific dimension layout.
        return super().shape

    def default_augmentations(self) -> list[callable]:
        """Returns the list of default augmentations for spectrogram datasets.

        .. todo:: This method is not yet implemented.

        :return: A list of default augmentations.
        :rtype: list[callable]
        """
        # Consider the following list of default augmentations:
        #  #. horizontal_flip
        #  #. vertical_flip
        #  #. sharpen
        #  #. darken
        #  #. lighten
        #  #. linear_rotate
        raise NotImplementedError

View File

@ -0,0 +1,317 @@
import math
import os
from collections import Counter
from typing import Optional
import numpy as np
from numpy.random import Generator
from utils.data.datasets import RadioDataset
from utils.data.datasets.h5helpers import copy_over_example, make_empty_clone
def split(dataset: RadioDataset, lengths: list[int | float]) -> list[RadioDataset]:
    """Split a radio dataset into non-overlapping new datasets of given lengths.

    Recordings are long-form tapes, which can be obtained either from a software-defined radio (SDR) or generated
    synthetically. Then, radio datasets are curated from collections of recordings by segmenting these
    longer-form tapes into shorter units called slices.

    For each slice in the dataset, the metadata should include the unique ID of the recording from which the example
    was cut ('rec_id'). To avoid leakage, all examples with the same 'rec_id' are assigned only to one of the new
    datasets. This ensures, for example, that slices cut from the same recording do not appear in both the training
    and test datasets.

    This restriction makes it challenging to generate datasets with the exact lengths specified. To get as close as
    possible, this method uses a greedy algorithm, which assigns the recordings with the most slices first, working
    down to those with the fewest. This may not always provide a perfect split, but it works well in most practical
    cases.

    This function is deterministic, meaning it will always produce the same split. For a random split, see
    utils.data.datasets.random_split.

    :param dataset: Dataset to be split.
    :type dataset: RadioDataset
    :param lengths: Lengths or fractions of splits to be produced. If given a list of fractions, the list should
        sum up to 1. The lengths will be computed automatically as ``floor(frac * len(dataset))`` for each fraction
        provided, and any remainders will be distributed in round-robin fashion.
    :type lengths: list of ints (lengths) or floats (fractions)
    :raises ValueError: If 'dataset' is not a RadioDataset, if 'lengths' is invalid, or if the dataset has no
        usable string 'rec_id' metadata field or fewer distinct recordings than requested splits.
    :return: List of radio datasets. The number of returned datasets will correspond to the length of the provided
        'lengths' list.
    :rtype: list of RadioDataset

    **Examples:**

    >>> import os
    >>> import random
    >>> import string
    >>> import numpy as np
    >>> import pandas as pd
    >>> from utils.data.datasets import split

    First, let's generate some random data:

    >>> shape = (24, 1, 1024)  # 24 examples, each of length 1024
    >>> real_part, imag_part = np.random.randint(0, 12, size=shape), np.random.randint(0, 79, size=shape)
    >>> data = real_part + 1j * imag_part

    Then, a list of recording IDs. Let's pretend this data was cut from 4 separate recordings:

    >>> rec_id_options = [''.join(random.choices(string.ascii_lowercase + string.digits, k=256)) for _ in range(4)]
    >>> rec_id = [np.random.choice(rec_id_options) for _ in range(shape[0])]

    Using this data and metadata, let's initialize a dataset:

    >>> metadata = pd.DataFrame(data={"rec_id": rec_id}).to_records(index=False)
    >>> fid = os.path.join(os.getcwd(), "source_file.hdf5")
    >>> ds = RadioDataset(source=fid)

    Finally, let's do an 80/20 train-test split:

    >>> train_ds, test_ds = split(ds, lengths=[0.8, 0.2])
    """
    if not isinstance(dataset, RadioDataset):
        raise ValueError(f"'dataset' must be RadioDataset or one of its subclasses, got {type(dataset)}.")
    # Resolve fractions to concrete integer lengths (and validate int lengths against the dataset size).
    lengths_ = _validate_lengths(dataset=dataset, lengths=lengths)
    if "rec_id" not in dataset.metadata or not isinstance(dataset.metadata["rec_id"][0], str):
        raise ValueError("Dataset missing string field 'rec_id'.")
    # Map each distinct recording ID to the number of slices cut from it.
    rec_ids = dict(Counter(dataset.metadata["rec_id"]))
    if len(rec_ids) < len(lengths_):
        raise ValueError(f"Not enough Recordings IDs in the dataset for a {len(lengths_)}-way split.")
    # Sort the rec_ids in descending order by frequency.
    ids, freqs = list(rec_ids.keys()), list(rec_ids.values())
    sorted_indices = np.flip(np.argsort(freqs))
    sorted_rec_ids = [ids[x] for x in sorted_indices]
    sorted_freqs = [freqs[x] for x in sorted_indices]
    # Preallocate keys, which we'll use to track which recordings are assigned to which subsets.
    split_key_ids = [[] for _ in range(len(lengths_))]
    split_key_freqs = [[] for _ in range(len(lengths_))]
    for i in range(len(rec_ids)):
        # Find the subset whose current length is farthest from its target length.
        current_lengths = [sum(subkey) for subkey in split_key_freqs]
        diffs = [lengths_[j] - current_lengths[j] for j in range(len(lengths_))]
        index = np.argmax(diffs)
        # Add the 'rec_id' with the highest frequency to the subset farthest from its target.
        split_key_freqs[index].append(sorted_freqs[i])
        split_key_ids[index].append(sorted_rec_ids[i])
    # Sanity check: every recording ID assigned exactly once across the subsets.
    _validate_sublists(list_of_lists=split_key_ids, ids=ids)
    return _split_datasets(dataset=dataset, key=split_key_ids)
def random_split(
    dataset: RadioDataset, lengths: list[int | float], generator: Optional[Generator] = None
) -> list[RadioDataset]:
    """Randomly split a radio dataset into non-overlapping new datasets of given lengths.

    Recordings are long-form tapes, which can be obtained either from a software-defined radio (SDR) or generated
    synthetically. Then, radio datasets are curated from collections of recordings by segmenting these
    longer-form tapes into shorter units called slices.

    For each slice in the dataset, the metadata should include the unique recording ID ('rec_id') of the recording
    from which the example was cut. To avoid leakage, all examples with the same 'rec_id' are assigned only to one of
    the new datasets. This ensures, for example, that slices cut from the same recording do not appear in both the
    training and test datasets.

    This restriction makes it unlikely that a random split will produce datasets with the exact lengths specified.
    If it is important to ensure the closest possible split, consider using utils.data.datasets.split instead.

    :param dataset: Dataset to be split.
    :type dataset: RadioDataset
    :param lengths: Lengths or fractions of splits to be produced. If given a list of fractions, the list should
        sum up to 1. The lengths will be computed automatically as ``floor(frac * len(dataset))`` for each fraction
        provided, and any remainders will be distributed in round-robin fashion.
    :type lengths: list of ints (lengths) or floats (fractions)
    :param generator: Random generator. Defaults to None.
    :type generator: NumPy Generator Object, optional.
    :return: List of radio datasets. The number of returned datasets will correspond to the length of the provided
        'lengths' list.
    :rtype: list of RadioDataset

    See Also:
        utils.data.datasets.split: Usage is the same as for ``random_split()``.
    """
    if not isinstance(dataset, RadioDataset):
        raise ValueError(f"'dataset' must be RadioDataset or one of its subclasses, got {type(dataset)}.")
    lengths_ = _validate_lengths(dataset=dataset, lengths=lengths)
    if generator is None:
        rng = np.random.default_rng(np.random.randint(0, np.iinfo(np.int32).max))
    else:
        rng = generator
    if "rec_id" not in dataset.metadata or not isinstance(dataset.metadata["rec_id"][0], str):
        raise ValueError("Dataset missing string field 'rec_id'.")
    rec_ids = dict(Counter(dataset.metadata["rec_id"]))
    if len(rec_ids) < len(lengths_):
        raise ValueError(f"Not enough Recordings IDs in the dataset for a {len(lengths_)}-way split.")
    # Sort the rec_ids in descending order by frequency (number of slices cut from each recording).
    ids, freqs = list(rec_ids.keys()), list(rec_ids.values())
    sorted_indices = np.flip(np.argsort(freqs))
    sorted_rec_ids = [ids[x] for x in sorted_indices]
    sorted_freqs = [freqs[x] for x in sorted_indices]
    # Preallocate keys, which we'll use to track which recordings are assigned to which subsets.
    n = len(lengths_)
    split_key_ids = [[] for _ in range(n)]
    split_key_freqs = [[] for _ in range(n)]
    # Taking from the bottom (least frequent), assign one recording to each subset. This is important to ensure we
    # don't end up with any empty subsets, and serves to help randomize the results.
    top_rec_ids, bottom_rec_ids = sorted_rec_ids[:-n], sorted_rec_ids[-n:]
    top_freqs, bottom_freqs = sorted_freqs[:-n], sorted_freqs[-n:]
    bottom_indices = rng.permutation(x=np.asarray(range(n)))
    for i in range(n):
        split_key_freqs[i].append(bottom_freqs[bottom_indices[i]])
        split_key_ids[i].append(bottom_rec_ids[bottom_indices[i]])
    for i in range(len(top_rec_ids)):
        # Find the subset whose current length is farthest from its target length.
        current_lengths = np.array([sum(subkey) for subkey in split_key_freqs])
        diffs = np.array([lengths_[j] - current_lengths[j] for j in range(n)])
        # Use the normalized diffs as probabilities. This results in a higher probability for larger diffs.
        diffs = np.asarray([0 if d < 0 else d for d in diffs])  # Don't add to full or overfull subsets.
        total_diff = diffs.sum()
        if total_diff > 0:
            probabilities = diffs / total_diff
        else:
            # Every subset is already at or over its target length but recordings remain. Fall back to a
            # uniform choice rather than dividing by zero, which would yield NaN probabilities and make
            # rng.choice() raise.
            probabilities = np.full(n, 1 / n)
        index = rng.choice(range(n), p=probabilities)
        # Add the 'rec_id' with the highest frequency to the chosen subset.
        split_key_freqs[index].append(top_freqs[i])
        split_key_ids[index].append(top_rec_ids[i])
    _validate_sublists(list_of_lists=split_key_ids, ids=ids)
    return _split_datasets(dataset=dataset, key=split_key_ids, generator=rng)
def _validate_lengths(dataset: RadioDataset, lengths: list[int | float]) -> list[int]:
"""Validate lengths. If lengths are fractions of splits, lengths will be computed automatically.
:param dataset: Dataset to be split.
:type dataset: RadioDataset
:param: lengths: Lengths or fractions of splits to be produced.
:type lengths: list of ints (lengths) or floats (fractions)
:return: List of lengths to be produced.
:rtype: list of ints
"""
if not isinstance(lengths, list):
raise ValueError(f"'lengths' must be a list of ints or a list of floats, got {type(lengths)}.")
if len(lengths) < 2:
raise ValueError("'lengths' list must contain at least 2 elements.")
if not all(isinstance(sub, type(lengths[0])) for sub in lengths[1:]):
raise ValueError("All elements of 'lengths' must be of the same type.")
if sum(lengths) == len(dataset):
return [int(i) for i in lengths]
elif math.isclose(sum(lengths), 1, abs_tol=1e-9):
# Fractions of splits, which add to 1.
lengths_ = [math.floor(f * len(dataset)) for f in lengths]
# Distribute remainders in round-robin fashion to the lengths until there are no remainders left.
i = 0
while len(dataset) > sum(lengths_):
lengths_[i] = lengths_[i] + 1
i = i + 1
return lengths_
else:
raise ValueError("'lengths' must sum to either the length of 'dataset' or 1.")
def _validate_sublists(list_of_lists: list[list[str]], ids: list[str]) -> None:
"""Ensure that each ID is present in one and only one sublist."""
all_elements = [item for sublist in list_of_lists for item in sublist]
assert len(all_elements) == len(set(all_elements)) and list(set(ids)).sort() == list(set(all_elements)).sort()
def _generate_split_source_filenames(
parent_dataset: RadioDataset, n_new_datasets: int, generator: Generator
) -> list[str]:
"""Generate source filenames for each new dataset.
Examples:
.../file_name.hdf5 -> [
.../file_name.split66ce07f-0.hdf5,
.../file_name.split66ce07f-1.hdf5,
.../file_name.split66ce07f-2.hdf5
]
.../file_name.002.hdf5 -> [
.../file_name.002.split156afd7-0.hdf5,
.../file_name.002.split156afd7-1.hdf5,
.../file_name.002.split156afd7-2.hdf5
]
"""
parent_file_name = str(parent_dataset.source)
parent_base_name = os.path.splitext(parent_file_name)[0]
random_tag = generator.bytes(length=4).hex()[:7]
return [f"{parent_base_name}.split{random_tag}-{i}.hdf5" for i in range(n_new_datasets)]
def _split_datasets(
    dataset: RadioDataset, key: list[list[str]], generator: Optional[Generator] = None
) -> list[RadioDataset]:
    """Materialize the split described by 'key' into new dataset source files.

    Once we know how we'd like to split up the dataset (i.e., which slices are to be included in which new
    dataset), this helper function does the actual split.

    :param dataset: Dataset to be split.
    :type dataset: RadioDataset
    :param key: A key indicating which slices are to be included in which dataset. This is a list of lists, where
        each sublist contains the recordings IDs of the slices to be included in the corresponding subset.
    :type key: A list of lists
    :param generator: Random generator (used to tag the new filenames). Defaults to None.
    :type generator: NumPy Generator Object, optional.
    :return: Non-overlapping datasets
    :rtype: list of RadioDataset
    """
    if generator is None:
        rng = np.random.default_rng(np.random.randint(0, np.iinfo(np.int32).max))
    else:
        rng = generator
    destinations = _generate_split_source_filenames(
        parent_dataset=dataset, n_new_datasets=len(key), generator=rng
    )
    # Create an empty, schema-compatible source file for every subset before copying anything over.
    for destination in destinations:
        make_empty_clone(original_source=dataset.source, new_source=destination, example_length=len(dataset.data[0, 0]))
    subsets = [dataset.__class__(source=destination) for destination in destinations]
    all_rec_ids = list(dataset.metadata["rec_id"])
    for subset_idx, assigned_ids in enumerate(key):
        for assigned_id in assigned_ids:
            # Copy every example cut from this recording into the matching subset, preserving example order.
            for example_idx, rec_id in enumerate(all_rec_ids):
                if rec_id == assigned_id:
                    copy_over_example(source=dataset.source, destination=subsets[subset_idx].source, idx=example_idx)
    return subsets

View File

@ -0,0 +1,763 @@
from __future__ import annotations
import copy
import datetime
import hashlib
import json
import os
import re
import time
import warnings
from typing import Any, Iterator, Optional
import numpy as np
from numpy.typing import ArrayLike
from quantiphy import Quantity
from utils.data.annotation import Annotation
PROTECTED_KEYS = ["rec_id", "timestamp"]
class Recording:
"""Tape of complex IQ (in-phase and quadrature) samples with associated metadata and annotations.
Recording data is a complex array of shape C x N, where C is the number of channels
and N is the number of samples in each channel.
Metadata is stored in a dictionary of key value pairs,
to include information such as sample_rate and center_frequency.
Annotations are a list of :ref:`Annotation <utils.data.Annotation>`,
defining bounding boxes in time and frequency with labels and metadata.
Here, signal data is represented as a NumPy array. This class is then extended in the RIA Backends to provide
support for different data structures, such as Tensors.
Recordings are long-form tapes can be obtained either from a software-defined radio (SDR) or generated
synthetically. Then, machine learning datasets are curated from collection of recordings by segmenting these
longer-form tapes into shorter units called slices.
All recordings are assigned a unique 64-character recording ID, ``rec_id``. If this field is missing from the
provided metadata, a new ID will be generated upon object instantiation.
:param data: Signal data as a tape IQ samples, either C x N complex, where C is the number of
channels and N is number of samples in the signal. If data is a one-dimensional array of complex samples with
length N, it will be reshaped to a two-dimensional array with dimensions 1 x N.
:type data: array_like
:param metadata: Additional information associated with the recording.
:type metadata: dict, optional
:param annotations: A collection of ``Annotation`` objects defining bounding boxes.
:type annotations: list of Annotations, optional
:param dtype: Explicitly specify the data-type of the complex samples. Must be a complex NumPy type, such as
``np.complex64`` or ``np.complex128``. Default is None, in which case the type is determined implicitly. If
``data`` is a NumPy array, the Recording will use the dtype of ``data`` directly without any conversion.
:type dtype: numpy dtype object, optional
:param timestamp: The timestamp when the recording data was generated. If provided, it should be a float or integer
representing the time in seconds since epoch (e.g., ``time.time()``). Only used if the `timestamp` field is not
present in the provided metadata.
:type dtype: float or int, optional
:raises ValueError: If data is not complex 1xN or CxN.
:raises ValueError: If metadata is not a python dict.
:raises ValueError: If metadata is not json serializable.
:raises ValueError: If annotations is not a list of valid annotation objects.
**Examples:**
>>> import numpy
>>> from utils.data import Recording, Annotation
>>> # Create an array of complex samples, just 1s in this case.
>>> samples = numpy.ones(10000, dtype=numpy.complex64)
>>> # Create a dictionary of relevant metadata.
>>> sample_rate = 1e6
>>> center_frequency = 2.44e9
>>> metadata = {
... "sample_rate": sample_rate,
... "center_frequency": center_frequency,
... "author": "me",
... }
>>> # Create an annotation for the annotations list.
>>> annotations = [
... Annotation(
... sample_start=0,
... sample_count=1000,
... freq_lower_edge=center_frequency - (sample_rate / 2),
... freq_upper_edge=center_frequency + (sample_rate / 2),
... label="example",
... )
... ]
>>> # Store samples, metadata, and annotations together in a convenient object.
>>> recording = Recording(data=samples, metadata=metadata, annotations=annotations)
>>> print(recording.metadata)
{'sample_rate': 1000000.0, 'center_frequency': 2440000000.0, 'author': 'me'}
>>> print(recording.annotations[0].label)
'example'
"""
def __init__(  # noqa C901
    self,
    data: ArrayLike | list[list],
    metadata: Optional[dict[str, Any]] = None,
    dtype: Optional[np.dtype] = None,
    timestamp: Optional[float | int] = None,
    annotations: Optional[list[Annotation]] = None,
):
    """Create a new Recording; see the class docstring for full parameter details."""
    data_arr = np.asarray(data)
    if np.iscomplexobj(data_arr):
        # Expect C x N
        if data_arr.ndim == 1:
            self._data = np.expand_dims(data_arr, axis=0)  # N -> 1 x N
        elif data_arr.ndim == 2:
            self._data = data_arr
        else:
            raise ValueError("Complex data must be C x N.")
    else:
        raise ValueError("Input data must be complex.")
    if dtype is not None:
        self._data = self._data.astype(dtype)
        # Guard against an explicit dtype that silently cast the samples to a real type.
        assert np.iscomplexobj(self._data)
    if metadata is None:
        self._metadata = {}
    elif isinstance(metadata, dict):
        # NOTE(review): the caller's dict is stored directly (not copied), so the 'timestamp'/'rec_id'
        # defaults added below are visible to the caller as well.
        self._metadata = metadata
    else:
        raise ValueError(f"Metadata must be a python dict, but was {type(metadata)}.")
    if not _is_jsonable(metadata):
        raise ValueError("Value must be JSON serializable.")
    if "timestamp" not in self.metadata:
        # Prefer the explicit 'timestamp' argument; otherwise stamp with the current time.
        if timestamp is not None:
            if not isinstance(timestamp, (int, float)):
                raise ValueError(f"timestamp must be int or float, not {type(timestamp)}")
            self._metadata["timestamp"] = timestamp
        else:
            self._metadata["timestamp"] = time.time()
    else:
        # A 'timestamp' already present in metadata wins over the constructor argument.
        if not isinstance(self._metadata["timestamp"], (int, float)):
            raise ValueError("timestamp must be int or float, not ", type(self._metadata["timestamp"]))
    if "rec_id" not in self.metadata:
        # Every recording gets a unique ID; derived from the data and timestamp.
        self._metadata["rec_id"] = generate_recording_id(data=self.data, timestamp=self._metadata["timestamp"])
    if annotations is None:
        self._annotations = []
    elif isinstance(annotations, list):
        self._annotations = annotations
    else:
        raise ValueError("Annotations must be a list or None.")
    if not all(isinstance(annotation, Annotation) for annotation in self._annotations):
        raise ValueError("All elements in self._annotations must be of type Annotation.")
    # Cursor used for iteration over the recording.
    self._index = 0
@property
def data(self) -> np.ndarray:
"""
:return: Recording data, as a complex array.
:type: np.ndarray
.. note::
For recordings with more than 1,024 samples, this property returns a read-only view of the data.
.. note::
To access specific samples, consider indexing the object directly with ``rec[c, n]``.
"""
if self._data.size > 1024:
# Returning a read-only view prevents mutation at a distance while maintaining performance.
v = self._data.view()
v.setflags(write=False)
return v
else:
return self._data.copy()
@property
def metadata(self) -> dict:
"""
:return: Dictionary of recording metadata.
:type: dict
"""
return self._metadata.copy()
@property
def annotations(self) -> list[Annotation]:
"""
:return: List of recording annotations
:type: list of Annotation objects
"""
return self._annotations.copy()
@property
def shape(self) -> tuple[int]:
"""
:return: The shape of the data array.
:type: tuple of ints
"""
return np.shape(self.data)
@property
def n_chan(self) -> int:
"""
:return: The number of channels in the recording.
:type: int
"""
return self.shape[0]
@property
def rec_id(self) -> str:
"""
:return: Recording ID.
:type: str
"""
return self.metadata["rec_id"]
@property
def dtype(self) -> str:
"""
:return: Data-type of the data array's elements.
:type: numpy dtype object
"""
return self.data.dtype
@property
def timestamp(self) -> float | int:
"""
:return: Recording timestamp (time in seconds since epoch).
:type: float or int
"""
return self.metadata["timestamp"]
@property
def sample_rate(self) -> float | None:
"""
:return: Sample rate of the recording, or None is 'sample_rate' is not in metadata.
:type: str
"""
return self.metadata.get("sample_rate")
@sample_rate.setter
def sample_rate(self, sample_rate: float | int) -> None:
    """Set (or update) the sample rate of the recording.

    :param sample_rate: The sample rate of the recording.
    :type sample_rate: float or int
    :raises ValueError: If sample_rate is not JSON serializable.
    :return: None
    """
    # update_metadata() adds the key if absent and overwrites it otherwise. add_to_metadata() would raise
    # on every assignment after the first, which would make this setter unusable for re-tuning.
    self.update_metadata(key="sample_rate", value=sample_rate)
def astype(self, dtype: np.dtype) -> Recording:
    """Return a copy of the recording with its data cast to the specified type.

    :param dtype: Data-type to which the array is cast. Must be a complex scalar type, such as ``np.complex64``
        or ``np.complex128``.
    :type dtype: NumPy data type

    .. note: Casting to a data type with less precision can risk losing data by truncating or rounding values,
        potentially resulting in a loss of accuracy and significant information.

    :raises ValueError: If 'dtype' is not a complex number scalar type.
    :return: A new recording with the same metadata and annotations, with data of type 'dtype'.
    :rtype: Recording
    """
    # Rather than check for a valid datatype up front, cast and inspect the result. Type aliases vary
    # across platforms, so checking the outcome is the portable approach.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")  # Casting may generate user warnings. E.g., complex -> real
        cast_data = self.data.astype(dtype)
    if not np.iscomplexobj(cast_data):
        raise ValueError("dtype must be a complex number scalar type.")
    return Recording(data=cast_data, metadata=self.metadata, annotations=self.annotations)
def add_to_metadata(self, key: str, value: Any) -> None:
    """Add a brand-new key-value pair to the recording metadata.

    To change the value of a key that already exists, use ``Recording.update_metadata()`` instead.

    :param key: New metadata key, must be snake_case.
    :type key: str
    :param value: Corresponding metadata value.
    :type value: any
    :raises ValueError: If key is already in metadata or if key is not a valid metadata key.
    :raises ValueError: If value is not JSON serializable.
    :return: None.

    **Examples:**

    Create a recording and add metadata:

    >>> import numpy
    >>> from utils.data import Recording
    >>> samples = numpy.ones(10000, dtype=numpy.complex64)
    >>> metadata = {
    ...     "sample_rate": 1e6,
    ...     "center_frequency": 2.44e9,
    ... }
    >>> recording = Recording(data=samples, metadata=metadata)
    >>> recording.add_to_metadata(key="author", value="me")
    >>> recording.metadata["author"]
    'me'
    """
    # Validation order matters for which error the caller sees: duplicate key, then key format, then value.
    if key in self.metadata:
        raise ValueError(
            f"Key {key} already in metadata. Use Recording.update_metadata() to modify existing fields."
        )
    if not _is_valid_metadata_key(key):
        raise ValueError(f"Invalid metadata key: {key}.")
    if not _is_jsonable(value):
        raise ValueError("Value must be JSON serializable.")
    self._metadata[key] = value
def update_metadata(self, key: str, value: Any) -> None:
    """Update the value of an existing metadata key, or add the key-value pair if it does not already exist.

    :param key: Existing (or new) metadata key.
    :type key: str
    :param value: New value to enter at key.
    :type value: any
    :raises ValueError: If value is not JSON serializable.
    :raises ValueError: If key is protected.
    :return: None.

    **Examples:**

    Create a recording and update metadata:

    >>> import numpy
    >>> from utils.data import Recording
    >>> samples = numpy.ones(10000, dtype=numpy.complex64)
    >>> metadata = {
    ...     "sample_rate": 1e6,
    ...     "center_frequency": 2.44e9,
    ...     "author": "me",
    ... }
    >>> recording = Recording(data=samples, metadata=metadata)
    >>> recording.update_metadata(key="author", value="you")
    >>> recording.metadata["author"]
    'you'
    """
    # Protected keys may be neither modified nor (re)introduced through this method.
    if key in PROTECTED_KEYS:  # Check protected keys.
        raise ValueError(f"Key {key} is protected and cannot be modified or removed.")
    if key not in self.metadata:
        # add_to_metadata() performs its own key/value validation; return so the value is not set twice.
        self.add_to_metadata(key=key, value=value)
        return
    if not _is_jsonable(value):
        raise ValueError("Value must be JSON serializable.")
    self._metadata[key] = value
def remove_from_metadata(self, key: str):
    """Remove a key from the recording metadata.

    Protected keys cannot be removed.

    :param key: The key to remove.
    :type key: str
    :raises ValueError: If key is protected.
    :raises KeyError: If key is not present in the metadata.
    :return: None.

    **Examples:**

    Create a recording and remove a metadata key:

    >>> import numpy
    >>> from utils.data import Recording
    >>> samples = numpy.ones(10000, dtype=numpy.complex64)
    >>> metadata = {
    ...     "sample_rate": 1e6,
    ...     "center_frequency": 2.44e9,
    ...     "author": "me",
    ... }
    >>> recording = Recording(data=samples, metadata=metadata)
    >>> recording.remove_from_metadata(key="author")
    >>> "author" in recording.metadata
    False
    """
    # Guard first: protected keys must never leave the metadata.
    if key in PROTECTED_KEYS:
        raise ValueError(f"Key {key} is protected and cannot be modified or removed.")
    self._metadata.pop(key)
def view(self, output_path: Optional[str] = "images/signal.png", **kwargs) -> None:
    """Create a plot of various signal visualizations as a PNG image.

    :param output_path: The output image path. Defaults to "images/signal.png".
    :type output_path: str, optional
    :param kwargs: Keyword arguments passed on to utils.view.view_sig.
    :type: dict of keyword arguments

    **Examples:**

    Create a recording and view it as a plot in a .png image:

    >>> import numpy
    >>> from utils.data import Recording
    >>> samples = numpy.ones(10000, dtype=numpy.complex64)
    >>> metadata = {
    ...     "sample_rate": 1e6,
    ...     "center_frequency": 2.44e9,
    ... }
    >>> recording = Recording(data=samples, metadata=metadata)
    >>> recording.view()
    """
    # Imported lazily so that plotting dependencies are only required when visualization is requested.
    from utils.view import view_sig

    view_sig(recording=self, output_path=output_path, **kwargs)
def to_sigmf(self, filename: Optional[str] = None, path: Optional[os.PathLike | str] = None) -> None:
    """Write recording to a set of SigMF files.

    The SigMF io format is defined by the `SigMF Specification Project <https://github.com/sigmf/SigMF>`_

    :param filename: The name of the file where the recording is to be saved. Defaults to auto generated filename.
    :type filename: os.PathLike or str, optional
    :param path: The directory path to where the recording is to be saved. Defaults to recordings/.
    :type path: os.PathLike or str, optional
    :raises IOError: If there is an issue encountered during the file writing process.
    :return: None

    **Examples:**

    Create a recording and save it as a set of SigMF files:

    >>> import numpy
    >>> from utils.data import Recording
    >>> samples = numpy.ones(10000, dtype=numpy.complex64)
    >>> metadata = {
    ...     "sample_rate": 1e6,
    ...     "center_frequency": 2.44e9,
    ... }
    >>> recording = Recording(data=samples, metadata=metadata)
    >>> recording.to_sigmf()
    """
    # Imported lazily so the io backend is only required when actually writing files.
    from utils.io.recording import to_sigmf

    to_sigmf(filename=filename, path=path, recording=self)
def to_npy(self, filename: Optional[str] = None, path: Optional[os.PathLike | str] = None) -> str:
    """Write recording to ``.npy`` binary file.

    :param filename: The name of the file where the recording is to be saved. Defaults to auto generated filename.
    :type filename: os.PathLike or str, optional
    :param path: The directory path to where the recording is to be saved. Defaults to recordings/.
    :type path: os.PathLike or str, optional
    :raises IOError: If there is an issue encountered during the file writing process.
    :return: Path where the file was saved.
    :rtype: str

    **Examples:**

    Create a recording and save it to a .npy file:

    >>> import numpy
    >>> from utils.data import Recording
    >>> samples = numpy.ones(10000, dtype=numpy.complex64)
    >>> metadata = {
    ...     "sample_rate": 1e6,
    ...     "center_frequency": 2.44e9,
    ... }
    >>> recording = Recording(data=samples, metadata=metadata)
    >>> recording.to_npy()
    """
    # Imported lazily so the io backend is only required when actually writing files.
    from utils.io.recording import to_npy

    # Propagate the saved path: the signature and docstring promise a str return.
    return to_npy(recording=self, filename=filename, path=path)
def trim(self, num_samples: int, start_sample: Optional[int] = 0) -> Recording:
    """Trim Recording samples to a desired length, shifting annotations to maintain alignment.

    :param start_sample: The start index of the desired trimmed recording. Defaults to 0.
    :type start_sample: int, optional
    :param num_samples: The number of samples that the output trimmed recording will have.
    :type num_samples: int
    :raises IndexError: If start_sample + num_samples is greater than the length of the recording.
    :raises IndexError: If start_sample < 0.

    .. note:: NOTE(review): num_samples < 0 is not validated here; a negative value yields an empty slice
        rather than an IndexError — confirm whether that is the intended contract.

    :return: The trimmed Recording.
    :rtype: Recording

    **Examples:**

    Create a recording and trim it:

    >>> import numpy
    >>> from utils.data import Recording
    >>> samples = numpy.ones(10000, dtype=numpy.complex64)
    >>> metadata = {
    ...     "sample_rate": 1e6,
    ...     "center_frequency": 2.44e9,
    ... }
    >>> recording = Recording(data=samples, metadata=metadata)
    >>> print(len(recording))
    10000
    >>> trimmed_recording = recording.trim(start_sample=1000, num_samples=1000)
    >>> print(len(trimmed_recording))
    1000
    """
    if start_sample < 0:
        raise IndexError("start_sample cannot be < 0.")
    elif start_sample + num_samples > len(self):
        raise IndexError(
            f"start_sample {start_sample} + num_samples {num_samples} > recording length {len(self)}."
        )
    end_sample = start_sample + num_samples
    # Slice all channels over the [start_sample, end_sample) window.
    data = self.data[:, start_sample:end_sample]
    # Deep-copy so the source recording's annotations are left untouched.
    new_annotations = copy.deepcopy(self.annotations)
    for annotation in new_annotations:
        # trim annotation if it goes outside the trim boundaries
        if annotation.sample_start < start_sample:
            annotation.sample_count = annotation.sample_count - (start_sample - annotation.sample_start)
            annotation.sample_start = start_sample
        if annotation.sample_start + annotation.sample_count > end_sample:
            annotation.sample_count = end_sample - annotation.sample_start
        # shift annotation to align with the new start point
        # NOTE(review): annotations lying entirely outside the window are kept with a non-positive
        # sample_count rather than dropped — confirm downstream consumers tolerate that.
        annotation.sample_start = annotation.sample_start - start_sample
    return Recording(data=data, metadata=self.metadata, annotations=new_annotations)
def normalize(self) -> Recording:
    """Scale the recording data, relative to its maximum value, so that the magnitude of the maximum sample is 1.

    :raises ValueError: If every sample in the recording is zero (the scale factor would
        be undefined; the previous implementation silently produced NaN/Inf).
    :return: Recording where the maximum sample amplitude is 1.
    :rtype: Recording

    **Examples:**

    Create a recording with maximum amplitude 0.5 and normalize to a maximum amplitude of 1:

    >>> import numpy
    >>> from utils.data import Recording
    >>> samples = numpy.ones(10000, dtype=numpy.complex64) * 0.5
    >>> metadata = {
    ...     "sample_rate": 1e6,
    ...     "center_frequency": 2.44e9,
    ... }
    >>> recording = Recording(data=samples, metadata=metadata)
    >>> print(numpy.max(numpy.abs(recording.data)))
    0.5
    >>> normalized_recording = recording.normalize()
    >>> print(numpy.max(numpy.abs(normalized_recording.data)))
    1.0
    """
    peak = np.max(np.abs(self.data))
    if peak == 0:
        # Dividing by a zero peak would fill the recording with NaN/Inf.
        raise ValueError("Cannot normalize a recording whose samples are all zero.")
    scaled_data = self.data / peak
    return Recording(data=scaled_data, metadata=self.metadata, annotations=self.annotations)
def generate_filename(self, tag: Optional[str] = "rec"):
    """Generate a filename from metadata.

    The result is the concatenation ``<tag>_[<source>_]<frequency>Hz_<timestamp>_<rec_id[:7]>``,
    where ``source`` is included only when present in the metadata.

    :param tag: The string at the beginning of the generated filename. Default is "rec".
    :type tag: str, optional
    :return: A filename without an extension.
    :rtype: str
    """
    # TODO: This method should be refactored to use the first 7 characters of the 'rec_id' field.
    tag = tag + "_"
    source = self.metadata.get("source", "")
    if source != "":
        source = source + "_"
    # converts 1000 to 1k for example
    # NOTE(review): assumes str(Quantity(...)) renders as a magnitude followed by a single
    # SI-suffix character (e.g. "2.44G"); a plain numeric rendering such as "1000000"
    # would be split incorrectly below -- confirm Quantity's string format.
    center_frequency = str(Quantity(self.metadata.get("center_frequency", 0)))
    if center_frequency != "0":
        # Split the rendered quantity into its magnitude and one-character SI suffix.
        num = center_frequency[:-1]
        suffix = center_frequency[-1]
        # Round the magnitude so the filename carries an integer (e.g. "2GHz").
        num = int(np.round(float(num)))
    else:
        num = 0
        suffix = ""
    center_frequency = str(num) + suffix + "Hz_"
    # Whole-second timestamp formatted for filesystem safety (no colons).
    timestamp = int(self.timestamp)
    timestamp = datetime.datetime.fromtimestamp(timestamp).strftime("%Y-%m-%d_%H-%M-%S") + "_"
    # Add first seven characters of rec_id for uniqueness
    rec_id = self.rec_id[0:7]
    return tag + source + center_frequency + timestamp + rec_id
def __len__(self) -> int:
    """The length of a recording is the number of complex samples in each channel."""
    num_samples = self.shape[1]  # shape is (channels, samples)
    return num_samples
def __eq__(self, other: Recording) -> bool:
    """Two Recordings are equal if all data, metadata, and annotations are the same.

    Annotation lists are compared as unordered multisets, so two Recordings whose
    annotations appear in different orders still compare equal. (The previous
    implementation compared the lists element-wise, contradicting its own comment
    about allowing differently ordered annotation lists.)
    """
    if len(self.annotations) != len(other.annotations):
        return False
    # Multiset comparison that requires neither hashable nor orderable annotations.
    remaining = list(other.annotations)
    for annotation in self.annotations:
        if annotation in remaining:
            remaining.remove(annotation)
        else:
            return False
    return np.array_equal(self.data, other.data) and self.metadata == other.metadata
def __ne__(self, other: Recording) -> bool:
    """Two Recordings are not equal if any of their data, metadata, or annotations differ."""
    return not self.__eq__(other=other)
def __iter__(self) -> Iterator:
    """Reset the channel cursor and iterate over channels (see ``__next__``).

    NOTE(review): iteration state lives on the instance (``self._index``), so nested
    or concurrent iteration over the same Recording would interfere -- confirm this
    is acceptable for callers.
    """
    self._index = 0
    return self
def __next__(self) -> np.ndarray:
    """Return the next channel's samples; raise StopIteration once all channels are consumed."""
    if self._index < self.n_chan:
        to_ret = self.data[self._index]
        self._index += 1
        return to_ret
    else:
        raise StopIteration
def __getitem__(self, key: int | tuple[int] | slice) -> np.ndarray | np.complexfloating:
    """If key is an integer, tuple of integers, or a slice, return the corresponding samples.

    Results of 1,024 or fewer samples are returned as copies; larger results come back
    as read-only views. This prevents mutation at a distance while maintaining
    performance.
    """
    if not isinstance(key, (int, tuple, slice)):
        raise ValueError(f"Key must be an integer, tuple, or slice but was {type(key)}.")
    selected = self._data[key]
    if isinstance(selected, np.complexfloating):
        # Scalar indexing result -- nothing to protect.
        return selected
    if selected.size > 1024:
        selected.setflags(write=False)  # Make view read-only.
        return selected
    return selected.copy()
def __setitem__(self, *args, **kwargs) -> None:
    """Recordings are immutable: any item-assignment attempt fails."""
    raise ValueError("Assignment to Recording is not allowed.")
def generate_recording_id(data: np.ndarray, timestamp: Optional[float | int] = None) -> str:
    """Generate a unique 64-character recording ID.

    The ID is the SHA-256 hex digest (256 bits, 64 hex characters) of the recording
    bytes concatenated with the string form of the timestamp at which the data was
    generated. If no timestamp is provided, the current time is used.

    :param data: Tape of IQ samples, as a NumPy array.
    :type data: np.ndarray
    :param timestamp: Unix timestamp in seconds. Defaults to None.
    :type timestamp: float or int, optional
    :return: 64-character hex digest, to be used as the recording ID.
    :rtype: str
    """
    if timestamp is None:
        timestamp = time.time()
    payload = data.tobytes() + str(timestamp).encode("utf-8")
    return hashlib.sha256(payload).hexdigest()
def _is_jsonable(x: Any) -> bool:
    """
    :return: True if ``x`` can be serialized with ``json.dumps``, False otherwise.
    """
    try:
        json.dumps(x)
    except (TypeError, OverflowError):
        return False
    return True
def _is_valid_metadata_key(key: Any) -> bool:
    """
    :return: True if key is a valid metadata key (a lowercase string made up solely of
        letters a-z and underscores), False otherwise.
    """
    return (
        isinstance(key, str)
        and key.islower()
        and re.fullmatch(r"[a-z_]+", key) is not None
    )

View File

@ -0,0 +1,22 @@
"""
The IO package contains utilities for input and output operations, such as loading and saving recordings to and from
file.
"""
__all__ = [
# Common:
"exists",
"copy",
"move",
"validate",
# Recording:
"save_recording",
"load_recording",
"to_sigmf",
"from_sigmf",
"to_npy",
"from_npy",
]
from .common import copy, exists, move, validate
from .recording import from_npy, from_sigmf, load_recording, to_npy, to_sigmf

View File

@ -0,0 +1,331 @@
"""
Utilities for input/output operations on the utils.data.Recording object.
"""
import datetime as dt
import os
from datetime import timezone
from typing import Optional
import numpy as np
import sigmf
from sigmf import SigMFFile, sigmffile
from sigmf.utils import get_data_type_str
from utils.data import Annotation
from utils.data.recording import Recording
def load_rec(file: os.PathLike) -> Recording:
    """Load a recording from file, dispatching on the file extension.

    :param file: The directory path to the file(s) to load, **with** the file extension.
        To load from SigMF, the extension must be one of *sigmf*, *sigmf-data*, or
        *sigmf-meta*; either way both the SigMF data and meta files must be present
        for a successful read.
    :type file: os.PathLike
    :raises IOError: If there is an issue encountered during the file reading process.
    :raises ValueError: If the inferred file extension is not supported.
    :return: The recording, as initialized from file(s).
    :rtype: utils.data.Recording
    """
    # NOTE(review): the io package __init__ exports "load_recording"; confirm this
    # module also provides that name, otherwise the package import will fail.
    extension = os.path.splitext(file)[1].lstrip(".")
    normalized = extension.lower()
    if normalized in ("sigmf", "sigmf-data", "sigmf-meta"):
        return from_sigmf(file=file)
    if normalized == "npy":
        return from_npy(file=file)
    raise ValueError(f"File extension {extension} not supported.")
# Maps SigMF global keys (the SigMFFile.*_KEY constants) to the RIA metadata key used
# for the same field; ``to_sigmf`` uses it to pull RIA metadata into the SigMF global
# segment, and ``from_sigmf`` to translate back.
# NOTE(review): values prefixed with "sigmf:" appear to be SigMF fields with no native
# RIA metadata equivalent -- confirm.
SIGMF_KEY_CONVERSION = {
    SigMFFile.AUTHOR_KEY: "author",
    SigMFFile.COLLECTION_KEY: "sigmf:collection",
    SigMFFile.DATASET_KEY: "sigmf:dataset",
    SigMFFile.DATATYPE_KEY: "datatype",
    SigMFFile.DATA_DOI_KEY: "data_doi",
    SigMFFile.DESCRIPTION_KEY: "description",
    SigMFFile.EXTENSIONS_KEY: "sigmf:extensions",
    SigMFFile.GEOLOCATION_KEY: "geolocation",
    SigMFFile.HASH_KEY: "sigmf:hash",
    SigMFFile.HW_KEY: "sdr",
    SigMFFile.LICENSE_KEY: "license",
    SigMFFile.META_DOI_KEY: "metadata",
    SigMFFile.METADATA_ONLY_KEY: "sigmf:metadata_only",
    SigMFFile.NUM_CHANNELS_KEY: "sigmf:num_channels",
    SigMFFile.RECORDER_KEY: "source_software",
    SigMFFile.SAMPLE_RATE_KEY: "sample_rate",
    SigMFFile.START_OFFSET_KEY: "sigmf:start_offset",
    SigMFFile.TRAILING_BYTES_KEY: "sigmf:trailing_bytes",
    SigMFFile.VERSION_KEY: "sigmf:version",
}
def convert_to_serializable(obj):
    """
    Recursively convert a JSON-compatible structure into a fully JSON-serializable one.

    Handles NumPy scalar/array types, nested dicts, lists, tuples, and sets. Infinite
    floats and None become None. Raises TypeError for anything else.

    Fixes over the previous version: ``np.bool_`` values are now converted instead of
    raising TypeError, and the infinity check no longer invokes ``==`` on arbitrary
    objects via a list-membership test.
    """
    # bool before the numeric checks: np.bool_ is not an np.integer subclass.
    if isinstance(obj, (bool, np.bool_)):
        return bool(obj)
    if isinstance(obj, np.integer):
        return int(obj)  # Convert NumPy int to Python int
    if isinstance(obj, np.floating):
        return float(obj)  # Convert NumPy float to Python float
    if isinstance(obj, np.ndarray):
        return obj.tolist()  # Convert NumPy array to list
    if isinstance(obj, (list, tuple)):
        return [convert_to_serializable(item) for item in obj]  # Process list or tuple
    if isinstance(obj, dict):
        return {key: convert_to_serializable(value) for key, value in obj.items()}  # Process dict
    if isinstance(obj, set):
        # NOTE(review): set elements are not recursively converted (pre-existing
        # behavior) -- confirm whether sets can contain NumPy scalars here.
        return list(obj)
    # Handle infinity or None: strict JSON has no representation for either.
    if obj is None or (isinstance(obj, float) and obj in (float("inf"), float("-inf"))):
        return None
    if isinstance(obj, (str, int, float)):
        return obj  # Base case: already serializable
    raise TypeError(f"Value of type {type(obj)} is not JSON serializable: {obj}")
def to_sigmf(recording: Recording, filename: Optional[str] = None, path: Optional[os.PathLike | str] = None) -> None:
    """Write recording to a set of SigMF files (``<filename>.sigmf-data`` and ``<filename>.sigmf-meta``).

    The SigMF io format is defined by the `SigMF Specification Project <https://github.com/sigmf/SigMF>`_

    :param recording: The recording to be written to file.
    :type recording: utils.data.Recording
    :param filename: The name of the file where the recording is to be saved. Defaults to auto generated filename.
    :type filename: os.PathLike or str, optional
    :param path: The directory path to where the recording is to be saved. Defaults to recordings/.
    :type path: os.PathLike or str, optional
    :raises IOError: If there is an issue encountered during the file writing process.
    :raises NotImplementedError: If the recording has more than one channel.
    :return: None

    **Examples:**

    >>> from utils.sdr import Synth
    >>> from utils.data import Recording
    >>> from utils.io import to_sigmf
    >>> sdr = Synth()
    >>> rec = sdr.record(center_frequency=2.4e9, sample_rate=20e6)
    >>> to_sigmf(recording=rec, filename="sample_recording")
    """
    if filename is not None:
        # Strip any caller-supplied extension; SigMF extensions are appended below.
        filename, _ = os.path.splitext(filename)
    else:
        filename = recording.generate_filename()
    if path is None:
        path = "recordings"
    if not os.path.exists(path):
        os.makedirs(path)
    multichannel_samples = recording.data
    metadata = recording.metadata
    annotations = recording.annotations
    if multichannel_samples.shape[0] > 1:
        raise NotImplementedError("SigMF File Saving Not Implemented for Multichannel Recordings")
    # Extract the single channel and write the raw IQ samples.
    samples = multichannel_samples[0]
    # Fix: the data file must share the meta file's basename. Previously a literal
    # "(unknown)" placeholder was written here, orphaning the .sigmf-meta file.
    data_file_path = os.path.join(path, f"{filename}.sigmf-data")
    samples.tofile(data_file_path)
    global_info = {
        SigMFFile.DATATYPE_KEY: get_data_type_str(samples),
        SigMFFile.VERSION_KEY: sigmf.__version__,
        SigMFFile.RECORDER_KEY: "RIA",
    }
    # Translate RIA metadata keys into SigMF core keys where a mapping exists.
    converted_metadata = {
        sigmf_key: metadata[metadata_key]
        for sigmf_key, metadata_key in SIGMF_KEY_CONVERSION.items()
        if metadata_key in metadata
    }
    # Merge dictionaries, giving priority to the fixed SigMF fields set above.
    global_info = {**converted_metadata, **global_info}
    # Also preserve the complete RIA metadata under a namespaced "ria:" prefix.
    ria_metadata = {f"ria:{key}": value for key, value in metadata.items()}
    global_info.update(convert_to_serializable(ria_metadata))
    sigmf_metafile = SigMFFile(
        data_file=data_file_path,
        global_info=global_info,
    )
    for annotation_object in annotations:
        annotation_dict = convert_to_serializable(annotation_object.to_sigmf_format())
        sigmf_metafile.add_annotation(
            start_index=annotation_dict[SigMFFile.START_INDEX_KEY],
            length=annotation_dict[SigMFFile.LENGTH_INDEX_KEY],
            metadata=annotation_dict["metadata"],
        )
    sigmf_metafile.add_capture(
        0,
        metadata={
            SigMFFile.FREQUENCY_KEY: metadata.get("center_frequency", 0),
            SigMFFile.DATETIME_KEY: dt.datetime.fromtimestamp(float(metadata.get("timestamp", 0)), tz=timezone.utc)
            .isoformat()
            .replace("+00:00", "Z"),
        },
    )
    # (Removed dead code that mutated a discarded copy of ordered_metadata().)
    sigmf_metafile.tofile(f"{os.path.join(path, filename)}.sigmf-meta")
def from_sigmf(file: os.PathLike | str) -> Recording:
    """Load a recording from a set of SigMF files.

    :param file: The directory path to the SigMF recording files, with or without a
        SigMF extension. The recording is initialized from ``<file>.sigmf-data`` and
        ``<file>.sigmf-meta``; both files must be present for a successful read.
    :type file: str or os.PathLike
    :raises IOError: If there is an issue encountered during the file reading process.
    :return: The recording, as initialized from the SigMF files.
    :rtype: utils.data.Recording
    """
    # os.fspath: the previous str-slicing extension check raised TypeError for
    # PathLike input and mishandled short names.
    file = os.fspath(file)
    if not file.endswith((".sigmf", ".sigmf-data", ".sigmf-meta")):
        file = file + ".sigmf-data"
    sigmf_file = sigmffile.fromfile(file)
    data = sigmf_file.read_samples()
    global_metadata = sigmf_file.get_global_info()
    dict_annotations = sigmf_file.get_annotations()
    processed_metadata = {}
    for key, value in global_metadata.items():
        if key.startswith("core:"):
            # Fix: SIGMF_KEY_CONVERSION is keyed by the full "core:..." SigMF constants,
            # so look up the complete key and only fall back to stripping the prefix.
            # (The old code stripped first, so every lookup missed and e.g. "core:hw"
            # came back as "hw" instead of the RIA key "sdr".)
            converted_key = SIGMF_KEY_CONVERSION.get(key, key[len("core:"):])
        elif key.startswith("ria:"):
            converted_key = key[len("ria:"):]  # Remove 'ria:' prefix
        else:
            # Load non-core/ria keys as is
            converted_key = key
        processed_metadata[converted_key] = value
    annotations = []
    # Loop variable renamed from ``dict``, which shadowed the builtin.
    for ann in dict_annotations:
        annotations.append(
            Annotation(
                sample_start=ann[SigMFFile.START_INDEX_KEY],
                sample_count=ann[SigMFFile.LENGTH_INDEX_KEY],
                freq_lower_edge=ann.get(SigMFFile.FLO_KEY, None),
                freq_upper_edge=ann.get(SigMFFile.FHI_KEY, None),
                label=ann.get(SigMFFile.LABEL_KEY, None),
                comment=ann.get(SigMFFile.COMMENT_KEY, None),
                detail=ann.get("ria:detail", None),
            )
        )
    return Recording(data=data, metadata=processed_metadata, annotations=annotations)
def to_npy(recording: Recording, filename: Optional[str] = None, path: Optional[os.PathLike | str] = None) -> str:
    """Write recording to ``.npy`` binary file.

    The data, metadata, and annotations are written as three consecutive ``np.save``
    records in a single file; ``from_npy`` reads them back in the same order
    (metadata and annotations rely on NumPy's pickle support).

    :param recording: The recording to be written to file.
    :type recording: utils.data.Recording
    :param filename: The name of the file where the recording is to be saved. Defaults to auto generated filename.
    :type filename: os.PathLike or str, optional
    :param path: The directory path to where the recording is to be saved. Defaults to recordings/.
    :type path: os.PathLike or str, optional
    :raises IOError: If there is an issue encountered during the file writing process.
    :return: Path where the file was saved.
    :rtype: str

    **Examples:**

    >>> from utils.sdr import Synth
    >>> from utils.data import Recording
    >>> from utils.io import to_npy
    >>> sdr = Synth()
    >>> rec = sdr.record(center_frequency=2.4e9, sample_rate=20e6)
    >>> to_npy(recording=rec, filename="sample_recording.npy")
    """
    if filename is not None:
        # Strip any caller-supplied extension; ".npy" is (re-)appended below.
        filename, _ = os.path.splitext(filename)
    else:
        filename = recording.generate_filename()
    filename = filename + ".npy"
    if path is None:
        path = "recordings"
    if not os.path.exists(path):
        os.makedirs(path)
    fullpath = os.path.join(path, filename)
    data = np.array(recording.data)
    metadata = recording.metadata
    annotations = recording.annotations
    # Three sequential saves into one file; order must match from_npy's reads.
    with open(file=fullpath, mode="wb") as f:
        np.save(f, data)
        np.save(f, metadata)
        np.save(f, annotations)
    return str(fullpath)
def from_npy(file: os.PathLike | str) -> Recording:
    """Load a recording from a ``.npy`` binary file.

    :param file: The directory path to the recording file, with or without the ``.npy`` file extension.
    :type file: str or os.PathLike
    :raises IOError: If there is an issue encountered during the file reading process.
    :raises ValueError: If the path carries an extension other than ``.npy``.
    :return: The recording, as initialized from the ``.npy`` file.
    :rtype: utils.data.Recording
    """
    root, extension = os.path.splitext(file)
    if extension not in ("", ".npy"):
        raise ValueError("Cannot use from_npy if file extension is not .npy")
    # Normalize to an explicit .npy path.
    target = str(root) + ".npy"
    # NOTE(review): allow_pickle=True can execute arbitrary code when loading -- only
    # open trusted files.
    with open(file=target, mode="rb") as f:
        data = np.load(f, allow_pickle=True)
        metadata = np.load(f, allow_pickle=True).tolist()
        try:
            annotations = list(np.load(f, allow_pickle=True))
        except EOFError:
            # Files written without an annotations record load as annotation-free.
            annotations = []
    return Recording(data=data, metadata=metadata, annotations=annotations)

View File

@ -0,0 +1,8 @@
"""
The transforms package houses a collection of functions to manipulate and transform radio data.
This package contains various functions that operate on NumPy arrays. These functions are utilized within the machine
learning backends to build transforms and functions that seamlessly integrate with those from the respective backend.
All the transforms in this package expect data in the complex 1xN format.
"""

View File

@ -0,0 +1,717 @@
"""
This module comprises the functionals of various transforms designed to create new training examples by augmenting
existing examples or recordings using a variety of techniques. These transforms take an ArrayLike object as input
and return a corresponding numpy.ndarray with the impairment model applied;
we call the latter the impaired data.
"""
import warnings
from typing import Optional

import numpy as np
from numpy.typing import ArrayLike

from utils.data.recording import Recording
from utils.helpers.array_conversion import convert_to_2xn
# TODO: For round 2 of index generation, should j be at min 2 spots away from where it was to prevent adjacent patches.
# TODO: All the transforms with some randomness need to be refactored to use a random generator.
def generate_awgn(signal: ArrayLike | Recording, snr: Optional[float] = 1) -> np.ndarray | Recording:
    """Generate additive white gaussian noise (AWGN) relative to the signal-to-noise ratio (SNR)
    of the provided `signal` array or `Recording`.

    The RMS power of `signal` fixes the RMS power of noise matching the requested SNR;
    the noise is then drawn with normally distributed amplitude and uniformly random phase.

    :param signal: Input IQ data as a complex CxN array or `Recording`, where C is the number of channels and N
        is the length of the IQ examples.
    :type signal: array_like or utils.data.Recording
    :param snr: The signal-to-noise ratio in dB. Default is 1.
    :type snr: float, optional
    :raises ValueError: If `signal` is not CxN complex.
    :return: A numpy array of generated noise matching the SNR of `signal`. If `signal` is a
        Recording, returns a Recording whose `data` attribute contains the generated noise.
    :rtype: np.ndarray or utils.data.Recording

    >>> rec = Recording(data=[[2 + 5j, 1 + 8j]])
    >>> new_rec = generate_awgn(rec)
    >>> new_rec.data
    array([[2.15991777 + 0.69673915j, 0.2814541 - 0.12111976j]])
    """
    is_recording = isinstance(signal, Recording)
    data = signal.data if is_recording else np.asarray(signal)
    if not (data.ndim == 2 and np.iscomplexobj(data)):
        raise ValueError("signal must be CxN complex.")
    num_channels, num_samples = data.shape
    # Convert the SNR from dB to a linear ratio.
    snr_linear = 10 ** (snr / 10)
    # RMS power of the signal determines the RMS power of the noise.
    rms_signal = np.sqrt(np.mean(np.abs(data) ** 2))
    rms_noise = rms_signal / snr_linear
    # Draw amplitude from a zero-mean normal with the matching variance, and
    # phase uniformly on [0, 2*pi).
    std_dev = np.sqrt(rms_noise**2)
    amplitude = np.random.normal(loc=0, scale=std_dev, size=(num_channels, num_samples))
    phase = np.random.uniform(low=0, high=2 * np.pi, size=(num_channels, num_samples))
    noise = amplitude * np.exp(1j * phase)
    if is_recording:
        return Recording(data=noise, metadata=signal.metadata)
    return noise
def time_reversal(signal: ArrayLike | Recording) -> np.ndarray | Recording:
    """Reverse the order of the I (In-phase) and Q (Quadrature) data samples along the time
    axis of the provided `signal` array or `Recording`.

    :param signal: Input IQ data as a complex CxN array or `Recording`, where C is the number of channels and N
        is the length of the IQ examples.
    :type signal: array_like or utils.data.Recording
    :raises ValueError: If `signal` is not CxN complex.
    :return: A numpy array containing the reversed I and Q data samples if `signal` is an array.
        If `signal` is a `Recording`, returns a `Recording` object with its `data` attribute
        containing the reversed array.
    :rtype: np.ndarray or utils.data.Recording

    >>> rec = Recording(data=[[1+2j, 3+4j, 5+6j]])
    >>> new_rec = time_reversal(rec)
    >>> new_rec.data
    array([[5+6j, 3+4j, 1+2j]])
    """
    is_recording = isinstance(signal, Recording)
    data = signal.data if is_recording else np.asarray(signal)
    if not (data.ndim == 2 and np.iscomplexobj(data)):
        raise ValueError("signal must be CxN complex.")
    num_channels, num_samples = data.shape
    if num_channels != 1:
        raise NotImplementedError
    # 1xN complex: flatten and flip along time.
    flipped = np.squeeze(data)[::-1]
    if is_recording:
        return Recording(data=flipped, metadata=signal.metadata)
    return flipped.reshape(num_channels, num_samples)
def spectral_inversion(signal: ArrayLike | Recording) -> np.ndarray | Recording:
    """Negate the imaginary components (Q, Quadrature) of the data samples contained within
    the provided `signal` array or `Recording`.

    :param signal: Input IQ data as a complex CxN array or `Recording`, where C is the number of channels and N
        is the length of the IQ examples.
    :type signal: array_like or utils.data.Recording
    :raises ValueError: If `signal` is not CxN complex.
    :return: A numpy array containing the original I and negated Q data samples if `signal` is an array.
        If `signal` is a `Recording`, returns a `Recording` object with its `data` attribute
        containing the inverted array.
    :rtype: np.ndarray or utils.data.Recording

    >>> rec = Recording(data=[[0+45j, 2-10j]])
    >>> new_rec = spectral_inversion(rec)
    >>> new_rec.data
    array([[0-45j, 2+10j]])
    """
    is_recording = isinstance(signal, Recording)
    data = signal.data if is_recording else np.asarray(signal)
    if not (data.ndim == 2 and np.iscomplexobj(data)):
        raise ValueError("signal must be CxN complex.")
    num_channels, num_samples = data.shape
    if num_channels != 1:
        raise NotImplementedError
    # Negating Q is the complex conjugate: real - 1j * imag.
    inverted = np.conj(np.squeeze(data))
    if is_recording:
        return Recording(data=inverted, metadata=signal.metadata)
    return inverted.reshape(num_channels, num_samples)
def channel_swap(signal: ArrayLike | Recording) -> np.ndarray | Recording:
    """Switch the I (In-phase) with the Q (Quadrature) data samples for each sample within
    the provided `signal` array or `Recording`.

    :param signal: Input IQ data as a complex CxN array or `Recording`, where C is the number of channels and N
        is the length of the IQ examples.
    :type signal: array_like or utils.data.Recording
    :raises ValueError: If `signal` is not CxN complex.
    :return: A numpy array containing the swapped I and Q data samples if `signal` is an array.
        If `signal` is a `Recording`, returns a `Recording` object with its `data` attribute
        containing the swapped array.
    :rtype: np.ndarray or utils.data.Recording

    >>> rec = Recording(data=[[10+20j, 7+35j]])
    >>> new_rec = channel_swap(rec)
    >>> new_rec.data
    array([[20+10j, 35+7j]])
    """
    is_recording = isinstance(signal, Recording)
    data = signal.data if is_recording else np.asarray(signal)
    if not (data.ndim == 2 and np.iscomplexobj(data)):
        raise ValueError("signal must be CxN complex.")
    num_channels, num_samples = data.shape
    if num_channels != 1:
        raise NotImplementedError
    flat = np.squeeze(data)
    # New real part is the old imaginary part and vice versa.
    swapped = flat.imag + 1j * flat.real
    if is_recording:
        return Recording(data=swapped, metadata=signal.metadata)
    return swapped.reshape(num_channels, num_samples)
def amplitude_reversal(signal: ArrayLike | Recording) -> np.ndarray | Recording:
    """Negate the amplitudes of both the I (In-phase) and Q (Quadrature) data samples
    contained within the provided `signal` array or `Recording`.

    :param signal: Input IQ data as a complex CxN array or `Recording`, where C is the number of channels and N
        is the length of the IQ examples.
    :type signal: array_like or utils.data.Recording
    :raises ValueError: If `signal` is not CxN complex.
    :return: A numpy array containing the negated I and Q data samples if `signal` is an array.
        If `signal` is a `Recording`, returns a `Recording` object with its `data` attribute
        containing the negated array.
    :rtype: np.ndarray or utils.data.Recording

    >>> rec = Recording(data=[[4-3j, -5-2j, -9+1j]])
    >>> new_rec = amplitude_reversal(rec)
    >>> new_rec.data
    array([[-4+3j, 5+2j, 9-1j]])
    """
    is_recording = isinstance(signal, Recording)
    data = signal.data if is_recording else np.asarray(signal)
    if not (data.ndim == 2 and np.iscomplexobj(data)):
        raise ValueError("signal must be CxN complex.")
    num_channels, num_samples = data.shape
    if num_channels != 1:
        raise NotImplementedError
    # Negating both I and Q is simple complex negation.
    negated = -np.squeeze(data)
    if is_recording:
        return Recording(data=negated, metadata=signal.metadata)
    return negated.reshape(num_channels, num_samples)
def drop_samples(  # noqa: C901  # TODO: Simplify function
    signal: ArrayLike | Recording, max_section_size: Optional[int] = 2, fill_type: Optional[str] = "zeros"
) -> np.ndarray | Recording:
    """Randomly drop IQ data samples contained within the provided `signal` array or `Recording`.

    This function randomly selects sections of the signal and replaces the current data samples
    in the specified section with another value dependent on the fill type. The input is never
    modified: the replacement happens on a copy (previously the caller's array / Recording
    buffer was mutated in place).

    :param signal: Input IQ data as a complex CxN array or `Recording`, where C is the number of channels and N
        is the length of the IQ examples.
    :type signal: array_like or utils.data.Recording
    :param max_section_size: Maximum allowable size of the section to be dropped and replaced. Default is 2.
    :type max_section_size: int, optional
    :param fill_type: Fill option used to replace dropped section of data (back-fill, front-fill, mean, zeros).
        Default is "zeros".
        "back-fill": replace dropped section with the data sample occurring before the section.
        "front-fill": replace dropped section with the data sample occurring after the section.
        "mean": replace dropped section with mean of the entire signal.
        "zeros": replace dropped section with constant value of 0+0j.
    :type fill_type: str, optional
    :raises ValueError: If `signal` is not CxN complex.
    :raises ValueError: If `max_section_size` is less than 1 or greater than or equal to length of `signal`.
    :raises ValueError: If `fill_type` is not one of the options listed above.
    :return: A numpy array containing the I and Q data samples with replaced subsections if
        `signal` is an array. If `signal` is a `Recording`, returns a `Recording` object with its
        `data` attribute containing the array with dropped samples.
    :rtype: np.ndarray or utils.data.Recording

    >>> rec = Recording(data=[[2+5j, 1+8j, 6+4j, 3+7j, 4+9j]])
    >>> new_rec = drop_samples(rec)
    >>> new_rec.data
    array([[2+5j, 0, 0, 0, 4+9j]])
    """
    if isinstance(signal, Recording):
        data = signal.data
    else:
        data = np.asarray(signal)
    if data.ndim == 2 and np.iscomplexobj(data):
        c, n = data.shape
    else:
        raise ValueError("signal must be CxN complex.")
    if max_section_size < 1 or max_section_size >= n:
        raise ValueError("max_section_size must be at least 1 and must be less than the length of signal.")
    if c != 1:
        raise NotImplementedError
    # Fix: work on a copy. np.asarray and Recording.data can alias the caller's
    # buffer, and the in-place fills below previously mutated the input signal.
    data = np.array(np.squeeze(data))
    if fill_type == "mean":
        mean = np.mean(data)
    i = -1
    j = -1
    # Pointers i and j point to exact positions delimiting the dropped section [i, j].
    while i < n:
        # Generate valid starting point so that at least 1 drop occurs
        i = np.random.randint(j + 1, j + n - max_section_size + 2)
        j = np.random.randint(i, i + max_section_size)
        if j > n - 1:  # Check that the full drop is within the dataset
            break
        # Generate fill based on fill_type
        if fill_type == "back-fill":
            fill = data[i - 1] if i > 0 else data[i]
        elif fill_type == "front-fill":
            fill = data[j + 1] if j < n - 1 else data[j]
        elif fill_type == "mean":
            fill = mean
        elif fill_type == "zeros":
            fill = 0 + 0j
        else:
            raise ValueError(f"fill_type {fill_type} not recognized.")
        # Replaces dropped samples with fill values
        data[i : j + 1] = fill
    if isinstance(signal, Recording):
        return Recording(data=data, metadata=signal.metadata)
    return data.reshape(c, n)
def quantize_tape(
    signal: ArrayLike | Recording, bin_number: Optional[int] = 4, rounding_type: Optional[str] = "floor"
) -> np.ndarray | Recording:
    """Quantize the IQ data of the provided `signal` array or `Recording` by a few bits.

    This function emulates an analog-to-digital converter (ADC) which is commonly seen in digital
    RF systems. The relationship between the number of bins and number of bits is:
    log(# of bins) / log(2) = # of bits.

    :param signal: Input IQ data as a complex CxN array or `Recording`, where C is the number of channels and N
        is the length of the IQ examples.
    :type signal: array_like or utils.data.Recording
    :param bin_number: The number of bins the signal should be divided into. Default is 4.
    :type bin_number: int, optional
    :param rounding_type: The type of rounding applied during processing. Default is "floor".
        "floor": rounds down to the lower bound of the bin.
        "ceiling": rounds up to the upper bound of the bin.
    :type rounding_type: str, optional
    :raises ValueError: If `signal` is not CxN complex.
    :raises UserWarning: (warned, not raised) If `rounding_type` is not "floor" or "ceiling";
        "floor" is then used.
    :return: A numpy array containing the quantized I and Q data samples if `signal` is an array.
        If `signal` is a `Recording`, returns a `Recording` object with its `data` attribute
        containing the quantized array.
    :rtype: np.ndarray or utils.data.Recording

    >>> rec = Recording(data=[[1+1j, 4+4j, 1+2j, 1+4j]])
    >>> new_rec = quantize_tape(rec)
    >>> new_rec.data
    array([[4+4j, 3+3j, 4+1j, 4+3j]])
    """
    if isinstance(signal, Recording):
        data = signal.data
    else:
        data = np.asarray(signal)
    if data.ndim == 2 and np.iscomplexobj(data):
        c, n = data.shape
    else:
        raise ValueError("signal must be CxN complex.")
    if rounding_type not in {"ceiling", "floor"}:
        # Fix: the previous implementation *raised* UserWarning here, aborting the call
        # even though the message promised a floor fallback. Warn and fall back instead.
        warnings.warn('rounding_type must be either "floor" or "ceiling", floor has been selected by default')
        rounding_type = "floor"
    if c != 1:
        raise NotImplementedError
    iq_data = convert_to_2xn(data)
    maximum, minimum = iq_data.max(), iq_data.min()
    bin_edges = np.linspace(minimum, maximum, bin_number + 1)
    indices = np.digitize(iq_data, bin_edges, right=True)
    # If data falls outside the first bin, map it back into the first bin, data will not fall outside of last bin
    indices[indices == 0] = 1
    # Map the data points to the correct bins
    if rounding_type == "ceiling":
        modified_iq_data = bin_edges[indices]
    else:
        modified_iq_data = bin_edges[indices - 1]
    new_data = modified_iq_data[0] + 1j * modified_iq_data[1]
    if isinstance(signal, Recording):
        return Recording(data=new_data, metadata=signal.metadata)
    return new_data.reshape(c, n)
def quantize_parts(
    signal: ArrayLike | Recording,
    max_section_size: Optional[int] = 2,
    bin_number: Optional[int] = 4,
    rounding_type: Optional[str] = "floor",
) -> np.ndarray | Recording:
    """Quantize random parts of the IQ data within the provided `signal` array or `Recording` by a few bits.

    This function emulates an analog-to-digital converter (ADC) which is commonly seen in digital
    RF systems. The relationship between the number of bins and number of bits is:
    log(# of bins) / log(2) = # of bits.

    :param signal: Input IQ data as a complex CxN array or `Recording`, where C is the number of channels and N
        is the length of the IQ examples.
    :type signal: array_like or utils.data.Recording
    :param max_section_size: Maximum allowable size of the section to be quantized. Default is 2.
    :type max_section_size: int, optional
    :param bin_number: The number of bins the signal should be divided into. Default is 4.
    :type bin_number: int, optional
    :param rounding_type: Type of rounding applied during processing. Default is "floor".
        "floor": rounds down to the lower bound of the bin.
        "ceiling": rounds up to the upper bound of the bin.
    :type rounding_type: str, optional
    :raises ValueError: If `signal` is not CxN complex.
    :raises UserWarning: (warned, not raised) If `rounding_type` is not "floor" or "ceiling";
        "floor" is then used.
    :return: A numpy array containing the I and Q data samples with quantized subsections if `signal`
        is an array. If `signal` is a `Recording`, returns a `Recording` object with its `data`
        attribute containing the partially quantized array.
    :rtype: np.ndarray or utils.data.Recording

    >>> rec = Recording(data=[[2+5j, 1+8j, 6+4j, 3+7j, 4+9j]])
    >>> new_rec = quantize_parts(rec)
    >>> new_rec.data
    array([[2+5j, 1+8j, 3.66666667+3.66666667j, 3+7j, 4+9j]])
    """
    if isinstance(signal, Recording):
        data = signal.data
    else:
        data = np.asarray(signal)
    if data.ndim == 2 and np.iscomplexobj(data):
        c, n = data.shape
    else:
        raise ValueError("signal must be CxN complex.")
    if rounding_type not in {"ceiling", "floor"}:
        # Fix: the previous implementation *raised* UserWarning here, aborting the call
        # even though the message promised a floor fallback. Warn and fall back instead.
        warnings.warn('rounding_type must be either "floor" or "ceiling", floor has been selected by default')
        rounding_type = "floor"
    if c != 1:
        raise NotImplementedError
    iq_data = convert_to_2xn(data)
    i_data, q_data = iq_data
    maximum, minimum = iq_data.max(), iq_data.min()
    bin_edges = np.linspace(minimum, maximum, bin_number + 1)
    indices = np.digitize(iq_data, bin_edges, right=True)
    # Map everything from bin 0 to bin 1
    indices[indices == 0] = 1
    i = -1
    j = -1
    # Pointers i and j point to exact positions delimiting the section [i, j] to quantize.
    while i < n:
        # Generate valid starting point so that at least 1 section is quantized
        i = np.random.randint(j + 1, j + n - max_section_size + 2)
        j = np.random.randint(i, i + max_section_size)
        if j > n - 1:  # Check that the full section is within the dataset
            break
        if rounding_type == "ceiling":
            i_data[i : j + 1] = bin_edges[indices[0][i : j + 1]]
            q_data[i : j + 1] = bin_edges[indices[1][i : j + 1]]
        else:
            i_data[i : j + 1] = bin_edges[indices[0][i : j + 1] - 1]
            q_data[i : j + 1] = bin_edges[indices[1][i : j + 1] - 1]
    quantized_data = i_data + 1j * q_data
    if isinstance(signal, Recording):
        return Recording(data=quantized_data, metadata=signal.metadata)
    return quantized_data.reshape(c, n)
def magnitude_rescale(
    signal: ArrayLike | Recording,
    starting_bounds: Optional[tuple] = None,
    max_magnitude: Optional[int] = 1,
) -> np.ndarray | Recording:
    """Selects a random starting point from within the specified starting bounds and multiplies IQ data of the
    provided `signal` array or `Recording` by a random constant.

    :param signal: Input IQ data as a complex CxN array or `Recording`, where C is the number of channels and N
        is the length of the IQ examples.
    :type signal: array_like or utils.data.Recording
    :param starting_bounds: The bounds (inclusive) as indices in which the starting position of the rescaling occurs.
        Default is None, but if user does not assign any bounds, the bounds become (random index, N-1).
    :type starting_bounds: tuple, optional
    :param max_magnitude: The maximum value of the constant that is used to rescale the data. Default is 1.
    :type max_magnitude: int, optional
    :raises ValueError: If `signal` is not CxN complex.
    :raises ValueError: If `starting_bounds` are not valid, ordered indices for the dataset.
    :return: A numpy array containing the I and Q data samples with the rescaled magnitude after the random
        starting point if `signal` is an array. If `signal` is a `Recording`, returns a `Recording`
        object with its `data` attribute containing the rescaled array.
    :rtype: np.ndarray or utils.data.Recording

    >>> rec = Recording(data=[[2+5j, 1+8j, 6+4j, 3+7j, 4+9j]])
    >>> new_rec = magnitude_rescale(rec)
    >>> new_rec.data
    array([[2+5j, 1+8j, 6+4j, 3+7j, 3.03181761+6.82158963j]])
    """
    if isinstance(signal, Recording):
        data = signal.data
    else:
        data = np.asarray(signal)
    if data.ndim == 2 and np.iscomplexobj(data):
        c, n = data.shape
    else:
        raise ValueError("signal must be CxN complex.")
    if starting_bounds is None:
        # No bounds supplied: pick a random lower bound and allow the rescale to start anywhere after it.
        starting_bounds = (np.random.randint(0, n), n - 1)
    # Also reject inverted bounds up front so np.random.randint does not fail with a confusing message.
    if starting_bounds[0] < 0 or starting_bounds[1] > n - 1 or starting_bounds[0] > starting_bounds[1]:
        raise ValueError("starting_bounds must be valid indices for the dataset.")
    if c == 1:
        data = np.squeeze(data)
        # Random start index within the (inclusive) bounds.
        starting_point = np.random.randint(starting_bounds[0], starting_bounds[1] + 1)
        # Uniform random scale factor in [0, max_magnitude).
        magnitude = np.random.rand() * max_magnitude
        rescaled_section = data[starting_point:] * magnitude
        rescaled_data = np.concatenate((data[:starting_point], rescaled_section))
    else:
        raise NotImplementedError
    if isinstance(signal, Recording):
        return Recording(data=rescaled_data, metadata=signal.metadata)
    else:
        return rescaled_data.reshape(c, n)
def cut_out(  # noqa: C901 # TODO: Simplify function
    signal: ArrayLike | Recording, max_section_size: Optional[int] = 3, fill_type: Optional[str] = "ones"
) -> np.ndarray | Recording:
    """Cuts out random sections of IQ data and replaces them with either 0s, 1s, or low, average, or high
    signal-to-noise ratio (SNR) additive white gaussian noise (AWGN) within the provided `signal` array or
    `Recording`.

    :param signal: Input IQ data as a complex CxN array or `Recording`, where C is the number of channels and N
        is the length of the IQ examples.
    :type signal: array_like or utils.data.Recording
    :param max_section_size: Maximum allowable size of the section to be cut out. Default is 3.
    :type max_section_size: int, optional
    :param fill_type: Fill option used to replace cutout section of data (zeros, ones, low-snr, avg-snr, high-snr).
        Default is "ones".
        "zeros": replace cutout section with 0s.
        "ones": replace cutout section with 1s.
        "low-snr": replace cutout section with AWGN with an SNR of 0.5.
        "avg-snr": replace cutout section with AWGN with an SNR of 1.
        "high-snr": replace cutout section with AWGN with an SNR of 2.
    :type fill_type: str, optional
    :raises ValueError: If `signal` is not CxN complex.
    :raises UserWarning: If `fill_type` is not "zeros", "ones", "low-snr", "avg-snr", or "high-snr".
    :raises ValueError: If `max_section_size` is less than 1 or greater than or equal to length of `signal`.
    :return: A numpy array containing the I and Q data samples with random sections cut out and replaced according to
        `fill_type` if `signal` is an array. If `signal` is a `Recording`, returns a `Recording` object
        with its `data` attribute containing the cut out and replaced array.
    :rtype: np.ndarray or utils.data.Recording

    >>> rec = Recording(data=[[2+5j, 1+8j, 6+4j, 3+7j, 4+9j]])
    >>> new_rec = cut_out(rec)
    >>> new_rec.data
    array([[2+5j, 1+8j, 1+1j, 1+1j, 1+1j]])
    """
    if isinstance(signal, Recording):
        data = signal.data.copy()  # Copy so the caller's recording data is not modified in place.
    else:
        data = np.array(signal)  # np.array (not asarray) forces a copy so ndarray inputs are untouched.
    if data.ndim == 2 and np.iscomplexobj(data):
        c, n = data.shape
    else:
        raise ValueError("signal must be CxN complex.")
    if fill_type not in {"zeros", "ones", "low-snr", "avg-snr", "high-snr"}:
        raise UserWarning(
            """fill_type must be "zeros", "ones", "low-snr", "avg-snr", or "high-snr",
            "ones" has been selected by default"""
        )
    if max_section_size < 1 or max_section_size >= n:
        raise ValueError("max_section_size must be at least 1 and must be less than the length of signal.")
    if c == 1:
        data = np.squeeze(data)
        i = -1
        j = -1
        # Pointers i and j point to exact positions
        while i < n:
            # Generate valid starting point so that at least 1 drop occurs
            i = np.random.randint(j + 1, j + n - max_section_size + 2)
            j = np.random.randint(i, i + max_section_size)
            if j > n - 1:  # Check that the full drop is within the dataset
                break
            # TODO: Check if we can collapse last three options which depends on what snr value the user enters
            if fill_type == "zeros":
                fill = 0 + 0j
            elif fill_type == "ones":
                fill = 1 + 1j
            elif fill_type == "low-snr":
                # NOTE(review): generate_awgn is given a 1xK list-wrapped slice here -- confirm its
                # return shape broadcasts into the K-length target slice below.
                fill = generate_awgn([data[i : j + 1]], 0.5)
            elif fill_type == "avg-snr":
                fill = generate_awgn([data[i : j + 1]], 1)
            else:
                fill = generate_awgn([data[i : j + 1]], 2)
            data[i : j + 1] = fill
    else:
        raise NotImplementedError
    if isinstance(signal, Recording):
        return Recording(data=data, metadata=signal.metadata)
    else:
        return data.reshape(c, n)
def patch_shuffle(signal: ArrayLike | Recording, max_patch_size: Optional[int] = 3) -> np.ndarray | Recording:
    """Selects random patches of the IQ data and randomly shuffles the data samples within the specified patch of
    the provided `signal` array or `Recording`.

    The I and Q components within a patch are shuffled independently, so samples may be re-paired.

    :param signal: Input IQ data as a complex CxN array or `Recording`, where C is the number of channels and N
        is the length of the IQ examples.
    :type signal: array_like or utils.data.Recording
    :param max_patch_size: Maximum allowable patch size of the data that can be shuffled. Default is 3.
    :type max_patch_size: int, optional
    :raises ValueError: If `signal` is not CxN complex.
    :raises ValueError: If `max_patch_size` is less than or equal to 1 or greater than length of `signal`.
    :return: A numpy array containing the I and Q data samples with randomly shuffled regions if `signal` is
        an array. If `signal` is a `Recording`, returns a `Recording` object with its `data` attribute containing
        the shuffled array.
    :rtype: np.ndarray or utils.data.Recording

    >>> rec = Recording(data=[[2+5j, 1+8j, 6+4j, 3+7j, 4+9j]])
    >>> new_rec = patch_shuffle(rec)
    >>> new_rec.data
    array([[2+5j, 1+8j, 3+4j, 6+9j, 4+7j]])
    """
    if isinstance(signal, Recording):
        data = signal.data.copy()  # Cannot shuffle read-only array.
    else:
        data = np.array(signal)  # np.array (not asarray) forces a copy so the caller's array is not shuffled in place.
    if data.ndim == 2 and np.iscomplexobj(data):
        c, n = data.shape
    else:
        raise ValueError("signal must be CxN complex.")
    if max_patch_size > n or max_patch_size <= 1:
        raise ValueError("max_patch_size must be less than or equal to the length of signal and greater than 1.")
    if c == 1:
        data = np.squeeze(data)
        i = -1
        j = -1
        # Pointers i and j point to exact positions
        while i < n:
            # Generate valid starting point so that at least 1 drop occurs
            i = np.random.randint(j + 1, j + n - max_patch_size + 2)
            j = np.random.randint(i, i + max_patch_size)
            if j > n - 1:  # Check that the full drop is within the dataset
                break
            # Shuffle the real and imaginary rails of the patch in place (independently).
            np.random.shuffle(data.real[i : j + 1])
            np.random.shuffle(data.imag[i : j + 1])
    else:
        raise NotImplementedError
    if isinstance(signal, Recording):
        return Recording(data=data, metadata=signal.metadata)
    else:
        return data.reshape(c, n)

View File

@ -0,0 +1,365 @@
"""
This module comprises various transforms designed to represent signal impairments.
These transforms take a recording as input and return a corresponding recording with
the impairment model applied; we call the latter an impaired recording.
Signals travel through transmission media, which are not perfect. The imperfection
causes signal impairment, meaning that the signal at the beginning of the medium is
not the same as the signal at the end of the medium. What is sent is not what is received.
Three causes of impairment are attenuation, distortion, and noise.
"""
from typing import Optional
import numpy as np
from numpy.typing import ArrayLike
from scipy.signal import resample_poly
from utils.data import Recording
from utils.transforms import iq_augmentations
def add_awgn_to_signal(signal: ArrayLike | Recording, snr: Optional[float] = 1) -> np.ndarray | Recording:
    """Generates additive white gaussian noise (AWGN) relative to the signal-to-noise ratio (SNR) of the
    provided `signal` array or `Recording`.

    This function calculates the root mean squared (RMS) power of `signal` and then finds the RMS power of the noise
    which matches the specified SNR. Then, the AWGN is generated after calculating the variance and randomly
    calculating the amplitude and phase of the noise. Then, this generated AWGN is added to the original signal and
    returned.

    :param signal: Input IQ data as a complex ``C x N`` array or `Recording`, where ``C`` is the number of channels
        and ``N`` is the length of the IQ examples.
    :type signal: array_like or utils.data.Recording
    :param snr: The signal-to-noise ratio in dB. Default is 1.
    :type snr: float, optional
    :raises ValueError: If `signal` is not CxN complex.
    :return: A numpy array which is the sum of the noise (which matches the SNR) and the original signal. If `signal`
        is a `Recording`, returns a `Recording object` with its `data` attribute containing the noisy signal array.
    :rtype: np.ndarray or utils.data.Recording

    >>> rec = Recording(data=[[1+1j, 2+2j]])
    >>> new_rec = add_awgn_to_signal(rec)
    >>> new_rec.data
    array([[0.83141973+0.32529242j, -1.00909846+2.39282713j]])
    """
    if isinstance(signal, Recording):
        data = signal.data
    else:
        data = np.asarray(signal)
    if data.ndim != 2 or not np.iscomplexobj(data):
        raise ValueError("signal must be CxN complex.")
    # NOTE(review): the docstring says `snr` is in dB, but sibling transforms pass linear-looking
    # ratios (0.5, 1, 2) to generate_awgn -- confirm the unit against generate_awgn's contract.
    noise = iq_augmentations.generate_awgn(signal=data, snr=snr)
    # (Removed leftover debug print of the generated noise.)
    noisy_signal = data + noise
    if isinstance(signal, Recording):
        return Recording(data=noisy_signal, metadata=signal.metadata)
    else:
        return noisy_signal
def time_shift(signal: ArrayLike | Recording, shift: Optional[int] = 1) -> np.ndarray | Recording:
    """Apply a time shift to a signal.

    After the time shift is applied, we fill any empty regions with zeros. Positive shifts move
    samples toward later indices (right); negative shifts move samples toward earlier indices (left).

    :param signal: Input IQ data as a complex CxN array or `Recording`, where C is the number of channels and N
        is the length of the IQ examples.
    :type signal: array_like or utils.data.Recording
    :param shift: The number of indices to shift by. Default is 1.
    :type shift: int, optional
    :raises ValueError: If `signal` is not CxN complex.
    :raises UserWarning: If `shift` is greater than length of `signal`.
    :return: A numpy array which represents the time-shifted signal. If `signal` is a `Recording`,
        returns a `Recording object` with its `data` attribute containing the time-shifted array.
    :rtype: np.ndarray or utils.data.Recording

    >>> rec = Recording(data=[[1+1j, 2+2j, 3+3j, 4+4j, 5+5j]])
    >>> new_rec = time_shift(rec, -2)
    >>> new_rec.data
    array([[3+3j, 4+4j, 5+5j, 0+0j, 0+0j]])
    """
    if isinstance(signal, Recording):
        data = signal.data
    else:
        data = np.asarray(signal)
    if data.ndim == 2 and np.iscomplexobj(data):
        c, n = data.shape
    else:
        raise ValueError("signal must be CxN complex.")
    if shift > n:
        raise UserWarning("shift is greater than signal length")
    shifted_data = np.zeros_like(data)
    if c == 1:
        if shift == 0:
            # A zero shift must return the signal unchanged; the slicing below would produce an
            # all-zero array because data[:, :-0] is an empty slice.
            shifted_data = data.copy()
        elif shift > 0:
            # Shift to the right; the leading `shift` samples are zero-filled.
            shifted_data[:, shift:] = data[:, :-shift]
        else:
            # Shift to the left; the trailing `-shift` samples are zero-filled.
            shifted_data[:, :shift] = data[:, -shift:]
    else:
        raise NotImplementedError
    if isinstance(signal, Recording):
        return Recording(data=shifted_data, metadata=signal.metadata)
    else:
        return shifted_data
def frequency_shift(signal: ArrayLike | Recording, shift: Optional[float] = 0.5) -> np.ndarray | Recording:
    """Apply a frequency shift to a signal.

    .. note::
        The frequency shift is applied relative to the sample rate.

    :param signal: Input IQ data as a complex CxN array or `Recording`, where C is the number of channels and N
        is the length of the IQ examples.
    :type signal: array_like or utils.data.Recording
    :param shift: The frequency shift relative to the sample rate. Must be in the range ``[-0.5, 0.5]``.
        Default is 0.5.
    :type shift: float, optional
    :raises ValueError: If the provided frequency shift is not in the range ``[-0.5, 0.5]``.
    :raises ValueError: If `signal` is not CxN complex.
    :return: A numpy array which represents the frequency-shifted signal. If `signal` is a `Recording`,
        returns a `Recording object` with its `data` attribute containing the frequency-shifted array.
    :rtype: np.ndarray or utils.data.Recording

    >>> rec = Recording(data=[[1+1j, 2+2j, 3+3j, 4+4j]])
    >>> new_rec = frequency_shift(rec, -0.4)
    >>> new_rec.data
    array([[1+1j, -0.44246348-2.79360449j, -1.92611857+3.78022053j, 5.04029404-2.56815809j]])
    """
    if shift > 0.5 or shift < -0.5:
        raise ValueError("Frequency shift must be in the range [-0.5, 0.5]")
    data = signal.data if isinstance(signal, Recording) else np.asarray(signal)
    if data.ndim != 2 or not np.iscomplexobj(data):
        raise ValueError("signal must be CxN complex.")
    c, n = data.shape
    shifted_data = np.zeros_like(data)
    if c != 1:
        raise NotImplementedError
    # Phase ramp corresponding to the requested (sample-rate-relative) frequency offset.
    ramp = 2.0 * np.pi * shift * np.arange(n)
    cos_ramp = np.cos(ramp)
    sin_ramp = np.sin(ramp)
    # Complex rotation by the ramp, expanded into its real/imaginary trigonometric form.
    shifted_data.real = data.real * cos_ramp - data.imag * sin_ramp
    shifted_data.imag = data.real * sin_ramp + data.imag * cos_ramp
    if isinstance(signal, Recording):
        return Recording(data=shifted_data, metadata=signal.metadata)
    return shifted_data
def phase_shift(signal: ArrayLike | Recording, phase: Optional[float] = np.pi) -> np.ndarray | Recording:
    """Apply a phase shift to a signal.

    Every IQ sample is rotated by the same fixed phase angle.

    :param signal: Input IQ data as a complex CxN array or `Recording`, where C is the number of channels and N
        is the length of the IQ examples.
    :type signal: array_like or utils.data.Recording
    :param phase: The phase angle by which to rotate the IQ samples, in radians. Must be in the range ``[-π, π]``.
        Default is π.
    :type phase: float, optional
    :raises ValueError: If the provided phase rotation is not in the range ``[-π, π]``.
    :raises ValueError: If `signal` is not CxN complex.
    :return: A numpy array which represents the phase-shifted signal. If `signal` is a `Recording`,
        returns a `Recording object` with its `data` attribute containing the phase-shifted array.
    :rtype: np.ndarray or utils.data.Recording

    >>> rec = Recording(data=[[1+1j, 2+2j, 3+3j, 4+4j]])
    >>> new_rec = phase_shift(rec, np.pi/2)
    >>> new_rec.data
    array([[-1.+1.j, -2.+2.j, -3.+3.j, -4.+4.j]])
    """
    if phase > np.pi or phase < -np.pi:
        raise ValueError("Phase rotation must be in the range [-π, π]")
    data = signal.data if isinstance(signal, Recording) else np.asarray(signal)
    if data.ndim != 2 or not np.iscomplexobj(data):
        raise ValueError("signal must be CxN complex.")
    c, n = data.shape
    if c != 1:
        raise NotImplementedError
    # Multiplying by a unit-magnitude complex constant rotates every sample by `phase` radians.
    shifted_data = data * np.exp(1j * phase)
    if isinstance(signal, Recording):
        return Recording(data=shifted_data, metadata=signal.metadata)
    return shifted_data
def iq_imbalance(
    signal: ArrayLike | Recording,
    amplitude_imbalance: Optional[float] = 1.5,
    phase_imbalance: Optional[float] = np.pi,
    dc_offset: Optional[float] = 1.5,
) -> np.ndarray | Recording:
    """Apply an IQ Imbalance to a signal.
    .. note::
        Based on MathWorks' `I/Q Imbalance <https://www.mathworks.com/help/comm/ref/iqimbalance.html>`_.
    :param signal: Input IQ data as a complex CxN array or `Recording`, where C is the number of channels and N
    is the length of the IQ examples.
    :type signal: array_like or utils.data.Recording
    :param amplitude_imbalance: The IQ amplitude imbalance to apply, in dB. Default is 1.5.
    :type amplitude_imbalance: float, optional
    :param phase_imbalance: The IQ phase imbalance to apply, in radians. Default is π.
    Must be in the range ``[-π, π]``.
    :type phase_imbalance: float, optional
    :param dc_offset: The IQ DC offset to apply, in dB. Default is 1.5.
    :type dc_offset: float, optional
    :raises ValueError: If the phase imbalance is not in the range ``[-π, π]``.
    :raises ValueError: If `signal` is not CxN complex.
    :return: A numpy array which is the original signal with an applied IQ imbalance. If `signal` is a `Recording`,
    returns a `Recording object` with its `data` attribute containing the IQ imbalanced signal array.
    :rtype: np.ndarray or utils.data.Recording
    >>> rec = Recording(data=[[2+18j, -34+2j, 3+9j]])
    >>> new_rec = iq_imbalance(rec, 1, np.pi, 2)
    >>> new_rec.data
    array([[-38.38613587-4.78555031j, -4.26512621+81.35435535j, -19.19306793-7.17832547j]])
    """
    # TODO: Additional info needs to be added to docstring description
    if phase_imbalance > np.pi or phase_imbalance < -np.pi:
        raise ValueError("Phase imbalance must be in the range [-π, π].")
    if isinstance(signal, Recording):
        data = signal.data
    else:
        data = np.asarray(signal)
    if data.ndim == 2 and np.iscomplexobj(data):
        c, n = data.shape
    else:
        raise ValueError("signal must be CxN complex.")
    if c == 1:
        # Apply amplitude imbalance: the dB imbalance is split evenly between the rails,
        # boosting I by +imbalance/2 dB and attenuating Q by -imbalance/2 dB.
        data = (
            10 ** (0.5 * amplitude_imbalance / 20.0) * data.real
            + 1j * 10 ** (-0.5 * amplitude_imbalance / 20.0) * data.imag
        )
        # Apply phase imbalance: rotate the I and Q rails of the (already amplitude-imbalanced)
        # complex signal by -phase/2 and +phase/2 respectively.
        data = (
            np.exp(-1j * phase_imbalance / 2.0) * data.real
            + np.exp(1j * (np.pi / 2.0 + phase_imbalance / 2.0)) * data.imag
        )
        # Apply DC offset
        # NOTE(review): this adds back a per-sample copy of the signal scaled by 10**(dc_offset/20),
        # i.e. it acts as a gain of (1 + 10**(dc_offset/20)) on each sample rather than adding a
        # constant offset -- confirm this is the intended "DC offset" behavior; the documented
        # doctest output depends on it.
        imbalanced_data = data + (10 ** (dc_offset / 20.0) * data.real + 1j * 10 ** (dc_offset / 20.0) * data.imag)
    else:
        raise NotImplementedError
    if isinstance(signal, Recording):
        return Recording(data=imbalanced_data, metadata=signal.metadata)
    else:
        return imbalanced_data
def resample(signal: ArrayLike | Recording, up: Optional[int] = 4, down: Optional[int] = 2) -> np.ndarray | Recording:
    """Resample a signal using polyphase filtering.

    Uses scipy.signal.resample_poly to upsample the signal by the factor *up*, apply a zero-phase
    low-pass FIR filter, and downsample the signal by the factor *down*. The output is truncated or
    zero-padded so its length matches the original N samples.

    :param signal: Input IQ data as a complex CxN array or `Recording`, where C is the number of channels and N
        is the length of the IQ examples.
    :type signal: array_like or utils.data.Recording
    :param up: The upsampling factor. Default is 4.
    :type up: int, optional
    :param down: The downsampling factor. Default is 2.
    :type down: int, optional
    :raises ValueError: If `signal` is not CxN complex.
    :return: A numpy array which represents the resampled signal. If `signal` is a `Recording`,
        returns a `Recording object` with its `data` attribute containing the resampled array.
    :rtype: np.ndarray or utils.data.Recording

    >>> rec = Recording(data=[[1+1j, 2+2j]])
    >>> new_rec = resample(rec, 2, 1)
    >>> new_rec.data
    array([[1.00051747+1.00051747j, 1.90020207+1.90020207j]])
    """
    if isinstance(signal, Recording):
        data = signal.data
    else:
        data = np.asarray(signal)
    if data.ndim == 2 and np.iscomplexobj(data):
        c, n = data.shape
    else:
        raise ValueError("signal must be CxN complex.")
    if c == 1:
        data = np.squeeze(data)
        resampled_iqdata = resample_poly(x=data, up=up, down=down)
        # Reshape array so that slicing operations work on resampled data
        resampled_iqdata = np.reshape(resampled_iqdata, newshape=(1, len(resampled_iqdata)))
        if resampled_iqdata.shape[1] > n:
            # Longer than the input: truncate back to the original length.
            resampled_iqdata = resampled_iqdata[:, :n]
        elif resampled_iqdata.shape[1] < n:
            # Shorter than the input: zero-pad up to the original length. (Previously the padded
            # array had the wrong shape and was discarded, returning a short array.)
            padded = np.zeros((1, n), dtype=resampled_iqdata.dtype)
            padded[:, : resampled_iqdata.shape[1]] = resampled_iqdata
            resampled_iqdata = padded
    else:
        raise NotImplementedError
    if isinstance(signal, Recording):
        return Recording(data=resampled_iqdata, metadata=signal.metadata)
    else:
        return resampled_iqdata

View File

@ -0,0 +1,9 @@
"""
The Helpers module contains a bunch of helper functions, including array conversion utilities.
"""
__all__ = [
"bytes_to_samples",
]
from .bytes_to_samples import bytes_to_samples

View File

@ -0,0 +1,80 @@
"""
IQ data represents the in-phase (I) and quadrature (Q) components of a signal. There are two ways to represent
single-channel IQ signals:
#. **Complex 1xN Format:** In the complex 1xN format, the IQ data is represented as a 2D array of complex numbers with
shape 1xN. In this format, the real part of each complex number represents the in-phase component, while the
imaginary part represents the quadrature component.
#. **Real 2xN Format:** In the real 2xN format, the IQ data is represented as a 2D array of real numbers with shape
2xN. In this format, the first row contains the in-phase components, while the second row contains the quadrature
components.
This submodule provides functions to verify and convert between these two formats.
"""
import numpy as np
from numpy.typing import ArrayLike
def convert_to_2xn(arr: np.ndarray) -> np.ndarray:
    """Convert arr to the real 2xN format. If arr is already real 2xN, then you'll get back a copy.

    :param arr: Array of IQ samples, in the complex 1XN format.
    :type arr: array_like
    :return: The provided signal, in the real 2xN format.
    :rtype: np.ndarray
    """
    # Already real 2xN: just hand back an independent copy.
    if is_2xn(arr):
        return np.copy(arr)
    if is_1xn(arr):
        channel = arr[0]
        # Stack the real (I) and imaginary (Q) parts as the two rows.
        return np.vstack((np.real(channel), np.imag(channel)))
    raise ValueError("arr is neither complex 1xN nor real 2xN.")
def convert_to_1xn(arr: np.ndarray) -> np.ndarray:
    """Convert arr to the complex 1xN format. If arr is already complex 1xN, then you'll get back a copy.

    :param arr: Array of IQ samples, in the real 2xN format.
    :type arr: np.ndarray
    :return: The provided signal, in the complex 1xN format.
    :rtype: np.ndarray
    """
    # Already complex 1xN: just hand back an independent copy.
    if is_1xn(arr):
        return np.copy(arr)
    if is_2xn(arr):
        # Combine row 0 (I) and row 1 (Q) into complex samples, keeping a leading channel axis.
        combined = arr[0, :] + 1j * arr[1, :]
        return combined[np.newaxis, :]
    raise ValueError("arr is neither complex 1xN nor real 2xN.")
def is_1xn(arr: ArrayLike) -> bool:
    """Check whether `arr` is in the complex 1xN IQ format.

    :param arr: Candidate array of IQ samples.
    :type arr: array_like
    :return: True if arr is complex 1xN, False otherwise.
    :rtype: bool
    """
    a = np.asarray(arr)
    # Complex-valued 2D array with exactly one row (the single IQ channel).
    return a.ndim == 2 and a.shape[0] == 1 and np.iscomplexobj(a)
def is_2xn(arr: ArrayLike) -> bool:
    """Check whether `arr` is in the real 2xN IQ format.

    :param arr: Candidate array of IQ samples.
    :type arr: array_like
    :return: True if arr is real 2xN, False otherwise.
    :rtype: bool
    """
    a = np.asarray(arr)
    # Real-valued 2D array with exactly two rows (I row and Q row).
    return a.ndim == 2 and a.shape[0] == 2 and not np.iscomplexobj(a)

View File

@ -0,0 +1,18 @@
from numpy.typing import NDArray
def bytes_to_samples(data: bytes) -> NDArray:
    """Convert bytes to IQ samples, in the complex 1xN format.

    :param data: Array of bytes
    :type data: bytes
    :raises NotImplementedError: Always -- this function is an unimplemented stub.
    :return: Tape of IQ samples, as numpy complex type
    :rtype: np.ndarray
    """
    # NOTE(review): unimplemented stub. The sketch below assumes interleaved int16 I/Q pairs
    # scaled by 2048 (12-bit full scale) -- confirm against the actual byte source before enabling.
    # samples = np.frombuffer(data, dtype=np.int16).astype(np.float32)
    # samples /= 2048
    # samples = samples[::2] + 1j * samples[1::2]
    # # samples = samples.view(np.complex64)
    # return samples
    raise NotImplementedError

View File

@ -0,0 +1,12 @@
"""
The package contains assorted plotting and report generation utilities to help visualize RIA components such as
recordings and radio datasets.
"""
__all__ = [
"view_annotations",
"view_channels",
"view_sig",
]
from .view_signal import view_annotations, view_channels, view_sig

View File

@ -0,0 +1,192 @@
import numpy as np
import plotly.graph_objects as go
import scipy.signal as signal
from plotly.graph_objs import Figure
from scipy.fft import fft, fftshift
from utils.data import Recording
def spectrogram(rec: Recording, thumbnail: bool = False) -> Figure:
    """Create a spectrogram for the recording.

    :param rec: Signal to plot.
    :type rec: utils.data.Recording
    :param thumbnail: Whether to return a small thumbnail version or full plot.
    :type thumbnail: bool
    :return: Spectrogram, as a Plotly figure.
    """
    complex_signal = rec.data[0]
    sample_rate = int(rec.metadata.get("sample_rate", 1))
    plot_length = len(complex_signal)
    # Choose an FFT size appropriate for the number of available samples.
    if plot_length < 2000:
        fft_size = 64
    elif plot_length < 10000:
        fft_size = 256
    elif plot_length < 1000000:
        fft_size = 1024
    else:
        fft_size = 2048
    frequencies, times, Sxx = signal.spectrogram(
        complex_signal,
        fs=sample_rate,
        nfft=fft_size,
        nperseg=fft_size,
        noverlap=fft_size // 8,
        scaling="density",
        mode="complex",
        return_onesided=False,
    )
    # Convert complex values to amplitude and then to log scale for visualization
    Sxx_magnitude = np.abs(Sxx)
    Sxx_log = np.log10(Sxx_magnitude + 1e-6)
    # Normalize spectrogram values between 0 and 1 for plotting; guard against a flat
    # (zero-dynamic-range) spectrogram, where the max would be 0 and divide by zero.
    Sxx_log_shifted = Sxx_log - np.min(Sxx_log)
    Sxx_log_norm = Sxx_log_shifted / max(np.max(Sxx_log_shifted), 1e-12)
    # Shift frequency bins and spectrogram rows so frequencies run from negative to positive
    frequencies_shifted = np.fft.fftshift(frequencies)
    Sxx_shifted = np.fft.fftshift(Sxx_log_norm, axes=0)
    fig = go.Figure(
        data=go.Heatmap(
            z=Sxx_shifted,
            # scipy returns segment times in seconds, so plot them directly against the [s] axis
            # (previously divided by 1e6, which mis-scaled the time axis by a factor of 10^6).
            x=times,
            y=frequencies_shifted,
            colorscale="Viridis",
            zmin=0,
            zmax=1,
            reversescale=False,
            showscale=False,
        )
    )
    if thumbnail:
        fig.update_xaxes(showticklabels=False)
        fig.update_yaxes(showticklabels=False)
        fig.update_layout(
            template="plotly_dark",
            width=200,
            height=100,
            margin=dict(l=5, r=5, t=5, b=5),
            xaxis=dict(scaleanchor=None),
            yaxis=dict(scaleanchor=None),
        )
    else:
        fig.update_layout(
            title="Spectrogram",
            xaxis_title="Time [s]",
            yaxis_title="Frequency [Hz]",
            template="plotly_dark",
            height=300,
            width=800,
        )
    return fig
def iq_time_series(rec: Recording) -> Figure:
    """Create a time series plot of the real and imaginary parts of signal.

    :param rec: Signal to plot.
    :type rec: utils.data.Recording
    :return: Time series plot as a Plotly figure.
    """
    samples = rec.data[0]
    fs = int(rec.metadata.get("sample_rate", 1))
    # Time axis in seconds, one entry per sample.
    t = np.arange(len(samples)) / fs
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=t, y=samples.real, mode="lines", name="I (In-phase)", line=dict(width=0.6)))
    fig.add_trace(go.Scatter(x=t, y=samples.imag, mode="lines", name="Q (Quadrature)", line=dict(width=0.6)))
    fig.update_layout(
        title="IQ Time Series",
        xaxis_title="Time [s]",
        yaxis_title="Amplitude",
        template="plotly_dark",
        height=300,
        width=800,
        showlegend=True,
    )
    return fig
def frequency_spectrum(rec: Recording) -> Figure:
    """Create a frequency spectrum plot from the recording.

    :param rec: Input signal to plot.
    :type rec: utils.data.Recording
    :return: Frequency spectrum as a Plotly figure.
    """
    complex_signal = rec.data[0]
    center_frequency = int(rec.metadata.get("center_frequency", 0))
    sample_rate = int(rec.metadata.get("sample_rate", 1))
    epsilon = 1e-10  # Guards the log against zero-magnitude bins.
    spectrum = np.abs(fftshift(fft(complex_signal)))
    # Frequency axis centered on the recording's center frequency.
    freqs = np.linspace(-sample_rate / 2, sample_rate / 2, len(complex_signal)) + center_frequency
    log_spectrum = np.log10(spectrum + epsilon)
    # Normalize to [0, 1]; guard against a flat spectrum, where max == min would divide by zero.
    spread = log_spectrum.max() - log_spectrum.min()
    if spread > 0:
        scaled_log_spectrum = (log_spectrum - log_spectrum.min()) / spread
    else:
        scaled_log_spectrum = np.zeros_like(log_spectrum)
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=freqs, y=scaled_log_spectrum, mode="lines", name="Spectrum", line=dict(width=0.4)))
    fig.update_layout(
        title="Frequency Spectrum",
        xaxis_title="Frequency [Hz]",
        yaxis_title="Magnitude",
        yaxis_type="log",
        template="plotly_dark",
        height=300,
        width=800,
        showlegend=False,
    )
    return fig
def constellation(rec: Recording) -> Figure:
    """Create a constellation plot from the recording.

    :param rec: Input signal to plot.
    :type rec: utils.data.Recording
    :return: Constellation as a Plotly figure.
    """
    samples = rec.data[0]
    # Downsample to roughly this many points: plotting every raw sample hurts rendering
    # performance and interactivity without adding visible detail to the constellation.
    target_number_of_points = 5000
    stride = max(1, len(samples) // target_number_of_points)
    in_phase = samples.real[::stride]
    quadrature = samples.imag[::stride]
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=in_phase, y=quadrature, mode="lines", name="Constellation", line=dict(width=0.2)))
    # NOTE(review): the fixed [-1.1, 1.1] window assumes unit-normalized IQ data -- confirm upstream scaling.
    fig.update_layout(
        title="Constellation",
        xaxis_title="In-phase (I)",
        yaxis_title="Quadrature (Q)",
        template="plotly_dark",
        height=400,
        width=400,
        showlegend=False,
        xaxis=dict(range=[-1.1, 1.1]),
        yaxis=dict(range=[-1.1, 1.1]),
    )
    return fig