Populating initial source code from RIA Utils project
parent 25e39d3544
commit d8a7dc16be
src/ria_toolkit/adt/__init__.py (new file, 8 lines)
@@ -0,0 +1,8 @@
"""
The Data package contains abstract data types tailored for radio machine learning, such as ``Recording``, as well
as the abstract interfaces for the radio dataset and radio dataset builder framework.
"""

__all__ = ["Annotation", "Recording"]
from .annotation import Annotation
from .recording import Recording
src/ria_toolkit/adt/annotation.py (new file, 128 lines)
@@ -0,0 +1,128 @@
from __future__ import annotations

import json
from typing import Any, Optional

from sigmf import SigMFFile


class Annotation:
    """Signal annotations are labels or additional information associated with specific data points or segments within
    a signal. These annotations could be used for tasks like supervised learning, where the goal is to train a model
    to recognize patterns or characteristics in the signal associated with these annotations.

    Annotations can be used to label interesting points in your recording.

    :param sample_start: The index of the starting sample of the annotation.
    :type sample_start: int
    :param sample_count: The number of samples in the annotation.
    :type sample_count: int
    :param freq_lower_edge: The lower frequency of the annotation.
    :type freq_lower_edge: float
    :param freq_upper_edge: The upper frequency of the annotation.
    :type freq_upper_edge: float
    :param label: The label that will be displayed with the bounding box in compatible viewers, including IQEngine.
        Defaults to an empty string.
    :type label: str, optional
    :param comment: A human-readable comment. Defaults to an empty string.
    :type comment: str, optional
    :param detail: A dictionary of user-defined annotation-specific metadata. Defaults to None.
    :type detail: dict, optional
    """

    def __init__(
        self,
        sample_start: int,
        sample_count: int,
        freq_lower_edge: float,
        freq_upper_edge: float,
        label: Optional[str] = "",
        comment: Optional[str] = "",
        detail: Optional[dict] = None,
    ):
        """Initialize a new Annotation instance."""
        self.sample_start = int(sample_start)
        self.sample_count = int(sample_count)
        self.freq_lower_edge = float(freq_lower_edge)
        self.freq_upper_edge = float(freq_upper_edge)
        self.label = str(label)
        self.comment = str(comment)

        if detail is None:
            self.detail = {}
        elif not _is_jsonable(detail):
            raise ValueError(f"Detail object is not JSON serializable: {detail}")
        else:
            self.detail = detail

    def is_valid(self) -> bool:
        """
        Check that the annotation sample count is > 0 and that freq_lower_edge < freq_upper_edge.

        :returns: True if valid, False if not.
        """

        return self.sample_count > 0 and self.freq_lower_edge < self.freq_upper_edge

    def overlap(self, other):
        """
        Quantify how much the bounding box in this annotation overlaps with another annotation.

        :param other: The other annotation.
        :type other: Annotation

        :returns: The area of the overlap in samples*frequency, or 0 if they do not overlap."""

        sample_overlap_start = max(self.sample_start, other.sample_start)
        sample_overlap_end = min(self.sample_start + self.sample_count, other.sample_start + other.sample_count)

        freq_overlap_start = max(self.freq_lower_edge, other.freq_lower_edge)
        freq_overlap_end = min(self.freq_upper_edge, other.freq_upper_edge)

        if freq_overlap_start >= freq_overlap_end or sample_overlap_start >= sample_overlap_end:
            return 0
        else:
            return (sample_overlap_end - sample_overlap_start) * (freq_overlap_end - freq_overlap_start)

    def area(self):
        """
        The 'area' of the bounding box, samples*frequency.
        Useful to quantify annotation size.

        :returns: sample length multiplied by bandwidth."""

        return self.sample_count * (self.freq_upper_edge - self.freq_lower_edge)

    def __eq__(self, other: Annotation) -> bool:
        return self.__dict__ == other.__dict__

    def to_sigmf_format(self):
        """
        Returns a JSON dictionary representing this annotation, formatted to be saved in a .sigmf-meta file.
        """

        annotation_dict = {SigMFFile.START_INDEX_KEY: self.sample_start, SigMFFile.LENGTH_INDEX_KEY: self.sample_count}

        annotation_dict["metadata"] = {
            SigMFFile.LABEL_KEY: self.label,
            SigMFFile.COMMENT_KEY: self.comment,
            SigMFFile.FHI_KEY: self.freq_upper_edge,
            SigMFFile.FLO_KEY: self.freq_lower_edge,
            "ria:detail": self.detail,
        }

        if _is_jsonable(annotation_dict):
            return annotation_dict
        else:
            raise ValueError("Annotation dictionary was not JSON serializable.")


def _is_jsonable(x: Any) -> bool:
    """
    :return: True if x is JSON serializable, False otherwise.
    """
    try:
        json.dumps(x)
        return True
    except (TypeError, OverflowError):
        return False
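
A quick sketch of how the class above might be used (an illustration, not part of the commit). It assumes the package laid out in this diff imports as ``ria_toolkit.adt`` and that the ``sigmf`` dependency is installed:

from ria_toolkit.adt import Annotation  # import path assumed from the file layout in this commit

# Two annotations over the same recording, offset in time and frequency.
a = Annotation(sample_start=0, sample_count=1000, freq_lower_edge=-10e3, freq_upper_edge=10e3, label="sig-a")
b = Annotation(sample_start=500, sample_count=1000, freq_lower_edge=0.0, freq_upper_edge=20e3, label="sig-b")

assert a.is_valid() and b.is_valid()
print(a.area())             # 1000 samples * 20 kHz of bandwidth = 20000000.0
print(a.overlap(b))         # 500 samples * 10 kHz of shared bandwidth = 5000000.0
print(a.to_sigmf_format())  # SigMF-style keys plus the "ria:detail" metadata field
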
src/ria_toolkit/adt/datasets/__init__.py (new file, 12 lines)
@@ -0,0 +1,12 @@
"""
The Radio Dataset Subpackage defines the abstract interfaces and framework components for the management of machine
learning datasets tailored for radio signal processing.
"""

__all__ = ["RadioDataset", "IQDataset", "SpectDataset", "DatasetBuilder", "split", "random_split"]

from .dataset_builder import DatasetBuilder
from .iq_dataset import IQDataset
from .radio_dataset import RadioDataset
from .spect_dataset import SpectDataset
from .split import random_split, split
src/ria_toolkit/adt/datasets/dataset_builder.py (new file, 137 lines)
@@ -0,0 +1,137 @@
"""
A `DatasetBuilder` is a creator class that manages the download, preparation, and creation of radio datasets.
"""

from abc import ABC, abstractmethod
from typing import Any, Optional

from packaging.version import Version

from utils._utils.abstract_attribute import abstract_attribute
from utils.data.datasets.license.dataset_license import DatasetLicense
from utils.data.datasets.radio_dataset import RadioDataset


class DatasetBuilder(ABC):
    """Abstract interface for radio dataset builders. These builders produce radio datasets for common and project
    datasets related to radio science.

    This class should not be instantiated directly. Instead, subclass it to define specific builders for different
    datasets.
    """

    _url: str = abstract_attribute()
    _SHA256: str  # SHA256 checksum.
    _name: str = abstract_attribute()
    _author: str = abstract_attribute()
    _license: DatasetLicense = abstract_attribute()
    _version: Version = abstract_attribute()
    _latest_version: Version = None

    def __init__(self):
        super().__init__()

    @property
    def name(self) -> str:
        """
        :return: The name of the dataset.
        :type: str
        """
        return self._name

    @property
    def author(self) -> str:
        """
        :return: The author of the dataset.
        :type: str
        """
        return self._author

    @property
    def url(self) -> str:
        """
        :return: The URL where the dataset was accessed.
        :type: str
        """
        return self._url

    @property
    def sha256(self) -> Optional[str]:
        """
        :return: The SHA256 checksum, or None if not set.
        :type: str
        """
        return self._SHA256

    @property
    def md5(self) -> Optional[str]:
        """
        :return: The MD5 checksum, or None if not set.
        :type: str
        """
        return self._MD5

    @property
    def version(self) -> Version:
        """
        :return: The version identifier of the dataset.
        :type: Version Identifier
        """
        return self._version

    @property
    def latest_version(self) -> Optional[Version]:
        """
        :return: The version identifier of the latest available version of the dataset, or None if not set.
        :type: Version Identifier or None
        """
        return self._latest_version

    @property
    def license(self) -> DatasetLicense:
        """
        :return: The dataset license information.
        :type: DatasetLicense
        """
        return self._license

    @property
    def info(self) -> dict[str, Any]:
        """
        :return: Information about the dataset, including the name, author, and version of the dataset.
        :rtype: dict
        """
        # TODO: We should increase the amount of information that's included here. See the information included in
        # tfds.core.DatasetInfo for more: https://www.tensorflow.org/datasets/api_docs/python/tfds/core/DatasetInfo.
        return {
            "name": self.name,
            "author": self.author,
            "url": self.url,
            "sha256": self.sha256,
            "md5": self.md5,
            "version": self.version,
            "license": self.license,
            "latest_version": self.latest_version,
        }

    @abstractmethod
    def download_and_prepare(self) -> None:
        """Download and prepare the dataset for use as an HDF5 source file.

        Once an HDF5 source file has been prepared, the downloaded files are deleted.
        """
        pass

    @abstractmethod
    def as_dataset(self, backend: str) -> RadioDataset:
        """A factory method to manage the creation of radio datasets.

        :param backend: Backend framework to use ("pytorch" or "tensorflow").
        :type backend: str

        Note: Depending on your installation, not all backends may be available.

        :return: A new RadioDataset based on the signal representation and specified backend.
        :type: RadioDataset
        """
        pass
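
To illustrate the interface above, here is a minimal, hypothetical subclass sketch (not part of the commit). The builder name, author, URL, and checksum are placeholders, and the import paths follow the ones used inside this commit:

from packaging.version import Version

from utils.data.datasets.dataset_builder import DatasetBuilder
from utils.data.datasets.license import CC_BY
from utils.data.datasets.radio_dataset import RadioDataset


class ToyNoiseBuilder(DatasetBuilder):
    _url = "https://example.com/toy-noise.tar.gz"  # placeholder URL
    _SHA256 = None                                 # checksum not known in this sketch
    _name = "toy-noise"
    _author = "Example Author"                     # placeholder author
    _license = CC_BY
    _version = Version("1.0.0")

    def download_and_prepare(self) -> None:
        # Fetch the archive at _url, verify it, and write an HDF5 source file.
        ...

    def as_dataset(self, backend: str) -> RadioDataset:
        # Return an IQDataset/SpectDataset implementation for the requested backend.
        ...
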
src/ria_toolkit/adt/datasets/h5helpers.py (new file, 221 lines)
@@ -0,0 +1,221 @@
import os

import h5py
import numpy as np


def copy_dataset_entry_by_index(
    source: str | os.PathLike, destination: str | os.PathLike, dataset_path: str, idx: int
) -> None:
    """
    Copies an entry from a dataset based on an index from the source HDF5 file to the destination HDF5 file.

    :param source: The name of the original HDF5 file.
    :type source: str
    :param destination: The name of the new HDF5 file.
    :type destination: str
    :param dataset_path: The path of the dataset from the root of the file.
    :type dataset_path: str
    :param idx: The index of the specified example.
    :type idx: int

    :return: None
    """
    # TODO: Generalize so that source and destination can be file objects or strings
    with h5py.File(source, "r") as original_file, h5py.File(destination, "a") as new_file:
        original_ds = original_file[dataset_path]

        entry = original_ds[idx]
        new_ds = new_file[dataset_path]
        new_ds.resize(new_ds.shape[0] + 1, axis=0)
        new_ds[-1] = entry


def copy_over_example(source: str | os.PathLike, destination: str | os.PathLike, idx: int) -> None:
    """
    Copies over an example and its corresponding metadata located at the given index to a new file.
    It appends the new example to the end of the new file.

    :param source: The name of the original HDF5 file.
    :type source: str or os.PathLike
    :param destination: The name of the new HDF5 file.
    :type destination: str or os.PathLike
    :param idx: The index of the example within the dataset.
    :type idx: int

    :return: None
    """

    with h5py.File(source, "r") as original_file, h5py.File(destination, "a") as new_file:
        ds, md = original_file["data"], original_file["metadata/metadata"]

        new_ds, new_md = new_file["data"], new_file["metadata/metadata"]

        new_ds.resize(new_ds.shape[0] + 1, axis=0)
        new_md.resize(new_md.shape[0] + 1, axis=0)

        new_ds[-1], new_md[-1] = ds[idx], md[idx]


def append_entry_inplace(source: str | os.PathLike, dataset_path: str, entry: np.ndarray) -> None:
    """
    Appends an entry to the specified dataset of the source HDF5 file. This operation is done in place.

    :param source: The name of the source HDF5 file.
    :type source: str or os.PathLike
    :param dataset_path: The path of the dataset from the root of the file.
    :type dataset_path: str
    :param entry: The entry that is being copied.
    :type entry: np.ndarray

    :return: None
    """
    # TODO: Generalize so that source can be file object or string
    with h5py.File(source, "a") as new_file:
        new_ds = new_file[dataset_path]
        new_ds.resize(new_ds.shape[0] + 1, axis=0)
        new_ds[-1] = entry


def duplicate_entry_inplace(source: str | os.PathLike, dataset_path: str, idx: int) -> None:
    """
    Appends the entry at the given index to the end of the dataset. This operation is done in place.

    :param source: The name of the source HDF5 file.
    :type source: str or os.PathLike
    :param dataset_path: The path of the dataset from the root of the file. This dataset is usually
        'data' or 'metadata/metadata'.
    :type dataset_path: str
    :param idx: The index of the example within the dataset.
    :type idx: int

    :return: None
    """
    # This function appends to the dataset, so upon dataset creation, chunks must be True and maxshape must be None
    with h5py.File(source, "a") as f:
        ds = f[dataset_path]
        entry = ds[idx]
        ds.resize(ds.shape[0] + 1, axis=0)
        ds[-1] = entry


def copy_file(original_source: str | os.PathLike, new_source: str | os.PathLike) -> None:
    """Copies the contents of a source HDF5 file to a new HDF5 file.

    :param original_source: The name of the original HDF5 source file.
    :type original_source: str or os.PathLike
    :param new_source: The copy of the HDF5 source file.
    :type new_source: str or os.PathLike

    :return: None
    """
    original_file = h5py.File(original_source, "r")

    with h5py.File(new_source, "w") as new_file:
        for key in original_file.keys():
            original_file.copy(key, new_file)

    original_file.close()


def make_empty_clone(original_source: str | os.PathLike, new_source: str | os.PathLike, example_length: int) -> None:
    """Creates a new HDF5 file with the same structure, but leaves the data and metadata datasets empty for
    subsequent operations.

    :param original_source: The name of the original HDF5 source file.
    :type original_source: str or os.PathLike
    :param new_source: The name of the new HDF5 source file.
    :type new_source: str or os.PathLike
    :param example_length: The desired length of an example in the new file.
    :type example_length: int

    :return: None
    """

    with h5py.File(new_source, "w") as new_file, h5py.File(original_source, "r") as original_file:
        for key in original_file.keys():
            if key == "data":
                ds = original_file["data"]
                channels = ds.shape[1]
                new_file.create_dataset(
                    "data",
                    shape=(0, channels, example_length),
                    chunks=True,
                    maxshape=(None, None, None),
                    dtype=original_file["data"].dtype,
                )
            elif key == "metadata":
                new_metadata_group = new_file.create_group("metadata")
                new_metadata_group.create_dataset(
                    "metadata",
                    shape=(0,),
                    chunks=True,
                    maxshape=(None,),
                    dtype=original_file["metadata/metadata"].dtype,
                )
            else:
                original_file.copy(key, new_file)


def delete_example_inplace(source: str | os.PathLike, idx: int) -> None:
    """Deletes an example and its corresponding metadata located at the given index.
    This deletion is done by creating a temporary dataset and copying all contents
    to the temporary dataset except for the example at idx. This operation is done in place.

    :param source: The name of the source HDF5 file.
    :type source: str or os.PathLike
    :param idx: The index of the example and metadata to be deleted.
    :type idx: int

    :return: None
    """

    with h5py.File(source, "a") as f:
        ds, md = f["data"], f["metadata/metadata"]
        m, c, n = ds.shape
        assert 0 <= idx <= m - 1
        assert len(ds) == len(md)

        new_ds = f.create_dataset(
            "data.temp",
            shape=(m - 1, c, n),
            chunks=True,
            dtype=ds.dtype,
            maxshape=(None, None, None),  # Required to allow future mutations which expand the shape
        )
        new_md = f.create_dataset(
            "metadata/metadata.temp", shape=len(md) - 1, chunks=True, dtype=md.dtype, maxshape=(None,)
        )

        for row in range(idx):
            new_ds[row], new_md[row] = ds[row], md[row]

        for row in range(idx + 1, len(md)):
            new_ds[row - 1], new_md[row - 1] = ds[row], md[row]

        del f["data"]
        del f["metadata/metadata"]

        f.move("data.temp", "data")
        f.move("metadata/metadata.temp", "metadata/metadata")


def overwrite_file(source: str | os.PathLike, new_data: np.ndarray) -> None:
    """
    Overwrites data in an HDF5 file with new data.

    :param source: The copy of the HDF5 source file.
    :type source: str or os.PathLike
    :param new_data: The updated copy of the data that should be stored.
    :type new_data: np.ndarray

    :return: None
    """

    # TODO: Might need to pass in dataset_path instead of dataset_name depending on file structure
    # Update copy to include augmented data

    with h5py.File(source, "r+") as f:
        ds_name = tuple(f.keys())[0]
        del f[ds_name]
        f.create_dataset(ds_name, data=new_data)
        f.close()
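
A small end-to-end sketch of the helpers above (an illustration, not part of the commit). It builds a toy source file with the ``data`` and ``metadata/metadata`` layout these functions expect, then duplicates and deletes an entry; the file name is a placeholder and the import path follows the one used inside this commit:

import h5py
import numpy as np

from utils.data.datasets.h5helpers import delete_example_inplace, duplicate_entry_inplace

src = "toy_source.hdf5"
with h5py.File(src, "w") as f:
    # 4 examples, 1 channel, 16 samples each; chunked and resizable, as the helpers require.
    f.create_dataset("data", data=np.zeros((4, 1, 16), dtype=np.complex64),
                     chunks=True, maxshape=(None, None, None))
    f.create_group("metadata").create_dataset("metadata", data=np.arange(4),
                                              chunks=True, maxshape=(None,))

duplicate_entry_inplace(source=src, dataset_path="data", idx=0)               # "data" grows to 5 rows
duplicate_entry_inplace(source=src, dataset_path="metadata/metadata", idx=0)  # metadata grows to 5 rows
delete_example_inplace(source=src, idx=2)                                     # both shrink back to 4 rows

with h5py.File(src, "r") as f:
    print(f["data"].shape, f["metadata/metadata"].shape)  # (4, 1, 16) (4,)
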
src/ria_toolkit/adt/datasets/iq_dataset.py (new file, 210 lines)
@@ -0,0 +1,210 @@
from __future__ import annotations

import os
from abc import ABC
from typing import Optional

import h5py
import numpy as np

from utils.data.datasets.h5helpers import (
    append_entry_inplace,
    copy_dataset_entry_by_index,
)
from utils.data.datasets.radio_dataset import RadioDataset


class IQDataset(RadioDataset, ABC):
    """An ``IQDataset`` is a ``RadioDataset`` tailored for machine learning tasks that involve processing
    radiofrequency (RF) signals represented as In-phase (I) and Quadrature (Q) samples.

    For machine learning tasks that involve processing spectrograms, please use
    utils.data.datasets.SpectDataset instead.

    This is an abstract interface defining common properties and behaviour of IQDatasets. Therefore, this class
    should not be instantiated directly. Instead, it is subclassed to define custom interfaces for specific machine
    learning backends.

    :param source: Path to the dataset source file. For more information on dataset source files
        and their format, see :doc:`radio_datasets`.
    :type source: str or os.PathLike
    """

    def __init__(self, source: str | os.PathLike):
        """Create a new IQDataset."""
        super().__init__(source=source)

    @property
    def shape(self) -> tuple[int]:
        """IQ datasets are M x C x N, where M is the number of examples, C is the number of channels, and N is the
        length of the signals.

        :return: The shape of the dataset. The elements of the shape tuple give the lengths of the corresponding
            dataset dimensions.
        :type: tuple of ints
        """
        return super().shape

    def trim_examples(
        self, trim_length: int, keep: Optional[str] = "start", inplace: Optional[bool] = False
    ) -> IQDataset | None:
        """Trims all examples in a dataset to a desired length.

        :param trim_length: The desired length of the trimmed examples.
        :type trim_length: int
        :param keep: Specifies the part of the example to keep. Defaults to "start".
            The options are:
            - "start"
            - "end"
            - "middle"
            - "random"
        :type keep: str, optional
        :param inplace: If True, the operation modifies the existing source file directly and returns None.
            If False, the operation creates a new dataset object and corresponding source file, leaving the original
            dataset unchanged. Default is False.
        :type inplace: bool

        :raises ValueError: If trim_length is greater than or equal to the length of the examples.
        :raises ValueError: If the value of keep is not recognized.
        :raises ValueError: If the specified trim length is invalid for the middle index.

        :return: The dataset that is composed of shorter examples.
        :rtype: IQDataset

        **Examples:**

        >>> from ria.dataset_manager.builders import AWGN_Builder
        >>> builder = AWGN_Builder()
        >>> builder.download_and_prepare()
        >>> ds = builder.as_dataset()
        >>> ds.shape
        (5, 1, 3)
        >>> new_ds = ds.trim_examples(2)
        >>> new_ds.shape
        (5, 1, 2)
        """

        keep = keep.lower()

        channels, example_length = np.shape(self[0])

        if trim_length >= example_length:
            raise ValueError(f"Trim length must be less than {example_length}")

        if keep not in {"start", "end", "middle", "random"}:
            raise ValueError('keep must be "start", "end", "middle", or "random"')

        start = None
        if keep == "middle":
            start = int(example_length / 2)
            if start + trim_length > example_length:
                raise ValueError(f"Trim length of {trim_length} is invalid for middle index of: {start} ")

        elif keep == "random":
            start = np.random.randint(0, example_length - trim_length + 1)

        if not inplace:
            ds = self._create_next_dataset(example_length=trim_length)

        with h5py.File(self.source, "a") as f:
            data = f["data"]
            for idx in range(len(self)):

                trimmed_example = generate_trimmed_example(
                    example=data[idx],
                    keep=keep,
                    trim_length=trim_length,
                    start=start,
                )

                if not inplace:
                    append_entry_inplace(source=ds.source, dataset_path="data", entry=trimmed_example)
                    copy_dataset_entry_by_index(
                        source=self.source, destination=ds.source, dataset_path="metadata/metadata", idx=idx
                    )

                else:
                    trimmed_example = np.pad(
                        trimmed_example, ((0, 0), (0, example_length - trim_length)), "constant", constant_values=0
                    )
                    data[idx] = trimmed_example

            if not inplace:
                return ds
            else:
                data.resize(trim_length, axis=2)

    def split_examples(
        self, split_factor: Optional[int] = None, example_length: Optional[int] = None, inplace: Optional[bool] = False
    ) -> IQDataset | None:
        """If the current example length is not evenly divisible by the provided example_length, excess samples are
        discarded. Excess samples are always discarded from the end of each example. If the split factor results in
        non-integer example lengths for the new example chunks, it rounds down.

        Requires either split_factor or example_length to be specified, but not both. If both are provided,
        split factor will be used by default, and a warning will be raised.

        :param split_factor: the number of new example chunks produced from each original example, defaults to None.
        :type split_factor: int, optional
        :param example_length: the example length of the new example chunks, defaults to None.
        :type example_length: int, optional
        :param inplace: If True, the operation modifies the existing source file directly and returns None.
            If False, the operation creates a new dataset object and corresponding source file, leaving the original
            dataset unchanged. Default is False.
        :type inplace: bool, optional

        :return: A dataset with more examples that are shorter.
        :rtype: IQDataset

        **Examples:**

        If the dataset has 100 examples of length 1024 and the split factor is 2, the resulting dataset
        will have 200 examples of length 512. No samples have been discarded.

        If the dataset has 100 examples of length 1024 and the example length is 100, the resulting dataset
        will have 1000 examples of length 100. The remaining 24 samples from each example have been discarded.
        """

        if split_factor is not None and example_length is not None:
            # Raise warning and use split factor
            raise Warning("split_factor and example_length should not both be specified.")

        if not inplace:
            # ds = self.create_new_dataset(example_length=example_length)
            pass

        raise NotImplementedError


def generate_trimmed_example(
    example: np.ndarray, keep: str, trim_length: int, start: Optional[int] = None
) -> np.ndarray:
    """Takes in an IQ example as input and returns a trimmed example.

    :param example: The example to be trimmed.
    :type example: np.ndarray
    :param keep: The position the trimming occurs from.
    :type keep: str
    :param trim_length: The desired length of the trimmed example.
    :type trim_length: int
    :param start: The starting index if keep = "middle" or "random".
    :type start: int, optional

    :return: The trimmed example.
    :rtype: np.ndarray
    """

    if keep == "start":
        return example[:, :trim_length]

    elif keep == "end":
        return example[:, -trim_length:]

    elif keep == "middle":
        return example[:, start : start + trim_length]

    else:
        return example[:, start : start + trim_length]
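
A quick check of ``generate_trimmed_example`` on a toy 1 x 8 example (an illustration, not part of the commit; the import path follows the one used inside this commit):

import numpy as np

from utils.data.datasets.iq_dataset import generate_trimmed_example

example = np.arange(8).reshape(1, 8)  # one channel, eight samples

print(generate_trimmed_example(example, keep="start", trim_length=4))            # [[0 1 2 3]]
print(generate_trimmed_example(example, keep="end", trim_length=4))              # [[4 5 6 7]]
print(generate_trimmed_example(example, keep="middle", trim_length=3, start=4))  # [[4 5 6]]
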
src/ria_toolkit/adt/datasets/license/__init__.py (new file, 211 lines)
@@ -0,0 +1,211 @@
"""
This package contains the ``DatasetLicense`` class and a set of off-the-shelf instances for several common
license types.

Common license types for datasets courtesy of the University of Calgary:
`Common license types for datasets and what they mean <https://libanswers.ucalgary.ca/faq/200582>`_

.. note::

    License descriptions are provided for informational purposes only and should not be construed as legal advice.
    For legal guidance, please refer to official license documentation and consult with legal professionals
    specializing in software and dataset licensing.

.. note::

    When licensing datasets, it's recommended to use licenses specifically designed for data, rather than using
    software licenses such as MIT, Apache, or GPL.

"""

__all__ = [
    "DatasetLicense",
    "PUBLIC_DOMAIN",
    "CC_0",
    "CC_BY",
    "CC_BY_NC",
    "CC_BY_NC_ND",
    "CC_BY_NC_SA",
    "CC_BY_ND",
    "CC_BY_SA",
    "ODC_BY",
    "ODC_PDDL",
    "ODC_ODbL",
    "RESTRICTED",
]

from .dataset_license import DatasetLicense

PUBLIC_DOMAIN = DatasetLicense(
    name="Public Domain (No License)",
    identifier=None,
    description="Technically not a license, the public domain mark relinquishes all rights to a dataset and "
    "dedicates the dataset to the public domain.",
    licence="https://creativecommons.org/public-domain/pdm/",
)
"""
`Public Domain <https://creativecommons.org/public-domain/pdm/>`_: Technically not a license, the public domain mark
relinquishes all rights to a dataset and dedicates the dataset to the public domain.
"""


CC_0 = DatasetLicense(
    name="Creative Commons Public Domain Dedication",
    identifier="CC0-1.0",
    description="This Creative Commons license is like a public domain dedication. The copyright holder "
    "surrenders rights in a dataset using this license.",
    licence="https://creativecommons.org/publicdomain/zero/1.0/",
)
"""
`Creative Commons Public Domain Dedication <https://creativecommons.org/publicdomain/zero/1.0/>`_: This Creative
Commons license is like a public domain dedication. The copyright holder surrenders rights in a dataset using this
license.
"""


ODC_PDDL = DatasetLicense(
    name="Open Data Commons Public Domain Dedication and License",
    identifier="PDDL-1.0",
    description="This license is one of the Open Data Commons licenses and is like a public domain dedication. "
    "The copyright holder surrenders rights in a dataset using this license.",
    licence="https://opendatacommons.org/licenses/pddl/",
)
"""
`Open Data Commons Public Domain Dedication and License <https://opendatacommons.org/licenses/pddl/>`_: This license
is one of the Open Data Commons licenses and is like a public domain dedication. The copyright holder surrenders rights
in a dataset using this license.
"""


CC_BY = DatasetLicense(
    name="Creative Commons Attribution 4.0 International",
    identifier="CC-BY-4.0",
    description="This license is one of the open Creative Commons licenses and allows users to share and adapt "
    "the dataset so long as they give credit to the copyright holder.",
    licence="https://creativecommons.org/licenses/by/4.0/",
)
"""
`Creative Commons Attribution 4.0 International <https://creativecommons.org/licenses/by/4.0/>`_: This license is one
of the open Creative Commons licenses and allows users to share and adapt the dataset so long as they give credit to
the copyright holder.
"""


ODC_BY = DatasetLicense(
    name="Open Data Commons Attribution License",
    identifier="ODC-By-1.0",
    description="This license is one of the Open Data Commons licenses and allows users to share and adapt the "
    "dataset as long as they give credit to the copyright holder.",
    licence="https://opendatacommons.org/licenses/by/",
)
"""
`Open Data Commons Attribution License <https://opendatacommons.org/licenses/by/>`_: This license is one of the Open
Data Commons licenses and allows users to share and adapt the dataset as long as they give credit to the copyright
holder.
"""


CC_BY_SA = DatasetLicense(
    name="Creative Commons Attribution-ShareAlike 4.0 International",
    identifier="CC-BY-SA-4.0",
    description="This license is one of the open Creative Commons licenses and allows users to share and adapt "
    "the dataset as long as they give credit to the copyright holder and distribute any additions, "
    "transformations or changes to the dataset under this same license.",
    licence="https://creativecommons.org/licenses/by-sa/4.0/",
)
"""
`Creative Commons Attribution-ShareAlike 4.0 International <https://creativecommons.org/licenses/by-sa/4.0/>`_: This
license is one of the open Creative Commons licenses and allows users to share and adapt the dataset as long as they
give credit to the copyright holder and distribute any additions, transformations or changes to the dataset under
this same license.
"""


ODC_ODbL = DatasetLicense(
    name="Open Data Commons Open Database License",
    identifier="ODbL-1.0",
    description="This license is one of the Open Data Commons licenses and allows users to share and adapt the "
    "dataset as long as they give credit to the copyright holder and distribute any additions, "
    "transformations or changes to the dataset.",
    licence="https://opendatacommons.org/licenses/odbl/",
)
"""
`Open Data Commons Open Database License <https://opendatacommons.org/licenses/odbl/>`_: This license is one of the
Open Data Commons licenses and allows users to share and adapt the dataset as long as they give credit to the copyright
holder and distribute any additions, transformations or changes to the dataset.
"""


CC_BY_NC = DatasetLicense(
    name="Creative Commons Attribution-NonCommercial 4.0 International",
    identifier="CC-BY-NC-4.0",
    description="This license is one of the Creative Commons licenses and allows users to share and adapt the "
    "dataset if they give credit to the copyright holder and do not use the dataset for any "
    "commercial purposes.",
    licence="https://creativecommons.org/licenses/by-nc/4.0/",
)
"""
`Creative Commons Attribution-NonCommercial 4.0 International <https://creativecommons.org/licenses/by-nc/4.0/>`_: This
license is one of the Creative Commons licenses and allows users to share and adapt the dataset if they give credit to
the copyright holder and do not use the dataset for any commercial purposes.
"""


CC_BY_ND = DatasetLicense(
    name="Creative Commons Attribution-NoDerivatives 4.0 International",
    identifier="CC-BY-ND-4.0",
    description="This license is one of the Creative Commons licenses and allows users to share the dataset if "
    "they give credit to the copyright holder, but they cannot make any additions, transformations or "
    "changes to the dataset under this license.",
    licence="https://creativecommons.org/licenses/by-nd/4.0/",
)
"""
`Creative Commons Attribution-NoDerivatives 4.0 International <https://creativecommons.org/licenses/by-nd/4.0/>`_: This
license is one of the Creative Commons licenses and allows users to share the dataset if they give credit to the
copyright holder, but they cannot make any additions, transformations or changes to the dataset under this license.
"""


CC_BY_NC_SA = DatasetLicense(
    name="Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International",
    identifier="CC-BY-NC-SA-4.0",
    description="This license is one of the Creative Commons licenses and allows users to share the dataset only "
    "if they (1) give credit to the copyright holder, (2) do not use the dataset for any commercial "
    "purposes, and (3) distribute any additions, transformations or changes to the dataset under this "
    "same license.",
    licence="https://creativecommons.org/licenses/by-nc-sa/4.0/",
)
"""
`Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International
<https://creativecommons.org/licenses/by-nc-sa/4.0/>`_: This license is one of the Creative Commons licenses and allows
users to share the dataset only if they (1) give credit to the copyright holder, (2) do not use the dataset for any
commercial purposes, and (3) distribute any additions, transformations or changes to the dataset under this same
license.
"""


CC_BY_NC_ND = DatasetLicense(
    name="Creative Commons Attribution-NonCommercial-NoDerivatives 4.0 International",
    identifier="CC-BY-NC-ND-4.0",
    description="This license is one of the Creative Commons licenses and allows users to use only your "
    "unmodified dataset if they give credit to the copyright holder and do not share it for "
    "commercial purposes. Users cannot make any additions, transformations or changes to the dataset "
    "under this license.",
    licence="https://creativecommons.org/licenses/by-nc-nd/4.0/",
)
"""
`Creative Commons Attribution-NonCommercial-NoDerivatives 4.0 International
<https://creativecommons.org/licenses/by-nc-nd/4.0/>`_: This license is one of the Creative Commons licenses and allows
users to use only your unmodified dataset if they give credit to the copyright holder and do not share it for
commercial purposes. Users cannot make any additions, transformations or changes to the dataset under this license.
"""


RESTRICTED = DatasetLicense(
    name="Restricted (All Rights Reserved)",
    identifier="Restricted",
    description="All rights reserved. No permissions granted for use, modification, or distribution of the dataset.",
    licence="Restricted (All Rights Reserved)",
)
"""
Restricted (All Rights Reserved): No permissions granted for use, modification, or distribution of the dataset.
"""
src/ria_toolkit/adt/datasets/license/dataset_license.py (new file, 13 lines)
@@ -0,0 +1,13 @@
from dataclasses import dataclass


@dataclass
class DatasetLicense:
    """
    Represents a dataset license.
    """

    name: str  #: The name or title of the license.
    identifier: str | None  #: SPDX short identifier, or None if one does not exist.
    description: str  #: A description of the license.
    licence: str  #: Full license text or URL if the license is available online.
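
As a sketch of how the dataclass above is used (an illustration, not part of the commit), a project-specific license object can be declared alongside the predefined constants in the license package; the name, description, and URL below are placeholders, and the import path follows the one used inside this commit:

from utils.data.datasets.license.dataset_license import DatasetLicense

INTERNAL_ONLY = DatasetLicense(
    name="Internal Use Only",
    identifier=None,  # no SPDX identifier exists for this placeholder license
    description="Dataset may be used within the organization only; no redistribution.",
    licence="https://example.com/internal-data-license",  # placeholder URL
)

print(INTERNAL_ONLY.name, INTERNAL_ONLY.identifier)  # Internal Use Only None
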
src/ria_toolkit/adt/datasets/radio_dataset.py (new file, 1081 lines)
File diff suppressed because it is too large.
src/ria_toolkit/adt/datasets/spect_dataset.py (new file, 57 lines)
@@ -0,0 +1,57 @@
from __future__ import annotations

import os
from abc import ABC

from utils.data.datasets.radio_dataset import RadioDataset


class SpectDataset(RadioDataset, ABC):
    """A ``SpectDataset`` is a ``RadioDataset`` tailored for machine learning tasks that involve processing
    radiofrequency (RF) signals represented as spectrograms. This class is integrated with vision frameworks,
    allowing you to leverage models and techniques from the field of computer vision for analyzing and processing
    radio signal spectrograms.

    For machine learning tasks that involve processing IQ samples, please use
    utils.data.datasets.IQDataset instead.

    This is an abstract interface defining common properties and behaviour of SpectDatasets. Therefore, this class
    should not be instantiated directly. Instead, it is subclassed to define custom interfaces for specific machine
    learning backends.

    :param source: Path to the dataset source file. For more information on dataset source files
        and their format, see :doc:`radio_datasets`.
    :type source: str or os.PathLike
    """

    def __init__(self, source: str | os.PathLike):
        """Create a new SpectDataset."""
        super().__init__(source=source)

    @property
    def shape(self) -> tuple[int]:
        """Spectrogram datasets are M x C x H x W, where M is the number of examples, C is the number of image
        channels, H is the height of the spectrogram, and W is the width of the spectrogram.

        :return: The shape of the dataset. The elements of the shape tuple give the lengths of the corresponding
            dataset dimensions.
        :type: tuple of ints
        """
        return super().shape

    def default_augmentations(self) -> list[callable]:
        """Returns the list of default augmentations for spectrogram datasets.

        .. todo:: This method is not yet implemented.

        :return: A list of default augmentations.
        :rtype: list[callable]
        """
        # Consider the following list of default augmentations:
        # #. horizontal_flip
        # #. vertical_flip
        # #. sharpen
        # #. darken
        # #. lighten
        # #. linear_rotate
        raise NotImplementedError
|
317
src/ria_toolkit/adt/datasets/split.py
Normal file
317
src/ria_toolkit/adt/datasets/split.py
Normal file
|
@ -0,0 +1,317 @@
|
||||||
|
import math
|
||||||
|
import os
|
||||||
|
from collections import Counter
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
from numpy.random import Generator
|
||||||
|
|
||||||
|
from utils.data.datasets import RadioDataset
|
||||||
|
from utils.data.datasets.h5helpers import copy_over_example, make_empty_clone
|
||||||
|
|
||||||
|
|
||||||
|
def split(dataset: RadioDataset, lengths: list[int | float]) -> list[RadioDataset]:
|
||||||
|
"""Split a radio dataset into non-overlapping new datasets of given lengths.
|
||||||
|
|
||||||
|
Recordings are long-form tapes, which can be obtained either from a software-defined radio (SDR) or generated
|
||||||
|
synthetically. Then, radio datasets are curated from collections of recordings by segmenting these
|
||||||
|
longer-form tapes into shorter units called slices.
|
||||||
|
|
||||||
|
For each slice in the dataset, the metadata should include the unique ID of the recording from which the example
|
||||||
|
was cut ('rec_id'). To avoid leakage, all examples with the same 'rec_id' are assigned only to one of the new
|
||||||
|
datasets. This ensures, for example, that slices cut from the same recording do not appear in both the training
|
||||||
|
and test datasets.
|
||||||
|
|
||||||
|
This restriction makes it challenging to generate datasets with the exact lengths specified. To get as close as
|
||||||
|
possible, this method uses a greedy algorithm, which assigns the recordings with the most slices first, working
|
||||||
|
down to those with the fewest. This may not always provide a perfect split, but it works well in most practical
|
||||||
|
cases.
|
||||||
|
|
||||||
|
This function is deterministic, meaning it will always produce the same split. For a random split, see
|
||||||
|
utils.data.datasets.random_split.
|
||||||
|
|
||||||
|
:param dataset: Dataset to be split.
|
||||||
|
:type dataset: RadioDataset
|
||||||
|
:param: lengths: Lengths or fractions of splits to be produced. If given a list of fractions, the list should
|
||||||
|
sum up to 1. The lengths will be computed automatically as ``floor(frac * len(dataset))`` for each fraction
|
||||||
|
provided, and any remainders will be distributed in round-robin fashion.
|
||||||
|
:type lengths: list of ints (lengths) or floats (fractions)
|
||||||
|
|
||||||
|
:return: List of radio datasets. The number of returned datasets will correspond to the length of the provided
|
||||||
|
'lengths' list.
|
||||||
|
:rtype: list of RadioDataset
|
||||||
|
|
||||||
|
**Examples:**
|
||||||
|
|
||||||
|
>>> import random
|
||||||
|
>>> import string
|
||||||
|
>>> import numpy as np
|
||||||
|
>>> import pandas as pd
|
||||||
|
>>> from utils.data.datasets import split
|
||||||
|
|
||||||
|
First, let's generate some random data:
|
||||||
|
|
||||||
|
>>> shape = (24, 1, 1024) # 24 examples, each of length 1024
|
||||||
|
>>> real_part, imag_part = np.random.randint(0, 12, size=shape), np.random.randint(0, 79, size=shape)
|
||||||
|
>>> data = real_part + 1j * imag_part
|
||||||
|
|
||||||
|
Then, a list of recording IDs. Let's pretend this data was cut from 4 separate recordings:
|
||||||
|
|
||||||
|
>>> rec_id_options = [''.join(random.choices(string.ascii_lowercase + string.digits, k=256)) for _ in range(4)]
|
||||||
|
>>> rec_id = [np.random.choice(rec_id_options) for _ in range(shape[0])]
|
||||||
|
|
||||||
|
Using this data and metadata, let's initialize a dataset:
|
||||||
|
|
||||||
|
>>> metadata = pd.DataFrame(data={"rec_id": rec_id}).to_records(index=False)
|
||||||
|
>>> fid = os.path.join(os.getcwd(), "source_file.hdf5")
|
||||||
|
>>> ds = RadioDataset(source=fid)
|
||||||
|
|
||||||
|
Finally, let's do an 80/20 train-test split:
|
||||||
|
|
||||||
|
>>> train_ds, test_ds = split(ds, lengths=[0.8, 0.2])
|
||||||
|
"""
|
||||||
|
if not isinstance(dataset, RadioDataset):
|
||||||
|
raise ValueError(f"'dataset' must be RadioDataset or one of its subclasses, got {type(dataset)}.")
|
||||||
|
|
||||||
|
lengths_ = _validate_lengths(dataset=dataset, lengths=lengths)
|
||||||
|
|
||||||
|
if "rec_id" not in dataset.metadata or not isinstance(dataset.metadata["rec_id"][0], str):
|
||||||
|
raise ValueError("Dataset missing string field 'rec_id'.")
|
||||||
|
|
||||||
|
rec_ids = dict(Counter(dataset.metadata["rec_id"]))
|
||||||
|
|
||||||
|
if len(rec_ids) < len(lengths_):
|
||||||
|
raise ValueError(f"Not enough Recordings IDs in the dataset for a {len(lengths_)}-way split.")
|
||||||
|
|
||||||
|
# Sort the rec_ids in descending order by frequency.
|
||||||
|
ids, freqs = list(rec_ids.keys()), list(rec_ids.values())
|
||||||
|
sorted_indices = np.flip(np.argsort(freqs))
|
||||||
|
sorted_rec_ids = [ids[x] for x in sorted_indices]
|
||||||
|
sorted_freqs = [freqs[x] for x in sorted_indices]
|
||||||
|
|
||||||
|
# Preallocate keys, which we'll use to track which recordings are assigned to which subsets.
|
||||||
|
split_key_ids = [[] for _ in range(len(lengths_))]
|
||||||
|
split_key_freqs = [[] for _ in range(len(lengths_))]
|
||||||
|
|
||||||
|
for i in range(len(rec_ids)):
|
||||||
|
# Find the subset whose current length is farthest from its target length.
|
||||||
|
current_lengths = [sum(subkey) for subkey in split_key_freqs]
|
||||||
|
diffs = [lengths_[j] - current_lengths[j] for j in range(len(lengths_))]
|
||||||
|
index = np.argmax(diffs)
|
||||||
|
|
||||||
|
# Add the 'rec_id' with the highest frequency to the subset farthest from its target.
|
||||||
|
split_key_freqs[index].append(sorted_freqs[i])
|
||||||
|
split_key_ids[index].append(sorted_rec_ids[i])
|
||||||
|
|
||||||
|
_validate_sublists(list_of_lists=split_key_ids, ids=ids)
|
||||||
|
|
||||||
|
return _split_datasets(dataset=dataset, key=split_key_ids)
|
||||||
|
|
||||||
|
|
||||||
|
def random_split(
|
||||||
|
dataset: RadioDataset, lengths: list[int | float], generator: Optional[Generator] = None
|
||||||
|
) -> list[RadioDataset]:
|
||||||
|
"""Randomly split a radio dataset into non-overlapping new datasets of given lengths.
|
||||||
|
|
||||||
|
Recordings are long-form tapes, which can be obtained either from a software-defined radio (SDR) or generated
|
||||||
|
synthetically. Then, radio datasets are curated from collections of recordings by segmenting these
|
||||||
|
longer-form tapes into shorter units called slices.
|
||||||
|
|
||||||
|
For each slice in the dataset, the metadata should include the unique recording ID ('rec_id') of the recording
|
||||||
|
from which the example was cut. To avoid leakage, all examples with the same 'rec_id' are assigned only to one of
|
||||||
|
the new datasets. This ensures, for example, that slices cut from the same recording do not appear in both the
|
||||||
|
training and test datasets.
|
||||||
|
|
||||||
|
This restriction makes it unlikely that a random split will produce datasets with the exact lengths specified.
|
||||||
|
If it is important to ensure the closest possible split, consider using utils.data.datasets.split instead.
|
||||||
|
|
||||||
|
:param dataset: Dataset to be split.
|
||||||
|
:type dataset: RadioDataset
|
||||||
|
:param: lengths: Lengths or fractions of splits to be produced. If given a list of fractions, the list should
|
||||||
|
sum up to 1. The lengths will be computed automatically as ``floor(frac * len(dataset))`` for each fraction
|
||||||
|
provided, and any remainders will be distributed in round-robin fashion.
|
||||||
|
:type lengths: list of ints (lengths) or floats (fractions)
|
||||||
|
|
||||||
|
:param generator: Random generator. Defaults to None.
|
||||||
|
:type generator: NumPy Generator Object, optional.
|
||||||
|
|
||||||
|
:return: List of radio datasets. The number of returned datasets will correspond to the length of the provided
|
||||||
|
'lengths' list.
|
||||||
|
:rtype: list of RadioDataset
|
||||||
|
|
||||||
|
See Also:
|
||||||
|
utils.data.datasets.split: Usage is the same as for ``random_split()``.
|
||||||
|
"""
|
||||||
|
if not isinstance(dataset, RadioDataset):
|
||||||
|
raise ValueError(f"'dataset' must be RadioDataset or one of its subclasses, got {type(dataset)}.")
|
||||||
|
|
||||||
|
lengths_ = _validate_lengths(dataset=dataset, lengths=lengths)
|
||||||
|
|
||||||
|
if generator is None:
|
||||||
|
rng = np.random.default_rng(np.random.randint(0, np.iinfo(np.int32).max))
|
||||||
|
else:
|
||||||
|
rng = generator
|
||||||
|
|
||||||
|
if "rec_id" not in dataset.metadata or not isinstance(dataset.metadata["rec_id"][0], str):
|
||||||
|
raise ValueError("Dataset missing string field 'rec_id'.")
|
||||||
|
|
||||||
|
rec_ids = dict(Counter(dataset.metadata["rec_id"]))
|
||||||
|
|
||||||
|
if len(rec_ids) < len(lengths_):
|
||||||
|
raise ValueError(f"Not enough Recordings IDs in the dataset for a {len(lengths_)}-way split.")
|
||||||
|
|
||||||
|
ids, freqs = list(rec_ids.keys()), list(rec_ids.values())
|
||||||
|
sorted_indices = np.flip(np.argsort(freqs))
|
||||||
|
sorted_rec_ids = [ids[x] for x in sorted_indices]
|
||||||
|
sorted_freqs = [freqs[x] for x in sorted_indices]
|
||||||
|
|
||||||
|
# Preallocate keys, which we'll use to track which recordings are assigned to which subsets.
|
||||||
|
n = len(lengths_)
|
||||||
|
split_key_ids = [[] for _ in range(n)]
|
||||||
|
split_key_freqs = [[] for _ in range(n)]
|
||||||
|
|
||||||
|
# Taking from the bottom (least frequent), assign one recording to each subset. This is important to ensure we
|
||||||
|
# don't end up with any empty subsets, and serves to help randomize the results.
|
||||||
|
top_rec_ids, bottom_rec_ids = sorted_rec_ids[:-n], sorted_rec_ids[-n:]
|
||||||
|
top_freqs, bottom_freqs = sorted_freqs[:-n], sorted_freqs[-n:]
|
||||||
|
bottom_indices = rng.permutation(x=np.asarray(range(n)))
|
||||||
|
|
||||||
|
for i in range(n):
|
||||||
|
split_key_freqs[i].append(bottom_freqs[bottom_indices[i]])
|
||||||
|
split_key_ids[i].append(bottom_rec_ids[bottom_indices[i]])
|
||||||
|
|
||||||
|
for i in range(len(top_rec_ids)):
|
||||||
|
# Find the subset whose current length is farthest from its target length.
|
||||||
|
current_lengths = np.array([sum(subkey) for subkey in split_key_freqs])
|
||||||
|
diffs = np.array([lengths_[j] - current_lengths[j] for j in range(n)])
|
||||||
|
|
||||||
|
# Use the normalized diffs as probabilities. This results in a higher probability for larger diffs.
|
||||||
|
diffs = np.asarray([0 if d < 0 else d for d in diffs]) # Don't add to full or overfull subsets.
|
||||||
|
probabilities = diffs / sum(diffs)
|
||||||
|
|
||||||
|
index = rng.choice(range(n), p=probabilities)
|
||||||
|
|
||||||
|
# Add the 'rec_id' with the highest frequency to the chosen subset.
|
||||||
|
split_key_freqs[index].append(top_freqs[i])
|
||||||
|
split_key_ids[index].append(top_rec_ids[i])
|
||||||
|
|
||||||
|
_validate_sublists(list_of_lists=split_key_ids, ids=ids)
|
||||||
|
|
||||||
|
return _split_datasets(dataset=dataset, key=split_key_ids, generator=rng)
|
||||||
|
|
||||||
|
|
||||||
|
def _validate_lengths(dataset: RadioDataset, lengths: list[int | float]) -> list[int]:
|
||||||
|
"""Validate lengths. If lengths are fractions of splits, lengths will be computed automatically.
|
||||||
|
|
||||||
|
:param dataset: Dataset to be split.
|
||||||
|
:type dataset: RadioDataset
|
||||||
|
:param lengths: Lengths or fractions of splits to be produced.
|
||||||
|
:type lengths: list of ints (lengths) or floats (fractions)
|
||||||
|
|
||||||
|
:return: List of lengths to be produced.
|
||||||
|
:rtype: list of ints
|
||||||
|
"""
|
||||||
|
if not isinstance(lengths, list):
|
||||||
|
raise ValueError(f"'lengths' must be a list of ints or a list of floats, got {type(lengths)}.")
|
||||||
|
|
||||||
|
if len(lengths) < 2:
|
||||||
|
raise ValueError("'lengths' list must contain at least 2 elements.")
|
||||||
|
|
||||||
|
if not all(isinstance(sub, type(lengths[0])) for sub in lengths[1:]):
|
||||||
|
raise ValueError("All elements of 'lengths' must be of the same type.")
|
||||||
|
|
||||||
|
if sum(lengths) == len(dataset):
|
||||||
|
return [int(i) for i in lengths]
|
||||||
|
|
||||||
|
elif math.isclose(sum(lengths), 1, abs_tol=1e-9):
|
||||||
|
# Fractions of splits, which add to 1.
|
||||||
|
lengths_ = [math.floor(f * len(dataset)) for f in lengths]
|
||||||
|
|
||||||
|
# Distribute remainders in round-robin fashion to the lengths until there are no remainders left.
|
||||||
|
i = 0
|
||||||
|
while len(dataset) > sum(lengths_):
|
||||||
|
lengths_[i] = lengths_[i] + 1
|
||||||
|
i = i + 1
|
||||||
|
|
||||||
|
return lengths_
|
||||||
|
|
||||||
|
else:
|
||||||
|
raise ValueError("'lengths' must sum to either the length of 'dataset' or 1.")
|
||||||
|
|
||||||
|
|
||||||
|
def _validate_sublists(list_of_lists: list[list[str]], ids: list[str]) -> None:
|
||||||
|
"""Ensure that each ID is present in one and only one sublist."""
|
||||||
|
all_elements = [item for sublist in list_of_lists for item in sublist]
|
||||||
|
|
||||||
|
assert len(all_elements) == len(set(all_elements)) and sorted(set(ids)) == sorted(set(all_elements))
|
||||||
|
|
||||||
|
|
||||||
|
def _generate_split_source_filenames(
|
||||||
|
parent_dataset: RadioDataset, n_new_datasets: int, generator: Generator
|
||||||
|
) -> list[str]:
|
||||||
|
"""Generate source filenames for each new dataset.
|
||||||
|
|
||||||
|
Examples:
|
||||||
|
|
||||||
|
.../file_name.hdf5 -> [
|
||||||
|
.../file_name.split66ce07f-0.hdf5,
|
||||||
|
.../file_name.split66ce07f-1.hdf5,
|
||||||
|
.../file_name.split66ce07f-2.hdf5
|
||||||
|
]
|
||||||
|
|
||||||
|
.../file_name.002.hdf5 -> [
|
||||||
|
.../file_name.002.split156afd7-0.hdf5,
|
||||||
|
.../file_name.002.split156afd7-1.hdf5,
|
||||||
|
.../file_name.002.split156afd7-2.hdf5
|
||||||
|
]
|
||||||
|
"""
|
||||||
|
parent_file_name = str(parent_dataset.source)
|
||||||
|
parent_base_name = os.path.splitext(parent_file_name)[0]
|
||||||
|
|
||||||
|
random_tag = generator.bytes(length=4).hex()[:7]
|
||||||
|
|
||||||
|
return [f"{parent_base_name}.split{random_tag}-{i}.hdf5" for i in range(n_new_datasets)]
|
||||||
|
|
||||||
|
|
||||||
|
def _split_datasets(
|
||||||
|
dataset: RadioDataset, key: list[list[str]], generator: Optional[Generator] = None
|
||||||
|
) -> list[RadioDataset]:
|
||||||
|
"""Once we know how we'd like to split up the dataset (i.e., which slices are to be included in which new
|
||||||
|
dataset), this helper function does the actual split.
|
||||||
|
|
||||||
|
:param dataset: Dataset to be split.
|
||||||
|
:type dataset: RadioDataset
|
||||||
|
:param key: A key indicating which slices are to be included in which dataset. This is a list of lists, where
|
||||||
|
each sublist contains the recording IDs of the slices to be included in the corresponding subset.
|
||||||
|
:type key: A list of lists
|
||||||
|
|
||||||
|
:param generator: Random generator. Defaults to None.
|
||||||
|
:type generator: NumPy Generator Object, optional.
|
||||||
|
|
||||||
|
:return: Non-overlapping datasets
|
||||||
|
:rtype: list of RadioDataset
|
||||||
|
"""
|
||||||
|
if generator is None:
|
||||||
|
rng = np.random.default_rng(np.random.randint(0, np.iinfo(np.int32).max))
|
||||||
|
else:
|
||||||
|
rng = generator
|
||||||
|
|
||||||
|
new_source_filenames = _generate_split_source_filenames(
|
||||||
|
parent_dataset=dataset, n_new_datasets=len(key), generator=rng
|
||||||
|
)
|
||||||
|
|
||||||
|
for new_source in new_source_filenames:
|
||||||
|
make_empty_clone(original_source=dataset.source, new_source=new_source, example_length=len(dataset.data[0, 0]))
|
||||||
|
|
||||||
|
new_datasets = [dataset.__class__(source=new_source) for new_source in new_source_filenames]
|
||||||
|
|
||||||
|
rec_ids = list(dataset.metadata["rec_id"])
|
||||||
|
|
||||||
|
for i, sublist in enumerate(key):
|
||||||
|
for rec_id in sublist:
|
||||||
|
# The examples at these indices are part of the corresponding new dataset.
|
||||||
|
indices = [index for index, value in enumerate(rec_ids) if value == rec_id]
|
||||||
|
for idx in indices:
|
||||||
|
copy_over_example(source=dataset.source, destination=new_datasets[i].source, idx=idx)
|
||||||
|
|
||||||
|
return new_datasets
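# Hedged end-to-end sketch; 'ds' and the function name 'split_dataset' are illustrative
# placeholders, not names defined in this excerpt:
#     train, test = split_dataset(dataset=ds, lengths=[0.8, 0.2])
#     assert len(train) + len(test) == len(ds)
#     assert set(train.metadata["rec_id"]).isdisjoint(set(test.metadata["rec_id"]))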
|
763
src/ria_toolkit/adt/recording.py
Normal file
763
src/ria_toolkit/adt/recording.py
Normal file
|
@ -0,0 +1,763 @@
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import copy
|
||||||
|
import datetime
|
||||||
|
import hashlib
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import time
|
||||||
|
import warnings
|
||||||
|
from typing import Any, Iterator, Optional
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
from numpy.typing import ArrayLike
|
||||||
|
from quantiphy import Quantity
|
||||||
|
|
||||||
|
from utils.data.annotation import Annotation
|
||||||
|
|
||||||
|
PROTECTED_KEYS = ["rec_id", "timestamp"]
|
||||||
|
|
||||||
|
|
||||||
|
class Recording:
|
||||||
|
"""Tape of complex IQ (in-phase and quadrature) samples with associated metadata and annotations.
|
||||||
|
|
||||||
|
Recording data is a complex array of shape C x N, where C is the number of channels
|
||||||
|
and N is the number of samples in each channel.
|
||||||
|
|
||||||
|
Metadata is stored in a dictionary of key value pairs,
|
||||||
|
to include information such as sample_rate and center_frequency.
|
||||||
|
|
||||||
|
Annotations are a list of :ref:`Annotation <utils.data.Annotation>`,
|
||||||
|
defining bounding boxes in time and frequency with labels and metadata.
|
||||||
|
|
||||||
|
Here, signal data is represented as a NumPy array. This class is then extended in the RIA Backends to provide
|
||||||
|
support for different data structures, such as Tensors.
|
||||||
|
|
||||||
|
Recordings are long-form tapes that can be obtained either from a software-defined radio (SDR) or generated
|
||||||
|
synthetically. Then, machine learning datasets are curated from collections of recordings by segmenting these
|
||||||
|
longer-form tapes into shorter units called slices.
|
||||||
|
|
||||||
|
All recordings are assigned a unique 64-character recording ID, ``rec_id``. If this field is missing from the
|
||||||
|
provided metadata, a new ID will be generated upon object instantiation.
|
||||||
|
|
||||||
|
:param data: Signal data as a tape of IQ samples, as a C x N complex array, where C is the number of
|
||||||
|
channels and N is the number of samples in the signal. If data is a one-dimensional array of complex samples with
|
||||||
|
length N, it will be reshaped to a two-dimensional array with dimensions 1 x N.
|
||||||
|
:type data: array_like
|
||||||
|
|
||||||
|
:param metadata: Additional information associated with the recording.
|
||||||
|
:type metadata: dict, optional
|
||||||
|
:param annotations: A collection of ``Annotation`` objects defining bounding boxes.
|
||||||
|
:type annotations: list of Annotations, optional
|
||||||
|
|
||||||
|
:param dtype: Explicitly specify the data-type of the complex samples. Must be a complex NumPy type, such as
|
||||||
|
``np.complex64`` or ``np.complex128``. Default is None, in which case the type is determined implicitly. If
|
||||||
|
``data`` is a NumPy array, the Recording will use the dtype of ``data`` directly without any conversion.
|
||||||
|
:type dtype: numpy dtype object, optional
|
||||||
|
:param timestamp: The timestamp when the recording data was generated. If provided, it should be a float or integer
|
||||||
|
representing the time in seconds since epoch (e.g., ``time.time()``). Only used if the `timestamp` field is not
|
||||||
|
present in the provided metadata.
|
||||||
|
:type timestamp: float or int, optional
|
||||||
|
|
||||||
|
:raises ValueError: If data is not complex 1xN or CxN.
|
||||||
|
:raises ValueError: If metadata is not a python dict.
|
||||||
|
:raises ValueError: If metadata is not json serializable.
|
||||||
|
:raises ValueError: If annotations is not a list of valid annotation objects.
|
||||||
|
|
||||||
|
**Examples:**
|
||||||
|
|
||||||
|
>>> import numpy
|
||||||
|
>>> from utils.data import Recording, Annotation
|
||||||
|
|
||||||
|
>>> # Create an array of complex samples, just 1s in this case.
|
||||||
|
>>> samples = numpy.ones(10000, dtype=numpy.complex64)
|
||||||
|
|
||||||
|
>>> # Create a dictionary of relevant metadata.
|
||||||
|
>>> sample_rate = 1e6
|
||||||
|
>>> center_frequency = 2.44e9
|
||||||
|
>>> metadata = {
|
||||||
|
... "sample_rate": sample_rate,
|
||||||
|
... "center_frequency": center_frequency,
|
||||||
|
... "author": "me",
|
||||||
|
... }
|
||||||
|
|
||||||
|
>>> # Create an annotation for the annotations list.
|
||||||
|
>>> annotations = [
|
||||||
|
... Annotation(
|
||||||
|
... sample_start=0,
|
||||||
|
... sample_count=1000,
|
||||||
|
... freq_lower_edge=center_frequency - (sample_rate / 2),
|
||||||
|
... freq_upper_edge=center_frequency + (sample_rate / 2),
|
||||||
|
... label="example",
|
||||||
|
... )
|
||||||
|
... ]
|
||||||
|
|
||||||
|
>>> # Store samples, metadata, and annotations together in a convenient object.
|
||||||
|
>>> recording = Recording(data=samples, metadata=metadata, annotations=annotations)
|
||||||
|
>>> print(recording.metadata)
|
||||||
|
{'sample_rate': 1000000.0, 'center_frequency': 2440000000.0, 'author': 'me'}
|
||||||
|
>>> print(recording.annotations[0].label)
|
||||||
|
example
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__( # noqa C901
|
||||||
|
self,
|
||||||
|
data: ArrayLike | list[list],
|
||||||
|
metadata: Optional[dict[str, Any]] = None,
|
||||||
|
dtype: Optional[np.dtype] = None,
|
||||||
|
timestamp: Optional[float | int] = None,
|
||||||
|
annotations: Optional[list[Annotation]] = None,
|
||||||
|
):
|
||||||
|
|
||||||
|
data_arr = np.asarray(data)
|
||||||
|
|
||||||
|
if np.iscomplexobj(data_arr):
|
||||||
|
# Expect C x N
|
||||||
|
if data_arr.ndim == 1:
|
||||||
|
self._data = np.expand_dims(data_arr, axis=0) # N -> 1 x N
|
||||||
|
elif data_arr.ndim == 2:
|
||||||
|
self._data = data_arr
|
||||||
|
else:
|
||||||
|
raise ValueError("Complex data must be C x N.")
|
||||||
|
|
||||||
|
else:
|
||||||
|
raise ValueError("Input data must be complex.")
|
||||||
|
|
||||||
|
if dtype is not None:
|
||||||
|
self._data = self._data.astype(dtype)
|
||||||
|
|
||||||
|
assert np.iscomplexobj(self._data)
|
||||||
|
|
||||||
|
if metadata is None:
|
||||||
|
self._metadata = {}
|
||||||
|
elif isinstance(metadata, dict):
|
||||||
|
self._metadata = metadata
|
||||||
|
else:
|
||||||
|
raise ValueError(f"Metadata must be a python dict, but was {type(metadata)}.")
|
||||||
|
|
||||||
|
if not _is_jsonable(metadata):
|
||||||
|
raise ValueError("Value must be JSON serializable.")
|
||||||
|
|
||||||
|
if "timestamp" not in self.metadata:
|
||||||
|
if timestamp is not None:
|
||||||
|
if not isinstance(timestamp, (int, float)):
|
||||||
|
raise ValueError(f"timestamp must be int or float, not {type(timestamp)}")
|
||||||
|
self._metadata["timestamp"] = timestamp
|
||||||
|
else:
|
||||||
|
self._metadata["timestamp"] = time.time()
|
||||||
|
else:
|
||||||
|
if not isinstance(self._metadata["timestamp"], (int, float)):
|
||||||
|
raise ValueError("timestamp must be int or float, not ", type(self._metadata["timestamp"]))
|
||||||
|
|
||||||
|
if "rec_id" not in self.metadata:
|
||||||
|
self._metadata["rec_id"] = generate_recording_id(data=self.data, timestamp=self._metadata["timestamp"])
|
||||||
|
|
||||||
|
if annotations is None:
|
||||||
|
self._annotations = []
|
||||||
|
elif isinstance(annotations, list):
|
||||||
|
self._annotations = annotations
|
||||||
|
else:
|
||||||
|
raise ValueError("Annotations must be a list or None.")
|
||||||
|
|
||||||
|
if not all(isinstance(annotation, Annotation) for annotation in self._annotations):
|
||||||
|
raise ValueError("All elements in self._annotations must be of type Annotation.")
|
||||||
|
|
||||||
|
self._index = 0
|
||||||
|
|
||||||
|
@property
|
||||||
|
def data(self) -> np.ndarray:
|
||||||
|
"""
|
||||||
|
:return: Recording data, as a complex array.
|
||||||
|
:type: np.ndarray
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
For recordings with more than 1,024 samples, this property returns a read-only view of the data.
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
To access specific samples, consider indexing the object directly with ``rec[c, n]``.
|
||||||
|
"""
|
||||||
|
if self._data.size > 1024:
|
||||||
|
# Returning a read-only view prevents mutation at a distance while maintaining performance.
|
||||||
|
v = self._data.view()
|
||||||
|
v.setflags(write=False)
|
||||||
|
return v
|
||||||
|
else:
|
||||||
|
return self._data.copy()
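# Illustrative sketch of the copy-vs-view behaviour documented above (sizes hypothetical):
#     small = Recording(data=np.ones(10, dtype=np.complex64))
#     small.data[0, 0] = 0    # mutates only the returned copy; 'small' is unchanged
#     large = Recording(data=np.ones(2048, dtype=np.complex64))
#     large.data[0, 0] = 0    # raises ValueError because the returned view is read-only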
|
||||||
|
|
||||||
|
@property
|
||||||
|
def metadata(self) -> dict:
|
||||||
|
"""
|
||||||
|
:return: Dictionary of recording metadata.
|
||||||
|
:type: dict
|
||||||
|
"""
|
||||||
|
return self._metadata.copy()
|
||||||
|
|
||||||
|
@property
|
||||||
|
def annotations(self) -> list[Annotation]:
|
||||||
|
"""
|
||||||
|
:return: List of recording annotations
|
||||||
|
:type: list of Annotation objects
|
||||||
|
"""
|
||||||
|
return self._annotations.copy()
|
||||||
|
|
||||||
|
@property
|
||||||
|
def shape(self) -> tuple[int]:
|
||||||
|
"""
|
||||||
|
:return: The shape of the data array.
|
||||||
|
:type: tuple of ints
|
||||||
|
"""
|
||||||
|
return np.shape(self.data)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def n_chan(self) -> int:
|
||||||
|
"""
|
||||||
|
:return: The number of channels in the recording.
|
||||||
|
:type: int
|
||||||
|
"""
|
||||||
|
return self.shape[0]
|
||||||
|
|
||||||
|
@property
|
||||||
|
def rec_id(self) -> str:
|
||||||
|
"""
|
||||||
|
:return: Recording ID.
|
||||||
|
:type: str
|
||||||
|
"""
|
||||||
|
return self.metadata["rec_id"]
|
||||||
|
|
||||||
|
@property
|
||||||
|
def dtype(self) -> str:
|
||||||
|
"""
|
||||||
|
:return: Data-type of the data array's elements.
|
||||||
|
:type: numpy dtype object
|
||||||
|
"""
|
||||||
|
return self.data.dtype
|
||||||
|
|
||||||
|
@property
|
||||||
|
def timestamp(self) -> float | int:
|
||||||
|
"""
|
||||||
|
:return: Recording timestamp (time in seconds since epoch).
|
||||||
|
:type: float or int
|
||||||
|
"""
|
||||||
|
return self.metadata["timestamp"]
|
||||||
|
|
||||||
|
@property
|
||||||
|
def sample_rate(self) -> float | None:
|
||||||
|
"""
|
||||||
|
:return: Sample rate of the recording, or None if 'sample_rate' is not in metadata.
|
||||||
|
:type: float or None
|
||||||
|
"""
|
||||||
|
return self.metadata.get("sample_rate")
|
||||||
|
|
||||||
|
@sample_rate.setter
|
||||||
|
def sample_rate(self, sample_rate: float | int) -> None:
|
||||||
|
"""Set the sample rate of the recording.
|
||||||
|
|
||||||
|
:param sample_rate: The sample rate of the recording.
|
||||||
|
:type sample_rate: float or int
|
||||||
|
|
||||||
|
:return: None
|
||||||
|
"""
|
||||||
|
self.add_to_metadata(key="sample_rate", value=sample_rate)
|
||||||
|
|
||||||
|
def astype(self, dtype: np.dtype) -> Recording:
|
||||||
|
"""Copy of the recording, data cast to a specified type.
|
||||||
|
|
||||||
|
.. todo: This method is not yet implemented.
|
||||||
|
|
||||||
|
:param dtype: Data-type to which the array is cast. Must be a complex scalar type, such as ``np.complex64`` or
|
||||||
|
``np.complex128``.
|
||||||
|
:type dtype: NumPy data type, optional
|
||||||
|
|
||||||
|
.. note: Casting to a data type with less precision can risk losing data by truncating or rounding values,
|
||||||
|
potentially resulting in a loss of accuracy and significant information.
|
||||||
|
|
||||||
|
:return: A new recording with the same metadata and data, with dtype.
|
||||||
|
|
||||||
|
TODO: Add example usage.
|
||||||
|
"""
|
||||||
|
# Rather than check for a valid datatype, let's cast and check the result. This makes it easier to provide
|
||||||
|
# cross-platform support where the types are aliased across platforms.
|
||||||
|
with warnings.catch_warnings():
|
||||||
|
warnings.simplefilter("ignore") # Casting may generate user warnings. E.g., complex -> real
|
||||||
|
data = self.data.astype(dtype)
|
||||||
|
|
||||||
|
if np.iscomplexobj(data):
|
||||||
|
return Recording(data=data, metadata=self.metadata, annotations=self.annotations)
|
||||||
|
else:
|
||||||
|
raise ValueError("dtype must be a complex number scalar type.")
|
||||||
|
|
||||||
|
def add_to_metadata(self, key: str, value: Any) -> None:
|
||||||
|
"""Add a new key-value pair to the recording metadata.
|
||||||
|
|
||||||
|
:param key: New metadata key, must be snake_case.
|
||||||
|
:type key: str
|
||||||
|
:param value: Corresponding metadata value.
|
||||||
|
:type value: any
|
||||||
|
|
||||||
|
:raises ValueError: If key is already in metadata or if key is not a valid metadata key.
|
||||||
|
:raises ValueError: If value is not JSON serializable.
|
||||||
|
|
||||||
|
:return: None.
|
||||||
|
|
||||||
|
**Examples:**
|
||||||
|
|
||||||
|
Create a recording and add metadata:
|
||||||
|
|
||||||
|
>>> import numpy
|
||||||
|
>>> from utils.data import Recording
|
||||||
|
>>>
|
||||||
|
>>> samples = numpy.ones(10000, dtype=numpy.complex64)
|
||||||
|
>>> metadata = {
|
||||||
|
>>> "sample_rate": 1e6,
|
||||||
|
>>> "center_frequency": 2.44e9,
|
||||||
|
>>> }
|
||||||
|
>>>
|
||||||
|
>>> recording = Recording(data=samples, metadata=metadata)
|
||||||
|
>>> print(recording.metadata)
|
||||||
|
{'sample_rate': 1000000.0,
|
||||||
|
'center_frequency': 2440000000.0,
|
||||||
|
'timestamp': 17369...,
|
||||||
|
'rec_id': 'fda0f41...'}
|
||||||
|
>>>
|
||||||
|
>>> recording.add_to_metadata(key="author", value="me")
|
||||||
|
>>> print(recording.metadata)
|
||||||
|
{'sample_rate': 1000000.0,
|
||||||
|
'center_frequency': 2440000000.0,
|
||||||
|
'author': 'me',
|
||||||
|
'timestamp': 17369...,
|
||||||
|
'rec_id': 'fda0f41...'}
|
||||||
|
"""
|
||||||
|
if key in self.metadata:
|
||||||
|
raise ValueError(
|
||||||
|
f"Key {key} already in metadata. Use Recording.update_metadata() to modify existing fields."
|
||||||
|
)
|
||||||
|
|
||||||
|
if not _is_valid_metadata_key(key):
|
||||||
|
raise ValueError(f"Invalid metadata key: {key}.")
|
||||||
|
|
||||||
|
if not _is_jsonable(value):
|
||||||
|
raise ValueError("Value must be JSON serializable.")
|
||||||
|
|
||||||
|
self._metadata[key] = value
|
||||||
|
|
||||||
|
def update_metadata(self, key: str, value: Any) -> None:
|
||||||
|
"""Update the value of an existing metadata key,
|
||||||
|
or add the key-value pair if it does not already exist.
|
||||||
|
|
||||||
|
:param key: Existing metadata key.
|
||||||
|
:type key: str
|
||||||
|
:param value: New value to enter at key.
|
||||||
|
:type value: any
|
||||||
|
|
||||||
|
:raises ValueError: If value is not JSON serializable
|
||||||
|
:raises ValueError: If key is protected.
|
||||||
|
|
||||||
|
:return: None.
|
||||||
|
|
||||||
|
**Examples:**
|
||||||
|
|
||||||
|
Create a recording and update metadata:
|
||||||
|
|
||||||
|
>>> import numpy
|
||||||
|
>>> from utils.data import Recording
|
||||||
|
|
||||||
|
>>> samples = numpy.ones(10000, dtype=numpy.complex64)
|
||||||
|
>>> metadata = {
|
||||||
|
>>> "sample_rate": 1e6,
|
||||||
|
>>> "center_frequency": 2.44e9,
|
||||||
|
>>> "author": "me"
|
||||||
|
>>> }
|
||||||
|
|
||||||
|
>>> recording = Recording(data=samples, metadata=metadata)
|
||||||
|
>>> print(recording.metadata)
|
||||||
|
{'sample_rate': 1000000.0,
|
||||||
|
'center_frequency': 2440000000.0,
|
||||||
|
'author': "me",
|
||||||
|
'timestamp': 17369...
|
||||||
|
'rec_id': 'fda0f41...'}
|
||||||
|
|
||||||
|
>>> recording.update_metadata(key="author", value=you")
|
||||||
|
>>> print(recording.metadata)
|
||||||
|
{'sample_rate': 1000000.0,
|
||||||
|
'center_frequency': 2440000000.0,
|
||||||
|
'author': "you",
|
||||||
|
'timestamp': 17369...
|
||||||
|
'rec_id': 'fda0f41...'}
|
||||||
|
"""
|
||||||
|
if key not in self.metadata:
|
||||||
|
self.add_to_metadata(key=key, value=value)
return
|
||||||
|
|
||||||
|
if not _is_jsonable(value):
|
||||||
|
raise ValueError("Value must be JSON serializable.")
|
||||||
|
|
||||||
|
if key in PROTECTED_KEYS: # Check protected keys.
|
||||||
|
raise ValueError(f"Key {key} is protected and cannot be modified or removed.")
|
||||||
|
|
||||||
|
else:
|
||||||
|
self._metadata[key] = value
|
||||||
|
|
||||||
|
def remove_from_metadata(self, key: str):
|
||||||
|
"""
|
||||||
|
Remove a key from the recording metadata.
|
||||||
|
Does not remove key if it is protected.
|
||||||
|
|
||||||
|
:param key: The key to remove.
|
||||||
|
:type key: str
|
||||||
|
|
||||||
|
:raises ValueError: If key is protected.
|
||||||
|
|
||||||
|
:return: None.
|
||||||
|
|
||||||
|
**Examples:**
|
||||||
|
|
||||||
|
Create a recording, add a metadata key, then remove it:
|
||||||
|
|
||||||
|
>>> import numpy
|
||||||
|
>>> from utils.data import Recording
|
||||||
|
|
||||||
|
>>> samples = numpy.ones(10000, dtype=numpy.complex64)
|
||||||
|
>>> metadata = {
|
||||||
|
... "sample_rate": 1e6,
|
||||||
|
... "center_frequency": 2.44e9,
|
||||||
|
... }
|
||||||
|
|
||||||
|
>>> recording = Recording(data=samples, metadata=metadata)
|
||||||
|
>>> print(recording.metadata)
|
||||||
|
{'sample_rate': 1000000.0,
|
||||||
|
'center_frequency': 2440000000.0,
|
||||||
|
'timestamp': 17369..., # Example value
|
||||||
|
'rec_id': 'fda0f41...'} # Example value
|
||||||
|
|
||||||
|
>>> recording.add_to_metadata(key="author", value="me")
|
||||||
|
>>> print(recording.metadata)
|
||||||
|
{'sample_rate': 1000000.0,
|
||||||
|
'center_frequency': 2440000000.0,
|
||||||
|
'author': 'me',
|
||||||
|
'timestamp': 17369..., # Example value
|
||||||
|
'rec_id': 'fda0f41...'} # Example value
|
||||||
|
"""
|
||||||
|
if key not in PROTECTED_KEYS:
|
||||||
|
self._metadata.pop(key)
|
||||||
|
else:
|
||||||
|
raise ValueError(f"Key {key} is protected and cannot be modified or removed.")
|
||||||
|
|
||||||
|
def view(self, output_path: Optional[str] = "images/signal.png", **kwargs) -> None:
|
||||||
|
"""Create a plot of various signal visualizations as a PNG image.
|
||||||
|
|
||||||
|
:param output_path: The output image path. Defaults to "images/signal.png".
|
||||||
|
:type output_path: str, optional
|
||||||
|
:param kwargs: Keyword arguments passed on to utils.view.view_sig.
|
||||||
|
:type: dict of keyword arguments
|
||||||
|
|
||||||
|
**Examples:**
|
||||||
|
|
||||||
|
Create a recording and view it as a plot in a .png image:
|
||||||
|
|
||||||
|
>>> import numpy
|
||||||
|
>>> from utils.data import Recording
|
||||||
|
|
||||||
|
>>> samples = numpy.ones(10000, dtype=numpy.complex64)
|
||||||
|
>>> metadata = {
|
||||||
|
>>> "sample_rate": 1e6,
|
||||||
|
>>> "center_frequency": 2.44e9,
|
||||||
|
>>> }
|
||||||
|
|
||||||
|
>>> recording = Recording(data=samples, metadata=metadata)
|
||||||
|
>>> recording.view()
|
||||||
|
"""
|
||||||
|
from utils.view import view_sig
|
||||||
|
|
||||||
|
view_sig(recording=self, output_path=output_path, **kwargs)
|
||||||
|
|
||||||
|
def to_sigmf(self, filename: Optional[str] = None, path: Optional[os.PathLike | str] = None) -> None:
|
||||||
|
"""Write recording to a set of SigMF files.
|
||||||
|
|
||||||
|
The SigMF io format is defined by the `SigMF Specification Project <https://github.com/sigmf/SigMF>`_
|
||||||
|
|
||||||
|
:param recording: The recording to be written to file.
|
||||||
|
:type recording: utils.data.Recording
|
||||||
|
:param filename: The name of the file where the recording is to be saved. Defaults to auto generated filename.
|
||||||
|
:type filename: os.PathLike or str, optional
|
||||||
|
:param path: The directory path to where the recording is to be saved. Defaults to recordings/.
|
||||||
|
:type path: os.PathLike or str, optional
|
||||||
|
|
||||||
|
:raises IOError: If there is an issue encountered during the file writing process.
|
||||||
|
|
||||||
|
:return: None
|
||||||
|
|
||||||
|
**Examples:**
|
||||||
|
|
||||||
|
Create a recording and write it to a set of SigMF files:
|
||||||
|
|
||||||
|
>>> import numpy
|
||||||
|
>>> from utils.data import Recording
|
||||||
|
|
||||||
|
>>> samples = numpy.ones(10000, dtype=numpy.complex64)
|
||||||
|
>>> metadata = {
|
||||||
|
... "sample_rate": 1e6,
|
||||||
|
... "center_frequency": 2.44e9,
|
||||||
|
... }
|
||||||
|
|
||||||
|
>>> recording = Recording(data=samples, metadata=metadata)
|
||||||
|
>>> recording.view()
|
||||||
|
"""
|
||||||
|
from utils.io.recording import to_sigmf
|
||||||
|
|
||||||
|
to_sigmf(filename=filename, path=path, recording=self)
|
||||||
|
|
||||||
|
def to_npy(self, filename: Optional[str] = None, path: Optional[os.PathLike | str] = None) -> str:
|
||||||
|
"""Write recording to ``.npy`` binary file.
|
||||||
|
|
||||||
|
:param filename: The name of the file where the recording is to be saved. Defaults to auto generated filename.
|
||||||
|
:type filename: os.PathLike or str, optional
|
||||||
|
:param path: The directory path to where the recording is to be saved. Defaults to recordings/.
|
||||||
|
:type path: os.PathLike or str, optional
|
||||||
|
|
||||||
|
:raises IOError: If there is an issue encountered during the file writing process.
|
||||||
|
|
||||||
|
:return: Path where the file was saved.
|
||||||
|
:rtype: str
|
||||||
|
|
||||||
|
**Examples:**
|
||||||
|
|
||||||
|
Create a recording and save it to a .npy file:
|
||||||
|
|
||||||
|
>>> import numpy
|
||||||
|
>>> from utils.data import Recording
|
||||||
|
|
||||||
|
>>> samples = numpy.ones(10000, dtype=numpy.complex64)
|
||||||
|
>>> metadata = {
|
||||||
|
>>> "sample_rate": 1e6,
|
||||||
|
>>> "center_frequency": 2.44e9,
|
||||||
|
>>> }
|
||||||
|
|
||||||
|
>>> recording = Recording(data=samples, metadata=metadata)
|
||||||
|
>>> recording.to_npy()
|
||||||
|
"""
|
||||||
|
from utils.io.recording import to_npy
|
||||||
|
|
||||||
|
return to_npy(recording=self, filename=filename, path=path)
|
||||||
|
|
||||||
|
def trim(self, num_samples: int, start_sample: Optional[int] = 0) -> Recording:
|
||||||
|
"""Trim Recording samples to a desired length, shifting annotations to maintain alignment.
|
||||||
|
|
||||||
|
:param start_sample: The start index of the desired trimmed recording. Defaults to 0.
|
||||||
|
:type start_sample: int, optional
|
||||||
|
:param num_samples: The number of samples that the output trimmed recording will have.
|
||||||
|
:type num_samples: int
|
||||||
|
:raises IndexError: If start_sample + num_samples is greater than the length of the recording.
|
||||||
|
:raises IndexError: If sample_start < 0 or num_samples < 0.
|
||||||
|
|
||||||
|
:return: The trimmed Recording.
|
||||||
|
:rtype: Recording
|
||||||
|
|
||||||
|
**Examples:**
|
||||||
|
|
||||||
|
Create a recording and trim it:
|
||||||
|
|
||||||
|
>>> import numpy
|
||||||
|
>>> from utils.data import Recording
|
||||||
|
|
||||||
|
>>> samples = numpy.ones(10000, dtype=numpy.complex64)
|
||||||
|
>>> metadata = {
|
||||||
|
... "sample_rate": 1e6,
|
||||||
|
... "center_frequency": 2.44e9,
|
||||||
|
... }
|
||||||
|
|
||||||
|
>>> recording = Recording(data=samples, metadata=metadata)
|
||||||
|
>>> print(len(recording))
|
||||||
|
10000
|
||||||
|
|
||||||
|
>>> trimmed_recording = recording.trim(start_sample=1000, num_samples=1000)
|
||||||
|
>>> print(len(trimmed_recording))
|
||||||
|
1000
|
||||||
|
"""
|
||||||
|
|
||||||
|
if start_sample < 0:
|
||||||
|
raise IndexError("start_sample cannot be < 0.")
|
||||||
|
elif start_sample + num_samples > len(self):
|
||||||
|
raise IndexError(
|
||||||
|
f"start_sample {start_sample} + num_samples {num_samples} > recording length {len(self)}."
|
||||||
|
)
|
||||||
|
|
||||||
|
end_sample = start_sample + num_samples
|
||||||
|
|
||||||
|
data = self.data[:, start_sample:end_sample]
|
||||||
|
|
||||||
|
new_annotations = copy.deepcopy(self.annotations)
|
||||||
|
for annotation in new_annotations:
|
||||||
|
# trim annotation if it goes outside the trim boundaries
|
||||||
|
if annotation.sample_start < start_sample:
|
||||||
|
annotation.sample_count = annotation.sample_count - (start_sample - annotation.sample_start)
|
||||||
|
annotation.sample_start = start_sample
|
||||||
|
|
||||||
|
if annotation.sample_start + annotation.sample_count > end_sample:
|
||||||
|
annotation.sample_count = end_sample - annotation.sample_start
|
||||||
|
|
||||||
|
# shift annotation to align with the new start point
|
||||||
|
annotation.sample_start = annotation.sample_start - start_sample
|
||||||
|
|
||||||
|
return Recording(data=data, metadata=self.metadata, annotations=new_annotations)
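# Illustrative sketch of the annotation handling above (numbers hypothetical): an annotation
# covering samples 1500-2500 of the original recording, after trim(start_sample=1000,
# num_samples=1000), ends up with sample_start=500 and sample_count clipped to 500, so it
# finishes exactly at the new recording boundary.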
|
||||||
|
|
||||||
|
def normalize(self) -> Recording:
|
||||||
|
"""Scale the recording data, relative to its maximum value, so that the magnitude of the maximum sample is 1.
|
||||||
|
|
||||||
|
:return: Recording where the maximum sample amplitude is 1.
|
||||||
|
:rtype: Recording
|
||||||
|
|
||||||
|
**Examples:**
|
||||||
|
|
||||||
|
Create a recording with maximum amplitude 0.5 and normalize to a maximum amplitude of 1:
|
||||||
|
|
||||||
|
>>> import numpy
|
||||||
|
>>> from utils.data import Recording
|
||||||
|
|
||||||
|
>>> samples = numpy.ones(10000, dtype=numpy.complex64) * 0.5
|
||||||
|
>>> metadata = {
|
||||||
|
... "sample_rate": 1e6,
|
||||||
|
... "center_frequency": 2.44e9,
|
||||||
|
... }
|
||||||
|
|
||||||
|
>>> recording = Recording(data=samples, metadata=metadata)
|
||||||
|
>>> print(numpy.max(numpy.abs(recording.data)))
|
||||||
|
0.5
|
||||||
|
|
||||||
|
>>> normalized_recording = recording.normalize()
|
||||||
|
>>> print(numpy.max(numpy.abs(normalized_recording.data)))
|
||||||
|
1
|
||||||
|
"""
|
||||||
|
scaled_data = self.data / np.max(abs(self.data))
|
||||||
|
return Recording(data=scaled_data, metadata=self.metadata, annotations=self.annotations)
|
||||||
|
|
||||||
|
def generate_filename(self, tag: Optional[str] = "rec"):
|
||||||
|
"""Generate a filename from metadata.
|
||||||
|
|
||||||
|
:param tag: The string at the beginning of the generated filename. Default is "rec".
|
||||||
|
:type tag: str, optional
|
||||||
|
|
||||||
|
:return: A filename without an extension.
|
||||||
|
:rtype: str
|
||||||
|
"""
|
||||||
|
# TODO: This method should be refactored to use the first 7 characters of the 'rec_id' field.
|
||||||
|
|
||||||
|
tag = tag + "_"
|
||||||
|
source = self.metadata.get("source", "")
|
||||||
|
if source != "":
|
||||||
|
source = source + "_"
|
||||||
|
|
||||||
|
# converts 1000 to 1k for example
|
||||||
|
center_frequency = str(Quantity(self.metadata.get("center_frequency", 0)))
|
||||||
|
if center_frequency != "0":
|
||||||
|
num = center_frequency[:-1]
|
||||||
|
suffix = center_frequency[-1]
|
||||||
|
num = int(np.round(float(num)))
|
||||||
|
else:
|
||||||
|
num = 0
|
||||||
|
suffix = ""
|
||||||
|
center_frequency = str(num) + suffix + "Hz_"
|
||||||
|
|
||||||
|
timestamp = int(self.timestamp)
|
||||||
|
timestamp = datetime.datetime.fromtimestamp(timestamp).strftime("%Y-%m-%d_%H-%M-%S") + "_"
|
||||||
|
|
||||||
|
# Add first seven characters of rec_id for uniqueness
|
||||||
|
rec_id = self.rec_id[0:7]
|
||||||
|
return tag + source + center_frequency + timestamp + rec_id
|
||||||
|
|
||||||
|
def __len__(self) -> int:
|
||||||
|
"""The length of a recording is defined by the number of complex samples in each channel of the recording."""
|
||||||
|
return self.shape[1]
|
||||||
|
|
||||||
|
def __eq__(self, other: Recording) -> bool:
|
||||||
|
"""Two Recordings are equal if all data, metadata, and annotations are the same."""
|
||||||
|
|
||||||
|
# counter used to allow for differently ordered annotation lists
|
||||||
|
return (
|
||||||
|
np.array_equal(self.data, other.data)
|
||||||
|
and self.metadata == other.metadata
|
||||||
|
and self.annotations == other.annotations
|
||||||
|
)
|
||||||
|
|
||||||
|
def __ne__(self, other: Recording) -> bool:
|
||||||
|
"""Two Recordings are equal if all data, and metadata, and annotations are the same."""
|
||||||
|
return not self.__eq__(other=other)
|
||||||
|
|
||||||
|
def __iter__(self) -> Iterator:
|
||||||
|
self._index = 0
|
||||||
|
return self
|
||||||
|
|
||||||
|
def __next__(self) -> np.ndarray:
|
||||||
|
if self._index < self.n_chan:
|
||||||
|
to_ret = self.data[self._index]
|
||||||
|
self._index += 1
|
||||||
|
return to_ret
|
||||||
|
else:
|
||||||
|
raise StopIteration
|
||||||
|
|
||||||
|
def __getitem__(self, key: int | tuple[int] | slice) -> np.ndarray | np.complexfloating:
|
||||||
|
"""If key is an integer, tuple of integers, or a slice, return the corresponding samples.
|
||||||
|
|
||||||
|
For arrays with 1,024 or fewer samples, return a copy of the recording data. For larger arrays, return a
|
||||||
|
read-only view. This prevents mutation at a distance while maintaining performance.
|
||||||
|
"""
|
||||||
|
if isinstance(key, (int, tuple, slice)):
|
||||||
|
v = self._data[key]
|
||||||
|
if isinstance(v, np.complexfloating):
|
||||||
|
return v
|
||||||
|
elif v.size > 1024:
|
||||||
|
v.setflags(write=False) # Make view read-only.
|
||||||
|
return v
|
||||||
|
else:
|
||||||
|
return v.copy()
|
||||||
|
|
||||||
|
else:
|
||||||
|
raise ValueError(f"Key must be an integer, tuple, or slice but was {type(key)}.")
|
||||||
|
|
||||||
|
def __setitem__(self, *args, **kwargs) -> None:
|
||||||
|
"""Raise an error if an attempt is made to assign to the recording."""
|
||||||
|
raise ValueError("Assignment to Recording is not allowed.")
|
||||||
|
|
||||||
|
|
||||||
|
def generate_recording_id(data: np.ndarray, timestamp: Optional[float | int] = None) -> str:
|
||||||
|
"""Generate unique 64-character recording ID. The recording ID is generated by hashing the recording data with
|
||||||
|
the timestamp at which the recording data was generated. If no timestamp is provided, the current time is used.
|
||||||
|
|
||||||
|
:param data: Tape of IQ samples, as a NumPy array.
|
||||||
|
:type data: np.ndarray
|
||||||
|
:param timestamp: Unix timestamp in seconds. Defaults to None.
|
||||||
|
:type timestamp: float or int, optional
|
||||||
|
|
||||||
|
:return: 64-character hexadecimal SHA-256 hash, to be used as the recording ID.
|
||||||
|
:rtype: str
|
||||||
|
"""
|
||||||
|
if timestamp is None:
|
||||||
|
timestamp = time.time()
|
||||||
|
|
||||||
|
byte_sequence = data.tobytes() + str(timestamp).encode("utf-8")
|
||||||
|
sha256_hash = hashlib.sha256(byte_sequence)
|
||||||
|
|
||||||
|
return sha256_hash.hexdigest()
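# Minimal determinism sketch (values hypothetical):
#     samples = np.ones(8, dtype=np.complex64)
#     a = generate_recording_id(data=samples, timestamp=1700000000.0)
#     b = generate_recording_id(data=samples, timestamp=1700000000.0)
#     assert a == b and len(a) == 64    # same inputs give the same 64-character hex digest
#     assert a != generate_recording_id(data=samples, timestamp=1700000001.0)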
|
||||||
|
|
||||||
|
|
||||||
|
def _is_jsonable(x: Any) -> bool:
|
||||||
|
"""
|
||||||
|
:return: True if x is JSON serializable, False otherwise.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
json.dumps(x)
|
||||||
|
return True
|
||||||
|
except (TypeError, OverflowError):
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def _is_valid_metadata_key(key: Any) -> bool:
|
||||||
|
"""
|
||||||
|
:return: True if key is a valid metadata key, False otherwise.
|
||||||
|
"""
|
||||||
|
if isinstance(key, str) and key.islower() and re.match(pattern=r"^[a-z_]+$", string=key) is not None:
|
||||||
|
return True
|
||||||
|
|
||||||
|
else:
|
||||||
|
return False
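# Quick illustration of the key rules above (lowercase letters and underscores only):
#     _is_valid_metadata_key("sample_rate")  ->  True
#     _is_valid_metadata_key("SampleRate")   ->  False   # uppercase rejected
#     _is_valid_metadata_key("snr_db_2")     ->  False   # digits fail ^[a-z_]+$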
|
22
src/ria_toolkit/io/__init__.py
Normal file
22
src/ria_toolkit/io/__init__.py
Normal file
|
@ -0,0 +1,22 @@
|
||||||
|
"""
|
||||||
|
The IO package contains utilities for input and output operations, such as loading and saving recordings to and from
|
||||||
|
file.
|
||||||
|
"""
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
# Common:
|
||||||
|
"exists",
|
||||||
|
"copy",
|
||||||
|
"move",
|
||||||
|
"validate",
|
||||||
|
# Recording:
|
||||||
|
"save_recording",
|
||||||
|
"load_recording",
|
||||||
|
"to_sigmf",
|
||||||
|
"from_sigmf",
|
||||||
|
"to_npy",
|
||||||
|
"from_npy",
|
||||||
|
]
|
||||||
|
|
||||||
|
from .common import copy, exists, move, validate
|
||||||
|
from .recording import from_npy, from_sigmf, load_recording, to_npy, to_sigmf
|
331
src/ria_toolkit/io/recording.py
Normal file
331
src/ria_toolkit/io/recording.py
Normal file
|
@ -0,0 +1,331 @@
|
||||||
|
"""
|
||||||
|
Utilities for input/output operations on the utils.data.Recording object.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import datetime as dt
|
||||||
|
import os
|
||||||
|
from datetime import timezone
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import sigmf
|
||||||
|
from sigmf import SigMFFile, sigmffile
|
||||||
|
from sigmf.utils import get_data_type_str
|
||||||
|
|
||||||
|
from utils.data import Annotation
|
||||||
|
from utils.data.recording import Recording
|
||||||
|
|
||||||
|
|
||||||
|
def load_rec(file: os.PathLike) -> Recording:
|
||||||
|
"""Load a recording from file.
|
||||||
|
|
||||||
|
:param file: The directory path to the file(s) to load, **with** the file extension.
|
||||||
|
To load from SigMF, the file extension must be one of *sigmf*, *sigmf-data*, or *sigmf-meta*;
|
||||||
|
either way, both the SigMF data and meta files must be present for a successful read.
|
||||||
|
:type file: os.PathLike
|
||||||
|
|
||||||
|
:raises IOError: If there is an issue encountered during the file reading process.
|
||||||
|
|
||||||
|
:raises ValueError: If the inferred file extension is not supported.
|
||||||
|
|
||||||
|
:return: The recording, as initialized from file(s).
|
||||||
|
:rtype: utils.data.Recording
|
||||||
|
"""
|
||||||
|
_, extension = os.path.splitext(file)
|
||||||
|
extension = extension.lstrip(".")
|
||||||
|
|
||||||
|
if extension.lower() in ["sigmf", "sigmf-data", "sigmf-meta"]:
|
||||||
|
return from_sigmf(file=file)
|
||||||
|
|
||||||
|
elif extension.lower() == "npy":
|
||||||
|
return from_npy(file=file)
|
||||||
|
|
||||||
|
else:
|
||||||
|
raise ValueError(f"File extension {extension} not supported.")
|
||||||
|
|
||||||
|
|
||||||
|
SIGMF_KEY_CONVERSION = {
|
||||||
|
SigMFFile.AUTHOR_KEY: "author",
|
||||||
|
SigMFFile.COLLECTION_KEY: "sigmf:collection",
|
||||||
|
SigMFFile.DATASET_KEY: "sigmf:dataset",
|
||||||
|
SigMFFile.DATATYPE_KEY: "datatype",
|
||||||
|
SigMFFile.DATA_DOI_KEY: "data_doi",
|
||||||
|
SigMFFile.DESCRIPTION_KEY: "description",
|
||||||
|
SigMFFile.EXTENSIONS_KEY: "sigmf:extensions",
|
||||||
|
SigMFFile.GEOLOCATION_KEY: "geolocation",
|
||||||
|
SigMFFile.HASH_KEY: "sigmf:hash",
|
||||||
|
SigMFFile.HW_KEY: "sdr",
|
||||||
|
SigMFFile.LICENSE_KEY: "license",
|
||||||
|
SigMFFile.META_DOI_KEY: "metadata",
|
||||||
|
SigMFFile.METADATA_ONLY_KEY: "sigmf:metadata_only",
|
||||||
|
SigMFFile.NUM_CHANNELS_KEY: "sigmf:num_channels",
|
||||||
|
SigMFFile.RECORDER_KEY: "source_software",
|
||||||
|
SigMFFile.SAMPLE_RATE_KEY: "sample_rate",
|
||||||
|
SigMFFile.START_OFFSET_KEY: "sigmf:start_offset",
|
||||||
|
SigMFFile.TRAILING_BYTES_KEY: "sigmf:trailing_bytes",
|
||||||
|
SigMFFile.VERSION_KEY: "sigmf:version",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def convert_to_serializable(obj):
|
||||||
|
"""
|
||||||
|
Recursively convert a JSON-compatible structure into a fully JSON-serializable one.
|
||||||
|
Handles cases like NumPy data types, nested dicts, lists, and sets.
|
||||||
|
"""
|
||||||
|
if isinstance(obj, np.integer):
|
||||||
|
return int(obj) # Convert NumPy int to Python int
|
||||||
|
elif isinstance(obj, np.floating):
|
||||||
|
return float(obj) # Convert NumPy float to Python float
|
||||||
|
elif isinstance(obj, np.ndarray):
|
||||||
|
return obj.tolist() # Convert NumPy array to list
|
||||||
|
elif isinstance(obj, (list, tuple)):
|
||||||
|
return [convert_to_serializable(item) for item in obj] # Process list or tuple
|
||||||
|
elif isinstance(obj, dict):
|
||||||
|
return {key: convert_to_serializable(value) for key, value in obj.items()} # Process dict
|
||||||
|
elif isinstance(obj, set):
|
||||||
|
return list(obj) # Convert set to list
|
||||||
|
elif obj in [float("inf"), float("-inf"), None]: # Handle infinity or None
|
||||||
|
return None
|
||||||
|
elif isinstance(obj, (str, int, float, bool)) or obj is None:
|
||||||
|
return obj # Base case: already serializable
|
||||||
|
else:
|
||||||
|
raise TypeError(f"Value of type {type(obj)} is not JSON serializable: {obj}")
|
||||||
|
|
||||||
|
|
||||||
|
def to_sigmf(recording: Recording, filename: Optional[str] = None, path: Optional[os.PathLike | str] = None) -> None:
|
||||||
|
"""Write recording to a set of SigMF files.
|
||||||
|
|
||||||
|
The SigMF io format is defined by the `SigMF Specification Project <https://github.com/sigmf/SigMF>`_
|
||||||
|
|
||||||
|
:param recording: The recording to be written to file.
|
||||||
|
:type recording: utils.data.Recording
|
||||||
|
:param filename: The name of the file where the recording is to be saved. Defaults to auto generated filename.
|
||||||
|
:type filename: os.PathLike or str, optional
|
||||||
|
:param path: The directory path to where the recording is to be saved. Defaults to recordings/.
|
||||||
|
:type path: os.PathLike or str, optional
|
||||||
|
|
||||||
|
:raises IOError: If there is an issue encountered during the file writing process.
|
||||||
|
|
||||||
|
:return: None
|
||||||
|
|
||||||
|
**Examples:**
|
||||||
|
|
||||||
|
>>> from utils.sdr import Synth
|
||||||
|
>>> from utils.data import Recording
|
||||||
|
>>> from utils.io import to_sigmf
|
||||||
|
>>> sdr = Synth()
|
||||||
|
>>> rec = sdr.record(center_frequency=2.4e9, sample_rate=20e6)
|
||||||
|
>>> to_sigmf(recording=rec, file="sample_recording")
|
||||||
|
"""
|
||||||
|
|
||||||
|
if filename is not None:
|
||||||
|
filename, _ = os.path.splitext(filename)
|
||||||
|
else:
|
||||||
|
filename = recording.generate_filename()
|
||||||
|
|
||||||
|
if path is None:
|
||||||
|
path = "recordings"
|
||||||
|
|
||||||
|
if not os.path.exists(path):
|
||||||
|
os.makedirs(path)
|
||||||
|
|
||||||
|
multichannel_samples = recording.data
|
||||||
|
metadata = recording.metadata
|
||||||
|
annotations = recording.annotations
|
||||||
|
|
||||||
|
if multichannel_samples.shape[0] > 1:
|
||||||
|
raise NotImplementedError("SigMF File Saving Not Implemented for Multichannel Recordings")
|
||||||
|
else:
|
||||||
|
# extract single channel
|
||||||
|
samples = multichannel_samples[0]
|
||||||
|
|
||||||
|
data_file_path = os.path.join(path, f"{filename}.sigmf-data")
|
||||||
|
|
||||||
|
samples.tofile(data_file_path)
|
||||||
|
global_info = {
|
||||||
|
SigMFFile.DATATYPE_KEY: get_data_type_str(samples),
|
||||||
|
SigMFFile.VERSION_KEY: sigmf.__version__,
|
||||||
|
SigMFFile.RECORDER_KEY: "RIA",
|
||||||
|
}
|
||||||
|
|
||||||
|
converted_metadata = {
|
||||||
|
sigmf_key: metadata[metadata_key]
|
||||||
|
for sigmf_key, metadata_key in SIGMF_KEY_CONVERSION.items()
|
||||||
|
if metadata_key in metadata
|
||||||
|
}
|
||||||
|
|
||||||
|
# Merge dictionaries, giving priority to sigmf_meta
|
||||||
|
global_info = {**converted_metadata, **global_info}
|
||||||
|
|
||||||
|
ria_metadata = {f"ria:{key}": value for key, value in metadata.items()}
|
||||||
|
ria_metadata = convert_to_serializable(ria_metadata)
|
||||||
|
global_info.update(ria_metadata)
|
||||||
|
|
||||||
|
sigMF_metafile = SigMFFile(
|
||||||
|
data_file=data_file_path,
|
||||||
|
global_info=global_info,
|
||||||
|
)
|
||||||
|
|
||||||
|
for annotation_object in annotations:
|
||||||
|
annotation_dict = annotation_object.to_sigmf_format()
|
||||||
|
annotation_dict = convert_to_serializable(annotation_dict)
|
||||||
|
sigMF_metafile.add_annotation(
|
||||||
|
start_index=annotation_dict[SigMFFile.START_INDEX_KEY],
|
||||||
|
length=annotation_dict[SigMFFile.LENGTH_INDEX_KEY],
|
||||||
|
metadata=annotation_dict["metadata"],
|
||||||
|
)
|
||||||
|
|
||||||
|
sigMF_metafile.add_capture(
|
||||||
|
0,
|
||||||
|
metadata={
|
||||||
|
SigMFFile.FREQUENCY_KEY: metadata.get("center_frequency", 0),
|
||||||
|
SigMFFile.DATETIME_KEY: dt.datetime.fromtimestamp(float(metadata.get("timestamp", 0)), tz=timezone.utc)
|
||||||
|
.isoformat()
|
||||||
|
.replace("+00:00", "Z"),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
meta_dict = sigMF_metafile.ordered_metadata()
|
||||||
|
meta_dict["ria"] = metadata
|
||||||
|
|
||||||
|
sigMF_metafile.tofile(f"{os.path.join(path,filename)}.sigmf-meta")
|
||||||
|
|
||||||
|
|
||||||
|
def from_sigmf(file: os.PathLike | str) -> Recording:
|
||||||
|
"""Load a recording from a set of SigMF files.
|
||||||
|
|
||||||
|
:param file: The directory path to the SigMF recording files, without any file extension.
|
||||||
|
The recording will be initialized from ``file_name.sigmf-data`` and ``file_name.sigmf-meta``.
|
||||||
|
Both the data and meta files must be present for a successful read.
|
||||||
|
:type file: str or os.PathLike
|
||||||
|
|
||||||
|
:raises IOError: If there is an issue encountered during the file reading process.
|
||||||
|
|
||||||
|
:return: The recording, as initialized from the SigMF files.
|
||||||
|
:rtype: utils.data.Recording
|
||||||
|
"""
|
||||||
|
|
||||||
|
if len(file) > 11:
|
||||||
|
if file[-11:-5] != ".sigmf":
|
||||||
|
file = file + ".sigmf-data"
|
||||||
|
|
||||||
|
sigmf_file = sigmffile.fromfile(file)
|
||||||
|
|
||||||
|
data = sigmf_file.read_samples()
|
||||||
|
global_metadata = sigmf_file.get_global_info()
|
||||||
|
dict_annotations = sigmf_file.get_annotations()
|
||||||
|
|
||||||
|
processed_metadata = {}
|
||||||
|
for key, value in global_metadata.items():
|
||||||
|
# Process core keys
|
||||||
|
if key.startswith("core:"):
|
||||||
|
base_key = key[5:] # Remove 'core:' prefix
|
||||||
|
converted_key = SIGMF_KEY_CONVERSION.get(base_key, base_key)
|
||||||
|
# Process ria keys
|
||||||
|
elif key.startswith("ria:"):
|
||||||
|
converted_key = key[4:] # Remove 'ria:' prefix
|
||||||
|
else:
|
||||||
|
# Load non-core/ria keys as is
|
||||||
|
converted_key = key
|
||||||
|
|
||||||
|
processed_metadata[converted_key] = value
|
||||||
|
|
||||||
|
annotations = []
|
||||||
|
|
||||||
|
for ann in dict_annotations:
|
||||||
|
annotations.append(
|
||||||
|
Annotation(
|
||||||
|
sample_start=ann[SigMFFile.START_INDEX_KEY],
|
||||||
|
sample_count=ann[SigMFFile.LENGTH_INDEX_KEY],
|
||||||
|
freq_lower_edge=ann.get(SigMFFile.FLO_KEY, None),
|
||||||
|
freq_upper_edge=ann.get(SigMFFile.FHI_KEY, None),
|
||||||
|
label=ann.get(SigMFFile.LABEL_KEY, None),
|
||||||
|
comment=ann.get(SigMFFile.COMMENT_KEY, None),
|
||||||
|
detail=ann.get("ria:detail", None),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
output_recording = Recording(data=data, metadata=processed_metadata, annotations=annotations)
|
||||||
|
return output_recording
|
||||||
|
|
||||||
|
|
||||||
|
def to_npy(recording: Recording, filename: Optional[str] = None, path: Optional[os.PathLike | str] = None) -> str:
|
||||||
|
"""Write recording to ``.npy`` binary file.
|
||||||
|
|
||||||
|
:param recording: The recording to be written to file.
|
||||||
|
:type recording: utils.data.Recording
|
||||||
|
:param filename: The name of the file where the recording is to be saved. Defaults to auto generated filename.
|
||||||
|
:type filename: os.PathLike or str, optional
|
||||||
|
:param path: The directory path to where the recording is to be saved. Defaults to recordings/.
|
||||||
|
:type path: os.PathLike or str, optional
|
||||||
|
|
||||||
|
:raises IOError: If there is an issue encountered during the file writing process.
|
||||||
|
|
||||||
|
:return: Path where the file was saved.
|
||||||
|
:rtype: str
|
||||||
|
|
||||||
|
**Examples:**
|
||||||
|
|
||||||
|
>>> from utils.sdr import Synth
|
||||||
|
>>> from utils.data import Recording
|
||||||
|
>>> from utils.io import to_npy
|
||||||
|
>>> sdr = Synth()
|
||||||
|
>>> rec = sdr.record(center_frequency=2.4e9, sample_rate=20e6)
|
||||||
|
>>> to_npy(recording=rec, file="sample_recording.npy")
|
||||||
|
"""
|
||||||
|
if filename is not None:
|
||||||
|
filename, _ = os.path.splitext(filename)
|
||||||
|
else:
|
||||||
|
filename = recording.generate_filename()
|
||||||
|
filename = filename + ".npy"
|
||||||
|
|
||||||
|
if path is None:
|
||||||
|
path = "recordings"
|
||||||
|
|
||||||
|
if not os.path.exists(path):
|
||||||
|
os.makedirs(path)
|
||||||
|
fullpath = os.path.join(path, filename)
|
||||||
|
|
||||||
|
data = np.array(recording.data)
|
||||||
|
metadata = recording.metadata
|
||||||
|
annotations = recording.annotations
|
||||||
|
|
||||||
|
with open(file=fullpath, mode="wb") as f:
|
||||||
|
np.save(f, data)
|
||||||
|
np.save(f, metadata)
|
||||||
|
np.save(f, annotations)
|
||||||
|
|
||||||
|
# print(f"Saved recording to {os.getcwd()}/{fullpath}")
|
||||||
|
return str(fullpath)
|
||||||
|
|
||||||
|
|
||||||
|
def from_npy(file: os.PathLike | str) -> Recording:
|
||||||
|
"""Load a recording from a ``.npy`` binary file.
|
||||||
|
|
||||||
|
:param file: The directory path to the recording file, with or without the ``.npy`` file extension.
|
||||||
|
:type file: str or os.PathLike
|
||||||
|
|
||||||
|
:raises IOError: If there is an issue encountered during the file reading process.
|
||||||
|
|
||||||
|
:return: The recording, as initialized from the ``.npy`` file.
|
||||||
|
:rtype: utils.data.Recording
|
||||||
|
"""
|
||||||
|
|
||||||
|
filename, extension = os.path.splitext(file)
|
||||||
|
if extension != ".npy" and extension != "":
|
||||||
|
raise ValueError("Cannot use from_npy if file extension is not .npy")
|
||||||
|
|
||||||
|
# Rebuild with .npy extension.
|
||||||
|
filename = str(filename) + ".npy"
|
||||||
|
|
||||||
|
with open(file=filename, mode="rb") as f:
|
||||||
|
data = np.load(f, allow_pickle=True)
|
||||||
|
metadata = np.load(f, allow_pickle=True)
|
||||||
|
metadata = metadata.tolist()
|
||||||
|
try:
|
||||||
|
annotations = list(np.load(f, allow_pickle=True))
|
||||||
|
except EOFError:
|
||||||
|
annotations = []
|
||||||
|
|
||||||
|
recording = Recording(data=data, metadata=metadata, annotations=annotations)
|
||||||
|
return recording
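# Hedged round-trip sketch (paths and values are assumptions, not part of this module):
#     rec = Recording(data=np.ones(100, dtype=np.complex64), metadata={"sample_rate": 1e6})
#     saved_path = to_npy(recording=rec, filename="example_rec.npy", path="recordings")
#     assert from_npy(file=saved_path) == rec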
|
8
src/ria_toolkit/transforms/__init__.py
Normal file
8
src/ria_toolkit/transforms/__init__.py
Normal file
|
@ -0,0 +1,8 @@
|
||||||
|
"""
|
||||||
|
The transforms package houses a collection of functions to manipulate and transform radio data.
|
||||||
|
|
||||||
|
This package contains various functions that operate on NumPy arrays. These functions are utilized within the machine
|
||||||
|
learning backends to build transforms and functions that seamlessly integrate with those from the respective backend.
|
||||||
|
|
||||||
|
All the transforms in this package expect data in the complex 1xN format.
|
||||||
|
"""
|
717
src/ria_toolkit/transforms/iq_augmentations.py
Normal file
717
src/ria_toolkit/transforms/iq_augmentations.py
Normal file
|
@ -0,0 +1,717 @@
|
||||||
|
"""
|
||||||
|
This module comprises the functionals of various transforms designed to create new training examples by augmenting
|
||||||
|
existing examples or recordings using a variety of techniques. These transforms take an ArrayLike object as input
|
||||||
|
and return a corresponding numpy.ndarray with the impairment model applied;
|
||||||
|
we call the latter the impaired data.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
from numpy.typing import ArrayLike
|
||||||
|
|
||||||
|
from utils.data.recording import Recording
|
||||||
|
from utils.helpers.array_conversion import convert_to_2xn
|
||||||
|
|
||||||
|
# TODO: For round 2 of index generation, should j be at min 2 spots away from where it was to prevent adjacent patches.
|
||||||
|
|
||||||
|
# TODO: All the transforms with some randomness need to be refactored to use a random generator.
|
||||||
|
|
||||||
|
|
||||||
|
def generate_awgn(signal: ArrayLike | Recording, snr: Optional[float] = 1) -> np.ndarray | Recording:
|
||||||
|
"""Generates additive white gaussian noise (AWGN) relative to the signal-to-noise ratio (SNR) of the
|
||||||
|
provided `signal` array or `Recording`.
|
||||||
|
|
||||||
|
This function calculates the root mean squared (RMS) power of `signal` and then finds the RMS power of
|
||||||
|
the noise which matches the specified SNR. Then, the AWGN is generated after calculating the variance and
|
||||||
|
randomly calculating the amplitude and phase of the noise.
|
||||||
|
|
||||||
|
:param signal: Input IQ data as a complex CxN array or `Recording`, where C is the number of channels and N
|
||||||
|
is the length of the IQ examples.
|
||||||
|
:type signal: array_like or utils.data.Recording
|
||||||
|
:param snr: The signal-to-noise ratio in dB. Default is 1.
|
||||||
|
:type snr: float, optional
|
||||||
|
|
||||||
|
:raises ValueError: If `signal` is not CxN complex.
|
||||||
|
|
||||||
|
:return: A numpy array representing the generated noise which matches the SNR of `signal`. If `signal` is a
|
||||||
|
Recording, returns a Recording object with its `data` attribute containing the generated noise array.
|
||||||
|
:rtype: np.ndarray or utils.data.Recording
|
||||||
|
|
||||||
|
>>> rec = Recording(data=[[2 + 5j, 1 + 8j]])
|
||||||
|
>>> new_rec = generate_awgn(rec)
|
||||||
|
>>> new_rec.data
|
||||||
|
array([[2.15991777 + 0.69673915j, 0.2814541 - 0.12111976j]])
|
||||||
|
"""
|
||||||
|
|
||||||
|
if isinstance(signal, Recording):
|
||||||
|
data = signal.data
|
||||||
|
else:
|
||||||
|
data = np.asarray(signal)
|
||||||
|
|
||||||
|
if data.ndim == 2 and np.iscomplexobj(data):
|
||||||
|
c, n = data.shape
|
||||||
|
else:
|
||||||
|
raise ValueError("signal must be CxN complex.")
|
||||||
|
|
||||||
|
snr_linear = 10 ** (snr / 10)
|
||||||
|
|
||||||
|
# Calculate the RMS power of the signal to solve for the RMS power of the noise
|
||||||
|
signal_rms_power = np.sqrt(np.mean(np.abs(data) ** 2))
|
||||||
|
noise_rms_power = signal_rms_power / snr_linear
|
||||||
|
|
||||||
|
# Generate the AWGN noise which has the same shape as data
|
||||||
|
variance = noise_rms_power**2
|
||||||
|
magnitude = np.random.normal(loc=0, scale=np.sqrt(variance), size=(c, n))
|
||||||
|
phase = np.random.uniform(low=0, high=2 * np.pi, size=(c, n))
|
||||||
|
complex_awgn = magnitude * np.exp(1j * phase)
|
||||||
|
|
||||||
|
if isinstance(signal, Recording):
|
||||||
|
return Recording(data=complex_awgn, metadata=signal.metadata)
|
||||||
|
else:
|
||||||
|
return complex_awgn
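# Rough numerical check of the scaling above (statistical, so only approximate): the RMS
# amplitude of the generated noise comes out near signal_rms / 10 ** (snr / 10).
#     sig = np.exp(1j * np.linspace(0, 8 * np.pi, 100_000)).reshape(1, -1)  # unit-power tone
#     noise = generate_awgn(sig, snr=10)
#     rms = np.sqrt(np.mean(np.abs(noise) ** 2))   # roughly 0.1 for this unit-RMS signal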
|
||||||
|
|
||||||
|
|
||||||
|
def time_reversal(signal: ArrayLike | Recording) -> np.ndarray | Recording:
|
||||||
|
"""Reverses the order of the I (In-phase) and Q (Quadrature) data samples along the time axis of the provided
|
||||||
|
`signal` array or `Recording`.
|
||||||
|
|
||||||
|
:param signal: Input IQ data as a complex CxN array or `Recording`, where C is the number of channels and N
|
||||||
|
is the length of the IQ examples.
|
||||||
|
:type signal: array_like or utils.data.Recording
|
||||||
|
|
||||||
|
:raises ValueError: If `signal` is not CxN complex.
|
||||||
|
|
||||||
|
:return: A numpy array containing the reversed I and Q data samples if `signal` is an array.
|
||||||
|
If `signal` is a `Recording`, returns a `Recording` object with its `data` attribute containing the
|
||||||
|
reversed array.
|
||||||
|
:rtype: np.ndarray or utils.data.Recording
|
||||||
|
|
||||||
|
>>> rec = Recording(data=[[1+2j, 3+4j, 5+6j]])
|
||||||
|
>>> new_rec = time_reversal(rec)
|
||||||
|
>>> new_rec.data
|
||||||
|
array([[5+6j, 3+4j, 1+2j]])
|
||||||
|
"""
|
||||||
|
|
||||||
|
if isinstance(signal, Recording):
|
||||||
|
data = signal.data
|
||||||
|
else:
|
||||||
|
data = np.asarray(signal)
|
||||||
|
|
||||||
|
if data.ndim == 2 and np.iscomplexobj(data):
|
||||||
|
c, n = data.shape
|
||||||
|
else:
|
||||||
|
raise ValueError("signal must be CxN complex.")
|
||||||
|
|
||||||
|
if c == 1:
|
||||||
|
# If 1xN complex
|
||||||
|
reversed_data = np.squeeze(data)[::-1]
|
||||||
|
else:
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
|
if isinstance(signal, Recording):
|
||||||
|
return Recording(data=reversed_data, metadata=signal.metadata)
|
||||||
|
else:
|
||||||
|
return reversed_data.reshape(c, n)
|
||||||
|
|
||||||
|
|
||||||
|
def spectral_inversion(signal: ArrayLike | Recording) -> np.ndarray | Recording:
|
||||||
|
"""Negates the imaginary components (Q, Quadrature) of the data samples contained within the
|
||||||
|
provided `signal` array or `Recording`.
|
||||||
|
|
||||||
|
:param signal: Input IQ data as a complex CxN array or `Recording`, where C is the number of channels and N
|
||||||
|
is the length of the IQ examples.
|
||||||
|
:type signal: array_like or utils.data.Recording
|
||||||
|
|
||||||
|
:raises ValueError: If `signal` is not CxN complex.
|
||||||
|
|
||||||
|
:return: A numpy array containing the original I and negated Q data samples if `signal` is an array.
|
||||||
|
If `signal` is a `Recording`, returns a `Recording` object with its `data` attribute containing the
|
||||||
|
inverted array.
|
||||||
|
:rtype: np.ndarray or utils.data.Recording
|
||||||
|
|
||||||
|
>>> rec = Recording(data=[[0+45j, 2-10j]])
|
||||||
|
>>> new_rec = spectral_inversion(rec)
|
||||||
|
>>> new_rec.data
|
||||||
|
array([[0-45j, 2+10j]])
|
||||||
|
"""
|
||||||
|
|
||||||
|
if isinstance(signal, Recording):
|
||||||
|
data = signal.data
|
||||||
|
else:
|
||||||
|
data = np.asarray(signal)
|
||||||
|
|
||||||
|
if data.ndim == 2 and np.iscomplexobj(data):
|
||||||
|
c, n = data.shape
|
||||||
|
else:
|
||||||
|
raise ValueError("signal must be CxN complex.")
|
||||||
|
|
||||||
|
if c == 1:
|
||||||
|
new_data = np.squeeze(data).real - 1j * np.squeeze(data).imag
|
||||||
|
else:
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
|
if isinstance(signal, Recording):
|
||||||
|
return Recording(data=new_data, metadata=signal.metadata)
|
||||||
|
else:
|
||||||
|
return new_data.reshape(c, n)
|
||||||
|
|
||||||
|
|
||||||
|
def channel_swap(signal: ArrayLike | Recording) -> np.ndarray | Recording:
|
||||||
|
"""Switches the I (In-phase) with the and Q (Quadrature) data samples for each sample within the
|
||||||
|
provided `signal` array or `Recording`.
|
||||||
|
|
||||||
|
:param signal: Input IQ data as a complex CxN array or `Recording`, where C is the number of channels and N
|
||||||
|
is the length of the IQ examples.
|
||||||
|
:type signal: array_like or utils.data.Recording
|
||||||
|
|
||||||
|
:raises ValueError: If `signal` is not CxN complex.
|
||||||
|
|
||||||
|
:return: A numpy array containing the swapped I and Q data samples if `signal` is an array.
|
||||||
|
If `signal` is a `Recording`, returns a `Recording` object with its `data` attribute containing the
|
||||||
|
swapped array.
|
||||||
|
:rtype: np.ndarray or utils.data.Recording
|
||||||
|
|
||||||
|
>>> rec = Recording(data=[[10+20j, 7+35j]])
|
||||||
|
>>> new_rec = channel_swap(rec)
|
||||||
|
>>> new_rec.data
|
||||||
|
array([[20+10j, 35+7j]])
|
||||||
|
"""
|
||||||
|
|
||||||
|
if isinstance(signal, Recording):
|
||||||
|
data = signal.data
|
||||||
|
else:
|
||||||
|
data = np.asarray(signal)
|
||||||
|
|
||||||
|
if data.ndim == 2 and np.iscomplexobj(data):
|
||||||
|
c, n = data.shape
|
||||||
|
else:
|
||||||
|
raise ValueError("signal must be CxN complex.")
|
||||||
|
|
||||||
|
if c == 1:
|
||||||
|
swapped_data = np.squeeze(data).imag + 1j * np.squeeze(data).real
|
||||||
|
else:
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
|
if isinstance(signal, Recording):
|
||||||
|
return Recording(data=swapped_data, metadata=signal.metadata)
|
||||||
|
else:
|
||||||
|
return swapped_data.reshape(c, n)
|
||||||
|
|
||||||
|
|
||||||
|
def amplitude_reversal(signal: ArrayLike | Recording) -> np.ndarray | Recording:
|
||||||
|
"""Negates the amplitudes of both the I (In-phase) and Q (Quadrature) data samples contained within the
|
||||||
|
provided `signal` array or `Recording`.
|
||||||
|
|
||||||
|
:param signal: Input IQ data as a complex CxN array or `Recording`, where C is the number of channels and N
|
||||||
|
is the length of the IQ examples.
|
||||||
|
:type signal: array_like or utils.data.Recording
|
||||||
|
|
||||||
|
:raises ValueError: If `signal` is not CxN complex.
|
||||||
|
|
||||||
|
:return: A numpy array containing the negated I and Q data samples if `signal` is an array.
|
||||||
|
If `signal` is a `Recording`, returns a `Recording` object with its `data` attribute containing the
|
||||||
|
negated array.
|
||||||
|
:rtype: np.ndarray or utils.data.Recording
|
||||||
|
|
||||||
|
>>> rec = Recording(data=[[4-3j, -5-2j, -9+1j]])
|
||||||
|
>>> new_rec = amplitude_reversal(rec)
|
||||||
|
>>> new_rec.data
|
||||||
|
array([[-4+3j, 5+2j, 9-1j]])
|
||||||
|
"""
|
||||||
|
|
||||||
|
if isinstance(signal, Recording):
|
||||||
|
data = signal.data
|
||||||
|
else:
|
||||||
|
data = np.asarray(signal)
|
||||||
|
|
||||||
|
if data.ndim == 2 and np.iscomplexobj(data):
|
||||||
|
c, n = data.shape
|
||||||
|
else:
|
||||||
|
raise ValueError("signal must be CxN complex.")
|
||||||
|
|
||||||
|
if c == 1:
|
||||||
|
reversed_data = -1 * np.squeeze(data).real - 1j * np.squeeze(data).imag
|
||||||
|
else:
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
|
if isinstance(signal, Recording):
|
||||||
|
return Recording(data=reversed_data, metadata=signal.metadata)
|
||||||
|
else:
|
||||||
|
return reversed_data.reshape(c, n)
|
||||||
|
|
||||||
|
|
||||||
|
def drop_samples( # noqa: C901 # TODO: Simplify function
|
||||||
|
signal: ArrayLike | Recording, max_section_size: Optional[int] = 2, fill_type: Optional[str] = "zeros"
|
||||||
|
) -> np.ndarray | Recording:
|
||||||
|
"""Randomly drops IQ data samples contained within the provided `signal` array or `Recording`.
|
||||||
|
|
||||||
|
This function randomly selects sections of the signal and replaces the data samples in each selected
|
||||||
|
section with a value determined by the fill type.
|
||||||
|
|
||||||
|
:param signal: Input IQ data as a complex CxN array or `Recording`, where C is the number of channels and N
|
||||||
|
is the length of the IQ examples.
|
||||||
|
:type signal: array_like or utils.data.Recording
|
||||||
|
:param max_section_size: Maximum allowable size of the section to be dropped and replaced. Default is 2.
|
||||||
|
:type max_section_size: int, optional
|
||||||
|
:param fill_type: Fill option used to replace dropped section of data (back-fill, front-fill, mean, zeros).
|
||||||
|
Default is "zeros".
|
||||||
|
|
||||||
|
|
||||||
|
"back-fill": replace dropped section with the data sample occuring before the section.
|
||||||
|
|
||||||
|
"front-fill": replace dropped section with the data sample occuring after the section.
|
||||||
|
|
||||||
|
"mean": replace dropped section with mean of the entire signal.
|
||||||
|
|
||||||
|
"zeros": replace dropped section with constant value of 0+0j.
|
||||||
|
:type fill_type: str, optional
|
||||||
|
|
||||||
|
:raises ValueError: If `signal` is not CxN complex.
|
||||||
|
:raises ValueError: If `max_section_size` is less than 1 or greater than or equal to length of `signal`.
|
||||||
|
|
||||||
|
:return: A numpy array containing the I and Q data samples with replaced subsections if
|
||||||
|
`signal` is an array. If `signal` is a `Recording`, returns a `Recording` object with its `data`
|
||||||
|
attribute containing the array with dropped samples.
|
||||||
|
:rtype: np.ndarray or utils.data.Recording
|
||||||
|
|
||||||
|
>>> rec = Recording(data=[[2+5j, 1+8j, 6+4j, 3+7j, 4+9j]])
|
||||||
|
>>> new_rec = drop_samples(rec)
|
||||||
|
>>> new_rec.data
|
||||||
|
array([[2+5j, 0, 0, 0, 4+9j]])
|
||||||
|
"""
|
||||||
|
if isinstance(signal, Recording):
|
||||||
|
data = signal.data
|
||||||
|
else:
|
||||||
|
data = np.asarray(signal)
|
||||||
|
|
||||||
|
if data.ndim == 2 and np.iscomplexobj(data):
|
||||||
|
c, n = data.shape
|
||||||
|
else:
|
||||||
|
raise ValueError("signal must be CxN complex.")
|
||||||
|
|
||||||
|
if max_section_size < 1 or max_section_size >= n:
|
||||||
|
raise ValueError("max_section_size must be at least 1 and must be less than the length of signal.")
|
||||||
|
|
||||||
|
if c == 1:
|
||||||
|
data = np.squeeze(data)
|
||||||
|
|
||||||
|
if fill_type == "mean":
|
||||||
|
mean = np.mean(data)
|
||||||
|
|
||||||
|
i = -1
|
||||||
|
j = -1
|
||||||
|
|
||||||
|
# Pointers i and j point to exact positions
|
||||||
|
while i < n:
|
||||||
|
# Generate valid starting point so that at least 1 drop occurs
|
||||||
|
i = np.random.randint(j + 1, j + n - max_section_size + 2)
|
||||||
|
j = np.random.randint(i, i + max_section_size)
|
||||||
|
|
||||||
|
if j > n - 1: # Check that the full drop is within the dataset
|
||||||
|
break
|
||||||
|
|
||||||
|
# Generate fill based on fill_type
|
||||||
|
if fill_type == "back-fill":
|
||||||
|
fill = data[i - 1] if i > 0 else data[i]
|
||||||
|
elif fill_type == "front-fill":
|
||||||
|
fill = data[j + 1] if j < n - 1 else data[j]
|
||||||
|
elif fill_type == "mean":
|
||||||
|
fill = mean
|
||||||
|
elif fill_type == "zeros":
|
||||||
|
fill = 0 + 0j
|
||||||
|
else:
|
||||||
|
raise ValueError(f"fill_type {fill_type} not recognized.")
|
||||||
|
|
||||||
|
# Replaces dropped samples with fill values
|
||||||
|
data[i : j + 1] = fill
|
||||||
|
else:
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
|
if isinstance(signal, Recording):
|
||||||
|
return Recording(data=data, metadata=signal.metadata)
|
||||||
|
else:
|
||||||
|
return data.reshape(c, n)
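# Illustrative sketch (assumed usage, not part of the original module): how the
# fill_type options behave on a 1xN recording. The sections dropped are chosen
# at random, so these calls are only representative, not reproducible outputs.
#
# >>> rec = Recording(data=[[1+1j, 2+2j, 3+3j, 4+4j]])
# >>> drop_samples(rec, max_section_size=2, fill_type="back-fill")  # repeats the sample before each dropped run
# >>> drop_samples(rec, max_section_size=2, fill_type="mean")       # fills with the mean of the whole tape
# >>> drop_samples(rec, max_section_size=2, fill_type="zeros")      # fills with 0+0j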
|
||||||
|
|
||||||
|
|
||||||
|
def quantize_tape(
|
||||||
|
signal: ArrayLike | Recording, bin_number: Optional[int] = 4, rounding_type: Optional[str] = "floor"
|
||||||
|
) -> np.ndarray | Recording:
|
||||||
|
"""Quantizes the IQ data of the provided `signal` array or `Recording` by a few bits.
|
||||||
|
|
||||||
|
This function emulates an analog-to-digital converter (ADC) which is commonly seen in digital RF systems.
|
||||||
|
The relationship between the number of bins and number of bits is: log(# of bins) / log(2) = # of bits.
|
||||||
|
|
||||||
|
:param signal: Input IQ data as a complex CxN array or `Recording`, where C is the number of channels and N
|
||||||
|
is the length of the IQ examples.
|
||||||
|
:type signal: array_like or utils.data.Recording
|
||||||
|
:param bin_number: The number of bins the signal should be divided into. Default is 4.
|
||||||
|
:type bin_number: int, optional
|
||||||
|
:param rounding_type: The type of rounding applied during processing. Default is "floor".
|
||||||
|
|
||||||
|
"floor": rounds down to the lower bound of the bin.
|
||||||
|
|
||||||
|
"ceiling": rounds up to the upper bound of the bin.
|
||||||
|
:type rounding_type: str, optional
|
||||||
|
|
||||||
|
:raises ValueError: If `signal` is not CxN complex.
|
||||||
|
:raises UserWarning: If `rounding_type` is not "floor" or "ceiling", "floor" is selected by default.
|
||||||
|
|
||||||
|
:return: A numpy array containing the quantized I and Q data samples if `signal` is an array.
|
||||||
|
If `signal` is a `Recording`, returns a `Recording` object with its `data` attribute containing
|
||||||
|
the quantized array.
|
||||||
|
:rtype: np.ndarray or utils.data.Recording
|
||||||
|
|
||||||
|
>>> rec = Recording(data=[[1+1j, 4+4j, 1+2j, 1+4j]])
|
||||||
|
>>> new_rec = quantize_tape(rec)
|
||||||
|
>>> new_rec.data
|
||||||
|
array([[4+4j, 3+3j, 4+1j, 4+3j]])
|
||||||
|
"""
|
||||||
|
if isinstance(signal, Recording):
|
||||||
|
data = signal.data
|
||||||
|
else:
|
||||||
|
data = np.asarray(signal)
|
||||||
|
|
||||||
|
if data.ndim == 2 and np.iscomplexobj(data):
|
||||||
|
c, n = data.shape
|
||||||
|
else:
|
||||||
|
raise ValueError("signal must be CxN complex.")
|
||||||
|
|
||||||
|
if rounding_type not in {"ceiling", "floor"}:
|
||||||
|
raise UserWarning('rounding_type must be either "floor" or "ceiling", floor has been selected by default')
|
||||||
|
|
||||||
|
if c == 1:
|
||||||
|
iq_data = convert_to_2xn(data)
|
||||||
|
maximum, minimum = iq_data.max(), iq_data.min()
|
||||||
|
bin_edges = np.linspace(minimum, maximum, bin_number + 1)
|
||||||
|
indices = np.digitize(iq_data, bin_edges, right=True)
|
||||||
|
|
||||||
|
# If data falls outside the first bin, map it back into the first bin, data will not fall outside of last bin
|
||||||
|
indices[indices == 0] = 1
|
||||||
|
|
||||||
|
# Map the data points to the correct bins
|
||||||
|
if rounding_type == "ceiling":
|
||||||
|
modified_iq_data = bin_edges[indices]
|
||||||
|
else:
|
||||||
|
modified_iq_data = bin_edges[indices - 1]
|
||||||
|
|
||||||
|
new_data = modified_iq_data[0] + 1j * modified_iq_data[1]
|
||||||
|
else:
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
|
if isinstance(signal, Recording):
|
||||||
|
return Recording(data=new_data, metadata=signal.metadata)
|
||||||
|
else:
|
||||||
|
return new_data.reshape(c, n)
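# Illustrative note (worked example of the bins/bits relationship quoted in the
# docstring, not part of the original module): with bin_number = 4,
# log(4) / log(2) = 2, so the quantizer emulates a 2-bit ADC; bin_number = 256
# would correspond to an 8-bit ADC.
#
# >>> rec = Recording(data=[[0+0j, 1+1j, 2+2j, 3+3j]])
# >>> quantize_tape(rec, bin_number=4, rounding_type="floor")  # 2-bit style quantization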
|
||||||
|
|
||||||
|
|
||||||
|
def quantize_parts(
|
||||||
|
signal: ArrayLike | Recording,
|
||||||
|
max_section_size: Optional[int] = 2,
|
||||||
|
bin_number: Optional[int] = 4,
|
||||||
|
rounding_type: Optional[str] = "floor",
|
||||||
|
) -> np.ndarray | Recording:
|
||||||
|
"""Quantizes random parts of the IQ data within the provided `signal` array or `Recording` by a few bits.
|
||||||
|
|
||||||
|
This function emulates an analog-to-digital converter (ADC) which is commonly seen in digital RF systems.
|
||||||
|
The relationship between the number of bins and number of bits is: log(# of bins) / log(2) = # of bits.
|
||||||
|
|
||||||
|
:param signal: Input IQ data as a complex CxN array or `Recording`, where C is the number of channels and N
|
||||||
|
is the length of the IQ examples.
|
||||||
|
:type signal: array_like or utils.data.Recording
|
||||||
|
:param max_section_size: Maximum allowable size of the section to be quantized. Default is 2.
|
||||||
|
:type max_section_size: int, optional
|
||||||
|
:param bin_number: The number of bins the signal should be divided into. Default is 4.
|
||||||
|
:type bin_number: int, optional
|
||||||
|
:param rounding_type: Type of rounding applied during processing. Default is "floor".
|
||||||
|
|
||||||
|
"floor": rounds down to the lower bound of the bin.
|
||||||
|
|
||||||
|
"ceiling": rounds up to the upper bound of the bin.
|
||||||
|
:type rounding_type: str, optional
|
||||||
|
|
||||||
|
:raises ValueError: If `signal` is not CxN complex.
|
||||||
|
:raises UserWarning: If `rounding_type` is not "floor" or "ceiling", "floor" is selected by default.
|
||||||
|
|
||||||
|
:return: A numpy array containing the I and Q data samples with quantized subsections if `signal`
|
||||||
|
is an array. If `signal` is a `Recording`, returns a `Recording` object with its `data` attribute
|
||||||
|
containing the partially quantized array.
|
||||||
|
:rtype: np.ndarray or utils.data.Recording
|
||||||
|
|
||||||
|
>>> rec = Recording(data=[[2+5j, 1+8j, 6+4j, 3+7j, 4+9j]])
|
||||||
|
>>> new_rec = quantize_parts(rec)
|
||||||
|
>>> new_rec.data
|
||||||
|
array([[2+5j, 1+8j, 3.66666667+3.66666667j, 3+7j, 4+9j]])
|
||||||
|
"""
|
||||||
|
|
||||||
|
if isinstance(signal, Recording):
|
||||||
|
data = signal.data
|
||||||
|
else:
|
||||||
|
data = np.asarray(signal)
|
||||||
|
|
||||||
|
if data.ndim == 2 and np.iscomplexobj(data):
|
||||||
|
c, n = data.shape
|
||||||
|
else:
|
||||||
|
raise ValueError("signal must be CxN complex.")
|
||||||
|
|
||||||
|
if rounding_type not in {"ceiling", "floor"}:
|
||||||
|
raise UserWarning('rounding_type must be either "floor" or "ceiling", floor has been selected by default')
|
||||||
|
|
||||||
|
if c == 1:
|
||||||
|
iq_data = convert_to_2xn(data)
|
||||||
|
i_data, q_data = iq_data
|
||||||
|
maximum, minimum = iq_data.max(), iq_data.min()
|
||||||
|
bin_edges = np.linspace(minimum, maximum, bin_number + 1)
|
||||||
|
indices = np.digitize(iq_data, bin_edges, right=True)
|
||||||
|
|
||||||
|
# Map everything from bin 0 to bin 1
|
||||||
|
indices[indices == 0] = 1
|
||||||
|
|
||||||
|
i = -1
|
||||||
|
j = -1
|
||||||
|
|
||||||
|
# Pointers i and j point to exact positions
|
||||||
|
while i < n:
|
||||||
|
# Generate valid starting point so that at least 1 drop occurs
|
||||||
|
i = np.random.randint(j + 1, j + n - max_section_size + 2)
|
||||||
|
j = np.random.randint(i, i + max_section_size)
|
||||||
|
|
||||||
|
if j > n - 1: # Check that the full drop is within the dataset
|
||||||
|
break
|
||||||
|
|
||||||
|
if rounding_type == "ceiling":
|
||||||
|
i_data[i : j + 1] = bin_edges[indices[0][i : j + 1]]
|
||||||
|
q_data[i : j + 1] = bin_edges[indices[1][i : j + 1]]
|
||||||
|
else:
|
||||||
|
i_data[i : j + 1] = bin_edges[indices[0][i : j + 1] - 1]
|
||||||
|
q_data[i : j + 1] = bin_edges[indices[1][i : j + 1] - 1]
|
||||||
|
|
||||||
|
quantized_data = i_data + 1j * q_data
|
||||||
|
else:
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
|
if isinstance(signal, Recording):
|
||||||
|
return Recording(data=quantized_data, metadata=signal.metadata)
|
||||||
|
else:
|
||||||
|
return quantized_data.reshape(c, n)
|
||||||
|
|
||||||
|
|
||||||
|
def magnitude_rescale(
|
||||||
|
signal: ArrayLike | Recording,
|
||||||
|
starting_bounds: Optional[tuple] = None,
|
||||||
|
max_magnitude: Optional[int] = 1,
|
||||||
|
) -> np.ndarray | Recording:
|
||||||
|
"""Selects a random starting point from within the specified starting bounds and multiplies IQ data of the
|
||||||
|
provided `signal` array or `Recording` from that point onward by a random constant.
|
||||||
|
|
||||||
|
:param signal: Input IQ data as a complex CxN array or `Recording`, where C is the number of channels and N
|
||||||
|
is the length of the IQ examples.
|
||||||
|
:type signal: array_like or utils.data.Recording
|
||||||
|
:param starting_bounds: The bounds (inclusive) as indices in which the starting position of the rescaling occurs.
|
||||||
|
Default is None, in which case the bounds default to (random index, N-1).
|
||||||
|
:type starting_bounds: tuple, optional
|
||||||
|
:param max_magnitude: The maximum value of the constant that is used to rescale the data. Default is 1.
|
||||||
|
:type max_magnitude: int, optional
|
||||||
|
|
||||||
|
:raises ValueError: If `signal` is not CxN complex.
|
||||||
|
|
||||||
|
:return: A numpy array containing the I and Q data samples with the rescaled magnitude after the random
|
||||||
|
starting point if `signal` is an array. If `signal` is a `Recording`, returns a `Recording`
|
||||||
|
object with its `data` attribute containing the rescaled array.
|
||||||
|
:rtype: np.ndarray or utils.data.Recording
|
||||||
|
|
||||||
|
>>> rec = Recording(data=[[2+5j, 1+8j, 6+4j, 3+7j, 4+9j]])
|
||||||
|
>>> new_rec = magnitude_rescale(rec)
|
||||||
|
>>> new_rec.data
|
||||||
|
array([[2+5j, 1+8j, 6+4j, 3+7j, 3.03181761+6.82158963j]])
|
||||||
|
"""
|
||||||
|
|
||||||
|
if isinstance(signal, Recording):
|
||||||
|
data = signal.data
|
||||||
|
else:
|
||||||
|
data = np.asarray(signal)
|
||||||
|
|
||||||
|
if data.ndim == 2 and np.iscomplexobj(data):
|
||||||
|
c, n = data.shape
|
||||||
|
else:
|
||||||
|
raise ValueError("signal must be CxN complex.")
|
||||||
|
|
||||||
|
if starting_bounds is None:
|
||||||
|
starting_bounds = (np.random.randint(0, n), n - 1)
|
||||||
|
|
||||||
|
if starting_bounds[0] < 0 or starting_bounds[1] > n - 1:
|
||||||
|
raise ValueError("starting_bounds must be valid indices for the dataset.")
|
||||||
|
|
||||||
|
if c == 1:
|
||||||
|
data = np.squeeze(data)
|
||||||
|
starting_point = np.random.randint(starting_bounds[0], starting_bounds[1] + 1)
|
||||||
|
magnitude = np.random.rand() * max_magnitude
|
||||||
|
|
||||||
|
rescaled_section = data[starting_point:] * magnitude
|
||||||
|
rescaled_data = np.concatenate((data[:starting_point], rescaled_section))
|
||||||
|
else:
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
|
if isinstance(signal, Recording):
|
||||||
|
return Recording(data=rescaled_data, metadata=signal.metadata)
|
||||||
|
else:
|
||||||
|
return rescaled_data.reshape(c, n)
|
||||||
|
|
||||||
|
|
||||||
|
def cut_out( # noqa: C901 # TODO: Simplify function
|
||||||
|
signal: ArrayLike | Recording, max_section_size: Optional[int] = 3, fill_type: Optional[str] = "ones"
|
||||||
|
) -> np.ndarray | Recording:
|
||||||
|
"""Cuts out random sections of IQ data and replaces them with either 0s, 1s, or low, average, or high
|
||||||
|
signal-to-noise ratio (SNR) additive white Gaussian noise (AWGN) within the provided `signal` array or
|
||||||
|
`Recording`.
|
||||||
|
|
||||||
|
:param signal: Input IQ data as a complex CxN array or `Recording`, where C is the number of channels and N
|
||||||
|
is the length of the IQ examples.
|
||||||
|
:type signal: array_like or utils.data.Recording
|
||||||
|
:param max_section_size: Maximum allowable size of the section to be cut out and replaced. Default is 3.
|
||||||
|
:type max_section_size: int, optional
|
||||||
|
:param fill_type: Fill option used to replace cutout section of data (zeros, ones, low-snr, avg-snr, high-snr).
|
||||||
|
Default is "ones".
|
||||||
|
|
||||||
|
"zeros": replace cutout section with 0s.
|
||||||
|
|
||||||
|
"ones": replace cutout section with 1s.
|
||||||
|
|
||||||
|
"low-snr": replace cutout section with AWGN with an SNR of 0.5.
|
||||||
|
|
||||||
|
"avg-snr": replace cutout section with AWGN with an SNR of 1.
|
||||||
|
|
||||||
|
"high-snr": replace cutout section with AWGN with an SNR of 2.
|
||||||
|
:type fill_type: str, optional
|
||||||
|
|
||||||
|
:raises ValueError: If `signal` is not CxN complex.
|
||||||
|
:raises UserWarning: If fill_type is not "zeros", "ones", "low-snr", "avg-snr", or "high-snr", "ones" is selected
|
||||||
|
by default.
|
||||||
|
:raises ValueError: If `max_section_size` is less than 1 or greater than or equal to length of `signal`.
|
||||||
|
|
||||||
|
:return: A numpy array containing the I and Q data samples with random sections cut out and replaced according to
|
||||||
|
`fill_type` if `signal` is an array. If `signal` is a `Recording`, returns a `Recording` object
|
||||||
|
with its `data` attribute containing the cut out and replaced array.
|
||||||
|
:rtype: np.ndarray or utils.data.Recording
|
||||||
|
|
||||||
|
>>> rec = Recording(data=[[2+5j, 1+8j, 6+4j, 3+7j, 4+9j]])
|
||||||
|
>>> new_rec = cut_out(rec)
|
||||||
|
>>> new_rec.data
|
||||||
|
array([[2+5j, 1+8j, 1+1j, 1+1j, 1+1j]])
|
||||||
|
"""
|
||||||
|
if isinstance(signal, Recording):
|
||||||
|
data = signal.data
|
||||||
|
else:
|
||||||
|
data = np.asarray(signal)
|
||||||
|
|
||||||
|
if data.ndim == 2 and np.iscomplexobj(data):
|
||||||
|
c, n = data.shape
|
||||||
|
else:
|
||||||
|
raise ValueError("signal must be CxN complex.")
|
||||||
|
|
||||||
|
if fill_type not in {"zeros", "ones", "low-snr", "avg-snr", "high-snr"}:
|
||||||
|
raise UserWarning(
|
||||||
|
"""fill_type must be "zeros", "ones", "low-snr", "avg-snr", or "high-snr",
|
||||||
|
"ones" has been selected by default"""
|
||||||
|
)
|
||||||
|
|
||||||
|
if max_section_size < 1 or max_section_size >= n:
|
||||||
|
raise ValueError("max_section_size must be at least 1 and must be less than the length of signal.")
|
||||||
|
|
||||||
|
if c == 1:
|
||||||
|
data = np.squeeze(data)
|
||||||
|
|
||||||
|
i = -1
|
||||||
|
j = -1
|
||||||
|
|
||||||
|
# Pointers i and j point to exact positions
|
||||||
|
while i < n:
|
||||||
|
# Generate valid starting point so that at least 1 drop occurs
|
||||||
|
i = np.random.randint(j + 1, j + n - max_section_size + 2)
|
||||||
|
j = np.random.randint(i, i + max_section_size)
|
||||||
|
|
||||||
|
if j > n - 1: # Check that the full drop is within the dataset
|
||||||
|
break
|
||||||
|
|
||||||
|
# TODO: Check if we can collapse last three options which depends on what snr value the user enters
|
||||||
|
if fill_type == "zeros":
|
||||||
|
fill = 0 + 0j
|
||||||
|
elif fill_type == "ones":
|
||||||
|
fill = 1 + 1j
|
||||||
|
elif fill_type == "low-snr":
|
||||||
|
fill = generate_awgn([data[i : j + 1]], 0.5)
|
||||||
|
elif fill_type == "avg-snr":
|
||||||
|
fill = generate_awgn([data[i : j + 1]], 1)
|
||||||
|
else:
|
||||||
|
fill = generate_awgn([data[i : j + 1]], 2)
|
||||||
|
|
||||||
|
data[i : j + 1] = np.squeeze(fill)  # squeeze the 1xM AWGN fills so they match the 1-D slice
|
||||||
|
else:
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
|
if isinstance(signal, Recording):
|
||||||
|
return Recording(data=data, metadata=signal.metadata)
|
||||||
|
else:
|
||||||
|
return data.reshape(c, n)
|
||||||
|
|
||||||
|
|
||||||
|
def patch_shuffle(signal: ArrayLike | Recording, max_patch_size: Optional[int] = 3) -> np.ndarray | Recording:
|
||||||
|
"""Selects random patches of the IQ data and randomly shuffles the data samples within the specified patch of
|
||||||
|
the provided `signal` array or `Recording`.
|
||||||
|
|
||||||
|
:param signal: Input IQ data as a complex CxN array or `Recording`, where C is the number of channels and N
|
||||||
|
is the length of the IQ examples.
|
||||||
|
:type signal: array_like or utils.data.Recording
|
||||||
|
:param max_patch_size: Maximum allowable patch size of the data that can be shuffled. Default is 3.
|
||||||
|
:type max_patch_size: int, optional
|
||||||
|
|
||||||
|
:raises ValueError: If `signal` is not CxN complex.
|
||||||
|
:raises ValueError: If `max_patch_size` is less than or equal to 1 or greater than length of `signal`.
|
||||||
|
|
||||||
|
:return: A numpy array containing the I and Q data samples with randomly shuffled regions if `signal` is
|
||||||
|
an array. If `signal` is a `Recording`, returns a `Recording` object with its `data` attribute containing
|
||||||
|
the shuffled array.
|
||||||
|
:rtype: np.ndarray or utils.data.Recording
|
||||||
|
|
||||||
|
>>> rec = Recording(data=[[2+5j, 1+8j, 6+4j, 3+7j, 4+9j]])
|
||||||
|
>>> new_rec = patch_shuffle(rec)
|
||||||
|
>>> new_rec.data
|
||||||
|
array([[2+5j, 1+8j, 3+4j, 6+9j, 4+7j]])
|
||||||
|
"""
|
||||||
|
if isinstance(signal, Recording):
|
||||||
|
data = signal.data.copy() # Cannot shuffle read-only array.
|
||||||
|
else:
|
||||||
|
data = np.asarray(signal)
|
||||||
|
|
||||||
|
if data.ndim == 2 and np.iscomplexobj(data):
|
||||||
|
c, n = data.shape
|
||||||
|
else:
|
||||||
|
raise ValueError("signal must be CxN complex.")
|
||||||
|
|
||||||
|
if max_patch_size > n or max_patch_size <= 1:
|
||||||
|
raise ValueError("max_patch_size must be less than or equal to the length of signal and greater than 1.")
|
||||||
|
|
||||||
|
if c == 1:
|
||||||
|
data = np.squeeze(data)
|
||||||
|
|
||||||
|
i = -1
|
||||||
|
j = -1
|
||||||
|
|
||||||
|
# Pointers i and j point to exact positions
|
||||||
|
while i < n:
|
||||||
|
# Generate valid starting point so that at least 1 drop occurs
|
||||||
|
i = np.random.randint(j + 1, j + n - max_patch_size + 2)
|
||||||
|
j = np.random.randint(i, i + max_patch_size)
|
||||||
|
|
||||||
|
if j > n - 1: # Check that the full drop is within the dataset
|
||||||
|
break
|
||||||
|
|
||||||
|
np.random.shuffle(data.real[i : j + 1])
|
||||||
|
np.random.shuffle(data.imag[i : j + 1])
|
||||||
|
else:
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
|
if isinstance(signal, Recording):
|
||||||
|
return Recording(data=data, metadata=signal.metadata)
|
||||||
|
else:
|
||||||
|
return data.reshape(c, n)
|
365
src/ria_toolkit/transforms/iq_impairments.py
Normal file
365
src/ria_toolkit/transforms/iq_impairments.py
Normal file
|
@ -0,0 +1,365 @@
|
||||||
|
"""
|
||||||
|
This module comprises various transforms designed to represent signal impairments.
|
||||||
|
These transforms take a recording as input and return a corresponding recording with
|
||||||
|
the impairment model applied; we call the latter an impaired recording.
|
||||||
|
|
||||||
|
Signals travel through transmission media, which are not perfect. The imperfection
|
||||||
|
causes signal impairment, meaning that the signal at the beginning of the medium is
|
||||||
|
not the same as the signal at the end of the medium. What is sent is not what is received.
|
||||||
|
Three causes of impairment are attenuation, distortion, and noise.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
from numpy.typing import ArrayLike
|
||||||
|
from scipy.signal import resample_poly
|
||||||
|
|
||||||
|
from utils.data import Recording
|
||||||
|
from utils.transforms import iq_augmentations
|
||||||
|
|
||||||
|
|
||||||
|
def add_awgn_to_signal(signal: ArrayLike | Recording, snr: Optional[float] = 1) -> np.ndarray | Recording:
|
||||||
|
"""Generates additive white gaussian noise (AWGN) relative to the signal-to-noise ratio (SNR) of the
|
||||||
|
provided `signal` array or `Recording`.
|
||||||
|
|
||||||
|
This function calculates the root mean squared (RMS) power of `signal` and then finds the RMS power of the noise
|
||||||
|
which matches the specified SNR. The AWGN is then generated by computing the noise variance and drawing random
|
||||||
|
amplitudes and phases for each sample. Finally, the generated AWGN is added to the original signal and
|
||||||
|
returned.
|
||||||
|
|
||||||
|
:param signal: Input IQ data as a complex ``C x N`` array or `Recording`, where ``C`` is the number of channels
|
||||||
|
and ``N`` is the length of the IQ examples.
|
||||||
|
:type signal: array_like or utils.data.Recording
|
||||||
|
:param snr: The signal-to-noise ratio in dB. Default is 1.
|
||||||
|
:type snr: float, optional
|
||||||
|
|
||||||
|
:raises ValueError: If `signal` is not CxN complex.
|
||||||
|
|
||||||
|
:return: A numpy array which is the sum of the noise (which matches the SNR) and the original signal. If `signal`
|
||||||
|
is a `Recording`, returns a `Recording object` with its `data` attribute containing the noisy signal array.
|
||||||
|
:rtype: np.ndarray or utils.data.Recording
|
||||||
|
|
||||||
|
>>> rec = Recording(data=[[1+1j, 2+2j]])
|
||||||
|
>>> new_rec = add_awgn_to_signal(rec)
|
||||||
|
>>> new_rec.data
|
||||||
|
array([[0.83141973+0.32529242j, -1.00909846+2.39282713j]])
|
||||||
|
"""
|
||||||
|
|
||||||
|
if isinstance(signal, Recording):
|
||||||
|
data = signal.data
|
||||||
|
else:
|
||||||
|
data = np.asarray(signal)
|
||||||
|
|
||||||
|
if data.ndim != 2 or not np.iscomplexobj(data):
|
||||||
|
raise ValueError("signal must be CxN complex.")
|
||||||
|
|
||||||
|
noise = iq_augmentations.generate_awgn(signal=data, snr=snr)
|
||||||
|
print(f"noise is {noise}")
|
||||||
|
|
||||||
|
noisy_signal = data + noise
|
||||||
|
|
||||||
|
if isinstance(signal, Recording):
|
||||||
|
return Recording(data=noisy_signal, metadata=signal.metadata)
|
||||||
|
else:
|
||||||
|
return noisy_signal
|
||||||
|
|
||||||
|
|
||||||
|
def time_shift(signal: ArrayLike | Recording, shift: Optional[int] = 1) -> np.ndarray | Recording:
|
||||||
|
"""Apply a time shift to a signal.
|
||||||
|
|
||||||
|
After the time shift is applied, we fill any empty regions with zeros.
|
||||||
|
|
||||||
|
:param signal: Input IQ data as a complex CxN array or `Recording`, where C is the number of channels and N
|
||||||
|
is the length of the IQ examples.
|
||||||
|
:type signal: array_like or utils.data.Recording
|
||||||
|
:param shift: The number of indices to shift by. Default is 1.
|
||||||
|
:type shift: int, optional
|
||||||
|
|
||||||
|
:raises ValueError: If `signal` is not CxN complex.
|
||||||
|
:raises UserWarning: If `shift` is greater than length of `signal`.
|
||||||
|
|
||||||
|
:return: A numpy array which represents the time-shifted signal. If `signal` is a `Recording`,
|
||||||
|
returns a `Recording object` with its `data` attribute containing the time-shifted array.
|
||||||
|
:rtype: np.ndarray or utils.data.Recording
|
||||||
|
|
||||||
|
>>> rec = Recording(data=[[1+1j, 2+2j, 3+3j, 4+4j, 5+5j]])
|
||||||
|
>>> new_rec = time_shift(rec, -2)
|
||||||
|
>>> new_rec.data
|
||||||
|
array([[3+3j, 4+4j, 5+5j, 0+0j, 0+0j]])
|
||||||
|
"""
|
||||||
|
# TODO: Additional info needs to be added to docstring description
|
||||||
|
|
||||||
|
if isinstance(signal, Recording):
|
||||||
|
data = signal.data
|
||||||
|
else:
|
||||||
|
data = np.asarray(signal)
|
||||||
|
|
||||||
|
if data.ndim == 2 and np.iscomplexobj(data):
|
||||||
|
c, n = data.shape
|
||||||
|
else:
|
||||||
|
raise ValueError("signal must be CxN complex.")
|
||||||
|
|
||||||
|
if shift > n:
|
||||||
|
raise UserWarning("shift is greater than signal length")
|
||||||
|
|
||||||
|
shifted_data = np.zeros_like(data)
|
||||||
|
|
||||||
|
if c == 1:
|
||||||
|
# New iq array shifted left or right depending on sign of shift
|
||||||
|
# This also works when |shift| exceeds data.shape[1]
|
||||||
|
if shift >= 0:
|
||||||
|
# Shift to right
|
||||||
|
shifted_data[:, shift:] = data[:, : max(n - shift, 0)]  # handles shift == 0 and shift > n
|
||||||
|
|
||||||
|
else:
|
||||||
|
# Shift to the left
|
||||||
|
shifted_data[:, :shift] = data[:, -shift:]
|
||||||
|
else:
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
|
if isinstance(signal, Recording):
|
||||||
|
return Recording(data=shifted_data, metadata=signal.metadata)
|
||||||
|
else:
|
||||||
|
return shifted_data
|
||||||
|
|
||||||
|
|
||||||
|
def frequency_shift(signal: ArrayLike | Recording, shift: Optional[float] = 0.5) -> np.ndarray | Recording:
|
||||||
|
"""Apply a frequency shift to a signal.
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
The frequency shift is applied relative to the sample rate.
|
||||||
|
|
||||||
|
:param signal: Input IQ data as a complex CxN array or `Recording`, where C is the number of channels and N
|
||||||
|
is the length of the IQ examples.
|
||||||
|
:type signal: array_like or utils.data.Recording
|
||||||
|
:param shift: The frequency shift relative to the sample rate. Must be in the range ``[-0.5, 0.5]``.
|
||||||
|
Default is 0.5.
|
||||||
|
:type shift: float, optional
|
||||||
|
|
||||||
|
:raises ValueError: If the provided frequency shift is not in the range ``[-0.5, 0.5]``.
|
||||||
|
:raises ValueError: If `signal` is not CxN complex.
|
||||||
|
|
||||||
|
:return: A numpy array which represents the frequency-shifted signal. If `signal` is a `Recording`,
|
||||||
|
returns a `Recording object` with its `data` attribute containing the frequency-shifted array.
|
||||||
|
:rtype: np.ndarray or utils.data.Recording
|
||||||
|
|
||||||
|
>>> rec = Recording(data=[[1+1j, 2+2j, 3+3j, 4+4j]])
|
||||||
|
>>> new_rec = frequency_shift(rec, -0.4)
|
||||||
|
>>> new_rec.data
|
||||||
|
array([[1+1j, -0.44246348-2.79360449j, -1.92611857+3.78022053j, 5.04029404-2.56815809j]])
|
||||||
|
"""
|
||||||
|
# TODO: Additional info needs to be added to docstring description
|
||||||
|
|
||||||
|
if shift > 0.5 or shift < -0.5:
|
||||||
|
raise ValueError("Frequency shift must be in the range [-0.5, 0.5]")
|
||||||
|
|
||||||
|
if isinstance(signal, Recording):
|
||||||
|
data = signal.data
|
||||||
|
else:
|
||||||
|
data = np.asarray(signal)
|
||||||
|
|
||||||
|
if data.ndim == 2 and np.iscomplexobj(data):
|
||||||
|
c, n = data.shape
|
||||||
|
else:
|
||||||
|
raise ValueError("signal must be CxN complex.")
|
||||||
|
|
||||||
|
shifted_data = np.zeros_like(data)
|
||||||
|
|
||||||
|
if c == 1:
|
||||||
|
# Calculate the phase shift for the frequency shift
|
||||||
|
phase_shift_ = 2.0 * np.pi * shift * np.arange(n)
|
||||||
|
|
||||||
|
# Use trigonometric identities to apply the frequency shift
|
||||||
|
shifted_data.real = data.real * np.cos(phase_shift_) - data.imag * np.sin(phase_shift_)
|
||||||
|
shifted_data.imag = data.real * np.sin(phase_shift_) + data.imag * np.cos(phase_shift_)
|
||||||
|
else:
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
|
if isinstance(signal, Recording):
|
||||||
|
return Recording(data=shifted_data, metadata=signal.metadata)
|
||||||
|
else:
|
||||||
|
return shifted_data
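# Equivalence note (illustrative, not part of the original module): the two
# real-valued updates above are the trigonometric expansion of a single complex
# rotation, so for a 1xN complex array `data` the result equals
#
# >>> shifted = data * np.exp(2j * np.pi * shift * np.arange(data.shape[1]))
#
# which may be useful when verifying the transform.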
|
||||||
|
|
||||||
|
|
||||||
|
def phase_shift(signal: ArrayLike | Recording, phase: Optional[float] = np.pi) -> np.ndarray | Recording:
|
||||||
|
"""Apply a phase shift to a signal.
|
||||||
|
|
||||||
|
:param signal: Input IQ data as a complex CxN array or `Recording`, where C is the number of channels and N
|
||||||
|
is the length of the IQ examples.
|
||||||
|
:type signal: array_like or utils.data.Recording
|
||||||
|
:param phase: The phase angle by which to rotate the IQ samples, in radians. Must be in the range ``[-π, π]``.
|
||||||
|
Default is π.
|
||||||
|
:type phase: float, optional
|
||||||
|
|
||||||
|
:raises ValueError: If the provided phase rotation is not in the range ``[-π, π]``.
|
||||||
|
:raises ValueError: If `signal` is not CxN complex.
|
||||||
|
|
||||||
|
:return: A numpy array which represents the phase-shifted signal. If `signal` is a `Recording`,
|
||||||
|
returns a `Recording object` with its `data` attribute containing the phase-shifted array.
|
||||||
|
:rtype: np.ndarray or utils.data.Recording
|
||||||
|
|
||||||
|
>>> rec = Recording(data=[[1+1j, 2+2j, 3+3j, 4+4j]])
|
||||||
|
>>> new_rec = phase_shift(rec, np.pi/2)
|
||||||
|
>>> new_rec.data
|
||||||
|
array([[-1+1j, -2+2j, -3+3j, -4+4j]])
|
||||||
|
"""
|
||||||
|
# TODO: Additional info needs to be added to docstring description
|
||||||
|
|
||||||
|
if phase > np.pi or phase < -np.pi:
|
||||||
|
raise ValueError("Phase rotation must be in the range [-π, π]")
|
||||||
|
|
||||||
|
if isinstance(signal, Recording):
|
||||||
|
data = signal.data
|
||||||
|
else:
|
||||||
|
data = np.asarray(signal)
|
||||||
|
|
||||||
|
if data.ndim == 2 and np.iscomplexobj(data):
|
||||||
|
c, n = data.shape
|
||||||
|
else:
|
||||||
|
raise ValueError("signal must be CxN complex.")
|
||||||
|
|
||||||
|
if c == 1:
|
||||||
|
shifted_data = data * np.exp(1j * phase)
|
||||||
|
else:
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
|
if isinstance(signal, Recording):
|
||||||
|
return Recording(data=shifted_data, metadata=signal.metadata)
|
||||||
|
else:
|
||||||
|
return shifted_data
|
||||||
|
|
||||||
|
|
||||||
|
def iq_imbalance(
|
||||||
|
signal: ArrayLike | Recording,
|
||||||
|
amplitude_imbalance: Optional[float] = 1.5,
|
||||||
|
phase_imbalance: Optional[float] = np.pi,
|
||||||
|
dc_offset: Optional[float] = 1.5,
|
||||||
|
) -> np.ndarray | Recording:
|
||||||
|
"""Apply an IQ Imbalance to a signal.
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
Based on MathWorks' `I/Q Imbalance <https://www.mathworks.com/help/comm/ref/iqimbalance.html>`_.
|
||||||
|
|
||||||
|
:param signal: Input IQ data as a complex CxN array or `Recording`, where C is the number of channels and N
|
||||||
|
is the length of the IQ examples.
|
||||||
|
:type signal: array_like or utils.data.Recording
|
||||||
|
:param amplitude_imbalance: The IQ amplitude imbalance to apply, in dB. Default is 1.5.
|
||||||
|
:type amplitude_imbalance: float, optional
|
||||||
|
:param phase_imbalance: The IQ phase imbalance to apply, in radians. Default is π.
|
||||||
|
Must be in the range ``[-π, π]``.
|
||||||
|
:type phase_imbalance: float, optional
|
||||||
|
:param dc_offset: The IQ DC offset to apply, in dB. Default is 1.5.
|
||||||
|
:type dc_offset: float, optional
|
||||||
|
|
||||||
|
:raises ValueError: If the phase imbalance is not in the range ``[-π, π]``.
|
||||||
|
:raises ValueError: If `signal` is not CxN complex.
|
||||||
|
|
||||||
|
:return: A numpy array which is the original signal with an applied IQ imbalance. If `signal` is a `Recording`,
|
||||||
|
returns a `Recording object` with its `data` attribute containing the IQ imbalanced signal array.
|
||||||
|
:rtype: np.ndarray or utils.data.Recording
|
||||||
|
|
||||||
|
>>> rec = Recording(data=[[2+18j, -34+2j, 3+9j]])
|
||||||
|
>>> new_rec = iq_imbalance(rec, 1, np.pi, 2)
|
||||||
|
>>> new_rec.data
|
||||||
|
array([[-38.38613587-4.78555031j, -4.26512621+81.35435535j, -19.19306793-7.17832547j]])
|
||||||
|
"""
|
||||||
|
# TODO: Additional info needs to be added to docstring description
|
||||||
|
|
||||||
|
if phase_imbalance > np.pi or phase_imbalance < -np.pi:
|
||||||
|
raise ValueError("Phase imbalance must be in the range [-π, π].")
|
||||||
|
|
||||||
|
if isinstance(signal, Recording):
|
||||||
|
data = signal.data
|
||||||
|
else:
|
||||||
|
data = np.asarray(signal)
|
||||||
|
|
||||||
|
if data.ndim == 2 and np.iscomplexobj(data):
|
||||||
|
c, n = data.shape
|
||||||
|
else:
|
||||||
|
raise ValueError("signal must be CxN complex.")
|
||||||
|
|
||||||
|
if c == 1:
|
||||||
|
# Apply amplitude imbalance
|
||||||
|
data = (
|
||||||
|
10 ** (0.5 * amplitude_imbalance / 20.0) * data.real
|
||||||
|
+ 1j * 10 ** (-0.5 * amplitude_imbalance / 20.0) * data.imag
|
||||||
|
)
|
||||||
|
|
||||||
|
# Apply phase imbalance
|
||||||
|
data = (
|
||||||
|
np.exp(-1j * phase_imbalance / 2.0) * data.real
|
||||||
|
+ np.exp(1j * (np.pi / 2.0 + phase_imbalance / 2.0)) * data.imag
|
||||||
|
)
|
||||||
|
|
||||||
|
# Apply DC offset
|
||||||
|
imbalanced_data = data + (10 ** (dc_offset / 20.0) * data.real + 1j * 10 ** (dc_offset / 20.0) * data.imag)
|
||||||
|
else:
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
|
if isinstance(signal, Recording):
|
||||||
|
return Recording(data=imbalanced_data, metadata=signal.metadata)
|
||||||
|
else:
|
||||||
|
return imbalanced_data
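# Worked numbers (illustrative, not part of the original module): with the
# default amplitude_imbalance of 1.5 dB, the in-phase branch is scaled by
# 10 ** (0.75 / 20) ≈ 1.090 and the quadrature branch by 10 ** (-0.75 / 20) ≈ 0.917,
# i.e. roughly a ±9% amplitude mismatch between I and Q before the phase
# imbalance and DC offset terms are applied.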
|
||||||
|
|
||||||
|
|
||||||
|
def resample(signal: ArrayLike | Recording, up: Optional[int] = 4, down: Optional[int] = 2) -> np.ndarray | Recording:
|
||||||
|
"""Resample a signal using polyphase filtering.
|
||||||
|
|
||||||
|
Uses scipy.signal.resample_poly to upsample the signal by the
|
||||||
|
factor *up*, apply a zero-phase low-pass FIR filter, and downsample the
|
||||||
|
signal by the factor *down*.
|
||||||
|
|
||||||
|
:param signal: Input IQ data as a complex CxN array or `Recording`, where C is the number of channels and N
|
||||||
|
is the length of the IQ examples.
|
||||||
|
:type signal: array_like or utils.data.Recording
|
||||||
|
:param up: The upsampling factor. Default is 4.
|
||||||
|
:type up: int, optional
|
||||||
|
:param down: The downsampling factor. Default is 2.
|
||||||
|
:type down: int, optional
|
||||||
|
|
||||||
|
:raises ValueError: If `signal` is not CxN complex.
|
||||||
|
|
||||||
|
:return: A numpy array which represents the resampled signal. If `signal` is a `Recording`,
|
||||||
|
returns a `Recording object` with its `data` attribute containing the resampled array.
|
||||||
|
:rtype: np.ndarray or utils.data.Recording
|
||||||
|
|
||||||
|
>>> rec = Recording(data=[[1+1j, 2+2j]])
|
||||||
|
>>> new_rec = resample(rec, 2, 1)
|
||||||
|
>>> new_rec.data
|
||||||
|
array([[1.00051747+1.00051747j, 1.90020207+1.90020207j]])
|
||||||
|
"""
|
||||||
|
# TODO: Additional info needs to be added to docstring description
|
||||||
|
|
||||||
|
if isinstance(signal, Recording):
|
||||||
|
data = signal.data
|
||||||
|
else:
|
||||||
|
data = np.asarray(signal)
|
||||||
|
|
||||||
|
if data.ndim == 2 and np.iscomplexobj(data):
|
||||||
|
c, n = data.shape
|
||||||
|
else:
|
||||||
|
raise ValueError("signal must be CxN complex.")
|
||||||
|
|
||||||
|
if c == 1:
|
||||||
|
data = np.squeeze(data)
|
||||||
|
resampled_iqdata = resample_poly(x=data, up=up, down=down)
|
||||||
|
|
||||||
|
# Reshape array so that slicing operations work on resampled data
|
||||||
|
resampled_iqdata = np.reshape(resampled_iqdata, newshape=(1, len(resampled_iqdata)))
|
||||||
|
|
||||||
|
if resampled_iqdata.shape[1] > n:
|
||||||
|
resampled_iqdata = resampled_iqdata[:, :n]
|
||||||
|
|
||||||
|
else:
|
||||||
|
empty_array = np.zeros((c, n), dtype=resampled_iqdata.dtype)  # pad back out to the original length n
|
||||||
|
empty_array[:, : resampled_iqdata.shape[1]] = resampled_iqdata
resampled_iqdata = empty_array
|
||||||
|
else:
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
|
if isinstance(signal, Recording):
|
||||||
|
return Recording(data=resampled_iqdata, metadata=signal.metadata)
|
||||||
|
else:
|
||||||
|
return resampled_iqdata
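# Usage note (assumed behaviour based on the trimming/padding logic above, not
# part of the original module): the output is trimmed or zero-padded so it keeps
# the original length n, i.e. resampling changes the effective sample rate
# without changing the array shape.
#
# >>> rec = Recording(data=[[1+1j, 2+2j, 3+3j, 4+4j]])
# >>> resample(rec, up=1, down=2).data.shape  # still (1, 4); second half zero-padded
# (1, 4)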
|
9
src/ria_toolkit/utils/__init__.py
Normal file
9
src/ria_toolkit/utils/__init__.py
Normal file
|
@ -0,0 +1,9 @@
|
||||||
|
"""
|
||||||
|
The Helpers module contains assorted helper functions, including array conversion utilities.
|
||||||
|
"""
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"bytes_to_samples",
|
||||||
|
]
|
||||||
|
|
||||||
|
from .bytes_to_samples import bytes_to_samples
|
80
src/ria_toolkit/utils/array_conversion.py
Normal file
80
src/ria_toolkit/utils/array_conversion.py
Normal file
|
@ -0,0 +1,80 @@
|
||||||
|
"""
|
||||||
|
IQ data represents the in-phase (I) and quadrature (Q) components of a signal. There are two ways to represent
|
||||||
|
single-channel IQ signals:
|
||||||
|
|
||||||
|
#. **Complex 1xN Format:** In the complex 1xN format, the IQ data is represented as a 2D array of complex numbers with
|
||||||
|
shape 1xN. In this format, the real part of each complex number represents the in-phase component, while the
|
||||||
|
imaginary part represents the quadrature component.
|
||||||
|
#. **Real 2xN Format:** In the real 2xN format, the IQ data is represented as a 2D array of real numbers with shape
|
||||||
|
2xN. In this format, the first row contains the in-phase components, while the second row contains the quadrature
|
||||||
|
components.
|
||||||
|
|
||||||
|
This submodule provides functions to verify and convert between these two formats.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
from numpy.typing import ArrayLike
|
||||||
|
|
||||||
|
|
||||||
|
def convert_to_2xn(arr: np.ndarray) -> np.ndarray:
|
||||||
|
"""Convert arr to the real 2xN format. If arr is already real 2xN, then you'll get back a copy.
|
||||||
|
|
||||||
|
:param arr: Array of IQ samples, in the complex 1xN format.
|
||||||
|
:type arr: array_like
|
||||||
|
|
||||||
|
:return: The provided signal, in the real 2xN format.
|
||||||
|
:rtype: np.ndarray
|
||||||
|
"""
|
||||||
|
if is_1xn(arr):
|
||||||
|
return np.vstack((np.real(arr[0]), np.imag(arr[0])))
|
||||||
|
|
||||||
|
elif is_2xn(arr):
|
||||||
|
return np.copy(arr)
|
||||||
|
|
||||||
|
else:
|
||||||
|
raise ValueError("arr is neither complex 1xN nor real 2xN.")
|
||||||
|
|
||||||
|
|
||||||
|
def convert_to_1xn(arr: np.ndarray) -> np.ndarray:
|
||||||
|
"""Convert arr to the complex 1xN format. If arr is already complex 1xN, then you'll get back a copy.
|
||||||
|
|
||||||
|
:param arr: Array of IQ samples, in the real 2xN format.
|
||||||
|
:type arr: np.ndarray
|
||||||
|
|
||||||
|
:return: The provided signal, in the complex 1xN format.
|
||||||
|
:rtype: np.ndarray
|
||||||
|
"""
|
||||||
|
if is_2xn(arr):
|
||||||
|
return np.expand_dims(a=arr[0, :] + 1j * arr[1, :], axis=0)
|
||||||
|
|
||||||
|
elif is_1xn(arr):
|
||||||
|
return np.copy(arr)
|
||||||
|
|
||||||
|
else:
|
||||||
|
raise ValueError("arr is neither complex 1xN nor real 2xN.")
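# Round-trip sketch (illustrative, not part of the original module):
#
# >>> c1 = np.array([[1 + 2j, 3 + 4j]])       # complex 1xN
# >>> r2 = convert_to_2xn(c1)                 # real 2xN: [[1., 3.], [2., 4.]]
# >>> np.array_equal(convert_to_1xn(r2), c1)
# True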
|
||||||
|
|
||||||
|
|
||||||
|
def is_1xn(arr: ArrayLike) -> bool:
|
||||||
|
"""
|
||||||
|
:return: True if arr is complex 1xN, False otherwise.
|
||||||
|
:rtype: bool
|
||||||
|
"""
|
||||||
|
a = np.asarray(arr)
|
||||||
|
|
||||||
|
if a.ndim == 2 and a.shape[0] == 1 and np.iscomplexobj(a):
|
||||||
|
return True
|
||||||
|
else:
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def is_2xn(arr: ArrayLike) -> bool:
|
||||||
|
"""
|
||||||
|
:return: True if arr is real 2xN, False otherwise.
|
||||||
|
:rtype: bool
|
||||||
|
"""
|
||||||
|
a = np.asarray(arr)
|
||||||
|
|
||||||
|
if a.ndim == 2 and a.shape[0] == 2 and not np.iscomplexobj(a):
|
||||||
|
return True
|
||||||
|
else:
|
||||||
|
return False
|
18
src/ria_toolkit/utils/bytes_to_samples.py
Normal file
18
src/ria_toolkit/utils/bytes_to_samples.py
Normal file
|
@ -0,0 +1,18 @@
|
||||||
|
from numpy.typing import NDArray
|
||||||
|
|
||||||
|
|
||||||
|
def bytes_to_samples(data: bytes) -> NDArray:
|
||||||
|
"""Convert bytes to IQ samples, in the complex 1xN format.
|
||||||
|
|
||||||
|
:param data: Array of bytes
|
||||||
|
:type data: bytes
|
||||||
|
|
||||||
|
:return: Tape of IQ samples, as numpy complex type
|
||||||
|
:rtype: np.ndarray
|
||||||
|
"""
|
||||||
|
# samples = np.frombuffer(data, dtype=np.int16).astype(np.float32)
|
||||||
|
# samples /= 2048
|
||||||
|
# samples = samples[::2] + 1j * samples[1::2]
|
||||||
|
# # samples = samples.view(np.complex64)
|
||||||
|
# return samples
|
||||||
|
raise NotImplementedError
|
12
src/ria_toolkit/viz/__init__.py
Normal file
12
src/ria_toolkit/viz/__init__.py
Normal file
|
@ -0,0 +1,12 @@
|
||||||
|
"""
|
||||||
|
The package contains assorted plotting and report generation utilities to help visualize RIA components such as
|
||||||
|
recordings and radio datasets.
|
||||||
|
"""
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"view_annotations",
|
||||||
|
"view_channels",
|
||||||
|
"view_sig",
|
||||||
|
]
|
||||||
|
|
||||||
|
from .view_signal import view_annotations, view_channels, view_sig
|
192
src/ria_toolkit/viz/recording.py
Normal file
192
src/ria_toolkit/viz/recording.py
Normal file
|
@ -0,0 +1,192 @@
|
||||||
|
import numpy as np
|
||||||
|
import plotly.graph_objects as go
|
||||||
|
import scipy.signal as signal
|
||||||
|
from plotly.graph_objs import Figure
|
||||||
|
from scipy.fft import fft, fftshift
|
||||||
|
|
||||||
|
from utils.data import Recording
|
||||||
|
|
||||||
|
|
||||||
|
def spectrogram(rec: Recording, thumbnail: bool = False) -> Figure:
|
||||||
|
"""Create a spectrogram for the recording.
|
||||||
|
|
||||||
|
:param rec: Signal to plot.
|
||||||
|
:type rec: utils.data.Recording
|
||||||
|
:param thumbnail: Whether to return a small thumbnail version or full plot.
|
||||||
|
:type thumbnail: bool
|
||||||
|
|
||||||
|
:return: Spectrogram, as a Plotly figure.
|
||||||
|
"""
|
||||||
|
complex_signal = rec.data[0]
|
||||||
|
sample_rate = int(rec.metadata.get("sample_rate", 1))
|
||||||
|
plot_length = len(complex_signal)
|
||||||
|
|
||||||
|
# Determine FFT size
|
||||||
|
if plot_length < 2000:
|
||||||
|
fft_size = 64
|
||||||
|
elif plot_length < 10000:
|
||||||
|
fft_size = 256
|
||||||
|
elif plot_length < 1000000:
|
||||||
|
fft_size = 1024
|
||||||
|
else:
|
||||||
|
fft_size = 2048
|
||||||
|
|
||||||
|
frequencies, times, Sxx = signal.spectrogram(
|
||||||
|
complex_signal,
|
||||||
|
fs=sample_rate,
|
||||||
|
nfft=fft_size,
|
||||||
|
nperseg=fft_size,
|
||||||
|
noverlap=fft_size // 8,
|
||||||
|
scaling="density",
|
||||||
|
mode="complex",
|
||||||
|
return_onesided=False,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Convert complex values to amplitude and then to log scale for visualization
|
||||||
|
Sxx_magnitude = np.abs(Sxx)
|
||||||
|
Sxx_log = np.log10(Sxx_magnitude + 1e-6)
|
||||||
|
|
||||||
|
# Normalize spectrogram values between 0 and 1 for plotting
|
||||||
|
Sxx_log_shifted = Sxx_log - np.min(Sxx_log)
|
||||||
|
Sxx_log_norm = Sxx_log_shifted / np.max(Sxx_log_shifted)
|
||||||
|
|
||||||
|
# Shift frequency bins and spectrogram rows so frequencies run from negative to positive
|
||||||
|
frequencies_shifted = np.fft.fftshift(frequencies)
|
||||||
|
Sxx_shifted = np.fft.fftshift(Sxx_log_norm, axes=0)
|
||||||
|
|
||||||
|
fig = go.Figure(
|
||||||
|
data=go.Heatmap(
|
||||||
|
z=Sxx_shifted,
|
||||||
|
x=times,  # spectrogram times from scipy are already in seconds
|
||||||
|
y=frequencies_shifted,
|
||||||
|
colorscale="Viridis",
|
||||||
|
zmin=0,
|
||||||
|
zmax=1,
|
||||||
|
reversescale=False,
|
||||||
|
showscale=False,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
if thumbnail:
|
||||||
|
fig.update_xaxes(showticklabels=False)
|
||||||
|
fig.update_yaxes(showticklabels=False)
|
||||||
|
fig.update_layout(
|
||||||
|
template="plotly_dark",
|
||||||
|
width=200,
|
||||||
|
height=100,
|
||||||
|
margin=dict(l=5, r=5, t=5, b=5),
|
||||||
|
xaxis=dict(scaleanchor=None),
|
||||||
|
yaxis=dict(scaleanchor=None),
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
fig.update_layout(
|
||||||
|
title="Spectrogram",
|
||||||
|
xaxis_title="Time [s]",
|
||||||
|
yaxis_title="Frequency [Hz]",
|
||||||
|
template="plotly_dark",
|
||||||
|
height=300,
|
||||||
|
width=800,
|
||||||
|
)
|
||||||
|
|
||||||
|
return fig
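# Note (illustrative summary of the heuristic above, not part of the original
# module): the FFT size picks 64 points for recordings under 2,000 samples,
# 256 under 10,000, 1,024 under 1,000,000, and 2,048 otherwise, trading
# frequency resolution against the number of time slices in the heat map.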
|
||||||
|
|
||||||
|
|
||||||
|
def iq_time_series(rec: Recording) -> Figure:
|
||||||
|
"""Create a time series plot of the real and imaginary parts of signal.
|
||||||
|
|
||||||
|
:param rec: Signal to plot.
|
||||||
|
:type rec: utils.data.Recording
|
||||||
|
|
||||||
|
:return: Time series plot as a Plotly figure.
|
||||||
|
"""
|
||||||
|
complex_signal = rec.data[0]
|
||||||
|
sample_rate = int(rec.metadata.get("sample_rate", 1))
|
||||||
|
plot_length = len(complex_signal)
|
||||||
|
t = np.arange(0, plot_length, 1) / sample_rate
|
||||||
|
|
||||||
|
fig = go.Figure()
|
||||||
|
fig.add_trace(go.Scatter(x=t, y=complex_signal.real, mode="lines", name="I (In-phase)", line=dict(width=0.6)))
|
||||||
|
fig.add_trace(go.Scatter(x=t, y=complex_signal.imag, mode="lines", name="Q (Quadrature)", line=dict(width=0.6)))
|
||||||
|
|
||||||
|
fig.update_layout(
|
||||||
|
title="IQ Time Series",
|
||||||
|
xaxis_title="Time [s]",
|
||||||
|
yaxis_title="Amplitude",
|
||||||
|
template="plotly_dark",
|
||||||
|
height=300,
|
||||||
|
width=800,
|
||||||
|
showlegend=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
return fig
|
||||||
|
|
||||||
|
|
||||||
|
def frequency_spectrum(rec: Recording) -> Figure:
|
||||||
|
"""Create a frequency spectrum plot from the recording.
|
||||||
|
|
||||||
|
:param rec: Input signal to plot.
|
||||||
|
:type rec: utils.data.Recording
|
||||||
|
|
||||||
|
:return: Frequency spectrum as a Plotly figure.
|
||||||
|
"""
|
||||||
|
complex_signal = rec.data[0]
|
||||||
|
center_frequency = int(rec.metadata.get("center_frequency", 0))
|
||||||
|
sample_rate = int(rec.metadata.get("sample_rate", 1))
|
||||||
|
|
||||||
|
epsilon = 1e-10
|
||||||
|
spectrum = np.abs(fftshift(fft(complex_signal)))
|
||||||
|
freqs = np.linspace(-sample_rate / 2, sample_rate / 2, len(complex_signal)) + center_frequency
|
||||||
|
log_spectrum = np.log10(spectrum + epsilon)
|
||||||
|
scaled_log_spectrum = (log_spectrum - log_spectrum.min()) / (log_spectrum.max() - log_spectrum.min())
|
||||||
|
|
||||||
|
fig = go.Figure()
|
||||||
|
fig.add_trace(go.Scatter(x=freqs, y=scaled_log_spectrum, mode="lines", name="Spectrum", line=dict(width=0.4)))
|
||||||
|
|
||||||
|
fig.update_layout(
|
||||||
|
title="Frequency Spectrum",
|
||||||
|
xaxis_title="Frequency [Hz]",
|
||||||
|
yaxis_title="Magnitude",
|
||||||
|
yaxis_type="log",
|
||||||
|
template="plotly_dark",
|
||||||
|
height=300,
|
||||||
|
width=800,
|
||||||
|
showlegend=False,
|
||||||
|
)
|
||||||
|
|
||||||
|
return fig
|
||||||
|
|
||||||
|
|
||||||
|
def constellation(rec: Recording) -> Figure:
|
||||||
|
"""Create a constellation plot from the recording.
|
||||||
|
|
||||||
|
:param rec: Input signal to plot.
|
||||||
|
:type rec: utils.data.Recording
|
||||||
|
|
||||||
|
:return: Constellation as a Plotly figure.
|
||||||
|
"""
|
||||||
|
complex_signal = rec.data[0]
|
||||||
|
|
||||||
|
# Downsample the IQ samples to a target number of points
|
||||||
|
# This reduces the amount of data plotted, improving performance and interactivity
|
||||||
|
# without losing significant detail in the constellation visualization.
|
||||||
|
target_number_of_points = 5000
|
||||||
|
step = max(1, len(complex_signal) // target_number_of_points)
|
||||||
|
i_ds = complex_signal.real[::step]
|
||||||
|
q_ds = complex_signal.imag[::step]
|
||||||
|
|
||||||
|
fig = go.Figure()
|
||||||
|
fig.add_trace(go.Scatter(x=i_ds, y=q_ds, mode="lines", name="Constellation", line=dict(width=0.2)))
|
||||||
|
|
||||||
|
fig.update_layout(
|
||||||
|
title="Constellation",
|
||||||
|
xaxis_title="In-phase (I)",
|
||||||
|
yaxis_title="Quadrature (Q)",
|
||||||
|
template="plotly_dark",
|
||||||
|
height=400,
|
||||||
|
width=400,
|
||||||
|
showlegend=False,
|
||||||
|
xaxis=dict(range=[-1.1, 1.1]),
|
||||||
|
yaxis=dict(range=[-1.1, 1.1]),
|
||||||
|
)
|
||||||
|
|
||||||
|
return fig
|