Populating initial source code from RIA Utils project

This commit is contained in:
Michael Luciuk 2025-09-02 11:35:41 -04:00
parent 25e39d3544
commit d8a7dc16be
22 changed files with 4912 additions and 0 deletions

View File

@ -0,0 +1,8 @@
"""
The Data package contains abstract data types tailored for radio machine learning, such as ``Recording``, as well
as the abstract interfaces for the radio dataset and radio dataset builder framework.
"""
__all__ = ["Annotation", "Recording"]
from .annotation import Annotation
from .recording import Recording

View File

@ -0,0 +1,128 @@
from __future__ import annotations
import json
from typing import Any, Optional
from sigmf import SigMFFile
class Annotation:
    """Signal annotations are labels or additional information associated with specific data points or segments within
    a signal. These annotations could be used for tasks like supervised learning, where the goal is to train a model
    to recognize patterns or characteristics in the signal associated with these annotations.

    Annotations can be used to label interesting points in your recording.

    :param sample_start: The index of the starting sample of the annotation.
    :type sample_start: int
    :param sample_count: The number of samples the annotation spans; the annotation covers samples
        ``sample_start`` through ``sample_start + sample_count - 1``.
    :type sample_count: int
    :param freq_lower_edge: The lower frequency of the annotation.
    :type freq_lower_edge: float
    :param freq_upper_edge: The upper frequency of the annotation.
    :type freq_upper_edge: float
    :param label: The label that will be displayed with the bounding box in compatible viewers including IQEngine.
        Defaults to an empty string.
    :type label: str, optional
    :param comment: A human-readable comment. Defaults to an empty string.
    :type comment: str, optional
    :param detail: A dictionary of user defined annotation-specific metadata. Defaults to None.
    :type detail: dict, optional
    """

    def __init__(
        self,
        sample_start: int,
        sample_count: int,
        freq_lower_edge: float,
        freq_upper_edge: float,
        label: Optional[str] = "",
        comment: Optional[str] = "",
        detail: Optional[dict] = None,
    ):
        """Initialize a new Annotation instance.

        :raises ValueError: If ``detail`` is not JSON serializable.
        """
        self.sample_start = int(sample_start)
        self.sample_count = int(sample_count)
        self.freq_lower_edge = float(freq_lower_edge)
        self.freq_upper_edge = float(freq_upper_edge)
        # label/comment are typed Optional, so treat an explicit None like the default empty string
        # instead of storing the literal string "None" (which str(None) would produce).
        self.label = "" if label is None else str(label)
        self.comment = "" if comment is None else str(comment)
        if detail is None:
            self.detail = {}
        elif not _is_jsonable(detail):
            raise ValueError(f"Detail object is not json serializable: {detail}")
        else:
            self.detail = detail

    def is_valid(self) -> bool:
        """
        Check that the annotation sample count is > 0 and the freq_lower_edge < freq_upper_edge.

        :returns: True if valid, False if not.
        """
        return self.sample_count > 0 and self.freq_lower_edge < self.freq_upper_edge

    def overlap(self, other: Annotation) -> float:
        """
        Quantify how much the bounding box in this annotation overlaps with another annotation.

        :param other: The other annotation.
        :type other: Annotation
        :returns: The area of the overlap in samples*frequency, or 0 if they do not overlap."""
        sample_overlap_start = max(self.sample_start, other.sample_start)
        sample_overlap_end = min(self.sample_start + self.sample_count, other.sample_start + other.sample_count)
        freq_overlap_start = max(self.freq_lower_edge, other.freq_lower_edge)
        freq_overlap_end = min(self.freq_upper_edge, other.freq_upper_edge)
        if freq_overlap_start >= freq_overlap_end or sample_overlap_start >= sample_overlap_end:
            return 0
        else:
            return (sample_overlap_end - sample_overlap_start) * (freq_overlap_end - freq_overlap_start)

    def area(self) -> float:
        """
        The 'area' of the bounding box, samples*frequency.
        Useful to quantify annotation size.

        :returns: sample length multiplied by bandwidth."""
        return self.sample_count * (self.freq_upper_edge - self.freq_lower_edge)

    def __eq__(self, other: object) -> bool:
        # Returning NotImplemented (instead of unconditionally touching other.__dict__) lets
        # comparisons against non-Annotation objects fall back to Python's default semantics
        # rather than raising AttributeError.
        if not isinstance(other, Annotation):
            return NotImplemented
        return self.__dict__ == other.__dict__

    def to_sigmf_format(self) -> dict:
        """
        Returns a JSON dictionary representing this annotation formatted to be saved in a .sigmf-meta file.

        :raises ValueError: If the resulting dictionary is not JSON serializable.
        """
        annotation_dict = {SigMFFile.START_INDEX_KEY: self.sample_start, SigMFFile.LENGTH_INDEX_KEY: self.sample_count}
        annotation_dict["metadata"] = {
            SigMFFile.LABEL_KEY: self.label,
            SigMFFile.COMMENT_KEY: self.comment,
            SigMFFile.FHI_KEY: self.freq_upper_edge,
            SigMFFile.FLO_KEY: self.freq_lower_edge,
            "ria:detail": self.detail,
        }
        if _is_jsonable(annotation_dict):
            return annotation_dict
        else:
            raise ValueError("Annotation dictionary was not json serializable.")
def _is_jsonable(x: Any) -> bool:
"""
:return: True if x is JSON serializable, False otherwise.
"""
try:
json.dumps(x)
return True
except (TypeError, OverflowError):
return False

View File

@ -0,0 +1,12 @@
"""
The Radio Dataset Subpackage defines the abstract interfaces and framework components for the management of machine
learning datasets tailored for radio signal processing.
"""
__all__ = ["RadioDataset", "IQDataset", "SpectDataset", "DatasetBuilder", "split", "random_split"]
from .dataset_builder import DatasetBuilder
from .iq_dataset import IQDataset
from .radio_dataset import RadioDataset
from .spect_dataset import SpectDataset
from .split import random_split, split

View File

@ -0,0 +1,137 @@
"""
A `DatasetBuilder` is a creator class that manages the download, preparation, and creation of radio datasets.
"""
from abc import ABC, abstractmethod
from typing import Any, Optional
from packaging.version import Version
from utils._utils.abstract_attribute import abstract_attribute
from utils.data.datasets.license.dataset_license import DatasetLicense
from utils.data.datasets.radio_dataset import RadioDataset
class DatasetBuilder(ABC):
    """Abstract interface for radio dataset builders. These builders produce radio datasets for common and project
    datasets related to radio science.

    This class should not be instantiated directly. Instead, subclass it to define specific builders for different
    datasets.
    """

    _url: str = abstract_attribute()
    # The checksum attributes default to None so the sha256/md5 properties (documented as returning
    # "None if not set") do not raise AttributeError on builders that never assign them. Previously
    # _SHA256 was an annotation with no value and _MD5 was not declared at all, so both properties raised.
    _SHA256: Optional[str] = None  # SHA256 checksum.
    _MD5: Optional[str] = None  # MD5 checksum.
    _name: str = abstract_attribute()
    _author: str = abstract_attribute()
    _license: DatasetLicense = abstract_attribute()
    _version: Version = abstract_attribute()
    _latest_version: Optional[Version] = None

    def __init__(self):
        super().__init__()

    @property
    def name(self) -> str:
        """
        :return: The name of the dataset.
        :rtype: str
        """
        return self._name

    @property
    def author(self) -> str:
        """
        :return: The author of the dataset.
        :rtype: str
        """
        return self._author

    @property
    def url(self) -> str:
        """
        :return: The URL where the dataset was accessed.
        :rtype: str
        """
        return self._url

    @property
    def sha256(self) -> Optional[str]:
        """
        :return: The SHA256 checksum, or None if not set.
        :rtype: str or None
        """
        return self._SHA256

    @property
    def md5(self) -> Optional[str]:
        """
        :return: The MD5 checksum, or None if not set.
        :rtype: str or None
        """
        return self._MD5

    @property
    def version(self) -> Version:
        """
        :return: The version identifier of the dataset.
        :rtype: Version
        """
        return self._version

    @property
    def latest_version(self) -> Optional[Version]:
        """
        :return: The version identifier of the latest available version of the dataset, or None if not set.
        :rtype: Version or None
        """
        return self._latest_version

    @property
    def license(self) -> DatasetLicense:
        """
        :return: The dataset license information.
        :rtype: DatasetLicense
        """
        return self._license

    @property
    def info(self) -> dict[str, Any]:
        """
        :return: Information about the dataset including the name, author, and version of the dataset.
        :rtype: dict
        """
        # TODO: We should increase the amount of information that's included here. See the information included in
        #  tfds.core.DatasetInfo for more: https://www.tensorflow.org/datasets/api_docs/python/tfds/core/DatasetInfo.
        return {
            "name": self.name,
            "author": self.author,
            "url": self.url,
            "sha256": self.sha256,
            "md5": self.md5,
            "version": self.version,
            "license": self.license,
            "latest_version": self.latest_version,
        }

    @abstractmethod
    def download_and_prepare(self) -> None:
        """Download and prepare the dataset for use as an HDF5 source file.

        Once an HDF5 source file has been prepared, the downloaded files are deleted.
        """

    @abstractmethod
    def as_dataset(self, backend: str) -> RadioDataset:
        """A factory method to manage the creation of radio datasets.

        Note: Depending on your installation, not all backends may be available.

        :param backend: Backend framework to use ("pytorch" or "tensorflow").
        :type backend: str
        :return: A new RadioDataset based on the signal representation and specified backend.
        :rtype: RadioDataset
        """

View File

@ -0,0 +1,221 @@
import os
import h5py
import numpy as np
def copy_dataset_entry_by_index(
    source: str | os.PathLike, destination: str | os.PathLike, dataset_path: str, idx: int
) -> None:
    """
    Copies an entry from a dataset based on an index from the source HDF5 file to the destination HDF5 file.

    The copied entry is appended to the end of the destination dataset.

    :param source: The name of the original HDF5 file.
    :type source: str
    :param destination: The name of the new HDF5 file.
    :type destination: str
    :param dataset_path: The path of the dataset from the root of the file.
    :type dataset_path: str
    :param idx: The index of the specified example.
    :type idx: int
    :return: None
    """
    # TODO: Generalize so that source and destination can be file objects or strings
    with h5py.File(source, "r") as src_file, h5py.File(destination, "a") as dst_file:
        value = src_file[dataset_path][idx]
        target = dst_file[dataset_path]
        # Grow the destination by one row, then write the copied entry into the new last slot.
        target.resize(target.shape[0] + 1, axis=0)
        target[-1] = value
def copy_over_example(source: str | os.PathLike, destination: str | os.PathLike, idx: int) -> None:
    """
    Copies over an example and its corresponding metadata located at the given index to a new file.
    It appends the new example to the end of the new file.

    :param source: The name of the original HDF5 file.
    :type source: str or os.PathLike
    :param destination: The name of the new HDF5 file.
    :type destination: str or os.PathLike
    :param idx: The index of the example within the dataset.
    :type idx: int
    :return: None
    """
    with h5py.File(source, "r") as src, h5py.File(destination, "a") as dst:
        # The example and its metadata live in parallel datasets; append to both in lockstep.
        for path in ("data", "metadata/metadata"):
            target = dst[path]
            target.resize(target.shape[0] + 1, axis=0)
            target[-1] = src[path][idx]
def append_entry_inplace(source: str | os.PathLike, dataset_path: str, entry: np.ndarray) -> None:
    """
    Appends an entry to the specified dataset of the source HDF5 file. This operation is done inplace.

    :param source: The name of the source HDF5 file.
    :type source: str or os.PathLike
    :param dataset_path: The path of the dataset from the root of the file.
    :type dataset_path: str
    :param entry: The entry that is being copied.
    :type entry: np.ndarray
    :return: None
    """
    # TODO: Generalize so that source can be file object or string
    with h5py.File(source, "a") as handle:
        dataset = handle[dataset_path]
        new_length = dataset.shape[0] + 1
        # Grow along the example axis, then fill the freshly-created final slot.
        dataset.resize(new_length, axis=0)
        dataset[new_length - 1] = entry
def duplicate_entry_inplace(source: str | os.PathLike, dataset_path: str, idx: int) -> None:
    """
    Appends the entry at index to the end of the dataset. This operation is done inplace.

    :param source: The name of the source HDF5 file.
    :type source: str or os.PathLike
    :param dataset_path: The path of the dataset from the root of the file. This dataset is usually
        'data' or 'metadata/metadata'.
    :type dataset_path: str
    :param idx: The index of the example within the dataset.
    :type idx: int
    :return: None
    """
    # This function appends to dataset, so upon dataset creation, chunks has to = True and max_size has to = None
    with h5py.File(source, "a") as handle:
        dataset = handle[dataset_path]
        # Read the entry out *before* resizing, then write it into the new final slot.
        duplicate = dataset[idx]
        dataset.resize(dataset.shape[0] + 1, axis=0)
        dataset[-1] = duplicate
def copy_file(original_source: str | os.PathLike, new_source: str | os.PathLike) -> None:
    """Copies contents of source HDF5 file to a new HDF5 file.

    :param original_source: The name of the original HDF5 source file.
    :type original_source: str or os.PathLike
    :param new_source: The copy of the HDF5 source file.
    :type new_source: str or os.PathLike
    :return: None
    """
    # Open the source inside the context manager as well, so its handle is closed even when a copy
    # raises (the previous implementation only closed it on the success path).
    with h5py.File(original_source, "r") as original_file, h5py.File(new_source, "w") as new_file:
        for key in original_file.keys():
            original_file.copy(key, new_file)
def make_empty_clone(original_source: str | os.PathLike, new_source: str | os.PathLike, example_length: int) -> None:
    """Creates a new HDF5 file with the same structure but will leave metadata and dataset empty for operations.

    :param original_source: The name of the original HDF5 source file.
    :type original_source: str or os.PathLike
    :param new_source: The name of the new HDF5 source file.
    :type new_source: str or os.PathLike
    :param example_length: The desired length of an example in the new file.
    :type example_length: int
    :return: None
    """
    with h5py.File(new_source, "w") as clone, h5py.File(original_source, "r") as template:
        for key in template.keys():
            if key == "data":
                # Empty, resizable data set: original channel count and dtype, but the new example length.
                n_channels = template["data"].shape[1]
                clone.create_dataset(
                    "data",
                    shape=(0, n_channels, example_length),
                    chunks=True,
                    maxshape=(None, None, None),
                    dtype=template["data"].dtype,
                )
            elif key == "metadata":
                # Empty, resizable metadata dataset inside a fresh "metadata" group.
                clone.create_group("metadata").create_dataset(
                    "metadata",
                    shape=(0,),
                    chunks=True,
                    maxshape=(None,),
                    dtype=template["metadata/metadata"].dtype,
                )
            else:
                # Any other top-level object is copied over verbatim.
                template.copy(key, clone)
def delete_example_inplace(source: str | os.PathLike, idx: int) -> None:
    """Deletes an example and its corresponding metadata located at the given index.

    This deletion is done by creating a temporary dataset and copying all contents
    to the temporary dataset except for the example at idx. This operation is inplace.

    :param source: The name of the source HDF5 file.
    :type source: str or os.PathLike
    :param idx: The index of the example and metadata to be deleted.
    :type idx: int
    :raises IndexError: If idx is out of range for the data dataset.
    :raises ValueError: If the data and metadata datasets have different lengths.
    :return: None
    """
    with h5py.File(source, "a") as f:
        ds, md = f["data"], f["metadata/metadata"]
        m, c, n = ds.shape
        # Validate with real exceptions instead of asserts, which are silently stripped under `python -O`.
        if not 0 <= idx < m:
            raise IndexError(f"idx {idx} is out of range for a dataset with {m} examples")
        if len(ds) != len(md):
            raise ValueError("data and metadata/metadata datasets have different lengths")
        new_ds = f.create_dataset(
            "data.temp",
            shape=(m - 1, c, n),
            chunks=True,
            dtype=ds.dtype,
            maxshape=(None, None, None),  # Required to allow future mutations which expand the shape
        )
        new_md = f.create_dataset(
            "metadata/metadata.temp", shape=len(md) - 1, chunks=True, dtype=md.dtype, maxshape=(None,)
        )
        # Bulk slice copies instead of per-row loops: one HDF5 read/write per side of the deletion
        # point, rather than one per example.
        if idx > 0:
            new_ds[:idx], new_md[:idx] = ds[:idx], md[:idx]
        if idx < m - 1:
            new_ds[idx:], new_md[idx:] = ds[idx + 1 :], md[idx + 1 :]
        del f["data"]
        del f["metadata/metadata"]
        f.move("data.temp", "data")
        f.move("metadata/metadata.temp", "metadata/metadata")
def overwrite_file(source: str | os.PathLike, new_data: np.ndarray) -> None:
    """
    Overwrites data in an HDF5 file with new data.

    The first top-level dataset in the file is deleted and recreated from ``new_data``.

    :param source: The copy of the HDF5 source file.
    :type source: str or os.PathLike
    :param new_data: The updated copy of the data that should be stored.
    :type new_data: np.ndarray
    :return: None
    """
    # TODO: Might need to pass in dataset_path instead of dataset_name depending on file structure
    # Update copy to include augmented data
    with h5py.File(source, "r+") as f:
        # next(iter(...)) grabs the first key without materializing the whole key list.
        ds_name = next(iter(f.keys()))
        del f[ds_name]
        f.create_dataset(ds_name, data=new_data)
        # NOTE: the previous version called f.close() here; the context manager already closes the file.

View File

@ -0,0 +1,210 @@
from __future__ import annotations
import os
from abc import ABC
from typing import Optional
import h5py
import numpy as np
from utils.data.datasets.h5helpers import (
append_entry_inplace,
copy_dataset_entry_by_index,
)
from utils.data.datasets.radio_dataset import RadioDataset
class IQDataset(RadioDataset, ABC):
    """An ``IQDataset`` is a ``RadioDataset`` tailored for machine learning tasks that involve processing
    radiofrequency (RF) signals represented as In-phase (I) and Quadrature (Q) samples.

    For machine learning tasks that involve processing spectrograms, please use
    utils.data.datasets.SpectDataset instead.

    This is an abstract interface defining common properties and behaviour of IQDatasets. Therefore, this class
    should not be instantiated directly. Instead, it is subclassed to define custom interfaces for specific machine
    learning backends.

    :param source: Path to the dataset source file. For more information on dataset source files
        and their format, see :doc:`radio_datasets`.
    :type source: str or os.PathLike
    """

    def __init__(self, source: str | os.PathLike):
        """Create a new IQDataset."""
        super().__init__(source=source)

    @property
    def shape(self) -> tuple[int]:
        """IQ datasets are M x C x N, where M is the number of examples, C is the number of channels, N is the length
        of the signals.

        :return: The shape of the dataset. The elements of the shape tuple give the lengths of the corresponding
            dataset dimensions.
        :type: tuple of ints
        """
        return super().shape

    def trim_examples(
        self, trim_length: int, keep: Optional[str] = "start", inplace: Optional[bool] = False
    ) -> IQDataset | None:
        """Trims all examples in a dataset to a desired length.

        :param trim_length: The desired length of the trimmed examples.
        :type trim_length: int
        :param keep: Specifies the part of the example to keep. Defaults to "start".
            The options are:

            - "start"
            - "end"
            - "middle"
            - "random"
        :type keep: str, optional
        :param inplace: If True, the operation modifies the existing source file directly and returns None.
            If False, the operation creates a new dataset object and corresponding source file, leaving the original
            dataset unchanged. Default is False.
        :type inplace: bool
        :raises ValueError: If trim_length is greater than or equal to the length of the examples.
        :raises ValueError: If value of keep is not recognized.
        :raises ValueError: If specified trim length is invalid for middle index.
        :return: The dataset that is composed of shorter examples, or None when ``inplace`` is True.
        :rtype: IQDataset or None

        **Examples:**

        >>> from ria.dataset_manager.builders import AWGN_Builder
        >>> builder = AWGN_Builder()
        >>> builder.download_and_prepare()
        >>> ds = builder.as_dataset()
        >>> ds.shape
        (5, 1, 3)
        >>> new_ds = ds.trim_examples(2)
        >>> new_ds.shape
        (5, 1, 2)
        """
        keep = keep.lower()
        channels, example_length = np.shape(self[0])
        if trim_length >= example_length:
            raise ValueError(f"Trim length must be less than {example_length}")
        if keep not in {"start", "end", "middle", "random"}:
            raise ValueError('keep must be "start", "end", "middle", or "random"')
        start = None
        if keep == "middle":
            # NOTE(review): this anchors the slice *at* the midpoint rather than centering it around the
            # midpoint, so a trim_length longer than half the example is rejected below — confirm intended.
            start = int(example_length / 2)
            if start + trim_length > example_length:
                raise ValueError(f"Trim length of {trim_length} is invalid for middle index of: {start} ")
        elif keep == "random":
            start = np.random.randint(0, example_length - trim_length + 1)
        if not inplace:
            ds = self._create_next_dataset(example_length=trim_length)
        with h5py.File(self.source, "a") as f:
            data = f["data"]
            for idx in range(len(self)):
                trimmed_example = generate_trimmed_example(
                    example=data[idx],
                    keep=keep,
                    trim_length=trim_length,
                    start=start,
                )
                if not inplace:
                    # Build the new dataset example-by-example, carrying each example's metadata across unchanged.
                    append_entry_inplace(source=ds.source, dataset_path="data", entry=trimmed_example)
                    copy_dataset_entry_by_index(
                        source=self.source, destination=ds.source, dataset_path="metadata/metadata", idx=idx
                    )
                else:
                    # Zero-pad back to the original length so the row can be written in place; the dataset
                    # is resized down to trim_length once every example has been rewritten.
                    trimmed_example = np.pad(
                        trimmed_example, ((0, 0), (0, example_length - trim_length)), "constant", constant_values=0
                    )
                    data[idx] = trimmed_example
            if not inplace:
                return ds
            else:
                data.resize(trim_length, axis=2)

    def split_examples(
        self, split_factor: Optional[int] = None, example_length: Optional[int] = None, inplace: Optional[bool] = False
    ) -> IQDataset | None:
        """Split each example into several shorter examples.

        If the current example length is not evenly divisible by the provided example_length, excess samples are
        discarded. Excess examples are always at the end of the slice. If the split factor results in non-integer
        example lengths for the new example chunks, it rounds down.

        Requires either split_factor or example_length to be specified but not both. If both are provided,
        split factor will be used by default, and a warning will be raised.

        :param split_factor: the number of new example chunks produced from each original example, defaults to None.
        :type split_factor: int, optional
        :param example_length: the example length of the new example chunks, defaults to None.
        :type example_length: int, optional
        :param inplace: If True, the operation modifies the existing source file directly and returns None.
            If False, the operation creates a new dataset object and corresponding source file, leaving the original
            dataset unchanged. Default is False.
        :type inplace: bool, optional
        :raises NotImplementedError: Always; this method is not yet implemented.
        :return: A dataset with more examples that are shorter.
        :rtype: IQDataset

        **Examples:**

        If the dataset has 100 examples of length 1024 and the split factor is 2, the resulting dataset
        will have 200 examples of 512. No samples have been discarded.

        If the example dataset has 100 examples of length 1024 and the example length is 100, the resulting dataset
        will have 1000 examples of length 100. The remaining 24 samples from each example have been discarded.
        """
        if split_factor is not None and example_length is not None:
            # The docstring promises a *warning* with split_factor taking precedence; the previous code
            # raised the Warning as an exception, aborting instead of warning.
            import warnings

            warnings.warn("split_factor and example_length should not both be specified; using split_factor.")
        if not inplace:
            # ds = self.create_new_dataset(example_length=example_length)
            pass
        raise NotImplementedError
def generate_trimmed_example(
    example: np.ndarray, keep: str, trim_length: int, start: Optional[int] = None
) -> np.ndarray:
    """Takes in an IQ example as input and returns a trimmed example.

    :param example: The example to be trimmed (channels x samples).
    :type example: np.ndarray
    :param keep: The position the trimming occurs from ("start", "end", "middle", or "random").
    :type keep: str
    :param trim_length: The desired length of the trimmed example.
    :type trim_length: int
    :param start: The starting index if keep = "middle" or "random".
    :type start: int, optional
    :return: The trimmed example.
    :rtype: np.ndarray
    """
    if keep == "start":
        return example[:, :trim_length]
    if keep == "end":
        return example[:, -trim_length:]
    # "middle" and "random" both take trim_length samples beginning at the caller-provided start index.
    return example[:, start : start + trim_length]

View File

@ -0,0 +1,211 @@
"""
This package contains the ``DatasetLicense`` class and a bunch of off-the-shelf implementations for several common
license types.
Common license types for datasets courtesy of the University of Calgary:
`Common license types for datasets and what they mean <https://libanswers.ucalgary.ca/faq/200582>`_
.. note::
License descriptions are provided for informational purposes only and should not be construed as legal advice.
For legal guidance, please refer to official licence documentation and consult with legal professionals specializing
in software and dataset licensing.
.. note::
When licensing datasets, it's recommended to use licenses specifically designed for data, rather than using
software licenses such as MIT, Apache, or GPL.
"""
__all__ = [
"DatasetLicense",
"PUBLIC_DOMAIN",
"CC_0",
"CC_BY",
"CC_BY_NC",
"CC_BY_NC_ND",
"CC_BY_NC_SA",
"CC_BY_ND",
"CC_BY_SA",
"ODC_BY",
"ODC_PDDL",
"ODC_ODbL",
"RESTRICTED",
]
from .dataset_license import DatasetLicense
PUBLIC_DOMAIN = DatasetLicense(
name="Public Domain (No License)",
identifier=None,
description="Technically not a license, the public domain mark relinquishes all rights to a dataset and "
"dedicates the dataset to the public domain.",
licence="https://creativecommons.org/public-domain/pdm/",
)
"""
`Public Domain <https://creativecommons.org/public-domain/pdm/>`_: Technically not a license, the public domain mark
relinquishes all rights to a dataset and dedicates the dataset to the public domain.
"""
CC_0 = DatasetLicense(
name="Creative Commons Public Domain Dedication",
identifier="CC0-1.0",
description="A Creative Commons license and is like a public domain dedication. The copyright holder "
"surrenders rights in a dataset using this license.",
licence="https://creativecommons.org/publicdomain/zero/1.0/",
)
"""
`Creative Commons Public Domain Dedication <https://creativecommons.org/public-domain/pdm/>`_: A Creative Commons
license and is like a public domain dedication. The copyright holder surrenders rights in a dataset using this license.
"""
ODC_PDDL = DatasetLicense(
name="Open Data Commons Public Domain Dedication and License",
identifier="PDDL-1.0",
description="This license is one of the Open Data Commons licenses and is like a public domain dedication. "
"The copyright holder surrenders rights in a dataset using this license.",
licence="https://opendatacommons.org/licenses/pddl/",
)
"""
`Open Data Commons Public Domain Dedication and License <https://opendatacommons.org/licenses/pddl/>`_: This license
is one of the Open Data Commons licenses and is like a public domain dedication. The copyright holder surrenders rights
in a dataset using this license.
"""
CC_BY = DatasetLicense(
name="Creative Commons Attribution 4.0 International",
identifier="CC-BY-4.0",
description="This license is one of the open Creative Commons licenses and allows users to share and adapt "
"the dataset so long as they give credit to the copyright holder.",
licence="https://creativecommons.org/licenses/by/4.0/",
)
"""
`Creative Commons Attribution 4.0 International <https://creativecommons.org/licenses/by/4.0/>`_: This license is one
of the open Creative Commons licenses and allows users to share and adapt the dataset so long as they give credit to
the copyright holder.
"""
ODC_BY = DatasetLicense(
name="Open Data Commons Attribution License",
identifier="ODC-By-1.0",
description="This license is one of the Open Data Commons licenses and allows users to share and adapt the "
"dataset as long as they give credit to the copyright holder.",
licence="https://opendatacommons.org/licenses/by/",
)
"""
`Open Data Commons Attribution License <https://opendatacommons.org/licenses/by/>`_: This license is one of the Open
Data Commons licenses and allows users to share and adapt the dataset as long as they give credit to the copyright
holder.
"""
CC_BY_SA = DatasetLicense(
name="Creative Commons Attribution-ShareAlike 4.0 International",
identifier="CC-BY-SA-4.0",
description="This license is one of the open Creative Commons licenses and allows users to share and adapt "
"the dataset as long as they give credit to the copyright holder and distribute any additions, "
"transformations or changes to the dataset under this same license.",
licence="https://creativecommons.org/licenses/by-sa/4.0/",
)
"""
`Creative Commons Attribution-ShareAlike 4.0 International <https://creativecommons.org/licenses/by-sa/4.0/>`_: This
license is one of the open Creative Commons licenses and allows users to share and adapt the dataset as long as they
give credit to the copyright holder and distribute any additions, transformations or changes to the dataset under
this same license.
"""
ODC_ODbL = DatasetLicense(
name="Open Data Commons Open Database License",
identifier="ODbL-1.0",
description="This license is one of the Open Data Commons licenses and allows users to share and adapt the "
"dataset as long as they give credit to the copyright holder and distribute any additions, "
"transformation or changes to the dataset.",
licence="https://opendatacommons.org/licenses/odbl/",
)
"""
`Open Data Commons Open Database License <https://opendatacommons.org/licenses/odbl/>`_: This license is one of the
Open Data Commons licenses and allows users to share and adapt the dataset as long as they give credit to the copyright
holder and distribute any additions, transformation or changes to the dataset.
"""
CC_BY_NC = DatasetLicense(
name="Creative Commons Attribution-NonCommercial 4.0 International",
identifier="CC-BY-NC-4.0",
description="This license is one of the Creative Commons licenses and allows users to share and adapt the "
"dataset if they give credit to the copyright holder and do not use the dataset for any "
"commercial purposes.",
licence="https://creativecommons.org/licenses/by-nc/4.0/",
)
"""
`Creative Commons Attribution-NonCommercial 4.0 International <https://creativecommons.org/licenses/by-nc/4.0/>`_: This
license is one of the Creative Commons licenses and allows users to share and adapt the dataset if they give credit to
the copyright holder and do not use the dataset for any commercial purposes.
"""
CC_BY_ND = DatasetLicense(
name="Creative Commons Attribution-NoDerivatives 4.0 International",
identifier="CC-BY-ND-4.0",
description="This license is one of the Creative Commons licenses and allows users to share the dataset if "
"they give credit to copyright holder, but they cannot make any additions, transformations or "
"changes to the dataset under this license.",
licence="https://creativecommons.org/licenses/by-nd/4.0/",
)
"""
`Creative Commons Attribution-NoDerivatives 4.0 International <https://creativecommons.org/licenses/by-nd/4.0/>`_: This
license is one of the Creative Commons licenses and allows users to share the dataset if they give credit to copyright
holder, but they cannot make any additions, transformations or changes to the dataset under this license.
"""
CC_BY_NC_SA = DatasetLicense(
name="Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International",
identifier="CC-BY-NC-SA-4.0",
description="This license is one of the Creative Commons licenses and allows users to share the dataset only "
"if they (1) give credit to the copyright holder, (2) do not use the dataset for any commercial "
"purposes, and (3) distribute any additions, transformations or changes to the dataset under this "
"same license.",
licence="https://creativecommons.org/licenses/by-nc-sa/4.0/",
)
"""
`Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International
<https://creativecommons.org/licenses/by-nc-sa/4.0/>`_: This license is one of the Creative Commons licenses and allows
users to share the dataset only if they (1) give credit to the copyright holder, (2) do not use the dataset for any
commercial purposes, and (3) distribute any additions, transformations or changes to the dataset under this same
license.
"""
CC_BY_NC_ND = DatasetLicense(
name="Creative Commons Attribution-NonCommercial-NoDerivatives 4.0 International",
identifier="CC-BY-NC-ND-4.0",
description="This license is one of the Creative Commons licenses and allows users to use only your "
"unmodified dataset if they give credit to the copyright holder and do not share it for "
"commercial purposes. Users cannot make any additions, transformations or changes to the dataset"
"under this license.",
licence="https://creativecommons.org/licenses/by-nc-nd/4.0/",
)
"""
`Creative Commons Attribution-NonCommercial-NoDerivatives 4.0 International
<https://creativecommons.org/licenses/by-nc-nd/4.0/>`_: This license is one of the Creative Commons licenses and allows
users to use only your unmodified dataset if they give credit to the copyright holder and do not share it for
commercial purposes. Users cannot make any additions, transformations or changes to the dataset under this license.
"""
RESTRICTED = DatasetLicense(
name="Restricted (All Rights Reserved)",
identifier="Restricted",
description="All rights reserved. No permissions granted for use, modification, or distribution of the dataset.",
licence="Restricted (All Rights Reserved)",
)
"""
Restricted (All Rights Reserved): No permissions granted for use, modification, or distribution of the dataset.
"""

View File

@ -0,0 +1,13 @@
from dataclasses import dataclass
@dataclass
class DatasetLicense:
    """
    Represents a dataset license.

    A simple value object used by dataset builders to describe the terms under which a dataset is
    distributed; ready-made instances for common license types are provided alongside this class.
    """

    name: str  #: The name or title of the license.
    identifier: str | None  #: SPDX short identifier, or None if one does not exist.
    description: str  #: A description of the license.
    licence: str  #: Full license text or URL if the license is available online.

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,57 @@
from __future__ import annotations
import os
from abc import ABC
from utils.data.datasets.radio_dataset import RadioDataset
class SpectDataset(RadioDataset, ABC):
    """A ``SpectDataset`` is a ``RadioDataset`` tailored for machine learning tasks that involve processing
    radiofrequency (RF) signals represented as spectrograms. This class is integrated with vision frameworks,
    allowing you to leverage models and techniques from the field of computer vision for analyzing and processing
    radio signal spectrograms.

    For machine learning tasks that involve processing on IQ samples, please use
    utils.data.datasets.IQDataset instead.

    This is an abstract interface defining common properties and behaviour of SpectDatasets. Therefore, this class
    should not be instantiated directly. Instead, it is subclassed to define custom interfaces for specific machine
    learning backends.

    :param source: Path to the dataset source file. For more information on dataset source files
        and their format, see :doc:`radio_datasets`.
    :type source: str or os.PathLike
    """

    def __init__(self, source: str | os.PathLike):
        """Create a new SpectDataset."""
        super().__init__(source=source)

    @property
    def shape(self) -> tuple[int, ...]:
        """Spectrogram datasets are M x C x H x W, where M is the number of examples, C is the number of image
        channels, H is the height of the spectrogram, and W is the width of the spectrogram.

        :return: The shape of the dataset. The elements of the shape tuple give the lengths of the corresponding
            dataset dimensions.
        :type: tuple of ints
        """
        # Delegates to RadioDataset; this override exists to document the spectrogram-specific dimension layout.
        return super().shape

    def default_augmentations(self) -> list[callable]:
        """Returns the list of default augmentations for spectrogram datasets.

        .. todo:: This method is not yet implemented.

        :return: A list of default augmentations.
        :rtype: list[callable]
        """
        # Consider the following list of default augmentations:
        #  #. horizontal_flip
        #  #. vertical_flip
        #  #. sharpen
        #  #. darken
        #  #. lighten
        #  #. linear_rotate
        raise NotImplementedError

View File

@ -0,0 +1,317 @@
import math
import os
from collections import Counter
from typing import Optional
import numpy as np
from numpy.random import Generator
from utils.data.datasets import RadioDataset
from utils.data.datasets.h5helpers import copy_over_example, make_empty_clone
def split(dataset: RadioDataset, lengths: list[int | float]) -> list[RadioDataset]:
    """Split a radio dataset into non-overlapping new datasets of given lengths.

    Recordings are long-form tapes, which can be obtained either from a software-defined radio (SDR) or generated
    synthetically. Then, radio datasets are curated from collections of recordings by segmenting these
    longer-form tapes into shorter units called slices.

    For each slice in the dataset, the metadata should include the unique ID of the recording from which the example
    was cut ('rec_id'). To avoid leakage, all examples with the same 'rec_id' are assigned only to one of the new
    datasets. This ensures, for example, that slices cut from the same recording do not appear in both the training
    and test datasets.

    This restriction makes it challenging to generate datasets with the exact lengths specified. To get as close as
    possible, this method uses a greedy algorithm, which assigns the recordings with the most slices first, working
    down to those with the fewest. This may not always provide a perfect split, but it works well in most practical
    cases.

    This function is deterministic, meaning it will always produce the same split. For a random split, see
    utils.data.datasets.random_split.

    :param dataset: Dataset to be split.
    :type dataset: RadioDataset
    :param lengths: Lengths or fractions of splits to be produced. If given a list of fractions, the list should
        sum up to 1. The lengths will be computed automatically as ``floor(frac * len(dataset))`` for each fraction
        provided, and any remainders will be distributed in round-robin fashion.
    :type lengths: list of ints (lengths) or floats (fractions)
    :raises ValueError: If 'dataset' is not a RadioDataset, if 'lengths' is invalid, or if the dataset has no
        usable string 'rec_id' metadata field or fewer distinct recordings than requested splits.
    :return: List of radio datasets. The number of returned datasets will correspond to the length of the provided
        'lengths' list.
    :rtype: list of RadioDataset

    **Examples:**

    >>> import os
    >>> import random
    >>> import string
    >>> import numpy as np
    >>> import pandas as pd
    >>> from utils.data.datasets import split

    First, let's generate some random data:

    >>> shape = (24, 1, 1024)  # 24 examples, each of length 1024
    >>> real_part, imag_part = np.random.randint(0, 12, size=shape), np.random.randint(0, 79, size=shape)
    >>> data = real_part + 1j * imag_part

    Then, a list of recording IDs. Let's pretend this data was cut from 4 separate recordings:

    >>> rec_id_options = [''.join(random.choices(string.ascii_lowercase + string.digits, k=256)) for _ in range(4)]
    >>> rec_id = [np.random.choice(rec_id_options) for _ in range(shape[0])]

    Using this data and metadata, let's initialize a dataset:

    >>> metadata = pd.DataFrame(data={"rec_id": rec_id}).to_records(index=False)
    >>> fid = os.path.join(os.getcwd(), "source_file.hdf5")
    >>> ds = RadioDataset(source=fid)

    Finally, let's do an 80/20 train-test split:

    >>> train_ds, test_ds = split(ds, lengths=[0.8, 0.2])
    """
    if not isinstance(dataset, RadioDataset):
        raise ValueError(f"'dataset' must be RadioDataset or one of its subclasses, got {type(dataset)}.")
    # Resolve fractions to concrete integer lengths (and validate int lengths against the dataset size).
    lengths_ = _validate_lengths(dataset=dataset, lengths=lengths)
    if "rec_id" not in dataset.metadata or not isinstance(dataset.metadata["rec_id"][0], str):
        raise ValueError("Dataset missing string field 'rec_id'.")
    # Map each distinct recording ID to the number of slices cut from it.
    rec_ids = dict(Counter(dataset.metadata["rec_id"]))
    if len(rec_ids) < len(lengths_):
        raise ValueError(f"Not enough Recordings IDs in the dataset for a {len(lengths_)}-way split.")
    # Sort the rec_ids in descending order by frequency.
    ids, freqs = list(rec_ids.keys()), list(rec_ids.values())
    sorted_indices = np.flip(np.argsort(freqs))
    sorted_rec_ids = [ids[x] for x in sorted_indices]
    sorted_freqs = [freqs[x] for x in sorted_indices]
    # Preallocate keys, which we'll use to track which recordings are assigned to which subsets.
    split_key_ids = [[] for _ in range(len(lengths_))]
    split_key_freqs = [[] for _ in range(len(lengths_))]
    for i in range(len(rec_ids)):
        # Find the subset whose current length is farthest from its target length.
        current_lengths = [sum(subkey) for subkey in split_key_freqs]
        diffs = [lengths_[j] - current_lengths[j] for j in range(len(lengths_))]
        index = np.argmax(diffs)
        # Add the 'rec_id' with the highest frequency to the subset farthest from its target.
        split_key_freqs[index].append(sorted_freqs[i])
        split_key_ids[index].append(sorted_rec_ids[i])
    # Sanity check: every recording ID assigned exactly once across the subsets.
    _validate_sublists(list_of_lists=split_key_ids, ids=ids)
    return _split_datasets(dataset=dataset, key=split_key_ids)
def random_split(
    dataset: RadioDataset, lengths: list[int | float], generator: Optional[Generator] = None
) -> list[RadioDataset]:
    """Randomly split a radio dataset into non-overlapping new datasets of given lengths.

    Recordings are long-form tapes, which can be obtained either from a software-defined radio (SDR) or generated
    synthetically. Then, radio datasets are curated from collections of recordings by segmenting these
    longer-form tapes into shorter units called slices.

    For each slice in the dataset, the metadata should include the unique recording ID ('rec_id') of the recording
    from which the example was cut. To avoid leakage, all examples with the same 'rec_id' are assigned only to one of
    the new datasets. This ensures, for example, that slices cut from the same recording do not appear in both the
    training and test datasets.

    This restriction makes it unlikely that a random split will produce datasets with the exact lengths specified.
    If it is important to ensure the closest possible split, consider using utils.data.datasets.split instead.

    :param dataset: Dataset to be split.
    :type dataset: RadioDataset
    :param lengths: Lengths or fractions of splits to be produced. If given a list of fractions, the list should
        sum up to 1. The lengths will be computed automatically as ``floor(frac * len(dataset))`` for each fraction
        provided, and any remainders will be distributed in round-robin fashion.
    :type lengths: list of ints (lengths) or floats (fractions)
    :param generator: Random generator. Defaults to None.
    :type generator: NumPy Generator Object, optional.
    :return: List of radio datasets. The number of returned datasets will correspond to the length of the provided
        'lengths' list.
    :rtype: list of RadioDataset

    See Also:
        utils.data.datasets.split: Usage is the same as for ``random_split()``.
    """
    if not isinstance(dataset, RadioDataset):
        raise ValueError(f"'dataset' must be RadioDataset or one of its subclasses, got {type(dataset)}.")
    lengths_ = _validate_lengths(dataset=dataset, lengths=lengths)
    if generator is None:
        rng = np.random.default_rng(np.random.randint(0, np.iinfo(np.int32).max))
    else:
        rng = generator
    if "rec_id" not in dataset.metadata or not isinstance(dataset.metadata["rec_id"][0], str):
        raise ValueError("Dataset missing string field 'rec_id'.")
    rec_ids = dict(Counter(dataset.metadata["rec_id"]))
    if len(rec_ids) < len(lengths_):
        raise ValueError(f"Not enough Recordings IDs in the dataset for a {len(lengths_)}-way split.")
    # Sort the rec_ids in descending order by frequency (number of slices cut from each recording).
    ids, freqs = list(rec_ids.keys()), list(rec_ids.values())
    sorted_indices = np.flip(np.argsort(freqs))
    sorted_rec_ids = [ids[x] for x in sorted_indices]
    sorted_freqs = [freqs[x] for x in sorted_indices]
    # Preallocate keys, which we'll use to track which recordings are assigned to which subsets.
    n = len(lengths_)
    split_key_ids = [[] for _ in range(n)]
    split_key_freqs = [[] for _ in range(n)]
    # Taking from the bottom (least frequent), assign one recording to each subset. This is important to ensure we
    # don't end up with any empty subsets, and serves to help randomize the results.
    top_rec_ids, bottom_rec_ids = sorted_rec_ids[:-n], sorted_rec_ids[-n:]
    top_freqs, bottom_freqs = sorted_freqs[:-n], sorted_freqs[-n:]
    bottom_indices = rng.permutation(x=np.asarray(range(n)))
    for i in range(n):
        split_key_freqs[i].append(bottom_freqs[bottom_indices[i]])
        split_key_ids[i].append(bottom_rec_ids[bottom_indices[i]])
    for i in range(len(top_rec_ids)):
        # Find the subset whose current length is farthest from its target length.
        current_lengths = np.array([sum(subkey) for subkey in split_key_freqs])
        diffs = np.array([lengths_[j] - current_lengths[j] for j in range(n)])
        # Use the normalized diffs as probabilities. This results in a higher probability for larger diffs.
        diffs = np.asarray([0 if d < 0 else d for d in diffs])  # Don't add to full or overfull subsets.
        total_diff = diffs.sum()
        if total_diff > 0:
            probabilities = diffs / total_diff
        else:
            # Every subset is already at or over its target length but recordings remain. Fall back to a
            # uniform choice rather than dividing by zero, which would yield NaN probabilities and make
            # rng.choice() raise.
            probabilities = np.full(n, 1 / n)
        index = rng.choice(range(n), p=probabilities)
        # Add the 'rec_id' with the highest frequency to the chosen subset.
        split_key_freqs[index].append(top_freqs[i])
        split_key_ids[index].append(top_rec_ids[i])
    _validate_sublists(list_of_lists=split_key_ids, ids=ids)
    return _split_datasets(dataset=dataset, key=split_key_ids, generator=rng)
def _validate_lengths(dataset: RadioDataset, lengths: list[int | float]) -> list[int]:
"""Validate lengths. If lengths are fractions of splits, lengths will be computed automatically.
:param dataset: Dataset to be split.
:type dataset: RadioDataset
:param: lengths: Lengths or fractions of splits to be produced.
:type lengths: list of ints (lengths) or floats (fractions)
:return: List of lengths to be produced.
:rtype: list of ints
"""
if not isinstance(lengths, list):
raise ValueError(f"'lengths' must be a list of ints or a list of floats, got {type(lengths)}.")
if len(lengths) < 2:
raise ValueError("'lengths' list must contain at least 2 elements.")
if not all(isinstance(sub, type(lengths[0])) for sub in lengths[1:]):
raise ValueError("All elements of 'lengths' must be of the same type.")
if sum(lengths) == len(dataset):
return [int(i) for i in lengths]
elif math.isclose(sum(lengths), 1, abs_tol=1e-9):
# Fractions of splits, which add to 1.
lengths_ = [math.floor(f * len(dataset)) for f in lengths]
# Distribute remainders in round-robin fashion to the lengths until there are no remainders left.
i = 0
while len(dataset) > sum(lengths_):
lengths_[i] = lengths_[i] + 1
i = i + 1
return lengths_
else:
raise ValueError("'lengths' must sum to either the length of 'dataset' or 1.")
def _validate_sublists(list_of_lists: list[list[str]], ids: list[str]) -> None:
"""Ensure that each ID is present in one and only one sublist."""
all_elements = [item for sublist in list_of_lists for item in sublist]
assert len(all_elements) == len(set(all_elements)) and list(set(ids)).sort() == list(set(all_elements)).sort()
def _generate_split_source_filenames(
parent_dataset: RadioDataset, n_new_datasets: int, generator: Generator
) -> list[str]:
"""Generate source filenames for each new dataset.
Examples:
.../file_name.hdf5 -> [
.../file_name.split66ce07f-0.hdf5,
.../file_name.split66ce07f-1.hdf5,
.../file_name.split66ce07f-2.hdf5
]
.../file_name.002.hdf5 -> [
.../file_name.002.split156afd7-0.hdf5,
.../file_name.002.split156afd7-1.hdf5,
.../file_name.002.split156afd7-2.hdf5
]
"""
parent_file_name = str(parent_dataset.source)
parent_base_name = os.path.splitext(parent_file_name)[0]
random_tag = generator.bytes(length=4).hex()[:7]
return [f"{parent_base_name}.split{random_tag}-{i}.hdf5" for i in range(n_new_datasets)]
def _split_datasets(
    dataset: RadioDataset, key: list[list[str]], generator: Optional[Generator] = None
) -> list[RadioDataset]:
    """Materialize the split described by 'key' into new dataset source files.

    Once we know how we'd like to split up the dataset (i.e., which slices are to be included in which new
    dataset), this helper function does the actual split.

    :param dataset: Dataset to be split.
    :type dataset: RadioDataset
    :param key: A key indicating which slices are to be included in which dataset. This is a list of lists, where
        each sublist contains the recordings IDs of the slices to be included in the corresponding subset.
    :type key: A list of lists
    :param generator: Random generator (used to tag the new filenames). Defaults to None.
    :type generator: NumPy Generator Object, optional.
    :return: Non-overlapping datasets
    :rtype: list of RadioDataset
    """
    if generator is None:
        rng = np.random.default_rng(np.random.randint(0, np.iinfo(np.int32).max))
    else:
        rng = generator
    destinations = _generate_split_source_filenames(
        parent_dataset=dataset, n_new_datasets=len(key), generator=rng
    )
    # Create an empty, schema-compatible source file for every subset before copying anything over.
    for destination in destinations:
        make_empty_clone(original_source=dataset.source, new_source=destination, example_length=len(dataset.data[0, 0]))
    subsets = [dataset.__class__(source=destination) for destination in destinations]
    all_rec_ids = list(dataset.metadata["rec_id"])
    for subset_idx, assigned_ids in enumerate(key):
        for assigned_id in assigned_ids:
            # Copy every example cut from this recording into the matching subset, preserving example order.
            for example_idx, rec_id in enumerate(all_rec_ids):
                if rec_id == assigned_id:
                    copy_over_example(source=dataset.source, destination=subsets[subset_idx].source, idx=example_idx)
    return subsets

View File

@ -0,0 +1,763 @@
from __future__ import annotations
import copy
import datetime
import hashlib
import json
import os
import re
import time
import warnings
from typing import Any, Iterator, Optional
import numpy as np
from numpy.typing import ArrayLike
from quantiphy import Quantity
from utils.data.annotation import Annotation
PROTECTED_KEYS = ["rec_id", "timestamp"]
class Recording:
"""Tape of complex IQ (in-phase and quadrature) samples with associated metadata and annotations.
Recording data is a complex array of shape C x N, where C is the number of channels
and N is the number of samples in each channel.
Metadata is stored in a dictionary of key value pairs,
to include information such as sample_rate and center_frequency.
Annotations are a list of :ref:`Annotation <utils.data.Annotation>`,
defining bounding boxes in time and frequency with labels and metadata.
Here, signal data is represented as a NumPy array. This class is then extended in the RIA Backends to provide
support for different data structures, such as Tensors.
Recordings are long-form tapes can be obtained either from a software-defined radio (SDR) or generated
synthetically. Then, machine learning datasets are curated from collection of recordings by segmenting these
longer-form tapes into shorter units called slices.
All recordings are assigned a unique 64-character recording ID, ``rec_id``. If this field is missing from the
provided metadata, a new ID will be generated upon object instantiation.
:param data: Signal data as a tape IQ samples, either C x N complex, where C is the number of
channels and N is number of samples in the signal. If data is a one-dimensional array of complex samples with
length N, it will be reshaped to a two-dimensional array with dimensions 1 x N.
:type data: array_like
:param metadata: Additional information associated with the recording.
:type metadata: dict, optional
:param annotations: A collection of ``Annotation`` objects defining bounding boxes.
:type annotations: list of Annotations, optional
:param dtype: Explicitly specify the data-type of the complex samples. Must be a complex NumPy type, such as
``np.complex64`` or ``np.complex128``. Default is None, in which case the type is determined implicitly. If
``data`` is a NumPy array, the Recording will use the dtype of ``data`` directly without any conversion.
:type dtype: numpy dtype object, optional
:param timestamp: The timestamp when the recording data was generated. If provided, it should be a float or integer
representing the time in seconds since epoch (e.g., ``time.time()``). Only used if the `timestamp` field is not
present in the provided metadata.
:type dtype: float or int, optional
:raises ValueError: If data is not complex 1xN or CxN.
:raises ValueError: If metadata is not a python dict.
:raises ValueError: If metadata is not json serializable.
:raises ValueError: If annotations is not a list of valid annotation objects.
**Examples:**
>>> import numpy
>>> from utils.data import Recording, Annotation
>>> # Create an array of complex samples, just 1s in this case.
>>> samples = numpy.ones(10000, dtype=numpy.complex64)
>>> # Create a dictionary of relevant metadata.
>>> sample_rate = 1e6
>>> center_frequency = 2.44e9
>>> metadata = {
... "sample_rate": sample_rate,
... "center_frequency": center_frequency,
... "author": "me",
... }
>>> # Create an annotation for the annotations list.
>>> annotations = [
... Annotation(
... sample_start=0,
... sample_count=1000,
... freq_lower_edge=center_frequency - (sample_rate / 2),
... freq_upper_edge=center_frequency + (sample_rate / 2),
... label="example",
... )
... ]
>>> # Store samples, metadata, and annotations together in a convenient object.
>>> recording = Recording(data=samples, metadata=metadata, annotations=annotations)
>>> print(recording.metadata)
{'sample_rate': 1000000.0, 'center_frequency': 2440000000.0, 'author': 'me'}
>>> print(recording.annotations[0].label)
'example'
"""
def __init__(  # noqa C901
    self,
    data: ArrayLike | list[list],
    metadata: Optional[dict[str, Any]] = None,
    dtype: Optional[np.dtype] = None,
    timestamp: Optional[float | int] = None,
    annotations: Optional[list[Annotation]] = None,
):
    """Create a new Recording; see the class docstring for full parameter details."""
    data_arr = np.asarray(data)
    if np.iscomplexobj(data_arr):
        # Expect C x N
        if data_arr.ndim == 1:
            self._data = np.expand_dims(data_arr, axis=0)  # N -> 1 x N
        elif data_arr.ndim == 2:
            self._data = data_arr
        else:
            raise ValueError("Complex data must be C x N.")
    else:
        raise ValueError("Input data must be complex.")
    if dtype is not None:
        self._data = self._data.astype(dtype)
        # Guard against an explicit dtype that silently cast the samples to a real type.
        assert np.iscomplexobj(self._data)
    if metadata is None:
        self._metadata = {}
    elif isinstance(metadata, dict):
        # NOTE(review): the caller's dict is stored directly (not copied), so the 'timestamp'/'rec_id'
        # defaults added below are visible to the caller as well.
        self._metadata = metadata
    else:
        raise ValueError(f"Metadata must be a python dict, but was {type(metadata)}.")
    if not _is_jsonable(metadata):
        raise ValueError("Value must be JSON serializable.")
    if "timestamp" not in self.metadata:
        # Prefer the explicit 'timestamp' argument; otherwise stamp with the current time.
        if timestamp is not None:
            if not isinstance(timestamp, (int, float)):
                raise ValueError(f"timestamp must be int or float, not {type(timestamp)}")
            self._metadata["timestamp"] = timestamp
        else:
            self._metadata["timestamp"] = time.time()
    else:
        # A 'timestamp' already present in metadata wins over the constructor argument.
        if not isinstance(self._metadata["timestamp"], (int, float)):
            raise ValueError("timestamp must be int or float, not ", type(self._metadata["timestamp"]))
    if "rec_id" not in self.metadata:
        # Every recording gets a unique ID; derived from the data and timestamp.
        self._metadata["rec_id"] = generate_recording_id(data=self.data, timestamp=self._metadata["timestamp"])
    if annotations is None:
        self._annotations = []
    elif isinstance(annotations, list):
        self._annotations = annotations
    else:
        raise ValueError("Annotations must be a list or None.")
    if not all(isinstance(annotation, Annotation) for annotation in self._annotations):
        raise ValueError("All elements in self._annotations must be of type Annotation.")
    # Cursor used for iteration over the recording.
    self._index = 0
@property
def data(self) -> np.ndarray:
"""
:return: Recording data, as a complex array.
:type: np.ndarray
.. note::
For recordings with more than 1,024 samples, this property returns a read-only view of the data.
.. note::
To access specific samples, consider indexing the object directly with ``rec[c, n]``.
"""
if self._data.size > 1024:
# Returning a read-only view prevents mutation at a distance while maintaining performance.
v = self._data.view()
v.setflags(write=False)
return v
else:
return self._data.copy()
@property
def metadata(self) -> dict:
"""
:return: Dictionary of recording metadata.
:type: dict
"""
return self._metadata.copy()
@property
def annotations(self) -> list[Annotation]:
"""
:return: List of recording annotations
:type: list of Annotation objects
"""
return self._annotations.copy()
@property
def shape(self) -> tuple[int]:
"""
:return: The shape of the data array.
:type: tuple of ints
"""
return np.shape(self.data)
@property
def n_chan(self) -> int:
"""
:return: The number of channels in the recording.
:type: int
"""
return self.shape[0]
@property
def rec_id(self) -> str:
"""
:return: Recording ID.
:type: str
"""
return self.metadata["rec_id"]
@property
def dtype(self) -> str:
"""
:return: Data-type of the data array's elements.
:type: numpy dtype object
"""
return self.data.dtype
@property
def timestamp(self) -> float | int:
"""
:return: Recording timestamp (time in seconds since epoch).
:type: float or int
"""
return self.metadata["timestamp"]
@property
def sample_rate(self) -> float | None:
"""
:return: Sample rate of the recording, or None is 'sample_rate' is not in metadata.
:type: str
"""
return self.metadata.get("sample_rate")
@sample_rate.setter
def sample_rate(self, sample_rate: float | int) -> None:
    """Set (or update) the sample rate of the recording.

    :param sample_rate: The sample rate of the recording.
    :type sample_rate: float or int
    :raises ValueError: If sample_rate is not JSON serializable.
    :return: None
    """
    # update_metadata() adds the key if absent and overwrites it otherwise. add_to_metadata() would raise
    # on every assignment after the first, which would make this setter unusable for re-tuning.
    self.update_metadata(key="sample_rate", value=sample_rate)
def astype(self, dtype: np.dtype) -> Recording:
    """Return a copy of the recording with its data cast to the specified type.

    :param dtype: Data-type to which the array is cast. Must be a complex scalar type, such as ``np.complex64``
        or ``np.complex128``.
    :type dtype: NumPy data type

    .. note: Casting to a data type with less precision can risk losing data by truncating or rounding values,
        potentially resulting in a loss of accuracy and significant information.

    :raises ValueError: If 'dtype' is not a complex number scalar type.
    :return: A new recording with the same metadata and annotations, with data of type 'dtype'.
    :rtype: Recording
    """
    # Rather than check for a valid datatype up front, cast and inspect the result. Type aliases vary
    # across platforms, so checking the outcome is the portable approach.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")  # Casting may generate user warnings. E.g., complex -> real
        cast_data = self.data.astype(dtype)
    if not np.iscomplexobj(cast_data):
        raise ValueError("dtype must be a complex number scalar type.")
    return Recording(data=cast_data, metadata=self.metadata, annotations=self.annotations)
def add_to_metadata(self, key: str, value: Any) -> None:
    """Add a brand-new key-value pair to the recording metadata.

    To change the value of a key that already exists, use ``Recording.update_metadata()`` instead.

    :param key: New metadata key, must be snake_case.
    :type key: str
    :param value: Corresponding metadata value.
    :type value: any
    :raises ValueError: If key is already in metadata or if key is not a valid metadata key.
    :raises ValueError: If value is not JSON serializable.
    :return: None.

    **Examples:**

    Create a recording and add metadata:

    >>> import numpy
    >>> from utils.data import Recording
    >>> samples = numpy.ones(10000, dtype=numpy.complex64)
    >>> metadata = {
    ...     "sample_rate": 1e6,
    ...     "center_frequency": 2.44e9,
    ... }
    >>> recording = Recording(data=samples, metadata=metadata)
    >>> recording.add_to_metadata(key="author", value="me")
    >>> recording.metadata["author"]
    'me'
    """
    # Validation order matters for which error the caller sees: duplicate key, then key format, then value.
    if key in self.metadata:
        raise ValueError(
            f"Key {key} already in metadata. Use Recording.update_metadata() to modify existing fields."
        )
    if not _is_valid_metadata_key(key):
        raise ValueError(f"Invalid metadata key: {key}.")
    if not _is_jsonable(value):
        raise ValueError("Value must be JSON serializable.")
    self._metadata[key] = value
def update_metadata(self, key: str, value: Any) -> None:
    """Update the value of an existing metadata key, or add the key-value pair if it does not already exist.

    :param key: Existing (or new) metadata key.
    :type key: str
    :param value: New value to enter at key.
    :type value: any
    :raises ValueError: If value is not JSON serializable.
    :raises ValueError: If key is protected.
    :return: None.

    **Examples:**

    Create a recording and update metadata:

    >>> import numpy
    >>> from utils.data import Recording
    >>> samples = numpy.ones(10000, dtype=numpy.complex64)
    >>> metadata = {
    ...     "sample_rate": 1e6,
    ...     "center_frequency": 2.44e9,
    ...     "author": "me",
    ... }
    >>> recording = Recording(data=samples, metadata=metadata)
    >>> recording.update_metadata(key="author", value="you")
    >>> recording.metadata["author"]
    'you'
    """
    # Protected keys may be neither modified nor (re)introduced through this method.
    if key in PROTECTED_KEYS:  # Check protected keys.
        raise ValueError(f"Key {key} is protected and cannot be modified or removed.")
    if key not in self.metadata:
        # add_to_metadata() performs its own key/value validation; return so the value is not set twice.
        self.add_to_metadata(key=key, value=value)
        return
    if not _is_jsonable(value):
        raise ValueError("Value must be JSON serializable.")
    self._metadata[key] = value
def remove_from_metadata(self, key: str):
    """Remove a key from the recording metadata.

    Protected keys cannot be removed.

    :param key: The key to remove.
    :type key: str
    :raises ValueError: If key is protected.
    :raises KeyError: If key is not present in the metadata.
    :return: None.

    **Examples:**

    Create a recording and remove a metadata key:

    >>> import numpy
    >>> from utils.data import Recording
    >>> samples = numpy.ones(10000, dtype=numpy.complex64)
    >>> metadata = {
    ...     "sample_rate": 1e6,
    ...     "center_frequency": 2.44e9,
    ...     "author": "me",
    ... }
    >>> recording = Recording(data=samples, metadata=metadata)
    >>> recording.remove_from_metadata(key="author")
    >>> "author" in recording.metadata
    False
    """
    # Guard first: protected keys must never leave the metadata.
    if key in PROTECTED_KEYS:
        raise ValueError(f"Key {key} is protected and cannot be modified or removed.")
    self._metadata.pop(key)
def view(self, output_path: Optional[str] = "images/signal.png", **kwargs) -> None:
    """Create a plot of various signal visualizations as a PNG image.

    :param output_path: The output image path. Defaults to "images/signal.png".
    :type output_path: str, optional
    :param kwargs: Keyword arguments passed on to utils.view.view_sig.
    :type: dict of keyword arguments

    **Examples:**

    Create a recording and view it as a plot in a .png image:

    >>> import numpy
    >>> from utils.data import Recording
    >>> samples = numpy.ones(10000, dtype=numpy.complex64)
    >>> metadata = {
    ...     "sample_rate": 1e6,
    ...     "center_frequency": 2.44e9,
    ... }
    >>> recording = Recording(data=samples, metadata=metadata)
    >>> recording.view()
    """
    # Imported lazily so that plotting dependencies are only required when visualization is requested.
    from utils.view import view_sig

    view_sig(recording=self, output_path=output_path, **kwargs)
def to_sigmf(self, filename: Optional[str] = None, path: Optional[os.PathLike | str] = None) -> None:
    """Write recording to a set of SigMF files.

    The SigMF io format is defined by the `SigMF Specification Project <https://github.com/sigmf/SigMF>`_

    :param filename: The name of the file where the recording is to be saved. Defaults to auto generated filename.
    :type filename: os.PathLike or str, optional
    :param path: The directory path to where the recording is to be saved. Defaults to recordings/.
    :type path: os.PathLike or str, optional
    :raises IOError: If there is an issue encountered during the file writing process.
    :return: None

    **Examples:**

    Create a recording and save it as a set of SigMF files:

    >>> import numpy
    >>> from utils.data import Recording
    >>> samples = numpy.ones(10000, dtype=numpy.complex64)
    >>> metadata = {
    ...     "sample_rate": 1e6,
    ...     "center_frequency": 2.44e9,
    ... }
    >>> recording = Recording(data=samples, metadata=metadata)
    >>> recording.to_sigmf()
    """
    # Imported lazily so the io backend is only required when actually writing files.
    from utils.io.recording import to_sigmf

    to_sigmf(filename=filename, path=path, recording=self)
def to_npy(self, filename: Optional[str] = None, path: Optional[os.PathLike | str] = None) -> str:
    """Write recording to ``.npy`` binary file.

    :param filename: The name of the file where the recording is to be saved. Defaults to auto generated filename.
    :type filename: os.PathLike or str, optional
    :param path: The directory path to where the recording is to be saved. Defaults to recordings/.
    :type path: os.PathLike or str, optional
    :raises IOError: If there is an issue encountered during the file writing process.
    :return: Path where the file was saved.
    :rtype: str

    **Examples:**

    Create a recording and save it to a .npy file:

    >>> import numpy
    >>> from utils.data import Recording
    >>> samples = numpy.ones(10000, dtype=numpy.complex64)
    >>> metadata = {
    ...     "sample_rate": 1e6,
    ...     "center_frequency": 2.44e9,
    ... }
    >>> recording = Recording(data=samples, metadata=metadata)
    >>> recording.to_npy()
    """
    # Imported lazily so the io backend is only required when actually writing files.
    from utils.io.recording import to_npy

    # Propagate the saved path: the signature and docstring promise a str return.
    return to_npy(recording=self, filename=filename, path=path)
def trim(self, num_samples: int, start_sample: Optional[int] = 0) -> Recording:
    """Trim Recording samples to a desired length, shifting annotations to maintain alignment.

    :param start_sample: The start index of the desired trimmed recording. Defaults to 0.
    :type start_sample: int, optional
    :param num_samples: The number of samples that the output trimmed recording will have.
    :type num_samples: int
    :raises IndexError: If start_sample + num_samples is greater than the length of the recording.
    :raises IndexError: If start_sample < 0.

    .. note:: NOTE(review): num_samples < 0 is not validated here; a negative value yields an empty slice
        rather than an IndexError — confirm whether that is the intended contract.

    :return: The trimmed Recording.
    :rtype: Recording

    **Examples:**

    Create a recording and trim it:

    >>> import numpy
    >>> from utils.data import Recording
    >>> samples = numpy.ones(10000, dtype=numpy.complex64)
    >>> metadata = {
    ...     "sample_rate": 1e6,
    ...     "center_frequency": 2.44e9,
    ... }
    >>> recording = Recording(data=samples, metadata=metadata)
    >>> print(len(recording))
    10000
    >>> trimmed_recording = recording.trim(start_sample=1000, num_samples=1000)
    >>> print(len(trimmed_recording))
    1000
    """
    if start_sample < 0:
        raise IndexError("start_sample cannot be < 0.")
    elif start_sample + num_samples > len(self):
        raise IndexError(
            f"start_sample {start_sample} + num_samples {num_samples} > recording length {len(self)}."
        )
    end_sample = start_sample + num_samples
    # Slice all channels over the [start_sample, end_sample) window.
    data = self.data[:, start_sample:end_sample]
    # Deep-copy so the source recording's annotations are left untouched.
    new_annotations = copy.deepcopy(self.annotations)
    for annotation in new_annotations:
        # trim annotation if it goes outside the trim boundaries
        if annotation.sample_start < start_sample:
            annotation.sample_count = annotation.sample_count - (start_sample - annotation.sample_start)
            annotation.sample_start = start_sample
        if annotation.sample_start + annotation.sample_count > end_sample:
            annotation.sample_count = end_sample - annotation.sample_start
        # shift annotation to align with the new start point
        # NOTE(review): annotations lying entirely outside the window are kept with a non-positive
        # sample_count rather than dropped — confirm downstream consumers tolerate that.
        annotation.sample_start = annotation.sample_start - start_sample
    return Recording(data=data, metadata=self.metadata, annotations=new_annotations)
def normalize(self) -> Recording:
    """Scale the recording data, relative to its maximum value, so that the magnitude of the maximum sample is 1.

    :raises ValueError: If every sample in the recording is zero (the scale factor would
        be undefined; the previous implementation silently produced NaN/Inf).
    :return: Recording where the maximum sample amplitude is 1.
    :rtype: Recording

    **Examples:**

    Create a recording with maximum amplitude 0.5 and normalize to a maximum amplitude of 1:

    >>> import numpy
    >>> from utils.data import Recording
    >>> samples = numpy.ones(10000, dtype=numpy.complex64) * 0.5
    >>> metadata = {
    ...     "sample_rate": 1e6,
    ...     "center_frequency": 2.44e9,
    ... }
    >>> recording = Recording(data=samples, metadata=metadata)
    >>> print(numpy.max(numpy.abs(recording.data)))
    0.5
    >>> normalized_recording = recording.normalize()
    >>> print(numpy.max(numpy.abs(normalized_recording.data)))
    1.0
    """
    peak = np.max(np.abs(self.data))
    if peak == 0:
        # Dividing by a zero peak would fill the recording with NaN/Inf.
        raise ValueError("Cannot normalize a recording whose samples are all zero.")
    scaled_data = self.data / peak
    return Recording(data=scaled_data, metadata=self.metadata, annotations=self.annotations)
def generate_filename(self, tag: Optional[str] = "rec"):
    """Generate a filename from metadata.

    The result is the concatenation ``<tag>_[<source>_]<frequency>Hz_<timestamp>_<rec_id[:7]>``,
    where ``source`` is included only when present in the metadata.

    :param tag: The string at the beginning of the generated filename. Default is "rec".
    :type tag: str, optional
    :return: A filename without an extension.
    :rtype: str
    """
    # TODO: This method should be refactored to use the first 7 characters of the 'rec_id' field.
    tag = tag + "_"
    source = self.metadata.get("source", "")
    if source != "":
        source = source + "_"
    # converts 1000 to 1k for example
    # NOTE(review): assumes str(Quantity(...)) renders as a magnitude followed by a single
    # SI-suffix character (e.g. "2.44G"); a plain numeric rendering such as "1000000"
    # would be split incorrectly below -- confirm Quantity's string format.
    center_frequency = str(Quantity(self.metadata.get("center_frequency", 0)))
    if center_frequency != "0":
        # Split the rendered quantity into its magnitude and one-character SI suffix.
        num = center_frequency[:-1]
        suffix = center_frequency[-1]
        # Round the magnitude so the filename carries an integer (e.g. "2GHz").
        num = int(np.round(float(num)))
    else:
        num = 0
        suffix = ""
    center_frequency = str(num) + suffix + "Hz_"
    # Whole-second timestamp formatted for filesystem safety (no colons).
    timestamp = int(self.timestamp)
    timestamp = datetime.datetime.fromtimestamp(timestamp).strftime("%Y-%m-%d_%H-%M-%S") + "_"
    # Add first seven characters of rec_id for uniqueness
    rec_id = self.rec_id[0:7]
    return tag + source + center_frequency + timestamp + rec_id
def __len__(self) -> int:
    """The length of a recording is the number of complex samples in each channel."""
    num_samples = self.shape[1]  # shape is (channels, samples)
    return num_samples
def __eq__(self, other: Recording) -> bool:
    """Two Recordings are equal if all data, metadata, and annotations are the same.

    Annotation lists are compared as unordered multisets, so two Recordings whose
    annotations appear in different orders still compare equal. (The previous
    implementation compared the lists element-wise, contradicting its own comment
    about allowing differently ordered annotation lists.)
    """
    if len(self.annotations) != len(other.annotations):
        return False
    # Multiset comparison that requires neither hashable nor orderable annotations.
    remaining = list(other.annotations)
    for annotation in self.annotations:
        if annotation in remaining:
            remaining.remove(annotation)
        else:
            return False
    return np.array_equal(self.data, other.data) and self.metadata == other.metadata
def __ne__(self, other: Recording) -> bool:
    """Two Recordings are not equal if any of their data, metadata, or annotations differ."""
    return not self.__eq__(other=other)
def __iter__(self) -> Iterator:
    """Reset the channel cursor and iterate over channels (see ``__next__``).

    NOTE(review): iteration state lives on the instance (``self._index``), so nested
    or concurrent iteration over the same Recording would interfere -- confirm this
    is acceptable for callers.
    """
    self._index = 0
    return self
def __next__(self) -> np.ndarray:
    """Return the next channel's samples; raise StopIteration once all channels are consumed."""
    if self._index < self.n_chan:
        to_ret = self.data[self._index]
        self._index += 1
        return to_ret
    else:
        raise StopIteration
def __getitem__(self, key: int | tuple[int] | slice) -> np.ndarray | np.complexfloating:
    """If key is an integer, tuple of integers, or a slice, return the corresponding samples.

    Results of 1,024 or fewer samples are returned as copies; larger results come back
    as read-only views. This prevents mutation at a distance while maintaining
    performance.
    """
    if not isinstance(key, (int, tuple, slice)):
        raise ValueError(f"Key must be an integer, tuple, or slice but was {type(key)}.")
    selected = self._data[key]
    if isinstance(selected, np.complexfloating):
        # Scalar indexing result -- nothing to protect.
        return selected
    if selected.size > 1024:
        selected.setflags(write=False)  # Make view read-only.
        return selected
    return selected.copy()
def __setitem__(self, *args, **kwargs) -> None:
    """Recordings are immutable: any item-assignment attempt fails."""
    raise ValueError("Assignment to Recording is not allowed.")
def generate_recording_id(data: np.ndarray, timestamp: Optional[float | int] = None) -> str:
    """Generate a unique 64-character recording ID.

    The ID is the SHA-256 hex digest (256 bits, 64 hex characters) of the recording
    bytes concatenated with the string form of the timestamp at which the data was
    generated. If no timestamp is provided, the current time is used.

    :param data: Tape of IQ samples, as a NumPy array.
    :type data: np.ndarray
    :param timestamp: Unix timestamp in seconds. Defaults to None.
    :type timestamp: float or int, optional
    :return: 64-character hex digest, to be used as the recording ID.
    :rtype: str
    """
    if timestamp is None:
        timestamp = time.time()
    payload = data.tobytes() + str(timestamp).encode("utf-8")
    return hashlib.sha256(payload).hexdigest()
def _is_jsonable(x: Any) -> bool:
    """
    :return: True if ``x`` can be serialized with ``json.dumps``, False otherwise.
    """
    try:
        json.dumps(x)
    except (TypeError, OverflowError):
        return False
    return True
def _is_valid_metadata_key(key: Any) -> bool:
    """
    :return: True if key is a valid metadata key (a lowercase string made up solely of
        letters a-z and underscores), False otherwise.
    """
    return (
        isinstance(key, str)
        and key.islower()
        and re.fullmatch(r"[a-z_]+", key) is not None
    )

View File

@ -0,0 +1,22 @@
"""
The IO package contains utilities for input and output operations, such as loading and saving recordings to and from
file.
"""
__all__ = [
# Common:
"exists",
"copy",
"move",
"validate",
# Recording:
"save_recording",
"load_recording",
"to_sigmf",
"from_sigmf",
"to_npy",
"from_npy",
]
from .common import copy, exists, move, validate
from .recording import from_npy, from_sigmf, load_recording, to_npy, to_sigmf

View File

@ -0,0 +1,331 @@
"""
Utilities for input/output operations on the utils.data.Recording object.
"""
import datetime as dt
import os
from datetime import timezone
from typing import Optional
import numpy as np
import sigmf
from sigmf import SigMFFile, sigmffile
from sigmf.utils import get_data_type_str
from utils.data import Annotation
from utils.data.recording import Recording
def load_rec(file: os.PathLike) -> Recording:
    """Load a recording from file, dispatching on the file extension.

    :param file: The directory path to the file(s) to load, **with** the file extension.
        To load from SigMF, the extension must be one of *sigmf*, *sigmf-data*, or
        *sigmf-meta*; either way both the SigMF data and meta files must be present
        for a successful read.
    :type file: os.PathLike
    :raises IOError: If there is an issue encountered during the file reading process.
    :raises ValueError: If the inferred file extension is not supported.
    :return: The recording, as initialized from file(s).
    :rtype: utils.data.Recording
    """
    # NOTE(review): the io package __init__ exports "load_recording"; confirm this
    # module also provides that name, otherwise the package import will fail.
    extension = os.path.splitext(file)[1].lstrip(".")
    normalized = extension.lower()
    if normalized in ("sigmf", "sigmf-data", "sigmf-meta"):
        return from_sigmf(file=file)
    if normalized == "npy":
        return from_npy(file=file)
    raise ValueError(f"File extension {extension} not supported.")
# Maps SigMF global keys (the SigMFFile.*_KEY constants) to the RIA metadata key used
# for the same field; ``to_sigmf`` uses it to pull RIA metadata into the SigMF global
# segment, and ``from_sigmf`` to translate back.
# NOTE(review): values prefixed with "sigmf:" appear to be SigMF fields with no native
# RIA metadata equivalent -- confirm.
SIGMF_KEY_CONVERSION = {
    SigMFFile.AUTHOR_KEY: "author",
    SigMFFile.COLLECTION_KEY: "sigmf:collection",
    SigMFFile.DATASET_KEY: "sigmf:dataset",
    SigMFFile.DATATYPE_KEY: "datatype",
    SigMFFile.DATA_DOI_KEY: "data_doi",
    SigMFFile.DESCRIPTION_KEY: "description",
    SigMFFile.EXTENSIONS_KEY: "sigmf:extensions",
    SigMFFile.GEOLOCATION_KEY: "geolocation",
    SigMFFile.HASH_KEY: "sigmf:hash",
    SigMFFile.HW_KEY: "sdr",
    SigMFFile.LICENSE_KEY: "license",
    SigMFFile.META_DOI_KEY: "metadata",
    SigMFFile.METADATA_ONLY_KEY: "sigmf:metadata_only",
    SigMFFile.NUM_CHANNELS_KEY: "sigmf:num_channels",
    SigMFFile.RECORDER_KEY: "source_software",
    SigMFFile.SAMPLE_RATE_KEY: "sample_rate",
    SigMFFile.START_OFFSET_KEY: "sigmf:start_offset",
    SigMFFile.TRAILING_BYTES_KEY: "sigmf:trailing_bytes",
    SigMFFile.VERSION_KEY: "sigmf:version",
}
def convert_to_serializable(obj):
    """
    Recursively convert a JSON-compatible structure into a fully JSON-serializable one.

    Handles NumPy scalar/array types, nested dicts, lists, tuples, and sets. Infinite
    floats and None become None. Raises TypeError for anything else.

    Fixes over the previous version: ``np.bool_`` values are now converted instead of
    raising TypeError, and the infinity check no longer invokes ``==`` on arbitrary
    objects via a list-membership test.
    """
    # bool before the numeric checks: np.bool_ is not an np.integer subclass.
    if isinstance(obj, (bool, np.bool_)):
        return bool(obj)
    if isinstance(obj, np.integer):
        return int(obj)  # Convert NumPy int to Python int
    if isinstance(obj, np.floating):
        return float(obj)  # Convert NumPy float to Python float
    if isinstance(obj, np.ndarray):
        return obj.tolist()  # Convert NumPy array to list
    if isinstance(obj, (list, tuple)):
        return [convert_to_serializable(item) for item in obj]  # Process list or tuple
    if isinstance(obj, dict):
        return {key: convert_to_serializable(value) for key, value in obj.items()}  # Process dict
    if isinstance(obj, set):
        # NOTE(review): set elements are not recursively converted (pre-existing
        # behavior) -- confirm whether sets can contain NumPy scalars here.
        return list(obj)
    # Handle infinity or None: strict JSON has no representation for either.
    if obj is None or (isinstance(obj, float) and obj in (float("inf"), float("-inf"))):
        return None
    if isinstance(obj, (str, int, float)):
        return obj  # Base case: already serializable
    raise TypeError(f"Value of type {type(obj)} is not JSON serializable: {obj}")
def to_sigmf(recording: Recording, filename: Optional[str] = None, path: Optional[os.PathLike | str] = None) -> None:
    """Write recording to a set of SigMF files (``<filename>.sigmf-data`` and ``<filename>.sigmf-meta``).

    The SigMF io format is defined by the `SigMF Specification Project <https://github.com/sigmf/SigMF>`_

    :param recording: The recording to be written to file.
    :type recording: utils.data.Recording
    :param filename: The name of the file where the recording is to be saved. Defaults to auto generated filename.
    :type filename: os.PathLike or str, optional
    :param path: The directory path to where the recording is to be saved. Defaults to recordings/.
    :type path: os.PathLike or str, optional
    :raises IOError: If there is an issue encountered during the file writing process.
    :raises NotImplementedError: If the recording has more than one channel.
    :return: None

    **Examples:**

    >>> from utils.sdr import Synth
    >>> from utils.data import Recording
    >>> from utils.io import to_sigmf
    >>> sdr = Synth()
    >>> rec = sdr.record(center_frequency=2.4e9, sample_rate=20e6)
    >>> to_sigmf(recording=rec, filename="sample_recording")
    """
    if filename is not None:
        # Strip any caller-supplied extension; SigMF extensions are appended below.
        filename, _ = os.path.splitext(filename)
    else:
        filename = recording.generate_filename()
    if path is None:
        path = "recordings"
    if not os.path.exists(path):
        os.makedirs(path)
    multichannel_samples = recording.data
    metadata = recording.metadata
    annotations = recording.annotations
    if multichannel_samples.shape[0] > 1:
        raise NotImplementedError("SigMF File Saving Not Implemented for Multichannel Recordings")
    # Extract the single channel and write the raw IQ samples.
    samples = multichannel_samples[0]
    # Fix: the data file must share the meta file's basename. Previously a literal
    # "(unknown)" placeholder was written here, orphaning the .sigmf-meta file.
    data_file_path = os.path.join(path, f"{filename}.sigmf-data")
    samples.tofile(data_file_path)
    global_info = {
        SigMFFile.DATATYPE_KEY: get_data_type_str(samples),
        SigMFFile.VERSION_KEY: sigmf.__version__,
        SigMFFile.RECORDER_KEY: "RIA",
    }
    # Translate RIA metadata keys into SigMF core keys where a mapping exists.
    converted_metadata = {
        sigmf_key: metadata[metadata_key]
        for sigmf_key, metadata_key in SIGMF_KEY_CONVERSION.items()
        if metadata_key in metadata
    }
    # Merge dictionaries, giving priority to the fixed SigMF fields set above.
    global_info = {**converted_metadata, **global_info}
    # Also preserve the complete RIA metadata under a namespaced "ria:" prefix.
    ria_metadata = {f"ria:{key}": value for key, value in metadata.items()}
    global_info.update(convert_to_serializable(ria_metadata))
    sigmf_metafile = SigMFFile(
        data_file=data_file_path,
        global_info=global_info,
    )
    for annotation_object in annotations:
        annotation_dict = convert_to_serializable(annotation_object.to_sigmf_format())
        sigmf_metafile.add_annotation(
            start_index=annotation_dict[SigMFFile.START_INDEX_KEY],
            length=annotation_dict[SigMFFile.LENGTH_INDEX_KEY],
            metadata=annotation_dict["metadata"],
        )
    sigmf_metafile.add_capture(
        0,
        metadata={
            SigMFFile.FREQUENCY_KEY: metadata.get("center_frequency", 0),
            SigMFFile.DATETIME_KEY: dt.datetime.fromtimestamp(float(metadata.get("timestamp", 0)), tz=timezone.utc)
            .isoformat()
            .replace("+00:00", "Z"),
        },
    )
    # (Removed dead code that mutated a discarded copy of ordered_metadata().)
    sigmf_metafile.tofile(f"{os.path.join(path, filename)}.sigmf-meta")
def from_sigmf(file: os.PathLike | str) -> Recording:
    """Load a recording from a set of SigMF files.

    :param file: The directory path to the SigMF recording files, with or without a
        SigMF extension. The recording is initialized from ``<file>.sigmf-data`` and
        ``<file>.sigmf-meta``; both files must be present for a successful read.
    :type file: str or os.PathLike
    :raises IOError: If there is an issue encountered during the file reading process.
    :return: The recording, as initialized from the SigMF files.
    :rtype: utils.data.Recording
    """
    # os.fspath: the previous str-slicing extension check raised TypeError for
    # PathLike input and mishandled short names.
    file = os.fspath(file)
    if not file.endswith((".sigmf", ".sigmf-data", ".sigmf-meta")):
        file = file + ".sigmf-data"
    sigmf_file = sigmffile.fromfile(file)
    data = sigmf_file.read_samples()
    global_metadata = sigmf_file.get_global_info()
    dict_annotations = sigmf_file.get_annotations()
    processed_metadata = {}
    for key, value in global_metadata.items():
        if key.startswith("core:"):
            # Fix: SIGMF_KEY_CONVERSION is keyed by the full "core:..." SigMF constants,
            # so look up the complete key and only fall back to stripping the prefix.
            # (The old code stripped first, so every lookup missed and e.g. "core:hw"
            # came back as "hw" instead of the RIA key "sdr".)
            converted_key = SIGMF_KEY_CONVERSION.get(key, key[len("core:"):])
        elif key.startswith("ria:"):
            converted_key = key[len("ria:"):]  # Remove 'ria:' prefix
        else:
            # Load non-core/ria keys as is
            converted_key = key
        processed_metadata[converted_key] = value
    annotations = []
    # Loop variable renamed from ``dict``, which shadowed the builtin.
    for ann in dict_annotations:
        annotations.append(
            Annotation(
                sample_start=ann[SigMFFile.START_INDEX_KEY],
                sample_count=ann[SigMFFile.LENGTH_INDEX_KEY],
                freq_lower_edge=ann.get(SigMFFile.FLO_KEY, None),
                freq_upper_edge=ann.get(SigMFFile.FHI_KEY, None),
                label=ann.get(SigMFFile.LABEL_KEY, None),
                comment=ann.get(SigMFFile.COMMENT_KEY, None),
                detail=ann.get("ria:detail", None),
            )
        )
    return Recording(data=data, metadata=processed_metadata, annotations=annotations)
def to_npy(recording: Recording, filename: Optional[str] = None, path: Optional[os.PathLike | str] = None) -> str:
    """Write recording to ``.npy`` binary file.

    The data, metadata, and annotations are written as three consecutive ``np.save``
    records in a single file; ``from_npy`` reads them back in the same order
    (metadata and annotations rely on NumPy's pickle support).

    :param recording: The recording to be written to file.
    :type recording: utils.data.Recording
    :param filename: The name of the file where the recording is to be saved. Defaults to auto generated filename.
    :type filename: os.PathLike or str, optional
    :param path: The directory path to where the recording is to be saved. Defaults to recordings/.
    :type path: os.PathLike or str, optional
    :raises IOError: If there is an issue encountered during the file writing process.
    :return: Path where the file was saved.
    :rtype: str

    **Examples:**

    >>> from utils.sdr import Synth
    >>> from utils.data import Recording
    >>> from utils.io import to_npy
    >>> sdr = Synth()
    >>> rec = sdr.record(center_frequency=2.4e9, sample_rate=20e6)
    >>> to_npy(recording=rec, filename="sample_recording.npy")
    """
    if filename is not None:
        # Strip any caller-supplied extension; ".npy" is (re-)appended below.
        filename, _ = os.path.splitext(filename)
    else:
        filename = recording.generate_filename()
    filename = filename + ".npy"
    if path is None:
        path = "recordings"
    if not os.path.exists(path):
        os.makedirs(path)
    fullpath = os.path.join(path, filename)
    data = np.array(recording.data)
    metadata = recording.metadata
    annotations = recording.annotations
    # Three sequential saves into one file; order must match from_npy's reads.
    with open(file=fullpath, mode="wb") as f:
        np.save(f, data)
        np.save(f, metadata)
        np.save(f, annotations)
    return str(fullpath)
def from_npy(file: os.PathLike | str) -> Recording:
    """Load a recording from a ``.npy`` binary file.

    :param file: The directory path to the recording file, with or without the ``.npy`` file extension.
    :type file: str or os.PathLike
    :raises IOError: If there is an issue encountered during the file reading process.
    :raises ValueError: If the path carries an extension other than ``.npy``.
    :return: The recording, as initialized from the ``.npy`` file.
    :rtype: utils.data.Recording
    """
    root, extension = os.path.splitext(file)
    if extension not in ("", ".npy"):
        raise ValueError("Cannot use from_npy if file extension is not .npy")
    # Normalize to an explicit .npy path.
    target = str(root) + ".npy"
    # NOTE(review): allow_pickle=True can execute arbitrary code when loading -- only
    # open trusted files.
    with open(file=target, mode="rb") as f:
        data = np.load(f, allow_pickle=True)
        metadata = np.load(f, allow_pickle=True).tolist()
        try:
            annotations = list(np.load(f, allow_pickle=True))
        except EOFError:
            # Files written without an annotations record load as annotation-free.
            annotations = []
    return Recording(data=data, metadata=metadata, annotations=annotations)

View File

@ -0,0 +1,8 @@
"""
The transforms package houses a collection of functions to manipulate and transform radio data.
This package contains various functions that operate on NumPy arrays. These functions are utilized within the machine
learning backends to build transforms and functions that seamlessly integrate with those from the respective backend.
All the transforms in this package expect data in the complex 1xN format.
"""

View File

@ -0,0 +1,717 @@
"""
This module comprises the functionals of various transforms designed to create new training examples by augmenting
existing examples or recordings using a variety of techniques. These transforms take an ArrayLike object as input
and return a corresponding numpy.ndarray with the impairment model applied;
we call the latter the impaired data.
"""
import warnings
from typing import Optional

import numpy as np
from numpy.typing import ArrayLike

from utils.data.recording import Recording
from utils.helpers.array_conversion import convert_to_2xn
# TODO: For round 2 of index generation, should j be at min 2 spots away from where it was to prevent adjacent patches.
# TODO: All the transforms with some randomness need to be refactored to use a random generator.
def generate_awgn(signal: ArrayLike | Recording, snr: Optional[float] = 1) -> np.ndarray | Recording:
    """Generate additive white gaussian noise (AWGN) relative to the signal-to-noise ratio (SNR)
    of the provided `signal` array or `Recording`.

    The RMS power of `signal` fixes the RMS power of noise matching the requested SNR;
    the noise is then drawn with normally distributed amplitude and uniformly random phase.

    :param signal: Input IQ data as a complex CxN array or `Recording`, where C is the number of channels and N
        is the length of the IQ examples.
    :type signal: array_like or utils.data.Recording
    :param snr: The signal-to-noise ratio in dB. Default is 1.
    :type snr: float, optional
    :raises ValueError: If `signal` is not CxN complex.
    :return: A numpy array of generated noise matching the SNR of `signal`. If `signal` is a
        Recording, returns a Recording whose `data` attribute contains the generated noise.
    :rtype: np.ndarray or utils.data.Recording

    >>> rec = Recording(data=[[2 + 5j, 1 + 8j]])
    >>> new_rec = generate_awgn(rec)
    >>> new_rec.data
    array([[2.15991777 + 0.69673915j, 0.2814541 - 0.12111976j]])
    """
    is_recording = isinstance(signal, Recording)
    data = signal.data if is_recording else np.asarray(signal)
    if not (data.ndim == 2 and np.iscomplexobj(data)):
        raise ValueError("signal must be CxN complex.")
    num_channels, num_samples = data.shape
    # Convert the SNR from dB to a linear ratio.
    snr_linear = 10 ** (snr / 10)
    # RMS power of the signal determines the RMS power of the noise.
    rms_signal = np.sqrt(np.mean(np.abs(data) ** 2))
    rms_noise = rms_signal / snr_linear
    # Draw amplitude from a zero-mean normal with the matching variance, and
    # phase uniformly on [0, 2*pi).
    std_dev = np.sqrt(rms_noise**2)
    amplitude = np.random.normal(loc=0, scale=std_dev, size=(num_channels, num_samples))
    phase = np.random.uniform(low=0, high=2 * np.pi, size=(num_channels, num_samples))
    noise = amplitude * np.exp(1j * phase)
    if is_recording:
        return Recording(data=noise, metadata=signal.metadata)
    return noise
def time_reversal(signal: ArrayLike | Recording) -> np.ndarray | Recording:
    """Reverse the order of the I (In-phase) and Q (Quadrature) data samples along the time
    axis of the provided `signal` array or `Recording`.

    :param signal: Input IQ data as a complex CxN array or `Recording`, where C is the number of channels and N
        is the length of the IQ examples.
    :type signal: array_like or utils.data.Recording
    :raises ValueError: If `signal` is not CxN complex.
    :return: A numpy array containing the reversed I and Q data samples if `signal` is an array.
        If `signal` is a `Recording`, returns a `Recording` object with its `data` attribute
        containing the reversed array.
    :rtype: np.ndarray or utils.data.Recording

    >>> rec = Recording(data=[[1+2j, 3+4j, 5+6j]])
    >>> new_rec = time_reversal(rec)
    >>> new_rec.data
    array([[5+6j, 3+4j, 1+2j]])
    """
    is_recording = isinstance(signal, Recording)
    data = signal.data if is_recording else np.asarray(signal)
    if not (data.ndim == 2 and np.iscomplexobj(data)):
        raise ValueError("signal must be CxN complex.")
    num_channels, num_samples = data.shape
    if num_channels != 1:
        raise NotImplementedError
    # 1xN complex: flatten and flip along time.
    flipped = np.squeeze(data)[::-1]
    if is_recording:
        return Recording(data=flipped, metadata=signal.metadata)
    return flipped.reshape(num_channels, num_samples)
def spectral_inversion(signal: ArrayLike | Recording) -> np.ndarray | Recording:
    """Negate the imaginary components (Q, Quadrature) of the data samples contained within
    the provided `signal` array or `Recording`.

    :param signal: Input IQ data as a complex CxN array or `Recording`, where C is the number of channels and N
        is the length of the IQ examples.
    :type signal: array_like or utils.data.Recording
    :raises ValueError: If `signal` is not CxN complex.
    :return: A numpy array containing the original I and negated Q data samples if `signal` is an array.
        If `signal` is a `Recording`, returns a `Recording` object with its `data` attribute
        containing the inverted array.
    :rtype: np.ndarray or utils.data.Recording

    >>> rec = Recording(data=[[0+45j, 2-10j]])
    >>> new_rec = spectral_inversion(rec)
    >>> new_rec.data
    array([[0-45j, 2+10j]])
    """
    is_recording = isinstance(signal, Recording)
    data = signal.data if is_recording else np.asarray(signal)
    if not (data.ndim == 2 and np.iscomplexobj(data)):
        raise ValueError("signal must be CxN complex.")
    num_channels, num_samples = data.shape
    if num_channels != 1:
        raise NotImplementedError
    # Negating Q is the complex conjugate: real - 1j * imag.
    inverted = np.conj(np.squeeze(data))
    if is_recording:
        return Recording(data=inverted, metadata=signal.metadata)
    return inverted.reshape(num_channels, num_samples)
def channel_swap(signal: ArrayLike | Recording) -> np.ndarray | Recording:
    """Switch the I (In-phase) with the Q (Quadrature) data samples for each sample within
    the provided `signal` array or `Recording`.

    :param signal: Input IQ data as a complex CxN array or `Recording`, where C is the number of channels and N
        is the length of the IQ examples.
    :type signal: array_like or utils.data.Recording
    :raises ValueError: If `signal` is not CxN complex.
    :return: A numpy array containing the swapped I and Q data samples if `signal` is an array.
        If `signal` is a `Recording`, returns a `Recording` object with its `data` attribute
        containing the swapped array.
    :rtype: np.ndarray or utils.data.Recording

    >>> rec = Recording(data=[[10+20j, 7+35j]])
    >>> new_rec = channel_swap(rec)
    >>> new_rec.data
    array([[20+10j, 35+7j]])
    """
    is_recording = isinstance(signal, Recording)
    data = signal.data if is_recording else np.asarray(signal)
    if not (data.ndim == 2 and np.iscomplexobj(data)):
        raise ValueError("signal must be CxN complex.")
    num_channels, num_samples = data.shape
    if num_channels != 1:
        raise NotImplementedError
    flat = np.squeeze(data)
    # New real part is the old imaginary part and vice versa.
    swapped = flat.imag + 1j * flat.real
    if is_recording:
        return Recording(data=swapped, metadata=signal.metadata)
    return swapped.reshape(num_channels, num_samples)
def amplitude_reversal(signal: ArrayLike | Recording) -> np.ndarray | Recording:
    """Negate the amplitudes of both the I (In-phase) and Q (Quadrature) data samples
    contained within the provided `signal` array or `Recording`.

    :param signal: Input IQ data as a complex CxN array or `Recording`, where C is the number of channels and N
        is the length of the IQ examples.
    :type signal: array_like or utils.data.Recording
    :raises ValueError: If `signal` is not CxN complex.
    :return: A numpy array containing the negated I and Q data samples if `signal` is an array.
        If `signal` is a `Recording`, returns a `Recording` object with its `data` attribute
        containing the negated array.
    :rtype: np.ndarray or utils.data.Recording

    >>> rec = Recording(data=[[4-3j, -5-2j, -9+1j]])
    >>> new_rec = amplitude_reversal(rec)
    >>> new_rec.data
    array([[-4+3j, 5+2j, 9-1j]])
    """
    is_recording = isinstance(signal, Recording)
    data = signal.data if is_recording else np.asarray(signal)
    if not (data.ndim == 2 and np.iscomplexobj(data)):
        raise ValueError("signal must be CxN complex.")
    num_channels, num_samples = data.shape
    if num_channels != 1:
        raise NotImplementedError
    # Negating both I and Q is simple complex negation.
    negated = -np.squeeze(data)
    if is_recording:
        return Recording(data=negated, metadata=signal.metadata)
    return negated.reshape(num_channels, num_samples)
def drop_samples(  # noqa: C901  # TODO: Simplify function
    signal: ArrayLike | Recording, max_section_size: Optional[int] = 2, fill_type: Optional[str] = "zeros"
) -> np.ndarray | Recording:
    """Randomly drop IQ data samples contained within the provided `signal` array or `Recording`.

    This function randomly selects sections of the signal and replaces the current data samples
    in the specified section with another value dependent on the fill type. The input is never
    modified: the replacement happens on a copy (previously the caller's array / Recording
    buffer was mutated in place).

    :param signal: Input IQ data as a complex CxN array or `Recording`, where C is the number of channels and N
        is the length of the IQ examples.
    :type signal: array_like or utils.data.Recording
    :param max_section_size: Maximum allowable size of the section to be dropped and replaced. Default is 2.
    :type max_section_size: int, optional
    :param fill_type: Fill option used to replace dropped section of data (back-fill, front-fill, mean, zeros).
        Default is "zeros".
        "back-fill": replace dropped section with the data sample occurring before the section.
        "front-fill": replace dropped section with the data sample occurring after the section.
        "mean": replace dropped section with mean of the entire signal.
        "zeros": replace dropped section with constant value of 0+0j.
    :type fill_type: str, optional
    :raises ValueError: If `signal` is not CxN complex.
    :raises ValueError: If `max_section_size` is less than 1 or greater than or equal to length of `signal`.
    :raises ValueError: If `fill_type` is not one of the options listed above.
    :return: A numpy array containing the I and Q data samples with replaced subsections if
        `signal` is an array. If `signal` is a `Recording`, returns a `Recording` object with its
        `data` attribute containing the array with dropped samples.
    :rtype: np.ndarray or utils.data.Recording

    >>> rec = Recording(data=[[2+5j, 1+8j, 6+4j, 3+7j, 4+9j]])
    >>> new_rec = drop_samples(rec)
    >>> new_rec.data
    array([[2+5j, 0, 0, 0, 4+9j]])
    """
    if isinstance(signal, Recording):
        data = signal.data
    else:
        data = np.asarray(signal)
    if data.ndim == 2 and np.iscomplexobj(data):
        c, n = data.shape
    else:
        raise ValueError("signal must be CxN complex.")
    if max_section_size < 1 or max_section_size >= n:
        raise ValueError("max_section_size must be at least 1 and must be less than the length of signal.")
    if c != 1:
        raise NotImplementedError
    # Fix: work on a copy. np.asarray and Recording.data can alias the caller's
    # buffer, and the in-place fills below previously mutated the input signal.
    data = np.array(np.squeeze(data))
    if fill_type == "mean":
        mean = np.mean(data)
    i = -1
    j = -1
    # Pointers i and j point to exact positions delimiting the dropped section [i, j].
    while i < n:
        # Generate valid starting point so that at least 1 drop occurs
        i = np.random.randint(j + 1, j + n - max_section_size + 2)
        j = np.random.randint(i, i + max_section_size)
        if j > n - 1:  # Check that the full drop is within the dataset
            break
        # Generate fill based on fill_type
        if fill_type == "back-fill":
            fill = data[i - 1] if i > 0 else data[i]
        elif fill_type == "front-fill":
            fill = data[j + 1] if j < n - 1 else data[j]
        elif fill_type == "mean":
            fill = mean
        elif fill_type == "zeros":
            fill = 0 + 0j
        else:
            raise ValueError(f"fill_type {fill_type} not recognized.")
        # Replaces dropped samples with fill values
        data[i : j + 1] = fill
    if isinstance(signal, Recording):
        return Recording(data=data, metadata=signal.metadata)
    return data.reshape(c, n)
def quantize_tape(
    signal: ArrayLike | Recording, bin_number: Optional[int] = 4, rounding_type: Optional[str] = "floor"
) -> np.ndarray | Recording:
    """Quantize the IQ data of the provided `signal` array or `Recording` by a few bits.

    This function emulates an analog-to-digital converter (ADC) which is commonly seen in digital
    RF systems. The relationship between the number of bins and number of bits is:
    log(# of bins) / log(2) = # of bits.

    :param signal: Input IQ data as a complex CxN array or `Recording`, where C is the number of channels and N
        is the length of the IQ examples.
    :type signal: array_like or utils.data.Recording
    :param bin_number: The number of bins the signal should be divided into. Default is 4.
    :type bin_number: int, optional
    :param rounding_type: The type of rounding applied during processing. Default is "floor".
        "floor": rounds down to the lower bound of the bin.
        "ceiling": rounds up to the upper bound of the bin.
    :type rounding_type: str, optional
    :raises ValueError: If `signal` is not CxN complex.
    :raises UserWarning: (warned, not raised) If `rounding_type` is not "floor" or "ceiling";
        "floor" is then used.
    :return: A numpy array containing the quantized I and Q data samples if `signal` is an array.
        If `signal` is a `Recording`, returns a `Recording` object with its `data` attribute
        containing the quantized array.
    :rtype: np.ndarray or utils.data.Recording

    >>> rec = Recording(data=[[1+1j, 4+4j, 1+2j, 1+4j]])
    >>> new_rec = quantize_tape(rec)
    >>> new_rec.data
    array([[4+4j, 3+3j, 4+1j, 4+3j]])
    """
    if isinstance(signal, Recording):
        data = signal.data
    else:
        data = np.asarray(signal)
    if data.ndim == 2 and np.iscomplexobj(data):
        c, n = data.shape
    else:
        raise ValueError("signal must be CxN complex.")
    if rounding_type not in {"ceiling", "floor"}:
        # Fix: the previous implementation *raised* UserWarning here, aborting the call
        # even though the message promised a floor fallback. Warn and fall back instead.
        warnings.warn('rounding_type must be either "floor" or "ceiling", floor has been selected by default')
        rounding_type = "floor"
    if c != 1:
        raise NotImplementedError
    iq_data = convert_to_2xn(data)
    maximum, minimum = iq_data.max(), iq_data.min()
    bin_edges = np.linspace(minimum, maximum, bin_number + 1)
    indices = np.digitize(iq_data, bin_edges, right=True)
    # If data falls outside the first bin, map it back into the first bin, data will not fall outside of last bin
    indices[indices == 0] = 1
    # Map the data points to the correct bins
    if rounding_type == "ceiling":
        modified_iq_data = bin_edges[indices]
    else:
        modified_iq_data = bin_edges[indices - 1]
    new_data = modified_iq_data[0] + 1j * modified_iq_data[1]
    if isinstance(signal, Recording):
        return Recording(data=new_data, metadata=signal.metadata)
    return new_data.reshape(c, n)
def quantize_parts(
    signal: ArrayLike | Recording,
    max_section_size: Optional[int] = 2,
    bin_number: Optional[int] = 4,
    rounding_type: Optional[str] = "floor",
) -> np.ndarray | Recording:
    """Quantize random parts of the IQ data within the provided `signal` array or `Recording` by a few bits.

    This function emulates an analog-to-digital converter (ADC) which is commonly seen in digital
    RF systems. The relationship between the number of bins and number of bits is:
    log(# of bins) / log(2) = # of bits.

    :param signal: Input IQ data as a complex CxN array or `Recording`, where C is the number of channels and N
        is the length of the IQ examples.
    :type signal: array_like or utils.data.Recording
    :param max_section_size: Maximum allowable size of the section to be quantized. Default is 2.
    :type max_section_size: int, optional
    :param bin_number: The number of bins the signal should be divided into. Default is 4.
    :type bin_number: int, optional
    :param rounding_type: Type of rounding applied during processing. Default is "floor".
        "floor": rounds down to the lower bound of the bin.
        "ceiling": rounds up to the upper bound of the bin.
    :type rounding_type: str, optional
    :raises ValueError: If `signal` is not CxN complex.
    :raises UserWarning: (warned, not raised) If `rounding_type` is not "floor" or "ceiling";
        "floor" is then used.
    :return: A numpy array containing the I and Q data samples with quantized subsections if `signal`
        is an array. If `signal` is a `Recording`, returns a `Recording` object with its `data`
        attribute containing the partially quantized array.
    :rtype: np.ndarray or utils.data.Recording

    >>> rec = Recording(data=[[2+5j, 1+8j, 6+4j, 3+7j, 4+9j]])
    >>> new_rec = quantize_parts(rec)
    >>> new_rec.data
    array([[2+5j, 1+8j, 3.66666667+3.66666667j, 3+7j, 4+9j]])
    """
    if isinstance(signal, Recording):
        data = signal.data
    else:
        data = np.asarray(signal)
    if data.ndim == 2 and np.iscomplexobj(data):
        c, n = data.shape
    else:
        raise ValueError("signal must be CxN complex.")
    if rounding_type not in {"ceiling", "floor"}:
        # Fix: the previous implementation *raised* UserWarning here, aborting the call
        # even though the message promised a floor fallback. Warn and fall back instead.
        warnings.warn('rounding_type must be either "floor" or "ceiling", floor has been selected by default')
        rounding_type = "floor"
    if c != 1:
        raise NotImplementedError
    iq_data = convert_to_2xn(data)
    i_data, q_data = iq_data
    maximum, minimum = iq_data.max(), iq_data.min()
    bin_edges = np.linspace(minimum, maximum, bin_number + 1)
    indices = np.digitize(iq_data, bin_edges, right=True)
    # Map everything from bin 0 to bin 1
    indices[indices == 0] = 1
    i = -1
    j = -1
    # Pointers i and j point to exact positions delimiting the section [i, j] to quantize.
    while i < n:
        # Generate valid starting point so that at least 1 section is quantized
        i = np.random.randint(j + 1, j + n - max_section_size + 2)
        j = np.random.randint(i, i + max_section_size)
        if j > n - 1:  # Check that the full section is within the dataset
            break
        if rounding_type == "ceiling":
            i_data[i : j + 1] = bin_edges[indices[0][i : j + 1]]
            q_data[i : j + 1] = bin_edges[indices[1][i : j + 1]]
        else:
            i_data[i : j + 1] = bin_edges[indices[0][i : j + 1] - 1]
            q_data[i : j + 1] = bin_edges[indices[1][i : j + 1] - 1]
    quantized_data = i_data + 1j * q_data
    if isinstance(signal, Recording):
        return Recording(data=quantized_data, metadata=signal.metadata)
    return quantized_data.reshape(c, n)
def magnitude_rescale(
    signal: ArrayLike | Recording,
    starting_bounds: Optional[tuple] = None,
    max_magnitude: Optional[int] = 1,
) -> np.ndarray | Recording:
    """Selects a random starting point from within the specified starting bounds and multiplies IQ data of the
    provided `signal` array or `Recording` by a random constant.

    :param signal: Input IQ data as a complex CxN array or `Recording`, where C is the number of channels and N
        is the length of the IQ examples.
    :type signal: array_like or utils.data.Recording
    :param starting_bounds: The bounds (inclusive) as indices in which the starting position of the rescaling occurs.
        Default is None, but if user does not assign any bounds, the bounds become (random index, N-1).
    :type starting_bounds: tuple, optional
    :param max_magnitude: The maximum value of the constant that is used to rescale the data. Default is 1.
    :type max_magnitude: int, optional
    :raises ValueError: If `signal` is not CxN complex.
    :raises ValueError: If `starting_bounds` are not valid, ordered indices for the dataset.
    :return: A numpy array containing the I and Q data samples with the rescaled magnitude after the random
        starting point if `signal` is an array. If `signal` is a `Recording`, returns a `Recording`
        object with its `data` attribute containing the rescaled array.
    :rtype: np.ndarray or utils.data.Recording

    >>> rec = Recording(data=[[2+5j, 1+8j, 6+4j, 3+7j, 4+9j]])
    >>> new_rec = magnitude_rescale(rec)
    >>> new_rec.data
    array([[2+5j, 1+8j, 6+4j, 3+7j, 3.03181761+6.82158963j]])
    """
    if isinstance(signal, Recording):
        data = signal.data
    else:
        data = np.asarray(signal)
    if data.ndim == 2 and np.iscomplexobj(data):
        c, n = data.shape
    else:
        raise ValueError("signal must be CxN complex.")
    if starting_bounds is None:
        # No bounds supplied: pick a random lower bound and allow the rescale to start anywhere after it.
        starting_bounds = (np.random.randint(0, n), n - 1)
    # Also reject inverted bounds up front so np.random.randint does not fail with a confusing message.
    if starting_bounds[0] < 0 or starting_bounds[1] > n - 1 or starting_bounds[0] > starting_bounds[1]:
        raise ValueError("starting_bounds must be valid indices for the dataset.")
    if c == 1:
        data = np.squeeze(data)
        # Random start index within the (inclusive) bounds.
        starting_point = np.random.randint(starting_bounds[0], starting_bounds[1] + 1)
        # Uniform random scale factor in [0, max_magnitude).
        magnitude = np.random.rand() * max_magnitude
        rescaled_section = data[starting_point:] * magnitude
        rescaled_data = np.concatenate((data[:starting_point], rescaled_section))
    else:
        raise NotImplementedError
    if isinstance(signal, Recording):
        return Recording(data=rescaled_data, metadata=signal.metadata)
    else:
        return rescaled_data.reshape(c, n)
def cut_out(  # noqa: C901 # TODO: Simplify function
    signal: ArrayLike | Recording, max_section_size: Optional[int] = 3, fill_type: Optional[str] = "ones"
) -> np.ndarray | Recording:
    """Cuts out random sections of IQ data and replaces them with either 0s, 1s, or low, average, or high
    signal-to-noise ratio (SNR) additive white gaussian noise (AWGN) within the provided `signal` array or
    `Recording`.

    :param signal: Input IQ data as a complex CxN array or `Recording`, where C is the number of channels and N
        is the length of the IQ examples.
    :type signal: array_like or utils.data.Recording
    :param max_section_size: Maximum allowable size of the section to be cut out. Default is 3.
    :type max_section_size: int, optional
    :param fill_type: Fill option used to replace cutout section of data (zeros, ones, low-snr, avg-snr, high-snr).
        Default is "ones".
        "zeros": replace cutout section with 0s.
        "ones": replace cutout section with 1s.
        "low-snr": replace cutout section with AWGN with an SNR of 0.5.
        "avg-snr": replace cutout section with AWGN with an SNR of 1.
        "high-snr": replace cutout section with AWGN with an SNR of 2.
    :type fill_type: str, optional
    :raises ValueError: If `signal` is not CxN complex.
    :raises UserWarning: If `fill_type` is not "zeros", "ones", "low-snr", "avg-snr", or "high-snr".
    :raises ValueError: If `max_section_size` is less than 1 or greater than or equal to length of `signal`.
    :return: A numpy array containing the I and Q data samples with random sections cut out and replaced according to
        `fill_type` if `signal` is an array. If `signal` is a `Recording`, returns a `Recording` object
        with its `data` attribute containing the cut out and replaced array.
    :rtype: np.ndarray or utils.data.Recording

    >>> rec = Recording(data=[[2+5j, 1+8j, 6+4j, 3+7j, 4+9j]])
    >>> new_rec = cut_out(rec)
    >>> new_rec.data
    array([[2+5j, 1+8j, 1+1j, 1+1j, 1+1j]])
    """
    if isinstance(signal, Recording):
        data = signal.data.copy()  # Copy so the caller's recording data is not modified in place.
    else:
        data = np.array(signal)  # np.array (not asarray) forces a copy so ndarray inputs are untouched.
    if data.ndim == 2 and np.iscomplexobj(data):
        c, n = data.shape
    else:
        raise ValueError("signal must be CxN complex.")
    if fill_type not in {"zeros", "ones", "low-snr", "avg-snr", "high-snr"}:
        raise UserWarning(
            """fill_type must be "zeros", "ones", "low-snr", "avg-snr", or "high-snr",
            "ones" has been selected by default"""
        )
    if max_section_size < 1 or max_section_size >= n:
        raise ValueError("max_section_size must be at least 1 and must be less than the length of signal.")
    if c == 1:
        data = np.squeeze(data)
        i = -1
        j = -1
        # Pointers i and j point to exact positions
        while i < n:
            # Generate valid starting point so that at least 1 drop occurs
            i = np.random.randint(j + 1, j + n - max_section_size + 2)
            j = np.random.randint(i, i + max_section_size)
            if j > n - 1:  # Check that the full drop is within the dataset
                break
            # TODO: Check if we can collapse last three options which depends on what snr value the user enters
            if fill_type == "zeros":
                fill = 0 + 0j
            elif fill_type == "ones":
                fill = 1 + 1j
            elif fill_type == "low-snr":
                # NOTE(review): generate_awgn is given a 1xK list-wrapped slice here -- confirm its
                # return shape broadcasts into the K-length target slice below.
                fill = generate_awgn([data[i : j + 1]], 0.5)
            elif fill_type == "avg-snr":
                fill = generate_awgn([data[i : j + 1]], 1)
            else:
                fill = generate_awgn([data[i : j + 1]], 2)
            data[i : j + 1] = fill
    else:
        raise NotImplementedError
    if isinstance(signal, Recording):
        return Recording(data=data, metadata=signal.metadata)
    else:
        return data.reshape(c, n)
def patch_shuffle(signal: ArrayLike | Recording, max_patch_size: Optional[int] = 3) -> np.ndarray | Recording:
    """Selects random patches of the IQ data and randomly shuffles the data samples within the specified patch of
    the provided `signal` array or `Recording`.

    The I and Q components within a patch are shuffled independently, so samples may be re-paired.

    :param signal: Input IQ data as a complex CxN array or `Recording`, where C is the number of channels and N
        is the length of the IQ examples.
    :type signal: array_like or utils.data.Recording
    :param max_patch_size: Maximum allowable patch size of the data that can be shuffled. Default is 3.
    :type max_patch_size: int, optional
    :raises ValueError: If `signal` is not CxN complex.
    :raises ValueError: If `max_patch_size` is less than or equal to 1 or greater than length of `signal`.
    :return: A numpy array containing the I and Q data samples with randomly shuffled regions if `signal` is
        an array. If `signal` is a `Recording`, returns a `Recording` object with its `data` attribute containing
        the shuffled array.
    :rtype: np.ndarray or utils.data.Recording

    >>> rec = Recording(data=[[2+5j, 1+8j, 6+4j, 3+7j, 4+9j]])
    >>> new_rec = patch_shuffle(rec)
    >>> new_rec.data
    array([[2+5j, 1+8j, 3+4j, 6+9j, 4+7j]])
    """
    if isinstance(signal, Recording):
        data = signal.data.copy()  # Cannot shuffle read-only array.
    else:
        data = np.array(signal)  # np.array (not asarray) forces a copy so the caller's array is not shuffled in place.
    if data.ndim == 2 and np.iscomplexobj(data):
        c, n = data.shape
    else:
        raise ValueError("signal must be CxN complex.")
    if max_patch_size > n or max_patch_size <= 1:
        raise ValueError("max_patch_size must be less than or equal to the length of signal and greater than 1.")
    if c == 1:
        data = np.squeeze(data)
        i = -1
        j = -1
        # Pointers i and j point to exact positions
        while i < n:
            # Generate valid starting point so that at least 1 drop occurs
            i = np.random.randint(j + 1, j + n - max_patch_size + 2)
            j = np.random.randint(i, i + max_patch_size)
            if j > n - 1:  # Check that the full drop is within the dataset
                break
            # Shuffle the real and imaginary rails of the patch in place (independently).
            np.random.shuffle(data.real[i : j + 1])
            np.random.shuffle(data.imag[i : j + 1])
    else:
        raise NotImplementedError
    if isinstance(signal, Recording):
        return Recording(data=data, metadata=signal.metadata)
    else:
        return data.reshape(c, n)

View File

@ -0,0 +1,365 @@
"""
This module comprises various transforms designed to represent signal impairments.
These transforms take a recording as input and return a corresponding recording with
the impairment model applied; we call the latter an impaired recording.
Signals travel through transmission media, which are not perfect. The imperfection
causes signal impairment, meaning that the signal at the beginning of the medium is
not the same as the signal at the end of the medium. What is sent is not what is received.
Three causes of impairment are attenuation, distortion, and noise.
"""
from typing import Optional
import numpy as np
from numpy.typing import ArrayLike
from scipy.signal import resample_poly
from utils.data import Recording
from utils.transforms import iq_augmentations
def add_awgn_to_signal(signal: ArrayLike | Recording, snr: Optional[float] = 1) -> np.ndarray | Recording:
    """Generates additive white gaussian noise (AWGN) relative to the signal-to-noise ratio (SNR) of the
    provided `signal` array or `Recording`.

    This function calculates the root mean squared (RMS) power of `signal` and then finds the RMS power of the noise
    which matches the specified SNR. Then, the AWGN is generated after calculating the variance and randomly
    calculating the amplitude and phase of the noise. Then, this generated AWGN is added to the original signal and
    returned.

    :param signal: Input IQ data as a complex ``C x N`` array or `Recording`, where ``C`` is the number of channels
        and ``N`` is the length of the IQ examples.
    :type signal: array_like or utils.data.Recording
    :param snr: The signal-to-noise ratio in dB. Default is 1.
    :type snr: float, optional
    :raises ValueError: If `signal` is not CxN complex.
    :return: A numpy array which is the sum of the noise (which matches the SNR) and the original signal. If `signal`
        is a `Recording`, returns a `Recording object` with its `data` attribute containing the noisy signal array.
    :rtype: np.ndarray or utils.data.Recording

    >>> rec = Recording(data=[[1+1j, 2+2j]])
    >>> new_rec = add_awgn_to_signal(rec)
    >>> new_rec.data
    array([[0.83141973+0.32529242j, -1.00909846+2.39282713j]])
    """
    if isinstance(signal, Recording):
        data = signal.data
    else:
        data = np.asarray(signal)
    if data.ndim != 2 or not np.iscomplexobj(data):
        raise ValueError("signal must be CxN complex.")
    # NOTE(review): the docstring says `snr` is in dB, but sibling transforms pass linear-looking
    # ratios (0.5, 1, 2) to generate_awgn -- confirm the unit against generate_awgn's contract.
    noise = iq_augmentations.generate_awgn(signal=data, snr=snr)
    # (Removed leftover debug print of the generated noise.)
    noisy_signal = data + noise
    if isinstance(signal, Recording):
        return Recording(data=noisy_signal, metadata=signal.metadata)
    else:
        return noisy_signal
def time_shift(signal: ArrayLike | Recording, shift: Optional[int] = 1) -> np.ndarray | Recording:
    """Apply a time shift to a signal.

    After the time shift is applied, we fill any empty regions with zeros. Positive shifts move
    samples toward later indices (right); negative shifts move samples toward earlier indices (left).

    :param signal: Input IQ data as a complex CxN array or `Recording`, where C is the number of channels and N
        is the length of the IQ examples.
    :type signal: array_like or utils.data.Recording
    :param shift: The number of indices to shift by. Default is 1.
    :type shift: int, optional
    :raises ValueError: If `signal` is not CxN complex.
    :raises UserWarning: If `shift` is greater than length of `signal`.
    :return: A numpy array which represents the time-shifted signal. If `signal` is a `Recording`,
        returns a `Recording object` with its `data` attribute containing the time-shifted array.
    :rtype: np.ndarray or utils.data.Recording

    >>> rec = Recording(data=[[1+1j, 2+2j, 3+3j, 4+4j, 5+5j]])
    >>> new_rec = time_shift(rec, -2)
    >>> new_rec.data
    array([[3+3j, 4+4j, 5+5j, 0+0j, 0+0j]])
    """
    if isinstance(signal, Recording):
        data = signal.data
    else:
        data = np.asarray(signal)
    if data.ndim == 2 and np.iscomplexobj(data):
        c, n = data.shape
    else:
        raise ValueError("signal must be CxN complex.")
    if shift > n:
        raise UserWarning("shift is greater than signal length")
    shifted_data = np.zeros_like(data)
    if c == 1:
        if shift == 0:
            # A zero shift must return the signal unchanged; the slicing below would produce an
            # all-zero array because data[:, :-0] is an empty slice.
            shifted_data = data.copy()
        elif shift > 0:
            # Shift to the right; the leading `shift` samples are zero-filled.
            shifted_data[:, shift:] = data[:, :-shift]
        else:
            # Shift to the left; the trailing `-shift` samples are zero-filled.
            shifted_data[:, :shift] = data[:, -shift:]
    else:
        raise NotImplementedError
    if isinstance(signal, Recording):
        return Recording(data=shifted_data, metadata=signal.metadata)
    else:
        return shifted_data
def frequency_shift(signal: ArrayLike | Recording, shift: Optional[float] = 0.5) -> np.ndarray | Recording:
    """Apply a frequency shift to a signal.

    .. note::
        The frequency shift is applied relative to the sample rate.

    :param signal: Input IQ data as a complex CxN array or `Recording`, where C is the number of channels and N
        is the length of the IQ examples.
    :type signal: array_like or utils.data.Recording
    :param shift: The frequency shift relative to the sample rate. Must be in the range ``[-0.5, 0.5]``.
        Default is 0.5.
    :type shift: float, optional
    :raises ValueError: If the provided frequency shift is not in the range ``[-0.5, 0.5]``.
    :raises ValueError: If `signal` is not CxN complex.
    :return: A numpy array which represents the frequency-shifted signal. If `signal` is a `Recording`,
        returns a `Recording object` with its `data` attribute containing the frequency-shifted array.
    :rtype: np.ndarray or utils.data.Recording

    >>> rec = Recording(data=[[1+1j, 2+2j, 3+3j, 4+4j]])
    >>> new_rec = frequency_shift(rec, -0.4)
    >>> new_rec.data
    array([[1+1j, -0.44246348-2.79360449j, -1.92611857+3.78022053j, 5.04029404-2.56815809j]])
    """
    if shift > 0.5 or shift < -0.5:
        raise ValueError("Frequency shift must be in the range [-0.5, 0.5]")
    data = signal.data if isinstance(signal, Recording) else np.asarray(signal)
    if data.ndim != 2 or not np.iscomplexobj(data):
        raise ValueError("signal must be CxN complex.")
    c, n = data.shape
    shifted_data = np.zeros_like(data)
    if c != 1:
        raise NotImplementedError
    # Phase ramp corresponding to the requested (sample-rate-relative) frequency offset.
    ramp = 2.0 * np.pi * shift * np.arange(n)
    cos_ramp = np.cos(ramp)
    sin_ramp = np.sin(ramp)
    # Complex rotation by the ramp, expanded into its real/imaginary trigonometric form.
    shifted_data.real = data.real * cos_ramp - data.imag * sin_ramp
    shifted_data.imag = data.real * sin_ramp + data.imag * cos_ramp
    if isinstance(signal, Recording):
        return Recording(data=shifted_data, metadata=signal.metadata)
    return shifted_data
def phase_shift(signal: ArrayLike | Recording, phase: Optional[float] = np.pi) -> np.ndarray | Recording:
    """Apply a phase shift to a signal.

    Every IQ sample is rotated by the same fixed phase angle.

    :param signal: Input IQ data as a complex CxN array or `Recording`, where C is the number of channels and N
        is the length of the IQ examples.
    :type signal: array_like or utils.data.Recording
    :param phase: The phase angle by which to rotate the IQ samples, in radians. Must be in the range ``[-π, π]``.
        Default is π.
    :type phase: float, optional
    :raises ValueError: If the provided phase rotation is not in the range ``[-π, π]``.
    :raises ValueError: If `signal` is not CxN complex.
    :return: A numpy array which represents the phase-shifted signal. If `signal` is a `Recording`,
        returns a `Recording object` with its `data` attribute containing the phase-shifted array.
    :rtype: np.ndarray or utils.data.Recording

    >>> rec = Recording(data=[[1+1j, 2+2j, 3+3j, 4+4j]])
    >>> new_rec = phase_shift(rec, np.pi/2)
    >>> new_rec.data
    array([[-1.+1.j, -2.+2.j, -3.+3.j, -4.+4.j]])
    """
    if phase > np.pi or phase < -np.pi:
        raise ValueError("Phase rotation must be in the range [-π, π]")
    data = signal.data if isinstance(signal, Recording) else np.asarray(signal)
    if data.ndim != 2 or not np.iscomplexobj(data):
        raise ValueError("signal must be CxN complex.")
    c, n = data.shape
    if c != 1:
        raise NotImplementedError
    # Multiplying by a unit-magnitude complex constant rotates every sample by `phase` radians.
    shifted_data = data * np.exp(1j * phase)
    if isinstance(signal, Recording):
        return Recording(data=shifted_data, metadata=signal.metadata)
    return shifted_data
def iq_imbalance(
    signal: ArrayLike | Recording,
    amplitude_imbalance: Optional[float] = 1.5,
    phase_imbalance: Optional[float] = np.pi,
    dc_offset: Optional[float] = 1.5,
) -> np.ndarray | Recording:
    """Apply an IQ Imbalance to a signal.
    .. note::
        Based on MathWorks' `I/Q Imbalance <https://www.mathworks.com/help/comm/ref/iqimbalance.html>`_.
    :param signal: Input IQ data as a complex CxN array or `Recording`, where C is the number of channels and N
    is the length of the IQ examples.
    :type signal: array_like or utils.data.Recording
    :param amplitude_imbalance: The IQ amplitude imbalance to apply, in dB. Default is 1.5.
    :type amplitude_imbalance: float, optional
    :param phase_imbalance: The IQ phase imbalance to apply, in radians. Default is π.
    Must be in the range ``[-π, π]``.
    :type phase_imbalance: float, optional
    :param dc_offset: The IQ DC offset to apply, in dB. Default is 1.5.
    :type dc_offset: float, optional
    :raises ValueError: If the phase imbalance is not in the range ``[-π, π]``.
    :raises ValueError: If `signal` is not CxN complex.
    :return: A numpy array which is the original signal with an applied IQ imbalance. If `signal` is a `Recording`,
    returns a `Recording object` with its `data` attribute containing the IQ imbalanced signal array.
    :rtype: np.ndarray or utils.data.Recording
    >>> rec = Recording(data=[[2+18j, -34+2j, 3+9j]])
    >>> new_rec = iq_imbalance(rec, 1, np.pi, 2)
    >>> new_rec.data
    array([[-38.38613587-4.78555031j, -4.26512621+81.35435535j, -19.19306793-7.17832547j]])
    """
    # TODO: Additional info needs to be added to docstring description
    if phase_imbalance > np.pi or phase_imbalance < -np.pi:
        raise ValueError("Phase imbalance must be in the range [-π, π].")
    if isinstance(signal, Recording):
        data = signal.data
    else:
        data = np.asarray(signal)
    if data.ndim == 2 and np.iscomplexobj(data):
        c, n = data.shape
    else:
        raise ValueError("signal must be CxN complex.")
    if c == 1:
        # Apply amplitude imbalance: the dB imbalance is split evenly between the rails,
        # boosting I by +imbalance/2 dB and attenuating Q by -imbalance/2 dB.
        data = (
            10 ** (0.5 * amplitude_imbalance / 20.0) * data.real
            + 1j * 10 ** (-0.5 * amplitude_imbalance / 20.0) * data.imag
        )
        # Apply phase imbalance: rotate the I and Q rails of the (already amplitude-imbalanced)
        # complex signal by -phase/2 and +phase/2 respectively.
        data = (
            np.exp(-1j * phase_imbalance / 2.0) * data.real
            + np.exp(1j * (np.pi / 2.0 + phase_imbalance / 2.0)) * data.imag
        )
        # Apply DC offset
        # NOTE(review): this adds back a per-sample copy of the signal scaled by 10**(dc_offset/20),
        # i.e. it acts as a gain of (1 + 10**(dc_offset/20)) on each sample rather than adding a
        # constant offset -- confirm this is the intended "DC offset" behavior; the documented
        # doctest output depends on it.
        imbalanced_data = data + (10 ** (dc_offset / 20.0) * data.real + 1j * 10 ** (dc_offset / 20.0) * data.imag)
    else:
        raise NotImplementedError
    if isinstance(signal, Recording):
        return Recording(data=imbalanced_data, metadata=signal.metadata)
    else:
        return imbalanced_data
def resample(signal: ArrayLike | Recording, up: Optional[int] = 4, down: Optional[int] = 2) -> np.ndarray | Recording:
    """Resample a signal using polyphase filtering.

    Uses scipy.signal.resample_poly to upsample the signal by the factor *up*, apply a zero-phase
    low-pass FIR filter, and downsample the signal by the factor *down*. The output is truncated or
    zero-padded so its length matches the original N samples.

    :param signal: Input IQ data as a complex CxN array or `Recording`, where C is the number of channels and N
        is the length of the IQ examples.
    :type signal: array_like or utils.data.Recording
    :param up: The upsampling factor. Default is 4.
    :type up: int, optional
    :param down: The downsampling factor. Default is 2.
    :type down: int, optional
    :raises ValueError: If `signal` is not CxN complex.
    :return: A numpy array which represents the resampled signal. If `signal` is a `Recording`,
        returns a `Recording object` with its `data` attribute containing the resampled array.
    :rtype: np.ndarray or utils.data.Recording

    >>> rec = Recording(data=[[1+1j, 2+2j]])
    >>> new_rec = resample(rec, 2, 1)
    >>> new_rec.data
    array([[1.00051747+1.00051747j, 1.90020207+1.90020207j]])
    """
    if isinstance(signal, Recording):
        data = signal.data
    else:
        data = np.asarray(signal)
    if data.ndim == 2 and np.iscomplexobj(data):
        c, n = data.shape
    else:
        raise ValueError("signal must be CxN complex.")
    if c == 1:
        data = np.squeeze(data)
        resampled_iqdata = resample_poly(x=data, up=up, down=down)
        # Reshape array so that slicing operations work on resampled data
        resampled_iqdata = np.reshape(resampled_iqdata, newshape=(1, len(resampled_iqdata)))
        if resampled_iqdata.shape[1] > n:
            # Longer than the input: truncate back to the original length.
            resampled_iqdata = resampled_iqdata[:, :n]
        elif resampled_iqdata.shape[1] < n:
            # Shorter than the input: zero-pad up to the original length. (Previously the padded
            # array had the wrong shape and was discarded, returning a short array.)
            padded = np.zeros((1, n), dtype=resampled_iqdata.dtype)
            padded[:, : resampled_iqdata.shape[1]] = resampled_iqdata
            resampled_iqdata = padded
    else:
        raise NotImplementedError
    if isinstance(signal, Recording):
        return Recording(data=resampled_iqdata, metadata=signal.metadata)
    else:
        return resampled_iqdata

View File

@ -0,0 +1,9 @@
"""
The Helpers module contains a bunch of helper functions, including array conversion utilities.
"""
__all__ = [
"bytes_to_samples",
]
from .bytes_to_samples import bytes_to_samples

View File

@ -0,0 +1,80 @@
"""
IQ data represents the in-phase (I) and quadrature (Q) components of a signal. There are two ways to represent
single-channel IQ signals:
#. **Complex 1xN Format:** In the complex 1xN format, the IQ data is represented as a 2D array of complex numbers with
shape 1xN. In this format, the real part of each complex number represents the in-phase component, while the
imaginary part represents the quadrature component.
#. **Real 2xN Format:** In the real 2xN format, the IQ data is represented as a 2D array of real numbers with shape
2xN. In this format, the first row contains the in-phase components, while the second row contains the quadrature
components.
This submodule provides functions to verify and convert between these two formats.
"""
import numpy as np
from numpy.typing import ArrayLike
def convert_to_2xn(arr: np.ndarray) -> np.ndarray:
    """Convert arr to the real 2xN format. If arr is already real 2xN, then you'll get back a copy.

    :param arr: Array of IQ samples, in the complex 1XN format.
    :type arr: array_like
    :return: The provided signal, in the real 2xN format.
    :rtype: np.ndarray
    """
    # Already real 2xN: just hand back an independent copy.
    if is_2xn(arr):
        return np.copy(arr)
    if is_1xn(arr):
        channel = arr[0]
        # Stack the real (I) and imaginary (Q) parts as the two rows.
        return np.vstack((np.real(channel), np.imag(channel)))
    raise ValueError("arr is neither complex 1xN nor real 2xN.")
def convert_to_1xn(arr: np.ndarray) -> np.ndarray:
    """Convert arr to the complex 1xN format. If arr is already complex 1xN, then you'll get back a copy.

    :param arr: Array of IQ samples, in the real 2xN format.
    :type arr: np.ndarray
    :return: The provided signal, in the complex 1xN format.
    :rtype: np.ndarray
    """
    # Already complex 1xN: just hand back an independent copy.
    if is_1xn(arr):
        return np.copy(arr)
    if is_2xn(arr):
        # Combine row 0 (I) and row 1 (Q) into complex samples, keeping a leading channel axis.
        combined = arr[0, :] + 1j * arr[1, :]
        return combined[np.newaxis, :]
    raise ValueError("arr is neither complex 1xN nor real 2xN.")
def is_1xn(arr: ArrayLike) -> bool:
    """Check whether `arr` is in the complex 1xN IQ format.

    :param arr: Candidate array of IQ samples.
    :type arr: array_like
    :return: True if arr is complex 1xN, False otherwise.
    :rtype: bool
    """
    a = np.asarray(arr)
    # Complex-valued 2D array with exactly one row (the single IQ channel).
    return a.ndim == 2 and a.shape[0] == 1 and np.iscomplexobj(a)
def is_2xn(arr: ArrayLike) -> bool:
    """Check whether `arr` is in the real 2xN IQ format.

    :param arr: Candidate array of IQ samples.
    :type arr: array_like
    :return: True if arr is real 2xN, False otherwise.
    :rtype: bool
    """
    a = np.asarray(arr)
    # Real-valued 2D array with exactly two rows (I row and Q row).
    return a.ndim == 2 and a.shape[0] == 2 and not np.iscomplexobj(a)

View File

@ -0,0 +1,18 @@
from numpy.typing import NDArray
def bytes_to_samples(data: bytes) -> NDArray:
    """Convert bytes to IQ samples, in the complex 1xN format.

    :param data: Array of bytes
    :type data: bytes
    :raises NotImplementedError: Always -- this function is an unimplemented stub.
    :return: Tape of IQ samples, as numpy complex type
    :rtype: np.ndarray
    """
    # NOTE(review): unimplemented stub. The sketch below assumes interleaved int16 I/Q pairs
    # scaled by 2048 (12-bit full scale) -- confirm against the actual byte source before enabling.
    # samples = np.frombuffer(data, dtype=np.int16).astype(np.float32)
    # samples /= 2048
    # samples = samples[::2] + 1j * samples[1::2]
    # # samples = samples.view(np.complex64)
    # return samples
    raise NotImplementedError

View File

@ -0,0 +1,12 @@
"""
The package contains assorted plotting and report generation utilities to help visualize RIA components such as
recordings and radio datasets.
"""
__all__ = [
"view_annotations",
"view_channels",
"view_sig",
]
from .view_signal import view_annotations, view_channels, view_sig

View File

@ -0,0 +1,192 @@
import numpy as np
import plotly.graph_objects as go
import scipy.signal as signal
from plotly.graph_objs import Figure
from scipy.fft import fft, fftshift
from utils.data import Recording
def spectrogram(rec: Recording, thumbnail: bool = False) -> Figure:
    """Create a spectrogram for the recording.

    :param rec: Signal to plot.
    :type rec: utils.data.Recording
    :param thumbnail: Whether to return a small thumbnail version or full plot.
    :type thumbnail: bool
    :return: Spectrogram, as a Plotly figure.
    """
    complex_signal = rec.data[0]
    sample_rate = int(rec.metadata.get("sample_rate", 1))
    plot_length = len(complex_signal)
    # Choose an FFT size appropriate for the number of available samples.
    if plot_length < 2000:
        fft_size = 64
    elif plot_length < 10000:
        fft_size = 256
    elif plot_length < 1000000:
        fft_size = 1024
    else:
        fft_size = 2048
    frequencies, times, Sxx = signal.spectrogram(
        complex_signal,
        fs=sample_rate,
        nfft=fft_size,
        nperseg=fft_size,
        noverlap=fft_size // 8,
        scaling="density",
        mode="complex",
        return_onesided=False,
    )
    # Convert complex values to amplitude and then to log scale for visualization
    Sxx_magnitude = np.abs(Sxx)
    Sxx_log = np.log10(Sxx_magnitude + 1e-6)
    # Normalize spectrogram values between 0 and 1 for plotting; guard against a flat
    # (zero-dynamic-range) spectrogram, where the max would be 0 and divide by zero.
    Sxx_log_shifted = Sxx_log - np.min(Sxx_log)
    Sxx_log_norm = Sxx_log_shifted / max(np.max(Sxx_log_shifted), 1e-12)
    # Shift frequency bins and spectrogram rows so frequencies run from negative to positive
    frequencies_shifted = np.fft.fftshift(frequencies)
    Sxx_shifted = np.fft.fftshift(Sxx_log_norm, axes=0)
    fig = go.Figure(
        data=go.Heatmap(
            z=Sxx_shifted,
            # scipy returns segment times in seconds, so plot them directly against the [s] axis
            # (previously divided by 1e6, which mis-scaled the time axis by a factor of 10^6).
            x=times,
            y=frequencies_shifted,
            colorscale="Viridis",
            zmin=0,
            zmax=1,
            reversescale=False,
            showscale=False,
        )
    )
    if thumbnail:
        fig.update_xaxes(showticklabels=False)
        fig.update_yaxes(showticklabels=False)
        fig.update_layout(
            template="plotly_dark",
            width=200,
            height=100,
            margin=dict(l=5, r=5, t=5, b=5),
            xaxis=dict(scaleanchor=None),
            yaxis=dict(scaleanchor=None),
        )
    else:
        fig.update_layout(
            title="Spectrogram",
            xaxis_title="Time [s]",
            yaxis_title="Frequency [Hz]",
            template="plotly_dark",
            height=300,
            width=800,
        )
    return fig
def iq_time_series(rec: Recording) -> Figure:
    """Create a time series plot of the real and imaginary parts of signal.

    :param rec: Signal to plot.
    :type rec: utils.data.Recording
    :return: Time series plot as a Plotly figure.
    """
    samples = rec.data[0]
    fs = int(rec.metadata.get("sample_rate", 1))
    # Time axis in seconds, one entry per sample.
    t = np.arange(len(samples)) / fs
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=t, y=samples.real, mode="lines", name="I (In-phase)", line=dict(width=0.6)))
    fig.add_trace(go.Scatter(x=t, y=samples.imag, mode="lines", name="Q (Quadrature)", line=dict(width=0.6)))
    fig.update_layout(
        title="IQ Time Series",
        xaxis_title="Time [s]",
        yaxis_title="Amplitude",
        template="plotly_dark",
        height=300,
        width=800,
        showlegend=True,
    )
    return fig
def frequency_spectrum(rec: Recording) -> Figure:
    """Create a frequency spectrum plot from the recording.

    :param rec: Input signal to plot.
    :type rec: utils.data.Recording
    :return: Frequency spectrum as a Plotly figure.
    """
    complex_signal = rec.data[0]
    center_frequency = int(rec.metadata.get("center_frequency", 0))
    sample_rate = int(rec.metadata.get("sample_rate", 1))
    epsilon = 1e-10  # Guards the log against zero-magnitude bins.
    spectrum = np.abs(fftshift(fft(complex_signal)))
    # Frequency axis centered on the recording's center frequency.
    freqs = np.linspace(-sample_rate / 2, sample_rate / 2, len(complex_signal)) + center_frequency
    log_spectrum = np.log10(spectrum + epsilon)
    # Normalize to [0, 1]; guard against a flat spectrum, where max == min would divide by zero.
    spread = log_spectrum.max() - log_spectrum.min()
    if spread > 0:
        scaled_log_spectrum = (log_spectrum - log_spectrum.min()) / spread
    else:
        scaled_log_spectrum = np.zeros_like(log_spectrum)
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=freqs, y=scaled_log_spectrum, mode="lines", name="Spectrum", line=dict(width=0.4)))
    fig.update_layout(
        title="Frequency Spectrum",
        xaxis_title="Frequency [Hz]",
        yaxis_title="Magnitude",
        yaxis_type="log",
        template="plotly_dark",
        height=300,
        width=800,
        showlegend=False,
    )
    return fig
def constellation(rec: Recording) -> Figure:
    """Create a constellation plot from the recording.

    :param rec: Input signal to plot.
    :type rec: utils.data.Recording
    :return: Constellation as a Plotly figure.
    """
    samples = rec.data[0]
    # Downsample to roughly this many points: plotting every raw sample hurts rendering
    # performance and interactivity without adding visible detail to the constellation.
    target_number_of_points = 5000
    stride = max(1, len(samples) // target_number_of_points)
    in_phase = samples.real[::stride]
    quadrature = samples.imag[::stride]
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=in_phase, y=quadrature, mode="lines", name="Constellation", line=dict(width=0.2)))
    # NOTE(review): the fixed [-1.1, 1.1] window assumes unit-normalized IQ data -- confirm upstream scaling.
    fig.update_layout(
        title="Constellation",
        xaxis_title="In-phase (I)",
        yaxis_title="Quadrature (Q)",
        template="plotly_dark",
        height=400,
        width=400,
        showlegend=False,
        xaxis=dict(range=[-1.1, 1.1]),
        yaxis=dict(range=[-1.1, 1.1]),
    )
    return fig