from __future__ import annotations

import os
import warnings
from abc import ABC
from typing import Optional

import h5py
import numpy as np

from ria_toolkit_oss.data.datasets.h5helpers import (
    append_entry_inplace,
    copy_dataset_entry_by_index,
)
from ria_toolkit_oss.data.datasets.radio_dataset import RadioDataset


class IQDataset(RadioDataset, ABC):
    """An ``IQDataset`` is a ``RadioDataset`` tailored for machine learning tasks that involve processing
    radiofrequency (RF) signals represented as In-phase (I) and Quadrature (Q) samples.

    For machine learning tasks that involve processing spectrograms, please use
    ria_toolkit_oss.data.datasets.SpectDataset instead.

    This is an abstract interface defining common properties and behaviour of IQDatasets. Therefore, this class
    should not be instantiated directly. Instead, it is subclassed to define custom interfaces for specific machine
    learning backends.

    :param source: Path to the dataset source file. For more information on dataset source files
        and their format, see :doc:`radio_datasets`.
    :type source: str or os.PathLike
    """

    def __init__(self, source: str | os.PathLike):
        """Create a new IQDataset backed by the given source file.

        :param source: Path to the dataset source file; forwarded unchanged to ``RadioDataset``.
        :type source: str or os.PathLike
        """
        super().__init__(source=source)

    @property
    def shape(self) -> tuple[int, ...]:
        """IQ datasets are M x C x N, where M is the number of examples, C is the number of channels, N is the
        length of the signals.

        :return: The shape of the dataset. The elements of the shape tuple give the lengths of the corresponding
            dataset dimensions.
        :type: tuple of ints
        """
        return super().shape

    def trim_examples(
        self, trim_length: int, keep: Optional[str] = "start", inplace: Optional[bool] = False
    ) -> IQDataset | None:
        """Trims all examples in a dataset to a desired length.

        :param trim_length: The desired length of the trimmed examples. Must be positive and strictly less than
            the current example length.
        :type trim_length: int
        :param keep: Specifies the part of the example to keep (case-insensitive). Defaults to "start"; ``None``
            is treated as "start". The options are:

            - "start": keep the first ``trim_length`` samples.
            - "end": keep the last ``trim_length`` samples.
            - "middle": keep ``trim_length`` samples beginning at the midpoint index (``example_length // 2``).
            - "random": keep ``trim_length`` samples beginning at a random index. The index is drawn once and
              applied to every example in the dataset.
        :type keep: str, optional
        :param inplace: If True, the operation modifies the existing source file directly and returns None.
            If False, the operation creates a new dataset object and corresponding source file, leaving the
            original dataset unchanged. Default is False.
        :type inplace: bool

        :raises ValueError: If trim_length is greater than or equal to the length of the examples.
        :raises ValueError: If trim_length is not a positive integer.
        :raises ValueError: If value of keep is not recognized.
        :raises ValueError: If specified trim length is invalid for middle index.

        :return: The dataset that is composed of shorter examples, or None if ``inplace`` is True.
        :rtype: IQDataset or None

        **Examples:**

        >>> from ria.dataset_manager.builders import AWGN_Builder
        >>> builder = AWGN_Builder()
        >>> builder.download_and_prepare()
        >>> ds = builder.as_dataset()
        >>> ds.shape
        (5, 1, 3)
        >>> new_ds = ds.trim_examples(2)
        >>> new_ds.shape
        (5, 1, 2)
        """
        # The signature allows keep=None; fall back to the documented default rather than crash on .lower().
        keep = "start" if keep is None else keep.lower()

        _, example_length = np.shape(self[0])

        if trim_length >= example_length:
            raise ValueError(f"Trim length must be less than {example_length}")

        # A zero or negative length would silently mis-slice (e.g. ``x[:, -0:]`` is the whole example).
        if trim_length <= 0:
            raise ValueError("Trim length must be a positive integer")

        if keep not in {"start", "end", "middle", "random"}:
            raise ValueError('keep must be "start", "end", "middle", or "random"')

        start = None
        if keep == "middle":
            start = example_length // 2
            if start + trim_length > example_length:
                raise ValueError(f"Trim length of {trim_length} is invalid for middle index of: {start} ")
        elif keep == "random":
            # Upper bound is exclusive, so the window [start, start + trim_length) always fits.
            start = np.random.randint(0, example_length - trim_length + 1)

        # When not operating in place, results are written to a freshly created dataset file.
        ds = None if inplace else self._create_next_dataset(example_length=trim_length)

        with h5py.File(self.source, "a") as f:
            data = f["data"]

            for idx in range(len(self)):
                trimmed_example = generate_trimmed_example(
                    example=data[idx],
                    keep=keep,
                    trim_length=trim_length,
                    start=start,
                )

                if not inplace:
                    append_entry_inplace(source=ds.source, dataset_path="data", entry=trimmed_example)
                    copy_dataset_entry_by_index(
                        source=self.source, destination=ds.source, dataset_path="metadata/metadata", idx=idx
                    )
                else:
                    # The on-disk row still has the original length, so zero-pad the kept samples back out to
                    # that length; the padding is cut off by the resize below.
                    trimmed_example = np.pad(
                        trimmed_example, ((0, 0), (0, example_length - trim_length)), "constant", constant_values=0
                    )
                    data[idx] = trimmed_example

            if inplace:
                # Shrink the sample axis while the file is still open.
                # NOTE(review): presumably "data" is created as a resizable dataset - confirm.
                data.resize(trim_length, axis=2)

        return ds

    def split_examples(
        self, split_factor: Optional[int] = None, example_length: Optional[int] = None, inplace: Optional[bool] = False
    ) -> IQDataset | None:
        """Split each example into multiple shorter examples.

        .. note:: This method is not implemented yet and always raises ``NotImplementedError``. The text below
            describes the intended behaviour.

        If the current example length is not evenly divisible by the provided example_length, excess samples are
        discarded. Excess examples are always at the end of the slice. If the split factor results in non-integer
        example lengths for the new example chunks, it rounds down.

        Requires either split_factor or example_length to be specified but not both. If both are provided,
        split factor will be used by default, and a warning will be raised.

        :param split_factor: the number of new example chunks produced from each original example,
            defaults to None.
        :type split_factor: int, optional
        :param example_length: the example length of the new example chunks, defaults to None.
        :type example_length: int, optional
        :param inplace: If True, the operation modifies the existing source file directly and returns None.
            If False, the operation creates a new dataset object and corresponding source file, leaving the
            original dataset unchanged. Default is False.
        :type inplace: bool, optional

        :raises NotImplementedError: Always; splitting has not been implemented yet.

        :return: A dataset with more examples that are shorter.
        :rtype: IQDataset

        **Examples:**

        If the dataset has 100 examples of length 1024 and the split factor is 2, the resulting dataset
        will have 200 examples of 512. No samples have been discarded.

        If the example dataset has 100 examples of length 1024 and the example length is 100, the resulting dataset
        will have 1000 examples of length 100. The remaining 24 samples from each example have been discarded.
        """
        if split_factor is not None and example_length is not None:
            # Warn and use split factor. stacklevel=2 attributes the warning to the caller's line.
            warnings.warn("split_factor and example_length should not both be specified.", stacklevel=2)

        raise NotImplementedError


def generate_trimmed_example(
    example: np.ndarray, keep: str, trim_length: int, start: Optional[int] = None
) -> np.ndarray:
    """Takes in an IQ example as input and returns a trimmed example.

    The example is sliced along its second axis; the first axis is returned untouched.

    :param example: The example to be trimmed.
    :type example: np.ndarray
    :param keep: The position the trimming occurs from. "start" keeps the leading samples and "end" keeps the
        trailing samples; any other value (e.g. "middle" or "random") keeps the window beginning at ``start``.
    :type keep: str
    :param trim_length: The desired length of the trimmed example.
    :type trim_length: int
    :param start: The starting index if keep = "middle" or "random"
    :type start: int, optional

    :raises ValueError: If trim_length is negative.
    :raises ValueError: If ``start`` is not provided for a start-based ``keep`` value.

    :return: The trimmed example
    :rtype: np.ndarray
    """
    if trim_length < 0:
        raise ValueError("trim_length must be non-negative")

    if keep == "start":
        return example[:, :trim_length]

    if keep == "end":
        # Compute the offset explicitly: ``example[:, -trim_length:]`` returns the WHOLE example when
        # trim_length is 0, because ``-0`` is just ``0``.
        first_kept = max(example.shape[1] - trim_length, 0)
        return example[:, first_kept:]

    # "middle", "random" (and any other value) select the window that begins at ``start``.
    if start is None:
        raise ValueError(f'start must be provided when keep is "{keep}"')
    return example[:, start : start + trim_length]