ria-toolkit-oss/src/ria_toolkit_oss/annotations/cusum_annotator.py

import json
from typing import Optional

import numpy as np

from ria_toolkit_oss.datatypes import Annotation, Recording


def annotate_with_cusum(
    recording: Recording,
    label: Optional[str] = "segment",
    window_size: Optional[int] = 1,
    min_duration: Optional[float] = None,
    tolerance: Optional[int] = None,
    annotation_type: Optional[str] = "standalone",
):
    """
    Add annotations that divide the recording into distinct time segments.

    This algorithm computes the cumulative sum of the sample magnitudes and
    determines break points in the signal.

    This tool can be used to find points where a signal turns on or off, or
    changes between a low and high amplitude.

    :param recording: A ``Recording`` object to annotate.
    :type recording: ``ria_toolkit_oss.datatypes.Recording``
    :param label: Label for the detected segments.
    :type label: str
    :param window_size: The length (in samples) of the moving average window.
    :type window_size: int
    :param min_duration: The minimum duration (in ms) of a segment.
        The algorithm will not produce annotations shorter than this length.
    :type min_duration: float
    :param tolerance: The minimum length (in samples) of a segment.
    :type tolerance: int
    :param annotation_type: Annotation type (standalone, parallel, intersection).
    :type annotation_type: str
    """

    sample_rate = recording.metadata["sample_rate"]
    center_frequency = recording.metadata.get("center_frequency", 0)

    # Create an object of the time segmenter
    time_segmenter = TimeSegmenter(sample_rate, min_duration, window_size, tolerance)

    change_points = time_segmenter.apply(recording.data[0])

    time_segments_indices = np.append(np.insert(change_points, 0, 0), len(recording.data[0]))
    annotations = []
    for i in range(len(time_segments_indices) - 1):
        # Build comment JSON with type metadata
        comment_data = {
            "type": annotation_type,
            "generator": "cusum_annotator",
            "params": {
                "window_size": window_size,
                "min_duration": min_duration,
                "tolerance": tolerance,
            },
        }
        f_min, f_max = detect_frequency(
            signal=recording.data[0],
            start=time_segments_indices[i],
            stop=time_segments_indices[i + 1],
            sample_rate=sample_rate,
        )

        annotations.append(
            Annotation(
                sample_start=time_segments_indices[i],
                sample_count=time_segments_indices[i + 1] - time_segments_indices[i],
                freq_lower_edge=center_frequency + f_min,
                freq_upper_edge=center_frequency + f_max,
                label=label,
                comment=json.dumps(comment_data),
                detail={"generator": "cusum_annotator"},
            )
        )

    return Recording(data=recording.data, metadata=recording.metadata, annotations=recording.annotations + annotations)


def _compute_cusum(_signal, sample_rate: int, tolerance: int = None, min_duration: float = -1):
    """
    This function efficiently computes the cumulative sum of a give list (_signal), with an optional tolerance.

    Args:
    - _signal: array of iq samples.
    - Tolerance: the least acceptable length of a block, Defaults to None.

    Returns:
    - cusum (array): Array of the cumulative sum of the given list
    - sample_rate (int): __description_
    - change_points (array): Array of the indices at which a change in the CUSUM direction happens.
    - min_duration (float): The least acceptable time width of each segment (in ms). Defaults to -1.
    """

    # efficiently calculate the running sum of the signal
    # cusum = list(itertools.accumulate((_signal - np.mean(_signal))))
    x = _signal - np.mean(_signal)
    cusum = np.cumsum(x)

    # 'diff' computes the differences between the consecutive values,
    # then 'sign' determines if it is +ve or -ve.
    change_indicators = np.sign(np.diff(cusum))
    change_points = np.where(np.diff(change_indicators))[0] + 1

    # Limit the change_points
    # Reject those whose number of samples < minimum accepted #n of samples in (min duration) ms.
    if min_duration is not None and min_duration > 0:
        min_samples_wide = int(min_duration * sample_rate / 1000)
        segments_lengths = np.diff(change_points)
        segments_lengths = np.insert(segments_lengths, 0, change_points[0])
        change_points = change_points[np.where(segments_lengths > min_samples_wide)[0]]
    return cusum, change_points


def detect_frequency(signal, start, stop, sample_rate):
    signal_segment = signal[start:stop]
    if len(signal_segment) > 0:
        fft_data = np.abs(np.fft.fftshift(np.fft.fft(signal_segment)))
        fft_freqs = np.fft.fftshift(np.fft.fftfreq(len(signal_segment), 1 / sample_rate))

        # Use a spectral threshold to find the 'height' of the orange block
        spectral_thresh = np.max(fft_data) * 0.15
        sig_indices = np.where(fft_data > spectral_thresh)[0]

        if len(sig_indices) > 4:
            return fft_freqs[sig_indices[0]], fft_freqs[sig_indices[-1]]
        else:
            return -sample_rate / 4, sample_rate / 4
    else:
        return -sample_rate / 4, sample_rate / 4


class TimeSegmenter:
    """Time Segmenter class, it creates a segmenter object with certain\
    characteristics to easily split an input signal to segments based on\
    the cumulative sum of deviations (of the signal mean)
    """

    def __init__(
        self, sample_rate: int, min_duration: float = 1, moving_average_window: int = 3, tolerance: int = None
    ):
        """_summary_

        Args:
            sample_rate (int): _description_
            min_duration (float, optional): _description_. Defaults to 1.
            moving_average_window (int, optional): _description_. Defaults to 3.
            tolerance (int, optional): _description_. Defaults to None.
        """
        self.sample_rate = sample_rate
        self.min_duration = min_duration
        self.moving_average_window = moving_average_window
        self._moving_avg_filter = self._init_filter()
        self.tolerance = tolerance

    def _init_filter(self):
        """_summary_

        Returns:
            _type_: _description_
        """
        return np.ones(self.moving_average_window) / self.moving_average_window

    def _apply_filter(self, iqsignal: np.array):
        """_summary_

        Args:
            iqsignal (np.array): _description_

        Returns:
            _type_: _description_
        """
        return np.convolve(abs(iqsignal), self._moving_avg_filter, mode="same")

    def _create_segments(self, iq_signal: np.array, change_points: np.array):
        """_summary_

        Args:
            iq_signal (np.array): _description_
            change_points (np.array): _description_

        Returns:
            _type_: _description_
        """
        return np.split(iq_signal, change_points)

    def apply(self, iq_signal: np.array):
        """_summary_

        Args:
            iq_signal (np.array): _description_

        Returns:
            _type_: _description_
        """
        smoothed_signal = self._apply_filter(iq_signal)
        _, change_points = _compute_cusum(smoothed_signal, self.sample_rate, self.tolerance, self.min_duration)
        # segments = self._create_segments(iq_signal, change_points)
        return change_points
M updated annotations from utils to oss 2026-02-23 14:12:34 -05:00			`import json`
			`from typing import Optional`

			`import numpy as np`

F Port annotation system from utils and fix ria package imports Annotations package (new): - Add threshold_qualifier with 3-pass hysteresis detector (Pass 1: strong bursts, Pass 2: weak residual bursts, Pass 3: macro-window faint burst detection), auto window_size scaled to 1ms, channel selection, and stable noise_floor baseline throughout - Add energy_detector, cusum_annotator, parallel_signal_separator, qualify_slice, signal_isolation, annotation_transforms - Add __init__.py exporting the four functions used by the CLI - Fix all imports from utils.data → ria_toolkit_oss.datatypes CLI annotate command (new): - Port full annotate CLI from utils including list, add, remove, clear, energy, cusum, threshold, and separate subcommands - Fix imports from utils.* → ria_toolkit_oss.* and utils_cli.* → ria_toolkit_oss_cli.* - Safe overwrite logic: _annotated files always writable, originals protected; --overwrite writes in-place only on _annotated inputs CLI view command: - Add 'annotations' as a valid --type, wiring view_annotations from view_signal view_signal.py: - Add view_annotations function with blue/purple alternating palette and threshold %-sorted drawing order (lower % renders on top) recording.py (datatypes): - Fix lazy imports in to_wav() and to_blue() from utils.io → ria_toolkit_oss.io io/recording.py: - Add compatibility shim in from_npy to remap utils.data.annotation.Annotation to ria_toolkit_oss.datatypes.annotation.Annotation when loading .npy files pickled by the utils package 2026-03-31 13:34:00 -04:00			`from ria_toolkit_oss.datatypes import Annotation, Recording`
M updated annotations from utils to oss 2026-02-23 14:12:34 -05:00

			`def annotate_with_cusum(`
			`recording: Recording,`
			`label: Optional[str] = "segment",`
			`window_size: Optional[int] = 1,`
			`min_duration: Optional[float] = None,`
			`tolerance: Optional[int] = None,`
			`annotation_type: Optional[str] = "standalone",`
			`):`
			`"""`
			`Add annotations that divide the recording into distinct time segments.`

			`This algorithm computes the cumulative sum of the sample magnitudes and`
			`determines break points in the signal.`

			`This tool can be used to find points where a signal turns on or off, or`
			`changes between a low and high amplitude.`

			:param recording: A ``Recording`` object to annotate.
F Port annotation system from utils and fix ria package imports Annotations package (new): - Add threshold_qualifier with 3-pass hysteresis detector (Pass 1: strong bursts, Pass 2: weak residual bursts, Pass 3: macro-window faint burst detection), auto window_size scaled to 1ms, channel selection, and stable noise_floor baseline throughout - Add energy_detector, cusum_annotator, parallel_signal_separator, qualify_slice, signal_isolation, annotation_transforms - Add __init__.py exporting the four functions used by the CLI - Fix all imports from utils.data → ria_toolkit_oss.datatypes CLI annotate command (new): - Port full annotate CLI from utils including list, add, remove, clear, energy, cusum, threshold, and separate subcommands - Fix imports from utils.* → ria_toolkit_oss.* and utils_cli.* → ria_toolkit_oss_cli.* - Safe overwrite logic: _annotated files always writable, originals protected; --overwrite writes in-place only on _annotated inputs CLI view command: - Add 'annotations' as a valid --type, wiring view_annotations from view_signal view_signal.py: - Add view_annotations function with blue/purple alternating palette and threshold %-sorted drawing order (lower % renders on top) recording.py (datatypes): - Fix lazy imports in to_wav() and to_blue() from utils.io → ria_toolkit_oss.io io/recording.py: - Add compatibility shim in from_npy to remap utils.data.annotation.Annotation to ria_toolkit_oss.datatypes.annotation.Annotation when loading .npy files pickled by the utils package 2026-03-31 13:34:00 -04:00			:type recording: ``ria_toolkit_oss.datatypes.Recording``
M updated annotations from utils to oss 2026-02-23 14:12:34 -05:00			`:param label: Label for the detected segments.`
			`:type label: str`
			`:param window_size: The length (in samples) of the moving average window.`
			`:type window_size: int`
			`:param min_duration: The minimum duration (in ms) of a segment.`
			`The algorithm will not produce annotations shorter than this length.`
			`:type min_duration: float`
			`:param tolerance: The minimum length (in samples) of a segment.`
			`:type tolerance: int`
			`:param annotation_type: Annotation type (standalone, parallel, intersection).`
			`:type annotation_type: str`
			`"""`

			`sample_rate = recording.metadata["sample_rate"]`
			`center_frequency = recording.metadata.get("center_frequency", 0)`

			`# Create an object of the time segmenter`
			`time_segmenter = TimeSegmenter(sample_rate, min_duration, window_size, tolerance)`

			`change_points = time_segmenter.apply(recording.data[0])`

			`time_segments_indices = np.append(np.insert(change_points, 0, 0), len(recording.data[0]))`
			`annotations = []`
			`for i in range(len(time_segments_indices) - 1):`
			`# Build comment JSON with type metadata`
			`comment_data = {`
			`"type": annotation_type,`
			`"generator": "cusum_annotator",`
			`"params": {`
			`"window_size": window_size,`
			`"min_duration": min_duration,`
			`"tolerance": tolerance,`
			`},`
			`}`
			`f_min, f_max = detect_frequency(`
			`signal=recording.data[0],`
			`start=time_segments_indices[i],`
			`stop=time_segments_indices[i + 1],`
			`sample_rate=sample_rate,`
			`)`

			`annotations.append(`
			`Annotation(`
			`sample_start=time_segments_indices[i],`
			`sample_count=time_segments_indices[i + 1] - time_segments_indices[i],`
			`freq_lower_edge=center_frequency + f_min,`
			`freq_upper_edge=center_frequency + f_max,`
			`label=label,`
			`comment=json.dumps(comment_data),`
			`detail={"generator": "cusum_annotator"},`
			`)`
			`)`

			`return Recording(data=recording.data, metadata=recording.metadata, annotations=recording.annotations + annotations)`


			`def _compute_cusum(_signal, sample_rate: int, tolerance: int = None, min_duration: float = -1):`
			`"""`
			`This function efficiently computes the cumulative sum of a give list (_signal), with an optional tolerance.`

			`Args:`
			`- _signal: array of iq samples.`
			`- Tolerance: the least acceptable length of a block, Defaults to None.`

			`Returns:`
			`- cusum (array): Array of the cumulative sum of the given list`
			`- sample_rate (int): __description_`
			`- change_points (array): Array of the indices at which a change in the CUSUM direction happens.`
			`- min_duration (float): The least acceptable time width of each segment (in ms). Defaults to -1.`
			`"""`

			`# efficiently calculate the running sum of the signal`
			`# cusum = list(itertools.accumulate((_signal - np.mean(_signal))))`
			`x = _signal - np.mean(_signal)`
			`cusum = np.cumsum(x)`

			`# 'diff' computes the differences between the consecutive values,`
			`# then 'sign' determines if it is +ve or -ve.`
			`change_indicators = np.sign(np.diff(cusum))`
			`change_points = np.where(np.diff(change_indicators))[0] + 1`

			`# Limit the change_points`
			`# Reject those whose number of samples < minimum accepted #n of samples in (min duration) ms.`
			`if min_duration is not None and min_duration > 0:`
			`min_samples_wide = int(min_duration * sample_rate / 1000)`
			`segments_lengths = np.diff(change_points)`
			`segments_lengths = np.insert(segments_lengths, 0, change_points[0])`
			`change_points = change_points[np.where(segments_lengths > min_samples_wide)[0]]`
			`return cusum, change_points`


			`def detect_frequency(signal, start, stop, sample_rate):`
			`signal_segment = signal[start:stop]`
			`if len(signal_segment) > 0:`
			`fft_data = np.abs(np.fft.fftshift(np.fft.fft(signal_segment)))`
			`fft_freqs = np.fft.fftshift(np.fft.fftfreq(len(signal_segment), 1 / sample_rate))`

			`# Use a spectral threshold to find the 'height' of the orange block`
			`spectral_thresh = np.max(fft_data) * 0.15`
			`sig_indices = np.where(fft_data > spectral_thresh)[0]`

			`if len(sig_indices) > 4:`
			`return fft_freqs[sig_indices[0]], fft_freqs[sig_indices[-1]]`
			`else:`
			`return -sample_rate / 4, sample_rate / 4`
			`else:`
			`return -sample_rate / 4, sample_rate / 4`


			`class TimeSegmenter:`
			`"""Time Segmenter class, it creates a segmenter object with certain\`
			`characteristics to easily split an input signal to segments based on\`
			`the cumulative sum of deviations (of the signal mean)`
			`"""`

			`def __init__(`
			`self, sample_rate: int, min_duration: float = 1, moving_average_window: int = 3, tolerance: int = None`
			`):`
			`"""_summary_`

			`Args:`
			`sample_rate (int): _description_`
			`min_duration (float, optional): _description_. Defaults to 1.`
			`moving_average_window (int, optional): _description_. Defaults to 3.`
			`tolerance (int, optional): _description_. Defaults to None.`
			`"""`
			`self.sample_rate = sample_rate`
			`self.min_duration = min_duration`
			`self.moving_average_window = moving_average_window`
			`self._moving_avg_filter = self._init_filter()`
			`self.tolerance = tolerance`

			`def _init_filter(self):`
			`"""_summary_`

			`Returns:`
			`_type_: _description_`
			`"""`
			`return np.ones(self.moving_average_window) / self.moving_average_window`

			`def _apply_filter(self, iqsignal: np.array):`
			`"""_summary_`

			`Args:`
			`iqsignal (np.array): _description_`

			`Returns:`
			`_type_: _description_`
			`"""`
			`return np.convolve(abs(iqsignal), self._moving_avg_filter, mode="same")`

			`def _create_segments(self, iq_signal: np.array, change_points: np.array):`
			`"""_summary_`

			`Args:`
			`iq_signal (np.array): _description_`
			`change_points (np.array): _description_`

			`Returns:`
			`_type_: _description_`
			`"""`
			`return np.split(iq_signal, change_points)`

			`def apply(self, iq_signal: np.array):`
			`"""_summary_`

			`Args:`
			`iq_signal (np.array): _description_`

			`Returns:`
			`_type_: _description_`
			`"""`
			`smoothed_signal = self._apply_filter(iq_signal)`
			`_, change_points = _compute_cusum(smoothed_signal, self.sample_rate, self.tolerance, self.min_duration)`
			`# segments = self._create_segments(iq_signal, change_points)`
			`return change_points`