File size: 4,859 Bytes

f47b72d

from __future__ import annotations
from typing import Iterable, Union
from numpy import ndarray
from pandas import DataFrame
from sklearn.pipeline import Pipeline
import numpy as np
import sys
import skops.io as sio
from huggingface_hub import hf_hub_download
import pandas as pd

# Label produced by the first-stage ("noise") classifier for rows that are
# *not* noise. CascadedClassifier.predict forwards only the rows predicted
# with this label to the second-stage ("sua") classifier.
PREDICTOR_SPLIT_TARGET = 0  # Represents 'not noise'.

class CascadedClassifier(Pipeline):
    """Two-stage (cascaded) classifier built on top of a scikit-learn Pipeline.

    The pipeline holds exactly two steps, looked up by name as ``"noise"``
    and ``"sua"``. The first stage separates noise from non-noise rows; rows
    labelled ``PREDICTOR_SPLIT_TARGET`` ('not noise') by the first stage are
    then classified by the second stage.
    """

    # Added to second-stage labels so they cannot collide with the
    # first-stage labels (which are assumed to be 0 or 1).
    _SUA_LABEL_OFFSET = 2

    def __init__(self, steps: list[tuple[str, Pipeline]], memory=None):
        """
        Initializes a cascaded classifier pipeline with two classification steps.

        Parameters
        ----------
        steps: list[tuple[str, Pipeline]]
            A list of (name, pipeline) tuples for noise and SUA classifiers.
        memory: optional
            Used to cache the fitted transformers of the pipeline.

        Raises
        ------
        ValueError
            If ``steps`` does not contain exactly two entries.
        """
        # Explicit raise instead of ``assert`` so the check survives ``python -O``.
        if len(steps) != 2:
            raise ValueError('CascadedClassifier must have exactly 2 steps')
        super().__init__(steps, memory=memory)
        # Retained for backward compatibility with code that reads ``_steps``.
        self._steps = steps

    @property
    def feature_names_in_(self) -> list[str]:
        """
        Returns the feature names used in the noise classifier.

        Returns
        -------
        list[str]
            The input feature names, as exposed by the first element of the
            ``"noise"`` step.
        """
        return self.named_steps["noise"][0].feature_names_in_

    def predict(self, X: list[str] | ndarray | Iterable | DataFrame, **predict_params) -> ndarray:
        """
        Predicts labels for the input data using a cascading approach.

        The noise classifier labels every row first. Rows it labels
        ``PREDICTOR_SPLIT_TARGET`` are re-labelled with the SUA classifier's
        prediction shifted by 2; all other rows keep the noise classifier's
        label.

        Parameters
        ----------
        X: list[str] | ndarray | Iterable | DataFrame
            The input data. Lists and tuples are converted to an ndarray
            before the 'not noise' rows are selected.
        predict_params: dict
            Accepted for signature compatibility; currently not forwarded to
            the underlying classifiers.

        Returns
        -------
        ndarray
            The predicted labels, one per input row.
        """
        # Step 1: initial predictions from the noise classifier.
        y = self.named_steps["noise"][0].predict(X)

        # Rows predicted as 'not noise' need the second stage.
        predict_rows = (y == PREDICTOR_SPLIT_TARGET)
        if not np.any(predict_rows):
            return y

        # Plain Python sequences do not support boolean-mask indexing.
        if isinstance(X, (list, tuple)):
            X_predict = np.asarray(X)[predict_rows]
        else:
            X_predict = X[predict_rows]

        # Step 2: SUA classifier on the 'not noise' subset only.
        y2 = self.named_steps["sua"][0].predict(X_predict)

        # Non-mutating shift: leaves the array returned by the SUA classifier
        # untouched and avoids in-place casting errors.
        y[predict_rows] = y2 + self._SUA_LABEL_OFFSET
        return y

    def predict_proba(
        self,
        X: list[str] | ndarray | Iterable | DataFrame,
    ) -> ndarray:
        """
        Predict three-column class probabilities for the input data.

        Parameters
        ----------
        X : list[str] | ndarray | Iterable | DataFrame
            The input data. Both classifiers are evaluated on every row.

        Returns
        -------
        ndarray
            Array of shape (n_samples, 3) and dtype float64. For an empty
            ``X`` an array of shape (0, 3) is returned.

        Raises
        ------
        RuntimeError
            If anything fails while computing the probabilities; the original
            exception is chained as ``__cause__``.

        Notes
        -----
        With ``p_noise`` the noise classifier's probabilities and ``p_sua``
        the SUA classifier's probabilities, each row is filled as follows:

        * ``p_noise[0] > p_noise[1]`` (neural):
          ``[0, p_sua[0], p_sua[1]]``
        * otherwise (noise, including ties):
          ``[p_noise[1], p_noise[0], 0]``

        The columns are therefore ordered [noise, MUA, SUA].
        NOTE(review): the MUA/SUA naming assumes the SUA classifier's class 0
        is MUA and class 1 is SUA -- confirm against the trained model.

        No explicit normalization is applied; rows sum to 1 whenever each
        underlying classifier's probabilities sum to 1.
        """
        n_samples = len(X)
        if n_samples == 0:
            return np.zeros((0, 3), dtype=np.float64)

        try:
            noise_proba = self.named_steps["noise"][0].predict_proba(X)
            sua_proba = self.named_steps["sua"][0].predict_proba(X)

            p_neural = noise_proba[:, 0]
            p_noise = noise_proba[:, 1]
            # Strict comparison: ties (and NaNs) fall into the noise branch.
            is_neural = p_neural > p_noise

            out_proba = np.zeros((n_samples, 3), dtype=np.float64)
            out_proba[:, 0] = np.where(is_neural, 0.0, p_noise)            # noise
            out_proba[:, 1] = np.where(is_neural, sua_proba[:, 0], p_neural)  # MUA
            out_proba[:, 2] = np.where(is_neural, sua_proba[:, 1], 0.0)    # SUA
            return out_proba
        except Exception as e:
            raise RuntimeError(
                f"Error during probability prediction: {str(e)}"
            ) from e