Delete src/deprecated
- src/deprecated/__init__.py +0 -0
- src/deprecated/configs.py +0 -41
- src/deprecated/conversion.py +0 -85
- src/deprecated/ecg_feature_extraction.py +0 -37
- src/deprecated/ecg_preprocessing.py +0 -93
- src/deprecated/ecg_processing.py +0 -54
- src/deprecated/pipeline.py +0 -63
- src/deprecated/pipeline_wrapper.py +0 -50
- src/deprecated/pydantic_models.py +0 -146
- src/deprecated/utils.py +0 -80
src/deprecated/__init__.py
DELETED
File without changes
src/deprecated/configs.py
DELETED
@@ -1,41 +0,0 @@
-"""This module contains all the configurations and statics for the project."""
-from enum import Enum
-
-
-class SignalEnum(str, Enum):
-    chest = 'chest'
-    wrest = 'wrest'
-
-
-class WindowSlicingMethodEnum(str, Enum):
-    time_related = 'time_related'
-    label_related_before = 'label_related_before'
-    label_related_after = 'label_related_after'
-    label_related_middle = 'label_related_centered'
-
-
-class NormalizationMethodEnum(str, Enum):
-    baseline_difference = 'baseline_difference'
-    baseline_relative = 'baseline_relative'
-    separate = 'separate'
-
-
-class BColors(str, Enum):
-    HEADER = '\033[95m'
-    OKBLUE = '\033[94m'
-    OKCYAN = '\033[96m'
-    INFO = '\033[92m'
-    WARNING = '\033[93m'
-    FAIL = '\033[91m'
-    ENDC = '\033[0m'
-    BOLD = '\033[1m'
-    UNDERLINE = '\033[4m'
-
-
-class OutputFormats(str, Enum):
-    JSON = 'json'
-    CSV = 'csv'
-    EXCEL_SPREADSHEET = 'excel_spreadsheet'
-
-
-selected_features = ["HRV_MeanNN", "HRV_SDNN", "HRV_RMSSD", "HRV_pNN50"]
src/deprecated/conversion.py
DELETED
@@ -1,85 +0,0 @@
-import h5py
-import pandas as pd
-
-def h5_to_pandas(h5_file, ecg_channel_name='channel_1') -> pd.DataFrame:
-    """
-    Converts a h5 file to a pandas DataFrame. It must contain the following attributes: sampling rate, date, time,
-    nsamples, device, device name, duration, and raw. The raw attribute must contain the ecg data in a 2D numpy array.
-    The DataFrame will contain the following columns: timestamp_idx, ecg, record_date, configs.frequency,
-    configs.device_name.
-
-    h5 formats supported are of the company Bioplux (https://www.pluxbiosignals.com/) with its Recording Software
-    OpenSignals Revolution (https://support.pluxbiosignals.com/knowledge-base/introducing-opensignals-revolution/).
-
-    :param path_to_h5_file: Path to the h5 file.
-    :type path_to_h5_file: str
-    :param ecg_channel_name: The name of the ecg channel in the h5 file.
-    :type ecg_channel_name: str
-
-    :return: The pandas DataFrame.
-    :rtype: pd.DataFrame
-    """
-    with h5py.File(h5_file, 'r') as file:
-        # Get the first key as the group key
-        group_key = next(iter(file.keys()))
-        h5_group = file[group_key]
-
-        # Convert ECG data to a flattened numpy array
-        ecg = h5_group['raw'][ecg_channel_name][:].astype(float).flatten()
-
-        # Extract metadata
-        attrs = h5_group.attrs
-        sampling_rate = attrs['sampling rate']
-        date = attrs['date']
-        time = attrs['time']
-        num_samples = attrs['nsamples']
-        device = attrs['device']
-        device_name = attrs['device name']
-        duration = attrs['duration']
-
-        # Create the timestamp column
-        start = pd.to_datetime(date + ' ' + time)
-        freq = f'{1 / sampling_rate}S'
-        timestamps = pd.date_range(start=start, periods=num_samples, freq=freq)
-
-        # Check if the overall time range of the calculated timestamps fit the given duration attribute of the h5 file
-        end = start + pd.Timedelta(duration)
-        assert abs((end - timestamps[-1]).total_seconds()) < 1
-
-        # Create the DataFrame
-        df = pd.DataFrame({
-            'record_date': date,
-            'frequency': sampling_rate,
-            'device_name': f'{device}_{device_name}',
-            'timestamp_idx': timestamps,
-            'ecg': ecg,
-        })
-
-        return df
-
-def csv_to_pandas(path: str) -> pd.DataFrame:
-    """ Converts a CSV file in a pandas dataframe fitted to the ECG-HRV pipeline pydantic models.
-
-    :param path: Path to the csv file.
-    :type path: str
-
-    :return: The pandas DataFrame.
-    :rtype: pd.DataFrame
-    """
-    # Get metadata of csv file
-    with open(path, 'r') as file:
-        metadata = file.readline()
-    metadata = metadata.replace('# ', '')
-    metadata = eval(metadata)
-
-    configs = {key: value for key, value in metadata.items() if key.startswith('config')}
-    batch = {key: value for key, value in metadata.items() if key.startswith('batch')}
-
-    # Get samples from csv file
-    df = pd.read_csv(path, comment='#')
-
-    # Add metadata to samples
-    df = df.assign(**batch)
-    df = df.assign(**configs)
-
-    return df
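For anyone still depending on this module, a minimal usage sketch of the removed converters; the file paths are hypothetical placeholders:

    from src.deprecated.conversion import csv_to_pandas, h5_to_pandas

    # Any OpenSignals Revolution recording carrying the expected attributes
    # (sampling rate, date, time, nsamples, device, device name, duration, raw).
    df_h5 = h5_to_pandas('recordings/opensignals_ecg.h5', ecg_channel_name='channel_1')

    # A CSV whose first line is a '# '-prefixed metadata dict, as produced by this pipeline.
    df_csv = csv_to_pandas('exports/ecg_batch.csv')
    print(df_h5[['timestamp_idx', 'ecg']].head())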
src/deprecated/ecg_feature_extraction.py
DELETED
@@ -1,37 +0,0 @@
-"""This file contains the ecg feature extraction pipelines and functions used for calculating the features."""
-
-import neurokit2 as nk
-from .configs import selected_features
-
-
-def get_hrv_features(ecg_signal, fs):
-    # Find peaks
-    peaks, info = nk.ecg_peaks(ecg_signal, sampling_rate=fs, method="pantompkins1985")
-
-    # Compute time domain features
-    hrv_time_features = nk.hrv_time(peaks, sampling_rate=fs)
-
-    # Compute frequency domain features
-    #hrv_frequency_features = nk.hrv_frequency(peaks, sampling_rate=fs, method="welch", show=False)
-
-    # Concat features
-    #hrv_features = pd.concat([hrv_time_features, hrv_frequency_features], axis=1)
-    hrv_features = hrv_time_features
-
-    # to dict
-    hrv_features = hrv_features[selected_features].to_dict(orient="records")
-
-    return hrv_features
-
-
-def normalize_features(features_df, normalization_method):
-    if normalization_method == "difference":
-        baseline_features = features_df[features_df['baseline'] == True].iloc[0]
-        features_df.loc[features_df['baseline'] == False, features_df.columns.isin(selected_features)] -= baseline_features
-    elif normalization_method == "relative":
-        baseline_features = features_df[features_df['baseline'] == True].iloc[0]
-        features_df.loc[features_df['baseline'] == False, features_df.columns.isin(selected_features)] /= baseline_features
-    elif (normalization_method == "separate") or (normalization_method is None):
-        pass
-
-    return features_df
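As a reference for migrating callers, a short sketch of how get_hrv_features was invoked; the simulated signal stands in for a real recording:

    import neurokit2 as nk

    from src.deprecated.ecg_feature_extraction import get_hrv_features

    fs = 1000
    ecg = nk.ecg_simulate(duration=600, sampling_rate=fs, heart_rate=70)  # 10 min of synthetic ECG
    features = get_hrv_features(ecg, fs)
    # One record per signal, restricted to configs.selected_features, e.g.
    # [{'HRV_MeanNN': ..., 'HRV_SDNN': ..., 'HRV_RMSSD': ..., 'HRV_pNN50': ...}]
    print(features)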
src/deprecated/ecg_preprocessing.py
DELETED
@@ -1,93 +0,0 @@
-from scipy.signal import filtfilt, butter, resample
-from sklearn.preprocessing import StandardScaler
-
-
-def remove_basline_wander(data, sampling_rate=360, cutoff_freq=0.05):
-    """
-    Remove baseline wander from ECG data using a high-pass filter. The high-pass filter will remove all frequencies
-    below the cutoff frequency. The cutoff frequency should be set to the lowest frequency that is still considered
-    baseline wander and not part of the ECG signal. For example, baseline wander is typically between 0.05 Hz and
-    0.5 Hz. Therefore, a cutoff frequency of 0.05 Hz is a good starting point. However, if the ECG signal contains
-    low-frequency components of interest, such as the T wave or P wave, then a higher cutoff frequency may be necessary
-    to avoid over-filtering and loss of important ECG signal components.
-    See https://en.wikipedia.org/wiki/High-pass_filter for more information on high-pass filters.
-
-    :param data: ECG data as a 1-dimensional numpy array.
-    :type data: numpy array
-    :param sampling_rate: Sampling rate of ECG data (Hz), defaults to 360.
-    :type sampling_rate: int, optional
-    :param cutoff_freq: cutoff frequency of high-pass filter (Hz), defaults to 0.05.
-    :type cutoff_freq: float, optional
-
-    :return: ECG data with baseline wander removed.
-    :rtype: numpy array
-    """
-    # Define filter parameters Nyquist frequency - The highest frequency that can be represented given the sampling
-    # frequency. Nyquist Frequency is half the sampling rate (in Hz).
-    nyquist_freq = 0.5 * sampling_rate
-    # Filter order - The higher the order, the steeper the filter roll-off (i.e. the more aggressive the filter is at
-    # removing frequencies outside the passband).
-    filter_order = 3
-    # Apply high-pass filter
-    b, a = butter(filter_order, cutoff_freq / nyquist_freq, 'highpass')
-    filtered_data = filtfilt(b, a, data)
-
-    return filtered_data
-
-
-def remove_noise(data, sampling_rate=360, lowcut=0.5, highcut=45):
-    """
-    Remove noise from ECG data using a band-pass filter. The band-pass filter will remove all frequencies below the
-    lowcut frequency and above the highcut frequency. The lowcut frequency should be set to the lowest frequency that
-    is still considered noise and not part of the ECG signal. For example, noise is typically between 0.5 Hz and 45
-    Hz. Therefore, a lowcut frequency of 0.5 Hz is a good starting point. However, if the ECG signal contains
-    low-frequency components of interest, such as the T wave or P wave, then a higher lowcut frequency may be
-    necessary to avoid over-filtering and loss of important ECG signal components. For this reason,
-    a lowcut frequency of 5 Hz is also a good starting point. The lowcut frequency can be adjusted as needed. The
-    highcut frequency should be set to the highest frequency that is still considered noise and not part of the ECG
-    signal. For example, noise is typically between 0.5 Hz and 45 Hz. Therefore, a highcut frequency of 45 Hz is a
-    good starting point. However, if the ECG signal contains high-frequency components of interest, such as the QRS
-    complex, then a lower highcut frequency may be necessary to avoid over-filtering and loss of important ECG signal
-    components. For this reason, a highcut frequency of 15 Hz is also a good starting point. The highcut frequency
-    can be adjusted as needed. See https://en.wikipedia.org/wiki/Band-pass_filter for more information on band-pass
-    filters.
-
-    :param data: ECG data as a 1-dimensional numpy array.
-    :type data: numpy array
-    :param sampling_rate: The sampling rate of ECG data (Hz), defaults to 360.
-    :type sampling_rate: int, optional
-    :param lowcut: The lowcut frequency of band-pass filter (Hz), defaults to 0.5.
-    :type lowcut: float, optional
-    :param highcut: The highcut frequency of band-pass filter (Hz), defaults to 45.
-    :type highcut: float, optional
-
-    :return: ECG data with noise removed
-    :rtype: numpy array
-    """
-    # Define filter parameters
-    nyquist_freq = 0.5 * sampling_rate
-    # Define cutoff frequencies (remove all frequencies below lowcut and above highcut)
-    low = lowcut / nyquist_freq
-    high = highcut / nyquist_freq
-    # Initialize filter
-    b, a = butter(4, [low, high], btype='band')
-    # Apply filter twice (combined filter) to remove forward and reverse phase shift. See
-    # https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.filtfilt.html for more information on filtfilt.
-    filtered_data = filtfilt(b, a, data)
-
-    return filtered_data
-
-
-def preprocess_ecg(data, sampling_rate=1000, new_sampling_rate=360):
-    # Remove basline wander using highpass filter
-    filtered_data = remove_basline_wander(data=data, sampling_rate=sampling_rate)
-    # Remove noise from ECG data using bandpass filter
-    filtered_data = remove_noise(data=filtered_data, sampling_rate=sampling_rate)
-    # Resample ECG data to a new sampling rate
-    if new_sampling_rate is not None and new_sampling_rate != sampling_rate:
-        filtered_data = resample(filtered_data, int(len(filtered_data) * new_sampling_rate / sampling_rate))
-    # Normalize ECG data to have zero mean and unit variance
-    scaler = StandardScaler()
-    normalized_data = scaler.fit_transform(filtered_data.reshape(-1, 1)).reshape(-1)
-
-    return normalized_data
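A minimal sketch of the removed preprocessing chain on a synthetic trace; the signal and the printed values are illustrative, not from a real ECG:

    import numpy as np

    from src.deprecated.ecg_preprocessing import preprocess_ecg

    t = np.linspace(0, 10, 10_000)  # 10 s at 1000 Hz
    raw = np.sin(2 * np.pi * 1.2 * t) + 0.5 * np.sin(2 * np.pi * 0.03 * t)  # beat + slow baseline drift
    raw += 0.1 * np.random.randn(t.size)  # additive noise
    clean = preprocess_ecg(raw, sampling_rate=1000, new_sampling_rate=360)
    print(clean.shape)  # (3600,) after resampling to 360 Hz
    print(round(clean.mean(), 3), round(clean.std(), 3))  # ~0.0 and ~1.0 after standardization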
src/deprecated/ecg_processing.py
DELETED
@@ -1,54 +0,0 @@
-"""This file contains the ecg processing pipelines."""
-import pandas as pd
-
-import neurokit2 as nk
-
-from src.deprecated.ecg_feature_extraction import get_hrv_features, normalize_features
-from src.deprecated.utils import cut_out_window, create_windows
-
-pd.set_option('display.float_format', '{:.6f}'.format)
-
-from src.logger import setup_logger
-logger = setup_logger(__name__)
-
-
-def process_window(window, window_id, frequency):
-    features = get_hrv_features(window['ecg'].values, frequency)
-    tmp = pd.DataFrame(features, index=[0])
-    tmp['subject_id'] = window['subject_id'].unique()[0]
-    tmp['sample_id'] = str(window['sample_id'].unique()[0])
-    tmp['window_id'] = window_id
-    tmp['w_start_time'] = window['timestamp_idx'].min().strftime('%Y-%m-%d %H:%M:%S')
-    tmp['w_end_time'] = window['timestamp_idx'].max().strftime('%Y-%m-%d %H:%M:%S')
-    tmp['baseline'] = window_id == 0
-    tmp['frequency'] = frequency
-    return tmp
-
-
-def process_batch(samples, configs):
-    features_list = []
-    for i, sample in enumerate(samples):
-        logger.info(f"Processing sample ({i + 1}/{len(samples)})...")
-        sample_df = pd.DataFrame.from_dict(sample.dict())
-        # Preprocess the ecg signal
-        logger.info("Preprocess ECG signals...")
-        sample_df['ecg'] = nk.ecg_clean(sample_df['ecg'], sampling_rate=sample.frequency, method="pantompkins1985")
-        # Cut out the windows and process them
-        if configs.baseline_start:
-            logger.info("Cut out baseline window...")
-            baseline_window = cut_out_window(sample_df, 'timestamp_idx', start=configs.baseline_start,
-                                             end=configs.baseline_end)
-            sample_df = sample_df[sample_df['timestamp_idx'] > baseline_window['timestamp_idx'].max()]
-            logger.info("Processing baseline window...")
-            features_list.append(process_window(baseline_window, 0, sample.frequency))
-        logger.info("Cut out windows...")
-        windows = create_windows(df=sample_df, time_column='timestamp_idx', window_size=configs.window_size,
-                                 window_slicing_method=configs.window_slicing_method)
-        logger.info(f"Processing windows (Total: {len(windows)})...")
-        features_list.extend(process_window(window, i, sample.frequency) for i, window in enumerate(windows, start=1))
-    features_df = pd.concat(features_list, ignore_index=True)
-    # Normalize the features via baseline subtraction
-    if configs.baseline_start:
-        features_df = normalize_features(features_df, configs.normalization_method)
-
-    return features_df
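For reference, a hedged sketch of feeding a single window through process_window; it assumes the repo's src.logger is importable and uses a simulated signal with illustrative IDs:

    import neurokit2 as nk
    import pandas as pd

    from src.deprecated.ecg_processing import process_window

    fs = 1000
    ecg = nk.ecg_simulate(duration=60, sampling_rate=fs, heart_rate=70)
    window = pd.DataFrame({
        'ecg': ecg,
        'subject_id': 'participant_1',
        'sample_id': 'f70c1033-36ae-4b8b-8b89-099a96dccca5',
        'timestamp_idx': pd.date_range('2023-05-23 22:58:00', periods=len(ecg), freq='1ms'),
    })
    row = process_window(window, window_id=0, frequency=fs)  # window_id 0 marks the baseline
    print(row[['subject_id', 'window_id', 'baseline', 'HRV_MeanNN']])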
src/deprecated/pipeline.py
DELETED
@@ -1,63 +0,0 @@
-from src.deprecated.conversion import csv_to_pandas
-from src.deprecated.ecg_processing import process_batch
-from src.deprecated.pydantic_models import ECGConfig, ECGSample
-
-
-class PreTrainedPipeline():
-    def __init__(self):
-        # Preload all the elements you are going to need at inference.
-        # For instance your model, processors, tokenizer that might be needed.
-        # This function is only called once, so do all the heavy processing I/O here
-        self.path = None  # Placeholder for the path to the CSV file
-        self.df = None  # Placeholder for the DataFrame
-
-    def load_data(self):
-        # Load CSV file into DataFrame
-        self.df = csv_to_pandas(self.path)
-
-    def process_data(self):
-        # Read csv file
-        df = self.df
-        # Implode
-        cols_to_implode = ['timestamp_idx', 'ecg', 'label']
-        df_imploded = df.groupby(list(set(df.columns) - set(cols_to_implode))) \
-            .agg({'timestamp_idx': list,
-                  'ecg': list,
-                  'label': list}) \
-            .reset_index()
-        # Get metadata
-        config_cols = [col for col in df.columns if col.startswith('configs.')]
-        configs = df_imploded[config_cols].iloc[0].to_dict()
-        configs = {key.removeprefix('configs.'): value for key, value in configs.items()}
-        configs = ECGConfig(**configs)
-        batch_cols = [col for col in df.columns if col.startswith('batch.')]
-        batch = df_imploded[batch_cols].iloc[0].to_dict()
-        batch = {key.removeprefix('batch.'): value for key, value in batch.items()}
-        # Get samples
-        samples = df_imploded.to_dict(orient='records')
-        samples = [ECGSample(**sample) for sample in samples]
-
-        features_df = process_batch(samples, configs)
-
-    def __call__(
-            self, inputs):
-        """
-        Args:
-            inputs (:obj:`dict`):
-                a dictionary containing a key 'data' mapping to a dict in which
-                the values represent each column.
-        Return:
-            A :obj:`list` of floats or strings: The classification output for each row.
-        """
-        if inputs:
-            self.path = inputs
-        else:
-            raise ValueError("No input provided")
-
-        # Load data
-        self.load_data()
-
-        # Implement your processing logic here, if needed
-        result = self.process_data()
-
-        return result
src/deprecated/pipeline_wrapper.py
DELETED
@@ -1,50 +0,0 @@
-from transformers import Pipeline
-
-from src.deprecated.conversion import csv_to_pandas
-from src.deprecated.pydantic_models import ECGConfig, ECGSample
-from src.deprecated.ecg_processing import process_batch
-
-
-class MyPipeline(Pipeline):
-    def _sanitize_parameters(self, **kwargs):
-        preprocess_kwargs = {}
-        if "maybe_arg" in kwargs:
-            preprocess_kwargs["maybe_arg"] = kwargs["maybe_arg"]
-        return preprocess_kwargs, {}, {}
-
-    def preprocess(self, inputs: str) -> dict:
-        # inputs are csv files
-        df = csv_to_pandas(inputs)
-        # Implode
-        cols_to_implode = ['timestamp_idx', 'ecg', 'label']
-        df_imploded = df.groupby(list(set(df.columns) - set(cols_to_implode))) \
-            .agg({'timestamp_idx': list,
-                  'ecg': list,
-                  'label': list}) \
-            .reset_index()
-        # Get metadata
-        config_cols = [col for col in df.columns if col.startswith('configs.')]
-        configs = df_imploded[config_cols].iloc[0].to_dict()
-        configs = {key.removeprefix('configs.'): value for key, value in configs.items()}
-        configs = ECGConfig(**configs)
-        batch_cols = [col for col in df.columns if col.startswith('batch.')]
-        batch = df_imploded[batch_cols].iloc[0].to_dict()
-        batch = {key.removeprefix('batch.'): value for key, value in batch.items()}
-        # Get samples
-        samples = df_imploded.to_dict(orient='records')
-        samples = [ECGSample(**sample) for sample in samples]
-
-        model_input = {"samples": samples, "configs": configs, "batch": batch}
-        return {"model_input": model_input}
-
-    def _forward(self, model_inputs):
-        # model_inputs == {"model_input": model_input}
-        samples = model_inputs["model_input"]["samples"]
-        configs = model_inputs["model_input"]["configs"]
-        batch = model_inputs["model_input"]["batch"]
-
-        features_df = process_batch(samples, configs)
-        return features_df
-
-    def postprocess(self, model_outputs):
-        return model_outputs
src/deprecated/pydantic_models.py
DELETED
@@ -1,146 +0,0 @@
-""" Pydantic models for use in the API. """
-import json
-from datetime import datetime, timedelta, date
-from typing import Union, Dict, Any
-from uuid import UUID, uuid4
-
-from pydantic import BaseModel, Field, model_validator
-
-from src.deprecated.configs import SignalEnum, WindowSlicingMethodEnum, NormalizationMethodEnum
-
-
-class ECGSample(BaseModel):
-    """ Model of the results of a single subject of an experiment with ECG biosignals. """
-    sample_id: UUID = Field(example="f70c1033-36ae-4b8b-8b89-099a96dccca5", default_factory=uuid4)
-    subject_id: str = Field(..., example="participant_1")
-    frequency: int = Field(..., example=1000)
-    device_name: str = Field(example="bioplux", default=None)
-    # pydantic will process either an int or float (unix timestamp) (e.g. 1496498400),
-    # an int or float as a string (assumed as Unix timestamp), or
-    # a string representing the date (e.g. "YYYY - MM - DD[T]HH: MM[:SS[.ffffff]][Z or [±]HH[:]MM]")
-    timestamp_idx: list[datetime] = Field(..., min_items=2, example=[1679709871, 1679713471, 1679720671])
-    ecg: list[float] = Field(..., min_items=2, example=[1.0, -1.100878, -3.996840])
-    label: list[str] = Field(min_items=2, example=["undefined", "stress", "undefined"], default=None)
-
-    class Config:
-        json_schema_extra = {
-            "example": {
-                "sample_id": "f70c1033-36ae-4b8b-8b89-099a96dccca5",
-                "subject_id": "participant_1",
-                "frequency": 1000,
-                "device_name": "bioplux",
-                "timestamp_idx": [1679709871, 1679713471, 1679720671],
-                "ecg": [1.0, -1.100878, -3.996840],
-                "label": ["undefined", "stress", "undefined"]
-            }
-        }
-
-    @model_validator(mode='before')
-    @classmethod
-    def set_label_default(cls, values: Any) -> Any:
-        """
-        Set default for list parameter "label" if list has empty values.
-        """
-        if isinstance(values, dict):
-            max_len = max(len(values['timestamp_idx']), len(values['ecg']))
-            if values['label'] is None:
-                values['label'] = ['undefined'] * max_len
-            elif len(values['label']) < max_len:
-                values['label'] += ['undefined'] * (max_len - len(values['label']))
-        return values
-
-    @model_validator(mode='after')
-    def check_length(self) -> 'ECGSample':
-        """
-        Validates that given lists have the same length.
-        """
-        lengths = [len(self.timestamp_idx), len(self.ecg)]
-        if len(set(lengths)) != 1:
-            raise ValueError('Given timestamp and ecg list must have the same length!')
-        return self
-
-
-class ECGConfig(BaseModel):
-    """ Model of the configuration of an experiment with ECG biosignals. """
-    signal: SignalEnum = Field(example=SignalEnum.chest, default=None)
-    window_slicing_method: WindowSlicingMethodEnum = Field(example=WindowSlicingMethodEnum.time_related,
-                                                           default=WindowSlicingMethodEnum.time_related)
-    window_size: float = Field(example=1.0, default=5.0)
-    # pydantic will process either an int or float (unix timestamp) (e.g. 1496498400),
-    # an int or float as a string (assumed as Unix timestamp), or
-    # a string representing the date (e.g. "YYYY - MM - DD[T]HH: MM[:SS[.ffffff]][Z or [±]HH[:]MM]")
-    baseline_start: datetime = Field(example="2034-01-16T00:00:00", default=None)
-    baseline_end: datetime = Field(example="2034-01-16T00:01:00", default=None)
-    baseline_duration: int = Field(example=60, default=None)  # in seconds
-    normalization_method: Union[NormalizationMethodEnum | None] = Field(
-        example=NormalizationMethodEnum.baseline_difference,
-        default=NormalizationMethodEnum.baseline_difference)
-    extra: Dict[str, Any] = Field(default=None)
-
-    class Config:
-        json_schema_extra = {
-            "example": {
-                "signal": "chest",
-                "window_slicing_method": "time_related",
-                "window_size": 60,
-                "baseline_start": "2023-05-23 22:58:01.335",
-                "baseline_duration": 60,
-                "test": "test"
-            }
-        }
-
-    @model_validator(mode='before')
-    @classmethod
-    def build_extra(cls, values: Any) -> Any:
-        required_fields = {field.alias for field in cls.model_fields.values() if field.alias != 'extra'}
-        extra: Dict[str, Any] = {}
-        for field_name in list(values):
-            if field_name not in required_fields:
-                extra[field_name] = values.pop(field_name)
-        values['extra'] = extra
-        return values
-
-    @model_validator(mode='after')
-    def check_baseline_start(self) -> 'ECGConfig':
-        """
-        Validates that baseline_start and either baseline_duration or baseline_end are given if baseline is True.
-        If baseline_end is not provided, it is calculated as baseline_start + baseline_duration.
-        """
-        if self.baseline_start:
-            if self.baseline_duration is None and self.baseline_end is None:
-                raise ValueError(
-                    'If baseline_start is given, either baseline_duration or baseline_end must be provided.')
-            if self.baseline_end is None:
-                if self.baseline_duration is None:
-                    raise ValueError(
-                        'If baseline is True, baseline_duration must be provided when baseline_end is not provided.')
-                self.baseline_end = self.baseline_start + timedelta(seconds=self.baseline_duration)
-
-        elif self.baseline_start is None and (self.baseline_duration or self.baseline_end) is not None:
-            raise ValueError(
-                'If baseline_duration or baseline_end is given, baseline_start must be provided in order. Delete the '
-                'baseline parameters if the baseline is not needed.')
-        return self
-
-    @classmethod
-    def __get_validators__(cls):
-        yield cls.validate_to_json
-
-    @classmethod
-    def validate_to_json(cls, value):
-        if isinstance(value, str):
-            return cls.model_validate(json.loads(value.encode()))
-        return cls.model_validate(value)
-
-
-class ECGBatch(BaseModel):
-    """ Input Model for Data Validation. The Input being the results of an experiment with ECG biosignals,
-    including a batch of ecg data of different subjects. """
-    supervisor: str = Field(..., example="Lieschen Mueller")
-    # pydantic will process either an int or float (unix timestamp) (e.g. 1496498400),
-    # an int or float as a string (assumed as Unix timestamp), or
-    # a string representing the date (e.g. "YYYY-MM-DD")
-    record_date: date = Field(example="2034-01-16", default_factory=datetime.utcnow)
-    configs: ECGConfig = Field(..., example=ECGConfig.Config.json_schema_extra)
-    samples: list[ECGSample] = Field(..., min_items=1,
-                                     example=[ECGSample.Config.json_schema_extra, ECGSample.Config.json_schema_extra])
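A minimal instantiation sketch for the deleted sample model (assuming pydantic v2, which these model_validator hooks target); the values mirror the field examples above:

    from src.deprecated.pydantic_models import ECGSample

    sample = ECGSample(
        subject_id='participant_1',
        frequency=1000,
        timestamp_idx=[1679709871, 1679713471, 1679720671],  # Unix timestamps
        ecg=[1.0, -1.100878, -3.996840],
        label=None,  # set_label_default pads this to ['undefined'] * 3
    )
    print(sample.label)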
src/deprecated/utils.py
DELETED
@@ -1,80 +0,0 @@
-"""This file contains a collection of utility functions that can be used for common tasks in the ecg processing."""
-from datetime import datetime, timedelta
-from typing import Union
-
-import pandas as pd
-
-
-def cut_out_window(df: pd.DataFrame,
-                   time_column: str,
-                   start: Union[datetime, pd.Timestamp],
-                   end: Union[datetime, pd.Timestamp, None] = None,
-                   duration: Union[timedelta, int, None] = None) -> pd.DataFrame:
-    """
-    Cuts out a window from a DataFrame based on the given start and end timestamps or duration. The dataframe must have
-    a time column containing timestamps.
-
-    :param df: The dataframe to cut out the window from.
-    :type df: pandas.DataFrame
-    :param time_column: The name of the column containing the timestamps.
-    :type time_column: str
-    :param start: The start timestamp of the window.
-    :type start: datetime.datetime or pandas.Timestamp
-    :param end: The end timestamp of the window.
-    :type end: datetime.datetime or pandas.Timestamp or None
-    :param duration: The duration of the window in seconds.
-    :type duration: datetime.timedelta or int or None
-
-    :return: The window as a dataframe.
-    :rtype: pandas.DataFrame
-    """
-    # Convert the timestamp column to datetime if it's not already
-    if not pd.api.types.is_datetime64_ns_dtype(df[time_column]):
-        df[time_column] = pd.to_datetime(df[time_column])
-
-    # Cut out the window
-    if end is None and duration is None:
-        raise ValueError('Either end or duration must be given!')
-    if end is None and duration is not None:
-        end = start + pd.Timedelta(seconds=duration)
-
-    window = df[(df[time_column] >= start) & (df[time_column] <= end)]
-    return window
-
-
-def create_windows(df, time_column, label_column=None, window_size=5.0, window_slicing_method='time_related'):
-    """
-    Slices a dataframe into windows of a given size. The windows can be sliced in different ways. The windows are
-    returned as a generator of dataframes. The dataframe must have a column containing timestamps and be indexed by it.
-
-    :param df: The dataframe to slice.
-    :type df: pandas.DataFrame
-    :param time_column: The name of the column containing the timestamps.
-    :type time_column: str
-    :param label_column: The name of the column containing the labels.
-    :type label_column: str
-    :param window_size: The size of the windows in seconds.
-    :type window_size: int
-    :param window_slicing_method: The method used to slice the windows.
-    :type window_slicing_method: str
-
-    :return: A generator of dataframes containing the windows.
-    :rtype: generator
-    """
-    # Convert the timestamp column to datetime if it's not already
-    if not pd.api.types.is_datetime64_ns_dtype(df[time_column]):
-        df[time_column] = pd.to_datetime(df[time_column])
-
-    # Slice the dataframe into windows
-    if window_slicing_method == 'time_related':
-        # Resample the dataframe every x seconds
-        result_dfs = [group for _, group in df.groupby(pd.Grouper(key=time_column, freq=f'{window_size}S'))]
-        return result_dfs
-    elif window_slicing_method == 'label_related_before':
-        pass
-    elif window_slicing_method == 'label_related_after':
-        pass
-    elif window_slicing_method == 'label_related_centered':
-        pass
-    else:
-        raise ValueError(f'window_slicing_method {window_slicing_method} not supported')
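A short sketch of the removed windowing helpers on fabricated data; the expected counts assume the inclusive bound filtering and 5 s grouping shown above:

    import pandas as pd

    from src.deprecated.utils import create_windows, cut_out_window

    df = pd.DataFrame({
        'timestamp_idx': pd.date_range('2023-05-23 22:58:00', periods=600, freq='100ms'),
        'ecg': [float(i) for i in range(600)],
    })  # one minute of fake 10 Hz samples
    baseline = cut_out_window(df, 'timestamp_idx', start=df['timestamp_idx'].iloc[0], duration=10)
    windows = create_windows(df, 'timestamp_idx', window_size=5.0)
    print(len(baseline), len(windows))  # 101 rows (inclusive bounds), 12 five-second windows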