Source code for medusa.classification_utils

from sklearn.preprocessing import OneHotEncoder
import numpy as np


def categorical_labels(one_hot_labels):
    """Converts one-hot encoded labels to categorical (integer) labels."""
    cat_labels = np.argmax(one_hot_labels, axis=1)
    return cat_labels
def one_hot_labels(categorical_labels):
    """Converts categorical (integer) labels to one-hot encoded labels."""
    enc = OneHotEncoder(handle_unknown='ignore')
    one_hot_labels = \
        enc.fit_transform(categorical_labels.reshape(-1, 1)).toarray()
    return one_hot_labels
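
# Illustrative round-trip sketch (not part of the original module): encoding
# integer class labels as one-hot vectors and recovering them with the two
# helpers above.
#
#   >>> labels = np.array([0, 1, 2, 1])
#   >>> oh = one_hot_labels(labels)
#   >>> oh.shape                        # -> (4, 3), one column per class
#   >>> categorical_labels(oh)          # -> array([0, 1, 2, 1])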
def k_fold_split(x, y, k, keys=None, shuffle=False):
    """ Special implementation of k-fold splitting that divides the dataset
    into k folds for cross-validation according to the keys array. It returns
    a list with the dataset for each iteration (k iterations).

    Parameters
    ----------
    x: numpy array or list
        Training set data. Axis 0 represents each observation. Features may
        have one or more dimensions. For instance,
        [observations x eeg samples], [observations x eeg samples x channels]
    y: numpy array or list
        Training set labels.
    k: int
        Number of folds to split the dataset.
    keys: numpy array or list
        Keys to split the dataset. If None, the dataset is split considering
        each observation independently. If not None, each position of the
        keys array identifies the set that owns the observation. This is
        useful, for instance, to split the dataset by subjects or trials.
    shuffle: boolean
        True if you want to shuffle the dataset randomly.

    Returns
    -------
    sets: list
        List that contains a dict with the train and test set for each
        iteration of the k-fold algorithm.

    Examples
    --------
    >>> k_fold_iter = k_fold_split(x, y, k)
    >>> k_fold_acc = 0
    >>> for iter in k_fold_iter:
    >>>     model.fit(iter["x_train"], iter["y_train"])
    >>>     y_test_pred = model.predict(iter["x_test"])
    >>>     k_fold_acc += np.sum(y_test_pred == iter["y_test"])/len(iter["y_test"])
    >>> k_fold_acc = k_fold_acc/len(k_fold_iter)
    """
    # Convert to numpy arrays
    x = np.array(x)
    y = np.array(y)
    # If keys is None, each observation is treated independently
    if keys is None:
        keys = np.arange(len(x))
    else:
        keys = np.array(keys)
        if keys.shape[0] != x.shape[0] or keys.shape[0] != y.shape[0]:
            raise ValueError("Dimensions of x, y and keys arrays must match "
                             "along axis 0.")
    # Divide keys array in k folds
    keys_values = np.unique(keys)
    if shuffle:
        np.random.shuffle(keys_values)
    keys_folds = np.array_split(keys_values, k)
    # Divide the dataset
    k_fold_iter = list()
    for i in range(k):
        idx = np.isin(keys, keys_folds[i]).nonzero()
        # Get train set
        x_train = np.delete(x, idx, axis=0)
        y_train = np.delete(y, idx, axis=0)
        # Get test set
        x_test = x[idx]
        y_test = y[idx]
        # Save train and test sets of iteration i
        split = dict()
        split["x_train"] = x_train
        split["y_train"] = y_train
        split["x_test"] = x_test
        split["y_test"] = y_test
        k_fold_iter.append(split)
    return k_fold_iter
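
# Illustrative sketch (not part of the original module): subject-wise
# cross-validation using the keys argument, so that all observations from a
# given subject end up in the same fold. Array shapes and subject codes are
# placeholders chosen for the example.
#
#   >>> x = np.random.rand(12, 64)              # 12 observations x 64 samples
#   >>> y = np.repeat([0, 1], 6)                # binary labels
#   >>> subjects = np.repeat([0, 1, 2, 3], 3)   # 3 observations per subject
#   >>> folds = k_fold_split(x, y, k=2, keys=subjects)
#   >>> len(folds)                              # -> 2
#   >>> folds[0]["x_test"].shape                # -> (6, 64), i.e. 2 subjects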
class EarlyStopping:
    """
    Implements early stopping to terminate training when a monitored metric
    stops improving.

    Parameters
    ----------
    mode : {'min', 'max'}, optional
        Determines whether the monitored metric should be minimized or
        maximized.

        - 'min' (default): Training stops when the metric does not decrease.
        - 'max': Training stops when the metric does not increase.
    min_delta : float, optional
        The minimum change in the monitored metric to qualify as an
        improvement. Defaults to 0.001.
    patience : int, optional
        Number of epochs to wait after the last improvement before stopping
        training. Defaults to 20.
    verbose : bool, optional
        If True, prints messages when the best metric is updated or when
        patience runs out. Defaults to True.
    """
    def __init__(self, mode='min', min_delta=0.001, patience=20,
                 verbose=True):
        # Init attributes
        self.mode = mode
        self.min_delta = min_delta
        self.patience = patience
        self.verbose = verbose
        # Init states. The initial best value depends on the optimization
        # direction: +inf when minimizing, -inf when maximizing.
        self.best_loss = float('inf') if mode == 'min' else float('-inf')
        self.best_epoch = 0
        self.best_params = None
        self.patience_counter = 0
    def check_epoch(self, n_epoch, epoch_loss, epoch_params=None):
        """
        Checks whether training should stop based on the given epoch's loss.

        Parameters
        ----------
        n_epoch : int
            The current epoch number.
        epoch_loss : float
            The loss value for the current epoch.
        epoch_params : dict, optional
            The parameters at the current epoch (e.g., model state
            dictionary).

        Returns
        -------
        bool
            True if training should stop, False otherwise.
        dict or None
            The best parameters recorded during training, or None if no
            improvement was found.
        """
        # Check if updates are needed
        if self.mode == 'min':
            update_params = epoch_loss < self.best_loss
            update_state = epoch_loss < self.best_loss - self.min_delta
        elif self.mode == 'max':
            update_params = epoch_loss > self.best_loss
            update_state = epoch_loss > self.best_loss + self.min_delta
        else:
            raise ValueError('Mode must be min or max')
        # Update state
        if update_state:
            self.best_loss = epoch_loss
            self.best_epoch = n_epoch
            self.patience_counter = 0
            if self.verbose:
                print(f"\nEarly stopping: New best loss {self.best_loss:.4f} "
                      f"at epoch {n_epoch+1}. Resetting patience.")
        else:
            self.patience_counter += 1
        # Update params
        if update_params:
            self.best_params = epoch_params
        # Check patience
        if self.patience_counter >= self.patience:
            return True, self.best_params
        else:
            return False, self.best_params
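
# Minimal usage sketch (not part of the original module), assuming a
# PyTorch-style model whose parameters are captured with state_dict();
# max_epochs, train_one_epoch and evaluate are placeholders for the user's
# own training loop.
#
#   >>> early_stopping = EarlyStopping(mode='min', patience=10, verbose=False)
#   >>> for epoch in range(max_epochs):
#   >>>     train_one_epoch(model)                        # placeholder
#   >>>     val_loss = evaluate(model)                    # placeholder
#   >>>     stop, best_params = early_stopping.check_epoch(
#   >>>         epoch, val_loss, epoch_params=model.state_dict())
#   >>>     if stop:
#   >>>         model.load_state_dict(best_params)        # restore best epoch
#   >>>         break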