Source code for medusa.classification_utils

from sklearn.preprocessing import OneHotEncoder
import numpy as np


def categorical_labels(one_hot_labels):
    """
    Convert one-hot encoded labels into categorical (integer) labels.

    Parameters
    ----------
    one_hot_labels: numpy array or list
        One-hot encoded labels. Axis 0 represents each observation and axis 1
        each class, i.e. [observations x classes].

    Returns
    -------
    cat_labels: numpy array
        For each observation, the index along axis 1 that holds the maximum
        value (the first one in case of ties).
    """
    return np.argmax(one_hot_labels, axis=1)
def one_hot_labels(categorical_labels):
    """
    Convert categorical labels into one-hot encoded labels.

    Parameters
    ----------
    categorical_labels: numpy array or list
        Categorical labels, one per observation. Any array-like is accepted;
        it is flattened into a single column before encoding.

    Returns
    -------
    one_hot: numpy array
        Dense array with dimensions [observations x classes], with one column
        per distinct label value found in ``categorical_labels``. Columns
        follow the category order of the fitted encoder (sorted unique
        values).
    """
    # np.asarray lets plain lists and tuples through: they have no reshape
    # method, so calling it directly on the input would fail for them
    labels_column = np.asarray(categorical_labels).reshape(-1, 1)
    encoder = OneHotEncoder(handle_unknown='ignore')
    # fit_transform returns a sparse matrix; callers expect a dense array
    return encoder.fit_transform(labels_column).toarray()
def k_fold_split(x, y, k, keys=None, shuffle=False):
    """
    Special implementation of k-fold splitting that allows to split the
    dataset into k folds for cross validation as a function of a keys array.
    It returns a list with the train and test sets of each iteration
    (k iterations).

    Parameters
    ----------
    x: numpy array or list
        Training set data. Axis 0 represents each observation. Features could
        have one or more dimensions. For instance, [observations x eeg
        samples], [observations x eeg samples x channels]
    y: numpy array or list
        Training set labels. Must have the same length as x along axis 0.
    k: int
        Number of folds to split the dataset. If k is greater than the number
        of unique keys, some folds have empty test sets.
    keys: numpy array or list
        One-dimensional keys to split the dataset. If None, the dataset is
        split considering each observation independently. If not None, each
        position of the keys array identifies the set that owns the
        observation, and all the observations with the same key always fall
        in the same fold. This is useful to split the dataset by subjects or
        trials.
    shuffle: boolean
        True to randomly shuffle the unique keys (with numpy's global random
        state) before assigning them to folds. The order of the observations
        inside each train and test set is not altered.

    Returns
    -------
    sets: list
        List that contains a dict with the train and test set for each
        iteration of the k-fold algorithm. Each dict has the keys "x_train",
        "y_train", "x_test" and "y_test".

    Raises
    ------
    ValueError
        If keys is not one-dimensional, or if x, y and keys do not have the
        same length along axis 0.

    Examples
    --------
    >>> k_fold_iter = k_fold_split(x, y, k)
    >>> k_fold_acc = 0
    >>> for split in k_fold_iter:
    ...     model.fit(split["x_train"], split["y_train"])
    ...     y_test_pred = model.predict(split["x_test"])
    ...     k_fold_acc += np.sum(y_test_pred == split["y_test"]) / \
    ...         len(split["y_test"])
    >>> k_fold_acc = k_fold_acc / len(k_fold_iter)
    """
    # Convert to numpy arrays (the returned sets are always fresh copies
    # because they are built with boolean indexing)
    x = np.asarray(x)
    y = np.asarray(y)

    # If keys is None, each observation is treated independently
    if keys is None:
        keys = np.arange(len(x))
    else:
        keys = np.asarray(keys)

    # Validate in every case, so mismatched x and y are also detected when
    # no keys are given
    if keys.ndim != 1:
        raise ValueError("keys must be a one-dimensional array.")
    if keys.shape[0] != x.shape[0] or keys.shape[0] != y.shape[0]:
        raise ValueError("Dimensions of x, y and keys arrays must match along"
                         " axis 0.")

    # Divide keys array in k folds
    keys_values = np.unique(keys)
    if shuffle:
        np.random.shuffle(keys_values)
    keys_folds = np.array_split(keys_values, k)

    # Divide the dataset
    k_fold_iter = []
    for fold_keys in keys_folds:
        # Observations whose key belongs to this fold form the test set, the
        # remaining ones form the train set
        test_mask = np.isin(keys, fold_keys)
        train_mask = ~test_mask
        # Save train and test sets of this iteration
        k_fold_iter.append({
            "x_train": x[train_mask],
            "y_train": y[train_mask],
            "x_test": x[test_mask],
            "y_test": y[test_mask],
        })
    return k_fold_iter