Source code for medusa.bci.erp_spellers

"""Created on Monday March 15 19:27:14 2021

In this module you will find useful functions and classes to operate with data
recorded using spellers based on event-related pontentials (ERP), which are
widely used by the BCI community. Enjoy!

@author: Eduardo Santamaría-Vázquez
"""

# Built-in imports
import copy, warnings
from abc import ABC, abstractmethod

# External imports
import numpy as np
from scipy import signal
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from tqdm import tqdm

# Medusa imports
import medusa as mds
from medusa import components
from medusa import meeg


[docs]class ERPSpellerData(components.ExperimentData):
    """Experiment info class for ERP-based spellers. It supports nested
    multi-level paradigms. This unified class can be used to represent a run
    of every ERP stimulation paradigm designed to date, and is the expected
    class for feature extraction and command decoding functions of the module
    medusa.bci.erp_paradigms. It is complicated, but powerful so.. use it well!
    """

[docs]    def __init__(self, mode, paradigm_conf, commands_info, onsets, batch_idx,
                 group_idx, unit_idx, level_idx, matrix_idx, sequence_idx,
                 trial_idx, spell_result, control_state_result,
                 spell_target=None, control_state_target=None, **kwargs):

        """ERPSpellerData constructor

        Parameters
        ----------
        mode : str
            Mode of this run. Values: {"train"|"test"}
        paradigm_conf :  list
            This parameter describes the paradigm configuration for the
            experiment. The array must have shape [n_matrices x n_units x
            n_groups x n_batches x n_commands/batch]. The matrix is the maximum
            entity of the paradigm and only one can be used in each trial.
            The units are smaller entities that are used in multi-level
            paradigms, such as the Hex-O-spell (HOS) paradigm [1]. In this
            case, each level can use a different unit, affecting the selected
            command for the trial. For instance, in the HOS paradigm,
            you should define 1 matrix with 7 units, one for the initial menu
            and 6 for the second level of each command (letters).
            Importantly, commands must be unequivocally defined in each matrix.
            Therefore, units cannot share command identifiers. Then, the groups
            describe aggregations of commands that are highlighted at the
            same time. For instance, the row-column paradigm (RCP) [2]
            has 2 groups of commands (i.e., rows and columns), whereas the
            HOS has only 1 (i.e., each command is highlighted individually).
            Finally, batches contain the commands IDs defined in each group.
            In an RCP matrix of 6x6, each of the 2 groups has 6 batches,
            corresponding to the rows and columns. This structure supports
            nested multi-level matrices, providing compatibility with all
            paradigms to date and setting a general framework for feature
            extraction and command decoding functions. The relationship between
            the command IDs and the letters or actions should be defined in
            other variable, but it is not necessary for signal processing.

            Example of 2x2 RCP paradigm:

                rcp_conf = [
                    # Matrices
                    [
                        # Units
                        [
                            # Groups
                            [
                                # Batches
                                [0, 1],
                                [2, 3]
                            ],
                            [
                                [0, 2],
                                [1, 3]
                            ]
                        ]
                    ]
                ]

            Example of HOS paradigm:

                hos_conf = [
                    # Matrices
                    [
                        # Units
                        [
                            # Groups
                            [
                                # Batches
                                [0], [1], [2], [3], [4], [5]
                            ],
                        ],
                        [
                            [
                                [6], [7], [8], [9], [10], [11]
                            ],
                        ],
                        [
                            [
                                [12], [13], [14], [15], [16], [17]
                            ],
                        ],
                        [
                            [
                                [18], [19], [20], [21], [22], [23]
                            ],
                        ],
                        [
                            [
                                [24], [25], [26], [27], [28], [29]
                            ],
                        ]
                    ]
                ]
        commands_info : list
            List containing the command information per matrix. Each
            position must be a dict, whose keys are the command ids used in
            paradigm_conf. The value must be another dict containing important
            information about each command (e.g., label, text, action, icon
            path, etc). This information may be different for different use
            cases, but must be serializable (i.e., contain primitive types).
            Shape [n_matrices x n_commands].
        onsets : list or numpy.ndarray
            Timestamp of each stimulation. This timestamps have to be
            synchronized with the EEG (or other biosignal) timestamps in
            order to assure a correct functioning of all medusa functions.
            Shape: [n_stim x 1]
        batch_idx : list or numpy.ndarray
            Index of the highlighted batch for each stimulation. A batch
            represents the highlighted commands in each  stimulation. For
            example in the row-col paradigm (RCP) represents each row and
            column. Shape: [n_stim x 1]
        group_idx : list or numpy.ndarray
            Index of the group that has been highlighted. Groups represent the
            different aggregations of batches. Between batches of different
            groups, 1 command must be common. For example in the RCP there
            are 2 groups: rows and columns. In this paradigm, between each
            pair of batches (e.g., row=2, col=4), there is only one command
            in common. Shape: [n_stim x 1]
        unit_idx: list or numpy.ndarray
            Index of the unit used in each stimulation. Units are low level
            entities used in multi-level paradigms, such as HOS paradigm [1].
            For each level, only 1 unit can be used. As the trial may have
            several layers, several units can be used in 1 trial. For
            instance, in the HOS, the first unit is the main menu. The other
            6 units are each of the lower level entities that are displayed
            in the second level of stimulation. Shape: [n_stim x 1]
        level_idx : list or numpy.ndarray
            Index of the level of each stimulation. Levels represent each
            one of the selections that must be made before a trial is
            finished. For example, in the Hex-O-spell paradigm there are 2
            levels (see [1]). Shape: [n_stim x 1]
        matrix_idx : list or numpy.ndarray
            Index of the matrix used in each stimulation. Each matrix can
            contain several levels. The matrix has to be the same accross the
            entire trial. Shape: [n_stim x 1]
        sequence_idx : list or numpy.ndarray
            Index of the sequence for each stimulation. A sequence
            represents a round of stimulation: all commands have been
            highlighted 1 time. This class support dynamic stopping in
            different levels. Shape: [n_stim x 1]
        trial_idx : list or numpy.ndarray
            Index of the trial for each stimulation. A trial represents
            the selection of a final command. Depending on the number of levels,
            the final selection takes N intermediate selections.
        spell_result : list or numpy.ndarray
            Spell result of the run. Each position contains the matrix and
            command id that identifies the selected command per trial and
            level. Shape [n_trials x n_levels x 2]. Eg., in the RCP:
            [[[matrix_idx, cmd_id], [matrix_idx, cmd_id]]].
        control_state_result : list or numpy.ndarray
            Control state result of the run. Each position contains the
            detected control state of the user per trial (0 -> non-control,
            1-> control). Shape: [n_trials x 1]. Values {0|1}
        spell_target : list or numpy.ndarray or None
            Spell target of the run. Each position contains the matrix and
            command id per level that identifies the target command of the
            trial. Shape [n_trials x n_levels x 2]. Eg., in the RCP:
            [[[matrix_idx, cmd_id], [matrix_idx, cmd_id], etc]].
        control_state_target : list or numpy.ndarray or None
            Control state target of the run. Each position contains the
            target control state of the user per trial (0 -> non-control,
            1-> control). Shape: [n_trials x 1]. Values {0|1}
        kwargs : kwargs
            Custom arguments that will also be saved in the class


        References
        ----------
        [1] Blankertz, B., Dornhege, G., Krauledat, M., Schröder,
        M., Williamson, J., Murray-Smith, R., & Müller, K. R. (2006). The
        Berlin Brain-Computer Interface presents the novel mental typewriter
        Hex-o-Spell.

        [2] Farwell, L. A., & Donchin, E. (1988). Talking off the top of your
        head: toward a mental prosthesis utilizing event-related brain
        potentials. Electroencephalography and clinical Neurophysiology,
        70(6), 510-523.
        """
        # Check errors
        mode = mode.lower()
        if mode not in ('train', 'test'):
            raise ValueError('Unknown mode. Possible values {train, test}')

        # Standard attributes
        self.mode = mode
        self.paradigm_conf = paradigm_conf
        self.commands_info = commands_info
        self.onsets = onsets
        self.batch_idx = batch_idx
        self.group_idx = group_idx
        self.unit_idx = unit_idx
        self.level_idx = level_idx
        self.matrix_idx = matrix_idx
        self.sequence_idx = sequence_idx
        self.trial_idx = trial_idx
        self.spell_result = spell_result
        self.control_state_result = control_state_result
        self.spell_target = spell_target
        self.control_state_target = control_state_target
        self.erp_labels = self.compute_erp_labels() \
            if mode == 'train' else None
        self.control_state_labels = self.compute_control_state_labels() \
            if mode == 'train' else None
        # Optional attributes
        for key, value in kwargs.items():
            setattr(self, key, value)

[docs]    def get_batches_associated_to_cmd(self, matrix_idx, command_idx):
        """This function returns the batches associated to the given command
        as defined in the paradigm configuration

        Parameters
        ----------
        matrix_idx: int
            Matrix of the command
        command_idx: int
            Index of the command as defined in attribute commands_info
        """
        tg_ids_batches_list = list()
        for u, unit in enumerate(self.paradigm_conf[matrix_idx]):
            for g, group in enumerate(unit):
                for b, batch in enumerate(group):
                    if command_idx in batch:
                        tg_ids_batches_list.append(
                            [matrix_idx, u, g, b])
        return tg_ids_batches_list

[docs]    def compute_erp_labels(self):
        """
        This function computes the erp label vector (0 if the epoch doesn't
        have ERP, 1 if the epoch have ERP).
        """
        # Convert to numpy array
        batch_idx = np.array(self.batch_idx)
        group_idx = np.array(self.group_idx)
        unit_idx = np.array(self.unit_idx)
        level_idx = np.array(self.level_idx)
        matrix_idx = np.array(self.matrix_idx)
        trial_idx = np.array(self.trial_idx)
        # Get batches associated to each target
        target_ids_batches = dict()
        for t, trial_target in enumerate(self.spell_target):
            for l, level_target in enumerate(trial_target):
                tg_matrix_idx = level_target[0]
                tg_id = level_target[1]
                target_ids_batches[(t, l)] = self.get_batches_associated_to_cmd(
                    tg_matrix_idx, tg_id)
        # Compute erp_labels
        erp_labels = np.zeros((len(batch_idx))).astype(int)
        for key, batches in target_ids_batches.items():
            for batch in batches:
                erp_labels_idx = np.ones((len(batch_idx))).astype(int)
                erp_labels_idx = np.logical_and(erp_labels_idx,
                                                trial_idx == key[0])
                erp_labels_idx = np.logical_and(erp_labels_idx,
                                                level_idx == key[1])
                erp_labels_idx = np.logical_and(erp_labels_idx,
                                                matrix_idx == batch[0])
                erp_labels_idx = np.logical_and(erp_labels_idx,
                                                unit_idx == batch[1])
                erp_labels_idx = np.logical_and(erp_labels_idx,
                                                group_idx == batch[2])
                erp_labels_idx = np.logical_and(erp_labels_idx,
                                                batch_idx == batch[3])
                erp_labels[erp_labels_idx] = 1
        return erp_labels

[docs]    def compute_control_state_labels(self):
        """
        This function computes the control state label vector (0 -> non-control
        state, 1 -> control state).
        """
        trial_idx = np.array(self.trial_idx)
        labels = np.zeros_like(trial_idx).astype(int)
        for t, trial in enumerate(np.unique(trial_idx)):
            labels[trial_idx == trial] = self.control_state_target[t]
        return labels

[docs]    @staticmethod
    def get_paradigm_conf_for_rcp(matrix_dims, commands_info_rcp=None):
        """Returns the paradigm configuration matrix for the row-column paradigm
        (RCP) experiment [1]

        Parameters
        ----------
        matrix_dims : list or np.array
            Array containing the dimensions of the matrices used in the
            experiment. For each matrix, the first position is the number of
            rows and the second the number of columns. Shape [n_matrices x 2]
        commands_info_rcp: list or None
            Array containing the dict info of each command, arranged in 2D
            matrices. Shape [n_matrices x n_rows x n_cols]

        Returns
        -------
        paradigm_conf : list
            Array with the paradigm configuration for an RCP paradigm
        commands_info : list
            Flattened version of commands_info input. It contains the command
            information corresponding to paradigm_conf. If input parameter
            commands_info is None, this output will be a skeleton with an empty
            dict for each command. If input commands_info is provided, it will
            be incorporated in the output

        References
        ----------
        [1] Farwell, L. A., & Donchin, E. (1988). Talking off the top of your
        head: toward a mental prosthesis utilizing event-related brain
        potentials. Electroencephalography and clinical Neurophysiology,
        70(6), 510-523.
        """
        # Paradigm conf
        matrix_dims = np.array(matrix_dims)
        paradigm_conf = list()
        commands_info = list()
        for m in range(matrix_dims.shape[0]):
            # Commands matrix
            n_rows = matrix_dims[m, 0]
            n_cols = matrix_dims[m, 1]
            commands_ids = np.arange(n_rows * n_cols)
            matrix = commands_ids.reshape((n_rows, n_cols))
            # Paradigm conf
            paradigm_conf.append(list())                    # Matrix
            paradigm_conf[m].append(list())                 # Unit
            paradigm_conf[m][0].append(matrix.tolist())     # Append group rows
            paradigm_conf[m][0].append(matrix.T.tolist())   # Append group cols
            # Commands info
            cmd_info_keys = commands_ids.tolist()
            if commands_info_rcp is None:
                cmd_info_values = [dict() for i in cmd_info_keys]
            else:
                cmd_info_values = np.array(commands_info_rcp[m]).flatten()
                cmd_info_values = cmd_info_values.tolist()
            commands_info.append(dict(zip(cmd_info_keys, cmd_info_values)))

        return paradigm_conf, commands_info

[docs]    @staticmethod
    def get_paradigm_conf_for_hox(matrix_dims, commands_info_hox=None):
        """Returns the paradigm configuration matrix for the Hex-O-Speller (HOX)
        or cake paradigms from the Berlin BCI Group [1]. This paradigm has 2
        levels of selection with 6 commands in each unit.

        Parameters
        ----------
        matrix_dims : list or np.array
            Array containing the dimensions of the matrices used in the
            experiment. For each matrix, the first position is the number of
            commands of the first level and the second the number of commands
            of the second level (typically both are 6). Shape [n_matrices x 2]
        commands_info_hox: list or None
            Array containing the dict info of each command. The first
            dimension are the matrices, the second dimension represent the
            units, and the third dimension contains the dictionary with the
            info of each command. Typically, this paradigm has 7 units of 6
            commands each. As defined by the Berlin BCI group: 1 menu matrix and
            6 matrix for each group of 6 letters. Therefore, with this
            setup, this array has shape [n_matrices x 7 x 6]

        Returns
        -------
        paradigm_conf : list
            Array with the paradigm configuration for an RCP paradigm
        commands_info : list
            Flattened version of commands_info input. It contains the command
            information corresponding to paradigm_conf. If input parameter
            commands_info is None, this output will be a skeleton with an empty
            dict for each command. If input commands_info is provided, it will
            be incorporated in the output

        References
        ----------
        [1] Blankertz, B., Dornhege, G., Krauledat, M., Schröder,
        M., Williamson, J., Murray-Smith, R., & Müller, K. R. (2006). The
        Berlin Brain-Computer Interface presents the novel mental typewriter
        Hex-o-Spell.
        """
        # Paradigm conf
        matrix_dims = np.array(matrix_dims)
        paradigm_conf = list()
        commands_info = list()
        for m in range(matrix_dims.shape[0]):
            # Useful variables
            n_cmd_l1 = matrix_dims[m, 0]
            n_cmd_l2 = matrix_dims[m, 0]
            units = list()
            commands_ids = list()
            # First unit (level 1)
            cmd_ids_u1 = list(range(n_cmd_l1))
            units.append([[[int(i)] for i in cmd_ids_u1]])
            commands_ids += cmd_ids_u1
            # Rest of units (level 2)
            for u in range(n_cmd_l1):
                cmd_ids_ux = list(range(
                    commands_ids[-1] + 1, commands_ids[-1] + 1 + n_cmd_l2
                ))
                units.append([[[int(i)] for i in cmd_ids_ux]])
                commands_ids += cmd_ids_ux
            paradigm_conf.append(units)
            # Commands info
            if commands_info_hox is None:
                cmd_info_values = [dict() for __ in commands_ids]
            else:
                cmd_info_values = np.array(commands_info_hox[m]).flatten()
                cmd_info_values = cmd_info_values.tolist()
            commands_info.append(dict(zip(commands_ids, cmd_info_values)))

        return paradigm_conf, commands_info

[docs]    def to_serializable_obj(self):
        rec_dict = self.__dict__
        for key in rec_dict.keys():
            if type(rec_dict[key]) == np.ndarray:
                rec_dict[key] = rec_dict[key].tolist()
        return rec_dict

[docs]    @classmethod
    def from_serializable_obj(cls, dict_data):
        return cls(**dict_data)


[docs]class ERPSpellerDataset(components.Dataset):

    """This class inherits from medusa.data_structures.Dataset, increasing
    its functionality for datasets with data from ERP-based spellers. It
    provides common ground for the rest of functions in the module.
    """

[docs]    def __init__(self, channel_set, fs=None, biosignal_att_key='eeg',
                 experiment_att_key='erpspellerdata', experiment_mode=None,
                 track_attributes=None):
        """ Constructor

        Parameters
        ----------
        channel_set : meeg.EEGChannelSet
            EEG channel set. Only these channels will be kept in the dataset,
            the others will be discarded. Also, the signals will be rearranged,
            keeping the same channel order, avoiding errors in future stages of
            the signal processing pipeline
        fs : int, float or None
            Sample rate of the recordings. If there are recordings with
            different sample rates, the consistency of the dataset can be
            still assured using resampling
        biosignal_att_key : str
            Name of the attribute containing the target biosginal that will be
            used to extract the features. It has to be the same in all
            recordings (e.g., 'eeg', 'meg').
        experiment_att_key : str or None
            Name of the attribute containing the target experiment that will be
            used to extract the features. It has to be the same in all
            recordings (e.g., 'rcp_data', 'cake_paradigm_data'). It is
            mandatory when a recording of the dataset contains more than 1
            experiment data
        experiment_mode : str {'train'|'test'|None}
            Mode of the experiment. If this dataset will be used to fit a model,
            set to train to avoid errors
        track_attributes: dict of dicts or None
            This parameter indicates custom attributes that must be tracked in
            feature extraction functions and how. The keys are the name of the
            attributes, whereas the values are dicts indicating the tracking
            mode {'concatenate'|'append'} and parent. Option concatenate is
            only available for attributes of type list or numpy arrays,
            forming a 1 dimensional array with the data from all recordings.
            Option append is used to save all kind of objects for each
            recording, forming a list whose length will be the number of
            recordings in the dataset. A set of default attributes is defined,
            so this parameter will be None in most cases. Example to track 2
            custom attributes (i.e., date and experiment_equipment):
                track_attributes = {
                    'date': {
                        'track_mode': 'append',
                        'parent': None
                    },
                    'experiment_equipment': {
                        'track_mode': 'append',
                        'parent': experiment_att_key
                    }
                }
        """
        # Check errors
        if experiment_mode is not None:
            if experiment_mode not in ('train', 'test'):
                raise ValueError('Parameter experiment_mode must be '
                                 '{train|test|None}')

        # Default track attributes
        default_track_attributes = {
            'subject_id': {
                'track_mode': 'append',
                'parent': None
            },
            'paradigm_conf': {
                'track_mode': 'append',
                'parent': experiment_att_key
            },
            'commands_info': {
                'track_mode': 'append',
                'parent': experiment_att_key
            },
            'onsets': {
                'track_mode': 'concatenate',
                'parent': experiment_att_key
            },
            'batch_idx': {
                'track_mode': 'concatenate',
                'parent': experiment_att_key
            },
            'group_idx': {
                'track_mode': 'concatenate',
                'parent': experiment_att_key
            },
            'unit_idx': {
                'track_mode': 'concatenate',
                'parent': experiment_att_key
            },
            'level_idx': {
                'track_mode': 'concatenate',
                'parent': experiment_att_key
            },
            'matrix_idx': {
                'track_mode': 'concatenate',
                'parent': experiment_att_key
            },
            'sequence_idx': {
                'track_mode': 'concatenate',
                'parent': experiment_att_key
            },
            'trial_idx': {
                'track_mode': 'concatenate',
                'parent': experiment_att_key
            },
            'spell_result': {
                'track_mode': 'append',
                'parent': experiment_att_key
            },
            'control_state_result': {
                'track_mode': 'append',
                'parent': experiment_att_key
            }
        }

        if experiment_mode == 'train':
            default_track_attributes_train = {
                'erp_labels': {
                    'track_mode': 'concatenate',
                    'parent': experiment_att_key
                },
                'control_state_labels': {
                    'track_mode': 'concatenate',
                    'parent': experiment_att_key
                },
                'spell_target': {
                    'track_mode': 'append',
                    'parent': experiment_att_key
                },
                'control_state_target': {
                    'track_mode': 'append',
                    'parent': experiment_att_key
                }
            }
            default_track_attributes = {
                **default_track_attributes,
                **default_track_attributes_train
            }

        track_attributes = \
            default_track_attributes if track_attributes is None else \
            {**default_track_attributes, **track_attributes}

        # Class attributes
        self.channel_set = channel_set
        self.fs = fs
        self.biosignal_att_key = biosignal_att_key
        self.experiment_att_key = experiment_att_key
        self.experiment_mode = experiment_mode
        self.track_attributes = track_attributes

        # Consistency checker
        checker = self.__get_consistency_checker()
        super().__init__(consistency_checker=checker)

    def __get_consistency_checker(self):
        """Creates a standard consistency checker for ERP datasets

        Returns
        -------
        checker : data_structures.ConsistencyChecker
            Standard consistency checker for ERP feature extraction
        """
        # Create consistency checker
        checker = components.ConsistencyChecker()
        # Check that the biosignal exists
        checker.add_consistency_rule(
            rule='check-attribute',
            rule_params={'attribute': self.biosignal_att_key}
        )
        checker.add_consistency_rule(
            rule='check-attribute-type',
            rule_params={'attribute': self.biosignal_att_key,
                         'type': meeg.EEG}
        )
        # Check channels
        checker.add_consistency_rule(
            rule='check-values-in-attribute',
            rule_params={'attribute': 'channels',
                         'values': self.channel_set.channels},
            parent=self.biosignal_att_key+'.channel_set'
        )
        # Check sample rate
        if self.fs is not None:
            checker.add_consistency_rule(rule='check-attribute-value',
                                         rule_params={'attribute': 'fs',
                                                      'value': self.fs},
                                         parent=self.biosignal_att_key)
        else:
            warnings.warn('Parameter fs is None. The consistency of the '
                          'dataset cannot be assured. Still, you can use '
                          'target_fs parameter for feature extraction '
                          'and everything should be fine.')

        # Check experiment
        checker.add_consistency_rule(
            rule='check-attribute',
            rule_params={'attribute': self.experiment_att_key}
        )
        checker.add_consistency_rule(
            rule='check-attribute-type',
            rule_params={'attribute': self.experiment_att_key,
                         'type': ERPSpellerData}
        )
        # Check mode
        if self.experiment_mode is not None:
            checker.add_consistency_rule(
                rule='check-attribute-value',
                rule_params={'attribute': 'mode',
                             'value': self.experiment_mode},
                parent=self.experiment_att_key
            )

        # Check track_attributes
        if self.track_attributes is not None:
            for key, value in self.track_attributes.items():
                checker.add_consistency_rule(
                    rule='check-attribute',
                    rule_params={'attribute': key},
                    parent=value['parent']
                )
                if value['track_mode'] == 'concatenate':
                    checker.add_consistency_rule(
                        rule='check-attribute-type',
                        rule_params={'attribute': key,
                                     'type': [list, np.ndarray]},
                        parent=value['parent']
                    )

        return checker

[docs]    def custom_operations_on_recordings(self, recording):
        # Select channels
        eeg = getattr(recording, self.biosignal_att_key)
        eeg.change_channel_set(self.channel_set)
        return recording


[docs]class StandardPreprocessing(components.ProcessingMethod):
    """Just the common preprocessing applied in ERP-based spellers. Simple,
    quick and effective: frequency IIR filter followed by common average
    reference (CAR) spatial filter.
    """
[docs]    def __init__(self, order=5, cutoff=(0.5, 10), btype='bandpass',
                 filt_method='sosfiltfilt'):
        super().__init__(fit_transform_signal=['signal'],
                         fit_transform_dataset=['dataset'])
        # Parameters
        self.order = order
        self.cutoff = cutoff
        self.btype = btype
        self.filt_method = filt_method

        # Variables that
        self.iir_filter = None

[docs]    def fit(self, fs, n_cha=None):
        """Fits the IIR filter.

        Parameters
        ----------
        fs: float
            Sample rate of the signal.
        n_cha: int
            Number of channels. Used to compute the initial conditions of the
            frequency filter. Only required with sosfilt filtering method
            (online filtering)
        """
        self.iir_filter = mds.IIRFilter(order=self.order,
                                        cutoff=self.cutoff,
                                        btype=self.btype,
                                        filt_method=self.filt_method)
        self.iir_filter.fit(fs, n_cha=n_cha)

[docs]    def transform_signal(self, signal):
        """Transforms an EEG signal applying IIR filtering and CAR sequentially

        Parameters
        ----------
        signal: np.array or list
            Signal to transform. Shape [n_samples x n_channels]
        """
        signal = self.iir_filter.transform(signal)
        signal = mds.car(signal)
        return signal

[docs]    def fit_transform_signal(self, signal, fs):
        """Fits the IIR filter and transforms an EEG signal applying IIR
        filtering and CAR sequentially

        Parameters
        ----------
        signal: np.array or list
            Signal to transform. Shape [n_samples x n_channels]
        fs: float
            Sample rate of the signal.
        """
        self.iir_filter = mds.IIRFilter(order=self.order,
                                        cutoff=self.cutoff,
                                        btype=self.btype,
                                        filt_method=self.filt_method)
        signal = self.iir_filter.fit_transform(signal, fs)
        signal = mds.car(signal)
        return signal

[docs]    def fit_transform_dataset(self, dataset, show_progress_bar=True):
        """Fits the IIR filter and transforms an EEG signal applying the
        filter and CAR sequentially. Each recording is preprocessed
        independently, taking into account possible differences in sample rate.

        Parameters
        ----------
        dataset: ERPSpellerDataset
            ERPSpellerDataset with the recordings to be preprocessed.
        show_progress_bar: bool
            Show progress bar
        """
        pbar = None
        if show_progress_bar:
            pbar = tqdm(total=len(dataset.recordings),
                        desc='Preprocessing')
        for rec in dataset.recordings:
            eeg = getattr(rec, dataset.biosignal_att_key)
            eeg.signal = self.fit_transform_signal(eeg.signal, eeg.fs)
            setattr(rec, dataset.biosignal_att_key, eeg)
            if show_progress_bar:
                pbar.update(1)
        if show_progress_bar:
            pbar.close()
        return dataset


[docs]class StandardFeatureExtraction(components.ProcessingMethod):
    """Standard feature extraction method for ERP-based spellers. Basically,
    it gets the raw epoch for each stimulation event.
    """
[docs]    def __init__(self, w_epoch_t=(0, 1000), target_fs=20,
                 w_baseline_t=(-250, 0), norm='z',
                 concatenate_channels=True, safe_copy=True):
        """Class constructor

        w_epoch_t : list
            Temporal window in ms for each epoch relative to the event onset
            (e.g., [0, 1000])
        target_fs : float of None
            Target sample rate of each epoch. If None, all the recordings must
            have the same sample rate, so it is strongly recommended to set this
            parameter to a suitable value to avoid problems and save time
        w_baseline_t : list
            Temporal window in ms to be used for baseline normalization for each
            epoch relative to the event onset (e.g., [-250, 0])
        norm : str {'z'|'dc'}
            Type of baseline normalization. Set to 'z' for Z-score normalization
            or 'dc' for DC normalization
        concatenate_channels : bool
            This parameter controls the shape of the feature array. If True, all
            channels will be concatenated, returning an array of shape [n_events
            x (samples x channels)]. If false, the array will have shape
            [n_events x samples x channels]
        safe_copy : bool
            Makes a safe copy of the signal to avoid changing the original
            samples due to references
        """
        super().__init__(transform_signal=['x'],
                         transform_dataset=['x', 'x_info'])
        self.w_epoch_t = w_epoch_t
        self.target_fs = target_fs
        self.w_baseline_t = w_baseline_t
        self.norm = norm
        self.concatenate_channels = concatenate_channels
        self.safe_copy = safe_copy

[docs]    def transform_signal(self, times, signal, fs, onsets):
        """Function to extract ERP features from raw signal. It returns a 3D
        feature array with shape [n_events x n_samples x n_channels]. This
        function does not track any other attributes. Use for online processing
        and custom higher level functions.

        Parameters
        ----------
       times : list or numpy.ndarray
                1D numpy array [n_samples]. Timestamps of each sample. If they
                are not available, generate them artificially. Nevertheless,
                all signals and events must have the same temporal origin
        signal : list or numpy.ndarray
            2D numpy array [n_samples x n_channels]. EEG samples (the units
            should be defined using kwargs)
        fs : int or float
            Sample rate of the recording.
        onsets : list or numpy.ndarray [n_events x 1]
                Timestamp of each event


        Returns
        -------
        features : np.ndarray [n_events x n_samples x n_channels]
            Feature array with the epochs of signal
        """
        # Avoid changes in the original signal (this may not be necessary)
        if self.safe_copy:
            signal = signal.copy()
        # Extract features
        features = mds.get_epochs_of_events(timestamps=times, signal=signal,
                                            onsets=onsets, fs=fs,
                                            w_epoch_t=self.w_epoch_t,
                                            w_baseline_t=self.w_baseline_t,
                                            norm=self.norm)
        # Resample each epoch to the target frequency
        if self.target_fs is not None:
            if self.target_fs > fs:
                raise warnings.warn('Target fs is greater than data fs')
            features = mds.resample_epochs(features,
                                           self.w_epoch_t,
                                           self.target_fs)
        # Reshape epochs and concatenate the channels
        if self.concatenate_channels:
            features = np.squeeze(features.reshape((features.shape[0],
                                                    features.shape[1] *
                                                    features.shape[2], 1)))
        return features

[docs]    def transform_dataset(self, dataset, show_progress_bar=True):
        #TODO: Review this description (EDUARDO)
        """High level function to easily extract features from EEG recordings
        and save useful info for later processing. Nevertheless, the provided
        functionality has several limitations and it will not be suitable for
        all cases and processing pipelines. If it does not fit your needs,
        create a custom function iterating the recordings and using
        extract_erp_features, a much more low-level and general function. This
        function does not apply any preprocessing to the signals, this must
        be done before

        Parameters
        ----------
        dataset: ERPSpellerDataset
            List of data_structures.Recordings or data_structures.Dataset. If this
            parameter is a list of recordings, the consistency of the dataset will
            be checked. Otherwise, if the parameter is a dataset, this function
            assumes that the consistency is already checked
        show_progress_bar: bool
            Show progress bar

        Returns
        -------
        features : numpy.ndarray
            Array with the biosignal samples arranged in epochs
        track_info : dict
            Dictionary with tracked information across all recordings

        """
        # Avoid changes in the original recordings (this may not be necessary)
        if self.safe_copy:
            dataset = copy.deepcopy(dataset)
        # Avoid consistency problems
        if dataset.fs is None and self.target_fs is None:
            raise ValueError('The consistency of the features is not assured '
                             'since dataset.fs and target_fs are both None. '
                             'Specify one of these parameters')

        # Additional track attributes
        track_attributes = dataset.track_attributes
        track_attributes['run_idx'] = {
            'track_mode': 'concatenate',
            'parent': dataset.experiment_att_key
        }

        # Initialization
        features = None
        track_info = dict()
        for key, value in track_attributes.items():
            if value['track_mode'] == 'append':
                track_info[key] = list()
            elif value['track_mode'] == 'concatenate':
                track_info[key] = None
            else:
                raise ValueError('Unknown track mode')

        # Init progress bar
        pbar = None
        if show_progress_bar:
            pbar = tqdm(total=len(dataset.recordings),
                        desc='Extracting features')

        # Compute features
        run_counter = 0
        trial_counter = 0
        for rec in dataset.recordings:
            # Extract recording experiment and biosignal
            rec_exp = getattr(rec, dataset.experiment_att_key)
            rec_sig = getattr(rec, dataset.biosignal_att_key)

            # Get features
            rec_feat = self.transform_signal(
                times=rec_sig.times,
                signal=rec_sig.signal,
                fs=rec_sig.fs,
                onsets=rec_exp.onsets,
            )
            features = np.concatenate((features, rec_feat), axis=0) \
                if features is not None else rec_feat

            # Special attributes that need tracking across runs to assure the
            # consistency of the dataset
            rec_exp.run_idx = run_counter * np.ones_like(rec_exp.trial_idx)
            rec_exp.trial_idx = trial_counter + np.array(rec_exp.trial_idx)

            # Update counters of special attributes
            run_counter += 1
            trial_counter += np.unique(rec_exp.trial_idx).shape[0]

            # Track experiment info
            for key, value in track_attributes.items():
                if value['parent'] is None:
                    parent = rec
                else:
                    parent = rec
                    for p in value['parent'].split('.'):
                        parent = getattr(parent, p)
                att = getattr(parent, key)
                if value['track_mode'] == 'append':
                    track_info[key].append(att)
                elif value['track_mode'] == 'concatenate':
                    track_info[key] = np.concatenate(
                        (track_info[key], att), axis=0
                    ) if track_info[key] is not None else att
                else:
                    raise ValueError('Unknown track mode')

            if show_progress_bar:
                pbar.update(1)
        if show_progress_bar:
            pbar.close()

        return features, track_info


[docs]def decode_commands(scores, paradigm_conf, run_idx, trial_idx, matrix_idx,
                    level_idx, unit_idx, sequence_idx, group_idx, batch_idx):

    """Command decoder for ERP-based spellers.

    Parameters
    ----------
    scores : list or np.ndarray
        Array with the score for each stimulation
    paradigm_conf : list or np.ndarray
        Array containing the unified speller matrix structure with shape
        [n_runs x n_matrices x n_units x n_groups x n_batches x
        n_commands/batch]. All ERP-based speller paradigms can be adapted to
        this format and use this function for command decoding. See
        ERPSpellerData class for more info.
    run_idx : list or numpy.ndarray [n_stim x 1]
        Index of the run for each stimulation. This variable is automatically
        retrieved by function extract_erp_features_from_dataset as part of
        the track info dict. The run indexes must be related to
        paradigm_conf, keeping the same order. Therefore
        paradigm_conf[np.unique(run_idx)[0]] must retrieve the paradigm
        configuration of run 0.
    trial_idx : list or numpy.ndarray [n_stim x 1]
        Index of the trial for each stimulation. A trial represents
        the selection of a final command. Depending on the number of levels,
        the final selection takes N intermediate selections.
    matrix_idx : list or numpy.ndarray [n_stim x 1]
        Index of the matrix used in each stimulation. Each matrix can
        contain several levels. The matrix has to be the same accross the
        entire trial.
    level_idx : list or numpy.ndarray [n_stim x 1]
        Index of the level of each stimulation. Levels represent each
        one of the selections that must be made before a trial is
        finished. For example, in the Hex-O-spell paradigm there are 2
        levels (see [1]).
    unit_idx: list or numpy.ndarray [n_stim x 1]
        Index of the unit used in each stimulation. Units are low level
        entities used in multi-level paradigms, such as HOS paradigm [1].
        For each level, only 1 unit can be used. As the trial may have
        several layers, several units can be used in 1 trial. For instance,
        in the HOS, there are 7 units. The first unit is the main  menu. The
        other 6 units are each of the lower level entities that are
        displayed in the second level of stimulation.
    sequence_idx : list or numpy.ndarray [n_stim x 1]
        Index of the sequence for each stimulation. A sequence
        represents a round of stimulation: all commands have been
        highlighted 1 time. This class support dynamic stopping in
        different levels.
    group_idx : list or numpy.ndarray [n_stim x 1]
        Index of the group that has been highlighted. Groups represent the
        different aggregations of batches. Between batches of different
        groups, 1 command must be common. For example in the RCP there
        are 2 groups: rows and columns. In this paradigm, between each
        pair of batches (e.g., row=2, col=4), there is only one command
        in common.
    batch_idx : list or numpy.ndarray [n_stim x 1]
        Index of the code of the highlighted batch for each stimulation.
        A batch represents the highlighted commands in each stimulation.
        For example in the row-col paradigm (RCP) represents each row and
        column.

    Returns
    -------
    selected_commands: list
        Selected command for each trial considering all sequences of
        stimulation. Each command is organized in an array [matrix_idx,
        command_id]. Take into account that the command ids are unique for each
        matrix, and therefore only the command of the last level should be
        useful to take action. Shape [n_runs x n_trials x n_levels x 2]
    selected_commands_per_seq: list
        Selected command for each trial and sequence of stimulation. The
        fourth dimension of the array contains [matrix_idx, command_id]. To
        calculate the command for each sequence, it takes into account the
        scores of all the previous sequences as well. Shape [n_runs x
        n_trials x n_levels x n_sequences x 2]
    scores: list
        Scores for each command per sequence. Shape [n_runs x n_trials x
        n_levels x n_sequences x n_commands x 1]. The score of each sequence
        is calculated independently of the previous sequences.
    """

    # Avoid errors
    scores = np.array(scores)
    run_idx = np.array(run_idx)
    trial_idx = np.array(trial_idx)
    matrix_idx = np.array(matrix_idx)
    level_idx = np.array(level_idx)
    unit_idx = np.array(unit_idx)
    sequence_idx = np.array(sequence_idx)
    group_idx = np.array(group_idx)
    batch_idx = np.array(batch_idx)

    # Check errors
    if len(scores.shape) > 1:
        if len(scores.shape) > 2 or scores.shape[-1] != 1:
            raise ValueError('Parameter scores must have shape '
                             '(n_stim,) or (n_stim, 1)')
    n_stim = scores.shape[0]
    if trial_idx.shape[0] != n_stim or matrix_idx.shape[0] != n_stim or \
            level_idx.shape[0] != n_stim or sequence_idx.shape[0] != n_stim or \
            group_idx.shape[0] != n_stim or  batch_idx.shape[0] != n_stim:
        raise ValueError('Shape mismatch. Parameters scores, trial_idx, '
                         'matrix_idx, level_idx, sequence_idx, group_idx and '
                         'batch_idx must have the same dimensions')

    depth = lambda l: isinstance(l, list) and max(map(depth, l)) + 1
    if len(paradigm_conf) != np.unique(run_idx).shape[0] or \
        depth(paradigm_conf) != 6:
        raise ValueError('Shape mismatch. Parameter paradigm_conf must be a '
                         'list of length n_runs. Take into account that the '
                         'paradigm configuration can change between runs')

    # Command decoding
    cmd_scores = list()
    selected_commands = list()
    selected_commands_per_seq = list()
    idx = np.arange(trial_idx.shape[0])

    # Iterate each trial
    r_cnt = 0  # Run counter
    for r in np.unique(run_idx):
        idx_r = idx[np.where(run_idx == r)]
        # Increment dimensionality
        cmd_scores.append(list())
        selected_commands_per_seq.append(list())
        selected_commands.append(list())
        # Reset trial counter
        t_cnt = 0  # Trial counter
        for t in np.unique(trial_idx[idx_r]):
            try:
                idx_t = idx_r[np.where(trial_idx[idx_r] == t)]
                # Get matrix index
                m = int(np.squeeze(np.unique(matrix_idx[trial_idx == t])))
                # Update lists
                cmd_scores[r_cnt].append(list())
                selected_commands_per_seq[r_cnt].append(list())
                selected_commands[r_cnt].append(list())
                # Reset level counter
                l_cnt = 0  # Level counter
                for l in np.unique(level_idx[idx_t]):
                    idx_l = idx_t[np.where(level_idx[idx_t] == l)]
                    # Append list to cmd_scores
                    cmd_scores[r_cnt][t_cnt].append(list())
                    selected_commands_per_seq[r_cnt][t_cnt].append(list())
                    # selected_commands[r_cnt][t_cnt].append(list())
                    # Get unit index
                    u = int(np.squeeze(np.unique(unit_idx[idx_l])))
                    # Commands in this unit
                    commands_id = list()
                    for x in paradigm_conf[r_cnt][m][u]:
                        for y in x:
                            for z in y:
                                commands_id.append(z)
                    commands_id = np.unique(commands_id)
                    # Reset sequences counter
                    s_cnt = 0
                    for s in np.unique(sequence_idx[idx_l]):
                        idx_s = idx_l[np.where(sequence_idx[idx_l] == s)]
                        # Append one list for each command
                        cmd_scores[r_cnt][t_cnt][l_cnt].append(list())
                        selected_commands_per_seq[r_cnt][t_cnt][l_cnt].\
                            append([m])
                        for __ in commands_id:
                            cmd_scores[r_cnt][t_cnt][l_cnt][s_cnt].append([m])
                        # Iterate over groups
                        for g in np.unique(group_idx[idx_s]):
                            idx_g = idx_s[np.where(group_idx[idx_s] == g)]
                            # Iterate over batches
                            for b in np.unique(batch_idx[idx_g]):
                                idx_b = np.squeeze(
                                    idx_g[np.where(batch_idx[idx_g] == b)]
                                )
                                # Reset command counter
                                for cmd in paradigm_conf[r_cnt][m][u][g][b]:
                                    c = np.squeeze(np.where(commands_id == cmd))
                                    sc = scores[idx_b]
                                    cmd_scores[r_cnt][t_cnt][l_cnt][s_cnt][
                                        int(c)].append(sc)

                        # Append selected command for this sequence and trial
                        s_scores = np.array(cmd_scores[r_cnt][t_cnt][l_cnt])
                        s_scores_mean = np.mean(np.mean(s_scores, axis=2),
                                                axis=0)
                        sel_cmd = commands_id[np.argmax(s_scores_mean)]
                        selected_commands_per_seq[r_cnt][t_cnt][l_cnt][s_cnt].\
                            append(sel_cmd)
                        # Increment sequence counter
                        s_cnt += 1
                    # Append selected cmd (avoid reference problems with copy)
                    selected_commands[r_cnt][t_cnt].append(
                        copy.copy(
                            selected_commands_per_seq[r_cnt][t_cnt][-1][-1]
                        )
                    )
                    # Increment level counter
                    l_cnt += 1
                # Increment trial counter
                t_cnt += 1
            except Exception as e:
                raise type(e)('Error in trial %i: %s' % (t, str(e)))
        # Increment run counter
        r_cnt += 1

    return selected_commands, selected_commands_per_seq, cmd_scores


[docs]def detect_control_state(scores, run_idx, trial_idx, sequence_idx):
    """Detects the user's control state for each trial, assigning 0 to
    non-control and 1 to control states.

    Parameters
    ----------
    scores : list or np.ndarray
        Array with the score per stimulation.
    run_idx : list or numpy.ndarray [n_stim x 1]
        Index of the run for each stimulation.
    trial_idx : list or numpy.ndarray [n_stim x 1]
        Index of the trial for each stimulation. A trial represents
        the selection of a final command. Depending on the number of levels,
        the final selection takes N intermediate selections.
    sequence_idx : list or numpy.ndarray [n_stim x 1]
        Index of the sequence for each stimulation. A sequence
        represents a round of stimulation: all commands have been
        highlighted 1 time. This class support dynamic stopping in
        different levels.

    Returns
    -------
    selected_control_state: list
        Selected control state for each trial considering all sequences of
        stimulation. Shape [n_runs x n_trials]
    selected_control_state_per_seq: list
        Selected command for each trial and sequence of stimulation. The
        fourth dimension of the array contains [matrix_idx, command_id]. To
        calculate the command for each sequence, it takes into account the
        scores of all the previous sequences as well. Shape [n_runs x
        n_trials x n_sequences]
    scores: list
        Scores for each command per sequence. Shape [n_runs x n_trials x
        n_levels x n_sequences]. The score of each sequence is calculated
        independently of the previous sequences.
    """

    # Avoid errors
    scores = np.array(scores)
    run_idx = np.array(run_idx)
    trial_idx = np.array(trial_idx)
    sequence_idx = np.array(sequence_idx)

    # Check errors
    if len(scores.shape) > 1:
        if len(scores.shape) > 2 or scores.shape[-1] != 1:
            raise ValueError('Parameter scores must have shape '
                             '(n_stim,) or (n_stim, 1)')
    n_stim = scores.shape[0]
    if run_idx.shape[0] != n_stim or trial_idx.shape[0] != n_stim or \
            sequence_idx.shape[0] != n_stim:
        raise ValueError('Shape mismatch. Parameters scores, run_idx, '
                         'trial_idx and sequence_idx must have a the same '
                         'dimensions')

    # Control state detection
    state_scores = list()
    selected_control_state = list()
    selected_control_state_per_seq = list()
    idx = np.arange(trial_idx.shape[0])

    # Get selected control state
    for r_cnt, r in enumerate(np.unique(run_idx)):
        idx_r = idx[np.where(run_idx == r)]
        # Increment dimensionality
        state_scores.append(list())
        selected_control_state.append(list())
        selected_control_state_per_seq.append(list())
        for t_cnt, t in enumerate(np.unique(trial_idx[idx_r])):
            idx_t = idx_r[np.where(trial_idx[idx_r] == t)]
            state_scores[r_cnt].append(list())
            selected_control_state_per_seq[r_cnt].append(list())
            for s_cnt, s in enumerate(np.unique(sequence_idx[idx_t])):
                idx_s = idx_t[np.where(sequence_idx[idx_t] == s)]
                # Score for this sequence (only this sequence)
                state_scores[r_cnt][t_cnt].append(np.mean(scores[idx_s]))
                # Score for this sequence (all sequences <= s)
                cs_score = np.mean(state_scores[r_cnt][t_cnt])
                cs_pred = int(cs_score > 0.5)
                selected_control_state_per_seq[r_cnt][t_cnt].append(cs_pred)
            selected_control_state[r_cnt].append(
                selected_control_state_per_seq[r_cnt][t_cnt][-1]
            )

    return selected_control_state, selected_control_state_per_seq, state_scores


[docs]def get_selected_commands_info(selected_commands, commands_info):
    """Returns the info dict of the selected commands

    Parameters
    ----------
    selected_commands : list
        Selected command for each trial. Each command is organized in an array
        [matrix_idx, command_id]. Take into account that the command ids are
        unique for each matrix, and therefore only the command of the last
        level should be useful to take action. Shape [n_runs x n_trials x
        n_levels x 2]
    commands_info : list
        List containing the command information per run and matrix. Each
        position must be a dict, whose keys are the commands ids used in
        paradigm_conf. The value must be another dict containing important
        information about each command (e.g., label, text, action, icon
        path, etc). This information may be different for different use
        cases, but must be serializable (i.e., contain primitive types).
        Shape [n_runs x n_matrices x n_commands]

    Returns
    -------
    selected_commands_info : list
        List containing the information dict of the selected commands
    """
    try:
        # Print info
        selected_commands_info = list()
        for r in range(len(selected_commands)):
            for t in range(len(selected_commands[r])):
                [m_d, cmd_d] = selected_commands[r][t][-1]
                selected_commands_info.append(commands_info[m_d][cmd_d])
    except Exception as e:
        raise type(e)(str(e) + '\nCheck that selected_commands has shape '
                               '[n_runs x n_trials x n_levels x 2]')

    return selected_commands_info


[docs]def command_decoding_accuracy(selected_commands, target_commands):
    """Computes the accuracy of the selected sequence of targets given the
    objective

    Parameters
    ----------
    selected_commands: list
        Target commands. Each position contains the matrix index and command
        id per level that identifies the selected command of the trial. Shape
        [n_runs x n_trials x n_levels x 2]
    target_commands: list
        Target commands. Each position contains the matrix index and command
        id per level that identifies the target command of the trial. Shape
        [n_runs x n_trials x n_levels x 2]

    Returns
    -------
    accuracy : float
        Accuracy of the command decoding stage
    """
    # Check errors
    if len(selected_commands) != len(target_commands):
        raise ValueError('Parameters selected_commands and target_commands '
                         'must have the same shape [n_runs x n_trials x '
                         'n_levels x 2]')

    t_correct_cnt = 0
    t_total_cnt = 0
    for r in range(len(selected_commands)):
        for t in range(len(selected_commands[r])):
            if selected_commands[r][t] == target_commands[r][t]:
                t_correct_cnt += 1
            t_total_cnt += 1

    accuracy = t_correct_cnt / t_total_cnt
    return accuracy


[docs]def command_decoding_accuracy_per_seq(selected_commands_per_seq,
                                      target_commands):
    """
    Computes the accuracy of the selected sequence of targets given the
    target

    Parameters
    ----------
    selected_commands_per_seq: list
        List with the spell result per sequence as given by function
        decode_commands. Shape [n_runs x n_trials x n_levels x n_sequences x 2]
    target_commands: list
        Target commands. Each position contains the matrix index and command
        id per level that identifies the target command of the trial. Shape
        [n_runs x n_trials x n_levels x 2]

    Returns
    -------
    acc_per_seq : float
        Accuracy of the command decoding stage for each number of sequences
        considered in the analysis. Shape [n_sequences]
    """
    # Check errors
    selected_commands_per_seq = list(selected_commands_per_seq)
    target_commands = list(target_commands)
    if len(selected_commands_per_seq) != len(target_commands):
        raise ValueError('Parameters selected_commands_per_seq and spell_target'
                         'must have the same length.')

    # Compute accuracy per sequence
    bool_result_per_seq = []
    n_seqs = []
    for r in range(len(selected_commands_per_seq)):
        r_sel_cmd_per_seq = selected_commands_per_seq[r]
        r_spell_target = target_commands[r]
        for t in range(len(r_sel_cmd_per_seq)):
            t_sel_cmd_per_seq = r_sel_cmd_per_seq[t]
            t_spell_target = r_spell_target[t]
            t_bool_result_per_seq = []
            t_n_seqs = []
            for t in range(len(t_sel_cmd_per_seq)):
                l_sel_cmd_per_seq = t_sel_cmd_per_seq[t]
                l_spell_target = t_spell_target[t]
                t_bool_result_per_seq.append(list())
                t_n_seqs.append(len(l_sel_cmd_per_seq))
                for s in range(len(l_sel_cmd_per_seq)):
                    s_sel_cmd_per_seq = l_sel_cmd_per_seq[s]
                    t_bool_result_per_seq[t].append(l_spell_target ==
                                                    s_sel_cmd_per_seq)

            # Calculate the trial result per seq (all levels must be correct)
            t_n_levels = len(t_sel_cmd_per_seq)
            t_max_n_seqs = np.max(t_n_seqs)
            t_acc_per_seq = np.empty((t_max_n_seqs, t_n_levels))
            t_acc_per_seq[:] = np.nan
            for t in range(t_n_levels):
                t_acc_per_seq[:t_n_seqs[t], t] = t_bool_result_per_seq[t]
            bool_result_per_seq.append(np.all(t_acc_per_seq, axis=1))
            n_seqs.append(t_max_n_seqs)

    # Calculate the accuracy per number of sequences considered in the analysis
    max_n_seqs = np.max(n_seqs)
    n_trials = len(bool_result_per_seq)
    acc_per_seq = np.empty((max_n_seqs, n_trials))
    acc_per_seq[:] = np.nan
    for t in range(n_trials):
        acc_per_seq[:n_seqs[t], t] = bool_result_per_seq[t]

    return np.nanmean(acc_per_seq, axis=1)


[docs]def control_state_detection_accuracy(selected_control_state,
                                     target_control_state):
    """Computes the accuracy of the selected control states given the target

    Parameters
    ----------
    selected_control_state: list
        Target commands. Each position contains the matrix index and command
        id per level that identifies the selected command of the trial. Shape
        [n_runs x n_trials]
    target_control_state: list
        Target control state. Each position contains the control state of the
        trial. Shape [n_runs x n_trials]

    Returns
    -------
    accuracy : float
        Accuracy of the command decoding stage
    """
    # Check errors
    if len(selected_control_state) != len(target_control_state):
        raise ValueError('Parameters selected_control_state and '
                         'target_control_state must have the same shape '
                         '[n_runs x n_trials]')

    t_correct_cnt = 0
    t_total_cnt = 0
    for r in range(len(selected_control_state)):
        for t in range(len(selected_control_state[r])):
            if selected_control_state[r][t] == target_control_state[r][t]:
                t_correct_cnt += 1
            t_total_cnt += 1

    accuracy = t_correct_cnt / t_total_cnt
    return accuracy


[docs]def control_state_detection_accuracy_per_seq(selected_control_state_per_seq,
                                             target_control_state):
    """
    Returns the accuracy of the selected sequence of predicted control
    states given the target.

    Parameters
    ----------
    selected_control_state_per_seq: list
        List with the control state detection result per sequence as given by
        function detect_control_state. Shape [n_runs x n_trials x n_sequences]
    target_control_state : list
        Numpy array with dimensions [n_runs x n_trials] with the real control
        state of each trial

    Returns
    -------
    acc_per_seq : float
        Accuracy of the control state detection stage
    """

    # Check errors
    selected_control_state_per_seq = list(selected_control_state_per_seq)
    target_control_state = list(target_control_state)
    if len(selected_control_state_per_seq) != len(target_control_state):
        raise ValueError('Parameters selected_control_state_per_seq and '
                         'target_control_state must have the same length.')

    # Compute accuracy per sequence
    bool_result_per_seq = []
    n_seqs = []
    for r in range(len(selected_control_state_per_seq)):
        r_sel_cmd_per_seq = selected_control_state_per_seq[r]
        r_cs_target = target_control_state[r]
        for t in range(len(r_sel_cmd_per_seq)):
            t_sel_cmd_per_seq = r_sel_cmd_per_seq[t]
            t_cs_target = r_cs_target[t]
            t_bool_result_per_seq = []
            for s in range(len(t_sel_cmd_per_seq)):
                s_sel_cmd_per_seq = t_sel_cmd_per_seq[s]
                t_bool_result_per_seq.append(t_cs_target == s_sel_cmd_per_seq)
            # Save for later use
            bool_result_per_seq.append(t_bool_result_per_seq)
            n_seqs.append(len(t_sel_cmd_per_seq))

    # Calculate the accuracy per number of sequences
    max_n_seqs = np.max(n_seqs)
    n_trials = len(bool_result_per_seq)
    acc_per_seq = np.empty((max_n_seqs, n_trials))
    acc_per_seq[:] = np.nan
    for t in range(n_trials):
        acc_per_seq[:n_seqs[t], t] = bool_result_per_seq[t]

    return np.nanmean(acc_per_seq, axis=1)


[docs]def split_erp_features(sets_pct, trial_idx_key="trial_idx", **kwargs):
    """
    This function splits randomly an ERP dataset keeping the relation between
    epochs and trials, which enables a later analysis of command prediction.

    Parameters
    ----------
    sets_pct: list
        List containing the percentage of for each set. For example,
        sets_pct=[60, 20, 20] will split the dataset in three sets, one that
        contains 60% of the trials, and two that contains 20% each.

    trial_idx_key: string
        Sets the trial_track_key, which is a vector that keeps the relation
        between trials and stimulus.

    kwargs: key-value arguments
        Variables to split.

    Returns
    -------
    variables: list
        List of the sets containing a dict with the split variables
    """

    # Check that the sets_pct parameter is a list
    if type(sets_pct) != list:
        raise ValueError("Parameter sets_pct must be of type list")
    # Check that the number of output sets is greater than 1
    n_sets = len(sets_pct)
    if n_sets < 2:
        raise ValueError("The number of output sets must be greater than 1")
    # Check that the sum of the percentages is equal to 1
    if sum(sets_pct) != 100:
        raise ValueError("The sum of the percentages for each set must be "
                         "equal to 1")
    # Check that the number of sets and the length of the percentage list
    # are equal
    if n_sets != len(sets_pct):
        raise ValueError("The number of sets and the set pct length must "
                         "coincide")
    # Check the existence of the trial_track array
    if not trial_idx_key in kwargs:
        raise ValueError("Array '" + trial_idx_key +
                         "' not found. This array must relate uniquely each "
                         "observation or epoch with the corresponding trial")
    # Check that all the arrays are numpy arrays
    for key, value in kwargs.items():
        if type(value) != np.ndarray:
            raise ValueError("Matrix '" + key + "' must be a numpy array")
    # Compute the number of epochs and trials and check the dimensions of all
    # the arrays
    n_epochs = kwargs[trial_idx_key].shape[0]
    n_trials = len(np.unique(kwargs[trial_idx_key]))
    for key, value in kwargs.items():
        if value.shape[0] != n_epochs and value.shape[0] != n_trials:
            raise ValueError("Array " + key +
                             " must be either of length n_epochs = " +
                             str(n_epochs) + " or n_trials = " +
                             str(n_trials) + " in axis=0")

    # Compute the number of trials per set
    n_trials_per_set = []
    for s in range(n_sets):
        n_trials_per_set.append(
            np.round(float(sets_pct[s])/100 * n_trials).astype(int)
        )

    # Create the trial index array for each set
    idx = np.unique(kwargs[trial_idx_key])
    np.random.shuffle(idx)  # TODO: Reorganize the indices randomly?
    trials_per_set = []
    last_idx = 0
    for s in range(n_sets):
        # Train set trials
        trials_per_set.append(idx[last_idx:last_idx+n_trials_per_set[s]])
        last_idx = last_idx + n_trials_per_set[s]
    # Split the sets
    sets = list()
    for s in range(n_sets):
        set_dict = dict()
        idx_epochs = np.isin(kwargs[trial_idx_key], trials_per_set[s])
        idx_trials = np.isin(np.unique(kwargs[trial_idx_key]),
                             trials_per_set[s])
        for key, value in kwargs.items():
            if value.shape[0] == n_epochs:
                set_dict[key] = value[idx_epochs]
            else:
                set_dict[key] = value[idx_trials]
        sets.append(set_dict)
    return sets


[docs]class ERPSpellerModel(components.Algorithm):
    """Skeleton class for ERP-based spellers models. This class inherits from
    components.Algorithm. Therefore, it can be used to create standalone
    algorithms that can be used in compatible apps from medusa-platform
    for online experiments. See components.Algorithm to know more about this
    functionality.

    Related tutorials:

        - Overview of erp_spellers module [LINK]
        - Create standalone models for ERP-based spellers compatible with
            Medusa platform [LINK]
    """

[docs]    def __init__(self):
        """Class constructor
        """
        print('ERPSpellerModel')
        super().__init__(fit_dataset=['spell_target',
                                      'spell_result_per_seq',
                                      'spell_acc_per_seq'],
                         predict=['spell_result',
                                  'spell_result_per_seq'])
        # Settings
        self.settings = None
        self.channel_set = None
        self.configure()
        # Configuration
        self.is_configured = False
        self.is_built = False
        self.is_fit = False

[docs]    @abstractmethod
    def configure(self, **kwargs):
        """This function must be used to configure the model before calling
        build method. Class attribute settings attribute must be set with a dict
        """
        # Update state
        self.is_configured = True
        self.is_built = False
        self.is_fit = False

[docs]    @abstractmethod
    def build(self, *args, **kwargs):
        """This function builds the model, adding all the processing methods
        to the pipeline. It must be called after configure.
        """
        # Check errors
        if not self.is_configured:
            raise ValueError('Function configure must be called first!')
        # Update state
        self.is_built = True
        self.is_fit = False

[docs]    @staticmethod
    def check_for_non_control_data(cs_labels, cs_target, throw_warning=True):
        """Checks the data for non-control trials.

        Returns
        -------
        check: bool
            True if there is non-control trials in the data, False otherwise.
        """
        check = False
        if not np.all(np.array(cs_labels) == 1) or \
                not np.all(np.array(cs_target) == 1):
            if np.all(np.unique(cs_labels) == [0, 1]) and \
                    np.all(np.unique(cs_target) == [0, 1]):
                check = True
                if throw_warning:
                    warnings.warn('Non-control trials detected. Only control '
                                  'trials will be used to fit the ERP '
                                  'speller model.')
            else:
                raise ValueError('Incorrect format of control_state_labels '
                                 'or control_state_result. These arrays '
                                 'must be binary {0|1})')
        return check

[docs]    @staticmethod
    def get_control_data(x, x_info):
        """Selects only the control trials in the dataset. Useful to fit
        command decoding models.
        """
        # Copy variables to avoid problems with referencing
        x = x.copy()
        x_info = copy.deepcopy(x_info)

        # Useful variables
        cs_labels = np.array(x_info['control_state_labels'])
        runs_idx = np.unique(x_info['run_idx']).astype(int)

        # Get control runs indexes
        control_runs_idx = \
            np.unique(x_info['run_idx'][cs_labels == 1]).astype(int)

        # Get control observations
        x = x[cs_labels == 1]

        # Get control info
        for key, val in x_info.items():

            if len(val) == cs_labels.shape[0]:
                x_info[key] = val[cs_labels == 1]
            elif len(val) == runs_idx.shape[0]:
                x_info[key] = [val[r] for r in control_runs_idx]
            else:
                raise ValueError('Incorrect dimension of x_info[%s]' % key)

        return x, x_info

[docs]    def fit_dataset(self, dataset, **kwargs):
        """Function that receives an ERPSpellerDataset and uses its data to
        fit the model. By default, executes pipeline 'fit_dataset'. Override
        method for other behaviour.

        Parameters
        ----------
        dataset: ERPSpellerDataset
            Dataset with recordings from an ERP-based speller experiment
        kwargs: key-value arguments
            Optional parameters depending on the specific implementation of
            the model

        Returns
        -------
        fit_results: dict
            Dict with the information of the fot process. For command
            decoding models, at least it has to contain keys
            spell_result, spell_result_per_seq and spell_acc_per_seq,
            which contain the decoded commands, the decoded commands and the
            command decoding accuracy per sequences of stimulation considered in
            the analysis.
        """
        # Check errors
        if not self.is_built:
            raise ValueError('Function build must be called first!')
        # Execute pipeline
        output = self.exec_pipeline('fit_dataset', dataset=dataset)
        # Set channels
        self.channel_set = dataset.channel_set
        # Update state
        self.is_fit = True
        return output

[docs]    def predict(self, times, signal, fs, l_cha, x_info, **kwargs):
        """Function that receives EEG signal and experiment info from an
        ERP-based speller to decode the user's intentions. Used in online
        experiments. By default, executes pipeline 'predict'. Override method
        for other behaviour.

        Parameters
        ---------
        times: list or numpy.ndarray
            Timestamps of the EEG samples
        signal: list or numpy.ndarray
            EEG samples with shape [n_samples x n_channels]
        fs: float
            Sample rate of the EEG signal
        l_cha: list
            List of channel labels
        x_info: dict
            Dict with the needed experiment info to decode the commands. It
            has to contain keys: paradigm_conf, onsets, batch_idx, group_idx,
            unit_idx, level_idx, matrix_idx, sequence_idx, trial_idx, run_idx.
            See ERPSpellerData to know how are defined these variables.
        kwargs: key-value arguments
            Optional parameters depending on the specific implementation of
            the model
        """
        # Check errors
        if not self.is_fit:
            raise ValueError('Function fit_dataset must be called first!')
        # Check channels
        self.channel_set.check_channels_labels(l_cha, strict=True)
        # Execute predict pipeline
        return self.exec_pipeline('predict', times=times, signal=signal,
                                  fs=fs, x_info=x_info, **kwargs)


[docs]class CMDModelRLDA(ERPSpellerModel):
    """Command decoding model for ERP-based spellers model based on regularized
    linear discriminant analysis (rLDA) based on the implementation of
    Krusienski et al 2008 [1].

    Dataset features:

    - Sample rate of the signals > 20 Hz. The model can handle recordings
        with different sample rates.
    - Recommended channels: ['Fz', 'Cz', 'Pz', 'P3', 'P4', 'PO7', 'PO8', 'Oz'].

    Processing pipeline:
    - Preprocessing (medusa.bci.erp_spellers.StandardPreprocessing):

        - IIR Filter (order=5, cutoff=(0.5, 10) Hz: unlike FIR filters, IIR
            filters are quick and can be applied in small signal chunks. Thus,
            they are the preferred method for frequency filter in online
            systems.
        - Common average reference (CAR): widely used spatial filter that
            increases the signal-to-noise ratio of the ERPs.
    - Feature extraction (medusa.bci.erp_spellers.StandardFeatureExtraction):

        - Epochs (window=(0, 1000) ms, resampling to 20 HZ): the epochs of
            signal are extracted for each stimulation. Baseline normalization
            is also applied, taking the window (-250, 0) ms relative to the
            stimulus onset.

    - Feature classification (
    sklearn.discriminant_analysis.LinearDiscriminantAnalysis)

        - Regularized linear discriminant analysis (rLDA): we use the sklearn
            implementation, with eigen solver and auto shrinkage paramers. See
            reference in sklearn doc.

    References
    ----------
    [1] Krusienski, D. J., Sellers, E. W., McFarland, D. J., Vaughan, T. M., &
    Wolpaw, J. R. (2008). Toward enhanced P300 speller performance. Journal of
    neuroscience methods, 167(1), 15-21.
    """

[docs]    def __init__(self):
        super().__init__()

[docs]    def configure(self, p_filt_cutoff=(0.5, 10), f_w_epoch_t=(0, 800),
                  f_target_fs=20):
        self.settings = {
            'p_filt_cutoff': p_filt_cutoff,
            'f_w_epoch_t': f_w_epoch_t,
            'f_target_fs': f_target_fs
        }
        # Update state
        self.is_configured = True
        self.is_built = False
        self.is_fit = False

[docs]    def build(self):
        # Check errors
        if not self.is_configured:
            raise ValueError('Function configure must be called first!')
        # Preprocessing (default: bandpass IIR filter [0.5, 10] Hz + CAR)
        self.add_method('prep_method', StandardPreprocessing(
            cutoff=self.settings['p_filt_cutoff']
        ))
        # Feature extraction (default: epochs [0, 800] ms + resampling to 20 Hz)
        self.add_method('ext_method', StandardFeatureExtraction(
            w_epoch_t=self.settings['f_w_epoch_t'],
            target_fs=self.settings['f_target_fs'],
        ))
        # Feature classification (rLDA)
        clf = components.ProcessingClassWrapper(
            LinearDiscriminantAnalysis(solver='eigen', shrinkage='auto'),
            fit=[], predict_proba=['y_pred']
        )
        self.add_method('clf_method', clf)
        # Update state
        self.is_built = True
        self.is_fit = False

[docs]    def fit_dataset(self, dataset, **kwargs):
        # Check errors
        if not self.is_built:
            raise ValueError('Function build must be called first!')
        # Preprocessing
        dataset = self.get_inst('prep_method').fit_transform_dataset(dataset)
        # Extract features
        x, x_info = self.get_inst('ext_method').transform_dataset(dataset)
        if self.check_for_non_control_data(x_info['control_state_labels'],
                                           x_info['control_state_target'],
                                           throw_warning=True):
            x, x_info = self.get_control_data(x, x_info)
        # Classification
        self.get_inst('clf_method').fit(x, x_info['erp_labels'])
        y_pred = self.get_inst('clf_method').predict_proba(x)[:, 1]
        # Command decoding
        spell_result, spell_result_per_seq, __ = decode_commands(
            scores=y_pred,
            paradigm_conf=x_info['paradigm_conf'],
            run_idx=x_info['run_idx'],
            trial_idx=x_info['trial_idx'],
            matrix_idx=x_info['matrix_idx'],
            level_idx=x_info['level_idx'],
            unit_idx=x_info['unit_idx'],
            sequence_idx=x_info['sequence_idx'],
            group_idx=x_info['group_idx'],
            batch_idx=x_info['batch_idx']
        )
        # Spell accuracy per seq
        spell_acc_per_seq = command_decoding_accuracy_per_seq(
            spell_result_per_seq,
            x_info['spell_target']
        )
        cmd_assessment = {
            'x': x,
            'x_info': x_info,
            'y_pred': y_pred,
            'spell_result': spell_result,
            'spell_result_per_seq': spell_result_per_seq,
            'spell_acc_per_seq': spell_acc_per_seq
        }
        # Save info
        self.channel_set = dataset.channel_set
        # Update state
        self.is_fit = True
        return cmd_assessment

[docs]    def predict(self, times, signal, fs, channel_set, x_info, **kwargs):
        # Check errors
        if not self.is_fit:
            raise ValueError('Function fit_dataset must be called first!')
        # Check channel set
        if self.channel_set != channel_set:
            warnings.warn('The channel set is not the same that was used to '
                          'fit the model. Be careful!')
        # Preprocessing
        signal = self.get_inst('prep_method').fit_transform_signal(signal, fs)

        # Extract features
        x = self.get_inst('ext_method').transform_signal(times, signal, fs,
                                                         x_info['onsets'])
        # Classification
        y_pred = self.get_inst('clf_method').predict_proba(x)[:, 1]

        # Command decoding
        spell_result, spell_result_per_seq, __ = decode_commands(
            scores=y_pred,
            paradigm_conf=x_info['paradigm_conf'],
            run_idx=x_info['run_idx'],
            trial_idx=x_info['trial_idx'],
            matrix_idx=x_info['matrix_idx'],
            level_idx=x_info['level_idx'],
            unit_idx=x_info['unit_idx'],
            sequence_idx=x_info['sequence_idx'],
            group_idx=x_info['group_idx'],
            batch_idx=x_info['batch_idx']
        )
        cmd_decoding = {
            'x': x,
            'x_info': x_info,
            'y_pred': y_pred,
            'spell_result': spell_result,
            'spell_result_per_seq': spell_result_per_seq
        }
        return cmd_decoding


[docs]class CMDModelEEGNet(ERPSpellerModel):
    """Command decoding model for ERP-based spellers model based on EEGNet, a
    compact deep convolutional neural network specifically developed for EEG
    applications [1].

    Dataset features:

    - Sample rate of the signals > 128 Hz. The model can handle recordings
        with different sample rates.
    - Recommended channels: ['Fz', 'Cz', 'Pz', 'P3', 'P4', 'PO7', 'PO8', 'Oz'].

    Processing pipeline:
    - Preprocessing:

        - IIR Filter (order=5, cutoff=(0.5, 45) Hz: unlike FIR filters, IIR
            filters are quick and can be applied in small signal chunks. Thus,
            they are the preferred method for frequency filter in online systems
        - Common average reference (CAR): widely used spatial filter that
            increases the signal-to-noise ratio of the ERPs.
    - Feature extraction:

        - Epochs (window=(0, 1000) ms, resampling to 128 HZ): the epochs of
            signal are extract for each stimulation. Baseline normalization
            is also applied, taking the window (-250, 0) ms relative to the
            stimulus onset.

    - Feature classification

        - EEGNet: compact convolutional network.

    References
    ----------
    [1] Lawhern, V. J., Solon, A. J., Waytowich, N. R., Gordon, S. M., Hung,
    C. P., & Lance, B. J. (2018). EEGNet: a compact convolutional neural network
    for EEG-based brain–computer interfaces. Journal of neural engineering,
    15(5), 056013.
    """
[docs]    def __init__(self):
        super().__init__()

[docs]    def configure(self, cnn_n_cha=8, fine_tuning=False,
                  shuffle_before_fit=True, validation_split=0.2,
                  init_weights_path=None, gpu_acceleration=True):
        self.settings = {
            'cnn_n_cha': cnn_n_cha,
            'fine_tuning': fine_tuning,
            'shuffle_before_fit': shuffle_before_fit,
            'validation_split': validation_split,
            'init_weights_path': init_weights_path,
            'gpu_acceleration': gpu_acceleration
        }
        # Update state
        self.is_configured = True
        self.is_built = False
        self.is_fit = False

[docs]    def build(self):
        # Check errors
        if not self.is_configured:
            raise ValueError('Function configure must be called first!')
        # Only import deep learning models if necessary
        from medusa.deep_learning_models import EEGNet
        # Preprocessing (bandpass IIR filter [0, 10] Hz + CAR)
        self.add_method('prep_method', StandardPreprocessing(cutoff=(0.5, 45)))
        # Feature extraction (epochs [0, 1000] ms + resampling to 128 Hz)
        ext_method = StandardFeatureExtraction(
            target_fs=128, concatenate_channels=False)
        self.add_method('ext_method', ext_method)
        # Feature classification
        clf = EEGNet(nb_classes=2, n_cha=self.settings['cnn_n_cha'],
                     samples=128, dropout_rate=0.25, kern_length=64, F1=8,
                     D=2, F2=16, dropout_type='Dropout', norm_rate=0.25,
                     gpu_acceleration=self.settings['gpu_acceleration'])
        if self.settings['init_weights_path'] is not None:
            clf.load_weights(self.settings['init_weights_path'])
        self.add_method('clf_method', clf)
        # Update state
        self.is_built = True
        self.is_fit = False

[docs]    def fit_dataset(self, dataset, **kwargs):
        # Check errors
        if not self.is_built:
            raise ValueError('Function build must be called first!')
        if dataset.channel_set.n_cha != self.settings['cnn_n_cha']:
            raise ValueError('The number of channels of the model and the '
                             'dataset does not match!')
        # Preprocessing
        dataset = self.get_inst('prep_method').fit_transform_dataset(dataset)
        # Extract features
        x, x_info = self.get_inst('ext_method').transform_dataset(dataset)
        if self.check_for_non_control_data(x_info['control_state_labels'],
                                           x_info['control_state_target'],
                                           throw_warning=True):
            x, x_info = self.get_control_data(x, x_info)
        # Classification
        self.get_inst('clf_method').fit(
            x, x_info['erp_labels'],
            fine_tuning=self.settings['fine_tuning'],
            shuffle_before_fit=self.settings['shuffle_before_fit'],
            validation_split=self.settings['validation_split'],
            **kwargs)
        y_pred = self.get_inst('clf_method').predict_proba(x)[:, 1]
        # Command decoding
        spell_result, spell_result_per_seq, __ = decode_commands(
            scores=y_pred,
            paradigm_conf=x_info['paradigm_conf'],
            run_idx=x_info['run_idx'],
            trial_idx=x_info['trial_idx'],
            matrix_idx=x_info['matrix_idx'],
            level_idx=x_info['level_idx'],
            unit_idx=x_info['unit_idx'],
            sequence_idx=x_info['sequence_idx'],
            group_idx=x_info['group_idx'],
            batch_idx=x_info['batch_idx']
        )
        # Spell accuracy per seq
        spell_acc_per_seq = command_decoding_accuracy_per_seq(
            spell_result_per_seq,
            x_info['spell_target']
        )
        cmd_assessment = {
            'x': x,
            'x_info': x_info,
            'y_pred': y_pred,
            'spell_result': spell_result,
            'spell_result_per_seq': spell_result_per_seq,
            'spell_acc_per_seq': spell_acc_per_seq
        }
        # Save info
        self.channel_set = dataset.channel_set
        # Update state
        self.is_fit = True
        return cmd_assessment

[docs]    def predict(self, times, signal, fs, channel_set, x_info, **kwargs):
        # Check channel set
        if self.channel_set != channel_set:
            warnings.warn('The channel set is not the same that was used to '
                          'fit the model. Be careful!')
        # Preprocessing
        signal = self.get_inst('prep_method').fit_transform_signal(signal, fs)
        # Extract features
        x = self.get_inst('ext_method').transform_signal(times, signal, fs,
                                                         x_info['onsets'])
        # Classification
        y_pred = self.get_inst('clf_method').predict_proba(x)[:, 1]
        # Command decoding
        spell_result, spell_result_per_seq, __ = decode_commands(
            scores=y_pred,
            paradigm_conf=x_info['paradigm_conf'],
            run_idx=x_info['run_idx'],
            trial_idx=x_info['trial_idx'],
            matrix_idx=x_info['matrix_idx'],
            level_idx=x_info['level_idx'],
            unit_idx=x_info['unit_idx'],
            sequence_idx=x_info['sequence_idx'],
            group_idx=x_info['group_idx'],
            batch_idx=x_info['batch_idx']
        )
        cmd_decoding = {
            'x': x,
            'x_info': x_info,
            'y_pred': y_pred,
            'spell_result': spell_result,
            'spell_result_per_seq': spell_result_per_seq
        }
        return cmd_decoding


[docs]class CMDModelEEGInception(ERPSpellerModel):
    """Command decoding model for ERP-based spellers model based on
    EEG-Inception, a powerful deep convolutional neural network specifically
    developed for BCI applications [1].

    Dataset features:

    - Sample rate of the signals > 128 Hz. The model can handle recordings
        with different sample rates.
    - Recommended channels: ['Fz', 'Cz', 'Pz', 'P3', 'P4', 'PO7', 'PO8', 'Oz'].

    Processing pipeline:

    - Preprocessing:

        - IIR Filter (order=5, cutoff=(0.5, 45) Hz: unlike FIR filters, IIR
            filters are quick and can be applied in small signal chunks. Thus,
            they are the preferred method for frequency filter in online systems
        - Common average reference (CAR): widely used spatial filter that
            increases the signal-to-noise ratio of the ERPs.
    - Feature extraction:

        - Epochs (window=(0, 1000) ms, resampling to 128 HZ): the epochs of
            signal are extract for each stimulation. Baseline normalization
            is also applied, taking the window (-250, 0) ms relative to the
            stimulus onset.

    - Feature classification

        - EEG-Inception: convolutional neural network [1].

    References
    ----------
    [1] Santamaría-Vázquez, E., Martínez-Cagigal, V., Vaquerizo-Villar, F., &
    Hornero, R. (2020). EEG-Inception: A Novel Deep Convolutional Neural Network
    for Assistive ERP-based Brain-Computer Interfaces. IEEE Transactions on
    Neural Systems and Rehabilitation Engineering.
    """
[docs]    def __init__(self):
        super().__init__()

[docs]    def configure(self, cnn_n_cha=8, fine_tuning=False,
                  shuffle_before_fit=True, validation_split=0.2,
                  init_weights_path=None, gpu_acceleration=True):
        self.settings = {
            'cnn_n_cha': cnn_n_cha,
            'fine_tuning': fine_tuning,
            'shuffle_before_fit': shuffle_before_fit,
            'validation_split': validation_split,
            'init_weights_path': init_weights_path,
            'gpu_acceleration': gpu_acceleration
        }
        # Update state
        self.is_configured = True
        self.is_built = False
        self.is_fit = False

[docs]    def build(self):
        # Check errors
        if not self.is_configured:
            raise ValueError('Function configure must be called first!')
        # Only import deep learning models if necessary
        from medusa.deep_learning_models import EEGInceptionv1
        # Preprocessing (bandpass IIR filter [0.5, 45] Hz + CAR)
        self.add_method('prep_method',
                        StandardPreprocessing(cutoff=(0.5, 45)))
        # Feature extraction (epochs [0, 1000] ms + resampling to 128 Hz)
        self.add_method('ext_method',
                        StandardFeatureExtraction(
                            target_fs=128, concatenate_channels=False)
        )
        # Feature classification
        clf = EEGInceptionv1(
            input_time=1000,
            fs=128,
            n_cha=self.settings['cnn_n_cha'],
            filters_per_branch=8,
            scales_time=(500, 250, 125),
            dropout_rate=0.25,
            activation='elu', n_classes=2,
            learning_rate=0.001,
            gpu_acceleration=self.settings['gpu_acceleration'])
        if self.settings['init_weights_path'] is not None:
            clf.load_weights(self.settings['init_weights_path'])
        self.add_method('clf_method', clf)
        # Update state
        self.is_built = True
        self.is_fit = False

[docs]    def fit_dataset(self, dataset, **kwargs):
        # Check errors
        if not self.is_built:
            raise ValueError('Function build must be called first!')
        if dataset.channel_set.n_cha != self.settings['cnn_n_cha']:
            raise ValueError('The number of channels of the model and the '
                             'dataset does not match!')
        # Preprocessing
        dataset = self.get_inst('prep_method').fit_transform_dataset(dataset)
        # Extract features
        x, x_info = self.get_inst('ext_method').transform_dataset(dataset)
        if self.check_for_non_control_data(x_info['control_state_labels'],
                                           x_info['control_state_target'],
                                           throw_warning=True):
            x, x_info = self.get_control_data(x, x_info)
        # Classification
        self.get_inst('clf_method').fit(
            x, x_info['erp_labels'],
            fine_tuning=self.settings['fine_tuning'],
            shuffle_before_fit=self.settings['shuffle_before_fit'],
            validation_split=self.settings['validation_split'],
            **kwargs)
        y_pred = self.get_inst('clf_method').predict_proba(x)[:, 1]
        # Command decoding
        spell_result, spell_result_per_seq, __ = decode_commands(
            scores=y_pred,
            paradigm_conf=x_info['paradigm_conf'],
            run_idx=x_info['run_idx'],
            trial_idx=x_info['trial_idx'],
            matrix_idx=x_info['matrix_idx'],
            level_idx=x_info['level_idx'],
            unit_idx=x_info['unit_idx'],
            sequence_idx=x_info['sequence_idx'],
            group_idx=x_info['group_idx'],
            batch_idx=x_info['batch_idx']
        )
        # Spell accuracy per seq
        spell_acc_per_seq = command_decoding_accuracy_per_seq(
            spell_result_per_seq,
            x_info['spell_target']
        )
        cmd_assessment = {
            'x': x,
            'x_info': x_info,
            'y_pred': y_pred,
            'spell_result': spell_result,
            'spell_result_per_seq': spell_result_per_seq,
            'spell_acc_per_seq': spell_acc_per_seq
        }
        # Save variables
        self.channel_set = dataset.channel_set
        # Update state
        self.is_fit = True
        return cmd_assessment

[docs]    def predict(self, times, signal, fs, channel_set, exp_info, **kwargs):
        # Check errors
        if not self.is_fit:
            raise ValueError('Function fit_dataset must be called first!')
        # Check channel set
        if self.channel_set != channel_set:
            warnings.warn('The channel set is not the same that was used to '
                          'fit the model. Be careful!')
        # Preprocessing
        signal = self.get_inst('prep_method').fit_transform_signal(signal, fs)
        # Extract features
        x = self.get_inst('ext_method').transform_signal(times, signal, fs,
                                                         exp_info['onsets'])
        # Classification
        y_pred = self.get_inst('clf_method').predict_proba(x)[:, 1]
        # Command decoding
        spell_result, spell_result_per_seq, __ = decode_commands(
            scores=y_pred,
            paradigm_conf=exp_info['paradigm_conf'],
            run_idx=exp_info['run_idx'],
            trial_idx=exp_info['trial_idx'],
            matrix_idx=exp_info['matrix_idx'],
            level_idx=exp_info['level_idx'],
            unit_idx=exp_info['unit_idx'],
            sequence_idx=exp_info['sequence_idx'],
            group_idx=exp_info['group_idx'],
            batch_idx=exp_info['batch_idx']
        )
        cmd_decoding = {
            'x': x,
            'x_info': exp_info,
            'y_pred': y_pred,
            'spell_result': spell_result,
            'spell_result_per_seq': spell_result_per_seq
        }
        return cmd_decoding


[docs]class CSDModelEEGInception(ERPSpellerModel):
    """Control state detection model for ERP-based spellers model based on
    EEG-Inception, a powerful deep convolutional neural network specifically
    developed for BCI applications [1]. More information about this control
    state detection method can be found in [2].

    Dataset features:

    - Sample rate of the signals > 128 Hz. The model can handle recordings
        with different sample rates.
    - Recommended channels: ['Fz', 'Cz', 'Pz', 'P3', 'P4', 'PO7', 'PO8', 'Oz'].

    Processing pipeline:

    - Preprocessing:

        - IIR Filter (order=5, cutoff=(0.5, 45) Hz: unlike FIR filters, IIR
            filters are quick and can be applied in small signal chunks. Thus,
            they are the preferred method for frequency filter in online systems
        - Common average reference (CAR): widely used spatial filter that
            increases the signal-to-noise ratio of the ERPs.
    - Feature extraction:

        - Epochs (window=(0, 1000) ms, resampling to 128 HZ): the epochs of
            signal are extract for each stimulation. Baseline normalization
            is also applied, taking the window (-250, 0) ms relative to the
            stimulus onset.

    - Feature classification

        - EEG-Inception: convolutional neural network

    References
    ----------
    [1] Santamaría-Vázquez, E., Martínez-Cagigal, V., Vaquerizo-Villar, F., &
    Hornero, R. (2020). EEG-Inception: A Novel Deep Convolutional Neural Network
    for Assistive ERP-based Brain-Computer Interfaces. IEEE Transactions on
    Neural Systems and Rehabilitation Engineering.

    [2] Eduardo Santamaría-Vázquez, Víctor Martínez-Cagigal, Sergio
    Pérez-Velasco, Diego Marcos-Martínez, Roberto Hornero, Robust Asynchronous
    Control of ERP-Based Brain-Computer Interfaces using Deep Learning,
    Computer Methods and Programs in Biomedicine, vol. 215, Marzo, 2022
    """
[docs]    def __init__(self):
        super().__init__()

[docs]    def configure(self, cnn_n_cha=8, fine_tuning=False,
                  shuffle_before_fit=True, validation_split=0.2,
                  init_weights_path=None, gpu_acceleration=True):
        self.settings = {
            'cnn_n_cha': cnn_n_cha,
            'fine_tuning': fine_tuning,
            'shuffle_before_fit': shuffle_before_fit,
            'validation_split': validation_split,
            'init_weights_path': init_weights_path,
            'gpu_acceleration': gpu_acceleration
        }
        # Update state
        self.is_configured = True
        self.is_built = False
        self.is_fit = False

[docs]    def build(self):
        # Check errors
        if not self.is_configured:
            raise ValueError('Function configure must be called first!')
        # Only import deep learning models if necessary
        from medusa.deep_learning_models import EEGInceptionv1
        # Preprocessing (bandpass IIR filter [0, 10] Hz + CAR)
        self.add_method('prep_method',
                        StandardPreprocessing(cutoff=(0.5, 45)))
        # Feature extraction (epochs [0, 1000] ms + resampling to 128 Hz)
        self.add_method(
            'ext_method',
            StandardFeatureExtraction(target_fs=128,
                                      concatenate_channels=False)
        )
        # Feature classification
        clf = EEGInceptionv1(
            input_time=1000,
            fs=128,
            n_cha=self.settings['cnn_n_cha'],
            filters_per_branch=8,
            scales_time=(500, 250, 125),
            dropout_rate=0.25,
            activation='elu', n_classes=2,
            learning_rate=0.001,
            gpu_acceleration=self.settings['gpu_acceleration'])
        if self.settings['init_weights_path'] is not None:
            clf.load_weights(self.settings['init_weights_path'])
        self.add_method('clf_method', clf)
        # Update state
        self.is_built = True
        self.is_fit = False

[docs]    def fit_dataset(self, dataset, **kwargs):
        # Check errors
        if not self.is_built:
            raise ValueError('Function build must be called first!')
        if dataset.channel_set.n_cha != self.settings['cnn_n_cha']:
            raise ValueError('The number of channels of the model and the '
                             'dataset does not match!')
        # Preprocessing
        dataset = self.get_inst('prep_method').fit_transform_dataset(dataset)

        # Feature extraction
        x, x_info = self.get_inst('ext_method').transform_dataset(dataset)

        # Check errors
        if np.all(np.array(x_info['control_state_labels']) == 0) or \
            np.all(np.array(x_info['control_state_labels']) == 1):
            raise ValueError('The dataset does not contain examples of '
                             'different control states')
        # Classification
        self.get_inst('clf_method').fit(
            x, x_info['control_state_labels'],
            fine_tuning=self.settings['fine_tuning'],
            shuffle_before_fit=self.settings['shuffle_before_fit'],
            validation_split=self.settings['validation_split'],
            **kwargs)
        y_pred = self.get_inst('clf_method').predict_proba(x)[:, 1]

        # Control state detection
        csd_result, csd_result_per_seq, scores = detect_control_state(
            y_pred, x_info['run_idx'],
            x_info['trial_idx'],
            x_info['sequence_idx']
        )

        # Control state accuracy
        csd_acc_per_seq = control_state_detection_accuracy_per_seq(
            csd_result_per_seq,
            x_info['control_state_target']
        )

        csd_assessment = {
            'x': x,
            'x_info': x_info,
            'y_pred': y_pred,
            'control_state_result': csd_result,
            'control_state_result_per_seq': csd_result_per_seq,
            'control_state_acc_per_seq': csd_acc_per_seq,
        }
        # Save info
        self.channel_set = dataset.channel_set
        # Update state
        self.is_fit = True
        return csd_assessment

[docs]    def predict(self, times, signal, fs, channel_set, x_info, **kwargs):
        # Check errors
        if not self.is_fit:
            raise ValueError('Function fit_dataset must be called first!')
        # Check channel set
        if self.channel_set != channel_set:
            warnings.warn('The channel set is not the same that was used to '
                          'fit the model. Be careful!')
        # Preprocessing
        signal = self.get_inst('prep_method').fit_transform_signal(signal, fs)
        # Extract features
        x = self.get_inst('ext_method').transform_signal(times, signal, fs,
                                                         x_info['onsets'])
        # Classification
        y_pred = self.get_inst('clf_method').predict_proba(x)[:, 1]
        # Control state detection
        csd_result, csd_result_per_seq, __ = detect_control_state(
            y_pred, x_info['run_idx'],
            x_info['trial_idx'],
            x_info['sequence_idx']
        )

        cs_detection = {
            'x': x,
            'x_info': x_info,
            'y_pred': y_pred,
            'control_state_result': csd_result,
            'control_state_result_per_seq': csd_result_per_seq,
        }
        return cs_detection