Source code for medusa.stats

import numpy as np


[docs]
def get_confusion_matrix_stats(tp, tn, fp, fn):
    """ This function returns a collection of statistics given a confusion
    matrix. For more information about these statistics, refer to
    https://en.wikipedia.org/wiki/Positive_and_negative_predictive_values.

    Parameters
    -------------
    tp : int
        Number of true positives
    tn : int
        Number of true negatives
    fp : int
        Number of false positives
    fn : int
        Number of false negatives

    Returns
    --------------
    dict()
        Dictionary that contains the following statistics:
            - "prevalence"
            - "accuracy"
            - "ba", i.e. balanced accuracy
            - "ppv", i.e. positive predictive value
            - "precision", same as PPV
            - "fdr", i.e. false discovery rate
            - "f1", i.e. F1 score
            - "for", i.e. false omission rate
            - "npv", i.e. negative predictive value
            - "fm", i.e. Fowlkes-Mallows index
            - "informedness"
            - "bm", i.e. bookmaker informedness (informedness)
            - "tpr", i.e. true positive rate
            - "sensitivity", i.e. same as TPR
            - "recall", i.e. same as TPR
            - "fpr", i.e. false positive rate (fall-out or type I error)
            - "lr+", positive likelihood ratio
            - "mk", i.e. markedness (deltaP)
            - "mcc", i.e. Matthews correlation coefficient
            - "pt", i.e. prevalence threshold
            - "fnr", i.e. false negative rate (miss rate or type II error)
            - "tnr", i.e. true negative rate (selectivity)
            - "specificity", i.e. same as TNR
            - "lr-", i.e. negative likelihood ratio
            - "dor", i.e. diagnostic odds ratio
            - "ts", i.e. threat score (Jaccard index)
            - "csi", i.e. critical success index (same as TS)
    """
    stats = dict()
    pp = tp + fp
    pn = fn + tn
    p = tp + fn
    n = fp + tn
    total = tp + tn + fp + fn

    stats["prevalence"] = p / (p + n)
    stats["accuracy"] = (tp + tn) / total
    stats["ppv"] = tp / pp
    stats["precision"] = stats["ppv"]
    stats["fdr"] = fp / pp
    stats["f1"] = 2 * tp / (2 * tp + fp + fn)
    stats["for"] = fn / pn
    stats["npv"] = tn / pn
    stats["tpr"] = tp / (tp + fn)
    stats["sensitivity"] = stats["tpr"]
    stats["recall"] = stats["tpr"]
    stats["fnr"] = fn / (tp + fn)
    stats["fpr"] = fp / (fp + tn)
    stats["tnr"] = tn / (fp + tn)
    stats["specificity"] = tn / (fp + tn)
    stats["lr+"] = stats["tpr"] / stats["fpr"]
    stats["lr-"] = stats["fnr"] / stats["tnr"]
    stats["dor"] = stats["lr+"] / stats["lr-"]
    stats["mk"] = stats["ppv"] + stats["npv"] - 1
    stats["informedness"] = stats["tpr"] + stats["tnr"] - 1
    stats["bm"] = stats["informedness"]
    stats["pt"] = (np.sqrt(stats["tpr"] * stats["fpr"]) - stats["fpr"]) / (
        stats["tpr"] - stats["fpr"])
    stats["ba"] = (stats["tpr"] + stats["tnr"]) / 2
    stats["fm"] = np.sqrt(stats["ppv"] * stats["tpr"])
    stats["mcc"] = np.sqrt(stats["tpr"] + stats["tnr"] + stats["ppv"] +
                           stats["npv"])
    stats["csi"] = tp / (tp + fn + fp)
    stats["ts"] = stats["csi"]
    return stats