Source code for lib5c.util.statistics

"""
Module containing utility functions related to statistical transformation,
correction, or combination of p-values.
"""

import numpy as np
import scipy.stats as stats
from statsmodels.stats.multitest import multipletests


[docs]def adjust_pvalues(pvalue_matrix, method='fdr_bh'):
    """
    Performs multiple testing correction on a matrix of p-values.

    Parameters
    ----------
    pvalue_matrix : np.ndarray
        The matrix of p-values, representing many tests that were performed.
    method : str
        Method used for testing and adjustment of pvalues. Can be either the
        full name or initial letters. Available methods are ::

        `bonferroni` : one-step correction
        `sidak` : one-step correction
        `holm-sidak` : step down method using Sidak adjustments
        `holm` : step-down method using Bonferroni adjustments
        `simes-hochberg` : step-up method  (independent)
        `hommel` : closed method based on Simes tests (non-negative)
        `fdr_bh` : Benjamini/Hochberg  (non-negative)
        `fdr_by` : Benjamini/Yekutieli (negative)
        `fdr_tsbh` : two stage fdr correction (non-negative)
        `fdr_tsbky` : two stage fdr correction (non-negative)

    Returns
    -------
    np.ndarray
        The matrix of adjusted p-values. Same size and shape as pvalue_matrix.
    """
    pvalue_matrix = np.asarray(pvalue_matrix)
    padj_matrix = np.zeros_like(pvalue_matrix) * np.nan
    padj_matrix[np.isfinite(pvalue_matrix)] = \
        multipletests(pvalue_matrix[np.isfinite(pvalue_matrix)], 0.1,
                      method=method)[1]
    return padj_matrix


[docs]def stouffer(pvalue_matrix, axis=None):
    """
    Combines a matrix of p-values along a specified axis using Stouffer's
    Z-transform.

    Parameters
    ----------
    pvalue_matrix : np.ndarray
        Matrix of p-values.
    axis : int, optional
        The axis to combine p-values along. Pass None to combine all of them.

    Returns
    -------
    np.ndarray
        Array of combined p-values.

    Notes
    -----
    Element-by-element, this should be equivalent to::

        scipy.stats.combine_pvalues(pvalue_matrix, method='stouffer')[1]

    except that this function is vectorized.

    Examples
    --------
    >>> from scipy.stats import combine_pvalues
    >>> stouffer([0.1, 0.1]) == combine_pvalues([0.1, 0.1],
    ...                                         method='stouffer')[1]
    True
    >>> stouffer(np.array([[0.1, 0.1],
    ...                    [0.2, 0.2]]), axis=1)
    array([0.03496316, 0.11697758])
    """
    pvalue_matrix = np.asarray(pvalue_matrix)
    zscores = stats.norm.isf(pvalue_matrix)
    k = pvalue_matrix.size if axis is None else pvalue_matrix.shape[axis]
    scaled_zscore_sum = np.sum(zscores, axis=axis) / np.sqrt(k)
    return stats.norm.sf(scaled_zscore_sum)


[docs]def convert_to_two_tail(pvalue):
    """
    Converts a one-tail p-value to a two-tail p-value.

    Only valid for continuous distributions (otherwise I think the PMF
    evaluation at the obs value needs to be taken into account when reversing
    the test).

    Array-safe, nan-safe, position-independent.

    Parameters
    ----------
    pvalue : float
        The one-tail p-values to be folded into two-tail pvalues.

    Returns
    -------
    float
        The two-tail p-values.
    """
    idx = pvalue > 0.5
    pvalue[idx] = 1 - pvalue[idx]
    return 2 * pvalue