Source code for lib5c.util.statistics

"""
Module containing utility functions related to statistical transformation,
correction, or combination of p-values.
"""

import numpy as np
import scipy.stats as stats
from statsmodels.stats.multitest import multipletests


[docs]def adjust_pvalues(pvalue_matrix, method='fdr_bh'): """ Performs multiple testing correction on a matrix of p-values. Parameters ---------- pvalue_matrix : np.ndarray The matrix of p-values, representing many tests that were performed. method : str Method used for testing and adjustment of pvalues. Can be either the full name or initial letters. Available methods are :: `bonferroni` : one-step correction `sidak` : one-step correction `holm-sidak` : step down method using Sidak adjustments `holm` : step-down method using Bonferroni adjustments `simes-hochberg` : step-up method (independent) `hommel` : closed method based on Simes tests (non-negative) `fdr_bh` : Benjamini/Hochberg (non-negative) `fdr_by` : Benjamini/Yekutieli (negative) `fdr_tsbh` : two stage fdr correction (non-negative) `fdr_tsbky` : two stage fdr correction (non-negative) Returns ------- np.ndarray The matrix of adjusted p-values. Same size and shape as pvalue_matrix. """ pvalue_matrix = np.asarray(pvalue_matrix) padj_matrix = np.zeros_like(pvalue_matrix) * np.nan padj_matrix[np.isfinite(pvalue_matrix)] = \ multipletests(pvalue_matrix[np.isfinite(pvalue_matrix)], 0.1, method=method)[1] return padj_matrix
[docs]def stouffer(pvalue_matrix, axis=None): """ Combines a matrix of p-values along a specified axis using Stouffer's Z-transform. Parameters ---------- pvalue_matrix : np.ndarray Matrix of p-values. axis : int, optional The axis to combine p-values along. Pass None to combine all of them. Returns ------- np.ndarray Array of combined p-values. Notes ----- Element-by-element, this should be equivalent to:: scipy.stats.combine_pvalues(pvalue_matrix, method='stouffer')[1] except that this function is vectorized. Examples -------- >>> from scipy.stats import combine_pvalues >>> stouffer([0.1, 0.1]) == combine_pvalues([0.1, 0.1], ... method='stouffer')[1] True >>> stouffer(np.array([[0.1, 0.1], ... [0.2, 0.2]]), axis=1) array([0.03496316, 0.11697758]) """ pvalue_matrix = np.asarray(pvalue_matrix) zscores = stats.norm.isf(pvalue_matrix) k = pvalue_matrix.size if axis is None else pvalue_matrix.shape[axis] scaled_zscore_sum = np.sum(zscores, axis=axis) / np.sqrt(k) return stats.norm.sf(scaled_zscore_sum)
[docs]def convert_to_two_tail(pvalue): """ Converts a one-tail p-value to a two-tail p-value. Only valid for continuous distributions (otherwise I think the PMF evaluation at the obs value needs to be taken into account when reversing the test). Array-safe, nan-safe, position-independent. Parameters ---------- pvalue : float The one-tail p-values to be folded into two-tail pvalues. Returns ------- float The two-tail p-values. """ idx = pvalue > 0.5 pvalue[idx] = 1 - pvalue[idx] return 2 * pvalue