Source code for lib5c.algorithms.correlation

"""
Module for computing correlations between 5C replicates.
"""

import numpy as np
from scipy.stats import spearmanr, pearsonr

from lib5c.util.counts_superdict import counts_superdict_to_matrix


def make_pairwise_correlation_matrix_from_counts_matrix(counts_matrix,
                                                        correlation='pearson'):
    """
    Computes a matrix of pairwise correlation coefficients among a set of 5C
    replicates.

    Parameters
    ----------
    counts_matrix : np.ndarray
        The rows are replicates, the columns are FFLJs.
    correlation : {'pearson', 'spearman'}
        Controls which correlation will be used.

    Returns
    -------
    np.ndarray
        The square, symmetric pairwise correlation matrix. Its side length is
        ``len(counts_matrix)`` and its diagonal is exactly 1.0.

    Raises
    ------
    ValueError
        If ``correlation`` is not one of the supported names.
    """
    # resolve correlation; reject unknown names immediately so that a typo
    # does not surface later as "'NoneType' object is not callable" (or go
    # unnoticed entirely when there are fewer than two replicates)
    corr_fns = {'pearson': pearsonr, 'spearman': spearmanr}
    if correlation not in corr_fns:
        raise ValueError(
            "unknown correlation %r; expected one of %s"
            % (correlation, sorted(corr_fns)))
    corr_fn = corr_fns[correlation]

    # start from the identity: each replicate's correlation with itself is
    # defined to be 1.0 and is never actually computed
    num_reps = len(counts_matrix)
    correlation_matrix = np.eye(num_reps)

    # compute each unordered pair once and mirror it across the diagonal
    for i in range(num_reps):
        for j in range(i):
            # both scipy functions return the coefficient as the first element
            corr_value = corr_fn(counts_matrix[i], counts_matrix[j])[0]
            correlation_matrix[i, j] = corr_value
            correlation_matrix[j, i] = corr_value

    return correlation_matrix


def make_pairwise_correlation_matrix(counts_superdict, correlation='pearson',
                                     rep_order=None):
    """
    Builds the pairwise correlation matrix for the replicates held in a counts
    superdict.

    The superdict is first converted to a single counts matrix via
    ``counts_superdict_to_matrix()`` (called with ``discard_nan=True``), and
    that matrix is then handed to
    ``make_pairwise_correlation_matrix_from_counts_matrix()``.

    Parameters
    ----------
    counts_superdict : Dict[str, Dict[str, np.ndarray]]
        Outer keys are replicate names. Each value is a standard "counts
        dict" mapping region names to square symmetric matrices of counts.
    correlation : {'pearson', 'spearman'}
        Which correlation coefficient to compute.
    rep_order : Optional[List[str]]
        Order of the replicates along the rows and columns of the result. When
        omitted, the replicates follow the iteration order of the keys of
        ``counts_superdict``.

    Returns
    -------
    np.ndarray
        The square, symmetric pairwise correlation matrix.
    """
    # one row per replicate, in the requested order
    flattened_counts = counts_superdict_to_matrix(
        counts_superdict,
        rep_order=rep_order,
        discard_nan=True,
    )

    # delegate the actual correlation computation to the matrix-based variant
    pairwise_correlations = \
        make_pairwise_correlation_matrix_from_counts_matrix(
            flattened_counts, correlation=correlation)
    return pairwise_correlations