Source code for lib5c.tools.correlation
import argparse
from lib5c.tools.parents import primerfile_parser
def add_correlation_tool(parser):
    """
    Register the ``correlation`` subcommand and its command line options.

    Parameters
    ----------
    parser : object exposing ``add_parser()``
        The container the new subcommand parser is attached to (presumably
        the action object returned by ``ArgumentParser.add_subparsers()`` -
        confirm against the caller).

    Notes
    -----
    The new subcommand parser inherits from ``primerfile_parser`` and has its
    ``func`` default set to ``correlation_tool``.
    """
    subcommand = parser.add_parser(
        'correlation',
        prog='lib5c plot correlation',
        help='compute pairwise correlation coefficients',
        parents=[primerfile_parser],
    )

    # Each entry is (names, add_argument keyword arguments). The entries are
    # registered in this exact order, which is also the order they appear in
    # the generated help text.
    argument_specs = [
        (('-P', '--pearson'), dict(
            action='store_true',
            help='Compute Pearson correlation coefficients.')),
        (('-S', '--spearman'), dict(
            action='store_true',
            help='Compute Spearman correlation coefficients.')),
        # Disabled option, retained for reference:
        # (('-r', '--region'), dict(
        #     type=str,
        #     help='''If passed, compute correlations only for the specified
        #     region.''')),
        (('-c', '--csv'), dict(
            type=str,
            help=('Specify an output file to write output as csv. %%c will be\n'
                  'replaced with the correlation type.'))),
        (('-i', '--image'), dict(
            type=str,
            help=('Specify an output file to write output as image. %%c will be\n'
                  'replaced with the correlation type.'))),
        (('-s', '--scale'), dict(
            type=str,
            default=None,
            help=("Specify a colorscale to use when writing output image by passing\n"
                  "a string of the form '(min, max)'."))),
        (('-B', '--colorbar'), dict(
            action='store_true',
            help=('Add a colorbar to the plot. Ignored when -C/--cluster is\n'
                  'passed.'))),
        (('-C', '--cluster'), dict(
            action='store_true',
            help='Cluster the samples when writing output image.')),
        # Positional: collects everything left on the command line.
        (('countsfiles',), dict(
            type=str,
            nargs=argparse.REMAINDER,
            help='Countsfiles to compute correlations between.')),
    ]
    for names, options in argument_specs:
        subcommand.add_argument(*names, **options)

    # Route this subcommand to its implementation.
    subcommand.set_defaults(func=correlation_tool)
def correlation_tool(parser, args):
    """
    Compute pairwise correlation matrices between countsfiles and emit them.

    For every requested correlation type the matrix is optionally plotted
    (``args.image``), optionally written as a table (``args.csv``), and always
    printed to stdout. If neither ``args.pearson`` nor ``args.spearman`` is
    set, the counts are still loaded but no matrix is computed or written.

    Parameters
    ----------
    parser : object
        Accepted for the tool calling convention; not used by this function.
    args : namespace-like object
        Must provide the attributes ``countsfiles``, ``primerfile``, ``scale``,
        ``pearson``, ``spearman``, ``image``, ``csv``, ``cluster`` and
        ``colorbar``.
    """
    # Project imports are kept local to the function, as in the original.
    import glob

    from lib5c.tools.helpers import resolve_primerfile, infer_replicate_names
    from lib5c.parsers.primers import load_primermap
    from lib5c.parsers.counts import load_counts
    from lib5c.algorithms.correlation import make_pairwise_correlation_matrix
    from lib5c.plotters.correlation import plot_correlation_matrix
    from lib5c.writers.correlation import write_correlation_table
    from lib5c.util.system import check_outdir

    # Work out which primerfile to use.
    primerfile = resolve_primerfile(args.countsfiles, args.primerfile)

    # Expand every (possibly quoted) glob pattern into concrete paths.
    countsfile_paths = [
        path
        for raw_pattern in args.countsfiles
        for path in glob.glob(raw_pattern.strip('\'"'))
    ]

    # Turn a '(min, max)' style string into a list of floats.
    if args.scale is None:
        colorscale = None
    else:
        colorscale = [float(bound)
                      for bound in args.scale.strip('()').split(',')]

    # Load the counts for every input file, keyed by its path.
    print('loading counts')
    primermap = load_primermap(primerfile)
    counts_superdict = {}
    for path in countsfile_paths:
        counts_superdict[path] = load_counts(path, primermap)

    # Collect the correlation types that were requested, Pearson first.
    requested_flags = (('pearson', args.pearson), ('spearman', args.spearman))
    correlation_types = [name for name, enabled in requested_flags if enabled]

    # Sorted paths fix the replicate order; labels are derived from them. The
    # raw argument is only passed as a pattern when it is a single glob.
    rep_order = sorted(countsfile_paths)
    is_single_glob = (len(args.countsfiles) == 1
                      and '*' in args.countsfiles[0])
    label_pattern = args.countsfiles[0] if is_single_glob else None
    labels = infer_replicate_names(rep_order, pattern=label_pattern)

    # Compute each matrix, then plot / write / print it.
    for correlation_type in correlation_types:
        correlation_matrix = make_pairwise_correlation_matrix(
            counts_superdict,
            correlation=correlation_type,
            rep_order=rep_order,
        )

        if args.image is not None:
            image_path = args.image.replace('%c', correlation_type)
            plot_correlation_matrix(
                correlation_matrix,
                label_values=labels,
                cluster=args.cluster,
                cbar=args.colorbar,
                colorscale=colorscale,
                outfile=image_path,
            )

        if args.csv is not None:
            csv_path = args.csv.replace('%c', correlation_type)
            # check_outdir is only invoked for the csv output path
            # (presumably it prepares the output directory - TODO confirm).
            check_outdir(csv_path)
            write_correlation_table(correlation_matrix, csv_path,
                                    labels=labels)

        print('%s correlation matrix:' % correlation_type)
        print(correlation_matrix)
        print('')