Source code for lib5c.tools.distribution
import argparse
from lib5c.tools.parents import lim_parser, primerfile_parser
def add_distribution_tool(parser):
    """
    Register the ``distribution`` subcommand and its arguments.

    Parameters
    ----------
    parser : object exposing ``add_parser()``
        The container to attach the subcommand to; presumably the object
        returned by ``argparse.ArgumentParser.add_subparsers()`` for
        ``lib5c plot`` (confirm against the caller).

    Notes
    -----
    The created subparser inherits the arguments of ``primerfile_parser`` and
    ``lim_parser`` and dispatches to ``distribtion_tool`` via its ``func``
    default.
    """
    distribution_parser = parser.add_parser(
        'distribution',
        prog='lib5c plot distribution',
        help='plot distributions',
        parents=[primerfile_parser, lim_parser]
    )
    distribution_parser.add_argument(
        '-r', '--region',
        type=str,
        help='''Pass this flag to plot distributions for a specific region.''')
    # '%%' is argparse's escape for a literal '%'; the help text shows '%r'.
    distribution_parser.add_argument(
        '-R', '--regional',
        action='store_true',
        help='''Pass this flag to plot distributions for all regions separately.
        %%r in the output filename will be replaced with the region name.''')
    distribution_parser.add_argument(
        '-Z', '--drop_zeros',
        action='store_true',
        help='''Pass this flag to plot only the positive part of the
        distribution.''')
    distribution_parser.add_argument(
        '-s', '--separate_colors',
        type=str,
        help='''Specify a shell-quoted, comma-separated list of class names
        (which must be substrings of the replicate names) to color-code the
        output with. For example, 'ES,NPC'.''')
    distribution_parser.add_argument(
        'outfile',
        type=str,
        help='''Filename to draw plot to.''')
    # REMAINDER collects every remaining command-line token as a countsfile.
    distribution_parser.add_argument(
        'countsfiles',
        type=str,
        nargs=argparse.REMAINDER,
        help='''Countsfiles to plot distributions for.''')
    distribution_parser.set_defaults(func=distribtion_tool)
def distribtion_tool(parser, args):
    """
    Run the ``distribution`` subcommand: load countsfiles and plot them.

    Parameters
    ----------
    parser : object
        Not used by this function.
    args : namespace-like object
        Parsed command-line arguments. The attributes read here are
        ``countsfiles``, ``primerfile``, ``xlim``, ``ylim``,
        ``separate_colors``, ``region``, ``regional`` and ``outfile``.

    Notes
    -----
    Exactly one plotting mode is used, chosen in this order: a single region
    (``args.region``), every region separately (``args.regional``, with
    ``%r`` in ``args.outfile`` replaced by each region name), or a global
    plot across all loaded counts.

    NOTE(review): ``args.drop_zeros`` is never consulted in this function.
    """
    import glob

    from lib5c.tools.helpers import resolve_primerfile, \
        infer_replicate_names, infer_level_mapping
    from lib5c.parsers.primers import load_primermap
    from lib5c.parsers.counts import load_counts
    from lib5c.plotters.distribution import plot_regional_distribtions, \
        plot_global_distributions, plot_regional_distribtions_parallel

    def parse_limits(spec):
        # turns a string such as '(0,10)' into [0.0, 10.0]; None passes through
        if spec is None:
            return None
        return [float(token) for token in spec.strip('()').split(',')]

    # locate the primerfile
    primerfile = resolve_primerfile(args.countsfiles, args.primerfile)

    # expand glob patterns, tolerating quotes left around each pattern
    countsfiles = []
    for raw_pattern in args.countsfiles:
        countsfiles += glob.glob(raw_pattern.strip('\'"'))

    # parse axis limits
    xlim = parse_limits(args.xlim)
    ylim = parse_limits(args.ylim)

    # read every countsfile against the shared primermap
    print('loading counts')
    primermap = load_primermap(primerfile)
    counts_by_file = {}
    for path in countsfiles:
        counts_by_file[path] = load_counts(path, primermap)

    # the raw argument doubles as a naming pattern only when it is a lone glob
    lone_glob = len(args.countsfiles) == 1 and '*' in args.countsfiles[0]
    rep_names = infer_replicate_names(
        countsfiles,
        as_dict=True,
        pattern=args.countsfiles[0] if lone_glob else None)

    # map replicates onto color classes, if any were requested
    levels = None
    if args.separate_colors is not None:
        class_names = [name.strip()
                       for name in args.separate_colors.split(',')]
        levels = infer_level_mapping(list(rep_names.values()), class_names)

    # keyword arguments common to all three plotting modes
    shared_kwargs = dict(labels=rep_names, levels=levels, xlim=xlim, ylim=ylim)

    # mode 1: one named region
    if args.region is not None:
        single_region_counts = {}
        for path in countsfiles:
            single_region_counts[path] = counts_by_file[path][args.region]
        plot_regional_distribtions(
            single_region_counts, outfile=args.outfile, **shared_kwargs)
        return

    # mode 2: every region, each drawn to its own output file
    if args.regional:
        outfiles = {}
        for region in primermap.keys():
            outfiles[region] = args.outfile.replace(r'%r', region)
        counts_by_region = {}
        for region in primermap.keys():
            counts_by_region[region] = {
                path: counts_by_file[path][region] for path in countsfiles}
        plot_regional_distribtions_parallel(
            counts_by_region, outfile=outfiles, **shared_kwargs)
        return

    # mode 3: all regions pooled into one plot
    plot_global_distributions(
        counts_by_file, outfile=args.outfile, **shared_kwargs)