# Source code for lib5c.tools.dd_curve
import argparse
from lib5c.tools.parents import primerfile_parser
def add_dd_curve_tool(parser):
    """Register the ``dd-curve`` subcommand and its command-line arguments.

    Parameters
    ----------
    parser
        Object exposing ``add_parser()``; presumably the subparsers action
        returned by ``argparse.ArgumentParser.add_subparsers()`` -- confirm
        against the caller.

    Notes
    -----
    The created subparser inherits the arguments of ``primerfile_parser`` and
    dispatches to ``dd_curve_tool`` through its ``func`` default.
    """
    dd_curve_parser = parser.add_parser(
        'dd-curve',
        prog='lib5c plot dd-curve',
        help='plot distance dependence curve',
        parents=[primerfile_parser]
    )
    dd_curve_parser.add_argument(
        '-r', '--region',
        type=str,
        help='''Pass this flag to plot distributions for a specific region.''')
    # The help text below used to stop mid-sentence; it now states what
    # happens to %r. The doubled percent sign is argparse's escape for a
    # literal '%' in help strings.
    dd_curve_parser.add_argument(
        '-R', '--regional',
        action='store_true',
        help='''Pass this flag to plot distributions for all regions separately.
        %%r in the output filename will be replaced with the region name.''')
    dd_curve_parser.add_argument(
        '-b', '--bins',
        type=str,
        help='''Pass a comma separated list of bin edges to override the default
        distance stratification.'''
    )
    dd_curve_parser.add_argument(
        '-y', '--ylim',
        type=str,
        help='''Pass a tuple '(min,max)' to force the y-limits of the plot.'''
    )
    dd_curve_parser.add_argument(
        '-s', '--separate_colors',
        type=str,
        help='''Specify a shell-quoted, comma-separated list of class names
        (which must be substrings of the replicate names) to color-code the
        output with. For example, 'ES,NPC'.''')
    dd_curve_parser.add_argument(
        'outfile',
        type=str,
        help='''Filename to draw plot to.''')
    # REMAINDER collects every remaining command-line token as a countsfile.
    dd_curve_parser.add_argument(
        'countsfiles',
        type=str,
        nargs=argparse.REMAINDER,
        help='''Countsfiles to plot distance dependence curves for.''')
    dd_curve_parser.set_defaults(func=dd_curve_tool)
def dd_curve_tool(parser, args):
    """Run the ``dd-curve`` subcommand: load counts and draw the plot(s).

    Parameters
    ----------
    parser
        Not used by this function.
    args
        Parsed arguments. The attributes read are ``countsfiles``,
        ``primerfile``, ``ylim``, ``bins``, ``separate_colors``, ``region``,
        ``regional`` and ``outfile``.

    Notes
    -----
    A single plot is drawn via ``plot_distance_dependence`` when a region is
    given or ``regional`` is not set. Otherwise one plot per region is
    requested from ``plot_distance_dependence_parallel``, with ``%r`` in the
    output filename replaced by the region name.
    """
    import glob
    from lib5c.tools.helpers import (
        resolve_primerfile,
        infer_replicate_names,
        infer_level_mapping,
    )
    from lib5c.parsers.primers import load_primermap
    from lib5c.parsers.counts import load_counts
    from lib5c.plotters.distance_dependence import (
        plot_distance_dependence,
        plot_distance_dependence_parallel,
    )

    # work out which primerfile to use
    primerfile = resolve_primerfile(args.countsfiles, args.primerfile)

    # strip surrounding quotes from each argument, then glob-expand it
    countsfile_paths = [
        path
        for raw_pattern in args.countsfiles
        for path in glob.glob(raw_pattern.strip('\'"'))
    ]

    # optional y-limits, given as '(min,max)'
    ylim = None
    if args.ylim is not None:
        ylim = [float(token) for token in args.ylim.strip('()').split(',')]

    # optional bin edges, given as a comma-separated list
    bins = None
    if args.bins is not None:
        bins = [float(token) for token in args.bins.strip('()').split(',')]

    # load one counts structure per expanded file, keyed by its path
    print('loading counts')
    primermap = load_primermap(primerfile)
    counts_superdict = {}
    for path in countsfile_paths:
        counts_superdict[path] = load_counts(path, primermap)

    # replicates are the loaded files; regions are the primermap keys
    reps = list(counts_superdict)
    regions = list(primermap.keys())

    # only a lone argument containing a wildcard is passed on as the pattern
    name_pattern = None
    if len(args.countsfiles) == 1 and '*' in args.countsfiles[0]:
        name_pattern = args.countsfiles[0]
    rep_names = infer_replicate_names(
        countsfile_paths, as_dict=True, pattern=name_pattern)

    # optional color classes derived from the replicate names
    levels = None
    if args.separate_colors is not None:
        class_names = [
            name.strip() for name in args.separate_colors.split(',')
        ]
        levels = infer_level_mapping(list(rep_names.values()), class_names)

    if args.regional and not args.region:
        # one plot per region: every keyword argument is keyed by region name
        counts_by_region = {
            region: {rep: {region: counts_superdict[rep][region]}
                     for rep in reps}
            for region in regions
        }
        primermap_by_region = {
            region: {region: primermap[region]} for region in regions
        }
        region_by_region = {region: region for region in regions}
        outfile_by_region = {
            region: args.outfile.replace(r'%r', region) for region in regions
        }
        plot_distance_dependence_parallel(
            counts_by_region,
            primermap_by_region,
            region=region_by_region,
            bins=bins,
            labels=rep_names,
            levels=levels,
            outfile=outfile_by_region,
            ylim=ylim
        )
    else:
        # one plot, optionally restricted to args.region
        plot_distance_dependence(
            counts_superdict,
            primermap,
            region=args.region,
            bins=bins,
            labels=rep_names,
            levels=levels,
            outfile=args.outfile,
            ylim=ylim
        )