Source code for lib5c.tools.variance
from lib5c.tools.parents import parallelization_parser, primerfile_parser, \
var_parser
[docs]def add_variance_tool(parser):
variance_parser = parser.add_parser(
'variance',
prog='lib5c variance',
help='estimate variances from obs values and exp models',
parents=[primerfile_parser, parallelization_parser, var_parser]
)
variance_parser.add_argument(
'observed',
type=str,
help='''File containing observed counts.''')
variance_parser.add_argument(
'expected',
type=str,
help='''File containing expected counts.''')
variance_parser.add_argument(
'outfile',
type=str,
help='''Path to file to write variance counts to.''')
variance_parser.add_argument(
'--x_unit',
type=str,
choices=['exp', 'dist'],
default='dist',
help='''X-unit of the variance relationship. Default is 'dist'.''')
variance_parser.add_argument(
'--y_unit',
type=str,
choices=['disp', 'var'],
default='disp',
help='''Y-unit of the variance relationship. Default is 'disp'.''')
variance_parser.add_argument(
'--logx',
action='store_true',
help='''Pass this flag to perform the variance relationship fitting on
the scale of log(x).''')
variance_parser.add_argument(
'--logy',
action='store_true',
help='''Pass this flag to perform the variance relationship fitting on
the scale of log(y).''')
variance_parser.add_argument(
'-R', '--regional',
action='store_true',
help='''Pass this to perform a separate variance estimation step for
each region.''')
variance_parser.set_defaults(func=variance_tool)
[docs]def variance_tool(parser, args):
import glob
from lib5c.tools.helpers import resolve_parallel, resolve_primerfile
from lib5c.parsers.primers import load_primermap
from lib5c.parsers.counts import load_counts
from lib5c.algorithms.variance import estimate_variance
from lib5c.writers.counts import write_counts
# resolve primerfile
primerfile = resolve_primerfile(args.observed, args.primerfile)
# short-circuit to cross-replicate mode
if 'cross_rep' in args.source:
# expand infiles
expanded_infiles = glob.glob(args.observed)
if len(expanded_infiles) < 2:
raise ValueError('cross-replicate variance estimation requires at '
'least 2 replicates')
# resolve key rep
key_rep = None
if args.rep is not None:
for expanded_infile in expanded_infiles:
if args.rep in expanded_infile:
key_rep = expanded_infile
break
# load counts
print('loading counts')
primermap = load_primermap(primerfile)
counts_superdict = {
expanded_infile: load_counts(expanded_infile, primermap)
for expanded_infile in expanded_infiles}
expected_counts = load_counts(args.expected, primermap)
# estimate variance
print('estimating variance')
variance = estimate_variance(
counts_superdict, expected_counts, key_rep=key_rep,
model=args.model, source=args.source, fitter=args.fitter,
fitter_agg=args.agg_fn, x_unit=args.x_unit, y_unit=args.y_unit,
logx=args.logx, logy=args.logy, min_disp=float(args.min_disp),
min_obs=args.min_obs, min_dist=args.min_dist,
regional=args.regional)
# write variance
print('writing variance')
write_counts(variance, args.outfile, primermap)
return
# resolve parallel
resolve_parallel(parser, args, subcommand='variance', key_arg='observed')
# load counts
print('loading counts')
primermap = load_primermap(primerfile)
observed_counts = load_counts(args.observed, primermap)
expected_counts = load_counts(args.expected, primermap)
# estimate variance
variance = estimate_variance(
observed_counts, expected_counts, model=args.model,
source=args.source, fitter=args.fitter, fitter_agg=args.agg_fn,
x_unit=args.x_unit, y_unit=args.y_unit, logx=args.logx, logy=args.logy,
min_disp=float(args.min_disp), min_obs=args.min_obs,
min_dist=args.min_dist, regional=args.regional)
# write variance
print('writing variance')
write_counts(variance, args.outfile, primermap)