Source code for lib5c.tools.qnorm
import argparse
from lib5c.tools.parents import primerfile_parser
def add_qnorm_tool(parser):
    """Register the ``qnorm`` subcommand and its command line arguments.

    Parameters
    ----------
    parser
        Object exposing ``add_parser()``; presumably the subparsers action
        returned by ``ArgumentParser.add_subparsers()`` -- confirm against
        the caller.

    Notes
    -----
    The new subcommand inherits arguments from ``primerfile_parser`` and
    dispatches to ``qnorm_tool`` through the ``func`` default.
    """
    subcommand = parser.add_parser(
        'qnorm',
        prog='lib5c qnorm',
        help='quantile normalization',
        parents=[primerfile_parser]
    )

    # Ordered (names, options) table. The order is significant: it fixes
    # both the help listing and the order of the two positional arguments.
    argument_specs = [
        (('-A', '--average'), {
            'action': 'store_true',
            'help': 'Pass this flag to set all tied entries to the average '
                    'value\n'
                    'across the tied ranks. The default is to set all tied '
                    'entries to the\n'
                    'value of the lowest rank.',
        }),
        (('-R', '--regional'), {
            'action': 'store_true',
            'help': 'Pass this flag to apply quantile normalization to each\n'
                    'region separately.',
        }),
        (('-c', '--condition_on'), {
            'type': str,
            'help': 'Specify a locus property to perform quantile '
                    'normalization\n'
                    'conditioning on that property. Only works with '
                    '-R/--regional.',
        }),
        (('-r', '--reference'), {
            'type': str,
            'help': 'Specify a countsfile or a replicate name to use as a '
                    'reference\n'
                    'distribution.',
        }),
        (('outfile_pattern',), {
            'type': str,
            # '%%' is argparse's escape for a literal percent sign in help.
            'help': 'Pattern to use to name output files. %%s will be '
                    'replaced with\n'
                    'the replicate name, as guessed from the input files.',
        }),
        (('countsfiles',), {
            'type': str,
            # REMAINDER collects every remaining command line token.
            'nargs': argparse.REMAINDER,
            'help': 'Countsfiles to quantile normalize.',
        }),
    ]
    for names, options in argument_specs:
        subcommand.add_argument(*names, **options)

    subcommand.set_defaults(func=qnorm_tool)
def qnorm_tool(parser, args):
    """Quantile normalize the requested countsfiles and write the results.

    Parameters
    ----------
    parser
        Not used by this function.
    args
        Parsed command line arguments. The attributes read here are
        ``countsfiles``, ``primerfile``, ``average``, ``reference``,
        ``regional``, ``condition_on`` and ``outfile_pattern``.

    Notes
    -----
    A reference that is neither an input file nor a substring of one is
    loaded as an extra countsfile. It takes part in the normalization call
    but is not written out, since only the expanded input files are written.
    """
    # Imports are kept local to the function, as in the original tool.
    import glob
    from lib5c.tools.helpers import resolve_primerfile, infer_replicate_names
    from lib5c.algorithms.qnorm import qnorm_counts_superdict
    from lib5c.parsers.primers import load_primermap
    from lib5c.parsers.counts import load_counts
    from lib5c.writers.counts import write_counts

    # resolve primerfile
    primerfile = resolve_primerfile(args.countsfiles, args.primerfile)

    # expand each argument as a glob pattern, after dropping any surrounding
    # quote characters
    input_paths = [
        path
        for raw in args.countsfiles
        for path in glob.glob(raw.strip('\'"'))
    ]

    # load counts
    print('loading counts')
    primermap = load_primermap(primerfile)
    counts_superdict = {}
    for path in input_paths:
        counts_superdict[path] = load_counts(path, primermap)

    # resolve tie
    if args.average:
        tie_rule = 'average'
    else:
        tie_rule = 'lowest'

    # resolve reference: exact key first, then the first input whose path
    # contains the given text, and finally a countsfile loaded on demand
    reference_key = args.reference
    if reference_key is not None and reference_key not in counts_superdict:
        partial_hit = next(
            (path for path in counts_superdict if args.reference in path),
            None,
        )
        if partial_hit is None:
            counts_superdict[args.reference] = load_counts(args.reference,
                                                           primermap)
        else:
            reference_key = partial_hit

    # quantile normalize
    print('quantile normalizing')
    normalized = qnorm_counts_superdict(
        counts_superdict,
        primermap,
        tie=tie_rule,
        regional=args.regional,
        condition_on=args.condition_on,
        reference=reference_key
    )

    # write counts
    print('writing counts')
    # The raw argument is passed as a naming pattern only when it is the
    # sole argument and contains a wildcard.
    # NOTE(review): unlike the globbed paths, this pattern is not stripped
    # of quote characters -- confirm infer_replicate_names tolerates that.
    lone_wildcard = (len(args.countsfiles) == 1
                     and '*' in args.countsfiles[0])
    naming_pattern = args.countsfiles[0] if lone_wildcard else None
    replicate_names = infer_replicate_names(input_paths,
                                            pattern=naming_pattern)

    outfiles = {}
    for index, path in enumerate(input_paths):
        outfiles[path] = args.outfile_pattern.replace(
            r'%s', replicate_names[index])

    for path in input_paths:
        write_counts(normalized[path], outfiles[path], primermap)