Source code for lib5c.tools.trim
import argparse
from lib5c.tools.parents import primerfile_parser
def add_trim_tool(parser):
    """Register the ``trim`` subcommand and its command-line arguments.

    Parameters
    ----------
    parser
        Object exposing ``add_parser()`` on which the ``trim`` subcommand is
        created (presumably the result of
        ``ArgumentParser.add_subparsers()`` -- confirm against the caller).

    Notes
    -----
    The created subparser inherits the arguments of ``primerfile_parser`` and
    sets ``func`` to ``trim_tool`` as a parser default.
    """
    trim_parser = parser.add_parser(
        'trim',
        prog='lib5c trim',
        help='primer and countsfile trimming',
        parents=[primerfile_parser]
    )

    # thresholds controlling which primers get trimmed
    trim_parser.add_argument(
        '-s', '--minimum_sum',
        type=int,
        default=100,
        help='''The minimum sum of cis contacts that a primer must have to avoid
        being trimmed. Pass 0 to skip this check. The default is 100.''')
    trim_parser.add_argument(
        '-f', '--minimum_fraction',
        type=float,
        default=0.5,
        help='''The minimum fraction of nonzero cis contacts that a primer must
        have to avoid being trimmed. Pass 0.0 to skip this check. The default is
        0.5.''')

    # optional countsfile outputs; '%%s' is escaped because argparse applies
    # %-formatting to help strings
    trim_parser.add_argument(
        '-w', '--wipe_countsfiles',
        type=str,
        help='''If this flag is present, output countsfiles will be written here
        with nan's at all removed positions. Pass a path containing one %%s,
        which will be replaced with the replicate name.''')
    trim_parser.add_argument(
        '-t', '--trim_countsfiles',
        type=str,
        help='''If this flag is present, output countsfiles will be written here
        with all trimmed positions removed. Pass a path containing one %%s,
        which will be replaced with the replicate name.''')

    # positional arguments
    trim_parser.add_argument(
        'outfile',
        type=str,
        help='''Path to file to write trimmed primers to.''')
    trim_parser.add_argument(
        'countsfiles',
        type=str,
        nargs=argparse.REMAINDER,
        help='''Countsfiles to use to determine which primers should be
        trimmed.''')

    trim_parser.set_defaults(func=trim_tool)
def trim_tool(parser, args):
    """Run the ``trim`` subcommand.

    Loads the countsfiles matched by ``args.countsfiles``, determines which
    primers to trim, writes the trimmed primermap to ``args.outfile`` and,
    when requested, writes wiped and/or trimmed countsfiles.

    Parameters
    ----------
    parser
        Not used by this function.
    args
        Parsed arguments. The attributes read here are ``countsfiles``,
        ``primerfile``, ``minimum_sum``, ``minimum_fraction``, ``outfile``,
        ``wipe_countsfiles`` and ``trim_countsfiles``.
    """
    import glob
    import os

    from lib5c.tools.helpers import resolve_primerfile
    from lib5c.parsers.primers import load_primermap
    from lib5c.parsers.counts import load_counts
    from lib5c.writers.counts import write_counts
    from lib5c.writers.primers import write_primermap
    from lib5c.algorithms.trimming import (
        trim_primers, wipe_counts, trim_counts)

    def replicate_name(path):
        # file name of ``path`` without its directory and last extension
        return os.path.splitext(os.path.basename(path))[0]

    # figure out which primerfile to use
    primerfile = resolve_primerfile(args.countsfiles, args.primerfile)

    # expand glob patterns, dropping any quote characters around each one
    countsfile_paths = [
        path
        for pattern in args.countsfiles
        for path in glob.glob(pattern.strip('\'"'))
    ]

    # load the primermap, then the counts for every matched file
    primermap = load_primermap(primerfile)
    counts_by_file = {}
    for path in countsfile_paths:
        counts_by_file[path] = load_counts(path, primermap)

    # decide which primers are removed
    trimmed_primermap, removed_indices = trim_primers(
        primermap,
        counts_by_file,
        min_sum=args.minimum_sum,
        min_frac=args.minimum_fraction,
    )

    # write the trimmed primermap
    write_primermap(trimmed_primermap, args.outfile)

    # optionally write wiped countsfiles, described with the full primermap
    wipe_pattern = args.wipe_countsfiles
    if wipe_pattern is not None:
        for path in countsfile_paths:
            wiped = wipe_counts(counts_by_file[path], removed_indices)
            destination = wipe_pattern % replicate_name(path)
            write_counts(wiped, destination, primermap)

    # optionally write trimmed countsfiles, described with the trimmed
    # primermap
    trim_pattern = args.trim_countsfiles
    if trim_pattern is not None:
        for path in countsfile_paths:
            trimmed = trim_counts(counts_by_file[path], removed_indices)
            destination = trim_pattern % replicate_name(path)
            write_counts(trimmed, destination, trimmed_primermap)