Source code for lib5c.tools.remove

import argparse

from lib5c.tools.parents import primerfile_parser


def add_remove_tool(parser):
    """
    Register the ``remove`` subcommand and its command-line arguments.

    Parameters
    ----------
    parser : object exposing ``add_parser()``
        The subcommand registry to attach ``remove`` to. Only its
        ``add_parser()`` method is used here (presumably this is the object
        returned by ``argparse.ArgumentParser.add_subparsers()`` - confirm
        against the caller).

    Notes
    -----
    The created subparser inherits the arguments of ``primerfile_parser`` and
    sets ``func`` to ``remove_tool`` as its default, so the dispatcher can
    look up the entry point on the parsed namespace.
    """
    remove_parser = parser.add_parser(
        'remove',
        prog='lib5c remove',
        help='remove low-count primer-primer pairs',
        parents=[primerfile_parser],
    )

    # threshold and the three alternative removal conditions
    remove_parser.add_argument(
        '-t', '--threshold',
        type=float,
        default=5.0,
        help='Sets the threshold. A rep passes the threshold if it is '
             'greater than or equal to this number. The default is 5.0.')
    remove_parser.add_argument(
        '-n', '--num_reps',
        type=int,
        help='Pass an int to make the condition for removal be that this '
             'many reps must fail the threshold. This overrides the '
             '-f/--fraction_reps flag.')
    remove_parser.add_argument(
        '-f', '--fraction_reps',
        type=float,
        help='Pass a fraction (between 0 and 1) as a float to make the '
             'condition for removal be that this fraction of the reps must '
             'fail the threshold.')
    remove_parser.add_argument(
        '-A', '--all_reps',
        action='store_true',
        help='Pass this flag to make the condition be that the sum across '
             'all replicates must clear the threshold. This is the default '
             'mode if neither -n/--num_reps nor -f/--fraction_reps is '
             'passed.')

    # positionals: output pattern first, then every remaining token is
    # collected as an input countsfile
    remove_parser.add_argument(
        'outfile_pattern',
        type=str,
        # "%%" is required because argparse %-formats help strings
        help='Pattern describing where to write output countsfiles to. %%s '
             'will be replaced by the replicate name.')
    remove_parser.add_argument(
        'countsfiles',
        type=str,
        nargs=argparse.REMAINDER,
        help='Countsfiles to use to determine which primer-primer pairs '
             'should be removed.')

    remove_parser.set_defaults(func=remove_tool)
def remove_tool(parser, args):
    """
    Entry point for ``lib5c remove``.

    Expands the input countsfile patterns, loads the counts for every matched
    file, hands them to ``remove_primer_primer_pairs()`` together with the
    threshold settings from ``args``, and then writes one output countsfile
    per input file.

    Parameters
    ----------
    parser : object
        Not used by this function (presumably kept so that every tool entry
        point shares the same signature - confirm against the dispatcher).
    args : namespace
        Parsed command-line arguments. The attributes read here are
        ``countsfiles``, ``primerfile``, ``threshold``, ``num_reps``,
        ``fraction_reps``, ``all_reps`` and ``outfile_pattern``.
    """
    # imports are kept local to the function, as in the original
    import glob

    from lib5c.tools.helpers import resolve_primerfile, infer_replicate_names
    from lib5c.parsers.primers import load_primermap
    from lib5c.parsers.counts import load_counts
    from lib5c.writers.counts import write_counts
    from lib5c.algorithms.outliers import remove_primer_primer_pairs

    # resolve primerfile
    primerfile = resolve_primerfile(args.countsfiles, args.primerfile)

    # expand infiles; surrounding quote characters are stripped from each
    # argument before it is globbed
    expanded_infiles = [
        matched_path
        for raw_pattern in args.countsfiles
        for matched_path in glob.glob(raw_pattern.strip('\'"'))
    ]

    # load counts
    print('loading counts')
    primermap = load_primermap(primerfile)
    counts_superdict = {}
    for infile in expanded_infiles:
        counts_superdict[infile] = load_counts(infile, primermap)

    # remove primer-primer pairs
    print('removing low primer-primer pairs')
    remove_primer_primer_pairs(
        counts_superdict,
        primermap,
        threshold=args.threshold,
        num_reps=args.num_reps,
        fraction_reps=args.fraction_reps,
        all_reps=args.all_reps,
    )

    # write output
    print('writing output')
    # the raw argument is only forwarded as a naming pattern when exactly one
    # argument was given and it contains a wildcard
    name_pattern = None
    if len(args.countsfiles) == 1 and '*' in args.countsfiles[0]:
        name_pattern = args.countsfiles[0]
    replicate_names = infer_replicate_names(
        expanded_infiles, pattern=name_pattern)

    for index, infile in enumerate(expanded_infiles):
        outfile = args.outfile_pattern % replicate_names[index]
        write_counts(counts_superdict[infile], outfile, primermap)