Source code for lib5c.tools.expected

from lib5c.tools.parents import level_parser, simple_in_out_parser, \
    parallelization_parser, primerfile_parser


[docs]def add_expected_tool(parser): expected_parser = parser.add_parser( 'expected', prog='lib5c expected', help='compute expected models', parents=[primerfile_parser, level_parser, simple_in_out_parser, parallelization_parser] ) expected_parser.add_argument( '-G', '--global_expected', action='store_true', help='''Pass this flag to use a global 1-D distance expected instead of a regional one.''') expected_parser.add_argument( '-M', '--force_monotonic', action='store_true', help='''Force the 1-D distance dependence to be monotonic.''') expected_parser.add_argument( '-E', '--exclude_near_diagonal', action='store_true', help='''If -R/--regression or -L/--lowess is passed and the data is bin-level, pass this flag to exclude the first 1/3 of the distance scales from the fitting operation.''') group_prl = expected_parser.add_mutually_exclusive_group() group_prl.add_argument( '-P', '--powerlaw', action='store_true', help='''Fit a discrete power law distribution to distances counts are detected at. This ignores -t.''') group_prl.add_argument( '-R', '--regression', action='store_true', help='''Fit a log-log polynomial regression to the 1-D distance dependence. This forces -t both. The degree of the polynomial can be specified with the -d/--degree flag.''') group_prl.add_argument( '-L', '--lowess', action='store_true', help='''Pass this flag to lowess smooth the 1-D distance dependence.''') expected_parser.add_argument( '-d', '--degree', type=int, default=1, help='''This flag sets the degree of the polynomial used for regression when the -R flag is passed. The default is 1 for a linear fit.''') expected_parser.add_argument( '-f', '--smoothing_fraction', type=float, default=0.8, help='''The fraction of the data to use when lowess smoothing. The default is 0.8.''') expected_parser.add_argument( '-D', '--donut_correction', action='store_true', help='''Pass this flag to use the local donut correction factor.''') expected_parser.add_argument( '-w', '--donut_outer_size', type=int, default=15, help='''The outer size of the donut to use. The default is 15.''') expected_parser.add_argument( '-x', '--donut_inner_size', type=int, default=5, help='''The inner size of the donut to use. The default is 5.''') expected_parser.add_argument( '-m', '--minimum_donut_fraction', type=float, default=0.2, help='''The minimum fraction of the donut that must be occupied by real values. Default is 0.2.''') expected_parser.add_argument( '-e', '--min_exp', type=float, default=0.1, help='''When passing -D/--donut_correction, if the sum of the expected values under the donut footprint for a particular pixel is less than this value, the output at this pixel will be set to nan to avoid numerical instability related to division by small numbers. Default is 0.1.''') expected_parser.add_argument( '-O', '--log_donut', action='store_true', help='''When using the -D/--donut_correction flag, additionally pass this flag to perform the dount correction in log space.''') expected_parser.add_argument( '-X', '--max_with_lower_left', action='store_true', help='''When using the -D/--donut_correction flag, additionally pass this flag to use the max of the donut and the lower left as the local correction factor.''') expected_parser.add_argument( '-t', '--log_transform', type=str, choices=['both', 'counts', 'none', 'auto'], default='auto', help='''Determines which quantities should be logged when performing expected modeling. Pass 'both' to log both counts and distances. Pass 'counts' to log only the counts. Pass 'none' to log neither quantity. The default is 'auto', which uses a sane default that depends on the other parameters. This flag may be forced by other paramters.''') expected_parser.add_argument( '-o', '--plot_outfile', type=str, help='''Pass a string reference to a path to which to write visualizations of the 1-D expected model. This string should include one %%r, which will be replaced with the region name. A %%d in this string will be replaced with the output directory of the countsfiles.''') expected_parser.add_argument( '-K', '--kde', action='store_true', help='''Use a kernel density estimate instead of a scatterplot when plotting the visualization.''') expected_parser.add_argument( '-B', '--hexbin', action='store_true', help='''Use hexagonal bins instead of a scatterplot when plotting the visualization.''') expected_parser.add_argument( '-S', '--single_model', action='store_true', help='''Pass this flag to generate a single expected model for all infiles.''') expected_parser.set_defaults(func=expected_tool)
[docs]def expected_tool(parser, args): import os from lib5c.tools.helpers import resolve_parallel, resolve_level, \ resolve_primerfile from lib5c.parsers.primers import load_primermap from lib5c.parsers.counts import load_counts from lib5c.writers.counts import write_counts from lib5c.plotters.expected import plot_bin_expected, \ plot_fragment_expected from lib5c.algorithms.expected import make_expected_matrix, \ get_global_distance_expected # -S not implemented if args.single_model: raise NotImplementedError('single model not implemented yet') # resolve primerfile and parallel primerfile = resolve_primerfile(args.infile, args.primerfile) resolve_parallel(parser, args, subcommand='expected') print('loading counts') primermap = load_primermap(primerfile) counts = load_counts(args.infile, primermap) # resolve level resolved_level = resolve_level(primermap, args.level) # honor -G if args.global_expected: distance_expected = get_global_distance_expected( counts, primermap=primermap, level=resolved_level, powerlaw=args.powerlaw, regression=args.regression, degree=args.degree, lowess_smooth=args.lowess, lowess_frac=args.smoothing_fraction, log_transform=args.log_transform, exclude_near_diagonal=args.exclude_near_diagonal ) else: distance_expected = None # make expected expected_counts, distance_expected, distance_matrix = make_expected_matrix( counts, regional_primermap=primermap, level=resolved_level, powerlaw=args.powerlaw, regression=args.regression, degree=args.degree, lowess_smooth=args.lowess, lowess_frac=args.smoothing_fraction, log_transform=args.log_transform, monotonic=args.force_monotonic, donut=args.donut_correction, w=args.donut_outer_size, p=args.donut_inner_size, donut_frac=args.minimum_donut_fraction, min_exp=args.min_exp, log_donut=args.log_donut, max_donut_ll=args.max_with_lower_left, distance_expected=distance_expected, exclude_near_diagonal=args.exclude_near_diagonal ) # plot outfile if args.plot_outfile is not None: root_dir = os.path.split(args.outfile)[0] outfiles = {region: args.plot_outfile.replace(r'%r', region) .replace(r'%d', root_dir) for region in counts.keys()} if resolved_level == 'bin': plot_bin_expected( counts, distance_expected, title={region: '%s 1-D expected model' % region for region in counts.keys()}, kde=args.kde, hexbin=args.hexbin, outfile=outfiles ) else: plot_fragment_expected( counts, distance_expected, distance_matrix, title={region: '%s 1-D expected model' % region for region in counts.keys()}, kde=args.kde, hexbin=args.hexbin, outfile=outfiles ) # write counts write_counts(expected_counts, args.outfile, primermap)