Source code for lib5c.tools.bias_heatmap

from lib5c.tools.parents import level_parser, simple_in_out_parser, \
    parallelization_parser, primerfile_parser


[docs]def add_bias_heatmap_tool(parser):
    bias_heatmap_parser = parser.add_parser(
        'bias-heatmap',
        prog='lib5c plot bias-heatmap',
        help='plot bias heatmap',
        parents=[primerfile_parser, level_parser, simple_in_out_parser,
                 parallelization_parser]
    )
    bias_heatmap_parser.add_argument(
        'bias_factor',
        type=str,
        help='''Specify which bias factor to plot heatmaps for. The bias factor
        must match the header label of a column in the primer bedfile, or it
        must be a reference to a bed file containing bias information.''')
    bias_heatmap_parser.add_argument(
        '-D', '--divide_by_length',
        action='store_true',
        help='''If bias_factor is a bed file, divide the number of intersections
        by the length of the fragment in kb.''')
    bias_heatmap_parser.add_argument(
        '-U', '--unlog',
        action='store_true',
        help='''Show the bias on an unlogged fold change scale. By default it
        will be shown on a log2 fold change scale.''')
    bias_heatmap_parser.add_argument(
        '-A', '--asymmetric',
        action='store_true',
        help='''Pass this flag to allow the heatmap to be asymmetric by only
        considering the upper triangular entries of the contact matrices. By
        default, the heatmaps will be symmetric.''')
    bias_heatmap_parser.add_argument(
        '-n', '--num_bins',
        type=int,
        help='''Stratify the bias factor into this many bins with equal numbers
        of loci in each bin.''')
    bias_heatmap_parser.add_argument(
        '-b', '--bins',
        type=str,
        help='''Stratify the bias factor into bins based on these boundaries,
        which should be passed as a comma-separated list such as
        '(min,split1,split2,max)'.''')
    bias_heatmap_parser.add_argument(
        '-c', '--colormap',
        type=str,
        default='bias',
        help='''Specify the colormap to use. Special values include 'bias'. The
        default is 'bias'.''')
    bias_heatmap_parser.add_argument(
        '-s', '--scale',
        type=str,
        default='(-1.0,1.0)',
        help='''Specify the scale for the heatmap colorbar. The default is
        '(-1.0,1.0)'.''')
    bias_heatmap_parser.add_argument(
        '-m', '--midpoint',
        type=float,
        default=0.5,
        help='''Force a midpoint for the colormap, as a fraction of the range of
        the scale. The default is 0.5.''')
    bias_heatmap_parser.add_argument(
        '-r', '--region',
        type=str,
        help='''Only include counts from this region.''')
    bias_heatmap_parser.add_argument(
        '-e', '--expected_model',
        type=str,
        help='''Pass a string reference to a countsfile containing the expected
        model to be used. If this is not passed, a linear log-log fit will be
        used.''')
    bias_heatmap_parser.add_argument(
        '-Z', '--zero_inflated',
        action='store_true',
        help='''Pass this flag to treat the bias factor as "zero inflated",
        which will cause all the zero values to land in a dedicated "zero
        stratum" and allocate the remaining bins evenly among the positive
        data.''')
    bias_heatmap_parser.add_argument(
        '-Q', '--unique',
        action='store_true',
        help='''Pass this flag to ignore -b/--bins and -n/--num_bins and instead
        group each unique value of the bias factor into its own stratum.''')
    bias_heatmap_parser.set_defaults(func=bias_heatmap_tool)


[docs]def bias_heatmap_tool(parser, args):
    import os

    from lib5c.tools.helpers import resolve_parallel, resolve_primerfile, \
        resolve_level
    from lib5c.parsers.primers import load_primermap
    from lib5c.parsers.counts import load_counts
    from lib5c.parsers.bed import load_features
    from lib5c.util.bed import count_intersections
    from lib5c.plotters.colormaps import get_colormap
    from lib5c.plotters.bias_heatmaps import plot_bias_heatmap
    from lib5c.algorithms.expected import \
        make_poly_log_log_binned_expected_matrix, \
        make_poly_log_log_fragment_expected_matrix

    # resolve parallel and primerfile
    resolve_parallel(parser, args, subcommand='plot bias-heatmap')
    primerfile = resolve_primerfile(args.infile, args.primerfile)

    # load counts
    print('loading counts')
    primermap = load_primermap(primerfile)
    obs_counts = load_counts(args.infile, primermap)
    if args.expected_model:
        exp_counts = load_counts(args.expected_model, primermap)
    else:
        if resolve_level(primermap, args.level) == 'fragment':
            exp_counts = make_poly_log_log_fragment_expected_matrix(
                obs_counts, primermap)
        else:
            exp_counts = make_poly_log_log_binned_expected_matrix(obs_counts)

    print('preparing to plot')
    # parse vmin and vmax from args.scale
    vmin, vmax = map(float, args.scale.strip('()').split(','))

    # parse bins if passed
    bins = list(map(float, args.bins.strip('()').split(','))) \
        if args.bins is not None else None

    # resolve colormap
    cmap = get_colormap(args.colormap)

    # resolve log
    log = True
    if args.unlog:
        log = False

    # resolve bias_factor
    if os.path.exists(args.bias_factor):
        boundaries = [{'start': primermap[region][0]['start'],
                       'end'  : primermap[region][-1]['end'],
                       'chrom': primermap[region][0]['chrom']}
                      for region in primermap.keys()]
        features = load_features(args.bias_factor, boundaries=boundaries)
        for region in primermap.keys():
            for i in range(len(primermap[region])):
                primermap[region][i]['peaks'] = count_intersections(
                    primermap[region][i],
                    features[primermap[region][i]['chrom']])
                primermap[region][i]['peaks per kb'] = \
                    primermap[region][i]['peaks'] * 1000 / \
                    float(primermap[region][i]['length'])
        if args.divide_by_length:
            resolved_bias_factor = 'peaks per kb'
        else:
            resolved_bias_factor = 'peaks'
    else:
        resolved_bias_factor = args.bias_factor

    # plot heatmap
    print('plotting')
    plot_bias_heatmap(
        obs_counts, exp_counts, primermap, resolved_bias_factor,
        bins=bins, n_bins=args.num_bins, cmap=cmap, vmin=vmin, vmax=vmax,
        midpoint=args.midpoint, log=log, region=args.region,
        asymmetric=args.asymmetric, zero_inflated=args.zero_inflated,
        unique=args.unique, outfile=args.outfile)