Source code for lib5c.plotters.heatmap

"""
Module providing ``plot_heatmap()``, a wrapper function for the extendable
heatmap system defined in the ``lib5c.plotters.extendable`` module.
"""

import os
import itertools
import warnings

from lib5c.parsers.bed import load_features
from lib5c.plotters.extendable import ExtendableHeatmap
from lib5c.util.plotting import plotter
from lib5c.util.parallelization import parallelize_regions
from lib5c.contrib.pybigwig.bigwig import bigwig_avail, BigWig


[docs]@parallelize_regions @plotter def plot_heatmap(matrix, grange_x, grange_y=None, rulers=True, ruler_fontsize=7, genes=None, gene_colors=None, colorscale=None, colorbar=False, colormap='abs_obs', motif_tracks=None, motif_track_colors=None, motif_track_labels=None, motif_linewidth=0.5, bed_tracks=None, bed_track_colors=None, bed_track_labels=None, chipseq_tracks=None, snp_tracks=None, snp_colors=None, snp_track_labels=None, chipseq_track_scales=None, chipseq_track_colors=None, chipseq_track_labels=None, domains=None, domain_color='g', clusters=None, cluster_colors=None, dpi=800, despine=False, style=None, **kwargs): """ Wrapper function for creating ExtendableHeatmaps. Parameters ---------- matrix : np.ndarray The matrix to plot in the heatmap. grange_x : dict or list of dict The genomic range represented by the x-axis of this heatmap. The dict should have the form:: { 'chrom': str, 'start': int, 'end': int } Pass a list of dicts of this form (assumed to be sorted) to assume that the genomic range extends from the start of the first range to the end of the last range. grange_y : dict, optional The genomic range represented by the y-axis of this heatmap. If None, the heatmap is assumed to be symmetric. rulers : bool Pass True to include genomic coordinate rulers on the heatmap. ruler_fontsize : int Controls the fontsize for the ruler when ``rulers`` is True. genes : str or list of dict Pass None to skip plotting gene tracks. Pass one of 'mm9', 'mm10', 'hg18', 'hg19', or 'hg38' to add gene tracks for the selected reference genome. To plot a custom set of genes, pass a list of dicts of the form:: { 'chrom' : str, 'start' : int, 'end' : int, 'name' : str, 'strand': '+' or '-', 'blocks': list of dicts } Blocks typically represent exons and are represented as dicts with the following structure:: { 'start': int, 'end' : int } gene_colors : dict, optional Pass a dict mapping gene names or ID's as strings to valid matplotlib colors to plot specific genes in the specified colors. colorscale : tuple of float, optional Specify the range of the heatmap colorbar as a tuple of the form (min, max). colorbar : bool Pass True to include a colorbar. colormap : str Specify the colormap to use. motif_tracks : list of str, optional Pass file references to bed files to add to the heatmap as motif tracks. motif_track_colors : dict, optional Map from strand value (e.g., '+', '-') to color name for motifs with that strand value (i.e., orientation). If not provided for a given strand, color is 'k' by default. motif_track_labels : list of str, optional Parallel to ``motif_tracks``, the ith string will be used to label the ith motif track. motif_linewidth : float Pass a linewidth to use when drawing motif instances. bed_tracks : list of str, optional Pass file references to bed files to add to the heatmap as bed tracks. bed_track_colors : dict, optional Map from strand value (e.g., '+', '-') to color name for bed features with that strand value (i.e., orientation). If not provided for a given strand, color is 'k' by default. bed_track_labels : Parallel to ``bed_tracks``, the ith string will be used to label the ith bed track. snp_tracks : list of str, optional Pass file references to bed files to add to the heatmap as SNP tracks. snp_colors : dict, optional Map from SNP id's or names to colors. If not provided for a given SNP, color is 'k' by default. snp_track_labels : list of str, optional Parallel to ``snp_tracks``, the ith string will be used to label the ith SNP track. chipseq_tracks : list of str or list of lists of dicts, optional Pass file references to bed, bedgraph, or bigwig files to add to the heatmap as chipseq/feature tracks. Alternatively, pass a list of feature lists where each feature is a dict with the form:: { 'chrom': str, 'start': int, 'end': int, 'value': float } where the 'value' key is optional. chipseq_track_scales : list of tuples of float, optional Parallel to ``chipseq_tracks``, the ith tuple should have the form (min, max) and should specify the axis limits of the ith track. Pass None to scale chipseq tracks automatically. chipseq_track_colors : list of str, optional Parallel to ``chipseq_tracks``, the ith string should indicate the color of the ith chipseq track. Pass None to color all chipseq tracks black. chipseq_track_labels : list of str, optional Parallel to ``chipseq_tracks``, the ith string will be used to label the ith chipseq track. domains : list of dict, optional Each dict should represent one domain and should have the form:: { 'chrom': str, 'start': int, 'end': int } domain_color : str The color to use to outline the domains. clusters : list of lists of dicts, optional Each inner list should describe one cluster to be outlined. Each cluster is a list of dicts of the form:: { 'x': int, 'y': int } where these integers represent indices of ``matrix``. cluster_colors : list of str, optional Parallel to ``clusters``, the ith string should indicate the color to use to outline the ith cluster. Pass None to outline all clusters in green. kwargs : kwargs Typical plotter kwargs. Returns ------- lib5c.plotters.extendable.ExtendableHeatmap The resulting ExtendableHeatmap. """ # handle case where grange_x is a list if type(grange_x) == list: grange_x = {'chrom': grange_x[0]['chrom'], 'start': grange_x[0]['start'], 'end': grange_x[-1]['end']} # handle case where grange_y is None if grange_y is None: grange_y = grange_x # make the basic heatmap object h = ExtendableHeatmap( array=matrix, grange_x=grange_x, grange_y=grange_y, colorscale=colorscale, colormap=colormap ) # add rulers if rulers: h.add_rulers(fontsize=ruler_fontsize) # add snp tracks if snp_tracks is not None: padding = itertools.chain((0.1,), itertools.repeat(0.0)) snps = [load_features(x) for x in snp_tracks] snp_names = [os.path.splitext(os.path.basename(x))[0] for x in snp_tracks] for i, (snp_name, snp_track) in enumerate(zip(snp_names, snps)): track_label = snp_track_labels[i] \ if snp_track_labels is not None else None h.add_snp_tracks(snp_track[grange_x['chrom']], name=snp_name, pad=next(padding), colors=snp_colors, track_label=track_label) # add gene tracks if genes is not None: if genes in ['mm9', 'mm10', 'hg18', 'hg19', 'hg38']: h.add_refgene_stacks(genes, colors=gene_colors) else: h.add_gene_stacks(genes, colors=gene_colors) # add chipseq tracks if chipseq_tracks is not None: # set default scales and colors if necessary if chipseq_track_scales is None: chipseq_track_scales = [None] * len(chipseq_tracks) if chipseq_track_colors is None: chipseq_track_colors = ['k'] * len(chipseq_tracks) # plot each track for i in range(len(chipseq_tracks)): # get a feature set based on the file extension ext = os.path.splitext(chipseq_tracks[i])[1].lower() if ext in ['.bw', '.bigwig']: if not bigwig_avail(): warnings.warn('failed to import pyBigWig - ' 'is it installed?', ImportWarning) continue features_x = BigWig(chipseq_tracks[i]).query( grange_x, num_bins=1000) features_y = BigWig(chipseq_tracks[i]).query( grange_y, num_bins=1000) else: features_x = load_features( chipseq_tracks[i], boundaries=[grange_x])[grange_x['chrom']] features_y = load_features( chipseq_tracks[i], boundaries=[grange_y])[grange_y['chrom']] track_label = chipseq_track_labels[i] \ if chipseq_track_labels is not None else None h.add_chipseq_track( features_x, loc='bottom', name=chipseq_tracks[i], axis_limits=chipseq_track_scales[i], color=chipseq_track_colors[i], track_label=track_label) h.add_chipseq_track( features_y, loc='left', name=chipseq_tracks[i], axis_limits=chipseq_track_scales[i], color=chipseq_track_colors[i], track_label=track_label) # add motif tracks if motif_tracks is not None: motifs = [load_features(x) for x in motif_tracks] motif_names = [os.path.splitext(os.path.basename(x))[0] for x in motif_tracks] for i, (motif_name, motif) in enumerate(zip(motif_names, motifs)): track_label = motif_track_labels[i] \ if motif_track_labels is not None else None h.add_motif_tracks(motif[grange_x['chrom']], name=motif_name, motif_linewidth=motif_linewidth, colors=motif_track_colors, track_label=track_label) # add bed tracks if bed_tracks is not None: bed_features = [load_features(x) for x in bed_tracks] bed_names = [os.path.splitext(os.path.basename(x))[0] for x in bed_tracks] for i, (bed_name, bed_feature) in enumerate(zip(bed_names, bed_features)): track_label = bed_track_labels[i] \ if bed_track_labels is not None else None h.add_bed_tracks(bed_feature[grange_x['chrom']], name=bed_name, colors=bed_track_colors, track_label=track_label) # add domain outlines if domains is not None: h.outline_domains(domains, color=domain_color) # add cluster outlines if clusters is not None: if cluster_colors is None: cluster_colors = ['g'] * len(clusters) for i in range(len(clusters)): h.outline_cluster(clusters[i], cluster_colors[i]) # add colorbar if colorbar: h.add_colorbar() return h