"""
Module providing ``plot_heatmap()``, a wrapper function for the extendable
heatmap system defined in the ``lib5c.plotters.extendable`` module.
"""
import os
import itertools
import warnings
from lib5c.parsers.bed import load_features
from lib5c.plotters.extendable import ExtendableHeatmap
from lib5c.util.plotting import plotter
from lib5c.util.parallelization import parallelize_regions
from lib5c.contrib.pybigwig.bigwig import bigwig_avail, BigWig
def _track_label(labels, index):
    """Return ``labels[index]``, or None when no labels were supplied."""
    return labels[index] if labels is not None else None


def _track_name(path):
    """Return the basename of ``path`` with its final extension removed."""
    return os.path.splitext(os.path.basename(path))[0]


@parallelize_regions
@plotter
def plot_heatmap(matrix, grange_x, grange_y=None, rulers=True, ruler_fontsize=7,
                 genes=None, gene_colors=None, colorscale=None, colorbar=False,
                 colormap='abs_obs', motif_tracks=None, motif_track_colors=None,
                 motif_track_labels=None, motif_linewidth=0.5, bed_tracks=None,
                 bed_track_colors=None, bed_track_labels=None,
                 chipseq_tracks=None, snp_tracks=None, snp_colors=None,
                 snp_track_labels=None, chipseq_track_scales=None,
                 chipseq_track_colors=None, chipseq_track_labels=None,
                 domains=None, domain_color='g', clusters=None,
                 cluster_colors=None, dpi=800, despine=False, style=None,
                 **kwargs):
    """
    Wrapper function for creating ExtendableHeatmaps.

    Builds an ``ExtendableHeatmap`` from ``matrix`` and then, in this order,
    adds rulers, SNP tracks, gene tracks, chipseq tracks, motif tracks, bed
    tracks, domain outlines, cluster outlines, and a colorbar, each only if
    requested.

    Parameters
    ----------
    matrix : np.ndarray
        The matrix to plot in the heatmap.
    grange_x : dict or list of dict
        The genomic range represented by the x-axis of this heatmap. The dict
        should have the form::

            {
                'chrom': str,
                'start': int,
                'end': int
            }

        Pass a list of dicts of this form (assumed to be sorted) to assume that
        the genomic range extends from the start of the first range to the end
        of the last range. The chromosome is taken from the first range.
    grange_y : dict, optional
        The genomic range represented by the y-axis of this heatmap. If None,
        the heatmap is assumed to be symmetric.
    rulers : bool
        Pass True to include genomic coordinate rulers on the heatmap.
    ruler_fontsize : int
        Controls the fontsize for the ruler when ``rulers`` is True.
    genes : str or list of dict
        Pass None to skip plotting gene tracks. Pass one of 'mm9', 'mm10',
        'hg18', 'hg19', or 'hg38' to add gene tracks for the selected reference
        genome. To plot a custom set of genes, pass a list of dicts of the
        form::

            {
                'chrom' : str,
                'start' : int,
                'end'   : int,
                'name'  : str,
                'strand': '+' or '-',
                'blocks': list of dicts
            }

        Blocks typically represent exons and are represented as dicts with the
        following structure::

            {
                'start': int,
                'end'  : int
            }

    gene_colors : dict, optional
        Pass a dict mapping gene names or ID's as strings to valid matplotlib
        colors to plot specific genes in the specified colors.
    colorscale : tuple of float, optional
        Specify the range of the heatmap colorbar as a tuple of the form
        (min, max).
    colorbar : bool
        Pass True to include a colorbar.
    colormap : str
        Specify the colormap to use.
    motif_tracks : list of str, optional
        Pass file references to bed files to add to the heatmap as motif tracks.
    motif_track_colors : dict, optional
        Map from strand value (e.g., '+', '-') to color name for motifs with
        that strand value (i.e., orientation). If not provided for a given
        strand, color is 'k' by default.
    motif_track_labels : list of str, optional
        Parallel to ``motif_tracks``, the ith string will be used to label the
        ith motif track.
    motif_linewidth : float
        Pass a linewidth to use when drawing motif instances.
    bed_tracks : list of str, optional
        Pass file references to bed files to add to the heatmap as bed tracks.
    bed_track_colors : dict, optional
        Map from strand value (e.g., '+', '-') to color name for bed features
        with that strand value (i.e., orientation). If not provided for a given
        strand, color is 'k' by default.
    bed_track_labels : list of str, optional
        Parallel to ``bed_tracks``, the ith string will be used to label the
        ith bed track.
    snp_tracks : list of str, optional
        Pass file references to bed files to add to the heatmap as SNP tracks.
    snp_colors : dict, optional
        Map from SNP id's or names to colors. If not provided for a given SNP,
        color is 'k' by default.
    snp_track_labels : list of str, optional
        Parallel to ``snp_tracks``, the ith string will be used to label the ith
        SNP track.
    chipseq_tracks : list of str or list of lists of dicts, optional
        Pass file references to bed, bedgraph, or bigwig files to add to the
        heatmap as chipseq/feature tracks. Alternatively, pass a list of feature
        lists where each feature is a dict with the form::

            {
                'chrom': str,
                'start': int,
                'end': int,
                'value': float
            }

        where the 'value' key is optional. For a feature list, only features
        whose 'chrom' matches the chromosome of the axis being drawn are
        plotted, and the track is named ``'chipseq_<i>'`` where ``<i>`` is its
        position in ``chipseq_tracks``. File-based tracks are named by the
        file reference itself.
    chipseq_track_scales : list of tuples of float, optional
        Parallel to ``chipseq_tracks``, the ith tuple should have the form
        (min, max) and should specify the axis limits of the ith track. Pass
        None to scale chipseq tracks automatically.
    chipseq_track_colors : list of str, optional
        Parallel to ``chipseq_tracks``, the ith string should indicate the color
        of the ith chipseq track. Pass None to color all chipseq tracks black.
    chipseq_track_labels : list of str, optional
        Parallel to ``chipseq_tracks``, the ith string will be used to label the
        ith chipseq track.
    domains : list of dict, optional
        Each dict should represent one domain and should have the form::

            {
                'chrom': str,
                'start': int,
                'end': int
            }

    domain_color : str
        The color to use to outline the domains.
    clusters : list of lists of dicts, optional
        Each inner list should describe one cluster to be outlined. Each cluster
        is a list of dicts of the form::

            {
                'x': int,
                'y': int
            }

        where these integers represent indices of ``matrix``.
    cluster_colors : list of str, optional
        Parallel to ``clusters``, the ith string should indicate the color to
        use to outline the ith cluster. Pass None to outline all clusters in
        green.
    dpi, despine, style
        Not read by this function's body. Presumably consumed by the
        ``@plotter`` decorator - confirm against ``lib5c.util.plotting``.
    kwargs : kwargs
        Typical plotter kwargs.

    Returns
    -------
    lib5c.plotters.extendable.ExtendableHeatmap
        The resulting ExtendableHeatmap.
    """
    # handle case where grange_x is a list: span from the first range's start
    # to the last range's end
    if isinstance(grange_x, list):
        grange_x = {'chrom': grange_x[0]['chrom'],
                    'start': grange_x[0]['start'],
                    'end': grange_x[-1]['end']}

    # handle case where grange_y is None (symmetric heatmap)
    if grange_y is None:
        grange_y = grange_x

    chrom_x = grange_x['chrom']
    chrom_y = grange_y['chrom']

    # make the basic heatmap object
    h = ExtendableHeatmap(
        array=matrix,
        grange_x=grange_x,
        grange_y=grange_y,
        colorscale=colorscale,
        colormap=colormap
    )

    # add rulers
    if rulers:
        h.add_rulers(fontsize=ruler_fontsize)

    # add snp tracks
    if snp_tracks is not None:
        for i, path in enumerate(snp_tracks):
            # only the first SNP track is padded away from its neighbor
            pad = 0.1 if i == 0 else 0.0
            h.add_snp_tracks(load_features(path)[chrom_x],
                             name=_track_name(path), pad=pad,
                             colors=snp_colors,
                             track_label=_track_label(snp_track_labels, i))

    # add gene tracks
    if genes is not None:
        if genes in ('mm9', 'mm10', 'hg18', 'hg19', 'hg38'):
            h.add_refgene_stacks(genes, colors=gene_colors)
        else:
            h.add_gene_stacks(genes, colors=gene_colors)

    # add chipseq tracks
    if chipseq_tracks is not None:
        # set default scales and colors if necessary
        if chipseq_track_scales is None:
            chipseq_track_scales = [None] * len(chipseq_tracks)
        if chipseq_track_colors is None:
            chipseq_track_colors = ['k'] * len(chipseq_tracks)

        # plot each track
        for i, track in enumerate(chipseq_tracks):
            if isinstance(track, (list, tuple)):
                # in-memory feature list: there is no file reference to use
                # as a name, so fall back to a positional one
                track_name = 'chipseq_%i' % i
                # NOTE(review): only a chromosome filter is applied here, no
                # start/end filtering - confirm add_chipseq_track() tolerates
                # features that fall outside the plotted range
                features_x = [f for f in track if f['chrom'] == chrom_x]
                features_y = [f for f in track if f['chrom'] == chrom_y]
            else:
                track_name = track
                # get a feature set based on the file extension
                ext = os.path.splitext(track)[1].lower()
                if ext in ('.bw', '.bigwig'):
                    if not bigwig_avail():
                        # NOTE: ImportWarning is ignored by Python's default
                        # warning filters, so this skip is usually silent
                        warnings.warn('failed to import pyBigWig - '
                                      'is it installed?', ImportWarning)
                        continue
                    features_x = BigWig(track).query(grange_x, num_bins=1000)
                    features_y = BigWig(track).query(grange_y, num_bins=1000)
                else:
                    features_x = load_features(
                        track, boundaries=[grange_x])[chrom_x]
                    features_y = load_features(
                        track, boundaries=[grange_y])[chrom_y]

            track_label = _track_label(chipseq_track_labels, i)

            # the same track is drawn along both axes of the heatmap
            h.add_chipseq_track(
                features_x, loc='bottom', name=track_name,
                axis_limits=chipseq_track_scales[i],
                color=chipseq_track_colors[i], track_label=track_label)
            h.add_chipseq_track(
                features_y, loc='left', name=track_name,
                axis_limits=chipseq_track_scales[i],
                color=chipseq_track_colors[i], track_label=track_label)

    # add motif tracks
    if motif_tracks is not None:
        for i, path in enumerate(motif_tracks):
            h.add_motif_tracks(load_features(path)[chrom_x],
                               name=_track_name(path),
                               motif_linewidth=motif_linewidth,
                               colors=motif_track_colors,
                               track_label=_track_label(motif_track_labels, i))

    # add bed tracks
    if bed_tracks is not None:
        for i, path in enumerate(bed_tracks):
            h.add_bed_tracks(load_features(path)[chrom_x],
                             name=_track_name(path),
                             colors=bed_track_colors,
                             track_label=_track_label(bed_track_labels, i))

    # add domain outlines
    if domains is not None:
        h.outline_domains(domains, color=domain_color)

    # add cluster outlines
    if clusters is not None:
        if cluster_colors is None:
            cluster_colors = ['g'] * len(clusters)
        # index (rather than zip) so a too-short color list still raises
        for i, cluster in enumerate(clusters):
            h.outline_cluster(cluster, cluster_colors[i])

    # add colorbar
    if colorbar:
        h.add_colorbar()

    return h