Source code for lib5c.plotters.enrichment

"""
Module for plotting visualizations of enrichments of annotations within
categories of categorized loops.
"""

from __future__ import division

import numpy as np
from matplotlib import pyplot as plt

from lib5c.algorithms.enrichment import get_annotation_percentage_all, \
    get_fisher_exact_pvalue_all
from lib5c.util.plotting import plotter


@plotter
def plot_looptype_vs_annotation_heatmap(annotationmaps, looping_classes,
                                        constant_annotation,
                                        loop_type_order=None,
                                        annotation_order=None, threshold=0,
                                        margin=1, vmin=-2.0, vmax=2.0,
                                        despine=False, style='dark',
                                        **kwargs):
    """
    Plot a heatmap of enrichments for one fixed annotation, varying the loop
    category on the x-axis and the annotation on the other side on the y-axis.

    Each cell is colored by the log2 fold change of the annotation percentage
    in the loop category over the annotation percentage in the
    ``'background'`` category, and is labeled with the p-value returned by
    ``get_fisher_exact_pvalue_all()``, rounded up to one digit (or shown as
    ``'E-k'`` when it is below 0.01).

    Parameters
    ----------
    annotationmaps : dict of annotationmap
        A dict describing the annotations. In total, it should have the
        following structure::

            {
                'annotation_a_name': {
                    'region_1_name': list of int,
                    'region_2_name': list of int,
                    ...
                 },
                'annotation_b_name': {
                    'region_1_name': list of int,
                    'region_2_name': list of int,
                    ...
                },
                ...
            }

        where ``annotationmaps['annotation_a']['region_r']`` should be a list
        of ints describing the number of ``'annotation_a'``s present in each
        bin of ``'region_r'``.
    looping_classes : dict of np.ndarray with str dtype
        The keys should be region names as strings, the values should be
        square, symmetric arrays of the same size and shape as the indicated
        region, with string loop category names in the positions of
        categorized loops.
    constant_annotation : str
        The annotation to hold constant throughout the heatmap.
    loop_type_order : list of str, optional
        The loop categories to include on the x-axis, in order. If None, falls
        back to the sorted unique categories in ``looping_classes``, excluding
        ``''`` and ``'background'``.
    annotation_order : list of str, optional
        The annotations to include on the y-axis, in order. If None, falls
        back to ``sorted(annotationmaps.keys())``.
    threshold : int
        Bins are defined to contain an annotation if they are "hit" strictly
        more than ``threshold`` times by the annotation.
    margin : int
        A bin is defined to contain an annotation if any bin within ``margin``
        bins is "hit" by the annotation. Corresponds to a "margin for error"
        in the intersection precision.
    vmin : float
        The lowest log2 fold change to show on the colorbar.
    vmax : float
        The highest log2 fold change to show on the colorbar.
    despine, style
        Not read by this function body; presumably consumed by the
        ``@plotter`` decorator (confirm against ``lib5c.util.plotting``).
    kwargs : kwargs
        Typical plotter kwargs.

    Returns
    -------
    pyplot axis
        The axis plotted on. The function body itself returns nothing; this
        return value is presumably supplied by the ``@plotter`` decorator.
    """
    def p_value_label(p_value):
        """Format a p-value as the short text drawn inside a cell."""
        if p_value >= 0.9:
            return '1.0'
        if p_value >= 0.1:
            return '0.%i' % (int(10 * p_value) + 1)
        if p_value >= 0.09:
            return '0.1'
        if p_value >= 0.01:
            return '0.0%i' % (int(100 * p_value) + 1)
        if p_value == 0.0:
            return '0.0'
        return 'E-%i' % int(-np.log10(p_value))

    # resolve loop_type_order
    if loop_type_order is None:
        loop_type_order = sorted(set(np.unique(np.concatenate(
            [looping_classes[region].flatten()
             for region in looping_classes]))) - {'', 'background'})

    # resolve annotation_order
    if annotation_order is None:
        annotation_order = sorted(annotationmaps.keys())

    # prepare array for imshow; rows are annotations, columns are loop types
    zero_fold_value = np.log2(0.0001)  # stand-in for log2(0)
    array = []
    for annotation in annotation_order:
        # the background percentage does not depend on the loop type, so it
        # is computed once per row instead of once per cell
        background = get_annotation_percentage_all(
            annotation, constant_annotation, 'background', annotationmaps,
            looping_classes, threshold=threshold, margin=margin)
        row = []
        for loop_type in loop_type_order:
            if not background:
                # fold change is undefined; show the cell as "no change"
                row.append(0)
                continue
            selected = get_annotation_percentage_all(
                annotation, constant_annotation, loop_type, annotationmaps,
                looping_classes, threshold=threshold, margin=margin)
            fold_enrichment = selected / background
            if fold_enrichment:
                row.append(np.log2(fold_enrichment))
            else:
                row.append(zero_fold_value)
        array.append(row)

    # prepare pvalue array
    p_values = [
        [get_fisher_exact_pvalue_all(
            annotation, constant_annotation, loop_type, annotationmaps,
            looping_classes, threshold=threshold, margin=margin)
         for loop_type in loop_type_order]
        for annotation in annotation_order]

    # plot heatmap; the width follows the number of columns (loop types) and
    # the height follows the number of rows (annotations)
    fig = plt.gcf()
    fig.set_size_inches(72 / 60.0 * len(loop_type_order),
                        40 / 60.0 * len(annotation_order))
    cmap = plt.get_cmap('bwr')
    im = plt.imshow(array, interpolation='none', cmap=cmap, origin='lower',
                    vmin=vmin, vmax=vmax)
    for i, p_value_row in enumerate(p_values):
        for j, p_value in enumerate(p_value_row):
            plt.text(j, i, p_value_label(p_value), ha='center', va='center')
    plt.colorbar(im)
    plt.xticks(np.arange(len(loop_type_order)), loop_type_order, rotation=45,
               ha='right')
    plt.yticks(np.arange(len(annotation_order)), annotation_order)


@plotter
def plot_annotation_vs_annotation_heatmap(annotationmaps, looping_classes,
                                          loop_type, axis_order=None,
                                          threshold=0, margin=1, vmin=-2.0,
                                          vmax=2.0, despine=False,
                                          style='dark', **kwargs):
    """
    Plot a square heatmap of enrichments for one fixed loop category, with
    the same list of annotations laid out along both the x- and y-axes.

    The cell in row ``i`` and column ``j`` is colored by the log2 fold change
    of the annotation percentage for ``(axis_order[i], axis_order[j])`` in
    ``loop_type`` over the same percentage in the ``'background'`` category.
    The cell shows 0 when the background percentage is zero and
    ``log2(0.0001)`` when the fold change is zero. Each cell is labeled with
    the p-value returned by ``get_fisher_exact_pvalue_all()``, rounded up to
    one digit (or shown as ``'E-k'`` when it is below 0.01).

    Parameters
    ----------
    annotationmaps : dict of annotationmap
        A dict describing the annotations. In total, it should have the
        following structure::

            {
                'annotation_a_name': {
                    'region_1_name': list of int,
                    'region_2_name': list of int,
                    ...
                 },
                'annotation_b_name': {
                    'region_1_name': list of int,
                    'region_2_name': list of int,
                    ...
                },
                ...
            }

        where ``annotationmaps['annotation_a']['region_r']`` should be a list
        of ints describing the number of ``'annotation_a'``s present in each
        bin of ``'region_r'``.
    looping_classes : dict of np.ndarray with str dtype
        The keys should be region names as strings, the values should be
        square, symmetric arrays of the same size and shape as the indicated
        region, with string loop category names in the positions of
        categorized loops.
    loop_type : str
        The loop category to hold constant throughout the heatmap.
    axis_order : list of str, optional
        The annotations to include on each axis, in order. If None, falls
        back to ``sorted(annotationmaps.keys())``.
    threshold : int
        Bins are defined to contain an annotation if they are "hit" strictly
        more than ``threshold`` times by the annotation.
    margin : int
        A bin is defined to contain an annotation if any bin within ``margin``
        bins is "hit" by the annotation. Corresponds to a "margin for error"
        in the intersection precision.
    vmin : float
        The lowest log2 fold change to show on the colorbar.
    vmax : float
        The highest log2 fold change to show on the colorbar.
    despine, style
        Not read by this function body; presumably consumed by the
        ``@plotter`` decorator (confirm against ``lib5c.util.plotting``).
    kwargs : kwargs
        Typical plotter kwargs.

    Returns
    -------
    pyplot axis
        The axis plotted on. The function body itself returns nothing; this
        return value is presumably supplied by the ``@plotter`` decorator.
    """
    def percentage(annotation_y, annotation_x, category):
        """Annotation percentage for one annotation pair in one category."""
        return get_annotation_percentage_all(
            annotation_y, annotation_x, category, annotationmaps,
            looping_classes, threshold=threshold, margin=margin)

    def log2_fold_change(annotation_y, annotation_x):
        """Heatmap value for one cell: log2(selected / background)."""
        selected = percentage(annotation_y, annotation_x, loop_type)
        background = percentage(annotation_y, annotation_x, 'background')
        if not background:
            # fold change is undefined; show the cell as "no change"
            return 0
        fold_enrichment = selected / background
        if not fold_enrichment:
            # stand-in for log2(0)
            return np.log2(0.0001)
        return np.log2(fold_enrichment)

    def p_value_label(p_value):
        """Format a p-value as the short text drawn inside a cell."""
        if p_value >= 0.9:
            return '1.0'
        if p_value >= 0.1:
            return '0.%i' % (int(10 * p_value) + 1)
        if p_value >= 0.09:
            return '0.1'
        if p_value >= 0.01:
            return '0.0%i' % (int(100 * p_value) + 1)
        if p_value == 0.0:
            return '0.0'
        return 'E-%i' % int(-np.log10(p_value))

    # default to every annotation, in sorted order
    if axis_order is None:
        axis_order = sorted(annotationmaps.keys())
    n_annotations = len(axis_order)

    # first pass: the fold change matrix (rows -> y-axis, columns -> x-axis)
    fold_changes = [
        [log2_fold_change(annotation_y, annotation_x)
         for annotation_x in axis_order]
        for annotation_y in axis_order]

    # second pass: the matching matrix of p-values
    p_values = [
        [get_fisher_exact_pvalue_all(
            annotation_y, annotation_x, loop_type, annotationmaps,
            looping_classes, threshold=threshold, margin=margin)
         for annotation_x in axis_order]
        for annotation_y in axis_order]

    # draw the heatmap and write the p-value label into every cell
    plt.gcf().set_size_inches(72 / 60.0 * n_annotations,
                              40 / 60.0 * n_annotations)
    im = plt.imshow(fold_changes, interpolation='none',
                    cmap=plt.get_cmap('bwr'), origin='lower', vmin=vmin,
                    vmax=vmax)
    for row_index, p_value_row in enumerate(p_values):
        for column_index, p_value in enumerate(p_value_row):
            plt.text(column_index, row_index, p_value_label(p_value),
                     ha='center', va='center')
    plt.colorbar(im)
    tick_positions = np.arange(n_annotations)
    plt.xticks(tick_positions, axis_order, rotation=45, ha='right')
    plt.yticks(tick_positions, axis_order)


@plotter
def plot_stack_bargraph(annotation_a, annotation_b, loop_types, labels,
                        colors, annotationmaps, looping_classes, threshold=0,
                        margin=1, **kwargs):
    """
    Plot a bar graph with loop types arranged on the x-axis and, for each
    loop type, the percentage of its loops in which ``annotation_a``
    interacts with ``annotation_b``.

    The bar heights are the values returned by
    ``get_annotation_percentage_all()`` multiplied by 100. A dashed
    horizontal reference line is drawn at the height of the bar for the last
    entry of ``loop_types``.

    Parameters
    ----------
    annotation_a : str
        First annotation you are interested in.
    annotation_b : str
        Second annotation you are interested in.
    loop_types : list of str
        The order in which to arrange the loop types along the x-axis, from
        left to right. If you exclude a loop type from this list, it will be
        excluded from the bar graph. Must not be empty. The height of the last
        entry is used for the dashed reference line.
    labels : list of str
        The labels you want to be assigned on the x-axis to each of the loop
        types. The label order should correspond to the order of
        ``loop_types``.
    colors : list of valid matplotlib colors
        The colors to plot each bar with. The order should correspond to the
        order of ``loop_types``.
    annotationmaps : dict of annotationmap
        A dict describing the annotations. In total, it should have the
        following structure::

            {
                'annotation_a_name': {
                    'region_1_name': list of int,
                    'region_2_name': list of int,
                    ...
                 },
                'annotation_b_name': {
                    'region_1_name': list of int,
                    'region_2_name': list of int,
                    ...
                },
                ...
            }

        where ``annotationmaps['annotation_a']['region_r']`` should be a list
        of ints describing the number of ``'annotation_a'``s present in each
        bin of ``'region_r'``.
    looping_classes : dict of np.ndarray with str dtype
        The keys should be region names as strings, the values should be
        square, symmetric arrays of the same size and shape as the indicated
        region, with string loop category names in the positions of
        categorized loops.
    threshold : int
        Bins are defined to contain an annotation if they are "hit" strictly
        more than ``threshold`` times by the annotation.
    margin : int
        A bin is defined to contain an annotation if any bin within ``margin``
        bins is "hit" by the annotation. Corresponds to a "margin for error"
        in the intersection precision.
    kwargs : kwargs
        Typical plotter kwargs.

    Returns
    -------
    pyplot axis
        The axis plotted on. The function body itself returns nothing; this
        return value is presumably supplied by the ``@plotter`` decorator.
    """
    # extract data
    array_data = [
        100 * get_annotation_percentage_all(
            annotation_a, annotation_b, loop_type, annotationmaps,
            looping_classes, threshold=threshold, margin=margin)
        for loop_type in loop_types]

    # plot figure
    plt.clf()
    plt.figure(num=None, figsize=(5, 3.5), dpi=200, facecolor='w',
               edgecolor='w')
    xlocations = np.arange(len(array_data)) + 0.5
    width = 0.5
    # reference line at the height of the last loop type's bar
    plt.axhline(y=array_data[-1], linestyle='--', linewidth=3.0,
                color='#666666')
    # The tick positions and x-limits below assume each bar spans
    # [x, x + width]. Matplotlib >= 2.0 centers bars on x by default, so the
    # edge alignment is requested explicitly to keep the ticks under the
    # middle of the bars.
    plt.bar(xlocations, array_data, width=width, color=colors, align='edge')
    plt.xticks(xlocations + width / 2, labels, fontsize=6)
    plt.xlim(0, xlocations[-1] + width * 2)
    plt.ylabel(
        'Percentage with %s against %s' % (annotation_a, annotation_b),
        fontsize=5)