Source code for lib5c.plotters.distance_dependence

"""
Module for plotting distance dependence curves.
"""

import numpy as np
import pandas as pd
import matplotlib.lines as mlines
import matplotlib.pyplot as plt
import seaborn as sns

from lib5c.structures.dataset import Dataset
from lib5c.util.parallelization import parallelize_regions
from lib5c.util.plotting import plotter


DEFAULT_BINS = np.arange(0, 620e3, 20e3)


[docs]@plotter def plot_distance_dependence(counts_superdict, primermap, region=None, bins=None, labels=None, levels=None, colors=None, hue_order=None, **kwargs): """ Plots distance dependence curves. Parameters ---------- counts_superdict : counts_superdict The data to plot curves for. primermap : primermap The primermap associated with the counts_superdict. region : str or None Pass None to combine distance dependence information across all regions. Pass a region name to plot only that region's distance dependence curve. bins : list of numeric Bins to use to stratify interactions into distance groups. Should be in units of basepairs. labels : dict or None Pass a dict mapping the keys of counts_superdict to labels for plotting, or pass None to use the keys of counts_superdict as the labels. levels : dict or None Pass a dict mapping labels to levels to color-code the replicates by level. Pass None to give each replicate its own level. colors : dict or None Pass a dict mapping levels to matplotlib colors to decide what color to plot each level with. Pass None to automatically choose colors. hue_order : list of str or None Pass a list of the level names to determine their order in the legend. kwargs : kwargs Typical plotter kwargs. Returns ------- pyplot axis The axis plotted on. """ # resolve bins if bins is None: bins = DEFAULT_BINS # resolve labels if labels is None: labels = {rep: rep for rep in counts_superdict} # resolve levels if levels is None: levels = {label: label for label in labels.values()} # resolve colors if colors is None: unique_levels = list(set(levels.values())) palette = sns.color_palette('husl', len(unique_levels)) colors = {unique_levels[i]: palette[i] for i in range(len(unique_levels))} # compute palette (for seaborn functions) palette = {label: colors[levels[label]] for label in labels.values()} # make a Dataset d = Dataset.from_counts_superdict(counts_superdict, primermap) # slice region if appropriate if region is not None: d.df = d.df[d.df['region'] == region] # stratify on distance d.df['distance_stratum'] = pd.cut(d.df['distance'], bins) # reshape dataframe (stack replicates) stacked = pd.DataFrame(d.df['counts'].stack(), columns=['counts']) joined = stacked.join(d.df['distance_stratum']) joined.index.set_names('rep', level=1, inplace=True) joined.reset_index(inplace=True) # log counts joined['log_counts'] = np.log(joined['counts'] + 1) # honor labels and levels joined['rep'].replace(labels, inplace=True) # plot sns.pointplot(x='distance_stratum', y='log_counts', hue='rep', data=joined, ci=None, palette=palette, hue_order=hue_order) plt.xticks([-0.5, len(bins) - 1.5], [0, int(bins[len(bins) - 1] / 1e3)]) plt.xlabel('interaction distance (kb)') plt.ylabel('mean interaction frequency (log scale)') # add legend legend_labels = list(set(levels.values())) if hue_order is None \ else hue_order legend_handles = [mlines.Line2D([], [], color=colors[l], marker='.', markersize=15) for l in legend_labels] plt.legend(legend_handles, legend_labels, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
plot_distance_dependence_parallel = parallelize_regions( plot_distance_dependence)