# Source code for lib5c.plotters.distribution

"""
Module for plotting counts distributions.
"""

import numpy as np
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
import seaborn as sns

from lib5c.util.counts import flatten_counts_to_list, flatten_regional_counts
from lib5c.util.parallelization import parallelize_regions
from lib5c.util.plotting import plotter


@plotter
def plot_global_distributions(counts_superdict, logged=True, drop_zeros=False,
                              shade=True, labels=None, levels=None, colors=None,
                              hue_order=None, **kwargs):
    """
    Plots overlaid global distributions for many replicates from a counts
    superdict.

    One kernel density estimate is drawn per replicate on the current pyplot
    axes. Curves are colored by level, and the legend shows one color patch
    per level (not one entry per replicate).

    Parameters
    ----------
    counts_superdict : counts_superdict
        The data to plot distributions of. Its keys are taken as the
        replicates; each value is flattened with ``flatten_counts_to_list()``
        using ``discard_nan=True``.
    logged : bool
        Pass True to plot ``log(counts + 1)`` (natural log) instead of the raw
        counts. This also switches the x-axis label to 'Counts (log scale)'.
    drop_zeros : bool
        Pass True to keep only strictly positive values. The filter is applied
        before logging.
    shade : bool
        Pass True to fill in the area under the distribution curves.
    labels : dict or None
        Pass a dict mapping the keys of counts_superdict to labels for plotting,
        or pass None to use the keys of counts_superdict as the labels.
    levels : dict or None
        Pass a dict mapping labels to levels to color-code the replicates by
        level. Pass None to give each replicate its own level.
    colors : dict or None
        Pass a dict mapping levels to matplotlib colors to decide what color to
        plot each level with. Pass None to automatically choose colors from
        the 'husl' palette, assigned to levels in the order they first appear
        in ``levels``.
    hue_order : list of str or None
        Pass a list of the level names to determine their order in the legend.
        Every entry must be a key of ``colors``. Pass None to list the levels
        in the order they first appear in ``levels``.
    kwargs : kwargs
        Typical plotter kwargs. They are not read by this function body;
        presumably they are consumed by the ``@plotter`` decorator (TODO
        confirm against lib5c.util.plotting).

    Returns
    -------
    pyplot axis
        The axis plotted on. Note that this function body has no return
        statement; the value is presumably supplied by the ``@plotter``
        decorator (TODO confirm against lib5c.util.plotting).
    """
    # infer reps
    reps = list(counts_superdict.keys())

    # resolve labels
    if labels is None:
        labels = {rep: rep for rep in reps}

    # resolve levels
    if levels is None:
        levels = {label: label for label in labels.values()}

    # collect the distinct levels in first-seen order; iterating a set here
    # would make the color assignment and the legend order depend on hashing
    unique_levels = []
    seen_levels = set()
    for level in levels.values():
        if level not in seen_levels:
            seen_levels.add(level)
            unique_levels.append(level)

    # resolve colors
    if colors is None:
        palette = sns.color_palette('husl', len(unique_levels))
        colors = dict(zip(unique_levels, palette))

    # resolve xlabel
    xlabel = 'Counts (log scale)' if logged else 'Counts'

    # prepare counts
    # the helper's return type is not visible from this module, so coerce to
    # an ndarray: the boolean mask and the "+ 1" below need array semantics
    flattened_counts = {
        rep: np.asarray(flatten_counts_to_list(counts_superdict[rep],
                                               discard_nan=True))
        for rep in reps}

    # drop zeros
    if drop_zeros:
        flattened_counts = {
            rep: flattened_counts[rep][flattened_counts[rep] > 0]
            for rep in reps}

    # log (the +1 pseudocount keeps zero counts finite)
    if logged:
        flattened_counts = {rep: np.log(flattened_counts[rep] + 1)
                            for rep in reps}

    # plot
    # NOTE(review): newer seaborn releases rename kdeplot's `shade` to `fill`;
    # confirm against the seaborn version this package pins before upgrading
    for rep in reps:
        sns.kdeplot(
            flattened_counts[rep],
            shade=shade,
            color=colors[levels[labels[rep]]],
            label=labels[rep]
        )
    plt.ylabel('Probability density')
    plt.xlabel(xlabel)

    # add legend: one patch per level, anchored just outside the upper right
    # corner of the axes
    legend_labels = unique_levels if hue_order is None else hue_order
    legend_handles = [mpatches.Patch(color=colors[level])
                      for level in legend_labels]
    plt.legend(legend_handles, legend_labels, scatterpoints=1, loc='upper left',
               bbox_to_anchor=(1, 1.05))


@plotter
def plot_regional_distribtions(regional_counts_superdict, logged=True,
                               drop_zeros=False, shade=True, labels=None,
                               levels=None, colors=None, hue_order=None,
                               **kwargs):
    """
    Plots overlaid distributions for many replicates from a regional counts
    superdict.

    One kernel density estimate is drawn per replicate on the current pyplot
    axes. Curves are colored by level, and the legend shows one color patch
    per level (not one entry per replicate).

    The misspelling in the function name ("distribtions") is part of the
    public interface and is kept so existing callers continue to work.

    Parameters
    ----------
    regional_counts_superdict : counts_superdict
        The data to plot distributions of. Its keys are taken as the
        replicates; each value is flattened with ``flatten_regional_counts()``
        using ``discard_nan=True``.
    logged : bool
        Pass True to plot ``log(counts + 1)`` (natural log) instead of the raw
        counts. This also switches the x-axis label to 'Counts (log scale)'.
    drop_zeros : bool
        Pass True to keep only strictly positive values. The filter is applied
        before logging.
    shade : bool
        Pass True to fill in the area under the distribution curves.
    labels : dict or None
        Pass a dict mapping the keys of regional_counts_superdict to labels for
        plotting, or pass None to use the keys of regional_counts_superdict as
        the labels.
    levels : dict or None
        Pass a dict mapping labels to levels to color-code the replicates by
        level. Pass None to give each replicate its own level.
    colors : dict or None
        Pass a dict mapping levels to matplotlib colors to decide what color to
        plot each level with. Pass None to automatically choose colors from
        the 'husl' palette, assigned to levels in the order they first appear
        in ``levels``.
    hue_order : list of str or None
        Pass a list of the level names to determine their order in the legend.
        Every entry must be a key of ``colors``. Pass None to list the levels
        in the order they first appear in ``levels``.
    kwargs : kwargs
        Typical plotter kwargs. They are not read by this function body;
        presumably they are consumed by the ``@plotter`` decorator (TODO
        confirm against lib5c.util.plotting).

    Returns
    -------
    pyplot axis
        The axis plotted on. Note that this function body has no return
        statement; the value is presumably supplied by the ``@plotter``
        decorator (TODO confirm against lib5c.util.plotting).
    """
    # infer reps
    reps = list(regional_counts_superdict.keys())

    # resolve labels
    if labels is None:
        labels = {rep: rep for rep in reps}

    # resolve levels
    if levels is None:
        levels = {label: label for label in labels.values()}

    # collect the distinct levels in first-seen order; iterating a set here
    # would make the color assignment and the legend order depend on hashing
    unique_levels = []
    seen_levels = set()
    for level in levels.values():
        if level not in seen_levels:
            seen_levels.add(level)
            unique_levels.append(level)

    # resolve colors
    if colors is None:
        palette = sns.color_palette('husl', len(unique_levels))
        colors = dict(zip(unique_levels, palette))

    # resolve xlabel
    xlabel = 'Counts (log scale)' if logged else 'Counts'

    # prepare counts
    # the helper's return type is not visible from this module, so coerce to
    # an ndarray: the boolean mask and the "+ 1" below need array semantics
    flattened_counts = {
        rep: np.asarray(flatten_regional_counts(regional_counts_superdict[rep],
                                                discard_nan=True))
        for rep in reps}

    # drop zeros
    if drop_zeros:
        flattened_counts = {
            rep: flattened_counts[rep][flattened_counts[rep] > 0]
            for rep in reps}

    # log (the +1 pseudocount keeps zero counts finite)
    if logged:
        flattened_counts = {rep: np.log(flattened_counts[rep] + 1)
                            for rep in reps}

    # plot
    # NOTE(review): newer seaborn releases rename kdeplot's `shade` to `fill`;
    # confirm against the seaborn version this package pins before upgrading
    for rep in reps:
        sns.kdeplot(
            flattened_counts[rep],
            shade=shade,
            color=colors[levels[labels[rep]]],
            label=labels[rep]
        )
    plt.ylabel('Probability density')
    plt.xlabel(xlabel)

    # add legend: a single explicit call with one patch per level, anchored
    # just outside the upper right corner of the axes (an axes holds only one
    # legend, so no separate per-replicate legend call is made)
    legend_labels = unique_levels if hue_order is None else hue_order
    legend_handles = [mpatches.Patch(color=colors[level])
                      for level in legend_labels]
    plt.legend(legend_handles, legend_labels, scatterpoints=1, loc='upper left',
               bbox_to_anchor=(1, 1.05))


# Expose a second entry point built by passing the regional plotter through
# parallelize_regions; the undecorated-by-parallelization original stays
# available under its own name.
# NOTE(review): presumably the wrapper applies the plotter once per region of
# a multi-region input - confirm against lib5c.util.parallelization.
# The "distribtions" misspelling is part of both public names.
plot_regional_distribtions_parallel = parallelize_regions(
    plot_regional_distribtions)