Source code for lib5c.plotters.fits

"""
Module for plotting visualizations comparing fitted theoretical distributions to
real data.
"""

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from lib5c.util.counts import flatten_obs_and_exp
from lib5c.util.donut import make_donut_selector
from lib5c.util.plotting import plotter


[docs]@plotter
def plot_fit(data, frozen_dist, legend=True, **kwargs):
    """
    Base function for plotting fits.

    Parameters
    ----------
    data : np.ndarray
        The real data to be compared to the theoretical distribution.
    frozen_dist : scipy.stats.rv_frozen
        The theoretical distribution to be compared to the real data.
    kwargs : kwargs
        Typical plotter kwargs.

    Returns
    -------
    pyplot axis
        The axis plotted on.
    """
    # plot KDE of data
    try:
        sns.kdeplot(data, shade=True, color='r', label='data')
    except ZeroDivisionError:
        # when there isn't any data, the kdeplot fails with a ZeroDivisionError
        # we draw a line at zero from mu-2.5sigma to mu+2.5sigma labeled no data
        mean, variance = frozen_dist.stats(moments='mv')
        xmin = mean - 2.5 * np.sqrt(variance)
        xmax = mean + 2.5 * np.sqrt(variance)
        xvals = np.linspace(xmin, xmax, num=100)
        yvals = np.zeros_like(xvals)
        plt.plot(xvals, yvals, color='r', label='no data')
        plt.fill_between(xvals, 0, yvals, alpha=0.2, edgecolor='r',
                         facecolor='r', linewidth=0)
        plt.xlim([xmin, xmax])

    # get curve for the PDF of the frozen_dist
    xvals = np.linspace(*plt.xlim(), num=100)
    if hasattr(frozen_dist.dist, 'pdf'):
        yvals = frozen_dist.pdf(xvals)
    else:
        xvals = xvals.astype(int)
        yvals = frozen_dist.pmf(xvals)

    # plot and shade frozen_dist PDF curve
    plt.plot(xvals, yvals, color='b', label='fit')
    plt.fill_between(xvals, 0, yvals, alpha=0.2, edgecolor='b',
                     facecolor='b', linewidth=0)

    # force recalculation of limits
    plt.gca().relim()
    plt.gca().autoscale()


[docs]def plot_group_fit(obs, exp, i, j, frozen_dist, local=False, p=5, w=15,
                   group_fractional_tolerance=0.1, vst=False, log=False,
                   legend=True, **kwargs):
    """
    Convenience function to select a subset of some data and compare it to a
    frozen distribution via plot_fit().

    Parameters
    ----------
    obs : np.ndarray
        Regional matrix of the observed values.
    exp : np.ndarray
        Regional matrix of the expected values.
    i, j : int
        Row and column indices, respectively, of the target point.
    frozen_dist : scipy.stats.rv_frozen
        The theoretical distribution to be compared to the real data.
    local : bool
        Pass True to compare the theoretical distribution to observed data
        points in a donut window around the target point. Pass False to compare
        the theoretical distribution to observed data points with similar
        expected values to the target point.
    w : int
        The outer radius of the donut window to use when local=True.
    p : int
        The inner radius of the donut window to use when local=True.
    group_fractional_tolerance : float
        The fractional tolerance in expected value used to select points with
        "similar" expected values when local=False.
    vst : bool
        Pass True if a VST-style step has been performed upstream and the
        expected values should be interpreted as already logged.
    log : bool
        Pass True to log the selected observed data points before plotting.
    kwargs : kwargs
        Typical plotter kwargs.

    Returns
    -------
    pyplot axis
        The axis plotted on.
    """
    target_exp = exp[i, j]
    if local:
        idx = make_donut_selector(i, j, p, w, len(obs))
        obs = obs[idx]
        exp = exp[idx]
        if vst:
            data = obs + target_exp - exp
        else:
            data = obs * target_exp / exp
    else:
        obs, exp = flatten_obs_and_exp(obs, exp)
        if vst:
            target_exp = np.exp(target_exp) - 1
            exp = np.exp(exp) - 1
        data = obs[
            np.abs(target_exp - exp) / target_exp < group_fractional_tolerance]
    if log:
        data = np.log(data + 1)
    return plot_fit(data, frozen_dist, legend=legend, **kwargs)