# Source code for lib5c.plotters.fits

"""
Module for plotting visualizations comparing fitted theoretical distributions to
real data.
"""

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from lib5c.util.counts import flatten_obs_and_exp
from lib5c.util.donut import make_donut_selector
from lib5c.util.plotting import plotter


@plotter
def plot_fit(data, frozen_dist, legend=True, **kwargs):
    """
    Overlay a fitted theoretical distribution on a KDE of the real data.

    The data is drawn in red as a shaded kernel density estimate and the
    frozen distribution is drawn in blue as a shaded curve evaluated at 100
    points spanning the current x-axis limits.

    Parameters
    ----------
    data : np.ndarray
        The real data to be compared to the theoretical distribution.
    frozen_dist : scipy.stats.rv_frozen
        The theoretical distribution to be compared to the real data. Its
        ``pdf()`` is used when the underlying ``dist`` exposes one; otherwise
        its ``pmf()`` is evaluated at integer-truncated x-values.
    legend : bool
        Not read inside this function body; presumably consumed by the
        ``@plotter`` decorator along with the other plotter kwargs (confirm
        against ``lib5c.util.plotting.plotter``).
    kwargs : kwargs
        Typical plotter kwargs.

    Returns
    -------
    pyplot axis
        The axis plotted on. Note that the body has no explicit ``return``;
        the return value is presumably supplied by the ``@plotter`` decorator.
    """
    # red layer: kernel density estimate of the observed data
    try:
        sns.kdeplot(data, shade=True, color='r', label='data')
    except ZeroDivisionError:
        # with no data at all kdeplot fails with a ZeroDivisionError; in that
        # case draw a flat line at zero spanning mu +/- 2.5 sigma of the
        # fitted distribution and label it "no data"
        mu, var = frozen_dist.stats(moments='mv')
        half_width = 2.5 * np.sqrt(var)
        left = mu - half_width
        right = mu + half_width
        flat_x = np.linspace(left, right, num=100)
        flat_y = np.zeros_like(flat_x)
        plt.plot(flat_x, flat_y, color='r', label='no data')
        plt.fill_between(flat_x, 0, flat_y, alpha=0.2, edgecolor='r',
                         facecolor='r', linewidth=0)
        plt.xlim([left, right])

    # blue layer: evaluate the fitted distribution across the visible x-range
    x_lo, x_hi = plt.xlim()
    grid = np.linspace(x_lo, x_hi, num=100)
    if not hasattr(frozen_dist.dist, 'pdf'):
        # no pdf on the underlying dist: fall back to the pmf, which is
        # evaluated on integer-truncated grid points
        grid = grid.astype(int)
        density = frozen_dist.pmf(grid)
    else:
        density = frozen_dist.pdf(grid)

    # draw and shade the fitted curve
    plt.plot(grid, density, color='b', label='fit')
    plt.fill_between(grid, 0, density, alpha=0.2, edgecolor='b',
                     facecolor='b', linewidth=0)

    # force the axis limits to be recomputed now that both layers are drawn
    ax = plt.gca()
    ax.relim()
    ax.autoscale()
def plot_group_fit(obs, exp, i, j, frozen_dist, local=False, p=5, w=15,
                   group_fractional_tolerance=0.1, vst=False, log=False,
                   legend=True, **kwargs):
    """
    Select the observed values relevant to one target point and compare them
    to a frozen distribution by delegating to plot_fit().

    Parameters
    ----------
    obs : np.ndarray
        Regional matrix of the observed values.
    exp : np.ndarray
        Regional matrix of the expected values.
    i, j : int
        Row and column indices, respectively, of the target point.
    frozen_dist : scipy.stats.rv_frozen
        The theoretical distribution to be compared to the real data.
    local : bool
        Pass True to use the observed points inside a donut window around the
        target point, each adjusted to the target's expected value (shifted by
        the difference in expected values when ``vst`` is True, scaled by
        their ratio otherwise). Pass False to use the observed points whose
        expected value is similar to the target's.
    p : int
        The inner radius of the donut window to use when local=True.
    w : int
        The outer radius of the donut window to use when local=True.
    group_fractional_tolerance : float
        The fractional tolerance in expected value used to select points with
        "similar" expected values when local=False.
    vst : bool
        Pass True if a VST-style step has been performed upstream and the
        expected values should be interpreted as already logged. When
        local=False the expected values are mapped through ``exp(x) - 1``
        before the tolerance comparison.
    log : bool
        Pass True to apply ``log(x + 1)`` to the selected observed data points
        before plotting.
    legend : bool
        Forwarded unchanged to plot_fit().
    kwargs : kwargs
        Typical plotter kwargs, forwarded to plot_fit().

    Returns
    -------
    pyplot axis
        Whatever plot_fit() returns (documented there as the axis plotted on).
    """
    # expected value at the target point, read before obs/exp are subset
    center_exp = exp[i, j]

    if not local:
        # global mode: keep points whose expected value lies within the
        # fractional tolerance of the target's expected value
        flat_obs, flat_exp = flatten_obs_and_exp(obs, exp)
        if vst:
            # undo the log(x + 1) so the tolerance is applied on the
            # un-logged scale
            center_exp = np.exp(center_exp) - 1
            flat_exp = np.exp(flat_exp) - 1
        relative_deviation = np.abs(center_exp - flat_exp) / center_exp
        data = flat_obs[relative_deviation < group_fractional_tolerance]
    else:
        # local mode: take the donut window around (i, j) and move every
        # point onto the target's expected value
        selector = make_donut_selector(i, j, p, w, len(obs))
        window_obs = obs[selector]
        window_exp = exp[selector]
        data = (window_obs + center_exp - window_exp if vst
                else window_obs * center_exp / window_exp)

    if log:
        data = np.log(data + 1)

    return plot_fit(data, frozen_dist, legend=legend, **kwargs)