"""
Module for visualizing spline surfaces fit to 5C counts data.
"""
import warnings
import numpy as np
from lib5c.algorithms.expected import make_poly_log_log_fragment_expected_matrix
from lib5c.util.counts import parallel_divide_counts
[docs]def visualize_spline(counts_list, primermap, bias_factor, spline,
grid_points=10, sample_rate=100, log=True,
asymmetric=False):
"""
Open an interactive pyplot window showing a visualization of a spline
surface, overlayed over representative 5C counts data.
Parameters
----------
counts_list : List[Dict[str, np.ndarray]]
A list of counts dicts to use as data points to be compared to the
spline surface.
primermap : Dict[str, List[Dict[str, Any]]]
The primermap corresponding to the counts dicts in ``counts_list``.
bias_factor : str
The bias factor being plotted. This string must match metadata keys in
``primermap``. That is to say, if ``bias_list`` is ``['length']`` then
we expect ``primermap[region][i]['length']`` to be a number representing
the length of the ``i`` th fragment in the region specified by
``region``.
spline : scipy.interpolate.BivariateSpline
The spline object to visualize.
grid_points : int
The number of grid points to use when constructing the wireframe of the
surface represented by ``spline``.
sample_rate : int
Only every ``sample_rate`` th real-data point will be included in the
visualization to reduce computational load.
log : bool
Pass True to show counts on a log-scale axis.
asymmetric : bool
Pass True to iterate only over the upper-triangular elements of the
counts matrices, which can lead to asymmetric visualizations. By
default, the algorithm iterates over all elements of the counts
matrices, enforcing symmetry in thevisualizations but incurring some
redundancy in the actual counts information.
Notes
-----
The spline will be displayed in an interactive window via ``plt.show()``. If
your default matplotlib backend is not interactive, this function will try
to set your backend to TkAgg. If you prefer to use a different interactive
backend, set the ``MPLBACKEND`` environment variable before invoking Python.
"""
# interactive backend shenanigans
import matplotlib as mpl
if mpl.get_backend().lower() not in [x.lower()
for x in mpl.rcsetup.interactive_bk]:
with warnings.catch_warnings():
warnings.filterwarnings('error')
try:
mpl.use('TkAgg')
except UserWarning:
import matplotlib.pyplot as plt
plt.switch_backend('TkAgg')
from mpl_toolkits.mplot3d import axes3d # noqa: F401
import matplotlib.pyplot as plt
# deduce regions
regions = list(counts_list[0].keys())
# compute distance dependence
distance_dependences = [make_poly_log_log_fragment_expected_matrix(
counts, primermap) for counts in counts_list]
# divide by distance dependence
counts_list = [
parallel_divide_counts(counts_list[k], distance_dependences[k])
for k in range(len(counts_list))]
# construct arrays of actual data
x = []
y = []
z = []
for counts in counts_list:
for region in regions:
for i in range(len(counts[region])):
if asymmetric:
jrange = range(i + 1)
else:
jrange = range(len(counts[region]))
for j in jrange:
if np.isfinite(counts[region][i, j]):
x.append(primermap[region][i][bias_factor])
y.append(primermap[region][j][bias_factor])
if log:
z.append(np.log(counts[region][i, j] + 1))
else:
z.append(counts[region][i, j])
# construct arrays of values from spline
x_grid = np.linspace(min(x), max(x), grid_points)
y_grid = np.linspace(min(y), max(y), grid_points)
x_spline = np.zeros([len(x_grid), len(y_grid)])
y_spline = np.zeros([len(x_grid), len(y_grid)])
z_spline = np.zeros([len(x_grid), len(y_grid)])
for i in range(len(x_grid)):
for j in range(len(y_grid)):
x_spline[i, j] = x_grid[i]
y_spline[i, j] = y_grid[j]
z_spline[i, j] = spline.ev(x_grid[i], y_grid[j])
# plot
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(x[::sample_rate], y[::sample_rate], z[::sample_rate])
ax.plot_wireframe(x_spline, y_spline, z_spline, rstride=1, cstride=1)
ax.set_zlim([0, max(z)])
plt.show()