# Source code for lib5c.algorithms.filtering.bin_bin_filtering

"""
Module for smoothing bin-level 5C interaction matrices.
"""

import numpy as np

from lib5c.algorithms.filtering.util import filter_selector
from lib5c.util.primers import guess_bin_step
from lib5c.util.parallelization import parallelize_regions


def find_nearby_bins(index, regional_pixelmap, threshold):
    """
    Finds the bins near a target bin as specified by an index.

    The target bin itself is part of the result (with distance 0) whenever it
    falls inside the search window and inside the region; it is listed exactly
    once.

    Parameters
    ----------
    index : int
        The index of the bin to look near.
    regional_pixelmap : List[Dict[str, Any]]
        The list of bins in this region.
    threshold : int
        The threshold for deciding if a bin is "nearby" or not, as a distance in
        base pairs.

    Returns
    -------
    List[Dict[str, int]]
        A list of nearby bins in order of increasing index, where each nearby
        bin is represented as a dict of the following form::

            {
                'index': int,
                'distance': int
            }

        where 'index' is the index of the bin within the region and 'distance'
        is the distance between this bin and the target bin (the index offset
        multiplied by the bin step). The list is empty if the search window
        does not overlap the region.
    """
    # convert the threshold into units of bins
    bin_step = guess_bin_step(regional_pixelmap)
    bin_threshold = int((threshold - bin_step / 2) / bin_step)

    # clip the window [index - bin_threshold, index + bin_threshold] to the
    # valid index range of the region instead of testing every candidate
    first = max(index - bin_threshold, 0)
    stop = min(index + bin_threshold + 1, len(regional_pixelmap))

    # the window already covers the target bin, so the list must not be
    # pre-seeded with it: doing so would list the target twice and make every
    # downstream filter count the center of the neighborhood double
    return [{'index': i, 'distance': abs(index - i) * bin_step}
            for i in range(first, stop)]


@parallelize_regions
def bin_bin_filter(array, filter_function, regional_pixelmap, threshold,
                   filter_kwargs=None):
    """
    Filters a bin-level matrix into a new bin-level matrix of the same size.

    Every entry of the lower triangle (diagonal included) is recomputed by
    calling ``filter_function`` on the neighborhood selected around that entry,
    and the result is mirrored into the upper triangle.

    Parameters
    ----------
    array : np.ndarray
        The matrix to filter. Neighborhood values are always read from this
        input; results are written to a separate copy.
    filter_function : Callable[[List[Dict[str, Any]]], float]
        The filter function to use when filtering. This function should take in
        a "neighborhood" and return the filtered value given that neighborhood.
        A neighborhood is represented as a list of "nearby points" where each
        nearby point is represented as a dict of the following form::

            {
                'value': float,
                'x_dist': int,
                'y_dist': int
            }

        where 'value' is the value at the point and 'x_dist' and 'y_dist' are
        its distances from the center of the neighborhood along the x- and
        y-axis, respectively, in base pairs. See
        ``lib5c.algorithms.filtering.filter_functions`` for examples of filter
        functions and how they can be created.
    regional_pixelmap : List[Dict[str, Any]]
        The list of bins in this region.
    threshold : int
        The threshold for defining the size of the neighborhood passed to the
        filter function, in base pairs.
    filter_kwargs : Optional[Dict[str, Any]]
        Kwargs to be passed to the ``filter_function``. ``None`` is treated as
        no extra kwargs.

    Returns
    -------
    np.ndarray
        The filtered matrix.
    """
    # treat a missing kwargs dict as "no extra arguments"
    kwargs = {} if filter_kwargs is None else filter_kwargs

    # work on a copy so that every neighborhood is drawn from unfiltered values
    # NOTE(review): the copy keeps the dtype of ``array``, so filtered values
    # are cast to that dtype on assignment -- confirm callers pass float data
    filtered = np.array(array)

    # one list of nearby bins per row/column of the matrix, computed up front
    neighborhoods = [find_nearby_bins(k, regional_pixelmap, threshold)
                     for k in range(len(filtered))]

    # visit the lower triangle only; the matrix is filled in symmetrically
    for row, row_bins in enumerate(neighborhoods):
        for col in range(row + 1):
            neighborhood = filter_selector(array, row_bins, neighborhoods[col])
            result = filter_function(neighborhood, **kwargs)
            filtered[row, col] = result
            filtered[col, row] = result

    return filtered


def bin_bin_filter_counts(counts, function, pixelmap, threshold,
                          function_kwargs=None):
    """
    Applies ``bin_bin_filter()`` to every region of a counts dict, one region at
    a time.

    Deprecated now that ``bin_bin_filter()`` is decorated with
    ``@parallelize_regions``.

    Parameters
    ----------
    counts : Dict[str, np.ndarray]
        The counts dict to filter, keyed by region name.
    function : Callable[[List[Dict[str, Any]]], float]
        The filter function to use for filtering. See the description of the
        ``filter_function`` arg in ``bin_bin_filter()``.
    pixelmap : Dict[str, List[Dict[str, Any]]]
        The pixelmap describing the bins for ``counts``. Must contain an entry
        for every region present in ``counts``.
    threshold : int
        The threshold for defining the size of the neighborhood passed to the
        filter function, in base pairs.
    function_kwargs : Optional[Dict[str, Any]]
        Kwargs to be passed to the ``function``.

    Returns
    -------
    Dict[str, np.ndarray]
        The dict of filtered counts, with the same keys as ``counts``.
    """
    filtered_counts = {}
    for region, matrix in counts.items():
        # each region is filtered against its own list of bins
        filtered_counts[region] = bin_bin_filter(
            matrix, function, pixelmap[region], threshold,
            filter_kwargs=function_kwargs)
    return filtered_counts