Source code for lib5c.operators.standardization

import numpy as np

from lib5c.operators.base import MultiInteractionMatrixOperator
from lib5c.core.loci import LocusMap


[docs]class Standardizer(MultiInteractionMatrixOperator):
    """
    Operator for standardizing InteractionMatrix objects. This process reduces
    all InteractionMatrix objects passed to the lowest common denominator of
    loci. In other words, loci that are not present in every InteractionMatrix
    object will be discarded from all InteractionMatrix objects.

    Attributes
    ----------
    propagate_nan : bool
        If True, nan values will be propagated across InteractionMatrix objects.

    Notes
    -----
    The InteractionMatrix objects supplied must have ``locusmap`` attributes.
    """

    def __init__(self, propagate_nan=True):
        """
        Constructor. See class docstring for description of parameters.

        Parameters
        ----------
        propagate_nan : bool
        """
        self.propoagate_nan = propagate_nan

[docs]    def apply_inplace(self, targets, **kwargs):
        """
        Apply the standardization operation to the target InteractionMatrix
        objects.

        Parameters
        ----------
        targets : list of InteractionMatrix
            The InteractionMatrix objects to standardize.
        kwargs : other keyword arguments
            To be utilized by subclasses.

        Returns
        -------
        list of InteractionMatrix
            The standardized InteractionMatrix objects.

        Examples
        --------
        >>> import numpy as np
        >>> from lib5c.core.interactions import InteractionMatrix
        >>> from lib5c.core.loci import Locus, LocusMap
        >>> from lib5c.operators.standardization import Standardizer
        >>> s = Standardizer()
        >>> lm = LocusMap([
        ...     Locus('chr3', 34109023, 34113109),
        ...     Locus('chr3', 34113147, 34116141),
        ...     Locus('chr3', 87282063, 87285636),
        ...     Locus('chr3', 87285637, 87295935)
        ... ])
        ...
        >>> im1 = InteractionMatrix([[  0.,   5.,  10.,  15.],
        ...                          [  5.,  10.,  15.,  20.],
        ...                          [ 10.,  15.,  20.,  25.],
        ...                          [ 15.,  20.,  25.,  30.]], locusmap=lm)
        ...
        >>> im2 = InteractionMatrix([[    1., np.nan,    11.],
        ...                          [np.nan,    11.,    16.],
        ...                          [   11.,    16.,    21.]], locusmap=lm[:3])
        ...
        >>> results = s.apply([im1, im2])
        >>> print(results[0])
        InteractionMatrix of size 3
        [[ 0. nan 10.]
         [nan 10. 15.]
         [10. 15. 20.]]
        Associated LocusMap:
        LocusMap comprising 3 loci
            Range: chr3:34109023-34113109 to chr3:87282063-87285636
        >>> print(results[1])
        InteractionMatrix of size 3
        [[ 1. nan 11.]
         [nan 11. 16.]
         [11. 16. 21.]]
        Associated LocusMap:
        LocusMap comprising 3 loci
            Range: chr3:34109023-34113109 to chr3:87282063-87285636
        >>> results[0].print_log()
        InteractionMatrix created
        standardized with propagate_nan=True
        deleted locus at index 3
        """
        # check for locusmap attributes
        for target in targets:
            if target.locusmap is None:
                raise ValueError('Target InteractionMatrix objects must possess'
                                 'locusmap attributes for standardization.')
        # log
        for target in targets:
            target.log_event('standardized with propagate_nan=%s' %
                             self.propoagate_nan)

        # determine total LocusMap
        total_locus_set = set()
        for target in targets:
            for locus in target.locusmap:
                total_locus_set.add(locus)
        total_locusmap = LocusMap(list(total_locus_set))

        # delete non-common loci
        for locus in total_locusmap:
            # information for this locus
            locus_hash = hash(locus)
            discard_flag = False

            # if the locus is missing from any target, set the flag
            for target in targets:
                if target.locusmap.get_index_by_hash(locus_hash) is None:
                    discard_flag = True
                    break

            # if the flag was set, delete this locus from all targets
            if discard_flag:
                for target in targets:
                    delete_index = target.locusmap.get_index_by_hash(locus_hash)
                    if delete_index is not None:
                        target.delete(delete_index)

        # honor propagate_nan
        if self.propoagate_nan:
            for i in range(targets[0].size()):
                for j in range(i+1):
                    # if this position is nan in any target, set the flag
                    nan_flag = False
                    for target in targets:
                        if not np.isfinite(target[i, j]):
                            nan_flag = True
                            break

                    # if the flag was set, set nan in all targets
                    if nan_flag:
                        for target in targets:
                            target[i, j] = np.nan

        return targets