Source code for lib5c.operators.qnorm

from lib5c.operators.base import MultiInteractionMatrixOperator
from lib5c.operators.standardization import Standardizer
from lib5c.algorithms.qnorm import qnorm


[docs]class QuantileNormalizer(MultiInteractionMatrixOperator):
    """
    Operator for quantile normalizing InteractionMatrix objects.

    Attributes
    ----------
    tie : {'lowest', 'average'}
        How this QuantileNormalizer will resolve ties. If ``'lowest'``, it will
        set all tied entries to the value of the lowest rank. If ``'average'``,
        it will set all tied entries to the average value across the tied ranks.

    Notes
    -----
    This operator will first standardize the target InteractionMatrix objects,
    including propagation of nan's, if they have ``locusmap`` attributes
    defined. Otherwise, the target InteractionMatrix objects must be the same
    size.
    """

    def __init__(self, tie='lowest'):
        """
        Constructor. See class docstring for description of parameters.

        Parameters
        ----------
        tie : {'lowest', 'average'}
        """
        self.tie = tie

[docs]    def apply_inplace(self, targets, **kwargs):
        """
        Quantile normalizes the target InteractionMatrix objects.

        Parameters
        ----------
        targets : list of InteractionMatrix
            The InteractionMatrix objects to quantile normalize. These must
            either have ``locusmap`` attributes or be the same size.
        kwargs : other keyword arguments
            To be utilized by subclasses.

        Returns
        -------
        list of InteractionMatrix
            The standardized InteractionMatrix objects.

        Examples
        --------
        >>> import numpy as np
        >>> from lib5c.core.interactions import InteractionMatrix
        >>> from lib5c.operators.qnorm import QuantileNormalizer
        >>> q = QuantileNormalizer()
        >>> im1 = InteractionMatrix([[    5., np.nan,     3.],
        ...                          [np.nan,     2., np.nan],
        ...                          [    3., np.nan,     4.]])
        ...
        >>> im2 = InteractionMatrix([[    4., np.nan,     4.],
        ...                          [np.nan,     1., np.nan],
        ...                          [    4., np.nan,     2.]])
        ...
        >>> im3 = InteractionMatrix([[    3., np.nan,     6.],
        ...                          [np.nan,     4., np.nan],
        ...                          [    6., np.nan,     8.]])
        ...
        >>> results = q.apply([im1, im2, im3])
        >>> print(results[0])
        InteractionMatrix of size 3
        [[5.66666667        nan 3.        ]
         [       nan 2.                nan]
         [3.                nan 4.66666667]]
        >>> print(results[1])
        InteractionMatrix of size 3
        [[4.66666667        nan 4.66666667]
         [       nan 2.                nan]
         [4.66666667        nan 3.        ]]
        >>> print(results[2])
        InteractionMatrix of size 3
        [[2.                nan 4.66666667]
         [       nan 3.                nan]
         [4.66666667        nan 5.66666667]]

        >>> import numpy as np
        >>> from lib5c.core.interactions import InteractionMatrix
        >>> from lib5c.core.loci import Locus, LocusMap
        >>> from lib5c.operators.qnorm import QuantileNormalizer
        >>> q = QuantileNormalizer()
        >>> lm = LocusMap([
        ...     Locus('chr3', 34109023, 34113109),
        ...     Locus('chr3', 34113147, 34116141),
        ...     Locus('chr3', 87282063, 87285636),
        ...     Locus('chr3', 87285637, 87295935)
        ... ])
        ...
        >>> im1 = InteractionMatrix([[  0.,   5.,  10.,  15.],
        ...                          [  5.,  10.,  15.,  20.],
        ...                          [ 10.,  15.,  20.,  25.],
        ...                          [ 15.,  20.,  25.,  30.]], locusmap=lm)
        ...
        >>> im2 = InteractionMatrix([[    1., np.nan,    11.],
        ...                          [np.nan,    11.,    21.],
        ...                          [   11.,    21.,    16.]], locusmap=lm[:3])
        ...
        >>> results = q.apply([im1, im2])
        >>> print(results[0])
        InteractionMatrix of size 3
        [[ 0.5  nan 10.5]
         [ nan 10.5 15.5]
         [10.5 15.5 20.5]]
        Associated LocusMap:
        LocusMap comprising 3 loci
            Range: chr3:34109023-34113109 to chr3:87282063-87285636
        >>> print(results[1])
        InteractionMatrix of size 3
        [[ 0.5  nan 10.5]
         [ nan 10.5 20.5]
         [10.5 20.5 15.5]]
        Associated LocusMap:
        LocusMap comprising 3 loci
            Range: chr3:34109023-34113109 to chr3:87282063-87285636
        >>> results[0].print_log()
        InteractionMatrix created
        standardized with propagate_nan=True
        deleted locus at index 3
        qnormed with tie=lowest

        >>> import numpy as np
        >>> from lib5c.core.interactions import InteractionMatrix
        >>> from lib5c.operators.qnorm import QuantileNormalizer
        >>> from lib5c.core.loci import Locus, LocusMap
        >>> locus_list = [Locus('chr3', 34109023, 34113109, region='Sox2'),
        ...               Locus('chr3', 34113147, 34116141, region='Sox2'),
        ...               Locus('chr3', 87282063, 87285636, region='Nestin'),
        ...               Locus('chr3', 87285637, 87295935, region='Nestin')]
        ...
        >>> locus_map = LocusMap(locus_list)
        >>> X = np.arange(16, dtype=float).reshape((4, 4))
        >>> im1 = InteractionMatrix(X + X.T, locusmap=locus_map)
        >>> print(im1)
        InteractionMatrix of size 4
        [[ 0.  5. 10. 15.]
         [ 5. 10. 15. 20.]
         [10. 15. 20. 25.]
         [15. 20. 25. 30.]]
        Associated LocusMap:
        LocusMap comprising 4 loci
            Range: chr3:34109023-34113109 to chr3:87285637-87295935
            Regions: ['Sox2', 'Nestin']
        >>> im2 = InteractionMatrix((X + X.T) + 1, locusmap=locus_map)
        >>> print(im2)
        InteractionMatrix of size 4
        [[ 1.  6. 11. 16.]
         [ 6. 11. 16. 21.]
         [11. 16. 21. 26.]
         [16. 21. 26. 31.]]
        Associated LocusMap:
        LocusMap comprising 4 loci
            Range: chr3:34109023-34113109 to chr3:87285637-87295935
            Regions: ['Sox2', 'Nestin']
        >>> q = QuantileNormalizer()
        >>> results = q.apply_by_region([im1, im2])
        >>> print(results[0])
        InteractionMatrix of size 4
        [[  0.5   5.5   0.    0. ]
         [  5.5  10.5   0.    0. ]
         [  0.    0.   20.5  25.5]
         [  0.    0.   25.5  30.5]]
        Associated LocusMap:
        LocusMap comprising 4 loci
            Range: chr3:34109023-34113109 to chr3:87285637-87295935
            Regions: ['Sox2', 'Nestin']
        >>> print(results[1])
        InteractionMatrix of size 4
        [[  0.5   5.5   0.    0. ]
         [  5.5  10.5   0.    0. ]
         [  0.    0.   20.5  25.5]
         [  0.    0.   25.5  30.5]]
        Associated LocusMap:
        LocusMap comprising 4 loci
            Range: chr3:34109023-34113109 to chr3:87285637-87295935
            Regions: ['Sox2', 'Nestin']
        >>> results[0].print_log()
        InteractionMatrix created
        applying by region
        extracted region Sox2
        standardized with propagate_nan=True
        qnormed with tie=lowest
        done applying by region
        """
        # standardize targets
        if all(target.locusmap is not None for target in targets):
            s = Standardizer()
            targets = s.apply_inplace(targets)

        # flatten targets
        flattened_targets = {i: targets[i].flatten(discard_nan=False)
                             for i in range(len(targets))}

        # qnorm
        qnormed_flattened_targets = qnorm(flattened_targets, tie=self.tie)

        # regenerate InteractionMatrix objects
        for i in range(len(targets)):
            targets[i].unflatten(qnormed_flattened_targets[i])
            targets[i].log_event('qnormed with tie=%s' % self.tie)

        return targets