import numpy as np
from lib5c.operators.base import InteractionMatrixOperator
[docs]class LocusTrimmer(InteractionMatrixOperator):
"""
Operator for removing Loci from an InteractionMatrix object according to
specified criteria.
Attributes
----------
sum_threshold_upper : float or None
If not None, Loci whose row sums are greater than this value will be
removed.
sum_threshold_lower : float or None
If not None, Loci whose row sums are less than this value will be
removed.
max_threshold : float or None
If not None, Loci containing at least one interaction above this value
will be removed.
min_threshold : float or None
If not None, Loci containing at least one interaction below this value
will be removed.
percentage_threshold_lower : float or None
If not None, this percentage of of the Loci with the lowest row sums
will be removed.
percentage_threshold_upper : float or None
If not None, this percentage of of the Loci with the highest row sums
will be removed.
fold_threshold_upper : float or None
If not None, Loci whose row sums are more than this many times the
median row sum will be removed.
fold_threshold_lower : float or None
If not None, Loci whose row sums are less than this many times the
median row sum will be removed.
"""
def __init__(self, sum_threshold_upper=None, sum_threshold_lower=None,
max_threshold=None, min_threshold=None,
percentage_threshold_lower=None,
percentage_threshold_upper=None,
fold_threshold_upper=None, fold_threshold_lower=None):
"""
Constructor. See class docstring for description of parameters.
Parameters
----------
sum_threshold_upper : float or None
sum_threshold_lower : float or None
max_threshold : float or None
min_threshold : float or None
percentage_threshold_lower : float or None
percentage_threshold_upper : float or None
fold_threshold_upper : float or None
fold_threshold_lower : float or None
"""
self.sum_threshold_upper = sum_threshold_upper
self.sum_threshold_lower = sum_threshold_lower
self.max_threshold = max_threshold
self.min_threshold = min_threshold
self.percentage_threshold_lower = percentage_threshold_lower
self.percentage_threshold_upper = percentage_threshold_upper
self.fold_threshold_upper = fold_threshold_upper
self.fold_threshold_lower = fold_threshold_lower
[docs] def apply_inplace(self, target, **kwargs):
"""
Apply the trimming operation to the target InteractionMatrix.
Parameters
----------
target : InteractionMatrix
The InteractionMatrix object to trim.
kwargs : other keyword arguments
To be utilized by subclasses.
Returns
-------
InteractionMatrix
The trimmed InteractionMatrix.
Examples
--------
>>> import numpy as np
>>> from lib5c.core.interactions import InteractionMatrix
>>> from lib5c.operators.trimming import LocusTrimmer
>>> X = np.arange(16, dtype=float).reshape((4, 4))
>>> im = InteractionMatrix(X + X.T)
>>> print(im)
InteractionMatrix of size 4
[[ 0. 5. 10. 15.]
[ 5. 10. 15. 20.]
[10. 15. 20. 25.]
[15. 20. 25. 30.]]
>>> locus_trimmer = LocusTrimmer(sum_threshold_lower=35)
>>> print(locus_trimmer.apply(im))
InteractionMatrix of size 3
[[10. 15. 20.]
[15. 20. 25.]
[20. 25. 30.]]
>>> locus_trimmer = LocusTrimmer(percentage_threshold_lower=50.0)
>>> print(locus_trimmer.apply(im))
InteractionMatrix of size 2
[[20. 25.]
[25. 30.]]
>>> locus_trimmer = LocusTrimmer(sum_threshold_upper=80.0)
>>> print(locus_trimmer.apply(im))
InteractionMatrix of size 3
[[ 0. 5. 10.]
[ 5. 10. 15.]
[10. 15. 20.]]
>>> locus_trimmer = LocusTrimmer(percentage_threshold_upper=50.0)
>>> print(locus_trimmer.apply(im))
InteractionMatrix of size 2
[[ 0. 5.]
[ 5. 10.]]
>>> locus_trimmer = LocusTrimmer(min_threshold=0.0)
>>> print(locus_trimmer.apply(im))
InteractionMatrix of size 3
[[10. 15. 20.]
[15. 20. 25.]
[20. 25. 30.]]
>>> locus_trimmer = LocusTrimmer(max_threshold=30.0)
>>> print(locus_trimmer.apply(im))
InteractionMatrix of size 3
[[ 0. 5. 10.]
[ 5. 10. 15.]
[10. 15. 20.]]
>>> locus_trimmer = LocusTrimmer(fold_threshold_lower=0.5)
>>> print(locus_trimmer.apply(im))
InteractionMatrix of size 3
[[10. 15. 20.]
[15. 20. 25.]
[20. 25. 30.]]
>>> locus_trimmer = LocusTrimmer(fold_threshold_upper=1.5)
>>> result = locus_trimmer.apply(im)
>>> result.print_log()
InteractionMatrix created
loci trimmed with:
fold_threshold_upper=1.5
deleted locus at index 3
>>> print(result)
InteractionMatrix of size 3
[[ 0. 5. 10.]
[ 5. 10. 15.]
[10. 15. 20.]]
>>> result = locus_trimmer.apply_inplace(im)
>>> im.print_log()
InteractionMatrix created
loci trimmed with:
fold_threshold_upper=1.5
deleted locus at index 3
>>> print(im)
InteractionMatrix of size 3
[[ 0. 5. 10.]
[ 5. 10. 15.]
[10. 15. 20.]]
>>> import numpy as np
>>> from lib5c.core.interactions import InteractionMatrix
>>> from lib5c.operators.trimming import LocusTrimmer
>>> from lib5c.core.loci import Locus, LocusMap
>>> locus_list = [Locus('chr3', 34109023, 34113109, region='Sox2'),
... Locus('chr3', 34113147, 34116141, region='Sox2'),
... Locus('chr3', 87282063, 87285636, region='Nestin'),
... Locus('chr3', 87285637, 87295935, region='Nestin')]
...
>>> locus_map = LocusMap(locus_list)
>>> X = np.arange(16, dtype=float).reshape((4, 4))
>>> im = InteractionMatrix(X + X.T, locusmap=locus_map)
>>> im.matrix
matrix([[ 0., 5., 10., 15.],
[ 5., 10., 15., 20.],
[10., 15., 20., 25.],
[15., 20., 25., 30.]])
>>> locus_trimmer = LocusTrimmer(percentage_threshold_lower=50.0)
>>> result = locus_trimmer.apply_by_region(im)
>>> print(result)
InteractionMatrix of size 2
[[10. 0.]
[ 0. 30.]]
Associated LocusMap:
LocusMap comprising 2 loci
Range: chr3:34113147-34116141 to chr3:87285637-87295935
Regions: ['Sox2', 'Nestin']
>>> result.print_log()
InteractionMatrix created
applying by region
extracted region Sox2
loci trimmed with:
percentage_threshold_lower=50.0
deleted locus at index 0
done applying by region
"""
# log
event = 'loci trimmed with:'
for parameter in self.__dict__:
if self.__dict__[parameter] is not None:
event += '\n\t%s=%s' % (parameter, self.__dict__[parameter])
target.log_event(event)
# maintain a set of indices to delete
deleted_indices = set()
# comupte row sums
row_sums = np.nansum(target.matrix, axis=0)
median_row_sum = np.median(row_sums, axis=1)
# resolve sum_threshold_lower
if self.sum_threshold_lower is not None:
for index in range(target.size()):
if row_sums[0, index] <= self.sum_threshold_lower:
deleted_indices.add(index)
# resolve sum_threshold_upper
if self.sum_threshold_upper is not None:
for index in range(target.size()):
if row_sums[0, index] >= self.sum_threshold_upper:
deleted_indices.add(index)
# resolve percentage_threshold_lower
if self.percentage_threshold_lower is not None:
sum_threshold_lower = np.percentile(
row_sums, self.percentage_threshold_lower)
for index in range(target.size()):
if row_sums[0, index] <= sum_threshold_lower:
deleted_indices.add(index)
# resolve percentage_threshold_upper
if self.percentage_threshold_upper is not None:
sum_threshold_upper = np.percentile(
row_sums, self.percentage_threshold_upper)
for index in range(target.size()):
if row_sums[0, index] >= sum_threshold_upper:
deleted_indices.add(index)
# resolve fold_threshold_lower
if self.fold_threshold_lower is not None and median_row_sum:
for index in range(target.size()):
if row_sums[0, index] / median_row_sum <=\
self.fold_threshold_lower:
deleted_indices.add(index)
# resolve fold_threshold_upper
if self.fold_threshold_upper is not None and median_row_sum:
for index in range(target.size()):
if row_sums[0, index] / median_row_sum >=\
self.fold_threshold_upper:
deleted_indices.add(index)
# resolve min_threshold
if self.min_threshold is not None:
for index in range(target.size()):
if np.any(target.matrix[index] <= self.min_threshold):
deleted_indices.add(index)
# resolve max_threshold
if self.max_threshold is not None:
for index in range(target.size()):
if np.any(target.matrix[index] >= self.max_threshold):
deleted_indices.add(index)
# delete indices
for index in sorted(deleted_indices, reverse=True):
target.delete(index)
return target
[docs]class InteractionTrimmer(InteractionMatrixOperator):
"""
Operator for removing specific interactions from an InteractionMatrix object
according to specified criteria by setting their values to ``np.nan``.
Attributes
----------
value_threshold_lower : float or None
If not None, interactions with values lower than this number will be
removed.
value_threshold_upper : float or None
If not None, interactions with values higher than this number will be
removed.
locus_percentage_threshold_lower : float or None
If not None, this percentage of interactions at each locus with the
lowest values will be removed.
locus_percentage_threshold_upper : float or None
If not None, this percentage of interactions at each locus with the
highest values will be removed.
global_percentage_threshold_lower : float or None
If not None, this percentage of interactions with the lowest values will
be removed.
global_percentage_threshold_upper : float or None
If not None, this percentage of interactions with the highest values
will be removed.
locus_fold_threshold_lower : float or None
If not None, interactions whose values are less than this many times the
median value across either participating locus will be removed.
locus_fold_threshold_upper : float or None
If not None, interactions whose values are more than this many times the
median value across either participating locus will be removed.
global_fold_threshold_lower : float or None
If not None, interactions whose values are less than this many times the
median value across all interactions will be removed.
global_fold_threshold_upper : float or None
If not None, interactions whose values are more than this many times the
median value across all interactions will be removed.
"""
def __init__(self,
value_threshold_lower=None,
value_threshold_upper=None,
locus_percentage_threshold_lower=None,
locus_percentage_threshold_upper=None,
global_percentage_threshold_lower=None,
global_percentage_threshold_upper=None,
locus_fold_threshold_lower=None,
locus_fold_threshold_upper=None,
global_fold_threshold_lower=None,
global_fold_threshold_upper=None):
"""
Constructor. See class docstring for description of parameters.
Parameters
----------
value_threshold_lower : float or None
value_threshold_upper : float or None
locus_percentage_threshold_lower : float or None
locus_percentage_threshold_upper : float or None
global_percentage_threshold_lower : float or None
global_percentage_threshold_upper : float or None
locus_fold_threshold_lower : float or None
locus_fold_threshold_upper : float or None
global_fold_threshold_lower : float or None
global_fold_threshold_upper : float or None
"""
self.value_threshold_lower = value_threshold_lower
self.value_threshold_upper = value_threshold_upper
self.locus_percentage_threshold_lower = locus_percentage_threshold_lower
self.locus_percentage_threshold_upper = locus_percentage_threshold_upper
self.global_percentage_threshold_lower =\
global_percentage_threshold_lower
self.global_percentage_threshold_upper =\
global_percentage_threshold_upper
self.locus_fold_threshold_lower = locus_fold_threshold_lower
self.locus_fold_threshold_upper = locus_fold_threshold_upper
self.global_fold_threshold_lower = global_fold_threshold_lower
self.global_fold_threshold_upper = global_fold_threshold_upper
[docs] def apply_inplace(self, target, **kwargs):
"""
Apply the trimming operation to the target InteractionMatrix.
Parameters
----------
target : InteractionMatrix
The InteractionMatrix object to trim.
kwargs : other keyword arguments
To be utilized by subclasses.
Returns
-------
InteractionMatrix
The trimmed InteractionMatrix.
Examples
--------
>>> import numpy as np
>>> from lib5c.core.interactions import InteractionMatrix
>>> from lib5c.operators.trimming import InteractionTrimmer
>>> X = np.arange(16, dtype=float).reshape((4, 4))
>>> im = InteractionMatrix(X + X.T)
>>> print(im)
InteractionMatrix of size 4
[[ 0. 5. 10. 15.]
[ 5. 10. 15. 20.]
[10. 15. 20. 25.]
[15. 20. 25. 30.]]
>>> trimmer = InteractionTrimmer(value_threshold_lower=5.0)
>>> print(trimmer.apply(im))
InteractionMatrix of size 4
[[nan nan 10. 15.]
[nan 10. 15. 20.]
[10. 15. 20. 25.]
[15. 20. 25. 30.]]
>>> trimmer = InteractionTrimmer(value_threshold_upper=25.0)
>>> print(trimmer.apply(im))
InteractionMatrix of size 4
[[ 0. 5. 10. 15.]
[ 5. 10. 15. 20.]
[10. 15. 20. nan]
[15. 20. nan nan]]
>>> trimmer = InteractionTrimmer(locus_percentage_threshold_lower=25.0)
>>> print(trimmer.apply(im))
InteractionMatrix of size 4
[[nan nan nan nan]
[nan 10. 15. 20.]
[nan 15. 20. 25.]
[nan 20. 25. 30.]]
>>> trimmer = InteractionTrimmer(locus_percentage_threshold_upper=75.0)
>>> print(trimmer.apply(im))
InteractionMatrix of size 4
[[ 0. 5. 10. nan]
[ 5. 10. 15. nan]
[10. 15. 20. nan]
[nan nan nan nan]]
>>> trimmer = InteractionTrimmer(global_percentage_threshold_lower=25.0)
>>> print(trimmer.apply(im))
InteractionMatrix of size 4
[[nan nan nan 15.]
[nan nan 15. 20.]
[nan 15. 20. 25.]
[15. 20. 25. 30.]]
>>> trimmer = InteractionTrimmer(global_percentage_threshold_upper=75.0)
>>> print(trimmer.apply(im))
InteractionMatrix of size 4
[[ 0. 5. 10. 15.]
[ 5. 10. 15. nan]
[10. 15. nan nan]
[15. nan nan nan]]
>>> trimmer = InteractionTrimmer(locus_fold_threshold_lower=0.5)
>>> print(trimmer.apply(im))
InteractionMatrix of size 4
[[nan nan 10. 15.]
[nan 10. 15. 20.]
[10. 15. 20. 25.]
[15. 20. 25. 30.]]
>>> trimmer = InteractionTrimmer(locus_fold_threshold_upper=2.0)
>>> print(trimmer.apply(im))
InteractionMatrix of size 4
[[ 0. 5. 10. nan]
[ 5. 10. 15. 20.]
[10. 15. 20. 25.]
[nan 20. 25. 30.]]
>>> trimmer = InteractionTrimmer(global_fold_threshold_lower=0.25)
>>> print(trimmer.apply(im))
InteractionMatrix of size 4
[[nan 5. 10. 15.]
[ 5. 10. 15. 20.]
[10. 15. 20. 25.]
[15. 20. 25. 30.]]
>>> trimmer = InteractionTrimmer(global_fold_threshold_upper=2.0)
>>> result = trimmer.apply(im)
>>> print(result)
InteractionMatrix of size 4
[[ 0. 5. 10. 15.]
[ 5. 10. 15. 20.]
[10. 15. 20. 25.]
[15. 20. 25. nan]]
>>> result.print_log()
InteractionMatrix created
interactions trimmed with:
global_fold_threshold_upper=2.0
"""
# log
event = 'interactions trimmed with:'
for parameter in self.__dict__:
if self.__dict__[parameter] is not None:
event += '\n\t%s=%s' % (parameter, self.__dict__[parameter])
target.log_event(event)
# set of coordinates to remove
removed_coords = set()
# resolve value_threshold_lower
if self.value_threshold_lower is not None:
for i in range(target.size()):
for j in range(i+1):
if target[i, j] <= self.value_threshold_lower:
removed_coords.add((i, j))
# resolve value_threshold_upper
if self.value_threshold_upper is not None:
for i in range(target.size()):
for j in range(i+1):
if target[i, j] >= self.value_threshold_upper:
removed_coords.add((i, j))
# resolve locus_percentage_threshold_lower
if self.locus_percentage_threshold_lower is not None:
for i in range(target.size()):
value_threshold_lower = np.percentile(
target.matrix[i], self.locus_percentage_threshold_lower)
for j in range(target.size()):
if target[i, j] <= value_threshold_lower:
removed_coords.add((i, j))
# resolve locus_percentage_threshold_upper
if self.locus_percentage_threshold_upper is not None:
for i in range(target.size()):
value_threshold_upper = np.percentile(
target.matrix[i], self.locus_percentage_threshold_upper)
for j in range(target.size()):
if target[i, j] >= value_threshold_upper:
removed_coords.add((i, j))
# resolve global_percentage_threshold_lower
if self.global_percentage_threshold_lower is not None:
value_threshold_lower = np.percentile(
target.flatten(), self.global_percentage_threshold_lower)
for i in range(target.size()):
for j in range(i+1):
if target[i, j] <= value_threshold_lower:
removed_coords.add((i, j))
# resolve global_percentage_threshold_upper
if self.global_percentage_threshold_upper is not None:
value_threshold_upper = np.percentile(
target.flatten(), self.global_percentage_threshold_upper)
for i in range(target.size()):
for j in range(i+1):
if target[i, j] >= value_threshold_upper:
removed_coords.add((i, j))
# resolve locus_fold_threshold_lower
if self.locus_fold_threshold_lower is not None:
for i in range(target.size()):
median_value = np.median(target.matrix[i], axis=1)
if median_value:
for j in range(target.size()):
if target[i, j] / median_value <=\
self.locus_fold_threshold_lower:
removed_coords.add((i, j))
# resolve locus_fold_threshold_upper
if self.locus_fold_threshold_upper is not None:
for i in range(target.size()):
median_value = np.median(target.matrix[i], axis=1)
if median_value:
for j in range(target.size()):
if target[i, j] / median_value >=\
self.locus_fold_threshold_upper:
removed_coords.add((i, j))
# resolve global_fold_threshold_lower
if self.global_fold_threshold_lower is not None:
median_value = np.median(target.flatten())
if median_value:
for i in range(target.size()):
for j in range(i+1):
if target[i, j] / median_value <=\
self.global_fold_threshold_lower:
removed_coords.add((i, j))
# resolve global_fold_threshold_upper
if self.global_fold_threshold_upper is not None:
median_value = np.median(target.flatten())
if median_value:
for i in range(target.size()):
for j in range(i+1):
if target[i, j] / median_value >=\
self.global_fold_threshold_upper:
removed_coords.add((i, j))
# remove removed coordinates
for removed_coord in removed_coords:
target[removed_coord] = np.nan
return target