"""
Module for computing enrichments of annotations within categories of categorized
loops.
"""
from __future__ import division
from __future__ import absolute_import
import numpy as np
from scipy.ndimage import generic_filter
import scipy.stats as stats
from lib5c.util.lru_cache import lru_cache
[docs]@lru_cache(maxsize=None)
def process_annotations(annotation_label, region, annotationmaps, threshold=0,
margin=1):
"""
Extracts one annotation and one region from a dict of annotationmaps and
returns it in a vector form.
This function should be called from within the bodies of vectorized
enrichment functions that accept standard annotationmaps as arguments.
Parameters
----------
annotation_label : str
The annotation for which a vector should be created. Must be a key of
``annotationmaps``.
region : str
The specific region for which a vector should be created. Must be a key
of ``annotationmaps[annotation_label]``.
annotationmaps : dict of annotationmap
A dict describing the annotations. In total, it should have the
following structure::
{
'annotation_a_name': {
'region_1_name': list of int,
'region_2_name': list of int,
...
},
'annotation_b_name': {
'region_1_name': list of int,
'region_2_name': list of int,
...
},
...
}
where ``annotationmaps['annotation_a']['region_r']`` should be a list of
ints describing the number of ``'annotation_a'``s present in each bin of
``'region_r'``.
threshold : int
Bins are defined to contain an annotation if they are "hit" strictly
more than ``threshold`` times by the annotation.
margin : int
A bin is defined to contain an annotation if any bin within ``margin``
bins is "hit" by the annotation. Corresponds to a "margin for error" in
the intersection precision.
Returns
-------
np.ndarray
The processed vector representing the coverage of the selected
annotation across the selected region, according to the definitions
implied by the choise of ``threshold`` and ``margin``.
Examples
--------
>>> annotationmaps = {'a': {'r1': [0, 0, 2, 1]}}
>>> process_annotations('a', 'r1', annotationmaps)
array([0, 1, 1, 1])
>>> process_annotations('a', 'r1', annotationmaps, threshold=1, margin=0)
array([0, 0, 1, 0])
"""
return generic_filter(annotationmaps[annotation_label][region],
lambda x: x.sum() > threshold, size=1+2*margin,
mode='constant')
[docs]@lru_cache(maxsize=None)
def count_intersections(annotation_a, annotation_b, region, category,
annotationmaps, looping_classes, threshold=0, margin=1,
asymmetric=False):
"""
Counts the number of times one annotation intersects another at a particular
category of called loops within a specified region.
Parameters
----------
annotation_a : str
The annotation to look for on one side of the loop. Must be a key into
``annotationmaps``.
annotation_b : str
The annotation to look for on the other side of the loop. Must be a key
into ``annotationmaps``.
region : str
The region to count intersections over.
category : str
The loop category to count intersections for.
annotationmaps : dict of annotationmap
A dict describing the annotations. In total, it should have the
following structure::
{
'annotation_a_name': {
'region_1_name': list of int,
'region_2_name': list of int,
...
},
'annotation_b_name': {
'region_1_name': list of int,
'region_2_name': list of int,
...
},
...
}
where ``annotationmaps['annotation_a']['region_r']`` should be a list of
ints describing the number of ``'annotation_a'``s present in each bin of
``'region_r'``.
looping_classes : dict of np.ndarray with str dtype
The keys should be region names as strings, the values should be square,
symmetric arrays of the same size and shape as the indicated region,
with string loop category names in the positions of categorized loops.
threshold : int
Bins are defined to contain an annotation if they are "hit" strictly
more than ``threshold`` times by the annotation.
margin : int
A bin is defined to contain an annotation if any bin within ``margin``
bins is "hit" by the annotation. Corresponds to a "margin for error" in
the intersection precision.
asymmetric : bool
Pass True to only count situations when A is upstream of B. Pass False
to count intersections regardless of order.
Returns
-------
int
The total number of intersections.
Examples
--------
>>> import numpy as np
>>> clear_enrichment_caches()
>>> annotationmaps = {'a': {'r1': [0, 0, 2, 1]},
... 'b': {'r1': [1, 1, 0, 0]}}
>>> looping_classes = {'r1': np.array([['' , '' , 'es' , 'ips'],
... ['' , '' , 'npc', 'npc'],
... ['es' , 'npc', '' , '' ],
... ['ips', 'npc', '' , '' ]],
... dtype='U25')}
>>> count_intersections('a', 'b', 'r1', 'es', annotationmaps,
... looping_classes, margin=0)
1
>>> count_intersections('a', 'b', 'r1', 'npc', annotationmaps,
... looping_classes, margin=0)
2
>>> count_intersections('a', 'b', 'r1', 'npc', annotationmaps,
... looping_classes, margin=0)
2
>>> count_intersections.cache_info()
CacheInfo(hits=1, misses=2, maxsize=None, currsize=2)
>>> count_intersections('a', 'b', 'r1', 'es', annotationmaps,
... looping_classes, margin=0, asymmetric=True)
0
>>> count_intersections('b', 'a', 'r1', 'es', annotationmaps,
... looping_classes, margin=0, asymmetric=True)
1
"""
temp = np.outer(
process_annotations(annotation_a, region, annotationmaps,
threshold=threshold, margin=margin),
process_annotations(annotation_b, region, annotationmaps,
threshold=threshold, margin=margin)) > 0
if not asymmetric:
return np.tril(looping_classes[region] == category)[temp | temp.T].sum()
return np.tril(looping_classes[region] == category)[temp.T].sum()
[docs]@lru_cache(maxsize=None)
def count_intersections_all(annotation_a, annotation_b, category,
annotationmaps, looping_classes, threshold=0,
margin=1, asymmetric=False):
"""
Counts the number of times ``annotation_a`` and ``annotation_b`` are found
on opposite ends of loops in a given category of loop type across all
genomic regions.
Parameters
----------
annotation_a : str
Annotation to look for on one side of the loop.
annotation_b : str
Annotation to look for on the other side of the loop.
category : str
Only consider loops of this category.
annotationmaps : dict of annotationmap
A dict describing the annotations. In total, it should have the
following structure::
{
'annotation_a_name': {
'region_1_name': list of int,
'region_2_name': list of int,
...
},
'annotation_b_name': {
'region_1_name': list of int,
'region_2_name': list of int,
...
},
...
}
where ``annotationmaps['annotation_a']['region_r']`` should be a list of
ints describing the number of ``'annotation_a'``s present in each bin of
``'region_r'``.
looping_classes : dict of np.ndarray with str dtype
The keys should be region names as strings, the values should be square,
symmetric arrays of the same size and shape as the indicated region,
with string loop category names in the positions of categorized loops.
threshold : int
Bins are defined to contain an annotation if they are "hit" strictly
more than ``threshold`` times by the annotation.
margin : int
A bin is defined to contain an annotation if any bin within ``margin``
bins is "hit" by the annotation. Corresponds to a "margin for error" in
the intersection precision.
asymmetric : bool
Pass True to only count situations when A is upstream of B. Pass False
to count intersections regardless of order.
Returns
-------
int
The total number of intersections across all regions.
Examples
--------
>>> import numpy as np
>>> annotationmaps = {'a': {'r1': [0, 0, 2], 'r2': [1, 0]},
... 'b': {'r1': [1, 1, 0], 'r2': [0, 1]}}
>>> looping_classes = {'r1': np.array([['npc', '' , 'es' ],
... ['' , '' , 'npc'],
... ['es' , 'npc', '' ]],
... dtype='U25'),
... 'r2': np.array([['' , 'es' ],
... ['es' , '' ]],
... dtype='U25')}
>>> count_intersections_all('a', 'b', 'es', annotationmaps,
... looping_classes, margin=0)
2
>>> count_intersections_all('a', 'b', 'npc', annotationmaps,
... looping_classes, margin=0)
1
"""
all_intersections = 0
for region in looping_classes.keys():
all_intersections += count_intersections(
annotation_a, annotation_b, region, category, annotationmaps,
looping_classes, threshold=threshold, margin=margin,
asymmetric=asymmetric)
return all_intersections
[docs]@lru_cache(maxsize=None)
def get_annotation_percentage(annotation_a, annotation_b, region, category,
annotationmaps, looping_classes, threshold=0,
margin=1, asymmetric=False):
"""
Computes the precentage of loops within a particular region categorized into
a particular category that represent loops between ``annotation_a`` and
``annotation_b``.
Parameters
----------
annotation_a : str
Annotation to look for on one side of the loop.
annotation_b : str
Annotation to look for on the other side of the loop.
region : str
The region to compute the percentage within.
category : str
The category of loops to consider.
annotationmaps : dict of annotationmap
A dict describing the annotations. In total, it should have the
following structure::
{
'annotation_a_name': {
'region_1_name': list of int,
'region_2_name': list of int,
...
},
'annotation_b_name': {
'region_1_name': list of int,
'region_2_name': list of int,
...
},
...
}
where ``annotationmaps['annotation_a']['region_r']`` should be a list of
ints describing the number of ``'annotation_a'``s present in each bin of
``'region_r'``.
looping_classes : dict of np.ndarray with str dtype
The keys should be region names as strings, the values should be square,
symmetric arrays of the same size and shape as the indicated region,
with string loop category names in the positions of categorized loops.
threshold : int
Bins are defined to contain an annotation if they are "hit" strictly
more than ``threshold`` times by the annotation.
margin : int
A bin is defined to contain an annotation if any bin within ``margin``
bins is "hit" by the annotation. Corresponds to a "margin for error" in
the intersection precision.
asymmetric : bool
Pass True to only count situations when A is upstream of B. Pass False
to count intersections regardless of order.
Returns
-------
float
The fraction of loops within the region of the specified category that
represent loops between the indicated annotations.
Examples
--------
>>> import numpy as np
>>> annotationmaps = {'a': {'r1': [0, 0, 0, 1]},
... 'b': {'r1': [1, 1, 0, 0]}}
>>> looping_classes = {'r1': np.array([['' , '' , 'es' , 'ips'],
... ['' , '' , 'npc', 'npc'],
... ['es' , 'npc', '' , '' ],
... ['ips', 'npc', '' , '' ]],
... dtype='U25')}
>>> get_annotation_percentage('a', 'b', 'r1', 'ips', annotationmaps,
... looping_classes, margin=0)
1.0
>>> get_annotation_percentage('a', 'b', 'r1', 'npc', annotationmaps,
... looping_classes, margin=0)
0.5
"""
return (count_intersections(annotation_a, annotation_b, region, category,
annotationmaps, looping_classes,
threshold=threshold, margin=margin,
asymmetric=asymmetric) /
np.tril(looping_classes[region] == category).sum())
[docs]@lru_cache(maxsize=None)
def get_annotation_percentage_all(annotation_a, annotation_b, category,
annotationmaps, looping_classes, threshold=0,
margin=1, asymmetric=False):
"""
Computes the precentage of loops across all regions categorized into a
particular category that represent loops between ``annotation_a`` and
``annotation_b``.
Parameters
----------
annotation_a : str
Annotation to look for on one side of the loop.
annotation_b : str
Annotation to look for on the other side of the loop.
category : str
The category of loops to consider.
annotationmaps : dict of annotationmap
A dict describing the annotations. In total, it should have the
following structure::
{
'annotation_a_name': {
'region_1_name': list of int,
'region_2_name': list of int,
...
},
'annotation_b_name': {
'region_1_name': list of int,
'region_2_name': list of int,
...
},
...
}
where ``annotationmaps['annotation_a']['region_r']`` should be a list of
ints describing the number of ``'annotation_a'``s present in each bin of
``'region_r'``.
looping_classes : dict of np.ndarray with str dtype
The keys should be region names as strings, the values should be square,
symmetric arrays of the same size and shape as the indicated region,
with string loop category names in the positions of categorized loops.
threshold : int
Bins are defined to contain an annotation if they are "hit" strictly
more than ``threshold`` times by the annotation.
margin : int
A bin is defined to contain an annotation if any bin within ``margin``
bins is "hit" by the annotation. Corresponds to a "margin for error" in
the intersection precision.
asymmetric : bool
Pass True to only count situations when A is upstream of B. Pass False
to count intersections regardless of order.
Returns
-------
float
The fraction of loops across all regions of the specified category that
represent loops between the indicated annotations.
Examples
--------
>>> import numpy as np
>>> annotationmaps = {'a': {'r1': [0, 0, 2], 'r2': [1, 0]},
... 'b': {'r1': [1, 1, 0], 'r2': [0, 1]}}
>>> looping_classes = {'r1': np.array([['npc', '' , 'es' ],
... ['' , '' , 'npc'],
... ['es' , 'npc', '' ]],
... dtype='U25'),
... 'r2': np.array([['npc', 'es' ],
... ['es' , 'npc']],
... dtype='U25')}
>>> get_annotation_percentage_all('a', 'b', 'es', annotationmaps,
... looping_classes, margin=0)
1.0
>>> get_annotation_percentage_all('a', 'b', 'npc', annotationmaps,
... looping_classes, margin=0)
0.25
"""
all_interactions = 0
for region in looping_classes.keys():
all_interactions += np.tril(looping_classes[region] == category).sum()
return (count_intersections_all(annotation_a, annotation_b, category,
annotationmaps, looping_classes,
threshold=threshold, margin=margin,
asymmetric=asymmetric) /
all_interactions)
[docs]@lru_cache(maxsize=None)
def get_fold_change(annotation_a, annotation_b, region, category,
annotationmaps, looping_classes, threshold=0, margin=1,
asymmetric=False):
"""
Computes the fold enrichment of the percentage of loops of a particular
category in a particular region connecting specified annotations relative
to the special "background" reference category.
Parameters
----------
annotation_a : str
Annotation to look for on one side of the loop.
annotation_b : str
Annotation to look for on the other side of the loop.
region : str
The region to compute the fold enrichment within.
category : str
The category of loops to consider.
annotationmaps : dict of annotationmap
A dict describing the annotations. In total, it should have the
following structure::
{
'annotation_a_name': {
'region_1_name': list of int,
'region_2_name': list of int,
...
},
'annotation_b_name': {
'region_1_name': list of int,
'region_2_name': list of int,
...
},
...
}
where ``annotationmaps['annotation_a']['region_r']`` should be a list of
ints describing the number of ``'annotation_a'``s present in each bin of
``'region_r'``.
looping_classes : dict of np.ndarray with str dtype
The keys should be region names as strings, the values should be square,
symmetric arrays of the same size and shape as the indicated region,
with string loop category names in the positions of categorized loops.
threshold : int
Bins are defined to contain an annotation if they are "hit" strictly
more than ``threshold`` times by the annotation.
margin : int
A bin is defined to contain an annotation if any bin within ``margin``
bins is "hit" by the annotation. Corresponds to a "margin for error" in
the intersection precision.
asymmetric : bool
Pass True to only count situations when A is upstream of B. Pass False
to count intersections regardless of order.
Returns
-------
float
The fold enrichment.
Examples
--------
>>> import numpy as np
>>> annotationmaps = {'a': {'r1': [1, 0, 0, 1]},
... 'b': {'r1': [1, 1, 0, 0]}}
>>> looping_classes = {'r1': np.array([['' , '' , 'es' , 'ips'],
... ['' , '' , 'npc', 'npc'],
... ['es' , 'npc', '' , '' ],
... ['ips', 'npc', '' , '' ]],
... dtype='U25')}
>>> looping_classes['r1'][looping_classes['r1'] == ''] = 'background'
>>> get_fold_change('a', 'b', 'r1', 'ips', annotationmaps, looping_classes,
... margin=0)
3.0
>>> get_fold_change('a', 'b', 'r1', 'npc', annotationmaps, looping_classes,
... margin=0)
1.5
"""
return (get_annotation_percentage(annotation_a, annotation_b, region,
category, annotationmaps, looping_classes,
threshold=threshold, margin=margin,
asymmetric=asymmetric) /
get_annotation_percentage(annotation_a, annotation_b, region,
'background', annotationmaps,
looping_classes, threshold=threshold,
margin=margin, asymmetric=asymmetric))
[docs]@lru_cache(maxsize=None)
def get_fold_change_all(annotation_a, annotation_b, category, annotationmaps,
looping_classes, threshold=0, margin=1,
asymmetric=False):
"""
Computes the fold enrichment of the percentage of loops of a particular
category across all regions connecting specified annotations relative to the
special "background" reference category.
Parameters
----------
annotation_a : str
Annotation to look for on one side of the loop.
annotation_b : str
Annotation to look for on the other side of the loop.
category : str
The category of loops to consider.
annotationmaps : dict of annotationmap
A dict describing the annotations. In total, it should have the
following structure::
{
'annotation_a_name': {
'region_1_name': list of int,
'region_2_name': list of int,
...
},
'annotation_b_name': {
'region_1_name': list of int,
'region_2_name': list of int,
...
},
...
}
where ``annotationmaps['annotation_a']['region_r']`` should be a list of
ints describing the number of ``'annotation_a'``s present in each bin of
``'region_r'``.
looping_classes : dict of np.ndarray with str dtype
The keys should be region names as strings, the values should be square,
symmetric arrays of the same size and shape as the indicated region,
with string loop category names in the positions of categorized loops.
threshold : int
Bins are defined to contain an annotation if they are "hit" strictly
more than ``threshold`` times by the annotation.
margin : int
A bin is defined to contain an annotation if any bin within ``margin``
bins is "hit" by the annotation. Corresponds to a "margin for error" in
the intersection precision.
asymmetric : bool
Pass True to only count situations when A is upstream of B. Pass False
to count intersections regardless of order.
Returns
-------
float
The fold enrichment.
Examples
--------
>>> import numpy as np
>>> annotationmaps = {'a': {'r1': [0, 1, 2], 'r2': [1, 0]},
... 'b': {'r1': [1, 1, 0], 'r2': [0, 1]}}
>>> looping_classes = {'r1': np.array([['npc', '' , 'es' ],
... ['' , '' , 'npc'],
... ['es' , 'npc', '' ]],
... dtype='U25'),
... 'r2': np.array([['ips', 'es' ],
... ['es' , '' ]],
... dtype='U25')}
>>> looping_classes['r1'][looping_classes['r1'] == ''] = 'background'
>>> looping_classes['r2'][looping_classes['r2'] == ''] = 'background'
>>> get_fold_change_all('a', 'b', 'es', annotationmaps, looping_classes,
... margin=0)
2.0
>>> get_fold_change_all('a', 'b', 'npc', annotationmaps, looping_classes,
... margin=0)
1.0
>>> get_fold_change_all('a', 'b', 'ips', annotationmaps, looping_classes,
... margin=0)
0.0
"""
denominator = get_annotation_percentage_all(
annotation_a, annotation_b, 'background', annotationmaps,
looping_classes, threshold=threshold, margin=margin,
asymmetric=asymmetric)
if denominator == 0:
return 0
numerator = get_annotation_percentage_all(
annotation_a, annotation_b, category, annotationmaps, looping_classes,
threshold=threshold, margin=margin, asymmetric=asymmetric)
return numerator / denominator
[docs]@lru_cache(maxsize=None)
def get_fisher_exact_pvalue(annotation_a, annotation_b, region, category,
annotationmaps, looping_classes, threshold=0,
margin=1, asymmetric=False):
"""
Use Fisher's exact test to compute a one-sided p-value against the null
hypothesis that the selected loop category's overlap with selected
annotations in a chosen region is the same as the special "background"
reference loop category's overlap with the same annotations.
Parameters
----------
annotation_a : str
Annotation to look for on one side of the loop.
annotation_b : str
Annotation to look for on the other side of the loop.
region : str
The region to compute the p-value within
category : str
The category of loops to consider.
annotationmaps : dict of annotationmap
A dict describing the annotations. In total, it should have the
following structure::
{
'annotation_a_name': {
'region_1_name': list of int,
'region_2_name': list of int,
...
},
'annotation_b_name': {
'region_1_name': list of int,
'region_2_name': list of int,
...
},
...
}
where ``annotationmaps['annotation_a']['region_r']`` should be a list of
ints describing the number of ``'annotation_a'``s present in each bin of
``'region_r'``.
looping_classes : dict of np.ndarray with str dtype
The keys should be region names as strings, the values should be square,
symmetric arrays of the same size and shape as the indicated region,
with string loop category names in the positions of categorized loops.
threshold : int
Bins are defined to contain an annotation if they are "hit" strictly
more than ``threshold`` times by the annotation.
margin : int
A bin is defined to contain an annotation if any bin within ``margin``
bins is "hit" by the annotation. Corresponds to a "margin for error" in
the intersection precision.
asymmetric : bool
Pass True to only count situations when A is upstream of B. Pass False
to count intersections regardless of order.
Returns
-------
float
The p-value.
Examples
--------
>>> import numpy as np
>>> annotationmaps = {'a': {'r1': [1, 0, 0, 1]},
... 'b': {'r1': [1, 1, 0, 0]}}
>>> looping_classes = {'r1': np.array([['' , '' , 'es' , 'ips'],
... ['' , '' , 'npc', 'npc'],
... ['es' , 'npc', '' , '' ],
... ['ips', 'npc', '' , '' ]],
... dtype='U25')}
>>> looping_classes['r1'][looping_classes['r1'] == ''] = 'background'
>>> get_fisher_exact_pvalue('a', 'b', 'r1', 'ips', annotationmaps,
... looping_classes, margin=0)
0.428571428571428...
>>> get_fisher_exact_pvalue('a', 'b', 'r1', 'npc', annotationmaps,
... looping_classes, margin=0)
0.642857142857142...
"""
# count loops in the specified category
category_loops_total = np.tril(looping_classes[region] == category).sum()
category_loops_hit = count_intersections(
annotation_a, annotation_b, region, category, annotationmaps,
looping_classes, threshold=threshold, margin=margin,
asymmetric=asymmetric)
category_loops_not_hit = category_loops_total - category_loops_hit
# count loops in the background category
bkgd_loops_total = np.tril(looping_classes[region] == 'background').sum()
bkgd_loops_hit = count_intersections(
annotation_a, annotation_b, region, 'background', annotationmaps,
looping_classes, threshold=threshold, margin=margin,
asymmetric=asymmetric)
bkgd_loops_not_hit = bkgd_loops_total - bkgd_loops_hit
# short-circuit if neither category nor background have any hits
if category_loops_hit == 0 and bkgd_loops_hit == 0:
return 0.5
# assemble contingency table
cont_table = [[category_loops_hit, bkgd_loops_hit],
[category_loops_not_hit, bkgd_loops_not_hit]]
# return the smaller of the two single-tailed p-values
return min(stats.fisher_exact(cont_table, alternative='less')[1],
stats.fisher_exact(cont_table, alternative='greater')[1])
[docs]@lru_cache(maxsize=None)
def get_fisher_exact_pvalue_all(annotation_a, annotation_b, category,
annotationmaps, looping_classes, threshold=0,
margin=1, asymmetric=False):
"""
Use Fisher's exact test to compute a one-sided p-value against the null
hypothesis that the selected loop category's overlap with selected
annotations across all regions is the same as the special "background"
reference loop category's overlap with the same annotations.
Parameters
----------
annotation_a : str
Annotation to look for on one side of the loop.
annotation_b : str
Annotation to look for on the other side of the loop.
category : str
The category of loops to consider.
annotationmaps : dict of annotationmap
A dict describing the annotations. In total, it should have the
following structure::
{
'annotation_a_name': {
'region_1_name': list of int,
'region_2_name': list of int,
...
},
'annotation_b_name': {
'region_1_name': list of int,
'region_2_name': list of int,
...
},
...
}
where ``annotationmaps['annotation_a']['region_r']`` should be a list of
ints describing the number of ``'annotation_a'``s present in each bin of
``'region_r'``.
looping_classes : dict of np.ndarray with str dtype
The keys should be region names as strings, the values should be square,
symmetric arrays of the same size and shape as the indicated region,
with string loop category names in the positions of categorized loops.
threshold : int
Bins are defined to contain an annotation if they are "hit" strictly
more than ``threshold`` times by the annotation.
margin : int
A bin is defined to contain an annotation if any bin within ``margin``
bins is "hit" by the annotation. Corresponds to a "margin for error" in
the intersection precision.
asymmetric : bool
Pass True to only count situations when A is upstream of B. Pass False
to count intersections regardless of order.
Returns
-------
float
The p-value.
Examples
--------
>>> import numpy as np
>>> annotationmaps = {'a': {'r1': [0, 1, 2], 'r2': [1, 0]},
... 'b': {'r1': [1, 1, 0], 'r2': [0, 1]}}
>>> looping_classes = {'r1': np.array([['npc', '' , 'es' ],
... ['' , '' , 'npc'],
... ['es' , 'npc', '' ]],
... dtype='U25'),
... 'r2': np.array([['ips', 'es' ],
... ['es' , '' ]],
... dtype='U25')}
>>> looping_classes['r1'][looping_classes['r1'] == ''] = 'background'
>>> looping_classes['r2'][looping_classes['r2'] == ''] = 'background'
>>> round(get_fisher_exact_pvalue_all('a', 'b', 'es', annotationmaps,
... looping_classes, margin=0), 14)
0.4
>>> round(get_fisher_exact_pvalue_all('a', 'b', 'npc', annotationmaps,
... looping_classes, margin=0), 14)
0.8
>>> round(get_fisher_exact_pvalue_all('a', 'b', 'ips', annotationmaps,
... looping_classes, margin=0), 14)
0.6
"""
# count loops in the specified category
category_loops_total = sum(
[np.tril(looping_classes[region] == category).sum()
for region in looping_classes])
category_loops_hit = count_intersections_all(
annotation_a, annotation_b, category, annotationmaps, looping_classes,
threshold=threshold, margin=margin, asymmetric=asymmetric)
category_loops_not_hit = category_loops_total - category_loops_hit
# count loops in the background category
bkgd_loops_total = sum(
[np.tril(looping_classes[region] == 'background').sum()
for region in looping_classes])
bkgd_loops_hit = count_intersections_all(
annotation_a, annotation_b, 'background', annotationmaps,
looping_classes, threshold=threshold, margin=margin,
asymmetric=asymmetric)
bkgd_loops_not_hit = bkgd_loops_total - bkgd_loops_hit
# short-circuit if neither category nor background have any hits
if category_loops_hit == 0 and bkgd_loops_hit == 0:
return 0.5
# assemble contingency table
cont_table = [[category_loops_hit, bkgd_loops_hit],
[category_loops_not_hit, bkgd_loops_not_hit]]
# return the smaller of the two single-tailed p-values
return min(stats.fisher_exact(cont_table, alternative='less')[1],
stats.fisher_exact(cont_table, alternative='greater')[1])
[docs]def clear_enrichment_caches():
"""
Clear all caches related to enrichment computations.
This function is deprecated. Previously, it was necessary to call this
function within a script whenever the content of ``annotationmaps`` or
``looping_classes`` changed. The current cache implementation does not need
to be cleared when this happens.
Examples
--------
>>> import numpy as np
>>> clear_enrichment_caches()
>>> annotationmaps = {'a': {'r1': [0, 0, 2, 1]},
... 'b': {'r1': [1, 1, 0, 0]}}
>>> looping_classes = {'r1': np.array([['' , '' , 'es' , 'ips'],
... ['' , '' , 'npc', 'npc'],
... ['es' , 'npc', '' , '' ],
... ['ips', 'npc', '' , '' ]],
... dtype='U25')}
>>> count_intersections('a', 'b', 'r1', 'es', annotationmaps,
... looping_classes, margin=0)
1
>>> looping_classes = {'r1': np.array([['' , '' , 'ips', 'ips'],
... ['' , '' , 'npc', 'npc'],
... ['ips', 'npc', '' , '' ],
... ['ips', 'npc', '' , '' ]],
... dtype='U25')}
>>> count_intersections('a', 'b', 'r1', 'es', annotationmaps,
... looping_classes, margin=0)
0
>>> clear_enrichment_caches()
>>> count_intersections('a', 'b', 'r1', 'es', annotationmaps,
... looping_classes, margin=0)
0
"""
process_annotations.cache_clear()
count_intersections.cache_clear()
count_intersections_all.cache_clear()
get_annotation_percentage.cache_clear()
get_annotation_percentage_all.cache_clear()
get_fold_change.cache_clear()
get_fold_change_all.cache_clear()
get_fisher_exact_pvalue.cache_clear()
get_fisher_exact_pvalue_all.cache_clear()