"""
Module containing utilities for constructing annotatiomaps.
Annotatiomaps record the number of BED features of a certain type present at a
given linear bin as specified by a pixelmap.
"""
import glob
import os
import numpy as np
from lib5c.parsers.bed import load_features
from lib5c.util.bed import count_intersections
[docs]def make_single_annotationmap(annotation, pixelmap):
"""
Generates an annotationmap given an annotation and a pixelmap.
Parameters
----------
annotation : dict of lists of dicts
The keys are chromosome names. The values are lists of features for that
chromosome. The features are represented as dicts with the following
structure::
{
'chrom': str
'start': int,
'end' : int,
}
See ``lib5c.parsers.bed.load_features()``.
pixelmap: pixelmap
The pixelmap to use to generate the annotationmap. See
``lib5c.parsers.bed.get_pixelmap()``.
Returns
-------
dict of lists
The keys of the dictionary are region names. The values are lists, where
the ``i`` th entry represents the number of intersections between the
annotation and the ``i`` th bin of that region.
"""
annotationmap = {}
for region in pixelmap:
annotationmap[region] = []
for i in range(len(pixelmap[region])):
if pixelmap[region][i]['chrom'] in annotation:
annotationmap[region].append(
count_intersections(
pixelmap[region][i],
annotation[pixelmap[region][i]['chrom']]))
else:
annotationmap[region].append(0)
annotationmap[region] = np.array(annotationmap[region])
return annotationmap
[docs]def make_annotationmaps(pixelmap, directory='./annotations', add_wildcard=True):
"""
Gets a dict of annotationmaps, one for every BED file in a specified
directory.
Parameters
----------
pixelmap: pixelmap
The pixelmap to use to generate the annotationmap. See
``lib5c.parsers.bed.get_pixelmap()``.
directory: str
The directory to look in for BED files describing the annotations.
add_wildcard : bool
Pass True to add a 'wildcard' annotation that has 100 hits in every bin.
Useful for doing "unsided" enrichments later.
Returns
-------
dict of dict of lists
The keys of the outer dict are annotation names as parsed from the names
of the BED files in directory. The values are annotationmaps. See
``lib5c.util.annotationmap.get_single_annotatiomap()``.
"""
# normalize directory
directory = os.path.normcase(directory)
# annotatopmaps to return
annotationmaps = {}
# make annotationmaps
for path in glob.glob('%s/*.bed' % directory) + \
glob.glob('%s/*.interval' % directory):
annotation = load_features(path)
name = os.path.splitext(os.path.split(path)[-1])[0]
annotationmap = make_single_annotationmap(annotation, pixelmap)
annotationmaps[name] = annotationmap
# add wildcard if desired
if annotationmaps and add_wildcard:
any_key = list(annotationmaps.keys())[0]
annotationmaps['wildcard'] = {}
for region in annotationmaps[any_key].keys():
annotationmaps['wildcard'][region] = np.array(
[100] * len(annotationmaps[any_key][region]))
return annotationmaps
# test client
[docs]def main():
# we'll need a pixelmap
from lib5c.parsers.primers import load_primermap
pixelmap = load_primermap('test/bins.bed')
# directory where we should look for annotations
directory = 'test/annotations'
# make the annotationmaps
annotationmaps = make_annotationmaps(pixelmap, directory)
# prove that we have the annotationmap
print(list(annotationmaps.keys()))
for key in annotationmaps.keys():
print('%s %i' % (key, annotationmaps[key]['Sox2'][45]))
if __name__ == "__main__":
main()