Source code for lib5c.util.annotationmap

"""
Module containing utilities for constructing annotatiomaps.

Annotatiomaps record the number of BED features of a certain type present at a
given linear bin as specified by a pixelmap.
"""

import glob
import os

import numpy as np

from lib5c.parsers.bed import load_features
from lib5c.util.bed import count_intersections


[docs]def make_single_annotationmap(annotation, pixelmap):
    """
    Generates an annotationmap given an annotation and a pixelmap.

    Parameters
    ----------
    annotation : dict of lists of dicts
        The keys are chromosome names. The values are lists of features for that
        chromosome. The features are represented as dicts with the following
        structure::

            {
                'chrom': str
                'start': int,
                'end'  : int,
            }

        See ``lib5c.parsers.bed.load_features()``.
    pixelmap: pixelmap
        The pixelmap to use to generate the annotationmap. See
        ``lib5c.parsers.bed.get_pixelmap()``.

    Returns
    -------
    dict of lists
        The keys of the dictionary are region names. The values are lists, where
        the ``i`` th entry represents the number of intersections between the
        annotation and the ``i`` th bin of that region.
    """
    annotationmap = {}
    for region in pixelmap:
        annotationmap[region] = []
        for i in range(len(pixelmap[region])):
            if pixelmap[region][i]['chrom'] in annotation:
                annotationmap[region].append(
                    count_intersections(
                        pixelmap[region][i],
                        annotation[pixelmap[region][i]['chrom']]))
            else:
                annotationmap[region].append(0)
        annotationmap[region] = np.array(annotationmap[region])
    return annotationmap


[docs]def make_annotationmaps(pixelmap, directory='./annotations', add_wildcard=True):
    """
    Gets a dict of annotationmaps, one for every BED file in a specified
    directory.

    Parameters
    ----------
    pixelmap: pixelmap
        The pixelmap to use to generate the annotationmap. See
        ``lib5c.parsers.bed.get_pixelmap()``.
    directory: str
        The directory to look in for BED files describing the annotations.
    add_wildcard : bool
        Pass True to add a 'wildcard' annotation that has 100 hits in every bin.
        Useful for doing "unsided" enrichments later.

    Returns
    -------
    dict of dict of lists
        The keys of the outer dict are annotation names as parsed from the names
        of the BED files in directory. The values are annotationmaps. See
        ``lib5c.util.annotationmap.get_single_annotatiomap()``.
    """
    # normalize directory
    directory = os.path.normcase(directory)

    # annotatopmaps to return
    annotationmaps = {}

    # make annotationmaps
    for path in glob.glob('%s/*.bed' % directory) + \
            glob.glob('%s/*.interval' % directory):
        annotation = load_features(path)
        name = os.path.splitext(os.path.split(path)[-1])[0]
        annotationmap = make_single_annotationmap(annotation, pixelmap)
        annotationmaps[name] = annotationmap

    # add wildcard if desired
    if annotationmaps and add_wildcard:
        any_key = list(annotationmaps.keys())[0]
        annotationmaps['wildcard'] = {}
        for region in annotationmaps[any_key].keys():
            annotationmaps['wildcard'][region] = np.array(
                [100] * len(annotationmaps[any_key][region]))

    return annotationmaps


# test client
[docs]def main():
    # we'll need a pixelmap
    from lib5c.parsers.primers import load_primermap
    pixelmap = load_primermap('test/bins.bed')

    # directory where we should look for annotations
    directory = 'test/annotations'

    # make the annotationmaps
    annotationmaps = make_annotationmaps(pixelmap, directory)

    # prove that we have the annotationmap
    print(list(annotationmaps.keys()))
    for key in annotationmaps.keys():
        print('%s %i' % (key, annotationmaps[key]['Sox2'][45]))


if __name__ == "__main__":
    main()