Source code for lib5c.util.primers

"""
Module containing utilities for manipulating 5C primer information.
"""

import re


# Splitter for natural sorting: the capturing group makes ``split`` keep the
# runs of ASCII digits in its output instead of discarding them.
_nsre = re.compile(r'([0-9]+)')


def natural_sort_key(s):
    """
    Function to enable natural sorting of alphanumeric strings.

    Parameters
    ----------
    s : str
        String being sorted.

    Returns
    -------
    List[Union[int, str]]
        This list is an alternative representation of the input string that
        will sort in natural order. Elements at even indices are the
        lowercased non-numeric fragments (possibly empty strings); elements at
        odd indices are the runs of ASCII digits converted to ints.

    Notes
    -----
    Based on a function written by SO user
    http://stackoverflow.com/users/15055/claudiu and provided in answer
    http://stackoverflow.com/a/16090640.

    The int/str decision is made from the fragment's position in the split
    result rather than with ``str.isdigit()``. ``str.isdigit()`` also accepts
    characters that the ``[0-9]`` splitter never isolates (e.g. superscript or
    non-ASCII digits), which could either make ``int()`` raise or put an int
    into a slot where other keys hold a string, breaking comparisons between
    keys on Python 3.
    """
    # Splitting on a single capturing group always yields an alternating
    # sequence: text, digits, text, digits, ..., text.
    fragments = _nsre.split(s)
    return [int(fragment) if index % 2 else fragment.lower()
            for index, fragment in enumerate(fragments)]
def determine_region_order(primermap):
    """
    Orders regions in a primermap by genomic coordinate.

    Regions are ranked by the chromosome of their first primer (in natural
    sort order) and, within a chromosome, by the start coordinate of their
    first primer.

    Parameters
    ----------
    primermap : Dict[str, List[Dict[str, Any]]]
        Primermap containing information about the regions to be ordered. See
        ``lib5c.parsers.primers.get_primermap()``.

    Returns
    -------
    List[str]
        List of ordered region names.
    """
    def first_start(region_name):
        return primermap[region_name][0]['start']

    def chrom_key(region_name):
        return natural_sort_key(primermap[region_name][0]['chrom'])

    # Two stable passes: the secondary criterion (start) is applied first so
    # that the primary criterion (chromosome) preserves it among ties.
    by_start = sorted(primermap.keys(), key=first_start)
    return sorted(by_start, key=chrom_key)
def aggregate_primermap(primermap, region_order=None):
    """
    Flattens a primermap into one list of primers.

    Parameters
    ----------
    primermap : Dict[str, List[Dict[str, Any]]]
        Primermap to aggregate. See ``lib5c.parsers.primers.get_primermap()``.
    region_order : Optional[List[str]]
        Order in which the regions should be concatenated. If None, the order
        returned by ``lib5c.util.primers.determine_region_order()`` for this
        primermap is used.

    Returns
    -------
    List[Dict[str, Any]]
        The primer dicts of every region named in the region order, as a
        single flat list. Each region contributes one contiguous block, and
        the blocks follow the region order.

    Notes
    -----
    The returned list holds references to the primer dicts of the passed
    primermap, not copies. Pass a copy of the primermap if the originals must
    not be shared.
    """
    # fall back to the computed ordering only when none was supplied
    if region_order is None:
        region_order = determine_region_order(primermap)

    flattened = []
    for region_name in region_order:
        flattened.extend(primermap[region_name])
    return flattened
def guess_bin_step(regional_pixelmap):
    """
    Guesses the bin step from a regional pixelmap.

    The guess is the difference between the ``'start'`` values of the second
    and the first bin in the list.

    Parameters
    ----------
    regional_pixelmap : List[Dict[str, Any]]
        Ordered list of bins for a single region.

    Returns
    -------
    int
        The guessed bin step for this pixelmap.
    """
    second_start = regional_pixelmap[1]['start']
    first_start = regional_pixelmap[0]['start']
    return second_start - first_start