Source code for lib5c.parsers.primer_names

"""
Module providing helper functions for working with primer naming conventions,
necessary for parsing certain primerfiles.
"""


def guess_primer_name_parser(name):
    """
    Select a primer or bin name parser by trying each known one in turn.

    The candidates are tried in a fixed order: the double alternating primer
    parser, then the default primer parser, then the default bin parser. The
    first candidate that parses ``name`` without raising ``ValueError`` or
    ``IndexError`` is returned.

    Parameters
    ----------
    name : str
        A representative primer or bin name to probe the parsers with.

    Returns
    -------
    function or None
        The first parser that accepted ``name``, or ``None`` when every
        candidate rejected it.
    """
    candidates = (
        dblalt_primer_parser,
        default_primer_parser,
        default_bin_parser,
    )
    for candidate in candidates:
        try:
            candidate(name)
        except (ValueError, IndexError):
            # This naming scheme does not fit the name; try the next one.
            continue
        return candidate
    return None
def default_primer_parser(name):
    """
    Parse a primer name that follows the default naming scheme.

    The name is split on underscores. The third field is the region, the
    fourth field is the direction (``FOR`` or ``REV``), and the fifth field
    starts with the primer number, optionally followed by a ``:``-separated
    suffix.

    Parameters
    ----------
    name : str
        The name of the primer found in the appropriate column of the primer
        bedfile.

    Returns
    -------
    dict
        This dict has the following structure::

            {
                'region': str,
                'number': int,
                'orientation': "3'" or "5'",
                'strand': '+' or '-',
                'name': str
            }

        These fields are parsed from the primer name. ``'name'`` is the
        input truncated at its first ``:``.

    Raises
    ------
    IndexError
        If the name has fewer than five underscore-separated fields.
    ValueError
        If the primer number is not an integer, or the direction field is
        neither ``FOR`` nor ``REV``.

    Notes
    -----
    You can write other name parsers to accommodate different primer naming
    conventions.
    """
    # Direction token -> (orientation, strand).
    directions = {
        'FOR': ("3'", '+'),
        'REV': ("5'", '-'),
    }

    fields = name.split('_')
    parsed = {'region': fields[2]}

    # The number is converted before the direction is validated, so a name
    # with too few fields fails with IndexError rather than ValueError.
    number_token = fields[4].split(':')[0]
    parsed['number'] = int(number_token)

    direction = directions.get(fields[3])
    if direction is None:
        raise ValueError('default primer name scheme violation')
    parsed['orientation'], parsed['strand'] = direction

    # Drop anything after the first colon from the reported name.
    parsed['name'] = name.split(':')[0]
    return parsed
def dblalt_primer_parser(name):
    """
    Parse a primer name that follows the double alternating naming scheme.

    The name is split on underscores. The third field holds the region
    (anything after a ``-`` is dropped). The fourth field holds the direction
    (``FOR``/``LFOR`` or ``REV``/``LREV``, anything after a ``-`` is
    dropped). The part of the fifth field before its first ``:`` must contain
    a ``|``; the primer number is the text before that ``|``.

    Parameters
    ----------
    name : str
        The name of the primer found in the appropriate column of the primer
        bedfile.

    Returns
    -------
    dict
        This dict has the following structure::

            {
                'region': str,
                'number': int,
                'orientation': "3'" or "5'",
                'strand': '+' or '-',
                'name': str
            }

        These fields are parsed from the primer name. ``'name'`` is the first
        two ``:``-separated parts of the input, rejoined with ``:``.

    Raises
    ------
    IndexError
        If the name has fewer than five underscore-separated fields, or
        contains no ``:``.
    ValueError
        If the number field contains no ``|``, the number is not an integer,
        or the direction is not one of the recognised tokens.

    Notes
    -----
    You can write other name parsers to accommodate different primer naming
    conventions.
    """
    fields = name.split('_')
    region = fields[2].split('-')[0]

    # A missing '|' in the number field means this is not a dblalt name.
    number_field = fields[4].split(':')[0]
    if '|' not in number_field:
        raise ValueError('dblalt primer name scheme violation')
    number = int(number_field.split('|')[0])

    direction = fields[3].split('-')[0]
    if direction in ('FOR', 'LFOR'):
        orientation, strand = "3'", '+'
    elif direction in ('REV', 'LREV'):
        orientation, strand = "5'", '-'
    else:
        raise ValueError('dblalt primer name scheme violation')

    # Keep the first two colon-separated parts of the full name.
    colon_parts = name.split(':')
    corrected_name = colon_parts[0] + ':' + colon_parts[1]

    return {
        'region': region,
        'number': number,
        'orientation': orientation,
        'strand': strand,
        'name': corrected_name,
    }
def default_bin_parser(name):
    """
    Parse a bin name that follows the default naming scheme.

    The name is split on underscores. The first field is the region, the
    second field must be the literal ``BIN``, and the third field is the
    integer bin index.

    Parameters
    ----------
    name : str
        The name of the bin found in the appropriate column of the bin
        bedfile.

    Returns
    -------
    dict
        This dict has the following structure::

            {
                'region': str,
                'index': int
            }

        These fields are parsed from the bin name.

    Raises
    ------
    IndexError
        If the name has fewer than three underscore-separated fields.
    ValueError
        If the second field is not ``BIN`` or the third field is not an
        integer.

    Notes
    -----
    You can write other name parsers to accommodate different bin naming
    conventions.
    """
    fields = name.split('_')
    region_name = fields[0]

    # The tag is checked before the index field is touched.
    tag = fields[1]
    if tag != 'BIN':
        raise ValueError('default bin name scheme violation')

    bin_index = int(fields[2])
    return dict(region=region_name, index=bin_index)