Source code for lib5c.parsers.primer_names

"""
Module providing helper functions for working with primer naming conventions,
necessary for parsing certain primerfiles.
"""


def guess_primer_name_parser(name):
    """
    Pick a primer or bin name parser by trial on a sample name.

    Each candidate parser is called on ``name`` in a fixed order (double
    alternating primer parser, default primer parser, default bin parser). The
    first candidate that does not raise ``ValueError`` or ``IndexError`` is
    selected.

    Parameters
    ----------
    name : str
        A representative primer or bin name to probe the parsers with.

    Returns
    -------
    function or None
        The first parser that accepted ``name``, or ``None`` when every
        candidate rejected it.
    """
    candidates = (dblalt_primer_parser, default_primer_parser,
                  default_bin_parser)
    for candidate in candidates:
        try:
            candidate(name)
        except (ValueError, IndexError):
            # This naming scheme does not fit; move on to the next one.
            continue
        return candidate
    return None


def default_primer_parser(name):
    """
    Parse a primer name that follows the default naming scheme.

    The name is split on underscores. The third field is the region, the
    fourth field is the direction (``FOR`` or ``REV``), and the fifth field,
    up to its first colon, is the primer number.

    Parameters
    ----------
    name : str
        The name of the primer found in the appropriate column of the primer
        bedfile.

    Returns
    -------
    dict
        This dict has the following structure::

            {
                'region': str,
                'number': int,
                'orientation': "3'" or "5'",
                'strand': '+' or '-',
                'name': str
            }

        ``'name'`` is the input name truncated at its first colon; the other
        fields are parsed from the primer name.

    Raises
    ------
    IndexError
        If the name has fewer than five underscore-separated fields.
    ValueError
        If the number field is not an integer, or if the direction field is
        neither ``FOR`` nor ``REV``.

    Notes
    -----
    You can write other name parsers to accommodate different primer naming
    conventions.
    """
    # direction keyword -> (orientation, strand)
    directions = {'FOR': ("3'", '+'),
                  'REV': ("5'", '-')}

    fields = name.split('_')
    region = fields[2]
    # Anything after the first colon in the number field is ignored.
    number = int(fields[4].partition(':')[0])

    direction = fields[3]
    if direction not in directions:
        raise ValueError('default primer name scheme violation')
    orientation, strand = directions[direction]

    return {
        'region': region,
        'number': number,
        'orientation': orientation,
        'strand': strand,
        'name': name.partition(':')[0],
    }


def dblalt_primer_parser(name):
    """
    Parse a primer name that follows the double alternating naming scheme.

    The name is split on underscores. The region is the third field up to its
    first hyphen. The direction is the fourth field up to its first hyphen and
    must be one of ``FOR``, ``LFOR``, ``REV`` or ``LREV``. The fifth field, up
    to its first colon, must contain at least one ``|``; the primer number is
    the part before the first ``|``.

    Parameters
    ----------
    name : str
        The name of the primer found in the appropriate column of the primer
        bedfile.

    Returns
    -------
    dict
        This dict has the following structure::

            {
                'region': str,
                'number': int,
                'orientation': "3'" or "5'",
                'strand': '+' or '-',
                'name': str
            }

        ``'name'`` is the first two colon-separated segments of the input
        name, rejoined with a colon; the other fields are parsed from the
        primer name.

    Raises
    ------
    IndexError
        If the name has fewer than five underscore-separated fields, or if it
        contains no colon.
    ValueError
        If the number field contains no ``|``, if the number is not an
        integer, or if the direction is not recognized.

    Notes
    -----
    You can write other name parsers to accommodate different primer naming
    conventions.
    """
    # direction keyword -> (orientation, strand)
    directions = {'FOR': ("3'", '+'),
                  'LFOR': ("3'", '+'),
                  'REV': ("5'", '-'),
                  'LREV': ("5'", '-')}

    fields = name.split('_')
    region = fields[2].partition('-')[0]

    # A dblalt number field looks like "<number>|<something>"; a field without
    # any "|" belongs to a different naming scheme.
    number_tokens = fields[4].partition(':')[0].split('|')
    if len(number_tokens) == 1:
        raise ValueError('dblalt primer name scheme violation')
    number = int(number_tokens[0])

    direction = fields[3].partition('-')[0]
    if direction not in directions:
        raise ValueError('dblalt primer name scheme violation')
    orientation, strand = directions[direction]

    # Keep only the first two colon-separated segments of the original name.
    segments = name.split(':')
    corrected_name = segments[0] + ':' + segments[1]

    return {
        'region': region,
        'number': number,
        'orientation': orientation,
        'strand': strand,
        'name': corrected_name,
    }


def default_bin_parser(name):
    """
    Parse a bin name that follows the default naming scheme.

    The name is split on underscores. The first field is the region, the
    second field must be the literal ``BIN``, and the third field is the
    integer bin index.

    Parameters
    ----------
    name : str
        The name of the bin found in the appropriate column of the bin bedfile.

    Returns
    -------
    dict
        This dict has the following structure::

            {
                'region': str,
                'index': int
            }

        These fields are parsed from the bin name.

    Raises
    ------
    IndexError
        If the name has fewer than three underscore-separated fields.
    ValueError
        If the second field is not ``BIN`` or the third field is not an
        integer.

    Notes
    -----
    You can write other name parsers to accommodate different bin naming
    conventions.
    """
    fields = name.split('_')
    region = fields[0]
    tag = fields[1]
    if not tag == 'BIN':
        raise ValueError('default bin name scheme violation')
    return {'region': region, 'index': int(fields[2])}