# Source code for lib5c.parsers.table

"""
Module for parsing table files, which function as a simple extension of .counts
files to multiple replicates.
"""

import numpy as np

from lib5c.parsers.util import parse_field
from lib5c.util.table import make_fflj_id_map


def load_table(filename, primermap, sep='\t', dtype=float):
    """
    Loads a table into a counts_superdict structure.

    The first line of the file is treated as a header: its first column is
    ignored and each remaining column names one replicate. Every following
    non-blank line holds an identifier in its first column, which is looked up
    in the map returned by ``make_fflj_id_map(primermap)`` to obtain a
    ``(region, i, j)`` triple, followed by one value per replicate. Each value
    is passed through ``parse_field`` and written symmetrically to both
    ``[i, j]`` and ``[j, i]`` of that replicate's array for the region.
    Positions that never appear in the file keep their initial value of zero.
    Blank lines (for example a trailing empty line) are skipped.

    Parameters
    ----------
    filename : str
        The table file to load.
    primermap : primermap or pixelmap
        Defines the FFLJs or bin-bin pairs.
    sep : str
        The separator used in the table file.
    dtype : numpy-compatible dtype
        The dtype to use when constructing the arrays in the counts_superdict.

    Returns
    -------
    counts_superdict
        The loaded counts_superdict: a dict keyed by replicate name whose
        values are dicts keyed by region, each holding a square array with
        ``len(primermap[region])`` rows and columns.

    Raises
    ------
    KeyError
        If a row's identifier is not present in the FFLJ ID map.
    IndexError
        If a row has fewer value columns than the header has replicates.
    """
    # map from the identifier in the first column to (region, i, j)
    fflj_id_map = make_fflj_id_map(primermap)

    with open(filename, 'r') as handle:
        # replicate names are every header column after the first
        header = handle.readline()
        replicates = [name.strip() for name in header.split(sep)[1:]]

        # one square zero matrix per region, per replicate
        region_sizes = {region: len(primermap[region]) for region in primermap}
        counts_superdict = {
            rep: {region: np.zeros((size, size), dtype=dtype)
                  for region, size in region_sizes.items()}
            for rep in replicates
        }

        # process remaining lines
        for line in handle:
            stripped = line.strip()
            if not stripped:
                # a blank line carries no identifier; looking it up would
                # fail with KeyError('')
                continue
            pieces = stripped.split(sep)
            region, i, j = fflj_id_map[pieces[0]]
            # value columns start at index 1, right after the identifier
            for column, rep in enumerate(replicates, 1):
                parsed_value = parse_field(pieces[column])
                matrix = counts_superdict[rep][region]
                # keep the matrix symmetric
                matrix[i, j] = parsed_value
                matrix[j, i] = parsed_value

    return counts_superdict