# Source code for lib5c.parsers.table

"""
Module for parsing table files, which function as a simple extension of .counts
files to multiple replicates.
"""

import numpy as np

from lib5c.parsers.util import parse_field
from lib5c.util.table import make_fflj_id_map


def load_table(filename, primermap, sep='\t', dtype=float):
    """
    Loads a table into a counts_superdict structure.

    The first line of the file is a header; its fields after the first one
    name the replicates. Every following non-blank line holds an identifier
    in its first field (looked up in the FFLJ id map built from
    ``primermap``), followed by one value per replicate. Each value is
    written to both ``[i, j]`` and ``[j, i]`` of the matching region's
    matrix, so the loaded matrices are symmetric.

    Parameters
    ----------
    filename : str
        The table file to load.
    primermap : primermap or pixelmap
        Defines the FFLJs or bin-bin pairs.
    sep : str
        The separator used in the table file.
    dtype : numpy-compatible dtype
        The dtype to use when constructing the arrays in the counts_superdict.

    Returns
    -------
    counts_superdict
        The loaded counts_superdict.

    Raises
    ------
    KeyError
        If the first field of a data line is not present in the FFLJ id map.
    IndexError
        If a data line has fewer value fields than the header has replicates.

    Notes
    -----
    Lines that are empty or contain only whitespace (for example a trailing
    blank line at the end of the file) are skipped.
    """
    # prepare fflj_id_map
    fflj_id_map = make_fflj_id_map(primermap)

    with open(filename, 'r') as handle:
        # get replicates from header (the first header field labels the id
        # column and is discarded)
        replicates = [part.strip()
                      for part in handle.readline().split(sep)[1:]]

        # initialize counts_superdict: one square zero matrix per region per
        # replicate, sized by the number of entries in that region
        counts_superdict = {
            rep: {
                region: np.zeros(
                    (len(primermap[region]), len(primermap[region])),
                    dtype=dtype)
                for region in primermap
            }
            for rep in replicates
        }

        # process remaining lines
        for line in handle:
            stripped = line.strip()
            if not stripped:
                # a blank line has no identifier to look up; skip it rather
                # than failing on fflj_id_map['']
                continue
            pieces = stripped.split(sep)
            region, i, j = fflj_id_map[pieces[0]]
            for k, rep in enumerate(replicates):
                # index access (not zip) so that a short row still raises
                # instead of silently leaving replicates unfilled
                parsed_value = parse_field(pieces[k + 1])
                matrix = counts_superdict[rep][region]
                matrix[i, j] = parsed_value
                matrix[j, i] = parsed_value

    return counts_superdict