Source code for lib5c.writers.counts

"""
Module for writing .counts files.
"""

import numpy as np

from lib5c.util.primers import aggregate_primermap
from lib5c.util.system import check_outdir


[docs]def write_counts(counts, outfile, primermap, skip_zeros=False): """ Writes a standard counts file. Parameters ---------- counts : dict of 2d arrays The counts to be written. The keys are the region names. The values are the arrays of counts values for that region. These arrays should be square and symmetric. outfile : str String reference to the file to write counts to. primermap : dict of lists of dicts The keys of the outer dict are region names. The values are lists, where the ``i`` th entry represents the ``i`` th primer or bin in that region. Primers or bins are represented as dicts with the following structure:: { 'chrom' : str, 'start' : int, 'end' : int, 'name' : str, 'orientation': "3'" or "5'" } The strand key is optional and only makes sense when writing primer-primer interaction counts. If present, impossible primer-primer combinations will be omitted from the output. See ``lib5c.parsers.primers.get_primermap()`` or ``lib5c.parsers.primers.get_pixelmap()``. skip_zeros : bool Pass True to omit writing output lines for bin-bin pairs with zero, nan, or empty string value. """ check_outdir(outfile) if outfile.endswith('.npz'): np.savez(outfile, **counts) return with open(outfile, 'w') as handle: for region in counts: for i in range(len(counts[region])): for j in range(i+1): # skip impossible primer-primer combinations if ('orientation' in primermap[region][i].keys() and 'orientation' in primermap[region][j].keys() and primermap[region][i]['orientation'] == primermap[region][j]['orientation']): continue value = counts[region][i, j] # skip zeros if skip_zeros and (not value or (isinstance(value, float) and np.isnan(value))): continue handle.write('%s\t%s\t%s\n' % (primermap[region][i]['name'], primermap[region][j]['name'], value))
[docs]def write_cis_trans_counts(counts, outfile, primermap, omit_zeros=True): """ Writes a counts file including both the cis and trans counts. Parameters ---------- counts : 2d numpy array The square, symmetric array of counts to be written. This must be a single matrix that contains the cis contacts for all regions as well as the trans contacts between them. The rows of the matrix must correspond to genomic loci in order of increasing genomic coordinate. outfile : str String reference to the file to write counts to. primermap : dict of lists of dicts The keys of the outer dict are region names. The values are lists, where the ``i`` th entry represents the ``i`` th primer or bin in that region. Primers or bins are represented as dicts with the following structure:: { 'chrom' : str, 'start' : int, 'end' : int, 'name' : str, 'orientation': "3'" or "5'" } The strand key is optional and only makes sense when writing primer-primer interaction counts. If present, impossible primer-primer combinations will be omitted from the output. See ``lib5c.parsers.primers.get_primermap()`` or ``lib5c.parsers.primers.get_pixelmap()``. omit_zeros : bool If True, lines will not be written to the outfile if the counts for that line are zero. """ check_outdir(outfile) aggregated_primermap = aggregate_primermap(primermap) with open(outfile, 'w') as handle: for i in range(len(aggregated_primermap)): for j in range(i+1): # skip impossible primer-primer combinations if ('orientation' in aggregated_primermap[i].keys() and 'orientation' in aggregated_primermap[j].keys()): if aggregated_primermap[i]['orientation'] == \ aggregated_primermap[j]['orientation']: continue # skip zeros if omit_zeros and counts[i, j] == 0: continue # write line handle.write('%s\t%s\t%s\n' % (aggregated_primermap[i]['name'], aggregated_primermap[j]['name'], counts[i, j]))
# test client
[docs]def main(): from lib5c.parsers.primers import load_primermap from lib5c.parsers.counts import load_counts pixelmap = load_primermap('test/bins.bed') counts = load_counts('test/test.counts', pixelmap) # pvalues = load_counts('test/pvalues.counts') write_counts(counts, 'test/test_written.counts', pixelmap) primermap = load_primermap('test/primers.bed') counts = load_counts('test/test_raw.counts', primermap) write_counts(counts, 'test/test_raw_cis.counts', primermap)
if __name__ == "__main__": main()