Source code for lib5c.tools.hic_extract

def add_hic_extract_tool(parser):
    """
    Register the ``hic-extract`` subcommand on ``parser``.

    Parameters
    ----------
    parser : object exposing ``add_parser()``
        The sub-parsers collection the new subcommand is attached to
        (presumably the result of ``ArgumentParser.add_subparsers()``).

    Notes
    -----
    The subcommand takes four positional arguments (``matrix``, ``range``,
    ``output_countsfile``, ``output_bedfile``) and one optional argument
    (``-b/--bias_vector_file``). Its ``func`` default is set to
    ``hic_extract_tool``.
    """
    subcommand = parser.add_parser(
        'hic-extract',
        prog='lib5c hic-extract',
        help='extract chunks from Hi-C data',
    )

    # Positional arguments, in command-line order. A literal percent sign
    # must be written as ``%%`` because argparse %-formats help strings.
    positionals = (
        ('matrix',
         'Path to contact matrix file. %%c will be replaced by the '
         'chromosome name if multiple files are necessary.'),
        ('range',
         "Genomic range to extract, in the form 'chrom:start-end' Pass a "
         "path to a tab-separated file whose columns are region names and "
         "ranges to extract multiple named ranges."),
        ('output_countsfile',
         'Path to write extracted counts to.'),
        ('output_bedfile',
         'Path to write information about extracted bins to.'),
    )
    for dest, description in positionals:
        subcommand.add_argument(dest, type=str, help=description)

    subcommand.add_argument(
        '-b', '--bias_vector_file',
        type=str,
        help='Path to file containing bias vector that counts will be '
             'divided by before being written. %%c will be replaced by the '
             'chromosome name if multiple files are necessary.',
    )

    # The dispatcher looks up ``func`` to find the implementation to run.
    subcommand.set_defaults(func=hic_extract_tool)
def parse_range_string(range_string):
    """
    Parse a genomic range written as ``chrom:start-end``.

    Parameters
    ----------
    range_string : str
        The range to parse, e.g. ``'chr3:34000000-35000000'``. Leading and
        trailing whitespace is ignored, and the coordinates may contain
        thousands separators (``'chr3:34,000,000-35,000,000'``). The
        chromosome name may itself contain colons; the *last* colon
        separates the name from the coordinates.

    Returns
    -------
    dict
        A dict with the keys ``'chrom'`` (str), ``'start'`` (int) and
        ``'end'`` (int).

    Raises
    ------
    ValueError
        If ``range_string`` is not of the form ``chrom:start-end`` or the
        coordinates are not integers.
    """
    message = ("invalid genomic range %r: expected the form "
               "'chrom:start-end'" % (range_string,))

    # Split on the last colon so that contig names containing ':' survive.
    chrom, colon, start_end = range_string.strip().rpartition(':')
    start, dash, end = start_end.partition('-')
    if not colon or not dash:
        raise ValueError(message)

    try:
        return {
            'chrom': chrom.strip(),
            # Drop thousands separators before converting.
            'start': int(start.replace(',', '')),
            'end': int(end.replace(',', '')),
        }
    except ValueError:
        # Re-raise with a message that names the offending input.
        raise ValueError(message)
def _read_named_ranges(path):
    """Read a tab-separated ``name<TAB>range`` file into a dict of ranges."""
    ranges = {}
    with open(path, 'r') as handle:
        for line_number, line in enumerate(handle, 1):
            stripped = line.strip()
            # Skip blank lines and comments instead of failing on them.
            if not stripped or stripped.startswith('#'):
                continue
            pieces = stripped.split('\t')
            if len(pieces) < 2:
                raise ValueError(
                    "%s, line %d: expected two tab-separated columns "
                    "(region name, range), got %r"
                    % (path, line_number, stripped))
            ranges[pieces[0]] = parse_range_string(pieces[1])
    return ranges


def hic_extract_tool(parser, args):
    """
    Run the ``hic-extract`` subcommand.

    Resolves the requested range(s), substitutes each region's chromosome
    name for ``%c`` in the matrix (and optional bias vector) path, loads the
    data with ``load_range_from_contact_matrix()`` and writes the result with
    ``write_counts()`` and ``write_primermap()``.

    Parameters
    ----------
    parser : object
        Unused by this function; accepted for the tool calling convention.
    args : object
        Parsed command-line arguments. The attributes read are ``range``,
        ``matrix``, ``bias_vector_file``, ``output_countsfile`` and
        ``output_bedfile``.

    Raises
    ------
    ValueError
        If a range string is malformed or a line of the ranges file has
        fewer than two tab-separated columns.

    Notes
    -----
    ``args.range`` is treated as a single inline range, stored under the
    region name ``'unnamed_region'``, when it contains a colon and does not
    name an existing file. Otherwise it is read as a tab-separated file of
    region names and ranges; blank lines and lines starting with ``#`` are
    ignored.
    """
    # Imports are deferred so that importing this module stays cheap.
    import os

    from lib5c.parsers.hic import load_range_from_contact_matrix
    from lib5c.writers.counts import write_counts
    from lib5c.writers.primers import write_primermap

    # resolve ranges
    # A path may legitimately contain ':' (e.g. a Windows drive letter), so
    # an existing file always wins over the inline-range interpretation.
    if ':' in args.range and not os.path.isfile(args.range):
        ranges = {'unnamed_region': parse_range_string(args.range)}
    else:
        ranges = _read_named_ranges(args.range)

    # resolve matrix: one path per region, with %c -> chromosome name
    matrices = {region: args.matrix.replace('%c', ranges[region]['chrom'])
                for region in ranges}

    # resolve bias_vectors: same substitution, only if a file was given
    bias_vectors = None
    if args.bias_vector_file is not None:
        bias_vectors = {
            region: args.bias_vector_file.replace(
                '%c', ranges[region]['chrom'])
            for region in ranges
        }

    # resolve region names: every region is loaded under its own name
    region_names = {region: region for region in ranges}

    # parse
    counts, pixelmap = load_range_from_contact_matrix(
        matrices, ranges, region_name=region_names, norm_file=bias_vectors)

    # write
    write_counts(counts, args.output_countsfile, pixelmap)
    write_primermap(pixelmap, args.output_bedfile)