Source code for lib5c.tools.kr
from lib5c.tools.parents import simple_in_out_parser, parallelization_parser, \
primerfile_parser
def add_kr_tool(parser):
    """
    Register the ``kr`` subcommand and its command-line options.

    A sub-parser named ``kr`` is created via ``parser.add_parser()``; it
    inherits the options of ``primerfile_parser``, ``simple_in_out_parser``
    and ``parallelization_parser`` and dispatches to ``kr_tool`` through the
    ``func`` default.

    Parameters
    ----------
    parser
        Object exposing ``add_parser()`` -- presumably the action returned
        by ``argparse.ArgumentParser.add_subparsers()``; confirm against the
        caller.
    """
    bias_help = (
        'If this flag is present, the bias vectors will be written to\n'
        '.bias files located next to the output .counts files.'
    )
    iterations_help = 'Maximum number of iterations. The default is 3000.'
    window_help = (
        'Size of window, in units of matrix indices, to use to impute nan\n'
        'values in the original counts matrix. Pass 0 to skip imputation, which\n'
        'is the default behavior.'
    )

    # (flags, keyword settings) for each option, in registration order
    option_table = (
        (('-B', '--output_bias'),
         {'action': 'store_true', 'help': bias_help}),
        (('-i', '--max_iterations'),
         {'type': int, 'default': 3000, 'help': iterations_help}),
        (('-s', '--imputation_size'),
         {'type': int, 'default': 0, 'help': window_help}),
    )

    inherited = [primerfile_parser, simple_in_out_parser,
                 parallelization_parser]
    subcommand = parser.add_parser(
        'kr',
        prog='lib5c kr',
        help='knight-ruiz matrix balancing normalization',
        parents=inherited,
    )
    for flags, settings in option_table:
        subcommand.add_argument(*flags, **settings)

    # route invocations of this subcommand to its implementation
    subcommand.set_defaults(func=kr_tool)
def kr_tool(parser, args):
    """
    Entry point of the ``kr`` subcommand.

    Loads the counts named by ``args.infile``, balances them with
    ``kr_balance_matrix``, and writes the balanced counts to
    ``args.outfile``. When ``args.output_bias`` is set, the bias vectors are
    also written to ``<outfile without its extension>.bias``.

    Parameters
    ----------
    parser
        The parser that produced ``args``; only forwarded to
        ``resolve_parallel``.
    args
        Parsed arguments. The attributes read here are ``infile``,
        ``outfile``, ``primerfile``, ``max_iterations``, ``imputation_size``
        and ``output_bias``.
    """
    import os
    from lib5c.tools.helpers import resolve_primerfile, resolve_parallel
    from lib5c.algorithms.knight_ruiz import kr_balance_matrix
    from lib5c.parsers.primers import load_primermap
    from lib5c.parsers.counts import load_counts
    from lib5c.writers.counts import write_counts
    from lib5c.writers.bias import write_cis_bias_vector

    # parallel resolution runs first (NOTE(review): presumably it takes over
    # when several input files are given -- confirm in lib5c.tools.helpers),
    # then the primerfile to use is determined
    resolve_parallel(parser, args, subcommand='kr')
    primerfile_path = resolve_primerfile(args.infile, args.primerfile)

    # read the input
    print('loading counts')
    primermap = load_primermap(primerfile_path)
    raw_counts = load_counts(args.infile, primermap)

    # Knight-Ruiz balancing; the third return value is not used by this tool
    print('kr balancing')
    balanced, biases, _errs = kr_balance_matrix(
        raw_counts,
        max_iter=args.max_iterations,
        imputation_size=args.imputation_size,
    )

    # write the balanced counts
    print('writing counts')
    write_counts(balanced, args.outfile, primermap)

    # the bias vector is optional output
    if not args.output_bias:
        return
    print('writing bias vector')
    outfile_stem = os.path.splitext(args.outfile)[0]
    bias_path = '%s.bias' % outfile_stem
    write_cis_bias_vector(biases, primermap, bias_path)