Source code for lib5c.tools.outliers
from lib5c.tools.parents import level_parser, simple_in_out_parser, \
parallelization_parser, primerfile_parser
[docs]def add_outliers_tool(parser):
outliers_parser = parser.add_parser(
'outliers',
prog='lib5c outliers',
help='remove high spatial outliers',
parents=[primerfile_parser, level_parser, simple_in_out_parser,
parallelization_parser]
)
outliers_parser.add_argument(
'-f', '--fold_threshold',
type=float,
default=8.0,
help='''Remove any interaction whose fold change relative to the median
of its local neighborhood would be greater than this value after
balancing. The default is 8.0.''')
outliers_parser.add_argument(
'-w', '--window_size',
type=int,
default=5,
help='''The size of the window to use to define the local neighborhood.
This value should be an odd integer. The default is 5.''')
outliers_parser.add_argument(
'-o', '--overwrite_value',
type=str,
choices=['nan', 'median', 'zero'],
default='nan',
help='''This flag specifies what value will be used to overwrite the
high outliers. The default is 'nan'.''')
outliers_parser.set_defaults(func=outliers_tool)
[docs]def outliers_tool(parser, args):
from lib5c.parsers.primers import load_primermap
from lib5c.parsers.counts import load_counts
from lib5c.writers.counts import write_counts
from lib5c.tools.helpers import resolve_parallel, resolve_level, \
resolve_primerfile
from lib5c.algorithms.outliers import remove_high_spatial_outliers
# resolve primerfile and parallel
resolve_parallel(parser, args, subcommand='outliers')
primerfile = resolve_primerfile(args.infile, args.primerfile)
# load counts
print('loading counts')
primermap = load_primermap(primerfile)
counts = load_counts(args.infile, primermap)
# remove outliers
processed_counts = remove_high_spatial_outliers(
counts, size=args.window_size, fold_threshold=args.fold_threshold,
overwrite_value=args.overwrite_value, primermap=primermap,
level=resolve_level(primermap, args.level))
# write counts
write_counts(processed_counts, args.outfile, primermap)