import argparse
from methods import Method
from models import Phenotype, Experiment
from .parser import Parser, Argument
from .types import Slice, one_of, Indices, dsv, Range
from .method_parser import MethodParser
[docs]class PhenotypeFactory(Parser):
"""Provide {parser_name} samples. Requires a file (or files) with samples.
The files should come in Delimiter Separated Values format
(like .csv or .tsv). The default delimiter is a tab character.
The first column of each file should contain gene identifiers.
To use only a subset of samples from files(s) specify column numbers
(--columns) or sample names (--samples) of desired samples.
"""
files = Argument(
type=argparse.FileType('r'),
# at least one file is always required
nargs='+',
optional=False
)
name = Argument(help='Your custom name for this set of samples.')
samples = Argument(
type=dsv(str),
nargs='*',
as_many_as=files,
help='Names of samples (columns) to be extracted from the file. '
'Sample names are determined from the first non-empty row. '
'Use a comma to separate samples. '
'Samples for each of files should be separated by space.'
)
columns = Argument(
# we want to handle either ":4", "5:" or even "1,2,3"
type=one_of(Slice, Indices, Range),
# user may (but do not have to) specify columns
# to be extracted from given file(s).
nargs='*',
as_many_as=files,
help='Columns to be extracted from files: '
'either a comma delimited list of 0-based numbers (e.g. 0,2,3) '
'or a range defined using Python slice notation (e.g. 3:10). '
'Columns for each of files should be separated by space.'
)
delimiter = Argument(
default='\t',
help='Delimiter of the provided file(s). Default: tabulation mark.'
)
header = Argument(
nargs='*',
type=one_of(int, str),
as_many_as=files,
default=lambda file_object: 0,
help='Defines how the sample names should be created. '
'Provide a number to specify which line should be used '
'to extract names for samples. Please remember that '
'empty lines will be skipped. If your file has no row '
'with sample names, provide a string to be used as a '
'prefix for naming consecutive samples. '
'For example, `--header cancer` will lead to naming '
'all relevant samples like: cancer_1, cancer_2, etc. '
'Default: create sample names from first non-empty '
'line in the file.'
)
description_column = Argument(
short='d',
action='store_true',
help='Enable this switch, if there is a column with columns '
'descriptions (the column has to be on position two, '
'i.e. immediately after gene identifiers). By default '
'it is assumed that there is no such column.'
)
[docs] def produce(self, unknown_args=None):
opts = self.namespace
name = opts.name or self.name
if opts.files:
# load all files
sample_collections = []
if callable(opts.header):
opts.header = [opts.header(f) for f in opts.files]
for i, file_obj in enumerate(opts.files):
use_header = isinstance(opts.header[i], int)
sample_collections.append(
Phenotype.from_file(
f'Sample collection, part {i} of {name}',
file_obj,
columns_selector=opts.columns[i].get_iterator if opts.columns else None,
samples=opts.samples[i] if opts.samples else None,
reverse_selection=getattr(opts, 'reverse', False),
delimiter=opts.delimiter,
header_line=opts.header[i] if use_header else None,
use_header=use_header,
prefix=opts.header[i] if not use_header else None,
description_column=opts.description_column
)
)
opts.phenotype = sum(sample_collections, Phenotype(name))
return opts
[docs]class SingleFileExperimentFactory(Parser):
"""Provide both: case and control samples from a single file.
This is just a shortcut for specifying the same file for both:
case and control samples sets. You have to provide --case or
--control (or both) to specify which columns contain controls.
If you specify only one of --case and --control, it will be
assumed that all other columns should be used for the other
set of samples (if you use `--case 0,1,2` and your file has
five columns with samples, then columns three and four will
be used to create control samples).
To enable more advanced features, please use `control`&`case`
options (instead of the currently selected `data` sub-parser).
"""
# exactly one file is required
files = Argument(
type=argparse.FileType('r'),
nargs=1, # transforms result into a single-element list
optional=False,
help='file with samples for both control and cases.'
)
case = Argument(
type=one_of(Slice, Indices, Range),
nargs=1,
help='columns from which case samples should be extracted.'
)
control = Argument(
type=one_of(Slice, Indices, Range),
nargs=1,
help='columns from which control samples should be extracted.',
)
[docs] def produce(self, unknown_args=None):
opts = self.namespace
def produce_phenotype(created_group, other_group):
reverse = hasattr(opts, 'reverse_' + created_group)
get_columns_from = created_group
if reverse:
get_columns_from = other_group
return PhenotypeFactory(
name=created_group,
files=opts.files,
columns=getattr(opts, get_columns_from),
reverse=reverse
).produce()
if opts.files:
if not (opts.case and opts.control):
if opts.case:
opts.reverse_control = True
elif opts.control:
opts.reverse_case = True
else:
raise ValueError(
'Neither --case nor --control provided: '
'please specify which columns should be used as control '
'and which should be used as the case.'
)
phenotypes = {'control': produce_phenotype('control', 'case')}
# reuse the same file(s)
for f in opts.files:
f.seek(0)
phenotypes['case'] = produce_phenotype('case', 'control')
for name, phenotype in phenotypes.items():
setattr(opts, name, phenotype)
return opts
[docs]class CLIExperiment(Parser):
"""Use both: case and control or data to create an Experiment."""
pull_to_namespace_above = True
control = PhenotypeFactory()
case = PhenotypeFactory()
data = SingleFileExperimentFactory()
[docs] def produce(self, unknown_args=None):
opts = self.namespace
if opts.data:
if opts.control or opts.case:
raise ValueError('Cannot handle data and case/control at once')
opts.case = self.data.namespace.case
opts.control = self.data.namespace.control
elif opts.case and opts.control:
# that's nice :)
pass
elif opts.case:
raise ValueError('Control has not been provided!')
elif opts.control:
raise ValueError('Case has not been provided!')
else:
raise ValueError('Neither data nor (case & control) have been provided!')
del opts.data
opts.experiment = Experiment(opts.case.phenotype, opts.control.phenotype)
return opts
[docs]class CLI(Parser):
"""The main parser, the one exposed directly to the user."""
method_name = Argument(choices=Method.members, name='method', optional=False)
experiment = CLIExperiment()
@staticmethod
def create_method(name):
# first - take an appropriate method class
method = Method.members[name]
# initialize parser for this method
# (different methods require different arguments)
method_parser = MethodParser(method=method)
return method_parser
[docs] def parse_args(self, args):
help_args = {'-h', '--help'}
if help_args.intersection(args):
args_without_help = [
arg
for arg in args
if arg not in help_args
]
if len(args_without_help) != 0:
name = args_without_help[0]
# in case of a conflict, help for both (for a sub-parser
# and for a method) should be displayed.
methods = {
name: MethodParser(method=method)
for name, method in Method.members.items()
}
def match_parser(subparsers):
return subparsers.get(name, None)
all_subparsers = [methods, self.subparsers, self.lifted_parsers]
for parser in filter(bool, map(match_parser, all_subparsers)):
return parser.parse_args(args_without_help[1:] + ['-h'])
return super().parse_args(args)
[docs] def produce(self, unknown_args):
options = self.namespace
method_parser = self.create_method(options.method)
# parse arguments
method_options, remaining_unknown_args = method_parser.parse_known_args(unknown_args)
for argument in unknown_args[:]:
if argument not in remaining_unknown_args:
unknown_args.remove(argument)
# ant initialize the method with these arguments
options.method = method_parser.method(**vars(method_options))
return options