# Source code for command_line.main

import argparse

from methods import Method
from models import Phenotype, Experiment

from .parser import Parser, Argument
from .types import Slice, one_of, Indices, dsv, Range
from .method_parser import MethodParser


# NOTE(review): the class docstring contains a `parser_name` placeholder, so it
# is presumably formatted and shown to the user as help text by `Parser`.
# Keep it user-facing and do not add stray braces to it (verify in `Parser`).
class PhenotypeFactory(Parser):
    """Provide {parser_name} samples. Requires a file (or files) with samples.

     The files should come in Delimiter Separated Values format
     (like .csv or .tsv). The default delimiter is a tab character.
     The first column of each file should contain gene identifiers.

     To use only a subset of samples from file(s) specify column numbers
     (--columns) or sample names (--samples) of desired samples.
     """

    files = Argument(
        type=argparse.FileType('r'),
        # at least one file is always required
        nargs='+',
        optional=False
    )

    name = Argument(help='Your custom name for this set of samples.')

    samples = Argument(
        type=dsv(str),
        nargs='*',
        as_many_as=files,
        help='Names of samples (columns) to be extracted from the file. '
             'Sample names are determined from the first non-empty row. '
             'Use a comma to separate samples. '
             'Samples for each of files should be separated by space.'
    )

    columns = Argument(
        # we want to handle either ":4", "5:" or even "1,2,3"
        type=one_of(Slice, Indices, Range),
        # user may (but do not have to) specify columns
        # to be extracted from given file(s).
        nargs='*',
        as_many_as=files,
        help='Columns to be extracted from files: '
             'either a comma delimited list of 0-based numbers (e.g. 0,2,3) '
             'or a range defined using Python slice notation (e.g. 3:10). '
             'Columns for each of files should be separated by space.'
    )

    delimiter = Argument(
        default='\t',
        help='Delimiter of the provided file(s). Default: tabulation mark.'
    )

    header = Argument(
        nargs='*',
        type=one_of(int, str),
        as_many_as=files,
        # the default is a callable evaluated once per file in `produce`;
        # it selects line 0 as the header for every file
        default=lambda file_object: 0,
        help='Defines how the sample names should be created. '
             'Provide a number to specify which line should be used '
             'to extract names for samples. Please remember that '
             'empty lines will be skipped. If your file has no row '
             'with sample names, provide a string to be used as a '
             'prefix for naming consecutive samples. '
             'For example, `--header cancer` will lead to naming '
             'all relevant samples like: cancer_1, cancer_2, etc. '
             'Default: create sample names from first non-empty '
             'line in the file.'
    )

    description_column = Argument(
        short='d',
        action='store_true',
        help='Enable this switch, if there is a column with columns '
             'descriptions (the column has to be on position two, '
             'i.e. immediately after gene identifiers). By default '
             'it is assumed that there is no such column.'
    )

    def produce(self, unknown_args=None):
        """Load every provided file and merge the samples into one Phenotype.

        When at least one file was given, a `Phenotype` is created from each
        file with `Phenotype.from_file` and the results are summed (starting
        from an empty `Phenotype(name)`) into `opts.phenotype`. Without files
        the namespace is returned unchanged.

        Args:
            unknown_args: Accepted for interface compatibility; not used here.

        Returns:
            The namespace of this parser (`self.namespace`).
        """
        opts = self.namespace
        name = opts.name or self.name

        if not opts.files:
            return opts

        # The default header is a callable; resolve it to one value per file.
        if callable(opts.header):
            opts.header = [opts.header(f) for f in opts.files]

        sample_collections = []

        for i, file_obj in enumerate(opts.files):
            header = opts.header[i]
            # An integer header is a line number to read sample names from;
            # anything else is used as a prefix for generated sample names.
            use_header = isinstance(header, int)

            sample_collections.append(
                Phenotype.from_file(
                    f'Sample collection, part {i} of {name}',
                    file_obj,
                    columns_selector=opts.columns[i].get_iterator if opts.columns else None,
                    samples=opts.samples[i] if opts.samples else None,
                    # `reverse` is not a declared Argument; it is only present
                    # when the namespace was populated with it by a caller.
                    reverse_selection=getattr(opts, 'reverse', False),
                    delimiter=opts.delimiter,
                    header_line=header if use_header else None,
                    use_header=use_header,
                    prefix=header if not use_header else None,
                    description_column=opts.description_column
                )
            )

        opts.phenotype = sum(sample_collections, Phenotype(name))
        return opts


# NOTE(review): the class docstring is presumably displayed as help text by
# `Parser` (verify), hence implementation notes are kept in comments instead.
class SingleFileExperimentFactory(Parser):
    """Provide both: case and control samples from a single file.

    This is just a shortcut for specifying the same file for both:
    case and control samples sets. You have to provide --case or
    --control (or both) to specify which columns contain controls.

    If you specify only one of --case and --control, it will be
    assumed that all other columns should be used for the other
    set of samples (if you use `--case 0,1,2` and your file has
    five columns with samples, then columns three and four will
    be used to create control samples).

    To enable more advanced features, please use `control`&`case`
    options (instead of the currently selected `data` sub-parser).
    """

    # exactly one file is required
    files = Argument(
        type=argparse.FileType('r'),
        nargs=1,    # transforms result into a single-element list
        optional=False,
        help='file with samples for both control and cases.'
    )
    case = Argument(
        type=one_of(Slice, Indices, Range),
        nargs=1,
        help='columns from which case samples should be extracted.'
    )
    control = Argument(
        type=one_of(Slice, Indices, Range),
        nargs=1,
        help='columns from which control samples should be extracted.',
    )

    def produce(self, unknown_args=None):
        """Build the `control` and `case` sample sets from the single file.

        If only one of --case / --control was given, the other group is
        marked as "reversed" (`opts.reverse_control` / `opts.reverse_case`)
        and is built from the columns of the provided group with
        `reverse=True` passed to `PhenotypeFactory`.

        Args:
            unknown_args: Accepted for interface compatibility; not used here.

        Returns:
            The namespace of this parser; when a file was given, its `control`
            and `case` attributes hold the results of
            `PhenotypeFactory(...).produce()`.

        Raises:
            ValueError: If a file was given but neither --case nor --control.
        """
        opts = self.namespace

        def produce_phenotype(created_group, other_group):
            # Read the flag's value rather than its mere presence, so that a
            # flag explicitly set to False is not mistaken for a reversal.
            reverse = bool(getattr(opts, 'reverse_' + created_group, False))
            # A reversed group has no columns of its own: it takes the columns
            # of the other group and passes `reverse=True` along with them.
            get_columns_from = other_group if reverse else created_group

            return PhenotypeFactory(
                name=created_group,
                files=opts.files,
                columns=getattr(opts, get_columns_from),
                reverse=reverse
            ).produce()

        if not opts.files:
            return opts

        if not (opts.case and opts.control):
            if opts.case:
                opts.reverse_control = True
            elif opts.control:
                opts.reverse_case = True
            else:
                raise ValueError(
                    'Neither --case nor --control provided: '
                    'please specify which columns should be used as control '
                    'and which should be used as the case.'
                )

        control = produce_phenotype('control', 'case')

        # reuse the same file(s): rewind before reading them a second time
        for f in opts.files:
            f.seek(0)

        case = produce_phenotype('case', 'control')

        # Overwrite the column selectors only after both groups were built,
        # because each group may need the other group's columns.
        opts.control = control
        opts.case = case

        return opts


class CLIExperiment(Parser):
    """Use both: case and control or data to create an Experiment."""

    pull_to_namespace_above = True

    # sub-parsers: either `data` alone, or `case` together with `control`
    control = PhenotypeFactory()
    case = PhenotypeFactory()
    data = SingleFileExperimentFactory()

    def produce(self, unknown_args=None):
        """Validate the provided sample sets and create `opts.experiment`.

        Accepts either `data` alone (case and control are then taken from the
        namespace of the `data` sub-parser) or both `case` and `control`.
        The `data` attribute is removed from the namespace afterwards.

        Args:
            unknown_args: Accepted for interface compatibility; not used here.

        Returns:
            The namespace of this parser with `experiment` set to
            `Experiment(case.phenotype, control.phenotype)`.

        Raises:
            ValueError: If `data` is combined with `case`/`control`, or if
                the provided combination does not define both sample sets.
        """
        opts = self.namespace

        if opts.data:
            if opts.control or opts.case:
                raise ValueError('Cannot handle data and case/control at once')

            data_namespace = self.data.namespace
            opts.case = data_namespace.case
            opts.control = data_namespace.control
        elif not (opts.case and opts.control):
            # at least one of the two sets is missing: report which one
            if opts.case:
                raise ValueError('Control has not been provided!')
            if opts.control:
                raise ValueError('Case has not been provided!')
            raise ValueError('Neither data nor (case & control) have been provided!')

        del opts.data

        opts.experiment = Experiment(opts.case.phenotype, opts.control.phenotype)

        return opts


class CLI(Parser):
    """The main parser, the one exposed directly to the user."""

    method_name = Argument(choices=Method.members, name='method', optional=False)
    experiment = CLIExperiment()

    @staticmethod
    def create_method(name):
        """Return a `MethodParser` for the method registered under `name`.

        Different methods require different arguments, hence each method
        gets its own parser.

        Raises:
            KeyError: If `name` is not a key of `Method.members`.
        """
        return MethodParser(method=Method.members[name])

    def parse_args(self, args):
        """Parse `args`, redirecting help requests to the relevant parser.

        When `-h`/`--help` is present together with at least one other
        argument, the first remaining argument is looked up (in this order)
        as a method name, a sub-parser name and a lifted parser name. The
        first match receives the rest of the arguments followed by `-h`.
        Otherwise parsing is delegated to the parent class.

        Args:
            args: Iterable of command line arguments.

        Returns:
            Whatever the selected parser's `parse_args` returns.
        """
        help_args = {'-h', '--help'}

        if help_args.intersection(args):
            args_without_help = [arg for arg in args if arg not in help_args]

            if args_without_help:
                name, *rest = args_without_help

                def find_method_parser(method_name):
                    # build a parser only for the requested method
                    if method_name in Method.members:
                        return self.create_method(method_name)
                    return None

                # TODO: in case of a conflict, help for both (for a sub-parser
                # and for a method) should be displayed. Currently only the
                # first match is used, so the order below sets the precedence.
                lookups = [
                    find_method_parser,
                    self.subparsers.get,
                    self.lifted_parsers.get,
                ]

                for lookup in lookups:
                    parser = lookup(name)
                    if parser:
                        return parser.parse_args(rest + ['-h'])

        return super().parse_args(args)

    def produce(self, unknown_args):
        """Create the chosen method from arguments this parser did not know.

        The arguments recognized by the method's parser are removed from
        `unknown_args` in place, so the caller only sees what is still
        unrecognized.

        Args:
            unknown_args: Mutable list of arguments not recognized so far.

        Returns:
            The namespace of this parser with `method` replaced by an
            initialized method object.
        """
        options = self.namespace

        method_parser = self.create_method(options.method)

        # parse arguments
        method_options, remaining_unknown_args = method_parser.parse_known_args(unknown_args)

        # Keep exactly the leftovers, in place. Filtering by value would leave
        # a consumed token behind whenever an equal token is still unknown
        # (e.g. the same number given to two different options).
        unknown_args[:] = remaining_unknown_args

        # and initialize the method with these arguments
        options.method = method_parser.method(**vars(method_options))

        return options
# Source code for command_line.main
#
# Pathway Analysis
#
# Navigation
#
# Related Topics