iterative/dvc

View on GitHub
dvc/commands/stage.py

Summary

Maintainability
B
4 hrs
Test Coverage
import argparse
import logging
from itertools import chain, filterfalse
from typing import TYPE_CHECKING, Dict, Iterable, List

from dvc.cli import completion
from dvc.cli.command import CmdBase
from dvc.cli.utils import append_doc_link, fix_subparsers
from dvc.utils.cli_parse import parse_params
from dvc.utils.humanize import truncate_text

if TYPE_CHECKING:
    from dvc.output import Output
    from dvc.stage import Stage

logger = logging.getLogger(__name__)

MAX_TEXT_LENGTH = 80


def generate_description(stage: "Stage") -> str:
    def part_desc(outs: Iterable["Output"]) -> str:
        return ", ".join(out.def_path for out in outs)

    if not stage.deps and not stage.outs:
        return "No outputs or dependencies"

    if not stage.outs and stage.deps:
        return "Depends on " + part_desc(stage.deps)

    def is_plot_or_metric(out: "Output"):
        return bool(out.plot) or bool(out.metric)

    desc: List[str] = []

    outs = list(filterfalse(is_plot_or_metric, stage.outs))
    if outs:
        desc.append("Outputs " + part_desc(outs))

    plots_and_metrics = list(filter(is_plot_or_metric, stage.outs))
    if plots_and_metrics:
        desc.append("Reports " + part_desc(plots_and_metrics))

    return "; ".join(desc)


def prepare_description(
    stage: "Stage", max_length: int = MAX_TEXT_LENGTH
) -> str:
    desc = stage.short_description() or generate_description(stage)
    return truncate_text(desc, max_length)


def prepare_stages_data(
    stages: Iterable["Stage"],
    description: bool = True,
    max_length: int = MAX_TEXT_LENGTH,
) -> Dict[str, str]:
    return {
        stage.addressing: prepare_description(stage, max_length=max_length)
        if description
        else ""
        for stage in stages
    }


class CmdStageList(CmdBase):
    def _get_stages(self) -> Iterable["Stage"]:
        if self.args.all:
            stages: List["Stage"] = self.repo.index.stages  # type: ignore
            logger.trace(  # type: ignore[attr-defined]
                "%d no. of stages found", len(stages)
            )
            return stages

        # removing duplicates while maintaining order
        collected = chain.from_iterable(
            self.repo.stage.collect(
                target=target, recursive=self.args.recursive, accept_group=True
            )
            for target in self.args.targets
        )
        return dict.fromkeys(collected).keys()

    def run(self):
        from dvc.ui import ui

        def log_error(relpath: str, exc: Exception):
            if self.args.fail:
                raise exc
            logger.debug("Stages from %s failed to load", relpath)

        # silence stage collection error by default
        self.repo.stage_collection_error_handler = log_error

        stages = self._get_stages()
        data = prepare_stages_data(stages, description=not self.args.name_only)
        ui.table(data.items())

        return 0


def parse_cmd(commands: List[str]) -> str:
    """
    We need to take into account two cases:

    - ['python code.py foo bar']: Used mainly with dvc as a library
    - ['echo', 'foo bar']: List of arguments received from the CLI

    The second case would need quoting, as it was passed through:
            dvc run echo "foo bar"
    """

    def quote_argument(arg: str):
        if not arg:
            return '""'
        if " " in arg and '"' not in arg:
            return f'"{arg}"'
        return arg

    if len(commands) < 2:
        return " ".join(commands)
    return " ".join(map(quote_argument, commands))


class CmdStageAdd(CmdBase):
    def run(self):
        kwargs = vars(self.args)
        kwargs.update(
            {
                "cmd": parse_cmd(kwargs.pop("command")),
                "params": parse_params(self.args.params),
            }
        )
        self.repo.stage.add(**kwargs)
        return 0


def _add_common_args(parser):
    parser.add_argument(
        "-f",
        "--force",
        action="store_true",
        default=False,
        help="Overwrite existing stage",
    )
    parser.add_argument(
        "-d",
        "--deps",
        action="append",
        default=[],
        help="Declare dependencies for reproducible cmd.",
        metavar="<path>",
    ).complete = completion.FILE
    parser.add_argument(
        "-p",
        "--params",
        action="append",
        default=[],
        help="Declare parameter to use as additional dependency.",
        metavar="[<filename>:]<params_list>",
    ).complete = completion.FILE
    parser.add_argument(
        "-o",
        "--outs",
        action="append",
        default=[],
        help="Declare output file or directory.",
        metavar="<filename>",
    ).complete = completion.FILE
    parser.add_argument(
        "-O",
        "--outs-no-cache",
        action="append",
        default=[],
        help="Declare output file or directory "
        "(do not put into DVC cache).",
        metavar="<filename>",
    ).complete = completion.FILE
    parser.add_argument(
        "-c",
        "--checkpoints",
        action="append",
        default=[],
        help="Declare checkpoint output file or directory for 'dvc exp run'. "
        "Not compatible with 'dvc repro'.",
        metavar="<filename>",
    ).complete = completion.FILE
    parser.add_argument(
        "--external",
        action="store_true",
        default=False,
        help="Allow outputs that are outside of the DVC repository.",
    )
    parser.add_argument(
        "--outs-persist",
        action="append",
        default=[],
        help="Declare output file or directory that will not be "
        "removed upon repro.",
        metavar="<filename>",
    )
    parser.add_argument(
        "--outs-persist-no-cache",
        action="append",
        default=[],
        help="Declare output file or directory that will not be "
        "removed upon repro (do not put into DVC cache).",
        metavar="<filename>",
    )
    parser.add_argument(
        "-m",
        "--metrics",
        action="append",
        default=[],
        help="Declare output metrics file.",
        metavar="<path>",
    )
    parser.add_argument(
        "-M",
        "--metrics-no-cache",
        action="append",
        default=[],
        help="Declare output metrics file (do not put into DVC cache).",
        metavar="<path>",
    )
    parser.add_argument(
        "--plots",
        action="append",
        default=[],
        help="Declare output plot file.",
        metavar="<path>",
    )
    parser.add_argument(
        "--plots-no-cache",
        action="append",
        default=[],
        help="Declare output plot file (do not put into DVC cache).",
        metavar="<path>",
    )
    parser.add_argument(
        "--live", help="Declare output as dvclive.", metavar="<path>"
    )
    parser.add_argument(
        "--live-no-cache",
        help="Declare output as dvclive (do not put into DVC cache).",
        metavar="<path>",
    )
    parser.add_argument(
        "--live-no-summary",
        action="store_true",
        default=False,
        help=argparse.SUPPRESS,
    )
    parser.add_argument(
        "--live-no-html",
        action="store_true",
        default=False,
        help="Signal dvclive logger to not produce training report.",
    )
    parser.add_argument(
        "-w",
        "--wdir",
        help="Directory within your repo to run your command in.",
        metavar="<path>",
    )
    parser.add_argument(
        "--always-changed",
        action="store_true",
        default=False,
        help="Always consider this DVC-file as changed.",
    )
    parser.add_argument(
        "--desc",
        type=str,
        metavar="<text>",
        help=(
            "User description of the stage (optional). "
            "This doesn't affect any DVC operations."
        ),
    )
    parser.add_argument(
        "command",
        nargs=argparse.REMAINDER,
        help="Command to execute.",
        metavar="command",
    )


def add_parser(subparsers, parent_parser):
    STAGES_HELP = "Commands to list and create stages."

    stage_parser = subparsers.add_parser(
        "stage",
        parents=[parent_parser],
        description=append_doc_link(STAGES_HELP, "stage"),
        help=STAGES_HELP,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    stage_subparsers = stage_parser.add_subparsers(
        dest="cmd",
        help="Use `dvc stage CMD --help` to display command-specific help.",
    )

    fix_subparsers(stage_subparsers)

    STAGE_ADD_HELP = "Create stage"
    stage_add_parser = stage_subparsers.add_parser(
        "add",
        parents=[parent_parser],
        description=append_doc_link(STAGE_ADD_HELP, "stage/add"),
        help=STAGE_ADD_HELP,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    stage_add_parser.add_argument(
        "-n", "--name", help="Name of the stage to add", required=True
    )
    _add_common_args(stage_add_parser)
    stage_add_parser.set_defaults(func=CmdStageAdd)

    STAGE_LIST_HELP = "List stages."
    stage_list_parser = stage_subparsers.add_parser(
        "list",
        parents=[parent_parser],
        description=append_doc_link(STAGE_LIST_HELP, "stage/list"),
        help=STAGE_LIST_HELP,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    stage_list_parser.add_argument(
        "targets",
        nargs="*",
        default=["dvc.yaml"],
        help=(
            "Show stages from a dvc.yaml/.dvc file or a directory. "
            "'dvc.yaml' by default"
        ),
    )
    stage_list_parser.add_argument(
        "--all",
        action="store_true",
        default=False,
        help="List all of the stages in the repo.",
    )
    stage_list_parser.add_argument(
        "--fail",
        action="store_true",
        default=False,
        help="Fail immediately, do not suppress any syntax errors.",
    )
    stage_list_parser.add_argument(
        "-R",
        "--recursive",
        action="store_true",
        default=False,
        help="List all stages inside the specified directory.",
    )
    stage_list_parser.add_argument(
        "--name-only",
        "--names-only",
        action="store_true",
        default=False,
        help="List only stage names.",
    )
    stage_list_parser.set_defaults(func=CmdStageList)