john-kurkowski/tldextract

View on GitHub
tldextract/cli.py

Summary

Maintainability
A
1 hr
Test Coverage
"""tldextract CLI."""

import argparse
import dataclasses
import json
import logging
import os.path
import pathlib
import sys

from ._version import version as __version__
from .tldextract import TLDExtract


def main() -> None:
    """Run the tldextract command-line interface.

    Reads options and the fqdn/url operands from the process arguments,
    builds a ``TLDExtract`` instance from them, optionally forces an update
    of the TLD definitions, and prints one result line per operand.

    Exits with status 1, after printing usage, when neither ``--update`` nor
    any operand is given.
    """
    logging.basicConfig()

    cli = argparse.ArgumentParser(
        prog="tldextract",
        description="Parse hostname from a url or fqdn",
    )

    # NOTE: arguments are registered in a fixed order; that order is what
    # the generated help text shows.
    cli.add_argument(
        "--version",
        action="version",
        version="%(prog)s " + __version__,
    )
    cli.add_argument(
        "-j",
        "--json",
        action="store_true",
        default=False,
        help="output in json format",
    )
    cli.add_argument(
        "input",
        nargs="*",
        type=str,
        metavar="fqdn|url",
        help="fqdn or url",
    )
    cli.add_argument(
        "-u",
        "--update",
        action="store_true",
        default=False,
        help="force fetch the latest TLD definitions",
    )
    cli.add_argument(
        "--suffix_list_url",
        action="append",
        required=False,
        help="use an alternate URL or local file for TLD definitions",
    )
    cli.add_argument(
        "-c",
        "--cache_dir",
        help="use an alternate TLD definition caching folder",
    )
    cli.add_argument(
        "-p",
        "--include_psl_private_domains",
        "--private_domains",
        action="store_true",
        default=False,
        help="Include private domains",
    )
    cli.add_argument(
        "--no_fallback_to_snapshot",
        dest="fallback_to_snapshot",
        action="store_false",
        default=True,
        help="Don't fall back to the package's snapshot of the suffix list",
    )

    opts = cli.parse_args()

    # Keyword arguments forwarded to the TLDExtract constructor; optional
    # entries are only added when the user supplied them.
    extractor_options = {
        "include_psl_private_domains": opts.include_psl_private_domains,
        "fallback_to_snapshot": opts.fallback_to_snapshot,
    }

    if opts.cache_dir:
        extractor_options["cache_dir"] = opts.cache_dir

    if opts.suffix_list_url is not None:
        # An entry naming an existing local file is rewritten as a file URI
        # of its absolute path; anything else is passed through untouched.
        extractor_options["suffix_list_urls"] = [
            pathlib.Path(os.path.abspath(location)).as_uri()
            if os.path.isfile(location)
            else location
            for location in opts.suffix_list_url
        ]

    extractor = TLDExtract(**extractor_options)

    # Nothing to do at all: show usage and signal failure.
    if not (opts.update or opts.input):
        cli.print_usage()
        sys.exit(1)

    if opts.update:
        extractor.update(True)

    # Attributes emitted in JSON output in addition to the dataclass fields.
    extra_json_keys = ("fqdn", "ipv4", "ipv6", "registered_domain")

    for target in opts.input:
        ext = extractor(target)
        if not opts.json:
            print(f"{ext.subdomain} {ext.domain} {ext.suffix}")
            continue

        payload = dataclasses.asdict(ext)
        payload.update((key, getattr(ext, key)) for key in extra_json_keys)
        print(json.dumps(payload))