whylabs/whylogs-python

View on GitHub
python/pyproject.toml

Summary

Maintainability
Test Coverage
# Package metadata used by Poetry to build and publish the whylogs distribution.
[tool.poetry]
name = "whylogs"
version = "1.4.0"
description = "Profile and monitor your ML data pipeline end-to-end"
license = "Apache-2.0"
authors = ["WhyLabs.ai <support@whylabs.ai>"]
readme = "DESCRIPTION.md"
homepage = "https://docs.whylabs.ai"
# Extra files to bundle: everything under whylogs/core/proto (and its v0
# subpackage) whose name matches `*.py*`, i.e. `.py` plus suffix variants
# such as `.pyi`.
include = [
    "whylogs/core/proto/v0/*.py*",
    "whylogs/core/proto/*.py*",
]


[tool.poetry.dependencies]
# Core dependencies, installed unconditionally.
# Be REALLY mindful when touching this list.
python = ">=3.7.1, <4"
whylogs-sketching = ">=3.4.1.dev3"
protobuf = ">=3.19.4"
importlib-metadata = { version = "<4.3", python = "<3.8" }
typing-extensions = { version = ">=3.10", markers = 'python_version < "4"' }
whylabs-client = "^0.6.2"
requests = "^2.27"
types-requests = "^2.30.0.0"

# viz module. Everything from here on is meant to be optional.
pybars3 = { version = "^0.9", optional = true }
ipython = { version = "*", optional = true }
# scipy and numpy use a higher minimum version on Python 3.11 and newer.
scipy = [
    { version = ">=1.5", python = "<3.11", optional = true },
    { version = ">=1.9.2", python = ">=3.11", optional = true },
]
numpy = [
    { version = "*", python = "<3.11", optional = true },
    { version = ">=1.23.2", python = ">=3.11", optional = true },
]

# datasets module
pandas = { version = "*", optional = true }

# Documentation build dependencies (see the `docs` extra)
sphinx = { version = "*", optional = true }
sphinx-autoapi = { version = "*", optional = true }
sphinx-copybutton = { version = "^0.5.0", optional = true }
myst-parser = { extras = ["sphinx"], version = "^0.17.2", optional = true }
furo = { version = "^2022.3.4", optional = true }
sphinx-autobuild = { version = "^2021.3.14", optional = true }
sphinxext-opengraph = { version = "^0.6.3", optional = true }
sphinx-inline-tabs = { version = "*", python = ">=3.8,<4", optional = true }
ipython_genutils = { version = "^0.2.0", optional = true }
nbsphinx = { version = "^0.8.9", optional = true }
nbconvert = { version = "^7.0.0", optional = true }

# Integration dependencies
boto3 = { version = "^1.22.13", optional = true }
# mlflow-skinny is held below 2.0.1 on Python older than 3.8.
mlflow-skinny = [
    { version = "^2.5.0", python = ">=3.8", optional = true },
    { version = "<2.0.1", python = "<3.8", optional = true },
]
google-cloud-storage = { version = "^2.5.0", optional = true }

# PySpark related dependencies
pyarrow = { version = ">=8.0.0, <13", optional = true }
pyspark = { version = "^3.0.0", optional = true }

# Process logging dependencies
faster-fifo = { version = "^1.4.5", optional = true }
orjson = { version = "^3.8.10", optional = true }

# Image support related dependencies
Pillow = { version = "^9.2.0", optional = true }

# Embeddings support related dependencies
scikit-learn = [
    { version = "^1.0.2", python = "<3.11", optional = true },
    { version = ">=1.1.2, <2", python = ">=3.11", optional = true },
]

# Fugue related dependencies
fugue = { version = "^0.8.1", optional = true }
# NOTE: platformdirs is not marked optional, so unlike the entries above it
# is always installed together with the core dependencies.
platformdirs = "^3.5.0"


# Named groups of optional dependencies, selectable at install time
# (e.g. `whylogs[viz]`). `all` bundles every group except `docs`.
[tool.poetry.extras]
docs = [
    "sphinx",
    "sphinx-autoapi",
    "sphinx-autobuild",
    "furo",
    "sphinx-copybutton",
    "myst-parser",
    "sphinx-inline-tabs",
    "sphinxext-opengraph",
    "nbsphinx",
    "nbconvert",
    "ipython_genutils",
]
viz = [
    "ipython",
    "pybars3",
    "numpy",
    "scipy",
    "Pillow",
]
spark = [
    "pyarrow",
    "pyspark",
]
datasets = [
    "pandas",
]
gcs = [
    "google-cloud-storage",
]
s3 = [
    "boto3",
]
mlflow = [
    "mlflow-skinny",
]
image = [
    "Pillow",
    "numpy",
]
fugue = [
    "fugue",
]
embeddings = [
    "numpy",
    "scikit-learn",
]
proc = [
    # For much better queue performance than mp.Queue and bulk retrieval
    "faster-fifo",
    # For faster json parsing when serializing data between processes
    "orjson",
    # For merging dataframes during batch processing
    "pandas",
]
# Same as `proc`, minus faster-fifo.
proc-mp = [
    # For faster json parsing when serializing data between processes
    "orjson",
    # For merging dataframes during batch processing
    "pandas",
]
all = [
    "scikit-learn",
    "fugue",
    "Pillow",
    "mlflow-skinny",
    "boto3",
    "google-cloud-storage",
    "pandas",
    "pyarrow",
    "pyspark",
    "ipython",
    "pybars3",
    "numpy",
    "scipy",
    "faster-fifo",
    "orjson",
]

# Development-only dependencies (Poetry `dev` group), kept in alphabetical order.
[tool.poetry.group.dev.dependencies]
2to3 = "^1.0"
autoflake = "^1.4"
black = { version = "^22.10.0", allow-prereleases = true }
bump2version = "^1.0.1"
flake8 = "^5"
gcp-storage-emulator = "^2022.6.11"
# for developing in Jupyter notebook
ipykernel = ">=6.11"
isort = "^5.6"
mistune = ">=2.0.4"
moto = "^4.1.6"
mypy = ">=0.942,<1"
mypy-protobuf = ">=3.2.0"
pandas = "*"
pandas-stubs = "*"
papermill = "^2.4.0"
pre-commit = "^2.8"
pyright = "^1.1.338"
pytest = "^7.2.0"
pytest-cov = ">=3"
pytest-mock = "^3.3"
pytest-spark = ">=0.6.0"
ruff = "^0.4.4"
twine = "^4.0.1"
types-protobuf = ">=0.1.14"
types-python-dateutil = "^2.8.12"
types-urllib3 = "^1.26.25.5"

# Build backend declaration: the package is built through poetry-core.
[build-system]
build-backend = "poetry.core.masonry.api"
requires = [
    "poetry-core>=1.0.0",
]

# Import sorting (isort), using its "black" profile.
[tool.isort]
profile = "black"
multi_line_output = 3
src_paths = [
    "whylogs",
    "tests",
]
# Paths isort must leave alone.
skip = [
    "whylogs/core/proto/",
    ".venv",
    "docs",
]


# Static type-checking configuration for mypy.
[tool.mypy]
# Strictness flags: both untyped and partially typed definitions are disallowed.
disallow_incomplete_defs = true
disallow_untyped_defs = true
explicit_package_bases = true
# Only Python sources under whylogs/ are listed for checking.
files = ["whylogs/**/*.py"]
namespace_packages = true
show_error_codes = true
plugins = [
    "numpy.typing.mypy_plugin"
]
# NOTE(review): the pattern below is laid out like a verbose regex (whitespace
# and `#` comments inside the string). mypy's documented multi-line example
# starts the pattern with `(?x)`; this one does not, and the `*.pyi`
# alternative has nothing before the `*` to repeat. Confirm the pattern is
# interpreted as intended.
exclude = '''
(
  /(
      \.eggs         # exclude a few common directories in the
    | \.git          # root of the project
    | \.hg
    | \.mypy_cache
    | \.tox
    | \.venv
    | _build
    | buck-out
    | build
    | dist
    | examples
    | examples/integration
    | tests
    | whylogs/core/proto
    | whylogs/viz/html
    | docs
    | *.pyi
  )/
  | foo.py           # also separately exclude a file named foo.py in
                     # the root of the project
)
'''

# pytest configuration.
[tool.pytest.ini_options]
minversion = "6.0"
testpaths = [
    "tests",
]
addopts = "-q"
# Spark session settings for the test run, one `key: value` pair per line.
# Presumably consumed by the pytest-spark plugin listed in the dev
# dependencies; verify before renaming this key.
spark_options = '''
  spark.master: local[*]
  spark.sql.catalogImplementation: in-memory
  spark.sql.shuffle.partitions: 4
  spark.default.parallelism: 4
  spark.executor.cores: 4
  spark.sql.execution.arrow.pyspark.enabled: true
  spark.sql.execution.arrow.enabled: false
  spark.sql.adaptive.enabled: false
'''

# Code formatting configuration for black.
[tool.black]
line-length = 120
target-version = ['py37']
# Files considered for formatting: names ending in `.py` or `.pyi`.
include = '\.pyi?$'
# Directories skipped by black. The last directory alternative,
# whylogs/api/logger/experimental/logger, is the same path that the
# [tool.ruff] and [tool.pyright] sections of this file list in their
# `include` settings.
exclude = '''
(
  /(
      \.eggs         # exclude a few common directories in the
    | \.git          # root of the project
    | \.hg
    | \.mypy_cache
    | \.tox
    | \.venv
    | _build
    | buck-out
    | build
    | dist
    | tests
    | whylogs/core/proto
    | docs
    | whylogs/api/logger/experimental/logger
  )/
  | foo.py           # also separately exclude a file named foo.py in
                     # the root of the project
)
'''

# Pyright runs in strict mode, but only over the experimental logger package.
[tool.pyright]
include = ["whylogs/api/logger/experimental/logger/**/*.py"]
typeCheckingMode = "strict"

# Strict-mode checks that are explicitly switched off for this project.
reportMissingTypeStubs = false
reportMissingParameterType = false
# Previously misspelled as `reportMissingTypeArgumet`, which meant this
# override was not applied to the real setting.
reportMissingTypeArgument = false

# Ruff is applied only to the experimental logger package.
[tool.ruff]
line-length = 140
indent-width = 4
include = ["whylogs/api/logger/experimental/logger/**/*.py"]

# Linter settings live under `lint`; the top-level `select` key and the
# `[tool.ruff.isort]` table are deprecated locations for these options.
[tool.ruff.lint]
select = ["E", "F", "I", "W"]
fixable = ["ALL"]
# Names treated as intentionally unused: underscores only, or a name with a
# leading underscore.
dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"

[tool.ruff.lint.isort]
known-first-party = ["whylogs"]

# Ruff formatter settings.
[tool.ruff.format]
indent-style = "space"
quote-style = "double"
line-ending = "auto"
skip-magic-trailing-comma = false