# python/pyproject.toml
[tool.poetry]
# Distribution identity and descriptive metadata.
name = "whylogs"
version = "1.4.0"
description = "Profile and monitor your ML data pipeline end-to-end"
readme = "DESCRIPTION.md"
license = "Apache-2.0"
authors = ["WhyLabs.ai <support@whylabs.ai>"]
homepage = "https://docs.whylabs.ai"

# Extra files to package from whylogs/core/proto and its v0 subdirectory.
# The `*.py*` glob matches `.py` files as well as siblings such as `.pyi`.
include = [
    "whylogs/core/proto/v0/*.py*",
    "whylogs/core/proto/*.py*",
]
[tool.poetry.dependencies]
# Core dependencies: every entry in this first group is required (none is
# marked `optional`). Be REALLY mindful when touching this list.
python = ">=3.7.1, <4"
whylogs-sketching = ">=3.4.1.dev3"
protobuf = ">=3.19.4"
importlib-metadata = { version = "<4.3", python = "<3.8" }
typing-extensions = { version = ">=3.10", markers = 'python_version < "4"' }
whylabs-client = "^0.6.2"
requests = "^2.27"
types-requests = "^2.30.0.0"
platformdirs = "^3.5.0"

# Everything after this point must be `optional = true` and reachable through
# an entry in [tool.poetry.extras].

# viz module
pybars3 = { version = "^0.9", optional = true }
ipython = { version = "*", optional = true }
# scipy and numpy need newer minimum versions on Python 3.11+.
scipy = [
    { version = ">=1.5", python = "<3.11", optional = true },
    { version = ">=1.9.2", python = ">=3.11", optional = true },
]
numpy = [
    { version = "*", python = "<3.11", optional = true },
    { version = ">=1.23.2", python = ">=3.11", optional = true },
]

# datasets module
pandas = { version = "*", optional = true }

# Doc dependencies
sphinx = { version = "*", optional = true }
sphinx-autoapi = { version = "*", optional = true }
sphinx-copybutton = { version = "^0.5.0", optional = true }
myst-parser = { version = "^0.17.2", extras = ["sphinx"], optional = true }
furo = { version = "^2022.3.4", optional = true }
sphinx-autobuild = { version = "^2021.3.14", optional = true }
sphinxext-opengraph = { version = "^0.6.3", optional = true }
sphinx-inline-tabs = { version = "*", python = ">=3.8,<4", optional = true }
ipython_genutils = { version = "^0.2.0", optional = true }
nbsphinx = { version = "^0.8.9", optional = true }
nbconvert = { version = "^7.0.0", optional = true }

# Integrations dependencies
boto3 = { version = "^1.22.13", optional = true }
# mlflow-skinny 2.5+ is only requested on Python 3.8+; Python 3.7 stays below 2.0.1.
mlflow-skinny = [
    { version = "^2.5.0", python = ">=3.8", optional = true },
    { version = "<2.0.1", python = "<3.8", optional = true },
]
google-cloud-storage = { version = "^2.5.0", optional = true }

# Pyspark related dependencies
pyarrow = { version = ">=8.0.0, <13", optional = true }
pyspark = { version = "^3.0.0", optional = true }

# Process logging dependencies
faster-fifo = { version = "^1.4.5", optional = true }
orjson = { version = "^3.8.10", optional = true }

# Image support related dependencies
Pillow = { version = "^9.2.0", optional = true }

# Embeddings support related dependencies
scikit-learn = [
    { version = "^1.0.2", python = "<3.11", optional = true },
    { version = ">=1.1.2, <2", python = ">=3.11", optional = true },
]

# Fugue related dependencies
fugue = { version = "^0.8.1", optional = true }
[tool.poetry.extras]
# Each extra lists optional packages declared in [tool.poetry.dependencies].
docs = [
    "sphinx",
    "sphinx-autoapi",
    "sphinx-autobuild",
    "furo",
    "sphinx-copybutton",
    "myst-parser",
    "sphinx-inline-tabs",
    "sphinxext-opengraph",
    "nbsphinx",
    "nbconvert",
    "ipython_genutils",
]
viz = [
    "ipython",
    "pybars3",
    "numpy",
    "scipy",
    "Pillow",
]
spark = [
    "pyarrow",
    "pyspark",
]
datasets = [
    "pandas",
]
gcs = [
    "google-cloud-storage",
]
s3 = [
    "boto3",
]
mlflow = [
    "mlflow-skinny",
]
image = [
    "Pillow",
    "numpy",
]
fugue = [
    "fugue",
]
embeddings = [
    "numpy",
    "scikit-learn",
]
proc = [
    # Much better queue performance than mp.Queue, plus bulk retrieval.
    "faster-fifo",
    # Faster JSON parsing when serializing data between processes.
    "orjson",
    # Merging dataframes during batch processing.
    "pandas",
]
# Same as `proc`, minus faster-fifo.
proc-mp = [
    # Faster JSON parsing when serializing data between processes.
    "orjson",
    # Merging dataframes during batch processing.
    "pandas",
]
# Union of the extras above, except `docs`.
all = [
    "scikit-learn",
    "fugue",
    "Pillow",
    "mlflow-skinny",
    "boto3",
    "google-cloud-storage",
    "pandas",
    "pyarrow",
    "pyspark",
    "ipython",
    "pybars3",
    "numpy",
    "scipy",
    "faster-fifo",
    "orjson",
]
[tool.poetry.group.dev.dependencies]
# Development-only tooling, kept in alphabetical order.
2to3 = "^1.0"
autoflake = "^1.4"
black = { version = "^22.10.0", allow-prereleases = true }
bump2version = "^1.0.1"
flake8 = "^5"
gcp-storage-emulator = "^2022.6.11"
# For developing in Jupyter notebooks.
ipykernel = ">=6.11"
isort = "^5.6"
mistune = ">=2.0.4"
moto = "^4.1.6"
mypy = ">=0.942,<1"
mypy-protobuf = ">=3.2.0"
pandas = "*"
pandas-stubs = "*"
papermill = "^2.4.0"
pre-commit = "^2.8"
pyright = "^1.1.338"
pytest = "^7.2.0"
pytest-cov = ">=3"
pytest-mock = "^3.3"
pytest-spark = ">=0.6.0"
ruff = "^0.4.4"
twine = "^4.0.1"
types-protobuf = ">=0.1.14"
types-python-dateutil = "^2.8.12"
types-urllib3 = "^1.26.25.5"
[build-system]
# Builds are delegated to poetry-core's build backend.
build-backend = "poetry.core.masonry.api"
requires = ["poetry-core>=1.0.0"]
[tool.isort]
# Import sorting uses the "black" profile.
profile = "black"
multi_line_output = 3
# Source roots to sort, and paths that are skipped.
src_paths = ["whylogs", "tests"]
skip = ["whylogs/core/proto/", ".venv", "docs"]
[tool.mypy]
# Files to check and how packages are resolved.
files = ["whylogs/**/*.py"]
explicit_package_bases = true
namespace_packages = true

# Every function definition must be fully annotated.
disallow_incomplete_defs = true
disallow_untyped_defs = true
show_error_codes = true

plugins = ["numpy.typing.mypy_plugin"]

# NOTE(review): this pattern is laid out like a verbose-mode regex (newlines,
# indentation-free alternatives and `#` comments inside the string), the same
# layout used by the [tool.black] exclude. mypy's documentation shows an
# explicit `(?x)` prefix for that layout; without it the whitespace and
# comments are presumably matched literally, so this may exclude nothing.
# `| *.pyi` would also fail to compile under `(?x)`. Confirm against how mypy
# is invoked before changing it; the value is left untouched here.
exclude = '''
(
/(
\.eggs # exclude a few common directories in the
| \.git # root of the project
| \.hg
| \.mypy_cache
| \.tox
| \.venv
| _build
| buck-out
| build
| dist
| examples
| examples/integration
| tests
| whylogs/core/proto
| whylogs/viz/html
| docs
| *.pyi
)/
| foo.py # also separately exclude a file named foo.py in
# the root of the project
)
'''
[tool.pytest.ini_options]
minversion = "6.0"
testpaths = ["tests"]
# Run quietly by default.
addopts = "-q"
# Spark settings, one `key: value` pair per line.
# NOTE(review): presumably consumed by the pytest-spark plugin listed in the
# dev dependencies; confirm before renaming or reformatting this value.
spark_options = '''
spark.master: local[*]
spark.sql.catalogImplementation: in-memory
spark.sql.shuffle.partitions: 4
spark.default.parallelism: 4
spark.executor.cores: 4
spark.sql.execution.arrow.pyspark.enabled: true
spark.sql.execution.arrow.enabled: false
spark.sql.adaptive.enabled: false
'''
[tool.black]
# Formatting targets Python 3.7 syntax with a 120-column limit.
line-length = 120
target-version = ["py37"]

# Only `.py` and `.pyi` files are considered.
include = '\.pyi?$'

# Directories and files left unformatted. Besides the usual tool/build
# directories this skips tests, docs, whylogs/core/proto and the experimental
# logger package.
exclude = '''
(
/(
\.eggs # exclude a few common directories in the
| \.git # root of the project
| \.hg
| \.mypy_cache
| \.tox
| \.venv
| _build
| buck-out
| build
| dist
| tests
| whylogs/core/proto
| docs
| whylogs/api/logger/experimental/logger
)/
| foo.py # also separately exclude a file named foo.py in
# the root of the project
)
'''
[tool.pyright]
# Pyright runs in strict mode, restricted to the experimental logger package.
include = ["whylogs/api/logger/experimental/logger/**/*.py"]
typeCheckingMode = "strict"

# Individual rules switched off on top of strict mode.
reportMissingTypeStubs = false
reportMissingParameterType = false
# Previously misspelled as `reportMissingTypeArgumet`, which pyright does not
# recognise, so the override never applied.
reportMissingTypeArgument = false
[tool.ruff]
# Ruff is applied only to the experimental logger package.
line-length = 140
indent-width = 4
include = ["whylogs/api/logger/experimental/logger/**/*.py"]

# Linter settings live under `lint`: the top-level `select` and
# `[tool.ruff.isort]` forms are deprecated in the ruff release pinned in the
# dev dependencies (^0.4.4).
[tool.ruff.lint]
select = ["E", "F", "I", "W"]
fixable = ["ALL"]
# Names made only of underscores, or starting with an underscore, count as
# intentionally unused.
dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"

[tool.ruff.lint.isort]
known-first-party = ["whylogs"]
[tool.ruff.format]
# Formatter output: space indentation, double quotes, auto-detected line
# endings, and magic trailing commas left in effect.
indent-style = "space"
quote-style = "double"
line-ending = "auto"
skip-magic-trailing-comma = false