whylabs/whylogs-python

View on GitHub
python/whylogs/datasets/configs.py

Summary

Maintainability
A
0 mins
Test Coverage
from dataclasses import dataclass, field
from datetime import datetime, timezone
from typing import Dict, List, Optional


@dataclass
class BaseConfig:
    data_folder: str = "whylogs_data"
    description_folder: str = "whylogs.datasets.descr"


@dataclass
class DatasetConfig:
    folder_name: str
    url: str
    baseline_start_timestamp: Dict[str, datetime]
    inference_start_timestamp: Optional[Dict[str, datetime]]
    production_start_timestamp: Optional[Dict[str, datetime]]
    max_interval: int
    base_unit: str
    available_versions: tuple
    target_columns: dict
    prediction_columns: dict
    miscellaneous_columns: dict
    ignore_columns: dict


@dataclass
class EcommerceConfig(DatasetConfig):
    folder_name: str = "ecommerce"
    description_file: str = "ecommerce.rst"
    available_versions: tuple = ("base",)
    # url: str = "./whylogs/datasets"
    url: str = "https://whylabs-public.s3.us-west-2.amazonaws.com/whylogs_examples/Ecommerce"
    baseline_start_timestamp: Dict[str, datetime] = field(
        default_factory=lambda: {
            "base": datetime(year=2022, month=8, day=9, tzinfo=timezone.utc),
        }
    )
    inference_start_timestamp: Dict[str, datetime] = field(
        default_factory=lambda: {
            "base": datetime(year=2022, month=8, day=19, tzinfo=timezone.utc),
        }
    )
    production_start_timestamp: Dict[str, datetime] = field(
        default_factory=lambda: {
            "base": datetime(year=2022, month=8, day=19, tzinfo=timezone.utc),
        }
    )
    max_interval: int = 21
    base_unit: str = "D"
    target_columns: Dict[str, List[str]] = field(default_factory=lambda: {"base": ["output_discount"]})
    prediction_columns: Dict[str, List[str]] = field(
        default_factory=lambda: {
            "base": ["output_prediction", "output_score"],
        }
    )
    miscellaneous_columns: Dict[str, List[str]] = field(
        default_factory=lambda: {
            "base": [
                "category.Baby Care",
                "category.Bakery, Cakes and Dairy",
                "category.Beauty and Hygiene",
                "category.Beverages",
                "category.Cleaning and Household",
                "category.Eggs, Meat and Fish",
                "category.Foodgrains, Oil and Masala",
                "category.Fruits and Vegetables",
                "category.Gourmet and World Food",
                "category.Kitchen, Garden and Pets",
                "category.Snacks and Branded Foods",
            ],
        }
    )
    ignore_columns: Dict[str, List[str]] = field(
        default_factory=lambda: {
            "base": ["date"],
        }
    )


@dataclass
class EmployeeConfig(DatasetConfig):
    folder_name: str = "employee"
    description_file: str = "employee.rst"
    available_versions: tuple = ("base",)
    url: str = "https://whylabs-public.s3.us-west-2.amazonaws.com/whylogs_examples/Employee"
    baseline_start_timestamp: Dict[str, datetime] = field(
        default_factory=lambda: {
            "base": datetime(year=2023, month=1, day=16, tzinfo=timezone.utc),
        }
    )
    inference_start_timestamp: Dict[str, datetime] = field(
        default_factory=lambda: {
            "base": datetime(year=2023, month=1, day=17, tzinfo=timezone.utc),
        }
    )
    production_start_timestamp: Dict[str, datetime] = field(
        default_factory=lambda: {
            "base": datetime(year=2023, month=1, day=17, tzinfo=timezone.utc),
        }
    )
    max_interval: int = 31
    base_unit: str = "D"
    target_columns: Dict[str, List[str]] = field(default_factory=lambda: {"base": []})
    prediction_columns: Dict[str, List[str]] = field(
        default_factory=lambda: {
            "base": [],
        }
    )
    miscellaneous_columns: Dict[str, List[str]] = field(
        default_factory=lambda: {
            "base": ["assignment_category"],
        }
    )
    ignore_columns: Dict[str, List[str]] = field(
        default_factory=lambda: {
            "base": ["date"],
        }
    )


@dataclass
class WeatherConfig(DatasetConfig):
    folder_name: str = "weather_forecast"
    description_file: str = "weather.rst"
    url: str = "https://whylabs-public.s3.us-west-2.amazonaws.com/whylogs_examples/WeatherForecast"
    baseline_start_timestamp: Dict[str, datetime] = field(
        default_factory=lambda: {
            "in_domain": datetime(year=2018, month=9, day=1, tzinfo=timezone.utc),
            "out_domain": datetime(year=2018, month=9, day=1, tzinfo=timezone.utc),
        }
    )
    inference_start_timestamp: Dict[str, datetime] = field(
        default_factory=lambda: {
            "in_domain": datetime(year=2019, month=2, day=1, tzinfo=timezone.utc),
            "out_domain": datetime(year=2019, month=5, day=14, tzinfo=timezone.utc),
        }
    )
    production_start_timestamp: Dict[str, datetime] = field(
        default_factory=lambda: {
            "in_domain": datetime(year=2019, month=2, day=1, tzinfo=timezone.utc),
            "out_domain": datetime(year=2019, month=5, day=14, tzinfo=timezone.utc),
        }
    )
    production_start_timestamp = None
    max_interval: int = 30
    base_unit: str = "D"
    available_versions: tuple = ("in_domain", "out_domain")
    target_columns: Dict[str, List[str]] = field(
        default_factory=lambda: {"in_domain": ["temperature"], "out_domain": ["temperature"]}
    )
    prediction_columns: Dict[str, List[str]] = field(
        default_factory=lambda: {
            "in_domain": ["prediction_temperature", "uncertainty"],
            "out_domain": ["prediction_temperature", "uncertainty"],
        }
    )
    miscellaneous_columns: Dict[str, List[str]] = field(
        default_factory=lambda: {
            "in_domain": ["meta_latitude", "meta_longitude", "meta_climate"],
            "out_domain": ["meta_latitude", "meta_longitude", "meta_climate"],
        }
    )
    ignore_columns: Dict[str, List[str]] = field(
        default_factory=lambda: {
            "in_domain": ["date"],
            "out_domain": ["date"],
        }
    )