# Repository: WenjieDu/TSDB (view on GitHub)
# File: tsdb/database.py
# Code-quality summary: Maintainability A (0 mins); Test Coverage (value not captured)
"""
List available datasets and their official download links.
"""

# Created by Wenjie Du <wenjay.du@gmail.com>
# License: BSD-3-Clause


# Hand-maintained catalogue: dataset name -> download link(s).
# A value is a single URL string when the dataset is one file, and a list of
# URL strings when it is made of several files.
_DATABASE = {
    # Source page: http://www.physionet.org/challenge/2012
    # TSDB profile: https://github.com/WenjieDu/TSDB/tree/main/dataset_profiles/physionet_2012
    "physionet_2012": [
        f"https://www.physionet.org/files/challenge-2012/1.0.0/{file_name}"
        for file_name in (
            "set-a.tar.gz",
            "set-b.tar.gz",
            "set-c.tar.gz",
            "Outcomes-a.txt",
            "Outcomes-b.txt",
            "Outcomes-c.txt",
        )
    ],
    # Source page: http://www.physionet.org/challenge/2019
    # TSDB profile: https://github.com/WenjieDu/TSDB/tree/main/dataset_profiles/physionet_2019
    "physionet_2019": [
        f"https://archive.physionet.org/users/shared/challenge-2019/training_set{part}.zip"
        for part in ("A", "B")
    ],
    # TSDB profile: https://github.com/WenjieDu/TSDB/tree/main/dataset_profiles/electricity_load_diagrams
    "electricity_load_diagrams": (
        "https://archive.ics.uci.edu/ml/machine-learning-databases/00321/"
        "LD2011_2014.txt.zip"
    ),
    # TSDB profile: https://github.com/WenjieDu/TSDB/tree/main/dataset_profiles/beijing_multisite_air_quality
    "beijing_multisite_air_quality": (
        "https://archive.ics.uci.edu/ml/machine-learning-databases/00501/"
        "PRSA2017_Data_20130301-20170228.zip"
    ),
    # TSDB profile: https://github.com/WenjieDu/TSDB/tree/main/dataset_profiles/italy_air_quality
    "italy_air_quality": (
        "https://archive.ics.uci.edu/static/public/360/air+quality.zip"
    ),
    # TSDB profile: https://github.com/WenjieDu/TSDB/tree/main/dataset_profiles/vessel_ais
    "vessel_ais": (
        "https://zenodo.org/record/8064564/files/parquets.zip"
    ),
    # TSDB profile: https://github.com/WenjieDu/TSDB/tree/main/dataset_profiles/electricity_transformer_temperature
    "electricity_transformer_temperature": [
        f"https://raw.githubusercontent.com/zhouhaoyi/ETDataset/main/ETT-small/{csv_name}"
        for csv_name in ("ETTm1.csv", "ETTm2.csv", "ETTh1.csv", "ETTh2.csv")
    ],
    # Source pages: https://pems.dot.ca.gov and
    # https://github.com/laiguokun/multivariate-time-series-data
    "pems_traffic": (
        "https://raw.githubusercontent.com/laiguokun/multivariate-time-series-data/master/"
        "traffic/traffic.txt.gz"
    ),
    # Source pages: https://www.nrel.gov/grid/solar-power-data.html and
    # https://github.com/laiguokun/multivariate-time-series-data
    "solar_alabama": (
        "https://raw.githubusercontent.com/laiguokun/multivariate-time-series-data/master/"
        "solar-energy/solar_AL.txt.gz"
    ),
}


# https://github.com/WenjieDu/TSDB/tree/main/dataset_profiles/ucr_uea_datasets
# Tally: 128 UCR + 33 UEA + 2 old removed names (NonInvasiveFatalECGThorax1
# and 2; note "Fatal" here vs. "Fetal" in the entries of the first group) = 163.
# Names are written as whitespace-separated text and split into list items, so
# the reading order below is exactly the order of the resulting list.
_ucr_uea_datasets = [
    # First group: 128 names (the UCR part of the tally above).
    *"""
    ACSF1 Adiac AllGestureWiimoteX AllGestureWiimoteY AllGestureWiimoteZ
    ArrowHead Beef BeetleFly BirdChicken BME Car CBF Chinatown
    ChlorineConcentration CinCECGTorso Coffee Computers
    CricketX CricketY CricketZ Crop DiatomSizeReduction
    DistalPhalanxOutlineCorrect DistalPhalanxOutlineAgeGroup DistalPhalanxTW
    DodgerLoopDay DodgerLoopGame DodgerLoopWeekend
    Earthquakes ECG200 ECG5000 ECGFiveDays ElectricDevices
    EOGHorizontalSignal EOGVerticalSignal EthanolLevel
    FaceAll FaceFour FacesUCR FiftyWords Fish FordA FordB
    FreezerRegularTrain FreezerSmallTrain Fungi
    GestureMidAirD1 GestureMidAirD2 GestureMidAirD3
    GesturePebbleZ1 GesturePebbleZ2
    GunPoint GunPointAgeSpan GunPointMaleVersusFemale GunPointOldVersusYoung
    Ham HandOutlines Haptics Herring HouseTwenty InlineSkate
    InsectEPGRegularTrain InsectEPGSmallTrain InsectWingbeatSound
    ItalyPowerDemand LargeKitchenAppliances Lightning2 Lightning7
    Mallat Meat MedicalImages MelbournePedestrian
    MiddlePhalanxOutlineCorrect MiddlePhalanxOutlineAgeGroup MiddlePhalanxTW
    MixedShapesRegularTrain MixedShapesSmallTrain MoteStrain
    NonInvasiveFetalECGThorax1 NonInvasiveFetalECGThorax2
    OliveOil OSULeaf PhalangesOutlinesCorrect Phoneme PickupGestureWiimoteZ
    PigAirwayPressure PigArtPressure PigCVP PLAID Plane PowerCons
    ProximalPhalanxOutlineCorrect ProximalPhalanxOutlineAgeGroup ProximalPhalanxTW
    RefrigerationDevices Rock ScreenType
    SemgHandGenderCh2 SemgHandMovementCh2 SemgHandSubjectCh2
    ShakeGestureWiimoteZ ShapeletSim ShapesAll
    SmallKitchenAppliances SmoothSubspace
    SonyAIBORobotSurface1 SonyAIBORobotSurface2
    StarLightCurves Strawberry SwedishLeaf Symbols SyntheticControl
    ToeSegmentation1 ToeSegmentation2 Trace TwoLeadECG TwoPatterns UMD
    UWaveGestureLibraryAll UWaveGestureLibraryX
    UWaveGestureLibraryY UWaveGestureLibraryZ
    Wafer Wine WordSynonyms Worms WormsTwoClass Yoga
    """.split(),
    # Second group: 33 names (the UEA part of the tally above).
    *"""
    ArticularyWordRecognition AsphaltObstaclesCoordinates
    AsphaltPavementTypeCoordinates AsphaltRegularityCoordinates
    AtrialFibrillation BasicMotions CharacterTrajectories Cricket
    DuckDuckGeese EigenWorms Epilepsy EthanolConcentration ERing
    FaceDetection FingerMovements HandMovementDirection Handwriting
    Heartbeat InsectWingbeat JapaneseVowels Libras LSST MotorImagery
    NATOPS PenDigits PEMS-SF PhonemeSpectra RacketSports
    SelfRegulationSCP1 SelfRegulationSCP2 SpokenArabicDigits
    StandWalkJump UWaveGestureLibrary
    """.split(),
    # Last group: the 2 old removed names from the tally above.
    *"NonInvasiveFatalECGThorax1 NonInvasiveFatalECGThorax2".split(),
]

# Map "ucr_uea_<name>" to the zip archive URL of every UCR/UEA dataset name.
# A comprehension is used so that no loop variable is left behind as a
# module-level global (which `from ... import *` would otherwise export).
UCR_UEA_DATASETS = {
    f"ucr_uea_{name}": f"https://www.timeseriesclassification.com/aeon-toolkit/{name}.zip"
    for name in _ucr_uea_datasets
}

# Full catalogue: the hand-maintained entries first, then the UCR/UEA entries.
# Should a key ever appear in both, the UCR/UEA value wins because it is
# unpacked last.
DATABASE = {**_DATABASE, **UCR_UEA_DATASETS}
# Every dataset name, in the catalogue's insertion order.
AVAILABLE_DATASETS = list(DATABASE)