.ipynb_checkpoints/main-checkpoint.ipynb from TianyuDu/AnnEconForecast

.ipynb_checkpoints/main-checkpoint.ipynb
Summary

Maintainability

Test Coverage

Issues
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/requests/__init__.py:91: RequestsDependencyWarning: urllib3 (1.24.1) or chardet (3.0.4) doesn't match a supported version!\n",
      "  RequestsDependencyWarning)\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import tensorflow as tf\n",
    "import sklearn\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "import matplotlib\n",
    "import matplotlib.pyplot as plt\n",
    "from pprint import pprint"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "import constants\n",
    "from core.tools.metrics import *\n",
    "from core.models.stat_models import *\n",
    "from core.models.baseline_rnn import *\n",
    "from core.tools.visualize import *\n",
    "from core.tools.time_series import *\n",
    "from core.tools.data_import import *\n",
    "import core.models.baseline_lstm as baseline_lstm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'0': '/Users/tianyudu/Documents/Academics/EconForecasting/AnnEconForecast/data/UNRATE.csv',\n",
      " '1': '/home/ec2-user/AnnEconForecast/data/UNRATE.csv',\n",
      " '2': '/home/ubuntu/AnnEconForecast/data/UNRATE.csv',\n",
      " '3': '/home/ec2-user/AnnEconForecast/data/DEXCAUS.csv'}\n",
      "Select Dataset >>> \n",
      "Invalid data location received, try again...\n",
      "Select Dataset >>> 0\n"
     ]
    }
   ],
   "source": [
    "# Avaiable datasets, loaded from constants.\n",
    "pprint(constants.DATA_DIR)\n",
    "choice = None\n",
    "while choice is None or choice not in constants.DATA_DIR.keys():\n",
    "    if choice is not None:\n",
    "        print(\"Invalid data location received, try again...\")\n",
    "    choice = input(\"Select Dataset >>> \")\n",
    "FILE_DIR = constants.DATA_DIR[choice]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Dataset chosen: \n",
      "/Users/tianyudu/Documents/Academics/EconForecasting/AnnEconForecast/data/UNRATE.csv\n"
     ]
    }
   ],
   "source": [
    "print(f\"Dataset chosen: \\n{FILE_DIR}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Pre-processing Parameters\n",
    "PERIODS = 1\n",
    "ORDER = 1\n",
    "LAGS = 12\n",
    "TRAIN_RATIO = 0.8"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Neural Network Parameters\n",
    "NUM_TIME_STEPS = LAGS\n",
    "# Number of series used to predict. (including concurrent)\n",
    "NUM_INPUTS = 1\n",
    "NUM_OUTPUTS = 1\n",
    "NUM_NEURONS = 64\n",
    "# Number of output series\n",
    "LEARNING_RATE = 0.01\n",
    "EPOCHS = 50\n",
    "# Training Settings\n",
    "REPORT_PERIODS = 1\n",
    "TENSORBOARD_DIR = \"./tensorboard/test/1\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "EXPERIMENT_NAME = \"test1\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "parameters = {\n",
    "    \"epochs\": 50,\n",
    "    \"num_time_steps\": LAGS,\n",
    "    \"num_inputs\": 1,\n",
    "    \"num_outputs\": 1,\n",
    "    \"num_neurons\": 64,\n",
    "    \"learning_rate\": 0.01,\n",
    "    \"report_periods\": 1,\n",
    "    \"tensorboard_dir\": f\"./tensorboard/{EXPERIMENT_NAME}\",\n",
    "    \"model_path\": f\"./saved_models/{EXPERIMENT_NAME}/my_model\"\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Dataset loaded.    \n",
      "\tIndex type: datetime64[ns]    \n",
      "\tData type: float64\n",
      "First few rows of dataset loaded:\n",
      "            UNRATE_period1_order1\n",
      "DATE                             \n",
      "1948-02-01                    0.4\n",
      "1948-03-01                    0.2\n",
      "1948-04-01                   -0.1\n",
      "1948-05-01                   -0.4\n",
      "1948-06-01                    0.1\n"
     ]
    }
   ],
   "source": [
    "prepared_df = baseline_lstm.prepare_dataset(\n",
    "    file_dir=FILE_DIR,\n",
    "    periods=PERIODS,\n",
    "    order=ORDER\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Total 836 observations generated.\n",
      "Note: shape format: (num_obs, time_steps, num_inputs/outputs)\n",
      "X shape = (836, 12, 1), y shape = (836, 1, 1)\n",
      "Training and testing set generated,        \n",
      "X_train shape: (601, 12, 1)        \n",
      "y_train shape: (601, 1)        \n",
      "X_test shape: (168, 12, 1)        \n",
      "y_test shape: (168, 1)        \n",
      "X_validation shape: (67, 12, 1)        \n",
      "y_validation shape: (67, 1)\n"
     ]
    }
   ],
   "source": [
    "# Normalize data\n",
    "(X_train, X_val, X_test,\n",
    " y_train, y_val, y_test) = baseline_lstm.normalize(\n",
    "    df=prepared_df,\n",
    "    train_ratio=TRAIN_RATIO,\n",
    "    lags=LAGS\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "data_collection = {\n",
    "    \"X_train\": X_train,\n",
    "    \"X_val\": X_val,\n",
    "    \"X_test\": X_test,\n",
    "    \"y_train\": y_train,\n",
    "    \"y_val\": y_val,\n",
    "    \"y_test\": y_test\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Resetting Tensorflow defalut graph...\n",
      "Note: no gradient clipping is applied.            \n",
      "If possible gradient exploding detected (e.g. nan loss), try use clip_grad.\n",
      "\n",
      "Iteration [0], Training MSE 5.5150199; Validation MSE 1.6544994\n",
      "\n",
      "Iteration [10], Training MSE 1.0117933; Validation MSE 0.3836617\n",
      "\n",
      "Iteration [20], Training MSE 0.8936085; Validation MSE 0.3389218\n",
      "\n",
      "Iteration [30], Training MSE 0.8511847; Validation MSE 0.3034329\n",
      "\n",
      "Iteration [40], Training MSE 0.8188694; Validation MSE 0.3090063\n",
      "Saving the trained model...\n",
      "Time taken for [50] epochs:  0:00:04.846999\n",
      "Loss Summary:\n",
      "\tmae=0.553080677986145\n",
      "\tmse=0.48683738708496094\n",
      "\trmse=0.6977373361587524\n",
      "\tmape=4.162381649017334\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{'mae': 0.5530807, 'mse': 0.4868374, 'rmse': 0.69773734, 'mape': 4.1623816}"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Run the model\n",
    "baseline_lstm.exec_core(\n",
    "    parameters=parameters,\n",
    "    data_collection=data_collection,\n",
    "    clip_grad=None\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}