.ipynb_checkpoints/main-checkpoint.ipynb
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/requests/__init__.py:91: RequestsDependencyWarning: urllib3 (1.24.1) or chardet (3.0.4) doesn't match a supported version!\n",
" RequestsDependencyWarning)\n"
]
}
],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import tensorflow as tf\n",
"import sklearn\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.preprocessing import StandardScaler\n",
"import matplotlib\n",
"import matplotlib.pyplot as plt\n",
"from pprint import pprint"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"import constants\n",
"from core.tools.metrics import *\n",
"from core.models.stat_models import *\n",
"from core.models.baseline_rnn import *\n",
"from core.tools.visualize import *\n",
"from core.tools.time_series import *\n",
"from core.tools.data_import import *\n",
"import core.models.baseline_lstm as baseline_lstm"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'0': '/Users/tianyudu/Documents/Academics/EconForecasting/AnnEconForecast/data/UNRATE.csv',\n",
" '1': '/home/ec2-user/AnnEconForecast/data/UNRATE.csv',\n",
" '2': '/home/ubuntu/AnnEconForecast/data/UNRATE.csv',\n",
" '3': '/home/ec2-user/AnnEconForecast/data/DEXCAUS.csv'}\n",
"Select Dataset >>> \n",
"Invalid data location received, try again...\n",
"Select Dataset >>> 0\n"
]
}
],
"source": [
"# Available datasets, loaded from constants.\n",
"pprint(constants.DATA_DIR)\n",
"# Keep prompting until the reply matches one of the keys printed above.\n",
"while True:\n",
"    # Strip surrounding whitespace so a reply such as '0 ' still matches its key.\n",
"    choice = input(\"Select Dataset >>> \").strip()\n",
"    if choice in constants.DATA_DIR:\n",
"        break\n",
"    print(\"Invalid data location received, try again...\")\n",
"# Path of the selected dataset; read by the data-preparation cell.\n",
"FILE_DIR = constants.DATA_DIR[choice]"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Dataset chosen: \n",
"/Users/tianyudu/Documents/Academics/EconForecasting/AnnEconForecast/data/UNRATE.csv\n"
]
}
],
"source": [
"# Echo the resolved path so the saved output records which file this run used.\n",
"print(f\"Dataset chosen: \\n{FILE_DIR}\")"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# Pre-processing Parameters\n",
"# PERIODS and ORDER are forwarded to baseline_lstm.prepare_dataset\n",
"# (presumably the differencing period and order -- TODO confirm).\n",
"PERIODS = 1\n",
"ORDER = 1\n",
"# Lag count handed to baseline_lstm.normalize; also reused as the\n",
"# network's number of time steps.\n",
"LAGS = 12\n",
"# Passed to baseline_lstm.normalize as train_ratio.\n",
"TRAIN_RATIO = 0.8"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# Neural Network Parameters\n",
"# One time step per lagged observation.\n",
"NUM_TIME_STEPS = LAGS\n",
"# Number of series used to predict. (including concurrent)\n",
"NUM_INPUTS = 1\n",
"# Number of output series\n",
"NUM_OUTPUTS = 1\n",
"NUM_NEURONS = 64\n",
"LEARNING_RATE = 0.01\n",
"EPOCHS = 50\n",
"# Training Settings\n",
"REPORT_PERIODS = 1\n",
"# NOTE(review): not the same path as the tensorboard_dir entry of the\n",
"# parameters dict (./tensorboard/<EXPERIMENT_NAME>) -- confirm which is intended.\n",
"TENSORBOARD_DIR = \"./tensorboard/test/1\""
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# Label for this run; it names the TensorBoard log folder and the\n",
"# saved-model folder configured in the parameters dict.\n",
"EXPERIMENT_NAME = \"test1\""
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"# Hyper-parameters and output paths handed to baseline_lstm.exec_core.\n",
"# Every numeric setting is read from the constants declared in the earlier\n",
"# parameter cells, so each value is defined in exactly one place.\n",
"parameters = {\n",
"    \"epochs\": EPOCHS,\n",
"    \"num_time_steps\": NUM_TIME_STEPS,\n",
"    \"num_inputs\": NUM_INPUTS,\n",
"    \"num_outputs\": NUM_OUTPUTS,\n",
"    \"num_neurons\": NUM_NEURONS,\n",
"    \"learning_rate\": LEARNING_RATE,\n",
"    \"report_periods\": REPORT_PERIODS,\n",
"    # Log and checkpoint locations are derived from the experiment label.\n",
"    \"tensorboard_dir\": f\"./tensorboard/{EXPERIMENT_NAME}\",\n",
"    \"model_path\": f\"./saved_models/{EXPERIMENT_NAME}/my_model\"\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Dataset loaded. \n",
"\tIndex type: datetime64[ns] \n",
"\tData type: float64\n",
"First few rows of dataset loaded:\n",
" UNRATE_period1_order1\n",
"DATE \n",
"1948-02-01 0.4\n",
"1948-03-01 0.2\n",
"1948-04-01 -0.1\n",
"1948-05-01 -0.4\n",
"1948-06-01 0.1\n"
]
}
],
"source": [
"# Load the chosen file through the project helper, forwarding the\n",
"# pre-processing settings PERIODS and ORDER.\n",
"prepared_df = baseline_lstm.prepare_dataset(file_dir=FILE_DIR, periods=PERIODS, order=ORDER)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Total 836 observations generated.\n",
"Note: shape format: (num_obs, time_steps, num_inputs/outputs)\n",
"X shape = (836, 12, 1), y shape = (836, 1, 1)\n",
"Training and testing set generated, \n",
"X_train shape: (601, 12, 1) \n",
"y_train shape: (601, 1) \n",
"X_test shape: (168, 12, 1) \n",
"y_test shape: (168, 1) \n",
"X_validation shape: (67, 12, 1) \n",
"y_validation shape: (67, 1)\n"
]
}
],
"source": [
"# Normalize data\n",
"# baseline_lstm.normalize hands back the train / validation / test splits\n",
"# for the inputs (X) first and the targets (y) second.\n",
"(X_train, X_val, X_test, y_train, y_val, y_test) = baseline_lstm.normalize(\n",
"    df=prepared_df, train_ratio=TRAIN_RATIO, lags=LAGS\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"# Gather the six splits into one mapping, keyed by variable name, so they\n",
"# can be passed to baseline_lstm.exec_core as a single argument.\n",
"data_collection = dict(\n",
"    X_train=X_train,\n",
"    X_val=X_val,\n",
"    X_test=X_test,\n",
"    y_train=y_train,\n",
"    y_val=y_val,\n",
"    y_test=y_test,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Resetting Tensorflow defalut graph...\n",
"Note: no gradient clipping is applied. \n",
"If possible gradient exploding detected (e.g. nan loss), try use clip_grad.\n",
"\n",
"Iteration [0], Training MSE 5.5150199; Validation MSE 1.6544994\n",
"\n",
"Iteration [10], Training MSE 1.0117933; Validation MSE 0.3836617\n",
"\n",
"Iteration [20], Training MSE 0.8936085; Validation MSE 0.3389218\n",
"\n",
"Iteration [30], Training MSE 0.8511847; Validation MSE 0.3034329\n",
"\n",
"Iteration [40], Training MSE 0.8188694; Validation MSE 0.3090063\n",
"Saving the trained model...\n",
"Time taken for [50] epochs: 0:00:04.846999\n",
"Loss Summary:\n",
"\tmae=0.553080677986145\n",
"\tmse=0.48683738708496094\n",
"\trmse=0.6977373361587524\n",
"\tmape=4.162381649017334\n"
]
},
{
"data": {
"text/plain": [
"{'mae': 0.5530807, 'mse': 0.4868374, 'rmse': 0.69773734, 'mape': 4.1623816}"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Run the model\n",
"# The call is left as the cell's last expression so the returned loss\n",
"# summary is displayed as the cell output.\n",
"# clip_grad=None: no gradient clipping is requested.\n",
"baseline_lstm.exec_core(parameters=parameters, data_collection=data_collection, clip_grad=None)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}