###########################################################################
## Team: The Diggers (D.A.Tsenov Academy of Economics, Svishtov, Bulgaria)
## Students:
## Ivalina Foteva (Marketing)
## Beatris Ljubenova (Finance)
## Ivan Dragomirov (Marketing)
## Mentors:
## Angelin Lalev, PhD (Dept. of Business Informatics)
## Atanaska Reshetkova, PhD (Dept. of Marketing)
## Kostadin Bashev (Dept. of Marketing)
##
## The code below may be used under Creative Commons CC-BY license.
##
import numpy as np
import pandas as pd
import tensorflow as tf
from keras.models import Sequential
from keras.models import load_model
from keras.layers import LSTM
from keras.layers import Dense
import matplotlib.pyplot as plt
# Subroutines
# Computes Mean Absolute Percentage Error
def compute_mape(test, prediction):
    return np.mean(np.abs((test - prediction) / test)) * 100
# Computes Directional Symmetry
def compute_ds(test, prediction):
    oldtest = test[:-1]
    newtest = test[1:]
    oldprediction = prediction[:-1]
    newprediction = prediction[1:]
    tmp1 = newtest - oldtest
    tmp2 = newprediction - oldprediction
    tmp = np.multiply(tmp1, tmp2.T)
    percent = ((np.where(tmp > 0)[0].shape[0]) * 100) / (oldtest.shape[0])
    return percent
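# A quick sanity check of the two metrics on a toy example (uncomment to run).
# The numbers below are made up for illustration only:
#_test = np.array([100.0, 102.0, 101.0, 105.0])
#_pred = np.array([101.0, 103.0, 100.0, 104.0])
#print(compute_mape(_test, _pred))  # small error, since the predictions are close to the true values
#print(compute_ds(_test, _pred))    # 100.0 here, since every up/down move is predicted in the right direction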
# Before we start, something about cheating.
# The models won't be tested on datapoints in the future, so if we could get
# our hands on some data that is more recent than the last point in our
# dataset, we could cheat by covertly including it in our training set.
# This would overfit our model to an extreme degree, but since we would be
# testing on the same data we train on, the testing process would miss it
# and we would appear to have achieved excellent results in
# predicting the "future".
# Then, to cover it all up, we could blame the non-reproducibility of our
# training results on the random seed. For example, we could seed
# with the current system time in milliseconds, which differs
# every time the program is started.
# We refuse to do this and seed our random generators with pre-determined
# numbers, which should guarantee full reproducibility of our results,
# including the training of the model.
# Note: We train on CPU and we are not sure whether the rounding behaviour
# is identical on CPU and GPU, so we don't know whether training our
# net on a GPU would change the end results.
# Keras is supposed to use NumPy's pseudo-random generator
# and TensorFlow has its own.
# We seed both.
np.random.seed(1)
tf.set_random_seed(2)
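# Note (an assumption on our side, not something the dataset or competition
# requires): bit-for-bit reproducibility on CPU may also depend on Python's
# own random module and on multi-threaded execution inside TensorFlow.
# A minimal sketch of how that could be pinned down as well, for the
# Keras + TensorFlow 1.x combination used here (uncomment if needed):
#import random
#from keras import backend as K
#random.seed(3)
#single_thread = tf.ConfigProto(intra_op_parallelism_threads=1,
#                               inter_op_parallelism_threads=1)
#K.set_session(tf.Session(graph=tf.get_default_graph(), config=single_thread))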
data = pd.read_csv('/home/datacrunch/Downloads/matrix_one_file/price_data.csv')
# We start with the Bitcoin model.
# The preliminary analysis indicates that:
# 1. The price of Bitcoin is heavily correlated with the prices of all major
# cryptocurrencies. We are making the educated guess that the Bitcoin price
# influences the prices of the other cryptocurrencies rather than the other
# way around.
# This means that the lesser-known cryptocurrencies would just add noise to
# our model. Since we will not trade them, we simply do not use the provided
# data about their prices, volumes, etc.
#
# 2. In the short term, the price of Bitcoin is not influenced significantly
# by daily trading volumes or by the total number of bitcoins issued,
# so we do not use this data in the model.
#
# Based on these observations, we conclude that the only semi-reliable
# predictor of current prices included in the dataset
# is the historical price data of each cryptocurrency, and especially
# the Bitcoin prices.
#
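# A minimal sketch of how the correlation claim above can be double-checked
# (assumption: every column except 'time' is a price series; column '1442'
# is Bitcoin). Uncomment to run:
#prices = data.drop('time', axis=1).apply(pd.to_numeric, errors='coerce')
#print(prices.corr()['1442'].sort_values(ascending=False).head(10))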
# We will approach the problem the "Bollinger way", choosing to look 20 or so
# steps back and feeding them into an LSTM neural network. The number is
# chosen based on conventional wisdom about how financial markets operate.
# We actually tried different numbers of look-back periods. Our results
# indicated that there is no significant difference in the performance
# of the neural network even if we look as far as 40 periods back.
# With shorter periods (as short as 3), we saw a small increase in the
# success of predicting the direction of the next move of the price,
# at the cost of a worse prediction of the scale of the movement itself.
# Which property of the model - the ability to predict the direction or
# the scale of the movement - is more important depends on the particular
# trading strategy chosen. The assumptions of the competition - no short
# selling, no futures trading, autonomous AI-supported trading decisions -
# imply that the direction is important enough to warrant shorter look-back
# periods.
# We split the data at row 13251 to get the train and test sets
splitpoint = 13251
# Bitcoin only.
bitcoin = data['1442'] # Bitcoin id in the dataset
# And we need the time column for neat data visualisations
# Too bad that in the dataset, the time column sometimes comes with
# milliseconds in different formats, so the column must be cleaned,
# if only to look nice on chart ticks
time = data['time'].str[0:16]
# We look *lookback* periods back in time
lookback = 20
# First we fill the missing data
bitcoin = bitcoin.fillna(method='pad')
# Then we scale and center the data
scalefactor = bitcoin.max()
bitcoin = bitcoin / scalefactor
bitcoin = bitcoin - 0.5
# Make the timeframes. Each timeframe contains price data for
# *lookback* periods back in time.
# We also switch to numpy arrays, which we will need to feed the data
# into the neural network.
timeframes = np.array(bitcoin[0:lookback])
for c in range(1, bitcoin.count()-lookback+1):
    timeframes = np.vstack((timeframes, np.array(bitcoin[c:c+lookback])))
# We don't need the last timeframe, because we don't have y to test
# and learn against...
timeframes = timeframes[:-1]
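# The loop above rebuilds the array with vstack on every step, which is O(n^2).
# A hedged sketch of an equivalent, vectorised construction (uncomment to
# verify; it assumes the padded series has no remaining NaN values):
#values = bitcoin.values
#n_windows = values.shape[0] - lookback
#window_idx = np.arange(lookback)[None, :] + np.arange(n_windows)[:, None]
#assert np.allclose(values[window_idx], timeframes)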
# Split the dataset into traindata and testdata
(trainX, testX) = np.split(timeframes, [splitpoint])
(trainY, testY) = np.split(np.array(bitcoin[lookback:]), [splitpoint])
# And shape it as proper input to LSTM layer of Keras
trainX = np.reshape(trainX, (trainX.shape[0], trainX.shape[1], 1))
testX = np.reshape(testX, (testX.shape[0], testX.shape[1], 1))
# Init the model
model = Sequential()
# LSTM layer
# We experimented with more layers and more neurons in each layer.
# Our experiments seem to indicate that no significant improvement
# can be made this way. Maybe the historical data simply does not contain
# enough information that can be extracted in this fashion.
model.add(LSTM(32, input_shape=(lookback,1), return_sequences = False))
# More LSTM layers can be added in this fashion:
#model.add(LSTM(32, return_sequences = True))
# Please note that LSTM layers return data with one dimension more than
# the other types of layers. So when we stack such layers, all layers
# except the last one must be called with return_sequences = True
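# A hedged sketch of what a deeper stack would look like (not used in the
# final model, since our experiments showed no significant gain from it):
#deep = Sequential()
#deep.add(LSTM(32, input_shape=(lookback, 1), return_sequences=True))
#deep.add(LSTM(16, return_sequences=False))
#deep.add(Dense(1))
#deep.compile(loss='mean_squared_error', optimizer='adam')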
#
# Output layer
model.add(Dense(1))
# Compile the model
# ADAM optimizer seems to work better than SGD with all
# tested scenarios about lookback and depth of our network.
model.compile(loss='mean_squared_error', optimizer='adam')
# If, during development, we need to load our model and train it
# a little more to determine the number of epochs needed:
#model = load_model('bitcoin.h5')
# This is the time-consuming step - train the model
model.fit(trainX, trainY, epochs=60, verbose=2)
# We save the model at development time to avoid having to train it
# repeatedly while hammering the code that follows.
model.save('bitcoin.h5')
# When we have a saved model and need to fix the code below, we comment out
# the previous lines (from "model = Sequential()" on) and load the trained
# model from disk instead.
#model = load_model('bitcoin.h5')
# The neural network makes its predictions about *one* period of time
# in the future for each timeframe we feed in.
predictY = model.predict(testX, verbose = 1)
# "De-scale", "De-center" our data
predictY = (predictY + 0.5) * scalefactor
testY = (testY + 0.5) * scalefactor
# Some data visualisations
# Compute MAPE / DS
mape = compute_mape(testY, predictY[:, 0])  # predictY has shape (n, 1); pass the flat column so broadcasting does not distort the MAPE
print("MAPE is %f percent"%mape)
ds = compute_ds(testY, predictY)
print("DS is %s percent"%ds)
# predictY and testY on all validation points
# With this many datapoints (~2000) the scale hides the differences, so
# things will look much better than they actually are.
plt.rc('xtick', labelsize=8)
plt.rc('axes', labelsize=14)
plt.rc('figure', titlesize=14)
plt.plot(testY[:-30], label='True price')
plt.plot(predictY[:-30], label='Predicted price\n/1 period in the future/')
plt.title('Predicted and real values of Bitcoin prices on the test set\n ~2000 points from 12.03.2018 until 23.03.2018\nTeam: The Diggers /Tsenov Academy of Economics/\n \nMAPE: %.2f%% DS: %.2f%%'%(mape,ds))
plt.legend()
plt.xlabel('Time')
plt.ylabel('BTC/USD')
plt.tight_layout()
# Save to disk, so we can include this in the team article or something.
plt.savefig('bitcoin_predictvstest_2000.png', dpi = 1200)
plt.show()
# So we plot the last 100 datapoints of testY and predictY instead
plt.rc('xtick', labelsize=8)
plt.rc('axes', labelsize=14)
plt.rc('figure', titlesize=14)
plt.plot(testY[-130:-30], label='True price')
plt.plot(predictY[-130:-30], label='Predicted price\n/1 period in the future/')
plt.title('Predicted and real values of Bitcoin prices on the test set\n 100 points from 2018-03-21 12:15:00 until 2018-03-23 11:15:00\nTeam: The Diggers /Tsenov Academy of Economics/\n \nMAPE: %.2f%% DS: %.2f%%'%(mape,ds))
plt.legend()
plt.xlabel('Time')
plt.ylabel('BTC/USD')
plt.tight_layout()
plt.savefig('bitcoin_predictvstest_100.png', dpi = 1200)
plt.show()
# Last 10 datapoints of TestY and PredictY
plt.rc('xtick', labelsize=8)
plt.rc('axes', labelsize=14)
plt.rc('figure', titlesize=14)
plt.plot(testY[-40:-30], label='True price')
plt.plot(predictY[-40:-30], label='Predicted price\n/1 period in the future/')
plt.title('Predicted and real values of Bitcoin prices on 10 points in the test set\nTeam: The Diggers /Tsenov Academy of Economics/\n \nMAPE: %.2f%% DS: %.2f%%'%(mape,ds))
plt.grid()
plt.legend()
plt.xlabel('Time')
plt.ylabel('BTC/USD')
plt.xticks(range(10), time[-40:-30], rotation='vertical')
plt.tight_layout()
plt.savefig('bitcoin_predictvstest_10.png', dpi = 1200)
plt.show()
# We export testY and predictY as CSV so we can share them with
# teammates who are trying ARIMA models. This way we can cross-check
# the MAPE and DS of each model.
p = pd.DataFrame(data=np.vstack((testY, predictY[:,0])).T)
p.columns = ['testY', 'predictY']
p.to_csv('bitcoinmape_ds_bitcoin.csv')
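# A hedged sketch of the cross-check the teammates can run on the exported
# file, reusing the metric subroutines above (uncomment to use):
#shared = pd.read_csv('bitcoinmape_ds_bitcoin.csv')
#print(compute_mape(shared['testY'].values, shared['predictY'].values))
#print(compute_ds(shared['testY'].values, shared['predictY'].values))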
etherium.py
###########################################################################
## Team: The Diggers (D.A.Tsenov Academy of Economics, Svishtov, Bulgaria)
## Students:
## Ivalina Foteva (Marketing)
## Beatris Ljubenova (Finance)
## Ivan Dragomirov (Marketing)
## Mentors:
## Atanaska Reshetkova, PhD (Dept. of Marketing)
## Kostadin Bashev (Dept. of Marketing)
## Angelin Lalev, PhD (Dept. of Business Informatics)
##
## The code below may be used under Creative Commons CC-BY license.
##
import numpy as np
import pandas as pd
import tensorflow as tf
from keras.models import Sequential
from keras.models import load_model
from keras.layers import LSTM
from keras.layers import Dense
import matplotlib.pyplot as plt
# Subroutines
# Computes Mean Absolute Percentage Error
def compute_mape(test, prediction):
    return np.mean(np.abs((test - prediction) / test)) * 100
# Computes Directional Symmetry
def compute_ds(test, prediction):
    oldtest = test[:-1]
    newtest = test[1:]
    oldprediction = prediction[:-1]
    newprediction = prediction[1:]
    tmp1 = newtest - oldtest
    tmp2 = newprediction - oldprediction
    tmp = np.multiply(tmp1, tmp2.T)
    percent = ((np.where(tmp > 0)[0].shape[0]) * 100) / (oldtest.shape[0])
    return percent
# Before we start, something about cheating.
# The models won't be tested on datapoints in the future, so if we could get
# our hands on some data that is more recent than the last point in our
# dataset, we could cheat by covertly including it in our training set.
# This would overfit our model to an extreme degree, but since we would be
# testing on the same data we train on, the testing process would miss it
# and we would appear to have achieved excellent results in
# predicting the "future".
# Then, to cover it all up, we could blame the non-reproducibility of our
# training results on the random seed. For example, we could seed
# with the current system time in milliseconds, which differs
# every time the program is started.
# We refuse to do this and seed our random generators with pre-determined
# numbers, which should guarantee full reproducibility of our results,
# including the training of the model.
# Note: We train on CPU and we are not sure whether the rounding behaviour
# is identical on CPU and GPU, so we don't know whether training our
# net on a GPU would change the end results.
# Keras is supposed to use NumPy's pseudo-random generator
# and TensorFlow has its own.
# We seed both.
np.random.seed(1)
tf.set_random_seed(2)
data = pd.read_csv('/home/datacrunch/Downloads/matrix_one_file/price_data.csv')
# This is the Ethereum model. As with Bitcoin, the preliminary analysis
# indicates that:
# 1. The price of Bitcoin is heavily correlated with the prices of all major
# cryptocurrencies. We are making the educated guess that the Bitcoin price
# influences the prices of the other cryptocurrencies rather than the other
# way around.
# This means that the lesser-known cryptocurrencies would just add noise to
# our model. Since we will not trade them, we simply do not use the provided
# data about their prices, volumes, etc.
#
# 2. In the short term, the price of Bitcoin is not influenced significantly
# by daily trading volumes or by the total number of bitcoins issued,
# so we do not use this data in the model.
#
# Based on these observations, we conclude that the only semi-reliable
# predictor of current prices included in the dataset
# is the historical price data of each cryptocurrency, and especially
# the Bitcoin prices.
#
# We will approach the problem the "Bollinger way", choosing to look 20 or so
# steps back and feeding them into an LSTM neural network. The number is
# chosen based on conventional wisdom about how financial markets operate.
# We actually tried different numbers of look-back periods. Our results
# indicated that there is no significant difference in the performance
# of the neural network even if we look as far as 40 periods back.
# With shorter periods (as short as 3), we saw a small increase in the
# success of predicting the direction of the next move of the price,
# at the cost of a worse prediction of the scale of the movement itself.
# Which property of the model - the ability to predict the direction or
# the scale of the movement - is more important depends on the particular
# trading strategy chosen. The assumptions of the competition - no short
# selling, no futures trading, autonomous AI-supported trading decisions -
# imply that the direction is important enough to warrant shorter look-back
# periods.
# We split the data at row 13251 to get the train and test sets
splitpoint = 13251
bitcoinid = '1442' # Bitcoin id in the dataset
otherid = '1443' # Ethereum id in the dataset
bitcoin = data[bitcoinid]
other = data[otherid]
# And we need the time column for neat data visualisations
# Too bad that in the dataset, the time column sometimes comes with
# milliseconds in different formats, so the column must be cleaned,
# if only to look nice on chart ticks
time = data['time'].str[0:16]
# We look *lookback* periods back in time
lookback = 20
# First we fill the missing data
bitcoin = bitcoin.fillna(method='pad')
other = other.fillna(method='pad')
# Then we scale and center the data
scalefactor_bitcoin = bitcoin.max()
bitcoin = bitcoin / scalefactor_bitcoin
bitcoin = bitcoin - 0.5
scalefactor_other = other.max()
other = other / scalefactor_other
other = other - 0.5
# And we combine both vectors into a numpy array
combined = np.array([bitcoin, other])
combined = combined.T
# Make the timeframes. Each timeframe contains price data for
# *lookback* periods back in time.
# We shape the timeframes so they can be fed into the LSTM layer of Keras.
timeframes, drop = np.split((combined), [lookback], axis=0)
timeframes = timeframes.reshape(1, lookback, 2)
# We do not generate the last timeframe since we have no Y for it
for c in range(1, bitcoin.count()-lookback):
    drop1, newframe, drop2 = np.split(combined, [c, c+lookback], axis=0)
    newframe = newframe.reshape(1, lookback, 2)
    timeframes = np.concatenate((timeframes, newframe), axis=0)
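# As in the Bitcoin script, the loop above can be replaced by index
# arithmetic; a hedged sketch of an equivalent construction (uncomment to
# verify; it assumes the padded series have no remaining NaN values):
#n_windows = combined.shape[0] - lookback
#window_idx = np.arange(lookback)[None, :] + np.arange(n_windows)[:, None]
#assert np.allclose(combined[window_idx], timeframes)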
# Split the dataset into traindata and testdata
(trainX, testX) = np.split(timeframes, [splitpoint])
(trainY, testY) = np.split(np.array(other[lookback:]), [splitpoint])
# No reshape is needed here: unlike in the Bitcoin script, the timeframes
# above are already built with the 3-D shape (samples, lookback, 2) that the
# LSTM layer of Keras expects.
# Init the model
model = Sequential()
# LSTM layer
model.add(LSTM(32, input_shape=(lookback,2), return_sequences = False))
# More LSTM layers can be added in this fashion:
#model.add(LSTM(32, return_sequences = True))
# Please note that LSTM layers return data with one dimension more than
# the other types of layers. So when we stack such layers, all layers
# except the last one must be called with return_sequences = True
#
# Output layer
model.add(Dense(1))
# Compile the model
# ADAM optimizer seems to work better than SGD with all
# tested scenarios about lookback and depth of our network.
model.compile(loss='mean_squared_error', optimizer='adam')
# If, during development, we need to load our model and train it
# a little more to determine the number of epochs needed:
#model = load_model('etherium.h5')
# This is the time-consuming step - train the model
model.fit(trainX, trainY, epochs=60, verbose=2)
# We save the model at development time to avoid having to train it
# repeatedly while hammering the code that follows.
model.save('etherium.h5')
# When we have a saved model and need to fix the code below, we comment out
# the previous lines (from "model = Sequential()" on) and load it from disk instead.
#model = load_model('etherium.h5')
predictY = model.predict(testX, verbose = 1)
# "De"-scale, "De-center"
predictY = (predictY + 0.5) * scalefactor_other
testY = (testY + 0.5) * scalefactor_other
# Some data visualisations
# Compute MAPE / DS
mape = compute_mape(testY, predictY[:, 0])  # predictY has shape (n, 1); pass the flat column so broadcasting does not distort the MAPE
print("MAPE is %f percent"%mape)
ds = compute_ds(testY, predictY)
print("DS is %s percent"%ds)
# predictY and testY on all validation points
# With this many datapoints (~2000) the scale hides the differences, so
# things will look much better than they actually are.
plt.rc('xtick', labelsize=8)
plt.rc('axes', labelsize=14)
plt.rc('figure', titlesize=14)
plt.plot(testY[:-30], label='True price')
plt.plot(predictY[:-30], label='Predicted price\n/1 period in the future/')
plt.title('Predicted and real values of Ethereum prices on the test set\n ~2000 points from 12.03.2018 until 23.03.2018\nTeam: The Diggers /Tsenov Academy of Economics/\n \nMAPE: %.2f%% DS: %.2f%%'%(mape,ds))
plt.legend()
plt.xlabel('Time')
plt.ylabel('ETH/USD')
plt.tight_layout()
# Save to disk, so we can include this in the team article or something.
plt.savefig('etherium_predictvstest_2000.png', dpi = 1200)
plt.show()
# So we plot the last 100 datapoints of testY and predictY instead
plt.rc('xtick', labelsize=8)
plt.rc('axes', labelsize=14)
plt.rc('figure', titlesize=14)
plt.plot(testY[-130:-30], label='True price')
plt.plot(predictY[-130:-30], label='Predicted price\n/1 period in the future/')
plt.title('Predicted and real values of Ethereum prices on the test set\n 100 points from 2018-03-21 12:15:00 until 2018-03-23 11:15:00\nTeam: The Diggers /Tsenov Academy of Economics/\n \nMAPE: %.2f%% DS: %.2f%%'%(mape,ds))
plt.legend()
plt.xlabel('Time')
plt.ylabel('ETH/USD')
plt.tight_layout()
plt.savefig('etherium_predictvstest_100.png', dpi = 1200)
plt.show()
# Last 10 datapoints of TestY and PredictY
plt.rc('xtick', labelsize=8)
plt.rc('axes', labelsize=14)
plt.rc('figure', titlesize=14)
plt.plot(testY[-40:-30], label='True price')
plt.plot(predictY[-40:-30], label='Predicted price\n/1 period in the future/')
plt.title('Predicted and real values of Ethereum prices on 10 points in the test set\nTeam: The Diggers /Tsenov Academy of Economics/\n \nMAPE: %.2f%% DS: %.2f%%'%(mape,ds))
plt.grid()
plt.legend()
plt.xlabel('Time')
plt.ylabel('ETH/USD')
plt.xticks(range(10), time[-40:-30], rotation='vertical')
plt.tight_layout()
plt.savefig('etherium_predictvstest_10.png', dpi = 1200)
plt.show()
p = pd.DataFrame(data=np.vstack((testY, predictY[:,0])).T)
p.columns = ['testY', 'predictY']
p.to_csv('mape_ds_etherium.csv')



R

The Diggers

Sat, Apr 28 2018

###################################################
#       Experiment with cryptocurrency data       #
#                                                 #
#              The Diggers Team                   #
#    at D. A. Tsenov Academy of Economics         #
#                                                 #
###################################################

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(forecast)
library(tseries)
library(foreign)
library(anytime)

data <- read.spss("http://data.eacademybg.com/price_data_20only1.sav", to.data.frame=TRUE)
## Warning in read.spss("http://data.eacademybg.com/price_data_20only1.sav", :
## C:\Users\krst\AppData\Local\Temp\RtmpQNGv5o\file4d8416b93d49: Unrecognized
## record type 7, subtype 25 encountered in system file
data$time <- as.Date(anytime(data$time))


# 0 Read all price data for all 20 Cryptos
dim(data)
## [1] 15266    21
data[,2:21] <- sapply(data[,2:21], as.numeric)

# 1. Bitcoin BTC forecasting
dataBTC.ts <- ts(data$Bitcoin)
head(dataBTC.ts)
## Time Series:
## Start = 1 
## End = 6 
## Frequency = 1 
## [1] 10756.0 10788.1 10807.5 10776.1 10729.7 10653.3
# 1.1. Training set BTC
xt <- window(dataBTC.ts, end=c(13251))
plot(xt, type="l", main="Bitcoin prices b/w 1/17/2018 and 3/12/2018", ylab="BTC Price",xlab="Time", bty="l")

# 1.2. Evaluation set
xf <- window(dataBTC.ts, start=c(13252))
plot(xf, type="l", main="Bitcoin prices b/w 3/12/2018 and 3/23/2018", ylab="BTC Price",xlab="Time", bty="l")

# 1.3. Choosing smoothing models with ets()
sbestBTC <- ets(xt)
## Warning in ets(xt): Missing values encountered. Using longest contiguous
## portion of time series
sbestBTC
## ETS(A,Ad,N) 
## 
## Call:
##  ets(y = xt) 
## 
##   Smoothing parameters:
##     alpha = 0.9999 
##     beta  = 0.4883 
##     phi   = 0.8 
## 
##   Initial states:
##     l = 9663.7677 
##     b = 20.514 
## 
##   sigma:  28.1083
## 
##      AIC     AICc      BIC 
## 11267.63 11267.73 11296.03
plot(ets(xt))
## Warning in ets(xt): Missing values encountered. Using longest contiguous
## portion of time series

accuracy(forecast(sbestBTC,h=2015), xf)
##                         ME       RMSE        MAE           MPE       MAPE
## Training set -3.361973e-02   28.02448   19.90556 -1.531224e-04  0.2157092
## Test set     -1.163308e+03 1288.01006 1172.06023 -1.416167e+01 14.2504197
##                    MASE       ACF1 Theil's U
## Training set  0.9704516 0.07120127        NA
## Test set     57.1412115 0.99903345  51.71741
plot(forecast(sbestBTC))

BTC1 <- forecast(sbestBTC, h=288)
# 1.4 Choosing ARIMA models...
abestBTC <- auto.arima(xt)
abestBTC
## Series: xt 
## ARIMA(2,1,0) 
## 
## Coefficients:
##          ar1     ar2
##       0.3416  0.0501
## s.e.  0.0087  0.0087
## 
## sigma^2 estimated as 762.4:  log likelihood=-62459
## AIC=124924   AICc=124924   BIC=124946.5
summary(abestBTC)
## Series: xt 
## ARIMA(2,1,0) 
## 
## Coefficients:
##          ar1     ar2
##       0.3416  0.0501
## s.e.  0.0087  0.0087
## 
## sigma^2 estimated as 762.4:  log likelihood=-62459
## AIC=124924   AICc=124924   BIC=124946.5
## 
## Training set error measures:
##                       ME     RMSE      MAE           MPE      MAPE
## Training set -0.05307925 27.69587 17.69514 -0.0006550227 0.1775618
##                   MASE        ACF1
## Training set 0.9197179 0.005301944
accuracy(forecast(abestBTC,h=2015), xf)
##                         ME       RMSE        MAE           MPE       MAPE
## Training set -5.307925e-02   27.69587   17.69514 -6.550227e-04  0.1775618
## Test set     -1.152612e+03 1274.51224 1161.42930 -1.401642e+01 14.1058351
##                    MASE        ACF1 Theil's U
## Training set  0.9197179 0.005301944        NA
## Test set     60.3661348 0.999579131   50.2545
plot(forecast(abestBTC, h=288))

BTC2 <- forecast(abestBTC, h=288)
# 1.5 Write results as csv
write.csv(BTC2, file = "BTC_predict.csv", row.names = FALSE)
# 2. Ethereum ETH forecasting
dataETH.ts <- ts(data$Ethereum)
head(dataETH.ts)
## Time Series:
## Start = 1 
## End = 6 
## Frequency = 1 
## [1] 960.93 961.11 961.68 954.97 953.37 946.03
# 2.1. Training set
xt <- window(dataETH.ts, end=c(13251))
plot(xt, type="l", main="Ethereum prices b/w 1/17/2018 and 3/12/2018", ylab="ETH Price",xlab="Time", bty="l")

# 2.2. Evaluation set
xf <- window(dataETH.ts, start=c(13252))
plot(xf, type="l", main="Ethereum prices b/w 3/12/2018 and 3/23/2018", ylab="ETH Price",xlab="Time", bty="l")

# 2.3. Choosing smoothing models with ets()
sbestETH <- ets(xt)
## Warning in ets(xt): Missing values encountered. Using longest contiguous
## portion of time series
sbestETH
## ETS(A,Ad,N) 
## 
## Call:
##  ets(y = xt) 
## 
##   Smoothing parameters:
##     alpha = 0.9999 
##     beta  = 0.3994 
##     phi   = 0.8 
## 
##   Initial states:
##     l = 737.8782 
##     b = 1.0635 
## 
##   sigma:  2.0781
## 
##      AIC     AICc      BIC 
## 6891.851 6891.952 6920.251
plot(ets(xt))
## Warning in ets(xt): Missing values encountered. Using longest contiguous
## portion of time series

accuracy(forecast(sbestETH,h=2015), xf)
##                         ME       RMSE        MAE           MPE       MAPE
## Training set -5.005183e-03   2.071863   1.492355 -5.745436e-04  0.2108739
## Test set     -1.267788e+02 143.089516 127.034676 -2.243523e+01 22.4698857
##                   MASE       ACF1 Theil's U
## Training set  0.963232 0.08242392        NA
## Test set     81.993794 1.00000000  66.79193
plot(forecast(sbestETH))

ETH1 <- forecast(sbestETH, h=288)
# 2.4 Choosing ARIMA models...
abestETH <- auto.arima(xt)
abestETH
## Series: xt 
## ARIMA(2,1,0) 
## 
## Coefficients:
##          ar1      ar2
##       0.3739  -0.0095
## s.e.  0.0087   0.0087
## 
## sigma^2 estimated as 8.065:  log likelihood=-32512.52
## AIC=65031.05   AICc=65031.05   BIC=65053.52
summary(abestETH)
## Series: xt 
## ARIMA(2,1,0) 
## 
## Coefficients:
##          ar1      ar2
##       0.3739  -0.0095
## s.e.  0.0087   0.0087
## 
## sigma^2 estimated as 8.065:  log likelihood=-32512.52
## AIC=65031.05   AICc=65031.05   BIC=65053.52
## 
## Training set error measures:
##                       ME     RMSE      MAE          MPE      MAPE
## Training set -0.01082069 2.848426 1.706706 -0.001518769 0.1884465
##                   MASE        ACF1
## Training set 0.9146436 0.002693457
accuracy(forecast(abestETH,h=2015), xf)
##                         ME       RMSE        MAE           MPE       MAPE
## Training set   -0.01082069   2.848426   1.706706  -0.001518769  0.1884465
## Test set     -126.24019140 142.802434 126.627106 -22.463360612 22.5158008
##                    MASE        ACF1 Theil's U
## Training set  0.9146436 0.002693457        NA
## Test set     67.8609471 0.999952791  66.08761
plot(forecast(abestETH))

ETH2 <- forecast(abestETH, h=288)
# 2.5 Write results as csv
write.csv(ETH2, file = "ETH_predict.csv", row.names = FALSE)

# 3. Ripple XRP forecasting
dataXRP.ts <- ts(data$Ripple)
head(dataXRP.ts)
## Time Series:
## Start = 1 
## End = 6 
## Frequency = 1 
## [1] 1.13 1.13 1.14 1.12 1.11 1.09
# 3.1. Training set XRP
xt <- window(dataXRP.ts, end=c(13251))
plot(xt, type="l", main="Ripple prices b/w 1/17/2018 and 3/12/2018", ylab="XRP Price",xlab="Time", bty="l")