Gist: chrisoei/7e12474e069f6641267d234364d979a8
Machine learning on economic time series example
(Captured from the GitHub gist page; the surrounding UI text and the bidirectional-Unicode notice are GitHub boilerplate, not part of the source file.)
# Training data available (for free) at the Federal Reserve website:
# https://fred.stlouisfed.org/series/CPIAUCNS
#
# According to the Universal Approximation Theorem
# (https://en.wikipedia.org/wiki/Universal_approximation_theorem),
# a neural network should be able to approximate the CPI
# as a function of time with arbitrary accuracy.
import CSV
import Flux
import Plots
import Stardates
import TimeZones
using CUDA
@info "Loading data"
# Consumer price index as a function of time
cpiaucns = CSV.File("CPIAUCNS.csv")
@info "Transforming data"
# Constants used to shift/scale the time axis before it is fed to the net.
y0 = 1970
yspan = 100
# Build one (input, target) pair per CSV row:
#   input  = the row's date as a real number (Stardate), shifted by y0 and
#            scaled by yspan so typical values are O(1);
#   target = log10 of the CPI value.
# Each pair is then moved to the GPU for training.
data1 = [(
    [(Stardates.Stardate(row.DATE, 16, 0, 0,
                         TimeZones.tz"America/New_York").sd - y0) / yspan],
    [log10(row.CPIAUCNS)],
) for row in cpiaucns] .|> Flux.gpu
# Width of the single hidden layer.
n_hidden = 3
@info "Hidden layer: $n_hidden"
@info "Creating model"
# One input (scaled year) -> n_hidden sigmoid units -> one linear output
# (log10 CPI), moved to the GPU to match data1.
model1 = Flux.gpu(Flux.Chain(
    Flux.Dense(1, n_hidden, Flux.σ),
    Flux.Dense(n_hidden, 1, Flux.identity),
))
@info "Extracting parameters"
# Implicit-parameter collection expected by this version of Flux.train!.
p1 = Flux.params(model1)
@info "Defining loss function"
# Mean squared error between the network output and the log10 CPI target.
function loss1(x, y)
    return Flux.mse(model1(x), y)
end
@info "Defining optimization"
# Plain gradient descent with Flux's default step size.
opt1 = Flux.Descent()
# Run 500 epochs of gradient descent: each Flux.train! call makes one full
# pass over data1, updating the parameters in p1 in place.
# NOTE(review): logging every iteration is verbose for 500 epochs; kept as-is
# to preserve the script's observable behavior.
for i1 in 1:500
    @info "Training iteration $i1"
    Flux.train!(loss1, p1, data1, opt1)
end
# Undo the transformations applied when building data1: rescale a calendar
# year into the network's input range, evaluate model1, and invert the log10
# target transform with exp10 to recover an actual CPI estimate.
cpiest(x) = exp10(model1([(x - y0) / yspan])[1])
# Plot the model's fitted CPI curve over the full history of the series.
plot1 = Plots.plot(cpiest, 1913, 2021)
# Overlay the raw observations: Stardate (decimal time) on x, CPI level on y.
data2x = [Stardates.Stardate(row.DATE, 16, 0, 0,
                             TimeZones.tz"America/New_York").sd for row in cpiaucns]
data2y = [row.CPIAUCNS for row in cpiaucns]
Plots.plot!(plot1, data2x, data2y)
Plots.savefig("output.png")
# The result isn't too shabby:
# https://www.christopheroei.com/b/930ae419216d18305f4243c501e1919642d873702038aa3cd65c38ff6be94629.png
# Problem is, increasing the number of neurons and hidden layers
n_hidden1 = 30
n_hidden2 = 30
n_hidden3 = 30
@info "Creating model"
# Deeper variant: three sigmoid hidden layers of 30 units each.
# NOTE(review): model2 is only constructed here — this script never trains or
# plots it; presumably the earlier training/plotting code was rerun against it
# by hand to produce the result linked below. Confirm before relying on this
# script end-to-end.
model2 = Flux.gpu(Flux.Chain(
    Flux.Dense(1, n_hidden1, Flux.σ),
    Flux.Dense(n_hidden1, n_hidden2, Flux.σ),
    Flux.Dense(n_hidden2, n_hidden3, Flux.σ),
    Flux.Dense(n_hidden3, 1, Flux.identity),
))
# doesn't seem to improve the result:
# https://www.christopheroei.com/b/0b1d033d8eac480735d78dfe8f1ef4aa1e1991e655968ddce735c4ee5e1580a8.png
# The resulting estimate looks way too smooth -- no overfitting despite increasing the number of
# model parameters. My guess is that it got trapped in a local optima.
# I tried increasing the number of neurons even higher in the hopes of escaping the local
# optima with increased dimensionality, or at least getting into a situation where I was
# seeing overfitting:
n_hidden1 = 300
n_hidden2 = 300
n_hidden3 = 300
@info "Creating model"
# Even wider variant: three sigmoid hidden layers of 300 units each.
# NOTE(review): like model2, this model is constructed but never trained or
# plotted within this script.
model = Flux.gpu(Flux.Chain(
    Flux.Dense(1, n_hidden1, Flux.σ),
    Flux.Dense(n_hidden1, n_hidden2, Flux.σ),
    Flux.Dense(n_hidden2, n_hidden3, Flux.σ),
    Flux.Dense(n_hidden3, 1, Flux.identity),
))
# but ended up with a total failure:
# https://www.christopheroei.com/b/a03738e62ba2b158acd8353d9111b6f4db7cdda37d8a9990a68618110c5d8dc5.png
# Not sure what's wrong or how to fix it. I had expected the opposite result.
# vim: set et ff=unix ft=julia nocp sts=4 sw=4 ts=4:
(GitHub page footer — sign-up/sign-in prompts; not part of the source file.)