This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
!pip install gdelt | |
import gdelt | |
gd = gdelt.gdelt(version=1) | |
import os, datetime | |
os.makedirs("data",exist_ok=True) | |
#starting 60 days before Oct7 | |
cur_date = datetime.datetime(2019,10,7)-datetime.timedelta(days=60) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Keep only rows whose SOURCES value is exactly one of these outlets.
mySources = ["nytimes.com", "washingtonpost.com", "foxnews.com", "cnn.com"]

# Collect the filtered frames and concatenate once after the loop: calling
# pd.concat inside the loop re-copies every previously loaded row each time.
frames = []
for fname in os.listdir("data"):
    print(fname)
    if fname.endswith(".pkl"):
        # NOTE(review): unpickling can execute arbitrary code; only load
        # pickle files that this project wrote itself.
        tmp = pd.read_pickle(os.path.join("data", fname))
        frames.append(tmp[tmp["SOURCES"].isin(mySources)])

# pd.concat raises on an empty list, so fall back to an empty frame when
# no .pkl file was found.
df = pd.concat(frames) if frames else pd.DataFrame()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def get_supervised_df(df, issue, maxlag=1): | |
original = df.copy() | |
#for each day of lag, we add a new column to the dataframe with the previous values | |
for i in range(maxlag): | |
print(i) | |
original= pd.concat([original, df.shift(i+1).add_suffix("_(t-%s)"%(i+2))], axis=1).dropna() | |
#save original issue to add back in |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from fastai.tabular import * | |
def genNN(myDF, issue): | |
reframed = get_supervised_df(myDF, issue, 7) | |
days = [] | |
myOrder = reframed.columns.to_list() | |
for i in range(7): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Run the learner on the Test, Valid and Train dataset types. get_preds
# returns several values; only the first returned element is kept each time.
h_predictions, *_ = learn.get_preds(DatasetType.Test)
v_predictions, *_ = learn.get_preds(DatasetType.Valid)
predictions, *_ = learn.get_preds(DatasetType.Train)

issue = "foxnews.com_russia"
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from math import sqrt

from sklearn.metrics import mean_squared_error

# RMSE over every row of `results`: the `actual` column against the `m`
# column, then against the `predicted` column.
testing = results.copy()
print("RMSE training using mean: {}".format(sqrt(mean_squared_error(testing.actual, testing.m))))
print("RMSE training using model: {}".format(sqrt(mean_squared_error(testing.actual, testing.predicted))))

# Restrict to the last len(h_predictions) + 1 rows of `results`.
# NOTE(review): the extra row (the "- 1") may be unintentional if this slice
# is meant to match the test predictions one-to-one — confirm.
testing = results[-len(h_predictions) - 1:].copy()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
fig, axs = plt.subplots(4, 5, sharex=True) | |
fig.set_size_inches(16,12) | |
x = y = 0 | |
for issue in time_series: | |
train_l = 55 | |
s_model = SARIMAX(endog = time_series[[issue]][:train_l][1:], |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
myCountries = ["dprk", "ukraine", "russia", "iran", "china"]

# One row of five panels sharing the y-axis, with zero horizontal spacing.
fig, axs = plt.subplots(1, 5, sharey=True, gridspec_kw={'wspace': 0})
fig.set_facecolor("white")
fig.set_size_inches(24, 6)
idx = 0
for country in myCountries:
    # Select the time_series columns whose name contains this country key.
    tmp = time_series[[col for col in time_series.columns if country in col]]
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# One row of five panels sharing the y-axis, with zero horizontal spacing.
fig, axs = plt.subplots(1, 5, sharey=True, gridspec_kw={'wspace': 0})
fig.set_size_inches(16, 6)
x = y = 0
for issue in myCountries:
    # Number of rows in time_series minus 5.
    train_l = len(time_series) - 5
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Create a DataFrame to hold errors.
search_df = pd.DataFrame()

# Set up the grid (1 row x 5 columns) with a shared y-axis and zero
# horizontal spacing.
fig, axs = plt.subplots(1, 5, sharey=True, gridspec_kw={'wspace': 0})
fig.set_size_inches(16, 6)
x = y = 0
OlderNewer