This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Notebook cell: install the gdelt package, create a client, make sure the
# "data" directory exists, and compute the first date of the window.
# NOTE: the leading "!" is Jupyter/IPython shell syntax, not plain Python.
# Outside a notebook, remove the line and run `pip install gdelt` in a shell.
!pip install gdelt

import datetime
import os

import gdelt

# Client object from the gdelt package, constructed with version=1.
gd = gdelt.gdelt(version=1)

# Create "data" if it is missing; exist_ok=True makes re-running the cell safe.
os.makedirs("data", exist_ok=True)

# Starting 60 days before 7 October 2019.
cur_date = datetime.datetime(2019, 10, 7) - datetime.timedelta(days=60)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Domains to keep: a row survives only if its SOURCES value is exactly one
# of these strings.
mySources = ["nytimes.com", "washingtonpost.com", "foxnews.com", "cnn.com"]

# Read every .pkl file in data/, keep the rows from the selected sources and
# stack them into one DataFrame `df`.
# Frames are collected in a list and concatenated once at the end, because
# calling pd.concat inside the loop re-copies all accumulated rows per file.
frames = []
k = os.listdir("data")
for i in k:
    print(i)
    if i.endswith(".pkl"):
        # NOTE(review): unpickling can execute arbitrary code; only load
        # files from a trusted source.
        tmp = pd.read_pickle(os.path.join("data", i))
        # Vectorised membership test instead of a per-row Python lambda.
        tmp = tmp[tmp["SOURCES"].isin(mySources)]
        frames.append(tmp)

# pd.concat raises on an empty list, so fall back to an empty frame when no
# pickle files were found.
df = pd.concat(frames) if frames else pd.DataFrame()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def get_supervised_df(df, issue, maxlag=1): | |
original = df.copy() | |
#for each day of lag, we add a new column to the dataframe with the previous values | |
for i in range(maxlag): | |
print(i) | |
original= pd.concat([original, df.shift(i+1).add_suffix("_(t-%s)"%(i+2))], axis=1).dropna() | |
#save original issue to add back in |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from fastai.tabular import * | |
def genNN(myDF, issue): | |
reframed = get_supervised_df(myDF, issue, 7) | |
days = [] | |
myOrder = reframed.columns.to_list() | |
for i in range(7): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Predictions from the learner for each dataset split. Only the first element
# returned by get_preds is kept; the remaining elements are discarded via *_.
h_predictions, *_ = learn.get_preds(DatasetType.Test)
# h_predictions = h_predictions.numpy()
v_predictions, *_ = learn.get_preds(DatasetType.Valid)
# v_predictions = v_predictions.numpy()
predictions, *_ = learn.get_preds(DatasetType.Train)
# predictions = predictions.numpy()

# NOTE(review): looks like a "<source domain>_<country>" series name — confirm
# against the columns of the frame it is used with.
issue = "foxnews.com_russia"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from math import sqrt

from sklearn.metrics import mean_squared_error

# Root-mean-squared error over every row of `results`, comparing the `actual`
# column first with the `m` column and then with the `predicted` column.
testing = results.copy()
rmse_mean = sqrt(mean_squared_error(testing.actual, testing.m))
rmse_model = sqrt(mean_squared_error(testing.actual, testing.predicted))
print("RMSE training using mean: {}".format(rmse_mean))
print("RMSE training using model: {}".format(rmse_model))

# Narrow `testing` to the trailing len(h_predictions) + 1 rows of `results`.
# NOTE(review): the extra "+ 1" row may or may not be intentional — confirm.
testing = results[-len(h_predictions)-1:].copy()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
fig, axs = plt.subplots(4, 5, sharex=True) | |
fig.set_size_inches(16,12) | |
x = y = 0 | |
for issue in time_series: | |
train_l = 55 | |
s_model = SARIMAX(endog = time_series[[issue]][:train_l][1:], |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
import matplotlib.pyplot as plt | |
## The install line below assumes this code runs in a Jupyter notebook.
## If you are not using a notebook, remove it and run `pip install gdelt` from your OS command line instead.
!pip install gdelt | |
## (the ! gives command line capabilities to notebook) | |
import gdelt |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
myCountries = ["dprk", "ukraine", "russia", "iran", "china"] | |
fig, axs = plt.subplots(1,5, sharey=True, gridspec_kw={'wspace': 0}) | |
fig.set_facecolor("white") | |
fig.set_size_inches(24,6) | |
idx = 0 | |
for country in myCountries: | |
tmp = time_series[[x for x in time_series.columns if (x.find(country) > -1) ]] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Create a DataFrame to hold errors.
search_df = pd.DataFrame()

# Set up the grid (1 row x 5 columns): the panels share one y axis and have
# no horizontal gap between them (wspace = 0).
fig, axs = plt.subplots(1, 5, sharey=True, gridspec_kw={'wspace': 0})
fig.set_size_inches(16, 6)

# Both counters start at 0.
# NOTE(review): presumably grid positions for the plotting code that follows
# — confirm against the rest of the cell.
x = y = 0
OlderNewer