This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
fig, axs = plt.subplots(1, 5, sharey=True, gridspec_kw={'wspace': 0}) | |
fig.set_size_inches(16,6) | |
x = y = 0 | |
for issue in myCountries: | |
train_l = len(time_series)-5 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
fig, axs = plt.subplots(4, 5, sharex=True) | |
fig.set_size_inches(16,12) | |
x = y = 0 | |
for issue in time_series: | |
train_l = len(time_series)-5 | |
selected_series = time_series[[col for col in time_series.columns if (col.find(issue[:issue.find("_")]) > -1)]] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
fig, axs = plt.subplots(4, 5, sharex=True) | |
fig.set_size_inches(16,12) | |
x = y = 0 | |
for issue in time_series: | |
if not issue.find(".com") > -1: | |
continue |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
import matplotlib.pyplot as plt | |
!pip install gdelt | |
import gdelt | |
gd = gdelt.gdelt(version=1) | |
import os | |
os.makedirs("data",exist_ok=True) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
mape_df = pd.DataFrame() | |
fig, axs = plt.subplots(4, 5, sharex=True) | |
fig.set_size_inches(16,12) | |
x = y = 0 | |
for issue in time_series: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
fig, axs = plt.subplots(4, 5, sharex=True) | |
fig.set_size_inches(16,12) | |
x = y = 0 | |
for issue in time_series: | |
if not issue.find(".com") > -1: | |
continue | |
train_l = len(time_series)-5 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
!pip install gdelt #make sure gdelt installed | |
import pandas as pd, numpy as np, matplotlib.pyplot as plt, gdelt, os, datetime, warnings #imports | |
gd = gdelt.gdelt(version=1) #instantiate object to pull gdelt files | |
os.makedirs("data",exist_ok=True) #check if there's a data folder | |
cur_date = datetime.datetime(2019,10,7)-datetime.timedelta(days=60) #start pulling from 60 days prior to 10/7 | |
while cur_date < datetime.datetime(2019,10,7): #pull until 10/7 | |
if not os.path.exists("data/%s-%s-%s.pkl"%(cur_date.year, cur_date.month, cur_date.day)): #if don't have |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
mySources = ["cnn.com", "washingtonpost.com", "nytimes.com", "foxnews.com"] | |
#unnest the entries with multiple sources in them | |
df = df.set_index(df.columns.drop('SOURCES',1).tolist()).SOURCES.str.split(';', expand=True).stack().reset_index().rename(columns={0:'SOURCES'}).loc[:, df.columns] | |
df.DATE = df.DATE.apply(lambda x: str(x)) #convert date | |
df.DATE = pd.to_datetime(df.DATE) | |
df.fillna("", inplace=True) | |
df.set_index("DATE", drop=True, inplace=True) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from statsmodels.tsa.statespace.sarimax import SARIMAX | |
def gen_SARIMA_result(p,d,q, df, issue, test_length): #takes pdq, data, issue to use | |
s_model = SARIMAX(endog = df[issue][:-test_length], | |
exog = df[[x for x in df.columns if x != issue]][:-test_length], | |
order=(p,d,q), seasonal_order=(1,0,1,7)).fit() | |
f_ru = df[[issue]].copy()[1:] #haven't bothered to change this, but it's the results |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
fig, axs = plt.subplots(1,3, figsize=(12,8)) | |
issue = time_series.columns[0] | |
test_length = 10 | |
selected_series = time_series[[col for col in time_series.columns if (col.find(issue[issue.find("_"):]) > -1)]].shift()[1:].drop(columns=issue).add_suffix("_l1") | |
pub_series = time_series[[col for col in time_series.columns if (col.find(issue[:issue.find("_")]) > -1)]].drop(columns=issue).shift()[1:].add_suffix("_l1") | |
selected_series = selected_series.join(pub_series).join(time_series[issue]) | |
x = 0 | |
for p in [1,5,10]: |