This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
fig, axs = plt.subplots(4, 5, sharex=True) | |
fig.set_size_inches(16,12) | |
x = y = 0 | |
for issue in time_series: | |
train_l = len(time_series)-5 | |
selected_series = time_series[[col for col in time_series.columns if (col.find(issue[:issue.find("_")]) > -1)]] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
import matplotlib.pyplot as plt | |
## The install line below assumes this code runs in a Jupyter notebook.
## If you are not using a notebook, remove that line and run `pip install gdelt` from your OS command line instead.
!pip install gdelt | |
## (a leading ! tells the notebook to run the rest of the line as a shell command)
import gdelt |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
fig, axs = plt.subplots(4, 5, sharex=True) | |
fig.set_size_inches(16,12) | |
x = y = 0 | |
for issue in time_series: | |
if not issue.find(".com") > -1: | |
continue |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
mape_df = pd.DataFrame() | |
fig, axs = plt.subplots(4, 5, sharex=True) | |
fig.set_size_inches(16,12) | |
x = y = 0 | |
for issue in time_series: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
fig, axs = plt.subplots(4, 5, sharex=True) | |
fig.set_size_inches(16,12) | |
x = y = 0 | |
for issue in time_series: | |
if not issue.find(".com") > -1: | |
continue | |
train_l = len(time_series)-5 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Domains of the news sources of interest (not referenced again in this block).
mySources = ["cnn.com", "washingtonpost.com", "nytimes.com", "foxnews.com"]

# Unnest rows whose SOURCES field holds several ';'-separated entries so that
# every output row carries exactly one source.
original_columns = df.columns
# Index.drop has no axis argument; a second positional value would be bound to
# `errors`, so only the label is passed here.
key_columns = original_columns.drop("SOURCES").tolist()
stacked_sources = (
    df.set_index(key_columns)["SOURCES"]
    .str.split(";", expand=True)  # one column per source, padded with NaN
    .stack()
    # Drop the NaN padding explicitly rather than relying on stack() doing it;
    # NOTE(review): newer pandas stack implementations may keep NaN -- confirm.
    .dropna()
)
df = (
    stacked_sources.reset_index()
    .rename(columns={0: "SOURCES"})
    # Restore the original column order; this also discards the helper
    # "level_N" column that reset_index creates for the split position.
    .loc[:, original_columns]
)

# DATE is stringified before parsing.
# NOTE(review): presumably DATE arrives as an integer such as YYYYMMDD, which
# to_datetime would otherwise read as an epoch offset -- confirm against the
# data source.
df["DATE"] = pd.to_datetime(df["DATE"].astype(str))

# Replace remaining missing values with empty strings, then index rows by date.
df.fillna("", inplace=True)
df.set_index("DATE", drop=True, inplace=True)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from statsmodels.tsa.statespace.sarimax import SARIMAX | |
def gen_SARIMA_result(p,d,q, df, issue, test_length): #takes pdq, data, issue to use | |
s_model = SARIMAX(endog = df[issue][:-test_length], | |
exog = df[[x for x in df.columns if x != issue]][:-test_length], | |
order=(p,d,q), seasonal_order=(1,0,1,7)).fit() | |
f_ru = df[[issue]].copy()[1:] #haven't bothered to change this, but it's the results |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# One row of three panels for the plots drawn by the loop that follows.
fig, axs = plt.subplots(1, 3, figsize=(12, 8))

# The first column of time_series is the target series.
issue = time_series.columns[0]
test_length = 10

# Split the target's column name at its first underscore. The tail keeps the
# underscore; the head is everything before it.
# NOTE(review): the head looks like the publisher part of the name (see
# `pub_series`) -- confirm against how the columns are built.
split_at = issue.find("_")
name_head = issue[:split_at]
name_tail = issue[split_at:]

# Columns sharing the target's tail / head (substring match anywhere in name).
same_tail_cols = [col for col in time_series.columns if name_tail in col]
same_head_cols = [col for col in time_series.columns if name_head in col]

# Lag each group by one step, discard the first (now empty) row, remove the
# target itself and tag the lagged regressors with "_l1".
selected_series = (
    time_series[same_tail_cols]
    .shift()[1:]
    .drop(columns=issue)
    .add_suffix("_l1")
)
pub_series = (
    time_series[same_head_cols]
    .drop(columns=issue)
    .shift()[1:]
    .add_suffix("_l1")
)

# Final frame: both sets of lagged regressors plus the unlagged target.
selected_series = selected_series.join(pub_series).join(time_series[issue])

x = 0
for p in [1,5,10]: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
model_dict = {} | |
for issue in time_series: | |
model_dict[issue]=[] | |
test_length = 5 | |
selected_series = time_series[[col for col in time_series.columns if (col.find(issue[issue.find("_"):]) > -1)]].shift()[1:].drop(columns=issue).add_suffix("_l1") | |
pub_series = time_series[[col for col in time_series.columns if (col.find(issue[:issue.find("_")]) > -1)]].drop(columns=issue).shift()[1:].add_suffix("_l1") | |
selected_series = selected_series.join(pub_series).join(time_series[issue]) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
fig, axs = plt.subplots(4, 5, figsize=(20,12)) | |
x = y = 0 | |
for issue in time_series: | |
pd.DataFrame(model_dict[issue]).plot(x="aic", y="test_mape", style=".", title="{}".format(issue), ax=axs[x,y]) | |
pd.DataFrame(model_dict[issue]).plot(x="aic", y="train_mape", style=".", title="{}".format(issue), ax=axs[x,y]) | |
if x == 0 and y==0: | |
handles, labels = axs[0,0].get_legend_handles_labels() |