This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| !pip install gdelt | |
| import gdelt | |
| gd = gdelt.gdelt(version=1) | |
| import os, datetime | |
| os.makedirs("data",exist_ok=True) | |
| #starting 60 days before Oct7 | |
| cur_date = datetime.datetime(2019,10,7)-datetime.timedelta(days=60) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Domains whose rows are kept when loading the pickled frames.
mySources = ["nytimes.com", "washingtonpost.com", "foxnews.com", "cnn.com"]

# Every entry of the "data" directory (kept under the original name `k`).
k = os.listdir("data")

# Load each *.pkl file, keep only the rows whose SOURCES value is exactly one
# of mySources, and remember the filtered frame for a single concat below.
# NOTE(review): read_pickle can execute code embedded in the file -- only
# point this at pickles you produced yourself.
frames = []
for fname in k:
    print(fname)
    if not fname.endswith(".pkl"):
        continue
    tmp = pd.read_pickle(os.path.join("data", fname))
    # Vectorised membership test instead of a per-row Python lambda.
    frames.append(tmp[tmp["SOURCES"].isin(mySources)])

# Concatenate once: calling pd.concat inside the loop re-copies the
# accumulated frame on every file. With no pickles present, fall back to
# the empty frame the original code started from.
df = pd.concat(frames) if frames else pd.DataFrame()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| def get_supervised_df(df, issue, maxlag=1): | |
| original = df.copy() | |
| #for each day of lag, we add a new column to the dataframe with the previous values | |
| for i in range(maxlag): | |
| print(i) | |
| original= pd.concat([original, df.shift(i+1).add_suffix("_(t-%s)"%(i+2))], axis=1).dropna() | |
| #save original issue to add back in |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from fastai.tabular import * | |
| def genNN(myDF, issue): | |
| reframed = get_supervised_df(myDF, issue, 7) | |
| days = [] | |
| myOrder = reframed.columns.to_list() | |
| for i in range(7): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# For each dataset split, keep only the first item returned by get_preds and
# discard the rest. The results are left as returned (no .numpy() conversion
# is applied).
h_predictions = learn.get_preds(DatasetType.Test)[0]
v_predictions = learn.get_preds(DatasetType.Valid)[0]
predictions = learn.get_preds(DatasetType.Train)[0]

issue = "foxnews.com_russia"
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from math import sqrt

from sklearn.metrics import mean_squared_error

# Work on a copy of the whole results frame.
testing = results.copy()

# RMSE of the `m` column against `actual`.
mean_rmse = sqrt(mean_squared_error(testing.actual, testing.m))
print("RMSE training using mean: {}".format(mean_rmse))

# RMSE of the `predicted` column against `actual`.
model_rmse = sqrt(mean_squared_error(testing.actual, testing.predicted))
print("RMSE training using model: {}".format(model_rmse))

# Narrow `testing` to the trailing len(h_predictions) + 1 rows of results.
tail_rows = len(h_predictions) + 1
testing = results[-tail_rows:].copy()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| fig, axs = plt.subplots(4, 5, sharex=True) | |
| fig.set_size_inches(16,12) | |
| x = y = 0 | |
| for issue in time_series: | |
| train_l = 55 | |
| s_model = SARIMAX(endog = time_series[[issue]][:train_l][1:], |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
myCountries = ["dprk", "ukraine", "russia", "iran", "china"]

# One row of five panels sharing a y-axis, with zero horizontal spacing.
fig, axs = plt.subplots(nrows=1, ncols=5, sharey=True, gridspec_kw={"wspace": 0})
fig.set_size_inches(24, 6)
fig.set_facecolor("white")

idx = 0
for country in myCountries:
    # Select the time_series columns whose name contains this country's tag.
    matching = [col for col in time_series.columns if country in col]
    tmp = time_series[matching]
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Single row of five panels with a shared y-axis and no gap between columns.
fig, axs = plt.subplots(nrows=1, ncols=5, sharey=True, gridspec_kw={"wspace": 0})
fig.set_size_inches(16, 6)
x = 0
y = 0
for issue in myCountries:
    # train_l is the number of rows in time_series minus five.
    train_l = len(time_series) - 5
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Empty DataFrame that will hold the errors.
search_df = pd.DataFrame()

# Plot grid: 1 row x 5 columns, shared y-axis, no spacing between columns.
grid_spacing = {"wspace": 0}
fig, axs = plt.subplots(1, 5, sharey=True, gridspec_kw=grid_spacing)
fig.set_size_inches(16, 6)

x = 0
y = 0
Older | Newer