Last active
October 10, 2019 03:35
-
-
Save brockmanmatt/2e21d46adc194a343f9da78f00af84af to your computer and use it in GitHub Desktop.
shift_daily_article_counts
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def get_supervised_df(df, issue, maxlag=1):
    """Reframe a DataFrame of sequential observations as a supervised table.

    Each returned row holds the value of ``issue`` at row *t* together with
    the values of every column of ``df`` at the preceding rows.

    Args:
        df: Source DataFrame; rows are assumed to be in chronological order.
            It is not modified.
        issue: Column label (anything accepted by ``df[issue]``) to keep
            unshifted as the prediction target.
        maxlag: Number of additional lag steps beyond the first. The result
            contains lags ``t-1`` through ``t-(maxlag + 1)``.

    Returns:
        A new DataFrame whose columns are, in order:

        * ``"<col>(t-1)"`` for every column of ``df`` (no underscore; kept
          as-is for backward compatibility),
        * ``"<col>_(t-k)"`` for ``k = 2 .. maxlag + 1``,
        * ``issue`` itself, holding the unshifted target values.

        Rows containing any missing value are dropped, which removes at least
        the first ``maxlag + 1`` rows.

    Note:
        ``shift`` moves values by row position, and rows with missing values
        are dropped before the final shift, so gaps in ``df`` are not
        preserved as gaps in the lagged columns.
    """
    frame = df.copy()

    # Add one group of lagged columns per extra lag step. They are labelled
    # with lag + 1 because the whole frame is shifted by one more row below.
    for lag in range(1, maxlag + 1):
        lagged = df.shift(lag).add_suffix("_(t-%s)" % (lag + 1))
        frame = pd.concat([frame, lagged], axis=1).dropna()

    # Remember the unshifted target so it can be restored as the value at t.
    target = frame[issue]

    # Shift everything back one row: the original columns become lag 1 and
    # each "_(t-k)" group now really lags the target by k rows.
    frame = frame.shift().dropna()
    frame = frame.rename(
        columns={column: "{}(t-1)".format(column) for column in df}
    )

    # Re-attach the target; assignment aligns on the index.
    frame[issue] = target
    return frame.dropna()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment