Created
January 19, 2018 00:57
-
-
Save nickwan/a296f1cbf7a3d4ff510317f8eb434468 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import glob | |
%matplotlib inline | |
import matplotlib.pyplot as plt | |
import seaborn as sns | |
import pandas as pd | |
import numpy as np | |
def convert_date(ser):
    """Return the first ten characters (or elements) of *ser*.

    The script applies this to each raw ``time`` value before parsing it
    as a date.  Presumably those values start with a ``YYYY-MM-DD`` prefix
    -- confirm against the CSV export.
    """
    leading_ten = slice(None, 10)
    return ser[leading_ten]
# ---------------------------------------------------------------------------
# Load every CSV export found in the data directory into a single DataFrame,
# drop the "promoted ..." columns, and derive date / weekday / week columns.
# ---------------------------------------------------------------------------
path = 'C:\\Users\\hello\\Documents\\twitch\\nw\\data\\'
extension = 'csv'
os.chdir(path)
result = glob.glob('*.{}'.format(extension))
if not result:
    # Fail with a clear message instead of an unrelated KeyError further down.
    raise FileNotFoundError(
        'no *.{} files found in {}'.format(extension, path))

# DataFrame.append was removed in pandas 2.0 (and copied the frame on every
# iteration); read all files first and concatenate once.
df = pd.concat([pd.read_csv(data) for data in result], ignore_index=True)

# Columns for promoted-tweet metrics are not used in this analysis.
promoted_cols = [
    'promoted impressions', 'promoted engagements',
    'promoted engagement rate', 'promoted retweets', 'promoted replies',
    'promoted likes', 'promoted user profile clicks', 'promoted url clicks',
    'promoted hashtag clicks', 'promoted detail expands',
    'promoted permalink clicks', 'promoted app opens',
    'promoted app installs', 'promoted follows', 'promoted email tweet',
    'promoted dial phone', 'promoted media views',
    'promoted media engagements',
]
df = df.drop(columns=promoted_cols)

# 'date' must be derived from the raw 'time' strings *before* 'time' itself is
# converted, because convert_date slices the original text.
df['date'] = pd.to_datetime(df['time'].apply(convert_date))
df['time'] = pd.to_datetime(df['time'])
df['daycode'] = df['time'].dt.weekday
# Series.dt.weekday_name was removed in pandas 1.0; day_name() replaces it.
df['day'] = df['time'].dt.day_name()

int_cols = ['impressions', 'engagements', 'engagement rate', 'retweets', 'replies', 'likes',
            'user profile clicks', 'url clicks', 'hashtag clicks', 'detail expands',
            'permalink clicks', 'app opens', 'app installs', 'follows',
            'email tweet', 'dial phone', 'media engagements']
# NOTE(review): 'engagement rate' is cast to int along with the counts; if it
# is a fractional rate this truncates it -- confirm that this is intended.
df[int_cols] = df[int_cols].astype(int)

# Zero-based week index counted from the earliest week in the data.
# Series.dt.week was removed in pandas 2.0, and the previous
# "week + 52 * year offset" formula mis-ordered dates around New Year (when the
# ISO week belongs to the adjacent year) and in 53-week years.  Counting whole
# weeks between Monday-anchored week starts gives the same numbers within a
# single year and stays monotonic across year boundaries.
week_start = df['date'] - pd.to_timedelta(df['date'].dt.weekday, unit='D')
df['week'] = (week_start - week_start.min()).dt.days // 7

df = df.sort_values(['date', 'time']).reset_index(drop=True)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment