Skip to content

Instantly share code, notes, and snippets.

# set up our plots: a 2x3 grid of subplots drawn from the story stats dataframe
fig, axes = plt.subplots(nrows=2, ncols=3)
# top row: one line chart per cumulative metric column
df['views_cumsum'].plot(ax=axes[0,0], title='Views Over Time', c='blue', grid=True).set(ylabel='# of Views')
df['reads_cumsum'].plot(ax=axes[0,1], title='Reads Over Time', c='green', grid=True).set(ylabel='# of Reads')
df['fans_cumsum'].plot(ax=axes[0,2], title='Fans Over Time', c='red', grid=True).set(ylabel='# of Fans')
# bottom row, left: stacked bars of reads and fans for each row of the dataframe
df[['reads', 'fans']].plot(ax=axes[1,0], title='Reads/Fans by Story', kind='bar', stacked=True, grid=True).legend(['# of Reads', '# of Fans'])
# bottom row, middle: this chart plots the reads/fans cumulative columns, so the
# title names reads (it previously said 'Views/Fans', contradicting the legend)
df[['reads_cumsum', 'fans_cumsum']].plot.area(ax=axes[1,1], title='Reads/Fans Over Time', grid=True).legend(['# of Reads', '# of Fans'])
# bottom row, right: views summed per 'year' group; the grouped result is a
# Series, so no x=/y= column selectors are passed (they apply to DataFrames only)
df.groupby('year')['views'].sum().plot.bar(ax=axes[1,2], title='Yearly Views', grid=True)
# set subplot style
# add a cumulative-sum column to the dataframe for each metric
# (maps the new column name to the column it accumulates)
cumsum_sources = {
    'views_cumsum': 'views',
    'reads_cumsum': 'reads',
    'fans_cumsum': 'fans',
}
for cumsum_col, source_col in cumsum_sources.items():
    df[cumsum_col] = df[source_col].cumsum()
# cast each metric column to float, replacing the column in place
for metric_col in ('views', 'reads', 'fans'):
    df[metric_col] = df[metric_col].astype(float)
# build the combined dataframe in one chain:
#   1. concatenate all yearly dataframes into one
#   2. reverse the row order so that the oldest story gets the smallest index
#   3. renumber the index from 0, discarding the per-year indices that overlap
df = pd.concat(df_yearly).iloc[::-1].reset_index(drop=True)
# create a list of dataframes, one per year key in the stats dictionary
df_yearly = []
for year, stories in stats.items():
    # create dataframe from the entries stored under this year
    tmp_df = pd.DataFrame(stories)
    # add 'year' column so every row records which year it came from
    tmp_df['year'] = year
    df_yearly.append(tmp_df)
stats = {
'2019': [
{
'title': 'Creating The Twitter Sentiment Analysis Program in Python with Naive Bayes Classification',
'views': '89434',
'reads': '23128',
'ratio': '26%',
'fans': '115'
}
],
current_year = 0
# loop through table rows retrieving the data and adding it to our stats dictionary
for row in stats_table.find_elements_by_tag_name('tr'):
if (len(row.get_attribute('class')) > 0):
if ('sortableTable-row--dateBucket' in row.get_attribute('class')): # when encountering a year row
for cell in row.find_elements_by_tag_name('td'):
current_year = cell.text
stats[current_year] = []
elif ('js-statsTableRow' in row.get_attribute('class')): # when encountering a story
td_idx = 1
# get stats table, polling for up to 10 seconds until the element is found
stats_table = WebDriverWait(driver, 10).until(
    lambda d: d.find_element_by_xpath('/html/body/div[1]/div[2]/div/div[3]/div/div[4]/table')
)
time.sleep(sleep_duration)
# scroll to the bottom of the page twice, napping after each scroll,
# to make sure the whole table is rendered
for _ in range(2):
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(nap_duration)
# fill out login form
# look up the three form controls in order, waiting up to 10 seconds for each
login_form_xpaths = (
    '//*[@id="username_or_email"]',
    '//*[@id="password"]',
    '//*[@id="allow"]',
)
username, password, login = [
    WebDriverWait(driver, 10).until(lambda d, xpath=xpath: d.find_element_by_xpath(xpath))
    for xpath in login_form_xpaths
]
time.sleep(sleep_duration)
# empty the username field before typing the e-mail credential into it
username.clear()
username.send_keys(email_cred)
time.sleep(nap_duration)
# choose twitter: wait up to 10 seconds for the element tagged with the
# "twitter-auth" data-action, then click it
twitter_auth_button = WebDriverWait(driver, 10).until(
    lambda d: d.find_element_by_xpath('//*[@data-action="twitter-auth"]')
)
twitter_auth_button.click()