-
-
Save hohl/532b7edb85f09a7758172865aaf7dc94 to your computer and use it in GitHub Desktop.
LowEndSpirit got some traction: numbers and figures
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import matplotlib.pyplot as plt | |
def load_stats(filename):
    """Count discussions per calendar month from a scraped CSV file.

    Args:
        filename: Path to a CSV file that has a ``created_at`` column which
            pandas can parse as dates.

    Returns:
        A pandas Series of discussion counts indexed by ``'YYYY/MM'`` month
        strings in ascending order, with the last month left out.
    """
    frame = pd.read_csv(filename, parse_dates=['created_at'])
    created = frame['created_at']
    month_keys = created.dt.strftime('%Y/%m')
    monthly = created.groupby(month_keys).count()
    # The final bucket is treated as the month still in progress when the
    # data was collected; its partial count is dropped.
    return monthly.iloc[:-1]
# Build one monthly series per forum, named after the forum so that the name
# becomes the column label (and therefore the legend entry) in the chart.
stats_les = load_stats('discussions_les.csv').rename('talk.lowendspirit.com')
stats_let = load_stats('discussions_let.csv').rename('lowendtalk.com')

# Put both series side by side, aligned on the month index; months missing
# from either forum come out as NaN and are dropped.
stats = pd.concat([stats_les, stats_let], axis=1)
stats = stats.dropna()

# Bar chart with one colour per forum.
stats.plot.bar(color=['#3D7D99', '#993d4c'])
plt.title('How many new discussions are there on LowEndSpirit?')
plt.xlabel('Month')
plt.ylabel('Number of Discussions')
plt.grid(axis='y', alpha=0.75)
plt.show()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests, os, time | |
import pandas as pd | |
# Before running this script, make sure to set the following environment vars:
# - USER_AGENT: a real browser user agent (needed for CloudFlare bot protection)
# - FORUM_DOMAIN: one of
# -- talk.lowendspirit.com - to fetch LES
# -- www.lowendtalk.com - to fetch LET
# - OUTPUT_FILE: discussions_les.csv (for LES) or discussions_let.csv (for LET),
#   the file names the plotting script loads
def get_discussions(page, timeout=30):
    """Fetch one page of the forum's discussion list as decoded JSON.

    The forum host is read from the ``FORUM_DOMAIN`` environment variable and
    the ``User-Agent`` request header from ``USER_AGENT``.

    Args:
        page: Page number to fetch. Page 1 is requested from
            ``/discussions.json``; any other page from
            ``/discussions/p<page>.json``.
        timeout: Seconds to wait on the server before giving up, so that a
            stalled connection cannot hang the crawl indefinitely.

    Returns:
        The decoded JSON body of the response.

    Raises:
        KeyError: If ``USER_AGENT`` or ``FORUM_DOMAIN`` is not set.
        ValueError: If the server answers with any status other than 200.
    """
    # Using the session as a context manager closes it (and its pooled
    # connections) on exit instead of leaking one session per fetched page.
    with requests.Session() as session:
        session.headers.update({"User-Agent": os.environ['USER_AGENT']})

        discussions_url = f'https://{os.environ["FORUM_DOMAIN"]}/discussions'
        if page != 1:
            discussions_url += f'/p{page}'
        discussions_url += '.json'

        response = session.get(discussions_url, timeout=timeout)
        # NOTE(review): every non-200 status (403, 429, 5xx, ...) is reported
        # as a missing page; the exception type is kept because the crawl
        # loop in this file catches ValueError to stop.
        if response.status_code != 200:
            raise ValueError(f'Discussions page #{page} does not exist')
        return response.json()
def to_csv(filename, data):
    """Write ``data`` to ``filename`` as CSV by way of a pandas DataFrame.

    Args:
        filename: Destination path for the CSV file.
        data: Anything ``pd.DataFrame`` accepts, e.g. a list of dicts with
            one dict per row.

    The DataFrame index is written too, as the first (unnamed) column.
    """
    pd.DataFrame(data).to_csv(filename)
# Resolve the output path before crawling: a missing OUTPUT_FILE should abort
# immediately rather than after the whole crawl, when everything collected
# so far would be lost to a KeyError.
output_file = os.environ["OUTPUT_FILE"]

# The crawl stops once more than this many pages have been fetched.
page_limit = 100

page_n = 0
discussions_checked = 0
discussions_found = []
try:
    while True:
        page_n += 1
        print(f'Fetch page {page_n}...')
        response = get_discussions(page_n)
        discussions = response['Discussions']
        discussions_checked += len(discussions)
        # Keep a fixed subset of fields from each discussion record.
        discussions_found.extend(
            {
                'created_by': discussion['FirstName'],
                'created_at': discussion['DateInserted'],
                'category': discussion['Category'],
                'url': discussion['Url'],
                'title': discussion['Name'],
            }
            for discussion in discussions
        )
        # Pause between page requests to go easy on the forum.
        time.sleep(1)
        if page_n > page_limit:
            print('We\'re done here, got more than enough discussions already.')
            break
except ValueError:
    # get_discussions raises ValueError on a non-200 response; that is taken
    # to mean the crawl has run past the last page.
    print(f'We\'re done here. Found {discussions_checked} discussions')

# Written on both exit paths (page limit reached or ValueError).
to_csv(output_file, discussions_found)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment