Skip to content

Instantly share code, notes, and snippets.

@hohl

hohl/analyze.py Secret

Created August 2, 2020 08:20
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hohl/532b7edb85f09a7758172865aaf7dc94 to your computer and use it in GitHub Desktop.
Save hohl/532b7edb85f09a7758172865aaf7dc94 to your computer and use it in GitHub Desktop.
LowEndSpirit got some traction: numbers and figures
import pandas as pd
import matplotlib.pyplot as plt
def load_stats(filename):
    """Count discussions per calendar month from a discussions CSV.

    Reads ``filename`` with pandas, parsing the ``created_at`` column as
    datetimes, and tallies the non-null ``created_at`` values under their
    ``YYYY/MM`` label.

    Returns a Series indexed by month label. The last label in sorted order
    is always left out, on the assumption that it is the still-running month.
    """
    frame = pd.read_csv(filename, parse_dates=['created_at'])
    created = frame['created_at']
    month_labels = created.dt.strftime('%Y/%m')
    monthly_totals = created.groupby(month_labels).count()
    # The final bucket is treated as the ongoing (incomplete) month.
    return monthly_totals.iloc[:-1]
# Monthly discussion counts for both forums, each series named after its
# site so the name becomes the column label / legend entry.
stats_les = load_stats('discussions_les.csv').rename('talk.lowendspirit.com')
stats_let = load_stats('discussions_let.csv').rename('lowendtalk.com')

# Align the two series side by side and keep only the months where both
# forums have a count.
stats = pd.concat([stats_les, stats_let], axis=1).dropna()

# Grouped bar chart, one colour per forum.
axes = stats.plot.bar(color=['#3D7D99', '#993d4c'])
axes.grid(axis='y', alpha=0.75)
axes.set_ylabel('Number of Discussions')
axes.set_xlabel('Month')
axes.set_title('How many new discussions are there on LowEndSpirit?')
plt.show()
import requests, os, time
import pandas as pd
# before running this script make sure to set the following environment vars:
# - USER_AGENT: to some real user agent (needed for CloudFlare bot protection)
# - FORUM_DOMAIN: to either
# -- talk.lowendspirit.com - to fetch LES
# -- www.lowendtalk.com - to fetch LET
# - OUTPUT_FILE: to either discussions_let.csv or discussions_les.csv
def get_discussions(page, timeout=30):
    """Fetch one page of the forum's discussion listing as parsed JSON.

    The forum host is read from the ``FORUM_DOMAIN`` environment variable and
    the request is sent with the ``USER_AGENT`` environment variable as its
    User-Agent header. Page 1 is requested from ``/discussions.json``; any
    other page from ``/discussions/p<page>.json``.

    Args:
        page: Number of the listing page to fetch.
        timeout: Seconds to wait for the server before ``requests`` aborts
            the call with its own timeout exception.

    Returns:
        The decoded JSON body of the response.

    Raises:
        KeyError: If ``USER_AGENT`` or ``FORUM_DOMAIN`` is not set.
        ValueError: If the server answers with any status other than 200.
    """
    # Using the session as a context manager closes its connection pool after
    # every call instead of leaving one open session behind per fetched page.
    with requests.Session() as session:
        session.headers.update({"User-Agent": os.environ['USER_AGENT']})

        discussions_url = f'https://{os.environ["FORUM_DOMAIN"]}/discussions'
        if page != 1:
            discussions_url += f'/p{page}'
        discussions_url += '.json'

        # Without a timeout a stalled server would block the crawl forever.
        response = session.get(discussions_url, timeout=timeout)
        if response.status_code != 200:
            raise ValueError(f'Discussions page #{page} does not exist')
        return response.json()
def to_csv(filename, data):
    """Write ``data`` to ``filename`` as CSV via a pandas DataFrame.

    ``data`` is handed to ``pd.DataFrame`` unchanged; the frame's index is
    written as the first (unnamed) column.
    """
    pd.DataFrame(data).to_csv(filename)
# Walk the discussion listing page by page. The crawl ends either when
# get_discussions raises ValueError or once page 101 has been processed.
discussions_found = []
discussions_checked = 0
page_n = 0
try:
    while page_n <= 100:
        page_n += 1
        print(f'Fetch page {page_n}...')
        discussions = get_discussions(page_n)['Discussions']
        discussions_checked += len(discussions)
        # Keep only the fields needed later, under CSV-friendly names.
        discussions_found.extend(
            {
                'created_by': entry['FirstName'],
                'created_at': entry['DateInserted'],
                'category': entry['Category'],
                'url': entry['Url'],
                'title': entry['Name'],
            }
            for entry in discussions
        )
        # Pause between pages so the forum is not hammered.
        time.sleep(1)
    # Only reached when the page cap stopped the loop.
    print(f'We\'re done here, got more than enough discussions already.')
except ValueError:
    print(f'We\'re done here. Found {discussions_checked} discussions')

# Whatever was collected is written out regardless of how the crawl ended.
to_csv(os.environ["OUTPUT_FILE"], discussions_found)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment