Skip to content

Instantly share code, notes, and snippets.

@sornars
Last active December 8, 2018 04:12
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sornars/581c940c002605766dba4a74bf8a738c to your computer and use it in GitHub Desktop.
Save sornars/581c940c002605766dba4a74bf8a738c to your computer and use it in GitHub Desktop.
Script for archiving PMs from therewillbe.games
import getpass
from urllib.parse import urljoin

import requests
from lxml import html
# Save every private message in the therewillbe.games inbox to a local text
# file, then request the site's "archive" link for each of those messages.
username = input('username: ')
password = getpass.getpass()
session = requests.Session()

# Authorize session
forum = session.get('https://therewillbe.games/forum/recent')
forum_root = html.fromstring(forum.content)
# The last <input> of the login form is treated as the CSRF field: both its
# name and its value are posted back with the credentials.
csrf_form = forum_root.xpath('//form[@action="/forum" and @name="login"]/input')[-1]
csrf_token = csrf_form.attrib['name']
csrf_token_value = csrf_form.attrib['value']
login = session.post(
    'https://therewillbe.games/forum',
    data={'view': 'user', 'task': 'login', csrf_token: csrf_token_value, 'username': username,
          'password': password, 'remember': '1', 'submit': 'Login'},
)
# Stop here on an HTTP-level failure instead of failing later on a missing element.
login.raise_for_status()

# Scrape inbox
inbox = session.get('https://therewillbe.games/site-tools/trash-mail?task=inbox')
inbox_root = html.fromstring(inbox.content)
# hrefs are resolved against the page they came from; urljoin leaves
# already-absolute URLs unchanged.
show_all_link = urljoin(inbox.url, inbox_root.xpath('//a[text()="show all"]')[0].attrib['href'])
full_inbox = session.get(show_all_link)
full_inbox_root = html.fromstring(full_inbox.content)
message_links = [urljoin(full_inbox.url, a.attrib['href']) for a in full_inbox_root.xpath(
    '//form[@name="messages"]//td/a[contains(@href, "task=show") and contains(@href, "messageid=")]')]
archive_links = [urljoin(full_inbox.url, a.attrib['href']) for a in full_inbox_root.xpath(
    '//form[@name="messages"]//td/a[contains(@href, "task=archive") and contains(@href, "messageid=")]')]

# Capture message content
# newline='' writes the text without newline translation; the explicit UTF-8
# encoding keeps the write from depending on the platform's default codec.
with open('./therewillbegames_messages.txt', 'w', newline='', encoding='utf-8') as o:
    for ml in message_links:
        message_page = session.get(ml)
        message_root = html.fromstring(message_page.content)
        from_div = message_root.xpath('//div[@class="uddeim-messagefrom"]')[0]
        # './/' scopes the search to the sender header. A bare '//' is evaluated
        # from the document root even when called on an element, so it could
        # match an unrelated profile link elsewhere on the page. The
        # document-wide search is kept only as a fallback.
        profile_anchors = (from_div.xpath('.//a[contains(@href, "userprofile")]')
                           or message_root.xpath('//a[contains(@href, "userprofile")]'))
        from_profile = profile_anchors[0].attrib['href']
        from_username = from_profile.split('/')[-1]
        # The last text node inside the sender header is used as the sent time.
        sent_time_string = list(from_div.itertext())[-1]
        body_div = message_root.xpath('//div[@class="uddeim-messagebody"]')[0]
        message = body_div.text_content()
        logged_message = (
            'MESSAGE BEGIN: {sent_time_string} - {from_username} - {from_profile}\n'
            '{message}\n'
            'MESSAGE END\n'
        ).format(sent_time_string=sent_time_string, from_username=from_username, from_profile=from_profile,
                 message=message)
        print('Saving PM from: {from_username} sent at {sent_time_string}'.format(from_username=from_username,
                                                                                  sent_time_string=sent_time_string))
        o.write(logged_message)

# Archive messages
# Runs only after the output file has been written and closed.
for al in archive_links:
    # The placeholder is named, so the value must be passed by keyword;
    # a positional argument here raises KeyError.
    print('Archiving {al}'.format(al=al))
    session.get(al)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment