Skip to content

Instantly share code, notes, and snippets.

@Shiroizu
Created January 5, 2024 19:48
Show Gist options
  • Save Shiroizu/b571ea87cd508cc050338d8fe654e596 to your computer and use it in GitHub Desktop.
Save Shiroizu/b571ea87cd508cc050338d8fe654e596 to your computer and use it in GitHub Desktop.
''' Output (sorted by date, newest first):
λ "VocaDB Wiki revision history.py"
Found 77 pages
2024-01-03 10:22:51+00:00: wiki/59 'Content policy' (version 28) edit by Shiroizu
2024-01-01 13:54:17+00:00: wiki/86 'Album entry editing' (version 11) edit by Shiroizu
2024-01-01 13:54:02+00:00: wiki/83 'Artist entry editing' (version 19) edit by Shiroizu
2024-01-01 13:53:30+00:00: wiki/89 'Song entry editing ' (version 19) edit by Shiroizu
...
2023-02-16 03:32:05+00:00: wiki/53 'Management guidelines / golden rules' (version 24) edit by andreoda
2023-02-16 03:31:50+00:00: wiki/29 'License' (version 14) edit by andreoda
2023-02-16 01:56:24+00:00: wiki/22 'VocaDB domains & login process' (version 6) edit by andreoda
'''
import sys
import time
from datetime import datetime

import requests
from bs4 import BeautifulSoup
all_wiki_pages_url = "https://wiki.vocadb.net/pages/allpages"
wiki_page_history_url_template = "https://wiki.vocadb.net/pages/history/{}"
request_timeout_seconds = 30  # without a timeout, requests can block forever
request_delay_seconds = 0.5  # pause between history requests


def extract_wiki_page_id(href):
    """Return the ID segment of a wiki link such as '/wiki/62/some-slug'.

    The ID is the path segment directly after '/wiki/'. Returns None when
    *href* is missing, empty, or contains no '/wiki/<id>' segment.
    """
    if not href or "/wiki/" not in href:
        return None
    wiki_page_id = href.split("/wiki/", 1)[1].split("/", 1)[0]
    return wiki_page_id or None


def format_page_edit(page_edit):
    """Return the report line for one page edit.

    *page_edit* is a sequence of
    (edit_date, wiki_page_title, wiki_page_id, version, editor).
    """
    edit_date, wiki_page_title, wiki_page_id, version, editor = page_edit
    return (
        f"{edit_date}: wiki/{wiki_page_id} '{wiki_page_title}' "
        f"(version {version}) edit by {editor}"
    )


def fetch_soup(url):
    """Download *url* and return its parsed HTML.

    Raises requests.RequestException on connection problems, timeouts and
    HTTP error statuses, so an error page is never parsed as wiki content.
    """
    response = requests.get(url, timeout=request_timeout_seconds)
    response.raise_for_status()
    return BeautifulSoup(response.text, "html.parser")


def fetch_most_recent_edit(wiki_page_id):
    """Return (version, edit_date, editor) for the newest revision of a page.

    The first body row of the history page is expected to hold exactly four
    cells: version, date, editor, and one unused cell. Returns None when the
    row does not have that shape or the date is not ISO formatted.
    """
    history = fetch_soup(wiki_page_history_url_template.format(wiki_page_id))
    cells = history.select("tbody tr:first-child td")
    if len(cells) != 4:
        return None
    version, edit_date, editor, _ = cells
    try:
        parsed_date = datetime.fromisoformat(edit_date.text.strip())
    except ValueError:
        return None
    return version.text, parsed_date, editor.text


def main():
    """Print the most recent edit of every wiki page, newest first."""
    page_edits = []  # edit_date, wiki_page_title, wiki_page_id, version, editor
    all_wiki_pages = fetch_soup(all_wiki_pages_url).select("table td.pagename a")
    print(f"Found {len(all_wiki_pages)} pages\n")

    for wiki_page_link in all_wiki_pages:
        # <a href="/wiki/62/artist-merging-and-splitting-guidelines"> Artist merging and splitting guidelines</a>
        wiki_page_title = wiki_page_link.text
        # Read the href attribute itself rather than the serialised tag, so
        # the link text and missing slugs cannot leak into the ID.
        wiki_page_id = extract_wiki_page_id(wiki_page_link.get("href"))
        if wiki_page_id is None:
            print(
                f"Skipping '{wiki_page_title}': no wiki page id in link",
                file=sys.stderr,
            )
            continue

        # A single broken page is reported and skipped so that the results
        # already collected are still printed.
        try:
            most_recent_edit = fetch_most_recent_edit(wiki_page_id)
        except requests.RequestException as error:
            print(
                f"Skipping wiki/{wiki_page_id}: request failed ({error})",
                file=sys.stderr,
            )
        else:
            if most_recent_edit is None:
                print(
                    f"Skipping wiki/{wiki_page_id}: "
                    "could not parse the revision history table",
                    file=sys.stderr,
                )
            else:
                version, edit_date, editor = most_recent_edit
                page_edits.append(
                    [edit_date, wiki_page_title, wiki_page_id, version, editor]
                )
        time.sleep(request_delay_seconds)

    page_edits.sort(key=lambda x: x[0], reverse=True)
    for page_edit in page_edits:
        print(format_page_edit(page_edit))


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment