Skip to content

Instantly share code, notes, and snippets.

@daknuett
Created April 23, 2019 19:34
Show Gist options
  • Save daknuett/9dbdad18bfe78e4bdb56a05c114f2ed9 to your computer and use it in GitHub Desktop.
Save daknuett/9dbdad18bfe78e4bdb56a05c114f2ed9 to your computer and use it in GitHub Desktop.
import requests
import time
import json
# MediaWiki query for the revisions (timestamp, user and content of all
# slots) of the German Wikipedia article "Jan Böhmermann".
url = "https://de.wikipedia.org/w/api.php?action=query&prop=revisions&titles=Jan_B%C3%B6hmermann&rvslots=*&rvprop=timestamp|user|content&rvlimit=max&format=json"
# Kept for backward compatibility only.  Paging by ``rvstart`` timestamp
# returns the revision at the boundary timestamp again, which stored
# duplicates; the download below uses the API's continuation tokens instead.
url2 = "https://de.wikipedia.org/w/api.php?action=query&prop=revisions&titles=Jan_B%C3%B6hmermann&rvslots=*&rvprop=timestamp|user|content&rvlimit=max&format=json&rvstart={}"
s = requests.Session()
# Set to True to reuse a previous download from revisions.json instead of
# querying the API again.
USE_LOADED_DATA = False
if not USE_LOADED_DATA:
    complete_revisions = []
    # Parameters from the previous response's "continue" object; empty for
    # the first request.
    continuation = {}
    while True:
        response = s.get(url=url, params=continuation, timeout=30)
        # Fail loudly on HTTP errors instead of on a confusing JSON/KeyError.
        response.raise_for_status()
        data = response.json()
        fetched = 0
        # Pages are keyed by page id; iterate rather than hard-coding the id
        # so the script keeps working if the title in ``url`` is changed.
        for page in data["query"]["pages"].values():
            revisions = page.get("revisions", [])
            complete_revisions.extend(revisions)
            fetched += len(revisions)
        print("fetched", fetched, "more revisions")
        # No "continue" object means the API has nothing left to return.
        if "continue" not in data:
            break
        continuation = data["continue"]
        # Be polite to the API between requests.
        time.sleep(0.4)
    with open("revisions.json", "w") as fout:
        json.dump(complete_revisions, fout)
else:
    with open("revisions.json", "r") as fin:
        complete_revisions = json.load(fin)
def contains_rainer(rev):
    """Return True if the revision's main-slot text contains "Rainer".

    ``rev`` is a single revision dict; its text is looked up under
    ``rev["slots"]["main"]["*"]``.  A revision that has no such text
    (including one with no ``slots``/``main`` entry at all) is printed for
    inspection and treated as not matching.
    """
    main_slot = rev.get("slots", {}).get("main", {})
    if "*" not in main_slot:
        # Show the odd revision so it can be inspected by hand.
        print(rev)
        return False
    return "Rainer" in main_slot["*"]
# Keep only the revisions whose text mentions "Rainer", in download order.
containing_rainer = list(filter(contains_rainer, complete_revisions))
# Print the timestamps of those revisions, one per line.
rainer_timestamps = (match["timestamp"] for match in containing_rainer)
print("\n".join(rainer_timestamps))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment