Skip to content

Instantly share code, notes, and snippets.

@Terrance
Created March 30, 2024 22:57
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Terrance/0bd495d630803454165c27aa9802778d to your computer and use it in GitHub Desktop.
Save Terrance/0bd495d630803454165c27aa9802778d to your computer and use it in GitHub Desktop.
Script to download all discussions and comments from a Vanilla forum, saving the JSON representation of each page to its own file.
#!/usr/bin/env python
from itertools import count
from pathlib import Path
import os
import sys
import requests
class End(Exception):
    """Signal that work on the current discussion should stop.

    This is a control-flow marker, not an error: ``main`` raises it from
    inside its page loop and catches it around that loop so that it can move
    on to the next discussion ID.
    """
def main(host, start=1, update=True, *, timeout=60, page_size=30):
    """Download the discussions of a Vanilla forum as per-page JSON files.

    Discussion IDs are visited in increasing order from *start*, and the
    pages of each discussion are requested as
    ``{host}/en/discussion/{id}/-/p{page}.json``. Files are written below the
    current working directory, grouped into directories named after the ID
    rounded down to a multiple of 100:

    * ``{group}/{id}.{page}.json`` -- a successfully downloaded page;
    * ``{group}/{id}.{page}.html`` -- the body of a 200 response that was
      not JSON, kept for inspection;
    * ``{group}/{id}.skip`` -- marker for a discussion that answered with a
      non-200 status or a non-JSON body; once it exists the discussion is
      not requested again.

    One progress line is printed per URL. The ID loop has no upper bound, so
    this function only stops when an exception propagates out of it (for
    example a network error from ``requests`` or ``KeyboardInterrupt``).

    Args:
        host: Base URL of the forum; it is prefixed verbatim to
            ``/en/discussion/...``.
        start: First discussion ID to visit.
        update: What to do on reaching the last page already saved for a
            discussion: re-download it (True) or treat the discussion as
            finished (False).
        timeout: Seconds ``requests`` waits on the server for each request
            before raising a timeout error, which propagates to the caller.
        page_size: Number of comments on a full page; a page holding fewer
            is taken to be the last page of its discussion.
    """
    # The context manager closes the session however the endless loop exits.
    with requests.Session() as sess:
        for d in count(start):
            group = Path(f"{d // 100 * 100}")
            group.mkdir(exist_ok=True)
            skip_marker = group / f"{d}.skip"
            try:
                for p in count(1):
                    url = f"{host}/en/discussion/{d}/-/p{p}.json"
                    # Show the URL before any slow work; the outcome is
                    # appended to the same line afterwards.
                    print(url, end=" ", flush=True)
                    if skip_marker.exists():
                        print("skip")
                        raise End
                    page_file = group / f"{d}.{p}.json"
                    if page_file.exists():
                        if (group / f"{d}.{p + 1}.json").exists():
                            # A later page is saved too, so this one was
                            # full when fetched; no need to request it again.
                            print("done")
                            continue
                        if not update:
                            print("done")
                            raise End
                        # Last saved page with update enabled: fall through
                        # and download it again.
                    resp = sess.get(url, timeout=timeout)
                    if resp.status_code != 200:
                        print(resp.status_code)
                        skip_marker.touch()
                        raise End
                    # A response without a Content-Type header is handled
                    # like any other non-JSON response instead of crashing.
                    content_type = resp.headers.get("Content-Type", "")
                    if not content_type.startswith("application/json"):
                        print("???")
                        (group / f"{d}.{p}.html").write_bytes(resp.content)
                        skip_marker.touch()
                        raise End
                    print("ok")
                    page_file.write_bytes(resp.content)
                    try:
                        comment_count = len(resp.json()["Comments"])
                    except (ValueError, KeyError, TypeError) as ex:
                        # Undecodable JSON (a ValueError subclass) or a body
                        # without a sized "Comments" entry: report it and
                        # give up on this discussion.
                        print(ex)
                        raise End from ex
                    if comment_count < page_size:
                        raise End
            except End:
                # Finished with this discussion; carry on with the next ID.
                pass
if __name__ == "__main__":
    # Command-line entry point: parse the arguments and start downloading.
    from argparse import ArgumentParser

    parser = ArgumentParser(
        description=(
            "Download all discussions and comments from a Vanilla forum "
            "as JSON files."
        ),
    )
    parser.add_argument(
        "host",
        help="base URL of the forum, prefixed to /en/discussion/...",
    )
    parser.add_argument(
        "-s", "--start", type=int, default=1,
        help="discussion ID to start from (default: %(default)s)",
    )
    parser.add_argument(
        "-u", "--update", action="store_true",
        help=(
            "re-download the last saved page of each discussion instead of "
            "treating the discussion as finished"
        ),
    )
    args = parser.parse_args()
    main(args.host, start=args.start, update=args.update)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment