Skip to content

Instantly share code, notes, and snippets.

@WWelna
Created February 10, 2021 11:57
Show Gist options
  • Save WWelna/cda4ecb6752fc2a05562cbd17fb8b1be to your computer and use it in GitHub Desktop.
Save WWelna/cda4ecb6752fc2a05562cbd17fb8b1be to your computer and use it in GitHub Desktop.
Loading Gab dump into Redis
#!/bin/python3
import json
import lzma
import redis
import glob
from datetime import timedelta
# Loads every xz-compressed JSON-lines dump in the current directory into
# Redis, one key per post, and records which post IDs this run inserted.

# Redis client created with the library's default connection settings.
r = redis.Redis()
# IDs of the posts inserted by this run, in insertion order.
ids = []

# Every post is stored under KEY_PREFIX + str(post id) and expires after
# POST_TTL.
KEY_PREFIX = 'GABPOST'
POST_TTL = timedelta(hours=24)


def _store_if_new(post):
    """Store *post* in Redis as JSON unless its key already exists.

    When the key is created, the post's ID is appended to the module-level
    ``ids`` list. An existing key is left untouched (value and TTL).
    """
    post_id = post['id']
    # SET with NX + EX creates the key and its expiry in one atomic command,
    # replacing the previous EXISTS-then-SETEX pair: one round trip instead
    # of two, and no window in which another writer can slip in between the
    # check and the write. redis-py returns a truthy value only when the key
    # was actually set.
    if r.set(KEY_PREFIX + str(post_id), json.dumps(post), ex=POST_TTL, nx=True):
        ids.append(post_id)


for archive in glob.glob('*.xz'):
    print(f"OPENING -> {archive}")
    with lzma.open(archive, 'r') as f:
        for line in f:
            # Tolerate blank lines (e.g. a trailing newline) instead of
            # aborting the whole load inside json.loads.
            if not line.strip():
                continue
            post = json.loads(line)
            parent = post.get('parent')
            # Only replies that actually embed a parent object are unpacked;
            # a missing 'is_reply' key or a null parent is treated as
            # "nothing to unpack" rather than crashing.
            if post.get('is_reply') is True and parent is not None:
                _store_if_new(parent)
                # The stored reply carries the parent's ID in 'is_reply'
                # in place of the boolean flag.
                post['is_reply'] = parent['id']
            _store_if_new(post)

# One inserted post ID per line.
with open('gabpost.ids', 'w') as o:
    o.writelines(f"{post_id}\n" for post_id in ids)
print(f"FOUND -> {len(ids)} POSTS")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment