Skip to content

Instantly share code, notes, and snippets.

@WWelna
Created February 2, 2021 03:53
Show Gist options
  • Save WWelna/be2f21b17e6bdf8be67878cfda0a7adc to your computer and use it in GitHub Desktop.
Save WWelna/be2f21b17e6bdf8be67878cfda0a7adc to your computer and use it in GitHub Desktop.
Grab posts of User from Gab Scrape
#!/bin/python3
# Terrible Data Dumper for Gab Data
#
# https://archive.org/details/Gab-Scrape-01092017-050518
# https://archive.org/details/Gab-Scrape-03272017
#
# Put this script in the same directory as the downloaded .xz files and run it from there.
import json
import glob
import argparse
import lzma
parser = argparse.ArgumentParser(description='Search and Dump Gab Data', epilog='Trust The Plan')
parser.add_argument('--user', help='Username to Dump', type=str)
parser.add_argument('output', help='Output Results', type=argparse.FileType('wb'))


def collect_posts(username, pattern='*.xz'):
    """Collect every post written by *username* from the matching archives.

    Each file matching *pattern* (relative to the current working directory)
    is opened as an xz stream and read as one JSON document per line.

    Args:
        username: Value compared against ``record['user']['username']``.
        pattern: Glob pattern selecting the xz-compressed archives to scan.

    Returns:
        A dict mapping post id to the decoded post. When the same id shows
        up more than once, the first occurrence is kept.

    Raises:
        KeyError: If a record lacks the ``user``/``username``/``id`` keys.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    posts = {}
    # glob order is arbitrary; sorting makes "first occurrence wins"
    # reproducible from one run to the next.
    for path in sorted(glob.glob(pattern)):
        with lzma.open(path, 'rb') as archive:
            for line in archive:
                if not line.strip():
                    # A blank line would make json.loads raise; skip it.
                    continue
                record = json.loads(line)
                if record['user']['username'] == username:
                    posts.setdefault(record['id'], record)
    return posts


def dump_posts(posts, output):
    """Write *posts* to *output* as UTF-8 JSON lines, ordered by post id.

    Args:
        posts: Mapping of post id to post, as built by ``collect_posts``.
        output: Binary file-like object exposing ``write``.
    """
    for post_id in sorted(posts):
        line = json.dumps(posts[post_id]) + "\n"
        output.write(line.encode(encoding='UTF-8'))


def main(argv=None):
    """Parse the command line, then dump the selected user's posts.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    args = parser.parse_args(argv)
    # argparse.FileType has already opened the file; the context manager
    # guarantees it is flushed and closed even if scanning fails.
    with args.output as output:
        dump_posts(collect_posts(args.user), output)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment