Last active
March 5, 2021 04:00
-
-
Save WWelna/48c52fab01b61470247c06f40c4155de to your computer and use it in GitHub Desktop.
DDoSecrets - Gab Stats - Estimate of Missing Data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/python3
"""Estimate how much data is missing from the DDoSecrets Gab dump.

The dump is read as line-delimited JSON.  Every record carries a
``__DBEXPORT__`` tag naming the table it came from and a ``data`` field
holding a JSON-encoded payload.  For each user the script compares the
``statuses_count`` advertised by the ``accounts`` record against the number
of ``statuses`` records actually present, then prints the totals.
"""
import orjson as json

# user id -> {'accounts_statuses_count': count claimed by the account record
#                                        (-1 when no account record was seen),
#             'actual_statuses_count':   number of status records found}
usercounts = {}

# orjson parses bytes directly, so read in binary mode: this skips a
# platform-dependent text decode of every line.
with open('/xs/Archive-Gab/ddosecrets_gabdump.json', 'rb') as f:
    for line in f:
        try:
            j = json.loads(line)
            export = j['__DBEXPORT__']
            if export == 'accounts':
                j2 = json.loads(j['data'])
                # Read both values before touching usercounts so that a
                # malformed record never leaves a half-initialised entry.
                user_id = j['id']
                claimed = j2['statuses_count']
                counts = usercounts.get(user_id)
                if counts is None:
                    usercounts[user_id] = {
                        'accounts_statuses_count': claimed,
                        'actual_statuses_count': 0,
                    }
                else:
                    counts['accounts_statuses_count'] = claimed
            elif export == 'statuses':
                # The payload is parsed only to validate it: statuses with
                # no data attached fail here and are deliberately not counted.
                json.loads(j['data'])
                account_id = j['account_id']
                counts = usercounts.get(account_id)
                if counts is None:
                    # Status seen before (or without) its account record.
                    usercounts[account_id] = {
                        'accounts_statuses_count': -1,
                        'actual_statuses_count': 1,
                    }
                else:
                    counts['actual_statuses_count'] += 1
        except (KeyError, TypeError, ValueError):
            # Some entries have zero data attached; skipping them is faster
            # and easier than pre-validating.  orjson.JSONDecodeError is a
            # ValueError subclass.  Unlike a bare ``except``, this still lets
            # KeyboardInterrupt / SystemExit stop the (long) scan.
            continue

total_missing = 0
no_attached_accounts = 0
for user in usercounts.values():
    claimed = user['accounts_statuses_count']
    # No data to calculate: statuses exist but the account record is missing.
    if claimed == -1:
        no_attached_accounts += 1
        continue
    # A negative diff means all of the user's posts were likely captured and
    # the account metadata is simply older than the last fetched post, so
    # only positive differences are tallied.
    diff = claimed - user['actual_statuses_count']
    if diff > 0:
        total_missing += diff

print(f"There is an estimated {total_missing} missing posts in the Gab Dump and an estimated {no_attached_accounts} missing accounts")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
So, final estimate and stats via this code: