Split one large BloodHound JSON export into multiple smaller files to work around memory limitations during import.
import json

file_name = "20210312152708_computers.json"
object_type = "computers"  # renamed from "type" to avoid shadowing the builtin
object_limit = 10000

print(f"[*] loading {file_name}")
# utf-8-sig strips the byte-order mark that SharpHound writes at the start of its JSON
with open(file_name, "r", encoding="utf-8-sig") as f_in:
    data = json.load(f_in)

total_objects = data["meta"]["count"]
object_count = 0
file_count = 0

while object_count < total_objects:
    a = {}
    # take the next slice of at most object_limit objects
    a[object_type] = data[object_type][object_count:object_count + object_limit]
    object_count += len(a[object_type])
    # copy the metadata so each split file carries its own per-file count,
    # instead of mutating the shared dict with a running total
    a["meta"] = dict(data["meta"])
    a["meta"]["count"] = len(a[object_type])
    f_split = file_name.rsplit(".", 1)
    file_name_out = f"{f_split[0]}_{file_count}.{f_split[1]}"
    print(f"[*] writing {file_name_out} - {object_count} of {total_objects}")
    with open(file_name_out, "w") as f_out:
        json.dump(a, f_out)
    file_count += 1
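
A quick way to sanity-check the output (a hypothetical addition, not part of the original gist) is to confirm that the split files together contain every object from the original export and that each file's meta count matches its contents. The glob pattern assumes the file name used above.

import glob
import json

# hypothetical sanity check: every object from the original export should
# appear exactly once across the split files
total = 0
for part in sorted(glob.glob("20210312152708_computers_*.json")):
    with open(part, "r", encoding="utf-8") as f:
        chunk = json.load(f)
    # each file's meta count should match the number of objects it holds
    assert chunk["meta"]["count"] == len(chunk["computers"])
    total += len(chunk["computers"])
print(f"[*] {total} objects found across split files")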