@matejsarlija
Created December 12, 2020 13:16
Django Py large files
"""In essence, here is how I would do it to reduce memory consumption and improve performance:
Load json file (no way to stream it in python AFAIK)
Chunk the array of dictionaries into smaller chunks
Convert chunk into objects
Call bulk_create
Garbage collect after every loop iteration"""
import json
import gc

from myapp.models import MyModel

filename = '/path/to/data.json'

# Load the whole file into memory once (json.load cannot stream)
with open(filename, 'r') as f:
    data = json.load(f)

chunk_size = 100

while data:
    # Slice off the next chunk and drop it from the source list
    # so the processed part can be garbage-collected
    chunk = data[:chunk_size]
    data = data[chunk_size:]
    # Build unsaved model instances and insert them in a single query
    chunk = [MyModel(**x) for x in chunk]
    MyModel.objects.bulk_create(chunk)
    gc.collect()
You can tune chunk_size to trade off performance against memory consumption.
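Django's bulk_create also accepts a batch_size argument, which splits the INSERT itself into smaller queries independently of the chunking above. A minimal sketch; the value 500 is just illustrative and not from the original gist:

# Sketch: cap each INSERT at 500 rows; tune this the same way as chunk_size.
MyModel.objects.bulk_create(chunk, batch_size=500)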
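If loading the whole file at once is still too heavy, a streaming parser can avoid it entirely. This is an alternative sketch, not part of the original gist: it assumes the third-party ijson package is installed and that the file is a single top-level JSON array of objects.

import ijson  # third-party streaming JSON parser: pip install ijson
from myapp.models import MyModel

chunk_size = 100
chunk = []

with open('/path/to/data.json', 'rb') as f:
    # ijson.items(f, 'item') yields each element of the top-level array
    # lazily, so the whole file is never held in memory at once.
    for record in ijson.items(f, 'item'):
        chunk.append(MyModel(**record))
        if len(chunk) >= chunk_size:
            MyModel.objects.bulk_create(chunk)
            chunk = []

if chunk:  # flush any remainder smaller than chunk_size
    MyModel.objects.bulk_create(chunk)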