Skip to content

Instantly share code, notes, and snippets.

@spacelis
Created July 6, 2013 12:18
Show Gist options
  • Save spacelis/5939744 to your computer and use it in GitHub Desktop.
Save spacelis/5939744 to your computer and use it in GitHub Desktop.
A small script for importing a file with one JSON document per line into MongoDB.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
File: ljson2mongo.py
Author: SpaceLis
Email: Wen.Li@tudelft.nl
Github: none
Description:
Import tweets (one JSON document per line) into a MongoDB collection.
"""
import sys
import gzip
import json
from pymongo import MongoClient
def import_data(filename, dbname, collectionname):
    """Import a file with one JSON document per line into a MongoDB collection.

    Each non-blank line of ``filename`` is parsed with ``json.loads`` and
    inserted into ``dbname.collectionname`` on the server that
    ``MongoClient()`` connects to with its default settings.  A running
    total is written to stderr after every 10000 inserted documents.

    Args:
        filename: Path to the input file.  A name ending in ``.gz`` is read
            through ``gzip``; anything else is read as a plain file.
        dbname: Name of the target database.
        collectionname: Name of the target collection.

    Raises:
        ValueError: If a non-blank line is not valid JSON.
    """
    opener = gzip.open if filename.endswith('.gz') else open
    # Read bytes in both cases so plain and gzipped input are handled the
    # same way; json.loads accepts UTF-8 bytes (Python 2 str, Python 3.6+).
    with opener(filename, 'rb') as fin:
        client = MongoClient()
        try:
            collection = client[dbname][collectionname]
            cnt = 0
            for line in fin:
                line = line.strip()
                if not line:
                    # Blank lines (e.g. a trailing newline) carry no document.
                    continue
                # insert_one replaces Collection.insert, which PyMongo
                # deprecated in 3.0 and removed in 4.0.
                collection.insert_one(json.loads(line))
                # Count first, then report, so the number printed is the
                # number of documents actually inserted so far.
                cnt += 1
                if cnt % 10000 == 0:
                    sys.stderr.write('%d imported\n' % cnt)
        finally:
            # Release the connection even when parsing or inserting fails.
            client.close()
if __name__ == '__main__':
    # Three positional arguments are required; sys.argv[0] is the script
    # name, so a complete command line has at least four entries.
    wants_help = sys.argv[1:2] == ['-h']
    if wants_help or len(sys.argv) < 4:
        sys.stderr.write('Usage: ljson2mongodb.py <file> <db> <collection>\n')
        # Stop here instead of falling through to import_data with missing
        # arguments; asking for help is not an error.
        sys.exit(0 if wants_help else 1)
    import_data(sys.argv[1], sys.argv[2], sys.argv[3])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment