Created
July 6, 2013 12:18
-
-
Save spacelis/5939744 to your computer and use it in GitHub Desktop.
A small script for importing a file with one JSON document per line into MongoDB.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
""" | |
File: ljson2mongo.py | |
Author: SpaceLis | |
Email: Wen.Li@tudelft.nl | |
Github: none | |
Description:
    Import tweets, stored as one JSON document per line, into a MongoDB collection.
""" | |
import sys | |
import gzip | |
import json | |
from pymongo import MongoClient | |
def import_data(filename, dbname, collectionname):
    """Import a file with one JSON document per line into db.collection.

    Each non-blank line of ``filename`` is parsed with ``json.loads`` and
    inserted as one document into collection ``collectionname`` of database
    ``dbname``, using a ``MongoClient()`` created with default arguments.
    A file whose name ends in ``.gz`` is read through ``gzip``.

    A progress line is written to stderr after every 10000 inserted
    documents.

    Returns:
        The number of documents inserted.

    Raises:
        ValueError: if a non-blank line is not valid JSON.
    """
    opener = gzip.open if filename.endswith('.gz') else open
    client = MongoClient()
    cnt = 0
    try:
        collection = client[dbname][collectionname]
        # ``with`` guarantees the input file is closed even when parsing or
        # inserting a document raises part-way through the import.
        with opener(filename) as fin:
            for line in fin:
                # Tolerate blank lines (e.g. a trailing newline at EOF)
                # instead of crashing inside json.loads.
                if not line.strip():
                    continue
                # insert_one replaces Collection.insert, which was
                # deprecated in PyMongo 3 and removed in PyMongo 4.
                collection.insert_one(json.loads(line))
                # Count first, then report, so the message states how many
                # documents have actually been imported so far.
                cnt += 1
                if cnt % 10000 == 0:
                    # sys.stderr.write works on both Python 2 and 3, unlike
                    # the Python 2-only ``print >> sys.stderr`` statement.
                    sys.stderr.write('%d imported\n' % cnt)
    finally:
        # Release the server connection regardless of success or failure.
        client.close()
    return cnt
if __name__ == '__main__':
    # The script name plus three positional arguments are required
    # (<file> <db> <collection>), so anything shorter than 4 entries is a
    # usage error; '-h' as the first argument explicitly asks for help.
    if len(sys.argv) < 4 or sys.argv[1] == '-h':
        sys.stderr.write('Usage: ljson2mongodb.py <file> <db> <collection>\n')
        # Stop here instead of falling through to import_data with missing
        # arguments; exit 0 when help was requested, 1 on a usage error.
        sys.exit(0 if sys.argv[1:2] == ['-h'] else 1)
    import_data(sys.argv[1], sys.argv[2], sys.argv[3])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment