Skip to content

Instantly share code, notes, and snippets.

@saurabhariyan
Last active April 22, 2019 11:39
Show Gist options
  • Save saurabhariyan/5cf1ee3a5ef634b4b61adeb17c80d083 to your computer and use it in GitHub Desktop.
Save saurabhariyan/5cf1ee3a5ef634b4b61adeb17c80d083 to your computer and use it in GitHub Desktop.
mongo

// Read a mongoexport JSON file (one document per line) into a pandas DataFrame.

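*** Note: building the DataFrame by concatenating one single-row frame per line is exceptionally slow on large files, because every `pd.concat` call copies all the rows accumulated so far. It does produce the correct data, though. ***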

# Parse every line of the mongoexport file first and build the DataFrame once.
# Concatenating one single-row frame per line copies all accumulated rows on
# each iteration, which makes the load quadratic in the number of lines.
try:
    # Skip blank lines (e.g. a stray trailing newline); json.loads rejects them.
    records = [json.loads(line) for line in data_file if line.strip()]
finally:
    # Close the file even when a line fails to parse.
    data_file.close()

# Columns are the union of the record keys in order of first appearance.
# Rows get a fresh 0..n-1 index instead of every row being labelled 0.
db_df = pd.DataFrame(records)

import pandas as pd import json from pymongo import MongoClient

def _connect_mongo():
    """Open a MongoDB connection and return a handle to the target database.

    The connection URI and the database name are placeholders (empty strings)
    that must be filled in before this helper is usable.

    Returns:
        The object obtained by indexing the ``MongoClient`` with the database
        name.
    """
    # TODO: add mongo uri, e.g. one built from
    # (username, password, host, port, db).
    # When no username/password is needed, MongoClient(host, port) is the
    # alternative way to connect.
    mongo_uri = ''
    conn = MongoClient(mongo_uri)

    # TODO: add db name
    return conn[""]

def read_mongo(collection, query=None, no_id=True):
    """Read documents from a MongoDB collection into a pandas DataFrame.

    Args:
        collection: Name of the collection to query.
        query: Filter passed to ``find``. ``None`` (the default) means the
            empty filter ``{}``.
        no_id: When true, remove the ``_id`` column from the result if it is
            present.

    Returns:
        A DataFrame built from the list of documents the cursor yields.
    """
    # Create the default filter per call so no dict is shared between calls.
    if query is None:
        query = {}

    # Connect to MongoDB
    db = _connect_mongo()

    # Make a query to the specific DB and Collection
    cursor = db[collection].find(query)

    # Expand the cursor and construct the DataFrame
    df = pd.DataFrame(list(cursor))

    # Delete the mongo _id. The column is missing when the query matched no
    # documents, so check first instead of raising KeyError.
    if no_id and '_id' in df.columns:
        del df['_id']

    return df

# Move index level 0 back into an ordinary column (modifies store_df in place).
store_df.reset_index(level=0, inplace=True)

# Serialise the frame as a JSON array with one object per row.
xx = store_df.to_json(orient='records')

# Write the JSON text to disk.
with open('./mongo/xx.json', 'w') as out_file:
    out_file.write(xx)

mongoimport --db db_name --collection c_name --file xx.json --jsonArray // Note: --jsonArray is a standalone flag; it is never passed as a value to the --file option.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment