Skip to content

Instantly share code, notes, and snippets.

@regispires
Last active April 13, 2020 23:05
Show Gist options
  • Save regispires/0ba6b8483f229ebcf400be1da781e766 to your computer and use it in GitHub Desktop.
Save regispires/0ba6b8483f229ebcf400be1da781e766 to your computer and use it in GitHub Desktop.
Easily connect Pandas and MongoDB in Python
# for security reasons this file must be added to your .gitignore file
MONGO_HOST=<my_host_name_or_address>
MONGO_PORT=27017
MONGO_DB=<my_database>
MONGO_USER=<my_user>
MONGO_PASSWORD=<my_password>
# You can connect to MongoDB and perform all operations using only the following two functions:
# get_collection and read_mongo
import pandas as pd
from pymongo import MongoClient
import settings
def _connect_mongo(host=settings.MONGO_HOST, port=settings.MONGO_PORT,
                   username=settings.MONGO_USER, password=settings.MONGO_PASSWORD, db=settings.MONGO_DB):
    """Open a MongoDB connection and return the handle for database ``db``.

    When both ``username`` and ``password`` are given, an authenticated
    ``mongodb://`` URI is built; otherwise the client connects with just
    ``host`` and ``port``.

    Args:
        host: Server host name or address.
        port: Server port, as an int or a numeric string (environment
            variables are always strings). A falsy value leaves the port
            to the driver's default.
        username: User name, or a falsy value for an unauthenticated connection.
        password: Password, or a falsy value for an unauthenticated connection.
        db: Name of the database to return.

    Returns:
        The database object obtained by indexing the client with ``db``.
    """
    # Imported locally so this function does not depend on file-level imports.
    from urllib.parse import quote_plus

    if username and password:
        # Credentials must be percent-escaped, otherwise characters such as
        # '@', ':' or '/' in the password corrupt the URI.
        credentials = '%s:%s' % (quote_plus(username), quote_plus(password))
        # Omit the port segment when no port is configured rather than
        # rendering the literal text "None" into the URI.
        netloc = '%s:%s' % (host, port) if port else host
        mongo_uri = 'mongodb://%s@%s/%s' % (credentials, netloc, db)
        conn = MongoClient(mongo_uri)
    else:
        # MongoClient requires an integer port; values read from the
        # environment arrive as strings.
        conn = MongoClient(host, int(port) if port else None)
    return conn[db]
def get_collection(collection, host=settings.MONGO_HOST, port=settings.MONGO_PORT,
                   username=settings.MONGO_USER, password=settings.MONGO_PASSWORD, db=settings.MONGO_DB):
    """Return the collection named ``collection`` from database ``db``.

    All connection parameters are forwarded unchanged to ``_connect_mongo``,
    and the resulting database object is indexed by ``collection``.
    """
    connection_args = dict(host=host, port=port, username=username, password=password, db=db)
    return _connect_mongo(**connection_args)[collection]
def read_mongo(collection, filter_=None, projection=None, host=settings.MONGO_HOST, port=settings.MONGO_PORT,
               username=settings.MONGO_USER, password=settings.MONGO_PASSWORD, db=settings.MONGO_DB, no_id=True):
    """Query a MongoDB collection and return the result as a DataFrame.

    Args:
        collection: Name of the collection to query.
        filter_: Query document passed to ``find``; ``None`` matches every
            document.
        projection: Projection passed to ``find``; ``None`` applies no
            projection.
        host, port, username, password, db: Connection parameters forwarded
            to ``get_collection``.
        no_id: When true, drop the ``_id`` and ``__v`` columns if present.

    Returns:
        A ``pandas.DataFrame`` with one row per matching document; empty
        when nothing matches.
    """
    # Forward the caller's ``db`` -- the previous code always used
    # settings.MONGO_DB here and silently ignored the argument.
    my_collection = get_collection(collection, host=host, port=port, username=username,
                                   password=password, db=db)

    # Defaults are None rather than {} to avoid shared mutable defaults.
    query = {} if filter_ is None else filter_
    cursor = my_collection.find(query, projection)

    # Materialise the cursor and build the DataFrame.
    df = pd.DataFrame(list(cursor))

    if no_id:
        # errors='ignore' keeps this safe for empty results and for
        # projections that already exclude these columns.
        # NOTE(review): the source's indentation was lost; dropping '__v'
        # is assumed to be governed by no_id as well -- confirm.
        df = df.drop(columns=['_id', '__v'], errors='ignore')
    return df
...
# Usage example: store the rows of a DataFrame (my_df, defined elsewhere) in a collection.
my_collection = get_collection('my_collection')
# orient='records' yields a list with one dict per row, keyed by column name.
dict_ = my_df.to_dict(orient='records')
# Insert all row dicts in a single call; the call's return value is kept in result.
result = my_collection.insert_many(dict_)
...
# Dependency: the python-dotenv package.
# The variables must be stored in a .env file.
# More information at: https://pypi.org/project/python-dotenv/
from dotenv import load_dotenv
import os
load_dotenv()
# MongoDB connection settings, read from environment variables.
MONGO_HOST = os.getenv("MONGO_HOST")
# os.getenv returns a string (or None), but a port must be an integer for
# MongoClient(host, port); fall back to 27017 when unset or empty.
MONGO_PORT = int(os.getenv("MONGO_PORT") or 27017)
MONGO_DB = os.getenv("MONGO_DB")
MONGO_USER = os.getenv("MONGO_USER")
MONGO_PASSWORD = os.getenv("MONGO_PASSWORD")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment