Last active
July 16, 2020 13:58
-
-
Save dwinston/59c0a6ae40eb04c7c4a7816943e14941 to your computer and use it in GitHub Desktop.
bulk query of Materials Project data via pymatgen MPRester, and saving to local MongoDB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pymatgen import MPRester | |
try:
    # Prefer pydash's implementation when it is available.
    from pydash import chunk as get_chunks
except ImportError:
    from math import ceil

    def get_chunks(array, size=1):
        """Split ``array`` into consecutive slices of at most ``size`` items.

        Fallback used only when pydash cannot be imported.

        Args:
            array: A sliceable sequence with a length (e.g. a list).
            size: Maximum number of items per chunk.

        Returns:
            A list of slices of ``array`` in order; the final slice may be
            shorter than ``size``. An empty ``array`` gives an empty list.
        """
        # Round up so that a trailing partial chunk is not dropped.
        chunks = int(ceil(len(array) / float(size)))
        return [array[i * size:(i + 1) * size] for i in range(chunks)]
try: | |
# import `tqdm_notebook` for prettier output in Jupyter Notebook | |
from tqdm import tqdm as PBar | |
except ImportError: | |
class PBar(): | |
def __init__(self, total): | |
self.total = total | |
self.done = 0 | |
self.report() | |
def update(self, amount): | |
self.done += amount | |
self.report() | |
def report(self): | |
print("{} of {} done {:.1%}".format( | |
self.done, self.total, self.done/self.total)) | |
def bulk_query(self, criteria, properties, chunk_size=100, **kwargs):
    """Run a large query in chunks of material IDs, reporting progress.

    Written to be attached to ``MPRester`` as a method, so ``self`` must
    provide ``query`` and ``parse_criteria``.

    Args:
        self: Object providing ``query(criteria, properties, **kwargs)``
            and ``parse_criteria(criteria)``.
        criteria: Query criteria, either a dict or a value that
            ``self.parse_criteria`` turns into a dict.
        properties: Properties to request for every matching material.
        chunk_size: Maximum number of material IDs per request.
        **kwargs: Passed through unchanged to each chunked ``self.query``.

    Returns:
        A list with the concatenated results of all chunked queries;
        an empty list when no material matches ``criteria``.
    """
    # First pass: fetch only the IDs so the full request can be split up.
    mids = [d["material_id"] for d in
            self.query(criteria, ["material_id"])]
    if not mids:
        # Nothing matched: skip the progress bar (a zero total is a
        # division hazard for simple reporters) and the chunk loop.
        return []

    if not isinstance(criteria, dict):
        criteria = self.parse_criteria(criteria)

    data = []
    progress_bar = PBar(total=len(mids))
    try:
        for chunk in get_chunks(mids, size=chunk_size):
            # Copy so the caller's criteria dict is never mutated.
            chunk_criteria = criteria.copy()
            chunk_criteria["material_id"] = {"$in": chunk}
            data.extend(self.query(chunk_criteria, properties, **kwargs))
            progress_bar.update(len(chunk))
    finally:
        # tqdm bars should be closed explicitly; a progress reporter
        # without ``close`` is simply left alone.
        close = getattr(progress_bar, "close", None)
        if close is not None:
            close()
    return data
# Attach the helper so every MPRester instance gains a `bulk_query` method.
MPRester.bulk_query = bulk_query

# Now, instantiate an MPRester object and use `bulk_query` as you would `query`.
#
# Example usage:
mpr = MPRester()
# Download structures for all lithium oxide ternaries (>1000 structures):
data = mpr.bulk_query("Li-O-*", ["structure", "material_id"])
# Finally, you can save to a local MongoDB.
# Queries to MPRester.query use MongoDB syntax.
# However, interacting with MongoDB is not the same as MPRester.
# Some example usage follows. Consult the MongoDB documentation for more info.
#
# Example usage:
import json
from monty.json import MontyDecoder  # installed with pymatgen
from pymongo import MongoClient

client = MongoClient()
db = client["whatever_database_name_you_want"]

# Re-fetch data here to illustrate that you cannot import e.g. pymatgen Structure
# objects directly into MongoDB.
data = mpr.bulk_query("Li-O-*", ["structure", "material_id"], mp_decode=False)

# Comment out below to *not* clean out your local database's "materials" collection
# before inserting new documents.
db.materials.delete_many({})
# `insert_many` rejects an empty list, so only insert when something came back.
if data:
    db.materials.insert_many(data)

# Now, all data is saved to your disk, accessible anytime without using our API.
# Depending on your use case, be sure to periodically query our API for updated data.

decoder = MontyDecoder()

doc = db.materials.find_one({"material_id": "mp-1020014"})
print(type(doc["structure"]))  # <class 'dict'>

# Decoding turns the stored dict back into a pymatgen object.
doc = decoder.process_decoded(doc)
print(type(doc["structure"]))  # <class 'pymatgen.core.structure.Structure'>

# Fetch several documents at once, projecting only the "structure" field.
docs = list(db.materials.find(
    {"material_id": {"$in": ["mp-1020014", "mp-1098011"]}},
    ["structure"]))
cyim059
commented
Jul 16, 2020
via email
Thank you.
…On Wed, Jul 15, 2020 at 6:01 PM Donny Winston ***@***.***> wrote:
***@***.**** commented on this gist.
------------------------------
See these
<https://matsci.org/t/accessing-battery-database-through-pymatgen/295>
posts
<https://matsci.org/t/accessing-all-batteries-using-materials-project-api-or-all-possible-battids/399/3>
for a start. More discussion on the matsci.org
<https://matsci.org/search?q=battery> forum.
—
You are receiving this because you commented.
Reply to this email directly, view it on GitHub
<https://gist.github.com/59c0a6ae40eb04c7c4a7816943e14941#gistcomment-3378622>,
or unsubscribe
<https://github.com/notifications/unsubscribe-auth/APVEHBLWYICYDRXGR5RPTLTR3YRMHANCNFSM4ILPPRPA>
.
--
Chae-Ho Yim, EIT, M.A.Sc.
Email: cyim059@gmail.com
LinkedIn: http://ca.linkedin.com/in/cyim059
Tel. +1 613 863 0990
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment