Created
July 27, 2018 00:34
-
-
Save mhoffman/59875caaabb1d63647055863bcb9ab22 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import requests\n", | |
"import pprint\n", | |
"import sys\n", | |
"import string\n", | |
"import json\n", | |
"import io\n", | |
"import copy\n", | |
"import collections\n", | |
"\n", | |
"import ase.io\n", | |
"import ase.calculators.singlepoint\n", | |
"\n", | |
"GRAPHQL = 'http://api.catalysis-hub.org/graphql'\n", | |
"\n", | |
"def fetch(query):\n", | |
" return requests.get(\n", | |
" GRAPHQL, {'query': query}\n", | |
" ).json()['data']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"True YXJyYXljb25uZWN0aW9uOjk5 100 250\n", | |
"True YXJyYXljb25uZWN0aW9uOjE5OQ== 200 250\n", | |
"False YXJyYXljb25uZWN0aW9uOjI0OQ== 300 250\n", | |
"CPU times: user 138 ms, sys: 59.2 ms, total: 198 ms\n", | |
"Wall time: 40.6 s\n" | |
] | |
} | |
], | |
"source": [ | |
"%%time\n", | |
"def reactions_from_dataset(pub_id, page_size=10):\n", | |
" reactions = []\n", | |
" has_next_page = True\n", | |
" start_cursor = ''\n", | |
" page = 0\n", | |
" while has_next_page:\n", | |
" data = fetch(\"\"\"{{\n", | |
" reactions(pubId: \"{pub_id}\", first: {page_size}, after: \"{start_cursor}\") {{\n", | |
" totalCount\n", | |
" pageInfo {{\n", | |
" hasNextPage\n", | |
" hasPreviousPage\n", | |
" startCursor\n", | |
" endCursor \n", | |
" }} \n", | |
" edges {{\n", | |
" node {{\n", | |
" Equation\n", | |
" chemicalComposition\n", | |
" reactionEnergy\n", | |
" reactants\n", | |
" products\n", | |
" systems {{\n", | |
" energy\n", | |
" Formula\n", | |
" Cifdata\n", | |
" }} \n", | |
" }}\n", | |
" }}\n", | |
" }}\n", | |
"}}\"\"\".format(start_cursor=start_cursor,\n", | |
" page_size=page_size,\n", | |
" pub_id=pub_id,\n", | |
" ))\n", | |
" has_next_page = data['reactions']['pageInfo']['hasNextPage']\n", | |
" start_cursor = data['reactions']['pageInfo']['endCursor']\n", | |
" page += 1\n", | |
" print(has_next_page, start_cursor, page_size * page, data['reactions']['totalCount'])\n", | |
" reactions.extend(map(lambda x: x['node'], data['reactions']['edges']))\n", | |
"\n", | |
" return reactions\n", | |
"\n", | |
"raw_reactions = reactions_from_dataset(\"BajdichWO32018\", page_size=100)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 11 µs, sys: 1e+03 ns, total: 12 µs\n", | |
"Wall time: 16.7 µs\n" | |
] | |
} | |
], | |
"source": [ | |
"%%time\n", | |
"def aseify(raw_reactions):\n", | |
" reactions = []\n", | |
" for raw_reaction in raw_reactions:\n", | |
" reaction = copy.deepcopy(raw_reaction)\n", | |
" reaction['reactants'] = json.loads(reaction['reactants'])\n", | |
" reaction['products'] = json.loads(reaction['products'])\n", | |
" composition = ''.join(sorted(ase.atoms.string2symbols(reaction['chemicalComposition'])))\n", | |
" c_count = collections.Counter(composition)\n", | |
"\n", | |
" gp_molecules = []\n", | |
" for formula in list(reaction['reactants'].keys()) + list(reaction['products'].keys()):\n", | |
" if formula.endswith('gas'):\n", | |
" gp_molecules.append(formula.split('gas')[0])\n", | |
" reaction['gp_molecules'] = gp_molecules\n", | |
" systems = {}\n", | |
" for system in reaction['systems']:\n", | |
" with io.StringIO() as tmp_file:\n", | |
" tmp_file.write(system['Cifdata'])\n", | |
" tmp_file.seek(0)\n", | |
" atoms = ase.io.read(tmp_file, format='cif')\n", | |
" calculator = ase.calculators.singlepoint.SinglePointCalculator(\n", | |
" atoms,\n", | |
" energy=system['energy']\n", | |
" )\n", | |
" atoms.set_calculator(calculator)\n", | |
" if system['Formula'] in reaction['gp_molecules']:\n", | |
" systems[system['Formula'] + 'gas'] = atoms\n", | |
" else:\n", | |
" formula = ''.join(sorted(ase.atoms.string2symbols(system['Formula'])))\n", | |
" f_count = collections.Counter(formula)\n", | |
" systems[''.join((f_count - c_count).elements()) + 'star'] = atoms\n", | |
" reaction['systems'] = systems\n", | |
" \n", | |
" reactions.append(reaction)\n", | |
" sys.stdout.write('.')\n", | |
" if len(reactions) % 100 == 0:\n", | |
" sys.stdout.write('\\n')\n", | |
" return reactions" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"....................................................................................................\n", | |
"....................................................................................................\n", | |
"..................................................CPU times: user 7.35 s, sys: 103 ms, total: 7.45 s\n", | |
"Wall time: 7.69 s\n" | |
] | |
} | |
], | |
"source": [ | |
"%%time\n", | |
"reactions = aseify(raw_reactions)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 52, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"{'H2Ogas': Atoms(symbols='H2O', pbc=True, cell=[14.0, 16.5265, 16.5963], calculator=SinglePointCalculator(...)),\n", | |
" 'H2gas': Atoms(symbols='H2', pbc=True, cell=[14.0, 15.0, 16.7372], calculator=SinglePointCalculator(...)),\n", | |
" 'Ostar': Atoms(symbols='CrO49W15', pbc=True, cell=[[7.59442, 0.0, 0.0], [0.0, 7.69143, 0.0], [-0.019838797190834895, 0.0, 27.25849278063859]], calculator=SinglePointCalculator(...)),\n", | |
" 'star': Atoms(symbols='CrO48W15', pbc=True, cell=[[7.59442, 0.0, 0.0], [0.0, 7.69143, 0.0], [-0.019554517638301183, 0.0, 26.86789288408825]], calculator=SinglePointCalculator(...))}" | |
] | |
}, | |
"execution_count": 52, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"reactions[0]['systems']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.5" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment