Created
July 27, 2018 01:12
-
-
Save mhoffman/5ea6253170f1a752727b122d34c0ed3a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import requests\n", | |
"import pprint\n", | |
"import sys\n", | |
"import string\n", | |
"import json\n", | |
"import io\n", | |
"import copy\n", | |
"import collections\n", | |
"\n", | |
"import ase.io\n", | |
"import ase.calculators.singlepoint\n", | |
"\n", | |
"GRAPHQL = 'http://api.catalysis-hub.org/graphql'\n", | |
"\n", | |
"def fetch(query):\n", | |
" return requests.get(\n", | |
" GRAPHQL, {'query': query}\n", | |
" ).json()['data']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"True YXJyYXljb25uZWN0aW9uOjk5 100 250\n", | |
"True YXJyYXljb25uZWN0aW9uOjE5OQ== 200 250\n", | |
"False YXJyYXljb25uZWN0aW9uOjI0OQ== 300 250\n", | |
"CPU times: user 75.9 ms, sys: 33.2 ms, total: 109 ms\n", | |
"Wall time: 12.6 s\n" | |
] | |
} | |
], | |
"source": [ | |
"%%time\n", | |
"def reactions_from_dataset(pub_id, page_size=10):\n", | |
" reactions = []\n", | |
" has_next_page = True\n", | |
" start_cursor = ''\n", | |
" page = 0\n", | |
" while has_next_page:\n", | |
" data = fetch(\"\"\"{{\n", | |
" reactions(pubId: \"{pub_id}\", first: {page_size}, after: \"{start_cursor}\") {{\n", | |
" totalCount\n", | |
" pageInfo {{\n", | |
" hasNextPage\n", | |
" hasPreviousPage\n", | |
" startCursor\n", | |
" endCursor \n", | |
" }} \n", | |
" edges {{\n", | |
" node {{\n", | |
" Equation\n", | |
" chemicalComposition\n", | |
" reactionEnergy\n", | |
" reactants\n", | |
" products\n", | |
" systems {{\n", | |
" numbers\n", | |
" cell\n", | |
" positions\n", | |
" energy\n", | |
" }} \n", | |
" }}\n", | |
" }}\n", | |
" }}\n", | |
"}}\"\"\".format(start_cursor=start_cursor,\n", | |
" page_size=page_size,\n", | |
" pub_id=pub_id,\n", | |
" ))\n", | |
" has_next_page = data['reactions']['pageInfo']['hasNextPage']\n", | |
" start_cursor = data['reactions']['pageInfo']['endCursor']\n", | |
" page += 1\n", | |
" print(has_next_page, start_cursor, page_size * page, data['reactions']['totalCount'])\n", | |
" reactions.extend(map(lambda x: x['node'], data['reactions']['edges']))\n", | |
"\n", | |
" return reactions\n", | |
"\n", | |
"raw_reactions = reactions_from_dataset(\"BajdichWO32018\", page_size=100)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 5 µs, sys: 1e+03 ns, total: 6 µs\n", | |
"Wall time: 8.82 µs\n" | |
] | |
} | |
], | |
"source": [ | |
"%%time\n", | |
"def aseify(raw_reactions):\n", | |
" reactions = []\n", | |
" for raw_reaction in raw_reactions:\n", | |
" reaction = copy.deepcopy(raw_reaction)\n", | |
" reaction['reactants'] = json.loads(reaction['reactants'])\n", | |
" reaction['products'] = json.loads(reaction['products'])\n", | |
" composition = ''.join(sorted(ase.atoms.string2symbols(reaction['chemicalComposition'])))\n", | |
" c_count = collections.Counter(composition)\n", | |
"\n", | |
" gp_molecules = []\n", | |
" for formula in list(reaction['reactants'].keys()) + list(reaction['products'].keys()):\n", | |
" if formula.endswith('gas'):\n", | |
" gp_molecules.append(formula.split('gas')[0])\n", | |
" reaction['gp_molecules'] = gp_molecules\n", | |
" systems = {}\n", | |
" for system in reaction['systems']:\n", | |
" system['Formula'] = ase.utils.formula.formula_hill(\n", | |
" eval(system['numbers'])\n", | |
" )\n", | |
" atoms = ase.atoms.Atoms(\n", | |
" numbers=eval(system['numbers']),\n", | |
" positions=eval(system['positions']),\n", | |
" cell=eval(system['cell']),\n", | |
" )\n", | |
" calculator = ase.calculators.singlepoint.SinglePointCalculator(\n", | |
" atoms,\n", | |
" energy=system['energy']\n", | |
" )\n", | |
" atoms.set_calculator(calculator)\n", | |
" if system['Formula'] in reaction['gp_molecules']:\n", | |
" systems[system['Formula'] + 'gas'] = atoms\n", | |
" else:\n", | |
" formula = ''.join(sorted(ase.atoms.string2symbols(system['Formula'])))\n", | |
" f_count = collections.Counter(formula)\n", | |
" systems[''.join((f_count - c_count).elements()) + 'star'] = atoms\n", | |
" reaction['systems'] = systems\n", | |
" \n", | |
" reactions.append(reaction)\n", | |
" sys.stdout.write('.')\n", | |
" if len(reactions) % 100 == 0:\n", | |
" sys.stdout.write('\\n')\n", | |
" return reactions" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"....................................................................................................\n", | |
"....................................................................................................\n", | |
"..................................................CPU times: user 611 ms, sys: 58.3 ms, total: 669 ms\n", | |
"Wall time: 640 ms\n" | |
] | |
} | |
], | |
"source": [ | |
"%%time\n", | |
"reactions = aseify(raw_reactions)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"{'Equation': '2.0H2O(g) - 1.5H2(g) + * -> OOH*',\n", | |
" 'chemicalComposition': 'TiW15O47',\n", | |
" 'gp_molecules': ['H2', 'H2O'],\n", | |
" 'products': {'OOHstar': 1.0},\n", | |
" 'reactants': {'H2Ogas': 2.0, 'H2gas': -1.5, 'star': 1.0},\n", | |
" 'reactionEnergy': 4.58855445,\n", | |
" 'systems': {'H2Ogas': Atoms(symbols='H2O', pbc=False, cell=[14.0, 16.526478, 16.596309], calculator=SinglePointCalculator(...)),\n", | |
" 'H2gas': Atoms(symbols='H2', pbc=False, cell=[14.0, 15.0, 16.737166], calculator=SinglePointCalculator(...)),\n", | |
" 'HOOstar': Atoms(symbols='HO49TiW15', pbc=False, cell=[[7.59442, 0.0, 0.0], [0.0, 7.69143, 0.0], [-0.020334596, 0.0, 27.93972]], calculator=SinglePointCalculator(...)),\n", | |
" 'star': Atoms(symbols='O47TiW15', pbc=False, cell=[[7.59442, 0.0, 0.0], [0.0, 7.69143, 0.0], [-0.019554523, 0.0, 26.8679]], calculator=SinglePointCalculator(...))}}" | |
] | |
}, | |
"execution_count": 9, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"reactions[2]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.5" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment