Skip to content

Instantly share code, notes, and snippets.

@mhoffman
Created July 27, 2018 00:34
Show Gist options
  • Save mhoffman/59875caaabb1d63647055863bcb9ab22 to your computer and use it in GitHub Desktop.
Save mhoffman/59875caaabb1d63647055863bcb9ab22 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import requests\n",
"import pprint\n",
"import sys\n",
"import string\n",
"import json\n",
"import io\n",
"import copy\n",
"import collections\n",
"\n",
"import ase.io\n",
"import ase.calculators.singlepoint\n",
"\n",
"GRAPHQL = 'http://api.catalysis-hub.org/graphql'\n",
"\n",
"def fetch(query, timeout=120):\n",
"    \"\"\"Run a GraphQL query against Catalysis-Hub and return its ``data`` payload.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    query : str\n",
"        GraphQL query document, sent as the ``query`` URL parameter.\n",
"    timeout : float\n",
"        Seconds to wait for the server before giving up, so that a stalled\n",
"        connection cannot hang the notebook forever.\n",
"\n",
"    Returns\n",
"    -------\n",
"    object\n",
"        The ``data`` member of the decoded JSON response.\n",
"\n",
"    Raises\n",
"    ------\n",
"    requests.HTTPError\n",
"        If the server answers with an HTTP error status.\n",
"    RuntimeError\n",
"        If the response carries no ``data`` (the reported ``errors`` are\n",
"        included in the message).\n",
"    \"\"\"\n",
"    response = requests.get(GRAPHQL, params={'query': query}, timeout=timeout)\n",
"    # Surface HTTP failures here rather than as a confusing JSON/KeyError later.\n",
"    response.raise_for_status()\n",
"    payload = response.json()\n",
"    if payload.get('data') is None:\n",
"        raise RuntimeError(\n",
"            'GraphQL query failed: {}'.format(payload.get('errors'))\n",
"        )\n",
"    return payload['data']"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"True YXJyYXljb25uZWN0aW9uOjk5 100 250\n",
"True YXJyYXljb25uZWN0aW9uOjE5OQ== 200 250\n",
"False YXJyYXljb25uZWN0aW9uOjI0OQ== 300 250\n",
"CPU times: user 138 ms, sys: 59.2 ms, total: 198 ms\n",
"Wall time: 40.6 s\n"
]
}
],
"source": [
"%%time\n",
"def reactions_from_dataset(pub_id, page_size=10):\n",
"    \"\"\"Download every reaction of one publication, page by page.\n",
"\n",
"    Pages of ``page_size`` reactions are requested one after another, each\n",
"    starting after the previous page's ``endCursor``, until the server reports\n",
"    ``hasNextPage`` as false. One progress line is printed per page.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    pub_id : str\n",
"        Publication identifier passed as ``pubId`` in the query.\n",
"    page_size : int\n",
"        Number of reactions requested per round trip.\n",
"\n",
"    Returns\n",
"    -------\n",
"    list\n",
"        The ``node`` of every edge returned by the server, in order.\n",
"    \"\"\"\n",
"    # The template is loop-invariant; only the cursor changes between pages.\n",
"    # Literal braces are doubled because the template goes through str.format.\n",
"    query_template = \"\"\"{{\n",
"  reactions(pubId: \"{pub_id}\", first: {page_size}, after: \"{start_cursor}\") {{\n",
"    totalCount\n",
"    pageInfo {{\n",
"      hasNextPage\n",
"      hasPreviousPage\n",
"      startCursor\n",
"      endCursor\n",
"    }}\n",
"    edges {{\n",
"      node {{\n",
"        Equation\n",
"        chemicalComposition\n",
"        reactionEnergy\n",
"        reactants\n",
"        products\n",
"        systems {{\n",
"          energy\n",
"          Formula\n",
"          Cifdata\n",
"        }}\n",
"      }}\n",
"    }}\n",
"  }}\n",
"}}\"\"\"\n",
"    reactions = []\n",
"    has_next_page = True\n",
"    start_cursor = ''  # an empty cursor requests the first page\n",
"    while has_next_page:\n",
"        page_data = fetch(query_template.format(\n",
"            start_cursor=start_cursor,\n",
"            page_size=page_size,\n",
"            pub_id=pub_id,\n",
"        ))['reactions']\n",
"        reactions.extend(edge['node'] for edge in page_data['edges'])\n",
"        has_next_page = page_data['pageInfo']['hasNextPage']\n",
"        start_cursor = page_data['pageInfo']['endCursor']\n",
"        # Report the number of reactions actually received; page_size * page\n",
"        # would overshoot totalCount whenever the last page is not full.\n",
"        print(has_next_page, start_cursor, len(reactions), page_data['totalCount'])\n",
"\n",
"    return reactions\n",
"\n",
"raw_reactions = reactions_from_dataset(\"BajdichWO32018\", page_size=100)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 11 µs, sys: 1e+03 ns, total: 12 µs\n",
"Wall time: 16.7 µs\n"
]
}
],
"source": [
"%%time\n",
"def _symbol_counts(formula):\n",
"    \"\"\"Count how often each chemical symbol occurs in ``formula``.\"\"\"\n",
"    # Count whole symbols (in sorted order). Counting the characters of a\n",
"    # joined string would split two-letter symbols such as 'Cl' into 'C' + 'l'.\n",
"    return collections.Counter(sorted(ase.atoms.string2symbols(formula)))\n",
"\n",
"\n",
"def aseify(raw_reactions):\n",
"    \"\"\"Convert raw reaction records into records holding ASE ``Atoms`` objects.\n",
"\n",
"    Each record is deep-copied, so the input is left untouched, and then:\n",
"\n",
"    * ``reactants`` and ``products`` are decoded from JSON strings,\n",
"    * ``gp_molecules`` lists every reactant/product name that ends in\n",
"      ``'gas'``, with that suffix removed,\n",
"    * ``systems`` is replaced by a dict of atoms objects read from each\n",
"      system's ``Cifdata``, with ``energy`` attached through a single-point\n",
"      calculator.\n",
"\n",
"    A system whose ``Formula`` is one of ``gp_molecules`` is keyed as\n",
"    ``<Formula>gas``. Every other system is keyed by the symbols it has in\n",
"    excess of ``chemicalComposition`` followed by ``'star'`` (for example\n",
"    ``'star'`` or ``'Ostar'``). Systems that map to the same key overwrite\n",
"    each other.\n",
"\n",
"    One ``'.'`` is written to stdout per reaction, with a line break after\n",
"    every 100 reactions.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    raw_reactions : iterable of dict\n",
"        Records with the keys ``reactants``, ``products``,\n",
"        ``chemicalComposition`` and ``systems``.\n",
"\n",
"    Returns\n",
"    -------\n",
"    list of dict\n",
"        The converted copies, in input order.\n",
"    \"\"\"\n",
"    gas_suffix = 'gas'\n",
"    reactions = []\n",
"    for raw_reaction in raw_reactions:\n",
"        reaction = copy.deepcopy(raw_reaction)\n",
"        reaction['reactants'] = json.loads(reaction['reactants'])\n",
"        reaction['products'] = json.loads(reaction['products'])\n",
"        surface_counts = _symbol_counts(reaction['chemicalComposition'])\n",
"\n",
"        species = list(reaction['reactants']) + list(reaction['products'])\n",
"        # Strip only the trailing suffix instead of splitting on the first 'gas'.\n",
"        reaction['gp_molecules'] = [\n",
"            name[:-len(gas_suffix)] for name in species if name.endswith(gas_suffix)\n",
"        ]\n",
"\n",
"        systems = {}\n",
"        for system in reaction['systems']:\n",
"            with io.StringIO(system['Cifdata']) as cif_file:\n",
"                atoms = ase.io.read(cif_file, format='cif')\n",
"            # Assigning to .calc replaces the deprecated Atoms.set_calculator().\n",
"            atoms.calc = ase.calculators.singlepoint.SinglePointCalculator(\n",
"                atoms,\n",
"                energy=system['energy'],\n",
"            )\n",
"            if system['Formula'] in reaction['gp_molecules']:\n",
"                key = system['Formula'] + gas_suffix\n",
"            else:\n",
"                # Counter subtraction keeps only the symbols in excess of the surface.\n",
"                excess = _symbol_counts(system['Formula']) - surface_counts\n",
"                key = ''.join(excess.elements()) + 'star'\n",
"            systems[key] = atoms\n",
"        reaction['systems'] = systems\n",
"\n",
"        reactions.append(reaction)\n",
"        sys.stdout.write('.')\n",
"        if len(reactions) % 100 == 0:\n",
"            sys.stdout.write('\\n')\n",
"    return reactions"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"....................................................................................................\n",
"....................................................................................................\n",
"..................................................CPU times: user 7.35 s, sys: 103 ms, total: 7.45 s\n",
"Wall time: 7.69 s\n"
]
}
],
"source": [
"%%time\n",
"reactions = aseify(raw_reactions)"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'H2Ogas': Atoms(symbols='H2O', pbc=True, cell=[14.0, 16.5265, 16.5963], calculator=SinglePointCalculator(...)),\n",
" 'H2gas': Atoms(symbols='H2', pbc=True, cell=[14.0, 15.0, 16.7372], calculator=SinglePointCalculator(...)),\n",
" 'Ostar': Atoms(symbols='CrO49W15', pbc=True, cell=[[7.59442, 0.0, 0.0], [0.0, 7.69143, 0.0], [-0.019838797190834895, 0.0, 27.25849278063859]], calculator=SinglePointCalculator(...)),\n",
" 'star': Atoms(symbols='CrO48W15', pbc=True, cell=[[7.59442, 0.0, 0.0], [0.0, 7.69143, 0.0], [-0.019554517638301183, 0.0, 26.86789288408825]], calculator=SinglePointCalculator(...))}"
]
},
"execution_count": 52,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"reactions[0]['systems']"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment