Skip to content

Instantly share code, notes, and snippets.

@mariogeiger
Created January 29, 2023 19:21
Show Gist options
  • Save mariogeiger/c0d4fb3527c5876543e66793c612ec93 to your computer and use it in GitHub Desktop.
Save mariogeiger/c0d4fb3527c5876543e66793c612ec93 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [],
"source": [
"import logging\n",
"import os\n",
"import zipfile\n",
"from functools import cache\n",
"from typing import List\n",
"from urllib.request import urlopen\n",
"\n",
"from ase.atoms import Atoms\n",
"\n",
"\n",
"def download_url(url: str, root: str) -> str:\n",
" \"\"\"Download if file does not exist in root already. Returns path to file.\"\"\"\n",
" filename = url.rpartition(\"/\")[2]\n",
" file_path = os.path.join(root, filename)\n",
"\n",
" try:\n",
" from tqdm import tqdm\n",
"\n",
" progress = True\n",
" except ImportError:\n",
" progress = False\n",
"\n",
" data = urlopen(url)\n",
" chunk_size = 1024\n",
" total_size = int(data.info()[\"Content-Length\"].strip())\n",
"\n",
" if os.path.exists(file_path):\n",
" if os.path.getsize(file_path) == total_size:\n",
" logging.info(f\"Using downloaded and verified file: {file_path}\")\n",
" return file_path\n",
"\n",
" logging.info(f\"Downloading {url} to {file_path}\")\n",
"\n",
" with open(file_path, \"wb\") as f:\n",
" if progress:\n",
" with tqdm(total=total_size) as pbar:\n",
" while True:\n",
" chunk = data.read(chunk_size)\n",
" if not chunk:\n",
" break\n",
" f.write(chunk)\n",
" pbar.update(chunk_size)\n",
" else:\n",
" while True:\n",
" chunk = data.read(chunk_size)\n",
" if not chunk:\n",
" break\n",
" f.write(chunk)\n",
"\n",
" return file_path\n",
"\n",
"\n",
"def extract_zip(path: str, root: str):\n",
" \"\"\"Extract zip if content does not exist in root already.\"\"\"\n",
" logging.info(f\"Extracting {path} to {root}...\")\n",
" with zipfile.ZipFile(path, \"r\") as f:\n",
" for name in f.namelist():\n",
" if name.endswith(\"/\"):\n",
" logging.info(f\"Skip directory {name}\")\n",
" continue\n",
" out_path = os.path.join(root, name)\n",
" file_size = f.getinfo(name).file_size\n",
" if os.path.exists(out_path) and os.path.getsize(out_path) == file_size:\n",
" logging.info(f\"Skip existing file {name}\")\n",
" continue\n",
" logging.info(f\"Extracting {name} to {root}...\")\n",
" f.extract(name, root)\n",
"\n",
"\n",
"def read_sdf(f):\n",
" while True:\n",
" name = f.readline()\n",
" if not name:\n",
" break\n",
"\n",
" f.readline()\n",
" f.readline()\n",
"\n",
" L1 = f.readline().split()\n",
" try:\n",
" natoms = int(L1[0])\n",
" except IndexError:\n",
" print(L1)\n",
" break\n",
"\n",
" positions = []\n",
" symbols = []\n",
" for _ in range(natoms):\n",
" line = f.readline()\n",
" x, y, z, symbol = line.split()[:4]\n",
" symbols.append(symbol)\n",
" positions.append([float(x), float(y), float(z)])\n",
"\n",
" yield Atoms(symbols=symbols, positions=positions)\n",
"\n",
" while True:\n",
" line = f.readline()\n",
" if line.startswith(\"$$$$\"):\n",
" break\n",
"\n",
"\n",
"@cache\n",
"def load_qm9(root: str) -> List[Atoms]:\n",
" raw_url = \"https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/molnet_publish/qm9.zip\"\n",
"\n",
" if not os.path.exists(root):\n",
" os.makedirs(root)\n",
"\n",
" path = download_url(raw_url, root)\n",
" extract_zip(path, root)\n",
"\n",
" with open(os.path.join(root, \"gdb9.sdf\")) as f:\n",
" return list(read_sdf(f))\n"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"66814 molecules with 4 kinds of atoms: O, H, C, N\n",
"45770 molecules with 3 kinds of atoms: O, H, C\n",
"14198 molecules with 3 kinds of atoms: H, C, N\n",
"4907 molecules with 2 kinds of atoms: H, C\n",
"1062 molecules with 5 kinds of atoms: H, C, N, O, F\n",
"736 molecules with 4 kinds of atoms: F, H, C, N\n",
"250 molecules with 4 kinds of atoms: O, F, C, H\n",
"96 molecules with 3 kinds of atoms: F, C, H\n",
"26 molecules with 3 kinds of atoms: O, C, N\n",
"14 molecules with 4 kinds of atoms: O, F, C, N\n",
"4 molecules with 2 kinds of atoms: C, N\n",
"3 molecules with 3 kinds of atoms: F, C, N\n",
"2 molecules with 2 kinds of atoms: F, C\n",
"1 molecules with 2 kinds of atoms: H, N\n",
"1 molecules with 2 kinds of atoms: O, H\n",
"1 molecules with 2 kinds of atoms: O, N\n"
]
}
],
"source": [
"import numpy as np\n",
"from collections import defaultdict\n",
"import ase.data\n",
"\n",
"kinds = defaultdict(list)\n",
"\n",
"for mol in load_qm9(\"data\"):\n",
" compounds = frozenset(mol.numbers)\n",
" kinds[compounds].append(mol)\n",
"\n",
"for compounds, mols in sorted(kinds.items(), key=lambda x: len(x[1]), reverse=True):\n",
" symbols = \", \".join(ase.data.chemical_symbols[z] for z in compounds)\n",
" print(f\"{len(mols)} molecules with {len(compounds)} kinds of atoms: {symbols}\")"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<html>\n",
"\n",
" <head>\n",
"\n",
" <title>ASE atomic visualization</title>\n",
"\n",
" <link rel=\"stylesheet\" type=\"text/css\"\n",
"\n",
" href=\"https://www.x3dom.org/x3dom/release/x3dom.css\">\n",
"\n",
" </link>\n",
"\n",
" <script type=\"text/javascript\"\n",
"\n",
" src=\"https://www.x3dom.org/x3dom/release/x3dom.js\">\n",
"\n",
" </script>\n",
"\n",
" </head>\n",
"\n",
" <body>\n",
"\n",
" <X3D>\n",
"\n",
" <Scene>\n",
"\n",
" <Transform translation=\"-0.02 1.53 0.01\">\n",
"\n",
" <Shape>\n",
"\n",
" <Appearance>\n",
"\n",
" <Material diffuseColor=\"0.565 0.565 0.565\" specularColor=\"0.5 0.5 0.5\">\n",
"\n",
" </Material>\n",
"\n",
" </Appearance>\n",
"\n",
" <Sphere radius=\"0.76\">\n",
"\n",
" </Sphere>\n",
"\n",
" </Shape>\n",
"\n",
" </Transform>\n",
"\n",
" <Transform translation=\"0.00 -0.00 0.00\">\n",
"\n",
" <Shape>\n",
"\n",
" <Appearance>\n",
"\n",
" <Material diffuseColor=\"0.565 0.565 0.565\" specularColor=\"0.5 0.5 0.5\">\n",
"\n",
" </Material>\n",
"\n",
" </Appearance>\n",
"\n",
" <Sphere radius=\"0.76\">\n",
"\n",
" </Sphere>\n",
"\n",
" </Shape>\n",
"\n",
" </Transform>\n",
"\n",
" <Transform translation=\"0.99 1.94 0.00\">\n",
"\n",
" <Shape>\n",
"\n",
" <Appearance>\n",
"\n",
" <Material diffuseColor=\"1.000 1.000 1.000\" specularColor=\"0.5 0.5 0.5\">\n",
"\n",
" </Material>\n",
"\n",
" </Appearance>\n",
"\n",
" <Sphere radius=\"0.31\">\n",
"\n",
" </Sphere>\n",
"\n",
" </Shape>\n",
"\n",
" </Transform>\n",
"\n",
" <Transform translation=\"-0.54 1.92 -0.87\">\n",
"\n",
" <Shape>\n",
"\n",
" <Appearance>\n",
"\n",
" <Material diffuseColor=\"1.000 1.000 1.000\" specularColor=\"0.5 0.5 0.5\">\n",
"\n",
" </Material>\n",
"\n",
" </Appearance>\n",
"\n",
" <Sphere radius=\"0.31\">\n",
"\n",
" </Sphere>\n",
"\n",
" </Shape>\n",
"\n",
" </Transform>\n",
"\n",
" <Transform translation=\"-0.53 1.91 0.90\">\n",
"\n",
" <Shape>\n",
"\n",
" <Appearance>\n",
"\n",
" <Material diffuseColor=\"1.000 1.000 1.000\" specularColor=\"0.5 0.5 0.5\">\n",
"\n",
" </Material>\n",
"\n",
" </Appearance>\n",
"\n",
" <Sphere radius=\"0.31\">\n",
"\n",
" </Sphere>\n",
"\n",
" </Shape>\n",
"\n",
" </Transform>\n",
"\n",
" <Transform translation=\"0.53 -0.40 0.88\">\n",
"\n",
" <Shape>\n",
"\n",
" <Appearance>\n",
"\n",
" <Material diffuseColor=\"1.000 1.000 1.000\" specularColor=\"0.5 0.5 0.5\">\n",
"\n",
" </Material>\n",
"\n",
" </Appearance>\n",
"\n",
" <Sphere radius=\"0.31\">\n",
"\n",
" </Sphere>\n",
"\n",
" </Shape>\n",
"\n",
" </Transform>\n",
"\n",
" <Transform translation=\"-1.01 -0.42 0.01\">\n",
"\n",
" <Shape>\n",
"\n",
" <Appearance>\n",
"\n",
" <Material diffuseColor=\"1.000 1.000 1.000\" specularColor=\"0.5 0.5 0.5\">\n",
"\n",
" </Material>\n",
"\n",
" </Appearance>\n",
"\n",
" <Sphere radius=\"0.31\">\n",
"\n",
" </Sphere>\n",
"\n",
" </Shape>\n",
"\n",
" </Transform>\n",
"\n",
" <Transform translation=\"0.51 -0.39 -0.89\">\n",
"\n",
" <Shape>\n",
"\n",
" <Appearance>\n",
"\n",
" <Material diffuseColor=\"1.000 1.000 1.000\" specularColor=\"0.5 0.5 0.5\">\n",
"\n",
" </Material>\n",
"\n",
" </Appearance>\n",
"\n",
" <Sphere radius=\"0.31\">\n",
"\n",
" </Sphere>\n",
"\n",
" </Shape>\n",
"\n",
" </Transform>\n",
"\n",
" </Scene>\n",
"\n",
" </X3D>\n",
"\n",
" </body>\n",
"\n",
"</html>\n",
"\n"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"execution_count": 52,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from ase.visualize import view\n",
"\n",
"view(kinds[frozenset([1, 6])][2], viewer=\"x3d\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "base",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "f26faf9d33dc8b83cd077f62f5d9010e5bc51611e479f12b96223e2da63ba699"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment