{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import io\n",
"import os\n",
"import shlex\n",
"import timeit\n",
"import MDAnalysis as mda\n",
"from MDAnalysis.core.universe import Merge\n",
"import pickle "
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import multiprocessing"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"from functools import partial"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Large Trajecory Generation"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"u = mda.Universe('test_multi.pdb')"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [],
"source": [
"u_larger = Merge(u.atoms, u.atoms, u.atoms, u.atoms)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/scottzhuang/mdanalysis/package/MDAnalysis/coordinates/PDB.py:718: UserWarning: Unit cell dimensions not found. CRYST1 record set to unitary values.\n",
" warnings.warn(\"Unit cell dimensions not found. \"\n"
]
}
],
"source": [
"u_larger.atoms.write('test_larger.pdb')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"with mda.Writer(\"test_larger_longer.pdb\", n_atoms=u_larger.trajectory.n_atoms) as W:\n",
" for repeat in range(500):\n",
" W.write(u_larger.atoms)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"with mda.Writer(\"test_larger_longer.xyz\", n_atoms=u_larger.trajectory.n_atoms) as W:\n",
" for repeat in range(500):\n",
" W.write(u_larger.atoms)"
]
},
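{
"cell_type": "markdown",
"metadata": {},
"source": [
"A small optional check (not part of the original run): report the on-disk size of the two generated test files, which the section headers below quote as roughly 7 GB for the PDB and 3.8 GB for the XYZ."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Optional sanity check: print the size of each generated test file in GB.\n",
"for fname in ('test_larger_longer.pdb', 'test_larger_longer.xyz'):\n",
"    size_gb = os.path.getsize(fname) / 1e9\n",
"    print(f'{fname}: {size_gb:.1f} GB')"
]
},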
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Serialize IO"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### PDB 7GB\n",
"- 190725 atoms\n",
"- 500 frames"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"testfile = 'test_larger_longer.pdb'\n",
"def create_universe(testfile):\n",
" u = mda.Universe(testfile)\n",
"\n",
" \n",
"u = mda.Universe(testfile)\n",
"def pickle_time(u):\n",
" u_p = pickle.loads(pickle.dumps(u.trajectory))\n",
"\n",
" \n",
"def cog(u, ag, ts):\n",
" u.trajectory[ts]\n",
" return ag.center_of_geometry()\n",
"\n",
"def analysis_through_traj(u):\n",
" result = []\n",
" n_frames = u.trajectory.n_frames\n",
" ag = u.select_atoms('protein')\n",
" for ts in range(n_frames):\n",
" result.append(cog(u, ag, ts))\n",
" return result\n",
" \n",
"result_serilize = timeit.timeit(\"create_universe(testfile)\", globals=globals(), number=10)\n",
"result_pickle = timeit.timeit(\"pickle_time(u)\", globals=globals(), number=10)\n",
"result_iter = timeit.timeit(\"analysis_through_traj(u)\", globals=globals(), number=10)\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Create Universe: 705.544\n",
"Pickle/Unpickle Trajectory: 0.020\n",
"Analysis along traj: 1664.560\n"
]
}
],
"source": [
"print(f\"Create Universe: {result_serilize:.3f}\")\n",
"print(f\"Pickle/Unpickle Trajectory: {result_pickle:.3f}\")\n",
"print(f\"Analysis along traj: {result_iter:.3f}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### XYZ 3.8GB\n",
"- 190725 atoms\n",
"- 500 frames"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"testfile = 'test_larger_longer.xyz'\n",
"def create_universe(testfile):\n",
" u = mda.Universe(testfile)\n",
"\n",
" \n",
"u = mda.Universe(testfile)\n",
"def pickle_time(u):\n",
" u_p = pickle.loads(pickle.dumps(u.trajectory))\n",
"\n",
" \n",
"def cog(u, ag, ts):\n",
" u.trajectory[ts]\n",
" return ag.center_of_geometry()\n",
"\n",
"def analysis_through_traj(u):\n",
" result = []\n",
" n_frames = u.trajectory.n_frames\n",
" ag = u.atoms\n",
" for ts in range(n_frames):\n",
" result.append(cog(u, ag, ts))\n",
" return result\n",
" \n",
"result_serilize = timeit.timeit(\"create_universe(testfile)\", globals=globals(), number=10)\n",
"result_pickle = timeit.timeit(\"pickle_time(u)\", globals=globals(), number=10)\n",
"result_iter = timeit.timeit(\"analysis_through_traj(u)\", globals=globals(), number=10)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"In #PR 2723, XYZ file\n",
"Create Universe: 10.063\n",
"Pickle/Unpickle Trajectory: 0.005\n",
"Analysis along traj: 1141.099\n"
]
}
],
"source": [
"print('In #PR 2723, XYZ file')\n",
"print(f\"Create Universe: {result_serilize:.3f}\")\n",
"print(f\"Pickle/Unpickle Trajectory: {result_pickle:.3f}\")\n",
"print(f\"Analysis along traj: {result_iter:.3f}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Serialize PR #2703"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### PDB 7GB\n",
"- 190725 atoms\n",
"- 500 frames"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"testfile = 'test_larger_longer.pdb'\n",
"def create_universe(testfile):\n",
" u = mda.Universe(testfile)\n",
"\n",
" \n",
"u = mda.Universe(testfile)\n",
"def pickle_time(u):\n",
" u_p = pickle.loads(pickle.dumps(u.trajectory))\n",
"\n",
" \n",
"def cog(u, ag, ts):\n",
" u.trajectory[ts]\n",
" return ag.center_of_geometry()\n",
"\n",
"def analysis_through_traj(u):\n",
" result = []\n",
" n_frames = u.trajectory.n_frames\n",
" ag = u.select_atoms('protein')\n",
" for ts in range(n_frames):\n",
" result.append(cog(u, ag, ts))\n",
" return result\n",
" \n",
"result_serilize = timeit.timeit(\"create_universe(testfile)\", globals=globals(), number=10)\n",
"result_pickle = timeit.timeit(\"pickle_time(u)\", globals=globals(), number=10)\n",
"result_iter = timeit.timeit(\"analysis_through_traj(u)\", globals=globals(), number=10)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"In #PR 2704, PDB file\n",
"Create Universe: 619.693\n",
"Pickle/Unpickle Trajectory: 0.014\n",
"Analysis along traj: 1556.207\n"
]
}
],
"source": [
"print('In #PR 2704, PDB file')\n",
"print(f\"Create Universe: {result_serilize:.3f}\")\n",
"print(f\"Pickle/Unpickle Trajectory: {result_pickle:.3f}\")\n",
"print(f\"Analysis along traj: {result_iter:.3f}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### XYZ 3.8GB\n",
"- 190725 atoms\n",
"- 500 frames"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"testfile = 'test_larger_longer.xyz'\n",
"def create_universe(testfile):\n",
" u = mda.Universe(testfile)\n",
"\n",
" \n",
"u = mda.Universe(testfile)\n",
"def pickle_time(u):\n",
" u_p = pickle.loads(pickle.dumps(u.trajectory))\n",
"\n",
" \n",
"def cog(u, ag, ts):\n",
" u.trajectory[ts]\n",
" return ag.center_of_geometry()\n",
"\n",
"def analysis_through_traj(u):\n",
" result = []\n",
" n_frames = u.trajectory.n_frames\n",
" ag = u.atoms\n",
" for ts in range(n_frames):\n",
" result.append(cog(u, ag, ts))\n",
" return result\n",
" \n",
"result_serilize = timeit.timeit(\"create_universe(testfile)\", globals=globals(), number=10)\n",
"result_pickle = timeit.timeit(\"pickle_time(u)\", globals=globals(), number=10)\n",
"result_iter = timeit.timeit(\"analysis_through_traj(u)\", globals=globals(), number=10)"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"In #PR 2704, XYZ file\n",
"Create Universe: 9.322\n",
"Pickle/Unpickle Trajectory: 0.007\n",
"Analysis along traj: 1120.986\n"
]
}
],
"source": [
"print('In #PR 2704, XYZ file')\n",
"print(f\"Create Universe: {result_serilize:.3f}\")\n",
"print(f\"Pickle/Unpickle Trajectory: {result_pickle:.3f}\")\n",
"print(f\"Analysis along traj: {result_iter:.3f}\")"
]
},
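{
"cell_type": "markdown",
"metadata": {},
"source": [
"A minimal, hypothetical sketch (not part of the benchmarks above) of what a picklable trajectory reader enables: mapping the per-frame center-of-geometry calculation over a `multiprocessing.Pool` with `functools.partial`, both imported at the top of this notebook. It assumes the Universe pickles cleanly, which is what the reader serialization benchmarked here works toward, and a fork-based start method as on Linux; the function name `cog_frame` and the pool size of 4 are illustrative choices only."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Hypothetical sketch, not part of the benchmark above: distribute the\n",
"# per-frame center-of-geometry calculation over worker processes. Each\n",
"# worker receives a pickled copy of the Universe.\n",
"def cog_frame(u, frame):\n",
"    u.trajectory[frame]\n",
"    return u.atoms.center_of_geometry()\n",
"\n",
"with multiprocessing.Pool(4) as pool:\n",
"    parallel_result = pool.map(partial(cog_frame, u),\n",
"                               range(u.trajectory.n_frames))"
]
},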
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "mda_py38",
"language": "python",
"name": "gsoc"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 4
}