Skip to content

Instantly share code, notes, and snippets.

@yuxuanzhuang
Last active October 14, 2020 16:15
Show Gist options
  • Save yuxuanzhuang/4918cd1b5d8d62de79eab9df40de4bb7 to your computer and use it in GitHub Desktop.
Save yuxuanzhuang/4918cd1b5d8d62de79eab9df40de4bb7 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import MDAnalysis as mda\n",
"import MDAnalysisTests.datafiles as data\n",
"\n",
"import itertools\n",
"import numpy as np\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"import timeit\n",
"%matplotlib inline\n",
"\n",
"from multiprocessing import cpu_count\n",
"n_jobs = cpu_count()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"from threadpoolctl import threadpool_limits "
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"import time"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"%load_ext line_profiler"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"files = {'PSF': data.PSF,\n",
" 'DCD': data.DCD,\n",
" 'PDB': 'Trajectory/YiiP_system.pdb',\n",
" 'LONG_TRAJ': 'Trajectory/YiiP_system_90ns_center.xtc',\n",
" 'SHORT_TRAJ':'Trajectory/YiiP_system_9ns_center.xtc'}"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"File list:\n",
"- Yiip_system.pdb\n",
" - 111815 atoms\n",
" - 8.5 MB\n",
"- Yiip_system_9(0)_ns_center.xtc\n",
" - 111815 atoms\n",
" - 900/9000 frames\n",
" - 356/3560 MB\n"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"from MDAnalysis.analysis.rms import RMSD as serial_rmsd"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"from MDAnalysis import transformations as trans"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/scottzhuang/mdanalysis/package/MDAnalysis/topology/guessers.py:146: UserWarning: Failed to guess the mass for the following atom types: D\n",
" warnings.warn(\"Failed to guess the mass for the following atom types: {}\".format(atom_type))\n",
"/home/scottzhuang/mdanalysis/package/MDAnalysis/topology/guessers.py:146: UserWarning: Failed to guess the mass for the following atom types: Z\n",
" warnings.warn(\"Failed to guess the mass for the following atom types: {}\".format(atom_type))\n",
"/home/scottzhuang/mdanalysis/package/MDAnalysis/topology/PDBParser.py:330: UserWarning: Invalid elements found in the PDB file, elements attributes will not be populated.\n",
" warnings.warn(\"Invalid elements found in the PDB file, \"\n"
]
}
],
"source": [
"u = mda.Universe(files['PDB'], files['SHORT_TRAJ'])"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"def visiting_positions(u, n_thread):\n",
" with threadpool_limits(n_thread):\n",
" for ts in u.trajectory:\n",
" _ = ts.positions"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Visiting Performance"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Timer unit: 1e-06 s\n",
"\n",
"Total time: 4.75398 s\n",
"File: /home/scottzhuang/mdanalysis/package/MDAnalysis/coordinates/base.py\n",
"Function: next at line 1457\n",
"\n",
"Line # Hits Time Per Hit % Time Line Contents\n",
"==============================================================\n",
" 1457 def next(self):\n",
" 1458 \"\"\"Forward one step to next frame.\"\"\"\n",
" 1459 903 378.0 0.4 0.0 try:\n",
" 1460 903 4750500.0 5260.8 99.9 ts = self._read_next_timestep()\n",
" 1461 1 0.0 0.0 0.0 except (EOFError, IOError):\n",
" 1462 1 40.0 40.0 0.0 self.rewind()\n",
" 1463 1 1.0 1.0 0.0 raise StopIteration from None\n",
" 1464 else:\n",
" 1465 902 1553.0 1.7 0.0 for auxname in self.aux_list:\n",
" 1466 ts = self._auxs[auxname].update_ts(ts)\n",
" 1467 \n",
" 1468 902 1189.0 1.3 0.0 ts = self._apply_transformations(ts)\n",
" 1469 \n",
" 1470 902 322.0 0.4 0.0 return ts"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"%lprun -f mda.coordinates.XTC.XTCReader.next visiting_positions(u, 12)"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Timer unit: 1e-06 s\n",
"\n",
"Total time: 4.75201 s\n",
"File: /home/scottzhuang/mdanalysis/package/MDAnalysis/coordinates/base.py\n",
"Function: next at line 1457\n",
"\n",
"Line # Hits Time Per Hit % Time Line Contents\n",
"==============================================================\n",
" 1457 def next(self):\n",
" 1458 \"\"\"Forward one step to next frame.\"\"\"\n",
" 1459 903 341.0 0.4 0.0 try:\n",
" 1460 903 4748669.0 5258.8 99.9 ts = self._read_next_timestep()\n",
" 1461 1 1.0 1.0 0.0 except (EOFError, IOError):\n",
" 1462 1 37.0 37.0 0.0 self.rewind()\n",
" 1463 1 2.0 2.0 0.0 raise StopIteration from None\n",
" 1464 else:\n",
" 1465 902 1479.0 1.6 0.0 for auxname in self.aux_list:\n",
" 1466 ts = self._auxs[auxname].update_ts(ts)\n",
" 1467 \n",
" 1468 902 1184.0 1.3 0.0 ts = self._apply_transformations(ts)\n",
" 1469 \n",
" 1470 902 296.0 0.3 0.0 return ts"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"%lprun -f mda.coordinates.XTC.XTCReader.next visiting_positions(u, 6)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Timer unit: 1e-06 s\n",
"\n",
"Total time: 4.76074 s\n",
"File: /home/scottzhuang/mdanalysis/package/MDAnalysis/coordinates/base.py\n",
"Function: next at line 1457\n",
"\n",
"Line # Hits Time Per Hit % Time Line Contents\n",
"==============================================================\n",
" 1457 def next(self):\n",
" 1458 \"\"\"Forward one step to next frame.\"\"\"\n",
" 1459 903 352.0 0.4 0.0 try:\n",
" 1460 903 4757272.0 5268.3 99.9 ts = self._read_next_timestep()\n",
" 1461 1 1.0 1.0 0.0 except (EOFError, IOError):\n",
" 1462 1 43.0 43.0 0.0 self.rewind()\n",
" 1463 1 1.0 1.0 0.0 raise StopIteration from None\n",
" 1464 else:\n",
" 1465 902 1537.0 1.7 0.0 for auxname in self.aux_list:\n",
" 1466 ts = self._auxs[auxname].update_ts(ts)\n",
" 1467 \n",
" 1468 902 1234.0 1.4 0.0 ts = self._apply_transformations(ts)\n",
" 1469 \n",
" 1470 902 298.0 0.3 0.0 return ts"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"%lprun -f mda.coordinates.XTC.XTCReader.next visiting_positions(u, 1)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Adding Transformation"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [],
"source": [
"fit_trans = trans.fit_rot_trans(u.atoms, u.atoms)\n",
"\n",
"u.trajectory.add_transformations(fit_trans)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Visiting Performance"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Timer unit: 1e-06 s\n",
"\n",
"Total time: 21.0675 s\n",
"File: /home/scottzhuang/mdanalysis/package/MDAnalysis/coordinates/base.py\n",
"Function: next at line 1457\n",
"\n",
"Line # Hits Time Per Hit % Time Line Contents\n",
"==============================================================\n",
" 1457 def next(self):\n",
" 1458 \"\"\"Forward one step to next frame.\"\"\"\n",
" 1459 903 794.0 0.9 0.0 try:\n",
" 1460 903 7842536.0 8685.0 37.2 ts = self._read_next_timestep()\n",
" 1461 1 2.0 2.0 0.0 except (EOFError, IOError):\n",
" 1462 1 64.0 64.0 0.0 self.rewind()\n",
" 1463 1 4.0 4.0 0.0 raise StopIteration from None\n",
" 1464 else:\n",
" 1465 902 3436.0 3.8 0.0 for auxname in self.aux_list:\n",
" 1466 ts = self._auxs[auxname].update_ts(ts)\n",
" 1467 \n",
" 1468 902 13218977.0 14655.2 62.7 ts = self._apply_transformations(ts)\n",
" 1469 \n",
" 1470 902 1656.0 1.8 0.0 return ts"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"%lprun -f mda.coordinates.XTC.XTCReader.next visiting_positions(u, 12)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Timer unit: 1e-06 s\n",
"\n",
"Total time: 12.7347 s\n",
"File: /home/scottzhuang/mdanalysis/package/MDAnalysis/coordinates/base.py\n",
"Function: next at line 1457\n",
"\n",
"Line # Hits Time Per Hit % Time Line Contents\n",
"==============================================================\n",
" 1457 def next(self):\n",
" 1458 \"\"\"Forward one step to next frame.\"\"\"\n",
" 1459 903 498.0 0.6 0.0 try:\n",
" 1460 903 4979787.0 5514.7 39.1 ts = self._read_next_timestep()\n",
" 1461 1 1.0 1.0 0.0 except (EOFError, IOError):\n",
" 1462 1 51.0 51.0 0.0 self.rewind()\n",
" 1463 1 3.0 3.0 0.0 raise StopIteration from None\n",
" 1464 else:\n",
" 1465 902 2475.0 2.7 0.0 for auxname in self.aux_list:\n",
" 1466 ts = self._auxs[auxname].update_ts(ts)\n",
" 1467 \n",
" 1468 902 7750723.0 8592.8 60.9 ts = self._apply_transformations(ts)\n",
" 1469 \n",
" 1470 902 1199.0 1.3 0.0 return ts"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"%lprun -f mda.coordinates.XTC.XTCReader.next visiting_positions(u, 6)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Timer unit: 1e-06 s\n",
"\n",
"Total time: 13.085 s\n",
"File: /home/scottzhuang/mdanalysis/package/MDAnalysis/coordinates/base.py\n",
"Function: next at line 1457\n",
"\n",
"Line # Hits Time Per Hit % Time Line Contents\n",
"==============================================================\n",
" 1457 def next(self):\n",
" 1458 \"\"\"Forward one step to next frame.\"\"\"\n",
" 1459 903 507.0 0.6 0.0 try:\n",
" 1460 903 4951537.0 5483.4 37.8 ts = self._read_next_timestep()\n",
" 1461 1 1.0 1.0 0.0 except (EOFError, IOError):\n",
" 1462 1 49.0 49.0 0.0 self.rewind()\n",
" 1463 1 3.0 3.0 0.0 raise StopIteration from None\n",
" 1464 else:\n",
" 1465 902 2356.0 2.6 0.0 for auxname in self.aux_list:\n",
" 1466 ts = self._auxs[auxname].update_ts(ts)\n",
" 1467 \n",
" 1468 902 8129421.0 9012.7 62.1 ts = self._apply_transformations(ts)\n",
" 1469 \n",
" 1470 902 1168.0 1.3 0.0 return ts"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"%lprun -f mda.coordinates.XTC.XTCReader.next visiting_positions(u, 1)"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Timer unit: 1e-06 s\n",
"\n",
"Total time: 7.91996 s\n",
"File: /home/scottzhuang/mdanalysis/package/MDAnalysis/coordinates/XDR.py\n",
"Function: _read_next_timestep at line 264\n",
"\n",
"Line # Hits Time Per Hit % Time Line Contents\n",
"==============================================================\n",
" 264 def _read_next_timestep(self, ts=None):\n",
" 265 \"\"\"copy next frame into timestep\"\"\"\n",
" 266 903 5772.0 6.4 0.1 if self._frame == self.n_frames - 1:\n",
" 267 1 5.0 5.0 0.0 raise IOError(errno.EIO, 'trying to go over trajectory limit')\n",
" 268 902 877.0 1.0 0.0 if ts is None:\n",
" 269 902 760.0 0.8 0.0 ts = self.ts\n",
" 270 902 7468214.0 8279.6 94.3 frame = self._xdr.read()\n",
" 271 902 1958.0 2.2 0.0 self._frame += 1\n",
" 272 902 441268.0 489.2 5.6 self._frame_to_ts(frame, ts)\n",
" 273 902 1103.0 1.2 0.0 return ts"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"%lprun -f mda.coordinates.XTC.XTCReader._read_next_timestep visiting_positions(u, 12)"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Timer unit: 1e-06 s\n",
"\n",
"Total time: 5.82703 s\n",
"File: /home/scottzhuang/mdanalysis/package/MDAnalysis/coordinates/XDR.py\n",
"Function: _read_next_timestep at line 264\n",
"\n",
"Line # Hits Time Per Hit % Time Line Contents\n",
"==============================================================\n",
" 264 def _read_next_timestep(self, ts=None):\n",
" 265 \"\"\"copy next frame into timestep\"\"\"\n",
" 266 903 5273.0 5.8 0.1 if self._frame == self.n_frames - 1:\n",
" 267 1 5.0 5.0 0.0 raise IOError(errno.EIO, 'trying to go over trajectory limit')\n",
" 268 902 730.0 0.8 0.0 if ts is None:\n",
" 269 902 517.0 0.6 0.0 ts = self.ts\n",
" 270 902 5459719.0 6052.9 93.7 frame = self._xdr.read()\n",
" 271 902 1519.0 1.7 0.0 self._frame += 1\n",
" 272 902 358373.0 397.3 6.2 self._frame_to_ts(frame, ts)\n",
" 273 902 891.0 1.0 0.0 return ts"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"%lprun -f mda.coordinates.XTC.XTCReader._read_next_timestep visiting_positions(u, 6)"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Timer unit: 1e-06 s\n",
"\n",
"Total time: 4.95612 s\n",
"File: /home/scottzhuang/mdanalysis/package/MDAnalysis/coordinates/XDR.py\n",
"Function: _read_next_timestep at line 264\n",
"\n",
"Line # Hits Time Per Hit % Time Line Contents\n",
"==============================================================\n",
" 264 def _read_next_timestep(self, ts=None):\n",
" 265 \"\"\"copy next frame into timestep\"\"\"\n",
" 266 903 4566.0 5.1 0.1 if self._frame == self.n_frames - 1:\n",
" 267 1 4.0 4.0 0.0 raise IOError(errno.EIO, 'trying to go over trajectory limit')\n",
" 268 902 572.0 0.6 0.0 if ts is None:\n",
" 269 902 436.0 0.5 0.0 ts = self.ts\n",
" 270 902 4633718.0 5137.2 93.5 frame = self._xdr.read()\n",
" 271 902 1247.0 1.4 0.0 self._frame += 1\n",
" 272 902 314800.0 349.0 6.4 self._frame_to_ts(frame, ts)\n",
" 273 902 773.0 0.9 0.0 return ts"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"%lprun -f mda.coordinates.XTC.XTCReader._read_next_timestep visiting_positions(u, 1)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "gsoc",
"language": "python",
"name": "gsoc"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
@orbeckst
Copy link

It is admittedly weird that the presence of transformations affect the read performance.

Is this reproducible?

@orbeckst
Copy link

Without transformations, n_threads and oversubscription do not affect ts = self._read_next_timestep(), with ~5260 µs.

With transformation, at n=1 and n=6 frame = self._xdr.read() is ~5000 µs or 6000 µs (comparable to the above) but when oversubscribed rises to 8279.6 µs. That's odd.

@yuxuanzhuang
Copy link
Author

yuxuanzhuang commented Oct 14, 2020

It seems to be a generic effect of the numpy parallel code (e.g. np.dot). I did a test without MDAnalysis and the code after np.dot (regardless it is numpy related or not) tends to slow down if numpy is subscribing multiple threads.
https://gist.github.com/yuxuanzhuang/82e1e7b57d0cda80ac964d1cd138f618

@orbeckst
Copy link

This is just weird.

I suppose the conclusion is still that oversubscribing hurts performance.

@yuxuanzhuang
Copy link
Author

I can confirm it should be an OpenBlas issue. For numpy built with mkl, the default num_of_thread will be 6 (the physical core number), so the oversubscribing won't be happening.

[{'filepath': '/home/scottzhuang/anaconda3/lib/libmkl_rt.so',
  'prefix': 'libmkl_rt',
  'user_api': 'blas',
  'internal_api': 'mkl',
  'version': '2020.0.0',
  'num_threads': 6,
  'threading_layer': 'intel'},
 {'filepath': '/home/scottzhuang/anaconda3/lib/libiomp5.so',
  'prefix': 'libiomp',
  'user_api': 'openmp',
  'internal_api': 'openmp',
  'version': None,
  'num_threads': 12}]

The problem now is, OpenBlas always uses all hyperthreading in default, which the numerical code doesn't seem to benefit from (?), and also tend to cause the troubles mentioned above. I guess we should limit threads to the number of physical cores for all the MDAnalysis code...before OpenBlas changes its default.

@orbeckst
Copy link

Can you share your conclusions in the MDA issue MDAnalysis/mdanalysis#2975 for discussion so that other interested parties can also comment?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment