Skip to content

Instantly share code, notes, and snippets.

@yuxuanzhuang
Last active October 13, 2020 20:35
Show Gist options
  • Save yuxuanzhuang/17ed6def63b08248db59f2c44e3e0419 to your computer and use it in GitHub Desktop.
Save yuxuanzhuang/17ed6def63b08248db59f2c44e3e0419 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import MDAnalysis as mda\n",
"import MDAnalysisTests.datafiles as data\n",
"\n",
"import itertools\n",
"import numpy as np\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"import timeit\n",
"%matplotlib inline\n",
"\n",
"from multiprocessing import cpu_count\n",
"n_jobs = cpu_count()"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"from threadpoolctl import threadpool_limits "
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"import time"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"%load_ext line_profiler"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"files = {'PSF': data.PSF,\n",
" 'DCD': data.DCD,\n",
" 'PDB': 'Trajectory/YiiP_system.pdb',\n",
" 'LONG_TRAJ': 'Trajectory/YiiP_system_90ns_center.xtc',\n",
" 'SHORT_TRAJ':'Trajectory/YiiP_system_9ns_center.xtc'}"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"File list:\n",
"- Yiip_system.pdb\n",
" - 111815 atoms\n",
" - 8.5 MB\n",
"- Yiip_system_9(0)_ns_center.xtc\n",
" - 111815 atoms\n",
" - 900/9000 frames\n",
" - 356/3560 MB\n"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"from MDAnalysis.analysis.rms import RMSD as serial_rmsd"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"from MDAnalysis import transformations as trans"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/scottzhuang/mdanalysis/package/MDAnalysis/topology/guessers.py:146: UserWarning: Failed to guess the mass for the following atom types: D\n",
" warnings.warn(\"Failed to guess the mass for the following atom types: {}\".format(atom_type))\n",
"/home/scottzhuang/mdanalysis/package/MDAnalysis/topology/guessers.py:146: UserWarning: Failed to guess the mass for the following atom types: Z\n",
" warnings.warn(\"Failed to guess the mass for the following atom types: {}\".format(atom_type))\n",
"/home/scottzhuang/mdanalysis/package/MDAnalysis/topology/PDBParser.py:330: UserWarning: Invalid elements found in the PDB file, elements attributes will not be populated.\n",
" warnings.warn(\"Invalid elements found in the PDB file, \"\n"
]
}
],
"source": [
"u = mda.Universe(files['PDB'], files['SHORT_TRAJ'])\n",
"atoms = u.atoms"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"def serial_analysis(u, n_thread):\n",
" with threadpool_limits(n_thread):\n",
" rmsd = serial_rmsd(atoms, atoms)\n",
" rmsd.run()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Without Transformation"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Analysis Performance"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Timer unit: 1e-06 s\n",
"\n",
"Total time: 9.712 s\n",
"File: /home/scottzhuang/mdanalysis/package/MDAnalysis/analysis/base.py\n",
"Function: run at line 164\n",
"\n",
"Line # Hits Time Per Hit % Time Line Contents\n",
"==============================================================\n",
" 164 def run(self, start=None, stop=None, step=None, verbose=None):\n",
" 165 \"\"\"Perform the calculation\n",
" 166 \n",
" 167 Parameters\n",
" 168 ----------\n",
" 169 start : int, optional\n",
" 170 start frame of analysis\n",
" 171 stop : int, optional\n",
" 172 stop frame of analysis\n",
" 173 step : int, optional\n",
" 174 number of frames to skip between each analysed frame\n",
" 175 verbose : bool, optional\n",
" 176 Turn on verbosity\n",
" 177 \"\"\"\n",
" 178 1 12.0 12.0 0.0 logger.info(\"Choosing frames to analyze\")\n",
" 179 # if verbose unchanged, use class default\n",
" 180 3 3.0 1.0 0.0 verbose = getattr(self, '_verbose',\n",
" 181 2 2.0 1.0 0.0 False) if verbose is None else verbose\n",
" 182 \n",
" 183 1 64.0 64.0 0.0 self._setup_frames(self._trajectory, start, stop, step)\n",
" 184 1 2.0 2.0 0.0 logger.info(\"Starting preparation\")\n",
" 185 1 20627.0 20627.0 0.2 self._prepare()\n",
" 186 903 4869087.0 5392.1 50.1 for i, ts in enumerate(ProgressBar(\n",
" 187 1 49.0 49.0 0.0 self._trajectory[self.start:self.stop:self.step],\n",
" 188 1 0.0 0.0 0.0 verbose=verbose)):\n",
" 189 901 1117.0 1.2 0.0 self._frame_index = i\n",
" 190 901 531.0 0.6 0.0 self._ts = ts\n",
" 191 901 1365.0 1.5 0.0 self.frames[i] = ts.frame\n",
" 192 901 2197.0 2.4 0.0 self.times[i] = ts.time\n",
" 193 # logger.info(\"--> Doing frame {} of {}\".format(i+1, self.n_frames))\n",
" 194 901 4816939.0 5346.2 49.6 self._single_frame()\n",
" 195 1 6.0 6.0 0.0 logger.info(\"Finishing up\")\n",
" 196 1 2.0 2.0 0.0 self._conclude()\n",
" 197 1 1.0 1.0 0.0 return self"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"%lprun -f mda.analysis.rms.RMSD.run serial_analysis(u, 12)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Timer unit: 1e-06 s\n",
"\n",
"Total time: 9.62739 s\n",
"File: /home/scottzhuang/mdanalysis/package/MDAnalysis/analysis/base.py\n",
"Function: run at line 164\n",
"\n",
"Line # Hits Time Per Hit % Time Line Contents\n",
"==============================================================\n",
" 164 def run(self, start=None, stop=None, step=None, verbose=None):\n",
" 165 \"\"\"Perform the calculation\n",
" 166 \n",
" 167 Parameters\n",
" 168 ----------\n",
" 169 start : int, optional\n",
" 170 start frame of analysis\n",
" 171 stop : int, optional\n",
" 172 stop frame of analysis\n",
" 173 step : int, optional\n",
" 174 number of frames to skip between each analysed frame\n",
" 175 verbose : bool, optional\n",
" 176 Turn on verbosity\n",
" 177 \"\"\"\n",
" 178 1 4.0 4.0 0.0 logger.info(\"Choosing frames to analyze\")\n",
" 179 # if verbose unchanged, use class default\n",
" 180 3 0.0 0.0 0.0 verbose = getattr(self, '_verbose',\n",
" 181 2 1.0 0.5 0.0 False) if verbose is None else verbose\n",
" 182 \n",
" 183 1 29.0 29.0 0.0 self._setup_frames(self._trajectory, start, stop, step)\n",
" 184 1 3.0 3.0 0.0 logger.info(\"Starting preparation\")\n",
" 185 1 19491.0 19491.0 0.2 self._prepare()\n",
" 186 903 4836474.0 5356.0 50.2 for i, ts in enumerate(ProgressBar(\n",
" 187 1 39.0 39.0 0.0 self._trajectory[self.start:self.stop:self.step],\n",
" 188 1 1.0 1.0 0.0 verbose=verbose)):\n",
" 189 901 1147.0 1.3 0.0 self._frame_index = i\n",
" 190 901 586.0 0.7 0.0 self._ts = ts\n",
" 191 901 1384.0 1.5 0.0 self.frames[i] = ts.frame\n",
" 192 901 2173.0 2.4 0.0 self.times[i] = ts.time\n",
" 193 # logger.info(\"--> Doing frame {} of {}\".format(i+1, self.n_frames))\n",
" 194 901 4766044.0 5289.7 49.5 self._single_frame()\n",
" 195 1 8.0 8.0 0.0 logger.info(\"Finishing up\")\n",
" 196 1 2.0 2.0 0.0 self._conclude()\n",
" 197 1 1.0 1.0 0.0 return self"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"%lprun -f mda.analysis.rms.RMSD.run serial_analysis(u, 1)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## _Single_frame Performance"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Timer unit: 1e-06 s\n",
"\n",
"Total time: 4.77348 s\n",
"File: /home/scottzhuang/mdanalysis/package/MDAnalysis/analysis/rms.py\n",
"Function: _single_frame at line 633\n",
"\n",
"Line # Hits Time Per Hit % Time Line Contents\n",
"==============================================================\n",
" 633 def _single_frame(self):\n",
" 634 901 2573222.0 2856.0 53.9 mobile_com = self.mobile_atoms.center(self.weights_select).astype(np.float64)\n",
" 635 901 1190472.0 1321.3 24.9 self._mobile_coordinates64[:] = self.mobile_atoms.positions\n",
" 636 901 468754.0 520.3 9.8 self._mobile_coordinates64 -= mobile_com\n",
" 637 \n",
" 638 901 7189.0 8.0 0.2 self.rmsd[self._frame_index, :2] = self._ts.frame, self._trajectory.time\n",
" 639 \n",
" 640 901 755.0 0.8 0.0 if self._groupselections_atoms:\n",
" 641 # superimpose structures: MDAnalysis qcprot needs Nx3 coordinate\n",
" 642 # array with float64 datatype (float32 leads to errors up to 1e-3 in\n",
" 643 # RMSD). Note that R is defined in such a way that it acts **to the\n",
" 644 # left** so that we can easily use broadcasting and save one\n",
" 645 # expensive numpy transposition.\n",
" 646 \n",
" 647 self.rmsd[self._frame_index, 2] = qcp.CalcRMSDRotationalMatrix(\n",
" 648 self._ref_coordinates64, self._mobile_coordinates64,\n",
" 649 self._n_atoms, self._rot, self.weights_select)\n",
" 650 \n",
" 651 self._R[:, :] = self._rot.reshape(3, 3)\n",
" 652 # Transform each atom in the trajectory (use inplace ops to\n",
" 653 # avoid copying arrays) (Marginally (~3%) faster than\n",
" 654 # \"ts.positions[:] = (ts.positions - x_com) * R + ref_com\".)\n",
" 655 self._ts.positions[:] -= mobile_com\n",
" 656 \n",
" 657 # R acts to the left & is broadcasted N times.\n",
" 658 self._ts.positions[:] = np.dot(self._ts.positions, self._R)\n",
" 659 self._ts.positions += self._ref_com\n",
" 660 \n",
" 661 # 2) calculate secondary RMSDs (without any further\n",
" 662 # superposition)\n",
" 663 for igroup, (refpos, atoms) in enumerate(\n",
" 664 zip(self._groupselections_ref_coords64,\n",
" 665 self._groupselections_atoms), 3):\n",
" 666 self.rmsd[self._frame_index, igroup] = rmsd(\n",
" 667 refpos, atoms['mobile'].positions,\n",
" 668 weights=self.weights_groupselections[igroup-3],\n",
" 669 center=False, superposition=False)\n",
" 670 else:\n",
" 671 # only calculate RMSD by setting the Rmatrix to None (no need\n",
" 672 # to carry out the rotation as we already get the optimum RMSD)\n",
" 673 1802 531944.0 295.2 11.1 self.rmsd[self._frame_index, 2] = qcp.CalcRMSDRotationalMatrix(\n",
" 674 901 608.0 0.7 0.0 self._ref_coordinates64, self._mobile_coordinates64,\n",
" 675 901 539.0 0.6 0.0 self._n_atoms, None, self.weights_select)"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"%lprun -f mda.analysis.rms.RMSD._single_frame serial_analysis(u, 1)"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Timer unit: 1e-06 s\n",
"\n",
"Total time: 4.74534 s\n",
"File: /home/scottzhuang/mdanalysis/package/MDAnalysis/analysis/rms.py\n",
"Function: _single_frame at line 633\n",
"\n",
"Line # Hits Time Per Hit % Time Line Contents\n",
"==============================================================\n",
" 633 def _single_frame(self):\n",
" 634 901 2555264.0 2836.0 53.8 mobile_com = self.mobile_atoms.center(self.weights_select).astype(np.float64)\n",
" 635 901 1178202.0 1307.7 24.8 self._mobile_coordinates64[:] = self.mobile_atoms.positions\n",
" 636 901 470892.0 522.6 9.9 self._mobile_coordinates64 -= mobile_com\n",
" 637 \n",
" 638 901 6995.0 7.8 0.1 self.rmsd[self._frame_index, :2] = self._ts.frame, self._trajectory.time\n",
" 639 \n",
" 640 901 747.0 0.8 0.0 if self._groupselections_atoms:\n",
" 641 # superimpose structures: MDAnalysis qcprot needs Nx3 coordinate\n",
" 642 # array with float64 datatype (float32 leads to errors up to 1e-3 in\n",
" 643 # RMSD). Note that R is defined in such a way that it acts **to the\n",
" 644 # left** so that we can easily use broadcasting and save one\n",
" 645 # expensive numpy transposition.\n",
" 646 \n",
" 647 self.rmsd[self._frame_index, 2] = qcp.CalcRMSDRotationalMatrix(\n",
" 648 self._ref_coordinates64, self._mobile_coordinates64,\n",
" 649 self._n_atoms, self._rot, self.weights_select)\n",
" 650 \n",
" 651 self._R[:, :] = self._rot.reshape(3, 3)\n",
" 652 # Transform each atom in the trajectory (use inplace ops to\n",
" 653 # avoid copying arrays) (Marginally (~3%) faster than\n",
" 654 # \"ts.positions[:] = (ts.positions - x_com) * R + ref_com\".)\n",
" 655 self._ts.positions[:] -= mobile_com\n",
" 656 \n",
" 657 # R acts to the left & is broadcasted N times.\n",
" 658 self._ts.positions[:] = np.dot(self._ts.positions, self._R)\n",
" 659 self._ts.positions += self._ref_com\n",
" 660 \n",
" 661 # 2) calculate secondary RMSDs (without any further\n",
" 662 # superposition)\n",
" 663 for igroup, (refpos, atoms) in enumerate(\n",
" 664 zip(self._groupselections_ref_coords64,\n",
" 665 self._groupselections_atoms), 3):\n",
" 666 self.rmsd[self._frame_index, igroup] = rmsd(\n",
" 667 refpos, atoms['mobile'].positions,\n",
" 668 weights=self.weights_groupselections[igroup-3],\n",
" 669 center=False, superposition=False)\n",
" 670 else:\n",
" 671 # only calculate RMSD by setting the Rmatrix to None (no need\n",
" 672 # to carry out the rotation as we already get the optimum RMSD)\n",
" 673 1802 532039.0 295.2 11.2 self.rmsd[self._frame_index, 2] = qcp.CalcRMSDRotationalMatrix(\n",
" 674 901 627.0 0.7 0.0 self._ref_coordinates64, self._mobile_coordinates64,\n",
" 675 901 571.0 0.6 0.0 self._n_atoms, None, self.weights_select)"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"%lprun -f mda.analysis.rms.RMSD._single_frame serial_analysis(u, 12)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Adding Transformation"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [],
"source": [
"fit_trans = trans.fit_rot_trans(u.atoms, u.atoms)\n",
"\n",
"u.trajectory.add_transformations(fit_trans)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Analysis Performance"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Timer unit: 1e-06 s\n",
"\n",
"Total time: 32.0094 s\n",
"File: /home/scottzhuang/mdanalysis/package/MDAnalysis/analysis/base.py\n",
"Function: run at line 164\n",
"\n",
"Line # Hits Time Per Hit % Time Line Contents\n",
"==============================================================\n",
" 164 def run(self, start=None, stop=None, step=None, verbose=None):\n",
" 165 \"\"\"Perform the calculation\n",
" 166 \n",
" 167 Parameters\n",
" 168 ----------\n",
" 169 start : int, optional\n",
" 170 start frame of analysis\n",
" 171 stop : int, optional\n",
" 172 stop frame of analysis\n",
" 173 step : int, optional\n",
" 174 number of frames to skip between each analysed frame\n",
" 175 verbose : bool, optional\n",
" 176 Turn on verbosity\n",
" 177 \"\"\"\n",
" 178 1 4.0 4.0 0.0 logger.info(\"Choosing frames to analyze\")\n",
" 179 # if verbose unchanged, use class default\n",
" 180 3 2.0 0.7 0.0 verbose = getattr(self, '_verbose',\n",
" 181 2 1.0 0.5 0.0 False) if verbose is None else verbose\n",
" 182 \n",
" 183 1 38.0 38.0 0.0 self._setup_frames(self._trajectory, start, stop, step)\n",
" 184 1 2.0 2.0 0.0 logger.info(\"Starting preparation\")\n",
" 185 1 48611.0 48611.0 0.2 self._prepare()\n",
" 186 903 22425967.0 24835.0 70.1 for i, ts in enumerate(ProgressBar(\n",
" 187 1 54.0 54.0 0.0 self._trajectory[self.start:self.stop:self.step],\n",
" 188 1 1.0 1.0 0.0 verbose=verbose)):\n",
" 189 901 3095.0 3.4 0.0 self._frame_index = i\n",
" 190 901 1251.0 1.4 0.0 self._ts = ts\n",
" 191 901 2451.0 2.7 0.0 self.frames[i] = ts.frame\n",
" 192 901 5597.0 6.2 0.0 self.times[i] = ts.time\n",
" 193 # logger.info(\"--> Doing frame {} of {}\".format(i+1, self.n_frames))\n",
" 194 901 9522307.0 10568.6 29.7 self._single_frame()\n",
" 195 1 10.0 10.0 0.0 logger.info(\"Finishing up\")\n",
" 196 1 4.0 4.0 0.0 self._conclude()\n",
" 197 1 2.0 2.0 0.0 return self"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"%lprun -f mda.analysis.rms.RMSD.run serial_analysis(u, 12)"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Timer unit: 1e-06 s\n",
"\n",
"Total time: 19.1609 s\n",
"File: /home/scottzhuang/mdanalysis/package/MDAnalysis/analysis/base.py\n",
"Function: run at line 164\n",
"\n",
"Line # Hits Time Per Hit % Time Line Contents\n",
"==============================================================\n",
" 164 def run(self, start=None, stop=None, step=None, verbose=None):\n",
" 165 \"\"\"Perform the calculation\n",
" 166 \n",
" 167 Parameters\n",
" 168 ----------\n",
" 169 start : int, optional\n",
" 170 start frame of analysis\n",
" 171 stop : int, optional\n",
" 172 stop frame of analysis\n",
" 173 step : int, optional\n",
" 174 number of frames to skip between each analysed frame\n",
" 175 verbose : bool, optional\n",
" 176 Turn on verbosity\n",
" 177 \"\"\"\n",
" 178 1 5.0 5.0 0.0 logger.info(\"Choosing frames to analyze\")\n",
" 179 # if verbose unchanged, use class default\n",
" 180 3 2.0 0.7 0.0 verbose = getattr(self, '_verbose',\n",
" 181 2 1.0 0.5 0.0 False) if verbose is None else verbose\n",
" 182 \n",
" 183 1 31.0 31.0 0.0 self._setup_frames(self._trajectory, start, stop, step)\n",
" 184 1 2.0 2.0 0.0 logger.info(\"Starting preparation\")\n",
" 185 1 36103.0 36103.0 0.2 self._prepare()\n",
" 186 903 13829765.0 15315.4 72.2 for i, ts in enumerate(ProgressBar(\n",
" 187 1 38.0 38.0 0.0 self._trajectory[self.start:self.stop:self.step],\n",
" 188 1 1.0 1.0 0.0 verbose=verbose)):\n",
" 189 901 2397.0 2.7 0.0 self._frame_index = i\n",
" 190 901 677.0 0.8 0.0 self._ts = ts\n",
" 191 901 1658.0 1.8 0.0 self.frames[i] = ts.frame\n",
" 192 901 3910.0 4.3 0.0 self.times[i] = ts.time\n",
" 193 # logger.info(\"--> Doing frame {} of {}\".format(i+1, self.n_frames))\n",
" 194 901 5286333.0 5867.2 27.6 self._single_frame()\n",
" 195 1 7.0 7.0 0.0 logger.info(\"Finishing up\")\n",
" 196 1 3.0 3.0 0.0 self._conclude()\n",
" 197 1 0.0 0.0 0.0 return self"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"%lprun -f mda.analysis.rms.RMSD.run serial_analysis(u, 6)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Timer unit: 1e-06 s\n",
"\n",
"Total time: 18.4071 s\n",
"File: /home/scottzhuang/mdanalysis/package/MDAnalysis/analysis/base.py\n",
"Function: run at line 164\n",
"\n",
"Line # Hits Time Per Hit % Time Line Contents\n",
"==============================================================\n",
" 164 def run(self, start=None, stop=None, step=None, verbose=None):\n",
" 165 \"\"\"Perform the calculation\n",
" 166 \n",
" 167 Parameters\n",
" 168 ----------\n",
" 169 start : int, optional\n",
" 170 start frame of analysis\n",
" 171 stop : int, optional\n",
" 172 stop frame of analysis\n",
" 173 step : int, optional\n",
" 174 number of frames to skip between each analysed frame\n",
" 175 verbose : bool, optional\n",
" 176 Turn on verbosity\n",
" 177 \"\"\"\n",
" 178 1 7.0 7.0 0.0 logger.info(\"Choosing frames to analyze\")\n",
" 179 # if verbose unchanged, use class default\n",
" 180 3 2.0 0.7 0.0 verbose = getattr(self, '_verbose',\n",
" 181 2 3.0 1.5 0.0 False) if verbose is None else verbose\n",
" 182 \n",
" 183 1 48.0 48.0 0.0 self._setup_frames(self._trajectory, start, stop, step)\n",
" 184 1 4.0 4.0 0.0 logger.info(\"Starting preparation\")\n",
" 185 1 37576.0 37576.0 0.2 self._prepare()\n",
" 186 903 13423728.0 14865.7 72.9 for i, ts in enumerate(ProgressBar(\n",
" 187 1 39.0 39.0 0.0 self._trajectory[self.start:self.stop:self.step],\n",
" 188 1 0.0 0.0 0.0 verbose=verbose)):\n",
" 189 901 2162.0 2.4 0.0 self._frame_index = i\n",
" 190 901 689.0 0.8 0.0 self._ts = ts\n",
" 191 901 1578.0 1.8 0.0 self.frames[i] = ts.frame\n",
" 192 901 3605.0 4.0 0.0 self.times[i] = ts.time\n",
" 193 # logger.info(\"--> Doing frame {} of {}\".format(i+1, self.n_frames))\n",
" 194 901 4937683.0 5480.2 26.8 self._single_frame()\n",
" 195 1 6.0 6.0 0.0 logger.info(\"Finishing up\")\n",
" 196 1 2.0 2.0 0.0 self._conclude()\n",
" 197 1 0.0 0.0 0.0 return self"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"%lprun -f mda.analysis.rms.RMSD.run serial_analysis(u, 1)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## _Single_frame Performance"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Timer unit: 1e-06 s\n",
"\n",
"Total time: 4.93216 s\n",
"File: /home/scottzhuang/mdanalysis/package/MDAnalysis/analysis/rms.py\n",
"Function: _single_frame at line 633\n",
"\n",
"Line # Hits Time Per Hit % Time Line Contents\n",
"==============================================================\n",
" 633 def _single_frame(self):\n",
" 634 901 2577233.0 2860.4 52.3 mobile_com = self.mobile_atoms.center(self.weights_select).astype(np.float64)\n",
" 635 901 1274168.0 1414.2 25.8 self._mobile_coordinates64[:] = self.mobile_atoms.positions\n",
" 636 901 534024.0 592.7 10.8 self._mobile_coordinates64 -= mobile_com\n",
" 637 \n",
" 638 901 11307.0 12.5 0.2 self.rmsd[self._frame_index, :2] = self._ts.frame, self._trajectory.time\n",
" 639 \n",
" 640 901 899.0 1.0 0.0 if self._groupselections_atoms:\n",
" 641 # superimpose structures: MDAnalysis qcprot needs Nx3 coordinate\n",
" 642 # array with float64 datatype (float32 leads to errors up to 1e-3 in\n",
" 643 # RMSD). Note that R is defined in such a way that it acts **to the\n",
" 644 # left** so that we can easily use broadcasting and save one\n",
" 645 # expensive numpy transposition.\n",
" 646 \n",
" 647 self.rmsd[self._frame_index, 2] = qcp.CalcRMSDRotationalMatrix(\n",
" 648 self._ref_coordinates64, self._mobile_coordinates64,\n",
" 649 self._n_atoms, self._rot, self.weights_select)\n",
" 650 \n",
" 651 self._R[:, :] = self._rot.reshape(3, 3)\n",
" 652 # Transform each atom in the trajectory (use inplace ops to\n",
" 653 # avoid copying arrays) (Marginally (~3%) faster than\n",
" 654 # \"ts.positions[:] = (ts.positions - x_com) * R + ref_com\".)\n",
" 655 self._ts.positions[:] -= mobile_com\n",
" 656 \n",
" 657 # R acts to the left & is broadcasted N times.\n",
" 658 self._ts.positions[:] = np.dot(self._ts.positions, self._R)\n",
" 659 self._ts.positions += self._ref_com\n",
" 660 \n",
" 661 # 2) calculate secondary RMSDs (without any further\n",
" 662 # superposition)\n",
" 663 for igroup, (refpos, atoms) in enumerate(\n",
" 664 zip(self._groupselections_ref_coords64,\n",
" 665 self._groupselections_atoms), 3):\n",
" 666 self.rmsd[self._frame_index, igroup] = rmsd(\n",
" 667 refpos, atoms['mobile'].positions,\n",
" 668 weights=self.weights_groupselections[igroup-3],\n",
" 669 center=False, superposition=False)\n",
" 670 else:\n",
" 671 # only calculate RMSD by setting the Rmatrix to None (no need\n",
" 672 # to carry out the rotation as we already get the optimum RMSD)\n",
" 673 1802 533174.0 295.9 10.8 self.rmsd[self._frame_index, 2] = qcp.CalcRMSDRotationalMatrix(\n",
" 674 901 659.0 0.7 0.0 self._ref_coordinates64, self._mobile_coordinates64,\n",
" 675 901 694.0 0.8 0.0 self._n_atoms, None, self.weights_select)"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"%lprun -f mda.analysis.rms.RMSD._single_frame serial_analysis(u, 1)"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Timer unit: 1e-06 s\n",
"\n",
"Total time: 5.18107 s\n",
"File: /home/scottzhuang/mdanalysis/package/MDAnalysis/analysis/rms.py\n",
"Function: _single_frame at line 633\n",
"\n",
"Line # Hits Time Per Hit % Time Line Contents\n",
"==============================================================\n",
" 633 def _single_frame(self):\n",
" 634 901 2726071.0 3025.6 52.6 mobile_com = self.mobile_atoms.center(self.weights_select).astype(np.float64)\n",
" 635 901 1334988.0 1481.7 25.8 self._mobile_coordinates64[:] = self.mobile_atoms.positions\n",
" 636 901 547465.0 607.6 10.6 self._mobile_coordinates64 -= mobile_com\n",
" 637 \n",
" 638 901 12161.0 13.5 0.2 self.rmsd[self._frame_index, :2] = self._ts.frame, self._trajectory.time\n",
" 639 \n",
" 640 901 1037.0 1.2 0.0 if self._groupselections_atoms:\n",
" 641 # superimpose structures: MDAnalysis qcprot needs Nx3 coordinate\n",
" 642 # array with float64 datatype (float32 leads to errors up to 1e-3 in\n",
" 643 # RMSD). Note that R is defined in such a way that it acts **to the\n",
" 644 # left** so that we can easily use broadcasting and save one\n",
" 645 # expensive numpy transposition.\n",
" 646 \n",
" 647 self.rmsd[self._frame_index, 2] = qcp.CalcRMSDRotationalMatrix(\n",
" 648 self._ref_coordinates64, self._mobile_coordinates64,\n",
" 649 self._n_atoms, self._rot, self.weights_select)\n",
" 650 \n",
" 651 self._R[:, :] = self._rot.reshape(3, 3)\n",
" 652 # Transform each atom in the trajectory (use inplace ops to\n",
" 653 # avoid copying arrays) (Marginally (~3%) faster than\n",
" 654 # \"ts.positions[:] = (ts.positions - x_com) * R + ref_com\".)\n",
" 655 self._ts.positions[:] -= mobile_com\n",
" 656 \n",
" 657 # R acts to the left & is broadcasted N times.\n",
" 658 self._ts.positions[:] = np.dot(self._ts.positions, self._R)\n",
" 659 self._ts.positions += self._ref_com\n",
" 660 \n",
" 661 # 2) calculate secondary RMSDs (without any further\n",
" 662 # superposition)\n",
" 663 for igroup, (refpos, atoms) in enumerate(\n",
" 664 zip(self._groupselections_ref_coords64,\n",
" 665 self._groupselections_atoms), 3):\n",
" 666 self.rmsd[self._frame_index, igroup] = rmsd(\n",
" 667 refpos, atoms['mobile'].positions,\n",
" 668 weights=self.weights_groupselections[igroup-3],\n",
" 669 center=False, superposition=False)\n",
" 670 else:\n",
" 671 # only calculate RMSD by setting the Rmatrix to None (no need\n",
" 672 # to carry out the rotation as we already get the optimum RMSD)\n",
" 673 1802 557903.0 309.6 10.8 self.rmsd[self._frame_index, 2] = qcp.CalcRMSDRotationalMatrix(\n",
" 674 901 726.0 0.8 0.0 self._ref_coordinates64, self._mobile_coordinates64,\n",
" 675 901 719.0 0.8 0.0 self._n_atoms, None, self.weights_select)"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"%lprun -f mda.analysis.rms.RMSD._single_frame serial_analysis(u, 6)"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Timer unit: 1e-06 s\n",
"\n",
"Total time: 9.54392 s\n",
"File: /home/scottzhuang/mdanalysis/package/MDAnalysis/analysis/rms.py\n",
"Function: _single_frame at line 633\n",
"\n",
"Line # Hits Time Per Hit % Time Line Contents\n",
"==============================================================\n",
" 633 def _single_frame(self):\n",
" 634 901 4954216.0 5498.6 51.9 mobile_com = self.mobile_atoms.center(self.weights_select).astype(np.float64)\n",
" 635 901 2740537.0 3041.7 28.7 self._mobile_coordinates64[:] = self.mobile_atoms.positions\n",
" 636 901 899702.0 998.6 9.4 self._mobile_coordinates64 -= mobile_com\n",
" 637 \n",
" 638 901 15913.0 17.7 0.2 self.rmsd[self._frame_index, :2] = self._ts.frame, self._trajectory.time\n",
" 639 \n",
" 640 901 1552.0 1.7 0.0 if self._groupselections_atoms:\n",
" 641 # superimpose structures: MDAnalysis qcprot needs Nx3 coordinate\n",
" 642 # array with float64 datatype (float32 leads to errors up to 1e-3 in\n",
" 643 # RMSD). Note that R is defined in such a way that it acts **to the\n",
" 644 # left** so that we can easily use broadcasting and save one\n",
" 645 # expensive numpy transposition.\n",
" 646 \n",
" 647 self.rmsd[self._frame_index, 2] = qcp.CalcRMSDRotationalMatrix(\n",
" 648 self._ref_coordinates64, self._mobile_coordinates64,\n",
" 649 self._n_atoms, self._rot, self.weights_select)\n",
" 650 \n",
" 651 self._R[:, :] = self._rot.reshape(3, 3)\n",
" 652 # Transform each atom in the trajectory (use inplace ops to\n",
" 653 # avoid copying arrays) (Marginally (~3%) faster than\n",
" 654 # \"ts.positions[:] = (ts.positions - x_com) * R + ref_com\".)\n",
" 655 self._ts.positions[:] -= mobile_com\n",
" 656 \n",
" 657 # R acts to the left & is broadcasted N times.\n",
" 658 self._ts.positions[:] = np.dot(self._ts.positions, self._R)\n",
" 659 self._ts.positions += self._ref_com\n",
" 660 \n",
" 661 # 2) calculate secondary RMSDs (without any further\n",
" 662 # superposition)\n",
" 663 for igroup, (refpos, atoms) in enumerate(\n",
" 664 zip(self._groupselections_ref_coords64,\n",
" 665 self._groupselections_atoms), 3):\n",
" 666 self.rmsd[self._frame_index, igroup] = rmsd(\n",
" 667 refpos, atoms['mobile'].positions,\n",
" 668 weights=self.weights_groupselections[igroup-3],\n",
" 669 center=False, superposition=False)\n",
" 670 else:\n",
" 671 # only calculate RMSD by setting the Rmatrix to None (no need\n",
" 672 # to carry out the rotation as we already get the optimum RMSD)\n",
" 673 1802 929278.0 515.7 9.7 self.rmsd[self._frame_index, 2] = qcp.CalcRMSDRotationalMatrix(\n",
" 674 901 1354.0 1.5 0.0 self._ref_coordinates64, self._mobile_coordinates64,\n",
" 675 901 1373.0 1.5 0.0 self._n_atoms, None, self.weights_select)"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"%lprun -f mda.analysis.rms.RMSD._single_frame serial_analysis(u, 12)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Transformation Performance"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Timer unit: 1e-06 s\n",
"\n",
"Total time: 8.51724 s\n",
"File: /home/scottzhuang/mdanalysis/package/MDAnalysis/transformations/fit.py\n",
"Function: __call__ at line 210\n",
"\n",
"Line # Hits Time Per Hit % Time Line Contents\n",
"==============================================================\n",
" 210 def __call__(self, ts):\n",
" 211 904 2570430.0 2843.4 30.2 mobile_com = self.mobile.atoms.center(self.weights)\n",
" 212 904 1943236.0 2149.6 22.8 mobile_coordinates = self.mobile.atoms.positions - mobile_com\n",
" 213 1808 2472856.0 1367.7 29.0 rotation, dump = align.rotation_matrix(mobile_coordinates,\n",
" 214 904 771.0 0.9 0.0 self.ref_coordinates,\n",
" 215 904 511.0 0.6 0.0 weights=self.weights)\n",
" 216 904 1539.0 1.7 0.0 vector = self.ref_com\n",
" 217 904 684.0 0.8 0.0 if self.plane is not None:\n",
" 218 matrix = np.r_[rotation, np.zeros(3).reshape(1, 3)]\n",
" 219 matrix = np.c_[matrix, np.zeros(4)]\n",
" 220 euler_angs = np.asarray(euler_from_matrix(matrix, axes='sxyz'),\n",
" 221 np.float32)\n",
" 222 for i in range(0, euler_angs.size):\n",
" 223 euler_angs[i] = (euler_angs[self.plane] if i == self.plane\n",
" 224 else 0)\n",
" 225 rotation = euler_matrix(euler_angs[0],\n",
" 226 euler_angs[1],\n",
" 227 euler_angs[2],\n",
" 228 axes='sxyz')[:3, :3]\n",
" 229 vector[self.plane] = mobile_com[self.plane]\n",
" 230 904 348306.0 385.3 4.1 ts.positions = ts.positions - mobile_com\n",
" 231 904 931185.0 1030.1 10.9 ts.positions = np.dot(ts.positions, rotation.T)\n",
" 232 904 247094.0 273.3 2.9 ts.positions = ts.positions + vector\n",
" 233 904 630.0 0.7 0.0 return ts"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"%lprun -f mda.transformations.fit.fit_rot_trans.__call__ serial_analysis(u, 1)"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Timer unit: 1e-06 s\n",
"\n",
"Total time: 8.23789 s\n",
"File: /home/scottzhuang/mdanalysis/package/MDAnalysis/transformations/fit.py\n",
"Function: __call__ at line 210\n",
"\n",
"Line # Hits Time Per Hit % Time Line Contents\n",
"==============================================================\n",
" 210 def __call__(self, ts):\n",
" 211 904 2576161.0 2849.7 31.3 mobile_com = self.mobile.atoms.center(self.weights)\n",
" 212 904 1949342.0 2156.4 23.7 mobile_coordinates = self.mobile.atoms.positions - mobile_com\n",
" 213 1808 2466563.0 1364.2 29.9 rotation, dump = align.rotation_matrix(mobile_coordinates,\n",
" 214 904 698.0 0.8 0.0 self.ref_coordinates,\n",
" 215 904 540.0 0.6 0.0 weights=self.weights)\n",
" 216 904 1493.0 1.7 0.0 vector = self.ref_com\n",
" 217 904 723.0 0.8 0.0 if self.plane is not None:\n",
" 218 matrix = np.r_[rotation, np.zeros(3).reshape(1, 3)]\n",
" 219 matrix = np.c_[matrix, np.zeros(4)]\n",
" 220 euler_angs = np.asarray(euler_from_matrix(matrix, axes='sxyz'),\n",
" 221 np.float32)\n",
" 222 for i in range(0, euler_angs.size):\n",
" 223 euler_angs[i] = (euler_angs[self.plane] if i == self.plane\n",
" 224 else 0)\n",
" 225 rotation = euler_matrix(euler_angs[0],\n",
" 226 euler_angs[1],\n",
" 227 euler_angs[2],\n",
" 228 axes='sxyz')[:3, :3]\n",
" 229 vector[self.plane] = mobile_com[self.plane]\n",
" 230 904 352333.0 389.7 4.3 ts.positions = ts.positions - mobile_com\n",
" 231 904 602192.0 666.1 7.3 ts.positions = np.dot(ts.positions, rotation.T)\n",
" 232 904 287182.0 317.7 3.5 ts.positions = ts.positions + vector\n",
" 233 904 665.0 0.7 0.0 return ts"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"%lprun -f mda.transformations.fit.fit_rot_trans.__call__ serial_analysis(u, 6)"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Timer unit: 1e-06 s\n",
"\n",
"Total time: 14.1718 s\n",
"File: /home/scottzhuang/mdanalysis/package/MDAnalysis/transformations/fit.py\n",
"Function: __call__ at line 210\n",
"\n",
"Line # Hits Time Per Hit % Time Line Contents\n",
"==============================================================\n",
" 210 def __call__(self, ts):\n",
" 211 904 4995354.0 5525.8 35.2 mobile_com = self.mobile.atoms.center(self.weights)\n",
" 212 904 4194614.0 4640.1 29.6 mobile_coordinates = self.mobile.atoms.positions - mobile_com\n",
" 213 1808 3237706.0 1790.8 22.8 rotation, dump = align.rotation_matrix(mobile_coordinates,\n",
" 214 904 1251.0 1.4 0.0 self.ref_coordinates,\n",
" 215 904 1097.0 1.2 0.0 weights=self.weights)\n",
" 216 904 2104.0 2.3 0.0 vector = self.ref_com\n",
" 217 904 1333.0 1.5 0.0 if self.plane is not None:\n",
" 218 matrix = np.r_[rotation, np.zeros(3).reshape(1, 3)]\n",
" 219 matrix = np.c_[matrix, np.zeros(4)]\n",
" 220 euler_angs = np.asarray(euler_from_matrix(matrix, axes='sxyz'),\n",
" 221 np.float32)\n",
" 222 for i in range(0, euler_angs.size):\n",
" 223 euler_angs[i] = (euler_angs[self.plane] if i == self.plane\n",
" 224 else 0)\n",
" 225 rotation = euler_matrix(euler_angs[0],\n",
" 226 euler_angs[1],\n",
" 227 euler_angs[2],\n",
" 228 axes='sxyz')[:3, :3]\n",
" 229 vector[self.plane] = mobile_com[self.plane]\n",
" 230 904 468590.0 518.4 3.3 ts.positions = ts.positions - mobile_com\n",
" 231 904 854181.0 944.9 6.0 ts.positions = np.dot(ts.positions, rotation.T)\n",
" 232 904 414268.0 458.3 2.9 ts.positions = ts.positions + vector\n",
" 233 904 1312.0 1.5 0.0 return ts"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"%lprun -f mda.transformations.fit.fit_rot_trans.__call__ serial_analysis(u, 12)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "gsoc",
"language": "python",
"name": "gsoc"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
@yuxuanzhuang
Copy link
Author

@orbeckst Sorry something is wrong with gist, cannot display the old one properly.
It's a 6-core CPU. I have added the benchmark for 6 threads--don't seem to differ much from 1 thread.
So it's most likely a hyperthreading issue. What confused me is, subscribing to 12 threads doesn't hurt the performance without Transformations.

@yuxuanzhuang
Copy link
Author

Note the last three cells that self._xdr.read() is marginally slower, which I assume have nothing to do with Transformations, but still only happens with Transformations present.
https://gist.github.com/yuxuanzhuang/4918cd1b5d8d62de79eab9df40de4bb7

@orbeckst
Copy link

It is admittedly weird that the presence of transformations affect the read performance.

Is this reproducible?

(Actually, will continue this specific discussion on the other gist.)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment