siboehm/profiling seml.md

## profiling seml.md

      
    Raw
  

              profiling seml.md
            
          
    Profiling SEML cluster jobs with py-spy

Setup


Copy profiling.py somewhere into your project.
The code in seml_sweep.py should go into your experiment.py (where your ExperimentWrapper etc. is defined).

Running it

Edit your experiment's .yaml file to conveniently turn profiling on and off:
fixed:
  profiling.run_profiler: True
  profiling.outdir: "./"

Collecting the results

The final profile is saved on the local filesystem and inside of MongoDB.
If you have the file in the filesystem you can skip the next step.
Loading from MongoDB

Loading it from MongoDB is a bit annoying.

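Look up the ObjectId. It is saved in the document of your seml run: artifacts contains a list of artifacts (there's probably only one), and under file_id you find the ObjectId. Save it in a variable called object_id (it must be a bson.ObjectId; if you copied the id as a plain string, wrap it with bson.ObjectId(...)).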
Load the object:

from seml.database import get_mongodb_config, get_database
# loads the connection settings from your .config/seml/mongodb.config
mongodb_config = get_mongodb_config()
# get a pymongo connection to MongoDB
db = get_database(**mongodb_config)

import gridfs
# object_id must already be defined (the file_id looked up in the step above).
# load the chunked object out of MongoDB's GridFS
profile_binary = gridfs.GridFS(db).get(object_id)
# write the binary file to disk
with open("profile.speedscope", "wb") as f:
	f.write(profile_binary.read(-1))
Checking out the results

To look at the results, open https://speedscope.app and upload the profiling file.
If you click on any block, it will show you the line number in your code at the bottom.
Important: To look up the Python line corresponding to the line number, make sure you're looking at the same version of the code that you profiled! If you've changed your code then lines might have moved. You can use the git commit hash to make sure you're looking at the correct version of the code. It is stored in the MongoDB by SEML during the profiling run.

  
## profiling.py
import os
import shutil
import signal
import subprocess
from pathlib import Path

import sacred


class Profiler:
    """
    Records the current Python process with py-spy into a speedscope file.

    Usage: create the profiler, call start() before the code to be measured
    and stop() afterwards. stop() also attaches the written profile to the
    given experiment as an artifact.
    """

    # File the speedscope profile is written to.
    outpath: Path
    # Handle of the running py-spy subprocess; None while nothing is being recorded.
    _process: "subprocess.Popen | None" = None

    def __init__(self, seed: str, save_dir: str):
        """
        Creates a new profiler without starting it yet.
        @param seed: random string used for generating unique filepath.
        @param save_dir: directory to save the file to.
        @raise AssertionError: if save_dir is not a directory or py-spy cannot be found.
        """
        assert Path(save_dir).is_dir(), f"{save_dir} is not a directory!"
        self.outpath = Path(save_dir) / f"profile_{seed}.speedscope"
        assert shutil.which("py-spy"), "py-spy not found, please install it first."
        self._process = None

    def start(self):
        """Start recording the current Python process"""
        # starts py-spy in a new subprocess
        self._process = subprocess.Popen(
            [
                shutil.which("py-spy"),
                "record",
                "--pid",
                str(os.getpid()),  # tells py-spy to profile the current Python process
                "--rate",
                "3",  # three samples per second should be fine-grained enough and the outfile won't get too large
                "--format",
                "speedscope",  # look at profiles via https://speedscope.app
                "--output",
                str(
                    self.outpath
                ),  # file to save results at (once profiling has finished)
            ]
        )

    # The annotation is quoted so that it is not evaluated when the class is defined.
    def stop(self, experiment: "sacred.Experiment"):
        """
        Stop recording and save the results to a file and to MongoDB.
        Does nothing if no recording is active, i.e. start() was never called
        or stop() has already been called.
        @param experiment: The seml / sacred experiment.
        """
        process = self._process
        if process is None:
            # Nothing was recorded, so there is nothing to stop or to upload.
            return
        # Drop the handle first so that a repeated stop() neither signals nor uploads again.
        self._process = None

        # First, send same signal as CTRL+C would. Py-spy should quit and save the results.
        process.send_signal(signal.SIGINT)
        try:
            # if the profiler didn't exit after 10s, kill it
            process.wait(timeout=10)
        except subprocess.TimeoutExpired:
            # sends SIGKILL. py-spy will quit, but will not save a profile.
            process.kill()
            print("killed py-spy due to timeout.")
            # collect the zombie process
            process.wait(timeout=2)

        # upload the profiling results to mongoDB as a binary
        if self.outpath.is_file():
            experiment.add_artifact(
                str(self.outpath),
                name="py_spy_profile",
                content_type="application/octet-stream",
            )

## seml_sweep.py
from profiling import Profiler
from pathlib import Path


# Module-level handle shared by the hooks below: set by init_profiler, read by stop_profiler.
profiler = None


@ex.pre_run_hook(prefix="profiling")
def init_profiler(run_profiler: bool, outdir: str):
    """
    Pre-run hook that creates and starts the module-level profiler.
    Registered with prefix="profiling"; does nothing unless run_profiler is truthy.
    @param run_profiler: whether this run should be profiled.
    @param outdir: directory the profile is written to; created if it does not exist.
    """
    global profiler
    if not run_profiler:
        return
    # exist_ok=True removes the check-then-create race: several jobs sharing the
    # same outdir may try to create it at the same time.
    Path(outdir).mkdir(parents=True, exist_ok=True)
    profiler = Profiler(
        # hash of the run's config, used to build a distinct output filename
        str(seml.utils.make_hash(ex.current_run.config)),
        outdir,
    )
    profiler.start()


@ex.post_run_hook
def stop_profiler():
    """Post-run hook that stops the module-level profiler, if one was created."""
    if not profiler:
        # profiling was not enabled for this run
        return
    profiler.stop(experiment=ex)
	import os
	import shutil
	import signal
	import subprocess
	from pathlib import Path

	import sacred


	class Profiler:
	    """
	    Records the current Python process with py-spy into a speedscope file.

	    Usage: create the profiler, call start() before the code to be measured
	    and stop() afterwards. stop() also attaches the written profile to the
	    given experiment as an artifact.
	    """

	    # File the speedscope profile is written to.
	    outpath: Path
	    # Handle of the running py-spy subprocess; None while nothing is being recorded.
	    _process: "subprocess.Popen | None" = None

	    def __init__(self, seed: str, save_dir: str):
	        """
	        Creates a new profiler without starting it yet.
	        @param seed: random string used for generating unique filepath.
	        @param save_dir: directory to save the file to.
	        @raise AssertionError: if save_dir is not a directory or py-spy cannot be found.
	        """
	        assert Path(save_dir).is_dir(), f"{save_dir} is not a directory!"
	        self.outpath = Path(save_dir) / f"profile_{seed}.speedscope"
	        assert shutil.which("py-spy"), "py-spy not found, please install it first."
	        self._process = None

	    def start(self):
	        """Start recording the current Python process"""
	        # starts py-spy in a new subprocess
	        self._process = subprocess.Popen(
	            [
	                shutil.which("py-spy"),
	                "record",
	                "--pid",
	                str(os.getpid()),  # tells py-spy to profile the current Python process
	                "--rate",
	                "3",  # three samples per second should be fine-grained enough and the outfile won't get too large
	                "--format",
	                "speedscope",  # look at profiles via https://speedscope.app
	                "--output",
	                str(
	                    self.outpath
	                ),  # file to save results at (once profiling has finished)
	            ]
	        )

	    # The annotation is quoted so that it is not evaluated when the class is defined.
	    def stop(self, experiment: "sacred.Experiment"):
	        """
	        Stop recording and save the results to a file and to MongoDB.
	        Does nothing if no recording is active, i.e. start() was never called
	        or stop() has already been called.
	        @param experiment: The seml / sacred experiment.
	        """
	        process = self._process
	        if process is None:
	            # Nothing was recorded, so there is nothing to stop or to upload.
	            return
	        # Drop the handle first so that a repeated stop() neither signals nor uploads again.
	        self._process = None

	        # First, send same signal as CTRL+C would. Py-spy should quit and save the results.
	        process.send_signal(signal.SIGINT)
	        try:
	            # if the profiler didn't exit after 10s, kill it
	            process.wait(timeout=10)
	        except subprocess.TimeoutExpired:
	            # sends SIGKILL. py-spy will quit, but will not save a profile.
	            process.kill()
	            print("killed py-spy due to timeout.")
	            # collect the zombie process
	            process.wait(timeout=2)

	        # upload the profiling results to mongoDB as a binary
	        if self.outpath.is_file():
	            experiment.add_artifact(
	                str(self.outpath),
	                name="py_spy_profile",
	                content_type="application/octet-stream",
	            )
	from profiling import Profiler
	from pathlib import Path


	profiler = None


	@ex.pre_run_hook(prefix="profiling")
	def init_profiler(run_profiler: bool, outdir: str):
	    """
	    Pre-run hook that creates and starts the module-level profiler.
	    Registered with prefix="profiling"; does nothing unless run_profiler is truthy.
	    @param run_profiler: whether this run should be profiled.
	    @param outdir: directory the profile is written to; created if it does not exist.
	    """
	    global profiler
	    if not run_profiler:
	        return
	    # exist_ok=True removes the check-then-create race: several jobs sharing the
	    # same outdir may try to create it at the same time.
	    Path(outdir).mkdir(parents=True, exist_ok=True)
	    profiler = Profiler(
	        # hash of the run's config, used to build a distinct output filename
	        str(seml.utils.make_hash(ex.current_run.config)),
	        outdir,
	    )
	    profiler.start()


	@ex.post_run_hook
	def stop_profiler():
	    """Post-run hook that stops the module-level profiler, if one was created."""
	    if not profiler:
	        # profiling was not enabled for this run
	        return
	    profiler.stop(experiment=ex)