## monitor GPU usage in jupyter notebook -- rhee.elten@gmail.com
## https://gist.github.com/rhee-elten/1a1070e3a812ca863c3b937b5180b2f8
# pylint: disable=invalid-name
# pylint: disable=using-constant-test
# pylint: disable=wrong-import-position
# pylint: disable=missing-class-docstring
# pylint: disable=missing-function-docstring
if 10:
    import os

    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"  # make device indices match nvidia-smi (PCI bus) order
    os.environ["HDF5_USE_FILE_LOCKING"] = "FALSE"
if 10:
    import logging

    logging.basicConfig()
    logger = logging.getLogger("nv_gpu_stat")
    logger.setLevel(logging.DEBUG)

    # disable numpy warnings
    import warnings

    warnings.filterwarnings("ignore")

    import sys
    import io
    import os
    from os.path import expanduser, dirname, isfile
    from time import sleep, time
    from socket import gethostname
    from datetime import datetime
    from subprocess import run as subprocess_run

    import numpy as np
    from pandas import read_csv
    from h5py import File as h5file
    import matplotlib.pyplot as plt
def nv_gpu_stat():
    """
    example output:
    print(nv_gpu_stat().to_string(index=False))
     index            name  temperature.gpu  utilization.gpu [%]  power.draw [W]  memory.used [MiB]  memory.total [MiB]
         0  Tesla M40 24GB               17                    0           17.46                  0               24478
         1  Tesla M40 24GB               20                    0           18.23                  0               24478
         2  Tesla M40 24GB               19                    0           18.53                  0               24478
         3  Tesla M40 24GB               20                    0           18.83                  0               24478
    """
    query_gpu = "--query-gpu=index,name,temperature.gpu,utilization.gpu,power.draw,memory.used,memory.total"
    query_format = "--format=csv,nounits"
    try:
        nvidia_smi = "nvidia-smi"
        proc = subprocess_run(
            [nvidia_smi, query_gpu, query_format], capture_output=True, check=False
        )
    except FileNotFoundError:
        # fall back to the default Windows install location
        nvidia_smi = "C:/Program Files/NVIDIA Corporation/NVSMI/nvidia-smi"
        proc = subprocess_run(
            [nvidia_smi, query_gpu, query_format], capture_output=True, check=False
        )
    assert proc.returncode == 0, ("proc failure exitcode:", proc.returncode)
    return read_csv(io.StringIO(proc.stdout.decode("ascii")))
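## Usage sketch (not in the original gist): the returned DataFrame keeps the
## leading space that nvidia-smi's "csv,nounits" output puts after each comma,
## so columns are addressed as " memory.used [MiB]" etc., exactly as
## update_h5_file does below.
#   stat = nv_gpu_stat()
#   print(stat.columns.tolist())               # ['index', ' name', ' temperature.gpu', ...]
#   print(stat[" memory.used [MiB]"].values)   # per-GPU memory usage as a numpy array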
NV_GPU_STAT_COLLECT_FILE = expanduser("~/.nv_gpu_stat/collect.h5")
def nv_gpu_stat_collect(
    collect_file=None, collect_interval=0.5, collect_length=2048, verbose=0
):
    def check_h5_file(f):
        ## the collect file must already contain all five datasets
        if "timestamp" not in f:
            return False
        if "temperature_gpu" not in f:
            return False
        if "utilization_gpu" not in f:
            return False
        if "power_draw" not in f:
            return False
        if "memory_used" not in f:
            return False
        return True

    def initialize_h5_file(f, *, collect_length, num_gpus=None):
        """
        ## fill initial values into file if needed
        """
        # call nv_gpu_stat to get num_gpus
        num_gpus = num_gpus or nv_gpu_stat().values.shape[0]
        f.create_dataset("timestamp", (collect_length,), np.float64)
        f["timestamp"][:] = np.nan
        f.create_dataset("temperature_gpu", (collect_length, num_gpus), np.float64)
        f["temperature_gpu"][:] = np.nan
        f.create_dataset("utilization_gpu", (collect_length, num_gpus), np.float64)
        f["utilization_gpu"][:] = np.nan
        f.create_dataset("power_draw", (collect_length, num_gpus), np.float64)
        f["power_draw"][:] = np.nan
        f.create_dataset("memory_used", (collect_length, num_gpus), np.float64)
        f["memory_used"][:] = np.nan

    def update_h5_file(f, gpu_stat):
        ## read initial values
        timestamp = f["timestamp"][:]
        temperature_gpu = f["temperature_gpu"][:]
        utilization_gpu = f["utilization_gpu"][:]
        power_draw = f["power_draw"][:]
        memory_used = f["memory_used"][:]
        ## roll
        timestamp = np.roll(timestamp, -1, axis=0)
        temperature_gpu = np.roll(temperature_gpu, -1, axis=0)
        utilization_gpu = np.roll(utilization_gpu, -1, axis=0)
        power_draw = np.roll(power_draw, -1, axis=0)
        memory_used = np.roll(memory_used, -1, axis=0)
        ## put new measure at the last
        timestamp[-1] = t_now  # t_now comes from the enclosing while loop (closure)
        temperature_gpu[-1, :] = gpu_stat[" temperature.gpu"].values
        utilization_gpu[-1, :] = gpu_stat[" utilization.gpu [%]"].values
        power_draw[-1, :] = gpu_stat[" power.draw [W]"].values
        memory_used[-1, :] = gpu_stat[" memory.used [MiB]"].values
        ## write back to h5 file
        f["timestamp"][:] = timestamp
        f["temperature_gpu"][:] = temperature_gpu
        f["utilization_gpu"][:] = utilization_gpu
        f["power_draw"][:] = power_draw
        f["memory_used"][:] = memory_used

    collect_file = collect_file or NV_GPU_STAT_COLLECT_FILE
    os.makedirs(dirname(collect_file), exist_ok=True)
    logger.debug("stat_collect: [1]open(a): %s", collect_file)
    try:
        with h5file(collect_file, "a") as f:
            if not check_h5_file(f):
                initialize_h5_file(f, collect_length=collect_length)
            while True:
                t_now = time()
                if verbose:
                    logger.info(">>> nv_gpu_stat: time: %d", t_now)
                gpu_stat = nv_gpu_stat()
                update_h5_file(f, gpu_stat)
                t_next = t_now + collect_interval
                t_sleep = t_next - time()
                if t_sleep > 0.0:
                    sleep(t_sleep)
    except OSError:
        logger.exception("h5py File open failed")
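## Usage sketch (not in the original gist): nv_gpu_stat_collect() loops forever,
## so it is normally run in a background thread or separate process, as
## nv_gpu_stat_monitor() does below; the kwargs here are only illustrative.
#   from threading import Thread
#   collector = Thread(target=nv_gpu_stat_collect,
#                      kwargs=dict(collect_interval=1.0), daemon=True)
#   collector.start()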
def nv_gpu_stat_query(time_up_to=None, collect_file=None):
    time_up_to = time_up_to or time()
    collect_file = collect_file or NV_GPU_STAT_COLLECT_FILE
    if not isfile(collect_file):
        logger.debug("stat_query: file does not exist: %s", collect_file)
        return None
    try:
        logger.debug("stat_query: open(r): %s", collect_file)
        with h5file(collect_file, "r") as f:
            timestamp = f["timestamp"][:]
            temperature_gpu = f["temperature_gpu"][:]
            utilization_gpu = f["utilization_gpu"][:]
            power_draw = f["power_draw"][:]
            memory_used = f["memory_used"][:]
    except Exception:
        _, exc_value, _ = sys.exc_info()
        logger.exception("*** %s", exc_value)
        return None
    if time_up_to:
        # NaN timestamps (slots not yet filled by the collector) compare False and are dropped
        in_range = timestamp <= time_up_to
        timestamp = timestamp[in_range]
        temperature_gpu = temperature_gpu[in_range]
        utilization_gpu = utilization_gpu[in_range]
        power_draw = power_draw[in_range]
        memory_used = memory_used[in_range]
    return dict(
        timestamp=timestamp,
        temperature_gpu=temperature_gpu,
        utilization_gpu=utilization_gpu,
        power_draw=power_draw,
        memory_used=memory_used,
    )
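## Usage sketch (not in the original gist): query the rolling buffer and inspect
## the most recent sample; rows are ordered oldest-to-newest.
#   collect = nv_gpu_stat_query()
#   if collect is not None and len(collect["timestamp"]) > 0:
#       print("last sample at", datetime.fromtimestamp(collect["timestamp"][-1]))
#       print("utilization [%] per GPU:", collect["utilization_gpu"][-1])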
def nv_gpu_plot_values(
    time_series,
    values,
    t_now=None,
    ax=None,
    plot_seconds=450.0,
    stat_seconds=60.0,
    plot_config=None,
    min_ymax=10.0,
    plot_kws=None,
):
    plot_kws = plot_kws or dict()
    ax = ax or plt.gca()
    time_series = np.asarray(time_series)
    t_now = t_now or time_series[-1]
    values = np.asarray(values)
    plot_config = plot_config or dict(title="values", value_fmt="{:>5.1f}", ylim=None)
    title = plot_config["title"]
    value_fmt = plot_config["value_fmt"]
    if not callable(value_fmt):
        value_fmt = lambda x, _fmt=value_fmt: _fmt.format(x)
    ylim = plot_config["ylim"]
    if time_series[0] + stat_seconds > time_series[-1]:
        stat_seconds = time_series[-1] - time_series[0]
    stat_select = (t_now - stat_seconds <= time_series) & (time_series <= t_now)
    for i, ser in enumerate(np.transpose(values)):
        y_stat = ser[stat_select]
        # current value (the last sample)
        val_last = ser[-1]
        if len(y_stat) > 0:
            val_mean = np.nanmean(y_stat)
            val_max = np.nanmax(y_stat)
            # do plot
            label = "G{:d} {:s}, avg={:s}, max={:s}".format(
                i, value_fmt(val_last), value_fmt(val_mean), value_fmt(val_max)
            )
        else:
            # do plot
            label = "G{:d} {:s}".format(i, value_fmt(val_last))
        ax.plot(time_series - t_now, ser, label=label, **plot_kws)
    ax.set_title(title)
    ax.legend(loc="upper left", prop={"size": 8}, bbox_to_anchor=(1, 1))
    # auto-compute the ylim upper bound, with a 10% margin
    max_win = np.amax(values)
    max_win = max(min_ymax, max_win)
    ylim = ylim or [-max_win * 0.1, max_win * 1.1]
    ax.set_ylim(ylim)
    # dashed vertical line marking the stat window, clipped to ylim
    dyn_y_lim = ax.get_ylim()
    ax.vlines(-stat_seconds, *dyn_y_lim, ls="dashed", lw=1.0, color="k", alpha=0.5)
    ax.set_xlim([-plot_seconds, 0])
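## Usage sketch (not in the original gist): nv_gpu_plot_values expects
## time_series of shape (T,) in epoch seconds and values of shape (T, num_gpus);
## the synthetic data below is illustrative only.
#   t = time() - np.arange(120)[::-1]                     # 120 one-second samples
#   fake_util = np.random.uniform(0, 100, size=(120, 2))  # two fake GPUs
#   fig, ax = plt.subplots()
#   nv_gpu_plot_values(t, fake_util, ax=ax,
#                      plot_config=dict(title="utilization.gpu [%]",
#                                       value_fmt="{:>3.0f}%", ylim=None))
#   plt.show()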
def nv_gpu_stat_draw(
    t_now=None,
    collect_file=None,
    values_dict=None,
    plot_configs=None,
    plot_context=None,
    hostname=None,
    figsize=(5.4, 7.4),
    dpi=100,
    plot_seconds=450.0,
    **kwargs
):
    if plot_context is None:
        plot_context = dict()
    t_now = t_now or time()
    hostname = hostname or gethostname()
    fig, axs = plt.subplots(4, 1, figsize=figsize, dpi=dpi)
    axs = axs.flatten()
    plot_context["fig"] = fig
    if plot_configs is None:
        plot_configs = dict(
            temperature_gpu=dict(
                title="temperature.gpu", value_fmt="{:>3.0f}", ylim=None
            ),
            utilization_gpu=dict(
                title="utilization.gpu [%]", value_fmt="{:>3.0f}%", ylim=None
            ),
            power_draw=dict(title="power.draw [W]", value_fmt="{:>3.0f}", ylim=None),
            memory_used=dict(title="memory.used [GB]", value_fmt="{:>4.1f}", ylim=None),
        )
    plot_kws = dict(lw=1, alpha=0.55)
    if values_dict is None:
        values_dict = nv_gpu_stat_query(collect_file=collect_file)
    timestamp = np.asarray(values_dict["timestamp"])
    temperature_gpu = np.asarray(values_dict["temperature_gpu"])
    utilization_gpu = np.asarray(values_dict["utilization_gpu"])
    power_draw = np.asarray(values_dict["power_draw"])
    memory_used = np.asarray(values_dict["memory_used"])
    memory_used = memory_used / 1024  # MiB ==> GiB
    plot_select = (t_now - plot_seconds <= timestamp) & (timestamp <= t_now)
    if not np.any(plot_select):
        logger.warning("nv_gpu_stat_draw: no data")
        return
    timestamp = timestamp[plot_select]
    temperature_gpu = temperature_gpu[plot_select, :]
    utilization_gpu = utilization_gpu[plot_select, :]
    power_draw = power_draw[plot_select, :]
    memory_used = memory_used[plot_select, :]
    nv_gpu_plot_values(
        timestamp,
        utilization_gpu,
        ax=axs[0],
        t_now=t_now,
        plot_seconds=plot_seconds,
        plot_config=plot_configs["utilization_gpu"],
        plot_kws=plot_kws,
        **kwargs
    )
    nv_gpu_plot_values(
        timestamp,
        memory_used,
        ax=axs[1],
        t_now=t_now,
        plot_seconds=plot_seconds,
        plot_config=plot_configs["memory_used"],
        plot_kws=plot_kws,
        **kwargs
    )
    nv_gpu_plot_values(
        timestamp,
        power_draw,
        ax=axs[2],
        t_now=t_now,
        plot_seconds=plot_seconds,
        plot_config=plot_configs["power_draw"],
        plot_kws=plot_kws,
        **kwargs
    )
    nv_gpu_plot_values(
        timestamp,
        temperature_gpu,
        ax=axs[3],
        t_now=t_now,
        plot_seconds=plot_seconds,
        plot_config=plot_configs["temperature_gpu"],
        plot_kws=plot_kws,
        **kwargs
    )
    dtstr = datetime.fromtimestamp(t_now).strftime("%H:%M:%S")
    fig_title = "{:s}\n{:s}".format(hostname, dtstr)
    fig.suptitle(fig_title, fontsize=12)
    fig.tight_layout(rect=[0, 0.03, 1, 0.92])  # fig.tight_layout()
    plt.show()
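## Usage sketch (not in the original gist): draw a single snapshot from the
## collect file; the collector must already be running (see nv_gpu_stat_collect).
#   %matplotlib inline
#   nv_gpu_stat_draw()                    # uses NV_GPU_STAT_COLLECT_FILE
#   nv_gpu_stat_draw(plot_seconds=120.0)  # only the last two minutes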
def nv_gpu_stat_monitor(collect_file=None, interval=5.0, out=None, **kwargs):
    """
    usage:
    %matplotlib inline
    from nv_gpu_stat import nv_gpu_stat_monitor
    nv_gpu_stat_monitor()
    """
    from IPython.display import clear_output

    # from multiprocessing import Process
    from threading import Thread

    # child = Thread(target=(lambda: nv_gpu_stat_collect(verbose=1, collect_interval=2.0)))
    child = Thread(target=nv_gpu_stat_collect)
    child.start()
    try:
        with plt.ion():  # start interactive mode
            plot_context = None
            while True:
                t_now = time()
                collect = nv_gpu_stat_query(time_up_to=t_now, collect_file=collect_file)
                # if the collect_file does not exist yet, keep waiting
                if collect:
                    clear_output(wait=True)
                    nv_gpu_stat_draw(
                        t_now=t_now,
                        values_dict=collect,
                        plot_context=plot_context,
                        **kwargs
                    )
                t_next = t_now + interval
                t_sleep = t_next - time()
                if t_sleep > 0.0:
                    sleep(t_sleep)
    finally:
        child.join()
if __name__ == "__main__":
    try:
        get_ipython().run_line_magic("matplotlib", "inline")
    except Exception:
        _, ex_val, _ = sys.exc_info()
        print(ex_val, file=sys.stderr)
        import matplotlib

        matplotlib.use("agg")  # or Qt5Agg?
    try:
        nv_gpu_stat_monitor()
    finally:
        try:
            # clean up notebook checkpoints and caches (only works inside IPython)
            get_ipython().system("rm -fvr .??*.ipynb .ipynb_checkpoints __pycache__")
        except NameError:
            pass