Example of using the monitor_gpu decorator. The JSON below is the result recorded by the "gpu" watcher (one entry in "content" per sample, taken every 3 seconds); the script that follows is what produced it.
{
    "commits": [
        "7ea1152603baef5f0012905f321586399f816958"
    ],
    "dates": [
        "2019-07-11 09:51:36 -0700"
    ],
    "content": [
        {
            "nvml_driver_version": "418.67",
            "nvml_system_nvml_version": "10.418.67",
            "nvml_deviceCount": 1,
            "nvml_unit_count": 0,
            "devices": {
                "Tesla V100-SXM2-32GB": {
                    "nvml_device_board_id": 6656,
                    "nvml_device_multi_gpu_board": 0,
                    "nvml_device_brand": 2,
                    "nvml_device_serial": "0323518083147",
                    "nvml_device_set_cpu_affinite": null,
                    "nvml_device_minor_number": 0,
                    "nvml_device_uuid": "GPU-be7b9ac8-f75e-1960-c52d-85429b4c86b1",
                    "nvml_device_inforom_version": "G503.0203.00.04",
                    "nvml_device_inforam_checksum": 0,
                    "nvml_device_display_mode": 1,
                    "nvml_device_display_active": 0,
                    "nvml_device_persistence_mode": 1,
                    "nvml_device_supported_memory_clocks": [
                        877
                    ],
                    "nvml_device_performance_state": 0,
                    "nvml_device_management_mode": 1,
                    "nvml_device_power_managerment_mode": 1,
                    "nvml_device_power_management_limit": 300000,
                    "nvml_device_power_management_limit_constraints": [
                        150000,
                        300000
                    ],
                    "nvml_device_power_management_default_limit": 300000,
                    "nvml_device_enforced_power_limit": 300000,
                    "nvml_device_power_usage": 43955,
                    "nvml_device_memory_info": {
                        "free": 34058207232,
                        "total": 34058272768,
                        "used": 65536
                    },
                    "nvml_device_bar1_memory_info": {
                        "bar1Free": 34357047296,
                        "bar1Total": 34359738368,
                        "bar1Used": 2691072
                    },
                    "nvml_device_compute_mode": 3,
                    "nvml_device_ecc_mode": [
                        1,
                        1
                    ],
                    "nvml_device_current_ecc_mode": 1,
                    "nvml_device_pending_ecc_mode": 1,
                    "nvml_device_utilization_rates": {
                        "gpu": 0,
                        "memory": 0
                    },
                    "nvml_device_encoder_utilization": [
                        0,
                        167000
                    ],
                    "nvml_device_decoder_utilization": [
                        0,
                        167000
                    ],
                    "nvml_device_pci_replay_counter": 0,
                    "nvml_device_vbios_version": "88.00.43.00.03",
                    "nvml_device_compute_running_processes": [],
                    "nvml_device_grapics_running_processes": [],
                    "nvml_device_supported_event_types": 31,
                    "nvml_device_current_pcie_link_generation": 3,
                    "nvml_device_max_pcie_link_generation": 3,
                    "nvml_device_curr_pcie_link_width": 16,
                    "nvml_device_max_pcie_link_width": 16,
                    "nvml_device_supported_clocks_throttle_reasons": 511,
                    "nvml_device_current_clocks_throttle_reasons": 1,
                    "nvml_device_index": 0,
                    "nvml_device_accounting_mode": 0,
                    "nvml_device_accounting_pids": [],
                    "nvml_device_accounting_buffer_size": 4000
                }
            },
            "SECONDS": "3"
        },
        {
            "nvml_driver_version": "418.67",
            "nvml_system_nvml_version": "10.418.67",
            "nvml_deviceCount": 1,
            "nvml_unit_count": 0,
            "devices": {
                "Tesla V100-SXM2-32GB": {
                    "nvml_device_board_id": 6656,
                    "nvml_device_multi_gpu_board": 0,
                    "nvml_device_brand": 2,
                    "nvml_device_serial": "0323518083147",
                    "nvml_device_set_cpu_affinite": null,
                    "nvml_device_minor_number": 0,
                    "nvml_device_uuid": "GPU-be7b9ac8-f75e-1960-c52d-85429b4c86b1",
                    "nvml_device_inforom_version": "G503.0203.00.04",
                    "nvml_device_inforam_checksum": 0,
                    "nvml_device_display_mode": 1,
                    "nvml_device_display_active": 0,
                    "nvml_device_persistence_mode": 1,
                    "nvml_device_supported_memory_clocks": [
                        877
                    ],
                    "nvml_device_performance_state": 0,
                    "nvml_device_management_mode": 1,
                    "nvml_device_power_managerment_mode": 1,
                    "nvml_device_power_management_limit": 300000,
                    "nvml_device_power_management_limit_constraints": [
                        150000,
                        300000
                    ],
                    "nvml_device_power_management_default_limit": 300000,
                    "nvml_device_enforced_power_limit": 300000,
                    "nvml_device_power_usage": 43955,
                    "nvml_device_memory_info": {
                        "free": 34058207232,
                        "total": 34058272768,
                        "used": 65536
                    },
                    "nvml_device_bar1_memory_info": {
                        "bar1Free": 34357047296,
                        "bar1Total": 34359738368,
                        "bar1Used": 2691072
                    },
                    "nvml_device_compute_mode": 3,
                    "nvml_device_ecc_mode": [
                        1,
                        1
                    ],
                    "nvml_device_current_ecc_mode": 1,
                    "nvml_device_pending_ecc_mode": 1,
                    "nvml_device_utilization_rates": {
                        "gpu": 0,
                        "memory": 0
                    },
                    "nvml_device_encoder_utilization": [
                        0,
                        167000
                    ],
                    "nvml_device_decoder_utilization": [
                        0,
                        167000
                    ],
                    "nvml_device_pci_replay_counter": 0,
                    "nvml_device_vbios_version": "88.00.43.00.03",
                    "nvml_device_compute_running_processes": [],
                    "nvml_device_grapics_running_processes": [],
                    "nvml_device_supported_event_types": 31,
                    "nvml_device_current_pcie_link_generation": 3,
                    "nvml_device_max_pcie_link_generation": 3,
                    "nvml_device_curr_pcie_link_width": 16,
                    "nvml_device_max_pcie_link_width": 16,
                    "nvml_device_supported_clocks_throttle_reasons": 511,
                    "nvml_device_current_clocks_throttle_reasons": 1,
                    "nvml_device_index": 0,
                    "nvml_device_accounting_mode": 0,
                    "nvml_device_accounting_pids": [],
                    "nvml_device_accounting_buffer_size": 4000
                }
            },
            "SECONDS": "3"
        }
    ]
}
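Each entry in "content" above is one sample taken while the decorated function ran, keyed by device name. As a minimal sketch for reading the result back into Python (the filename "gpu-results.json" is an assumed placeholder for wherever the exported result was saved, not a name watchme writes by default), the samples can be loaded and summarized like this:

#!/usr/bin/env python
# Minimal sketch (not part of the gist): load the monitoring result shown
# above and print one summary line per sample. "gpu-results.json" is an
# assumed placeholder filename.
import json

with open("gpu-results.json") as handle:
    result = json.load(handle)

for sample in result["content"]:
    for name, device in sample["devices"].items():
        util = device["nvml_device_utilization_rates"]
        mem = device["nvml_device_memory_info"]
        # NVML reports power usage in milliwatts
        print("%s: gpu=%s%% mem_used=%s/%s bytes power=%s mW" % (
            name,
            util["gpu"],
            mem["used"],
            mem["total"],
            device["nvml_device_power_usage"],
        ))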
#!/usr/bin/env python

from watchme.watchers.gpu.decorators import monitor_gpu
from time import sleep

# Here we use the monitor_gpu decorator on "myfunc." Specifically, we:
# - use the watcher "gpu", which already exists; to create it on the fly
#   we would instead set create=True
# - record metrics every 3 seconds
# - build a long list so the function has some impact on system resources
# - check that something is returned (the length of the list)

@monitor_gpu('gpu', seconds=3)
def myfunc(iters, pause):
    long_list = []
    print("Generating a long list, pause is %s and iters is %s" % (pause, iters))
    for i in range(iters):
        long_list = long_list + (i * 10) * ['pancakes']
        print("i is %s, sleeping %s seconds" % (i, pause))
        sleep(pause)
    return len(long_list)

# ensure the function runs when the file is called
if __name__ == '__main__':
    print("Calling myfunc with 2 iters")
    result = myfunc(2, 2)
    print("Result list has length %s" % result)