Skip to content

Instantly share code, notes, and snippets.

@lucataco
Created January 26, 2023 19:45
Show Gist options
  • Save lucataco/9342b4a458849bbf6679f7529abac495 to your computer and use it in GitHub Desktop.
Safetensors speed comparison with bloom-560M
import os
import datetime
from huggingface_hub import hf_hub_download
from safetensors.torch import load_file
import torch

# Benchmark: loading bloom-560m weights via safetensors vs. pickle-based
# torch.load, on CPU and (if available) GPU. Downloads both checkpoint
# formats once; huggingface_hub caches them locally.
sf_filename = hf_hub_download("bigscience/bloom-560m", filename="model.safetensors")
pt_filename = hf_hub_download("bigscience/bloom-560m", filename="pytorch_model.bin")

# --- CPU comparison ---
start_st = datetime.datetime.now()
weights = load_file(sf_filename, device="cpu")
load_time_st = datetime.datetime.now() - start_st
print(f"Loaded safetensors {load_time_st}")

# Release the first copy (~1 GB of tensors) so the second load is not
# measured under extra memory pressure.
del weights

start_pt = datetime.datetime.now()
# NOTE(review): torch.load deserializes via pickle — unsafe on untrusted
# checkpoints. weights_only=True would mitigate, but is deliberately left
# off here so the timing stays comparable with the published numbers.
weights = torch.load(pt_filename, map_location="cpu")
load_time_pt = datetime.datetime.now() - start_pt
print(f"Loaded pytorch {load_time_pt}")
print(f"on CPU, safetensors is faster than pytorch by: {load_time_pt/load_time_st:.1f} X")

# --- GPU comparison (skipped gracefully on CPU-only machines) ---
if torch.cuda.is_available():
    del weights

    # This is required because this feature hasn't been fully verified yet,
    # but it's been tested on many different environments
    os.environ["SAFETENSORS_FAST_GPU"] = "1"

    # Warm up CUDA so one-time context creation stays out of the measurement
    torch.zeros((2, 2)).cuda()

    start_st = datetime.datetime.now()
    weights = load_file(sf_filename, device="cuda:0")
    # CUDA work can be asynchronous — make sure all device copies have
    # finished before stopping the clock.
    torch.cuda.synchronize()
    load_time_st = datetime.datetime.now() - start_st
    print(f"Loaded safetensors {load_time_st}")

    del weights

    start_pt = datetime.datetime.now()
    weights = torch.load(pt_filename, map_location="cuda:0")
    torch.cuda.synchronize()
    load_time_pt = datetime.datetime.now() - start_pt
    print(f"Loaded pytorch {load_time_pt}")
    print(f"on GPU, safetensors is faster than pytorch by: {load_time_pt/load_time_st:.1f} X")
@lucataco
Copy link
Author

lucataco commented Jan 26, 2023

HF Documentation: https://huggingface.co/docs/safetensors/speed#gpu-benchmark
CPU: Ryzen 7 5800X
GPU: RTX 4080

Loaded safetensors 0:00:00.020865
Loaded pytorch 0:00:00.235906
on CPU, safetensors is faster than pytorch by: 11.3 X
Loaded safetensors 0:00:00.110876
Loaded pytorch 0:00:00.256813
on GPU, safetensors is faster than pytorch by: 2.3 X

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment