# Drop this reference to the GPU-backed object so its memory can be reclaimed
# (other live references, if any, would still keep it alive).
del object_using_gpu
# Ask PyTorch's CUDA caching allocator to release its unused cached blocks.
torch.cuda.empty_cache()
# Size of tensor `a` in bytes: number of elements times bytes per element.
sz = a.nelement() * a.element_size()
# Whole gibibytes (2**30 bytes), rounded down -- anything below 1 GiB gives 0.
sz_gb = sz // (1 << 30)
# Query CUDA availability once and derive every other setting from it.
use_cuda = torch.cuda.is_available()
device = 'cuda:0' if use_cuda else 'cpu'

# Legacy typed-tensor classes: taken from torch.cuda when CUDA is usable,
# from torch itself otherwise.
_tensor_ns = torch.cuda if use_cuda else torch
FloatTensor = _tensor_ns.FloatTensor
LongTensor = _tensor_ns.LongTensor
ByteTensor = _tensor_ns.ByteTensor
# Default tensor alias is the float variant.
Tensor = FloatTensor
from torch.distributions import uniform

# Uniform distribution between -bound and bound; `bound` must already be
# defined by the surrounding code.
rng = uniform.Uniform(low=-bound, high=bound)
# Draw a 2x3 tensor of samples from it.
t = rng.sample(sample_shape=(2, 3))
import time

import torch

# Side length of the square matrices being multiplied.
n = 1000

# Prefer CUDA, then Apple's MPS backend, and fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device('cpu')


def _sync(dev):
    """Block until all work queued on *dev* has finished (no-op on CPU).

    Accelerator kernels are launched asynchronously, so the clock must only
    be read after the device has caught up; otherwise the measurement covers
    the launch, not the computation.
    """
    if dev.type == "cuda":
        torch.cuda.synchronize()
    elif dev.type == "mps" and hasattr(torch, "mps"):
        torch.mps.synchronize()


# Reference operands on the CPU and copies on the selected device.
A = torch.randn(n, n)
B = torch.randn(n, n)
gA = A.to(device)
gB = B.to(device)

# CPU baseline: time one matmul followed by a norm.
start_time = time.perf_counter()
nrm = torch.matmul(A, B).norm()
print(" CPU took {} seconds ".format(time.perf_counter() - start_time))
print(" norm = ", nrm)

# Same computation on `device`, run twice.
# NOTE(review): the first pass presumably includes one-off start-up cost, so
# the second timing is the more representative one -- confirm on target HW.
for _ in range(2):
    _sync(device)  # make sure earlier transfers/kernels are not timed
    start_time = time.perf_counter()
    gnrm = torch.matmul(gA, gB).norm()
    _sync(device)  # wait for the result before reading the clock
    print(" GPU took {} seconds ".format(time.perf_counter() - start_time))
    print(" norm = ", gnrm)
# MPS backend notes: https://pytorch.org/docs/master/notes/mps.html
# Check the MPS backend:
# Whether an MPS device can be used in the current environment.
torch.backends.mps.is_available()
# Whether this PyTorch build was compiled with MPS support.
torch.backends.mps.is_built()
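# Shell commands (Windows, not Python) for running visdom from the conda
# environment's Scripts directory:
#   cd E:\Anaconda2\envs\pytorch\Scripts
#   python visdom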
# Copy source_net's state (as returned by state_dict()) into target_net.
# NOTE(review): assumes both networks have matching state-dict keys.
target_net.load_state_dict( source_net.state_dict() )
# Demo: gradients flow back through column selection with an index tensor.
qvalues = torch.rand(5, 5, requires_grad=True)
y = torch.LongTensor([1, 3])  # columns to pick

new_qvalues = qvalues[:, y]
qvalues_a = new_qvalues.pow(2)

# Seed the backward pass with ones; d(q**2)/dq = 2*q, so the selected columns
# of qvalues.grad hold 2*q while every other column stays zero.
qvalues_a.backward(gradient=torch.ones(new_qvalues.shape), retain_graph=True)

# The selected columns of the first output should equal the second output.
print(qvalues.grad)
print(new_qvalues * 2)
def shownet(netfile):
    """Plot the first four slices of each array stored in *netfile*.

    The file is opened with ``np.load`` and iterated by key, so it is expected
    to be an ``.npz`` archive. Entries whose name contains ``'_b'`` are
    skipped (presumably biases -- confirm against the code that writes the
    file). For every remaining entry one figure is created with four
    side-by-side panels showing ``array[0]`` .. ``array[3]`` and a single
    colorbar, and the figure is titled with the entry name.

    Args:
        netfile: Path (or file object) accepted by ``np.load``.
    """
    # The context manager closes the archive's file handle once all arrays
    # have been read.
    with np.load(netfile) as net:
        for m in net:
            if '_b' in m:
                continue
            # Read the array once; each net[m] access re-reads the archive.
            arr = net[m]
            fig, axes = plt.subplots(1, 4, figsize=(12, 5))
            for axi in range(4):
                mat = arr[axi]
                im = axes[axi].matshow(mat, interpolation='nearest', aspect='auto')
                axes[axi].axis('off')
            # Leave room on the right and draw one colorbar there; it uses
            # the colour scale of the last panel drawn.
            fig.subplots_adjust(right=0.8)
            cax = fig.add_axes([0.875, 0.15, 0.02, 0.7])
            fig.colorbar(im, cax=cax)
            fig.suptitle(m)
    # Show all figures together once the file has been closed.
    plt.show()
# Device type string (e.g. 'cpu' or 'cuda') of the model's first parameter.
# NOTE(review): only representative if all parameters share one device.
next(model.parameters()).device.type
# Count only trainable parameters (those with requires_grad set) and add the
# total to the running `numparams`, which must already be defined.
# numel() yields a plain int for every shape, including 0-dim parameters.
model_parameters = (p for p in model.parameters() if p.requires_grad)
numparams += sum(p.numel() for p in model_parameters)