PyTorch using the CPU with parallelized training
### Model development and training
# Import libraries and dataset (duplicate imports removed)
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.multiprocessing as mp
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.datasets import CIFAR10
# Set the device to CPU
device = torch.device("cpu")
print("Using device:", device)
# Define the neural network: __init__ and forward
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.conv3 = nn.Conv2d(64, 128, 3, padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(128 * 4 * 4, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 10)

    def forward(self, x):
        # Three conv blocks, each halving the 32x32 input: 32 -> 16 -> 8 -> 4
        x = self.pool(F.relu(self.bn1(self.conv1(x))))
        x = self.pool(F.relu(self.bn2(self.conv2(x))))
        x = self.pool(F.relu(self.bn3(self.conv3(x))))
        x = x.view(-1, 128 * 4 * 4)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
# Instantiate the Model
net = Net().to(device)
# Define the Loss Function and Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)
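# Note (assumption, not stated in the gist): with Hogwild-style sharing, only
# the model parameters live in shared memory; optimizer state such as Adam's
# moment estimates is kept separately inside each worker process.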
# Load and transform the data; random augmentation applies only to the
# training set, while the test set gets a deterministic transform.
train_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, padding=4),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])
# pin_memory dropped: it only speeds up host-to-GPU copies, which never happen
# on a CPU-only run; num_workers kept low because every training process gets
# its own copy of the loader.
trainset = CIFAR10(root='./data', train=True, download=True, transform=train_transform)
trainloader = DataLoader(trainset, batch_size=128, shuffle=True, num_workers=2)
testset = CIFAR10(root='./data', train=False, download=True, transform=test_transform)
testloader = DataLoader(testset, batch_size=128, shuffle=False, num_workers=2)
# Training function, run in parallel by each worker process
def train(net, trainloader, criterion, optimizer, device):
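    # Assumption (not in the original gist): limit each worker to one intra-op
    # thread so that mp.cpu_count() processes do not oversubscribe the CPU.
    torch.set_num_threads(1)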
    for epoch in range(25):  # loop over the dataset multiple times
        running_loss = 0.0
        for i, data in enumerate(trainloader, 0):
            # get the inputs; data is a tuple of [inputs, labels]
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            if i % 200 == 199:  # print every 200 mini-batches
                print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 200:.3f}')
                running_loss = 0.0
    print('Finished Training')
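# The gist builds a testloader but never uses it; the following is a minimal
# evaluation sketch (an addition, not part of the original gist) reporting
# top-1 accuracy on the test set.
def evaluate(net, testloader, device):
    net.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for inputs, labels in testloader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = net(inputs)
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print(f'Test accuracy: {100.0 * correct / total:.2f}%')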
if __name__ == '__main__':
    # Note: with 'spawn', each child re-imports this module, so the
    # module-level setup above runs once per worker as well.
    mp.set_start_method('spawn')  # avoid fork-related errors
    net.share_memory()  # put the model parameters in shared memory (Hogwild-style)
    processes = []
    for rank in range(mp.cpu_count()):
        p = mp.Process(target=train, args=(net, trainloader, criterion, optimizer, device))
        p.start()
        processes.append(p)
    for p in processes:
        p.join()
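    # Added (not in the original gist): report test accuracy once all
    # workers have finished updating the shared model.
    evaluate(net, testloader, device)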