Skip to content

Instantly share code, notes, and snippets.

@brodzik
Created December 13, 2019 13:56
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save brodzik/09b723569c3201470ae93ccefc463777 to your computer and use it in GitHub Desktop.
import os
import random
import cv2
import numpy as np
import pandas as pd
import torch
import torch.cuda
import torch.nn as nn
from albumentations import *
from efficientnet_pytorch import EfficientNet
from sklearn.model_selection import train_test_split
from torch.nn.utils import clip_grad_norm_
from torch.optim import Adam
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.utils.data import DataLoader, Dataset
from tqdm.notebook import tqdm
# Reproducibility and optimization hyperparameters.
SEED = 42
EPOCHS = 100
BATCH_SIZE = 16
LEARNING_RATE = 0.0001
# Optimizer steps once every 4 batches -> effective batch size of 64.
GRADIENT_ACCUMULATION = 4
GRADIENT_CLIP = 0.1
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
DTYPE = torch.float32
# APTOS 2019 Kaggle input layout.
# NOTE(review): TEST_DIR points at train_images — this looks intentional,
# since the "test" split is carved out of train.csv (whose images live in
# train_images), but confirm if a held-out test set is ever added.
CSV_FILE = "../input/aptos2019-blindness-detection/train.csv"
TRAIN_DIR = "../input/aptos2019-blindness-detection/train_images"
TEST_DIR = "../input/aptos2019-blindness-detection/train_images"
# Augmentations run only at train time; both pipelines resize to 256x256 and
# apply ImageNet mean/std normalization (matches the pretrained EfficientNet).
TRAIN_TRANSFORM = Compose([
HorizontalFlip(),
RandomBrightnessContrast(),
Resize(height=256, width=256),
Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
TEST_TRANSFORM = Compose([
Resize(height=256, width=256),
Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
def seed_everything(seed):
    """Seed every RNG source (Python, hashing, NumPy, PyTorch) for reproducibility.

    Args:
        seed: Integer seed applied to all random number generators.
    """
    random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not just the current device (the original
    # torch.cuda.manual_seed only covered the active GPU).
    torch.cuda.manual_seed_all(seed)
    # Force deterministic cuDNN kernels and disable autotuning so the chosen
    # kernels (and therefore results) are stable across runs.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
def make_train_test_split(csv_file, test_size=0.2):
    """Read the labels CSV and return a stratified (train, test) DataFrame split.

    Args:
        csv_file: Path to a CSV containing a "diagnosis" column.
        test_size: Fraction of rows held out for evaluation (default 0.2,
            matching the original hard-coded split).

    Returns:
        Tuple (train_df, test_df) of pandas DataFrames.
    """
    df = pd.read_csv(csv_file)
    # Stratify on the label so both splits preserve the class distribution.
    return train_test_split(df, test_size=test_size, random_state=SEED, shuffle=True, stratify=df["diagnosis"])
class MyDataset(Dataset):
    """Retina-image dataset over APTOS-style (id_code, diagnosis) rows.

    Args:
        df: DataFrame with an "id_code" column (image filename stem) and a
            "diagnosis" column (numeric label).
        img_dir: Directory containing "<id_code>.png" images.
        transform: Optional albumentations transform applied to the RGB image.
    """

    def __init__(self, df, img_dir, transform=None):
        self.df = df
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        path = os.path.join(self.img_dir, row["id_code"] + ".png")
        img = cv2.imread(path)
        # cv2.imread returns None (no exception) for a missing/unreadable
        # file; fail loudly here instead of crashing cryptically in cvtColor.
        if img is None:
            raise FileNotFoundError("Could not read image: {}".format(path))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        if self.transform is not None:
            img = self.transform(image=img)["image"]
        # HWC -> CHW float tensor; label wrapped as a shape-(1,) tensor to
        # match the model's single regression output.
        return torch.Tensor(img).permute(2, 0, 1), torch.Tensor([row["diagnosis"]])
def save_checkpoint(model, epoch):
    """Persist the model's state dict as checkpoint_epoch_<epoch>.pth in the CWD."""
    filename = "checkpoint_epoch_{}.pth".format(epoch)
    torch.save(model.state_dict(), filename)
def get_lr(optimizer):
    """Return the learning rate of the optimizer's first param group (None if empty)."""
    groups = optimizer.param_groups
    return groups[0]["lr"] if groups else None
def main():
    """Train EfficientNet-B3 as a single-output diagnosis regressor on APTOS data.

    Runs EPOCHS passes over a stratified 80/20 split with gradient
    accumulation and clipping, evaluates on the held-out split each epoch,
    checkpoints the model, appends to log.csv, and reduces the learning rate
    when the test loss plateaus.
    """
    seed_everything(SEED)
    train, test = make_train_test_split(CSV_FILE)
    train_dataset = MyDataset(train, TRAIN_DIR, TRAIN_TRANSFORM)
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)
    test_dataset = MyDataset(test, TEST_DIR, TEST_TRANSFORM)
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)
    # Single regression output; SmoothL1 treats the diagnosis grade as ordinal.
    model = EfficientNet.from_pretrained("efficientnet-b3", num_classes=1)
    model = model.to(device=DEVICE, dtype=DTYPE)
    criterion = nn.SmoothL1Loss()
    optimizer = Adam(model.parameters(), lr=LEARNING_RATE)
    scheduler = ReduceLROnPlateau(optimizer, mode="min", factor=0.1, patience=3, min_lr=1e-8, verbose=True)
    log = pd.DataFrame(columns=["epoch", "train_loss", "test_loss", "learning_rate"])
    for epoch in range(EPOCHS):
        # ---- train ----
        tqdm_loader = tqdm(train_loader)
        model = model.train()
        train_loss = 0
        # Bug fix: start each epoch from clean gradients so a partial
        # accumulation window at the end of one epoch cannot leak into the next.
        optimizer.zero_grad()
        for batch, (X, y) in enumerate(tqdm_loader):
            X = X.to(device=DEVICE, dtype=DTYPE)
            y = y.to(device=DEVICE, dtype=DTYPE)
            y_pred = model(X)
            loss = criterion(y_pred, y)
            # Bug fix: scale the loss so the accumulated gradient is the MEAN
            # over the window, not the sum (the unscaled version effectively
            # multiplied the learning rate by GRADIENT_ACCUMULATION).
            (loss / GRADIENT_ACCUMULATION).backward()
            if batch % GRADIENT_ACCUMULATION == GRADIENT_ACCUMULATION - 1:
                clip_grad_norm_(model.parameters(), GRADIENT_CLIP)
                optimizer.step()
                optimizer.zero_grad()
            # Running mean of the (unscaled) per-batch loss for display/logging.
            train_loss = (train_loss * batch + loss.item()) / (batch + 1)
            tqdm_loader.set_description("[TRAIN] Epoch: {} Loss: {:.4f} LR: {}".format(epoch, train_loss, get_lr(optimizer)))
        # ---- evaluate ----
        tqdm_loader = tqdm(test_loader)
        model = model.eval()
        test_loss = 0
        with torch.no_grad():
            for batch, (X, y) in enumerate(tqdm_loader):
                X = X.to(device=DEVICE, dtype=DTYPE)
                y = y.to(device=DEVICE, dtype=DTYPE)
                y_pred = model(X)
                loss = criterion(y_pred, y)
                test_loss = (test_loss * batch + loss.item()) / (batch + 1)
                tqdm_loader.set_description("[TEST ] Epoch: {} Loss: {:.4f} LR: {}".format(epoch, test_loss, get_lr(optimizer)))
        save_checkpoint(model, epoch)
        # DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
        # concatenate a one-row frame instead.
        row = pd.DataFrame([{"epoch": epoch, "train_loss": train_loss, "test_loss": test_loss, "learning_rate": get_lr(optimizer)}])
        log = pd.concat([log, row], ignore_index=True)
        log["epoch"] = log["epoch"].astype(int)
        # Rewrite the full log every epoch so a crash still leaves a usable file.
        log.to_csv("log.csv", index=False)
        scheduler.step(test_loss)
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment