Skip to content

Instantly share code, notes, and snippets.

@stefanonardo
Last active February 28, 2024 19:21
Show Gist options
  • Save stefanonardo/693d96ceb2f531fa05db530f3e21517d to your computer and use it in GitHub Desktop.
Save stefanonardo/693d96ceb2f531fa05db530f3e21517d to your computer and use it in GitHub Desktop.
Early Stopping PyTorch
# MIT License
#
# Copyright (c) 2018 Stefano Nardo https://gist.github.com/stefanonardo
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
class EarlyStopping(object):
    """Stop training when a monitored metric has stopped improving.

    Call ``step(metric)`` once per epoch with the validation metric; it
    returns ``True`` when training should stop.

    Args:
        mode: 'min' if a lower metric is better, 'max' if higher is better.
        min_delta: minimum change in the metric to qualify as an
            improvement — an absolute value, or a percentage of the current
            best when ``percentage=True``.
        patience: number of consecutive non-improving epochs tolerated
            before stopping. ``patience=0`` disables early stopping
            entirely (``step`` always returns ``False``).
        percentage: interpret ``min_delta`` as a percentage of the best
            value instead of an absolute delta.

    Raises:
        ValueError: if ``mode`` is not 'min' or 'max'.
    """

    def __init__(self, mode='min', min_delta=0, patience=10, percentage=False):
        self.mode = mode
        self.min_delta = min_delta
        self.patience = patience
        self.best = None          # best metric value seen so far
        self.num_bad_epochs = 0   # consecutive epochs without improvement
        self.is_better = None
        self._init_is_better(mode, min_delta, percentage)
        if patience == 0:
            # Early stopping disabled: never signal a stop.
            self.is_better = lambda a, b: True
            self.step = lambda a: False

    def step(self, metrics):
        """Record one epoch's metric; return True if training should stop."""
        if self.best is None:
            # First epoch: anything is an improvement over nothing.
            self.best = metrics
            return False

        # NaN is the only value that compares unequal to itself, so this
        # detects a diverged metric for plain Python floats, numpy scalars,
        # and 0-dim torch tensors alike (including CUDA tensors, with no
        # host copy) — unlike np.isnan/torch.isnan, which each reject the
        # other's type (see the discussion thread below).
        if metrics != metrics:
            return True

        if self.is_better(metrics, self.best):
            self.num_bad_epochs = 0
            self.best = metrics
        else:
            self.num_bad_epochs += 1

        if self.num_bad_epochs >= self.patience:
            return True

        return False

    def _init_is_better(self, mode, min_delta, percentage):
        """Install the comparison deciding whether a metric improved on best."""
        if mode not in {'min', 'max'}:
            raise ValueError('mode ' + mode + ' is unknown!')
        if not percentage:
            if mode == 'min':
                self.is_better = lambda a, best: a < best - min_delta
            if mode == 'max':
                self.is_better = lambda a, best: a > best + min_delta
        else:
            if mode == 'min':
                self.is_better = lambda a, best: a < best - (
                    best * min_delta / 100)
            if mode == 'max':
                self.is_better = lambda a, best: a > best + (
                    best * min_delta / 100)
@laihaoran
Copy link

How can I use it in my code? Many thanks.

@pennz
Copy link

pennz commented Jul 2, 2019

Thanks for your script, which is easy to use and with little overhead, Stefano.

For usage, here is an example:

...
        es = EarlyStopping(patience=5)

        num_epochs = 100
        for epoch in range(num_epochs):
            train_one_epoch(model, data_loader)  # train the model for one epoch, on training set
            metric = eval(model, data_loader_dev)  # evalution on dev set (i.e., holdout from training)
            if es.step(metric):
                break  # early stop criterion is met, we can stop now

...

@jbh1128d1
Copy link

I followed the above example and got the following error:


train_log loss:
training   (min:    0.087, max:    0.087, cur:    0.087)

train_accuracy:
training   (min:    0.084, max:    0.084, cur:    0.084)
test       (min:    0.547, max:    0.547, cur:    0.547)
test       (min:    0.342, max:    0.342, cur:    0.342)
1 100
1 200
1 300
1 400
1 500
1 600
1 700
1 800
1 900
1 1000
{'train_log loss': 0.08505982160568237, 'train_accuracy': 0.08596525341272354}
1 100
1 200
1 300
1 400
1 500
1 600
1 700
1 800
1 900
1 1000
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-47-46b97a2ae203> in <module>
     72 
     73 
---> 74 train_model(model = combined_model, criterion = criterion, optimizer = optimizer, num_epochs=500)

<ipython-input-47-46b97a2ae203> in train_model(model, criterion, optimizer, num_epochs)
     52                     torch.save(combined_model.state_dict()
     53                                , 'D:\\CIS inspection images 0318\\self_build\\combined_model_1.pt') 
---> 54                 if es.step(epoch_acc):
     55                     break
     56 

<ipython-input-44-a6e50955f13f> in step(self, metrics)
     18             return False
     19 
---> 20         if np.isnan(metrics):
     21             return True
     22 

~\AppData\Local\Continuum\anaconda3\envs\torch_env\lib\site-packages\torch\tensor.py in __array__(self, dtype)
    484     def __array__(self, dtype=None):
    485         if dtype is None:
--> 486             return self.numpy()
    487         else:
    488             return self.numpy().astype(dtype, copy=False)

TypeError: can't convert CUDA tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.

Here is the model loop:

es = EarlyStopping(patience = 10)

def train_model(model, criterion, optimizer, num_epochs=10):
    liveloss = PlotLosses(series_fmt={'training': '{}', 'test':'test_{}'})
    max_accuracy = 0
    for epoch in range(num_epochs):
        logs = {}
        for phase in ['train', 'test']:
            if phase == 'train':
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            running_corrects = 0

            for b, (image, label, policy, categorical_data) in enumerate(dataloaders[phase]):
                image = image.cuda()
                label = label.cuda()
                #numerical_data = numerical_data.cuda()
                categorical_data = categorical_data.cuda()

                outputs = model(image, categorical_data)
                loss = criterion(outputs, label)

                if phase == 'train':
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()

                _, preds = torch.max(outputs, 1)
                running_loss += loss.detach() * image.size(0)
                running_corrects += torch.sum(preds == label.data)
                
                b += 1
                if b % print_interval == 0:
                    print(epoch, b)
                    
                if b == max_trn_batch:
                    break

            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corrects.float() / len(dataloaders[phase].dataset)
            
            prefix = ''
            if phase == 'test':
                prefix = 'test_'
                if epoch_acc > max_accuracy:
                    max_accuracy = epoch_acc
                    torch.save(combined_model.state_dict()
                               , 'D:\\CIS inspection images 0318\\self_build\\combined_model_1.pt') 
                if es.step(epoch_acc):
                    break
                    
            else:
                prefix = 'train_'
                
           

            logs[prefix + 'log loss'] = epoch_loss.item()
            logs[prefix + 'accuracy'] = epoch_acc.item()
            print(logs)
         
        
        liveloss.update(logs)
        liveloss.draw()
    
    print(max_accuracy)
    scheduler.step(loss)

@stefanonardo
Copy link
Author

Try replacing np.isnan with torch.isnan

@jbh1128d1
Copy link

jbh1128d1 commented Feb 10, 2020 via email

@kvipinkumar
Copy link

Thank you for this script!

@jakobamb
Copy link

Hi @stefanonardo , how should I credit you and what is the license of the script?

@stefanonardo
Copy link
Author

Hi @stefanonardo , how should I credit you and what is the license of the script?

Hi @jakobamb, thank you for asking. I added a license.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment