How to freeze and train Huggingface models
import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# model_name: any encoder-decoder checkpoint, e.g. "google/flan-t5-base"
model = AutoModelForSeq2SeqLM.from_pretrained(model_name, device_map="auto", torch_dtype=torch.float16)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Number of transformer blocks in the encoder and decoder (T5-style models)
num_encoder_layers = len(model.encoder.block)
num_decoder_layers = len(model.decoder.block)

# Option 1: Freeze the upper 3 layers of the encoder (lower layers stay trainable)
# for i in range(num_encoder_layers - 1, num_encoder_layers - 4, -1):
#     for param in model.encoder.block[i].parameters():
#         param.requires_grad = False
# ... and freeze all layers of the decoder
# for i in range(num_decoder_layers):
#     for param in model.decoder.block[i].parameters():
#         param.requires_grad = False

# OR

# Option 2: Freeze everything first ...
for param in model.parameters():
    param.requires_grad = False

# ... then un-freeze the lower 4 layers of the encoder
# for i in range(0, 4, 1):
#     for param in model.encoder.block[i].parameters():
#         param.requires_grad = True

# ... or un-freeze only the topmost layer of the encoder
for i in range(num_encoder_layers - 1, num_encoder_layers - 2, -1):
    for param in model.encoder.block[i].parameters():
        param.requires_grad = True

# Check which parameters will actually be trained
for name, param in model.named_parameters():
    print(name, param.requires_grad)
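
# The gist's title also mentions training, so below is a minimal sketch of how the
# partially frozen model could then be fine-tuned. It is not part of the original gist:
# `train_dataloader` (yielding tokenized batches with input_ids, attention_mask, labels
# on the model's device) and `num_epochs` are assumed names, and the optimizer/dtype
# handling (the model was loaded in float16 above) is up to your setup.
trainable_params = [p for p in model.parameters() if p.requires_grad]
print(f"Trainable params: {sum(p.numel() for p in trainable_params):,} "
      f"of {sum(p.numel() for p in model.parameters()):,}")

optimizer = torch.optim.AdamW(trainable_params, lr=1e-4)
model.train()
for epoch in range(num_epochs):
    for batch in train_dataloader:
        # Only parameters with requires_grad=True receive gradients and get updated
        outputs = model(**batch)
        outputs.loss.backward()
        optimizer.step()
        optimizer.zero_grad()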