
@surak
Created May 23, 2024 16:24
The activation script must be sourced, otherwise the virtual environment will not work.
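Sourcing matters because `activate` only exports variables into the calling shell; executing it as a script runs those exports in a child shell that exits immediately. A minimal sketch (the `/tmp/demo-venv` path is illustrative):

```shell
# Create a throwaway virtual environment and activate it.
# The path /tmp/demo-venv is illustrative.
python3 -m venv /tmp/demo-venv
. /tmp/demo-venv/bin/activate    # '.' is the POSIX spelling of 'source'
echo "$VIRTUAL_ENV"              # set only because activate was sourced
```

Running the script instead (`sh activate` or `./activate`) executes it in a child shell, so the exported `PATH` and `VIRTUAL_ENV` vanish when it exits and `python` still resolves to the system interpreter.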
Setting vars
Starting new training
############## 8
GPUS 0,1,2,3
(the two lines above are printed once by each of the 8 ranks; repeats omitted)
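The duplicated banner is a symptom of every rank in the 8-process job printing unguarded; gating output on the rank keeps the log to one copy. A minimal, single-process sketch of the guard (the `emit_banner` helper and the simulated launch loop are illustrative, not code from the training script):

```python
# Each rank runs the same script; only rank 0 should emit the banner.
def emit_banner(rank: int, world_size: int) -> list[str]:
    """Return the banner lines this rank prints (empty unless rank 0)."""
    if rank != 0:
        return []
    return [f"############## {world_size}", "GPUS 0,1,2,3"]

# Simulate 8 ranks launched by the scheduler.
lines = [line for rank in range(8) for line in emit_banner(rank, 8)]
print(lines)
```

In a real job the rank would come from the launcher (e.g. a `RANK` environment variable) rather than a loop, but the guard itself is the same `if rank == 0` check.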
load from /p/project/training2411/strube1/HDCRS-school-2024/models/Prithvi_100M.pt
load checkpoint from local path: /p/project/training2411/strube1/HDCRS-school-2024/models/Prithvi_100M.pt
(the two lines above are printed once by each of the 8 ranks; repeats omitted)
The model and loaded state dict do not match exactly
size mismatch for pos_embed: copying a param with shape torch.Size([1, 589, 768]) from checkpoint, the shape in current model is torch.Size([1, 197, 768]).
unexpected key in source state_dict: mask_token, decoder_pos_embed, decoder_embed.{weight,bias}, decoder_blocks.{0..7}.{norm1,norm2}.{weight,bias}, decoder_blocks.{0..7}.attn.{qkv,proj}.{weight,bias}, decoder_blocks.{0..7}.mlp.{fc1,fc2}.{weight,bias}, decoder_norm.{weight,bias}, decoder_pred.{weight,bias}
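This warning is expected when fine-tuning: the checkpoint holds the full pretrained MAE (encoder plus decoder), while the downstream model keeps only the encoder, and its `pos_embed` is sized for a different token count. Loaders typically drop the unexpected keys and any shape-mismatched entries before a non-strict load. A minimal sketch using plain dicts in place of real tensors (the `filter_state_dict` helper and the toy shapes are illustrative; the key names follow the log):

```python
def filter_state_dict(checkpoint: dict, model: dict) -> dict:
    """Keep only checkpoint entries the model can accept:
    drop keys absent from the model (e.g. decoder_* from the MAE)
    and keys whose shapes disagree (e.g. pos_embed)."""
    kept = {}
    for key, value in checkpoint.items():
        if key not in model:
            continue                          # unexpected key -> skip
        if value["shape"] != model[key]["shape"]:
            continue                          # size mismatch -> skip
        kept[key] = value
    return kept

# Toy entries mirroring the log; a dict with a "shape" field stands in
# for each tensor.
checkpoint = {
    "pos_embed":           {"shape": (1, 589, 768)},  # MAE-sized
    "patch_embed.weight":  {"shape": (768, 3)},
    "decoder_pred.weight": {"shape": (768, 768)},     # decoder-only
}
model = {
    "pos_embed":           {"shape": (1, 197, 768)},  # fine-tune-sized
    "patch_embed.weight":  {"shape": (768, 3)},
}
print(sorted(filter_state_dict(checkpoint, model)))  # ['patch_embed.weight']
```

With real tensors the analogous step precedes `model.load_state_dict(filtered, strict=False)`, which reports remaining missing or unexpected keys instead of raising, producing warnings like the ones above.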