Skip to content

Instantly share code, notes, and snippets.

from torch import nn
from torchtune.utils import get_memory_stats, get_device
from torchao.dtypes.nf4tensor import to_nf4
from bitsandbytes.functional import quantize_nf4
def main():
device = get_device('cuda')
# Size of Llama3-8B output projection weight
import torch
from transformers import (
AutoModelForCausalLM,
AutoTokenizer,
BitsAndBytesConfig,
HfArgumentParser,
TrainingArguments,
pipeline,
logging,
GenerationConfig,
def validate_state_dict_for_lora(
*,
lora_modules: List[str],
full_model_state_dict_keys: List[str],
lora_state_dict_keys: Optional[List[str]] = None,
base_model_state_dict_keys: Optional[List[str]] = None,
):
is_lora_param = lambda x: "lora" in x and any([k in x for k in lora_modules])
for k in full_model_state_dict_keys:
if not is_lora_param(k):
_IncompatibleKeys(missing_keys=['classifier.model.0.weight', 'classifier.model.0.bias', 'classifier.model.3.weight', 'classifier.model.3.bias'], unexpected_keys=['image_codebook.encoder.blocks.input.w', 'image_codebook.encoder.blocks.input.b', 'image_codebook.encoder.blocks.group_1.block_1.res_path.conv_1.w', 'image_codebook.encoder.blocks.group_1.block_1.res_path.conv_1.b', 'image_codebook.encoder.blocks.group_1.block_1.res_path.conv_2.w', 'image_codebook.encoder.blocks.group_1.block_1.res_path.conv_2.b', 'image_codebook.encoder.blocks.group_1.block_1.res_path.conv_3.w', 'image_codebook.encoder.blocks.group_1.block_1.res_path.conv_3.b', 'image_codebook.encoder.blocks.group_1.block_1.res_path.conv_4.w', 'image_codebook.encoder.blocks.group_1.block_1.res_path.conv_4.b', 'image_codebook.encoder.blocks.group_1.block_2.res_path.conv_1.w', 'image_codebook.encoder.blocks.group_1.block_2.res_path.conv_1.b', 'image_codebook.encoder.blocks.group_1.block_2.res_path.conv_2.w', 'image_codebook.encoder.blocks.group_1.bloc
_IncompatibleKeys(missing_keys=['classifier.model.0.weight', 'classifier.model.0.bias', 'classifier.model.3.weight', 'classifier.model.3.bias'], unexpected_keys=['image_codebook.encoder.blocks.input.w', 'image_codebook.encoder.blocks.input.b', 'image_codebook.encoder.blocks.group_1.block_1.res_path.conv_1.w', 'image_codebook.encoder.blocks.group_1.block_1.res_path.conv_1.b', 'image_codebook.encoder.blocks.group_1.block_1.res_path.conv_2.w', 'image_codebook.encoder.blocks.group_1.block_1.res_path.conv_2.b', 'image_codebook.encoder.blocks.group_1.block_1.res_path.conv_3.w', 'image_codebook.encoder.blocks.group_1.block_1.res_path.conv_3.b', 'image_codebook.encoder.blocks.group_1.block_1.res_path.conv_4.w', 'image_codebook.encoder.blocks.group_1.block_1.res_path.conv_4.b', 'image_codebook.encoder.blocks.group_1.block_2.res_path.conv_1.w', 'image_codebook.encoder.blocks.group_1.block_2.res_path.conv_1.b', 'image_codebook.encoder.blocks.group_1.block_2.res_path.conv_2.w', 'image_codebook.encoder.blocks.group_1.bloc