-
-
Save wassname/42aba7168bb83e278fcfea87e70fa3af to your computer and use it in GitHub Desktop.
# Poetry package metadata.
[tool.poetry]
name = "strer_llms"
version = "0.1.0"
description = ""
# NOTE(review): placeholder entry; Poetry documents authors as "Name <email>" -
# confirm and fill in before building or publishing.
authors = [""]
license = "MIT"
readme = "README.md"
# Runtime dependencies.
[tool.poetry.dependencies]
python = ">=3.10,<3.13"
# The +cu121 build is resolved from the "pytorch" source declared in
# [[tool.poetry.source]], not from PyPI.
torch = { version = "^2.2.2+cu121", source = "pytorch" }
simple-parsing = "^0.1.4"
tqdm = "^4.66.1"
numpy = "^1.26.1"
pandas = "^2.1.1"
lightning = "^2.1.0"
matplotlib = "^3.8.0"
loguru = "^0.7.2"
einops = "^0.7.0"
scikit-learn = "^1.3.1"
pytorch-optimizer = "^2.12.0"
torchinfo = "^1.8.0"
transformers-stream-generator = "^0.0.5"
tiktoken = "^0.6.0"
# NOTE(review): no rev/tag/branch is given, so this is not pinned to a commit;
# consider adding `rev = "..."` for reproducible installs.
baukit = { git = "https://github.com/davidbau/baukit" }
jaxtyping = "^0.2.28"
colorama = "^0.4.6"
# NOTE(review): pytest is declared as a runtime dependency here; it may belong
# in the dev group - confirm before moving.
pytest = "^8.2.0"
outlines = "^0.0.41"
# Extra package index hosting the CUDA 12.1 PyTorch wheels.
# "explicit" priority: Poetry only consults this source for dependencies that
# name it via `source = "pytorch"`.
[[tool.poetry.source]]
name = "pytorch"
url = "https://download.pytorch.org/whl/cu121"
priority = "explicit"
# Development-only tooling (the "dev" dependency group).
[tool.poetry.group.dev.dependencies]
ipykernel = "^6.25.2"
ruff = "^0.1.3"
pylama = "^8.4.1"
# Build backend declaration: the package is built with poetry-core.
[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
Note that you can just do torch.save(model.state_dict(), FILE_PATH) as with any PyTorch model. - Neel Nanda
I ran into this error, bro. Is it because I installed the wrong package?
RuntimeError Traceback (most recent call last)
----> 8 model.save_pretrained("C:/Users/Administrator/Desktop/outputs/")
2575 f"The weights trying to be saved contained shared tensors {error_names} that are mismatching the transformers base configuration. Try saving using safe_serialization=False or remove this tensor sharing.",
RuntimeError: The weights trying to be saved contained shared tensors [{'model.layers.31.self_attn.q_proj.weight', 'model.layers.31.self_attn.o_proj.weight'}, {'model.layers.31.self_attn.k_proj.weight', 'model.layers.31.self_attn.v_proj.weight'}, {'model.layers.31.mlp.gate_proj.weight', 'model.layers.31.mlp.down_proj.weight', 'model.layers.31.mlp.up_proj.weight'}, {'model.layers.31.input_layernorm.weight', 'model.norm.weight', 'model.layers.31.post_attention_layernorm.weight'}] that are mismatching the transformers base configuration. Try saving using safe_serialization=False or remove this tensor sharing.
idk, something's not compatible, but it's hard to know what's changed for you
it might be worth trying this newer lib https://github.com/FailSpy/abliterator, failspy has some great code and has gone further than me at this point
idk, something's not compatible, but it's hard to know what's changed for you
it might be worth trying this newer lib https://github.com/FailSpy/abliterator, failspy has some great code and has gone further than me at this point
I solved the problem by running the model on a single GPU card. Thank you for sharing the new library, haha.
Yes the intervention is broken, I couldn't work out how to get interventions working effectively with baukit. But the merged part seems good, which seems more important.