[tool.poetry]
name = "strer_llms"
version = "0.1.0"
description = ""
authors = [""]
license = "MIT"
readme = "README.md"

[tool.poetry.dependencies]
python = ">=3.10,<3.13"
torch = {version = "^2.2.2+cu121", source = "pytorch"}
simple-parsing = "^0.1.4"
tqdm = "^4.66.1"
numpy = "^1.26.1"
pandas = "^2.1.1"
lightning = "^2.1.0"
matplotlib = "^3.8.0"
loguru = "^0.7.2"
einops = "^0.7.0"
scikit-learn = "^1.3.1"
pytorch-optimizer = "^2.12.0"
torchinfo = "^1.8.0"
transformers-stream-generator = "^0.0.5"
tiktoken = "^0.6.0"
baukit = {git = "https://github.com/davidbau/baukit"}
jaxtyping = "^0.2.28"
colorama = "^0.4.6"
pytest = "^8.2.0"
outlines = "^0.0.41"

[[tool.poetry.source]]
name = "pytorch"
url = "https://download.pytorch.org/whl/cu121"
priority = "explicit"

[tool.poetry.group.dev.dependencies]
ipykernel = "^6.25.2"
ruff = "^0.1.3"
pylama = "^8.4.1"

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
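After `poetry install`, a quick way to confirm that the cu121 torch wheel from the explicit "pytorch" source actually resolved (a minimal sanity check, nothing project-specific assumed):

import torch

print(torch.__version__)          # should end in "+cu121" if the wheel came from the cu121 source
print(torch.cuda.is_available())  # True if the CUDA runtime is visible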
https://huggingface.co/wassname/meta-llama-3-8b-instruct-helpfull
perplexity -m lmstudio-community/Meta-Llama-3-8B-Instruct-Q6_K.gguf -b 32 -c 512 -f wiki.test.raw
# Final estimate: PPL = 7.5588 +/- 0.05599
perplexity -m wassname/meta-llama-3-8b-instruct-extra_helpfull_Q6_K.gguf -b 32 -c 512 -f wiki.test.raw
# Final estimate: PPL = 9.0920 +/- 0.06815
perplexity -m cognitivecomputations/dolphin-2.9-llama3-8b-q5_K_M.gguf -b 32 -c 512 -f wiki.test.raw
# Final estimate: PPL = 9.9277 +/- 0.08261
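Just arithmetic on the numbers above (no new runs) to make the comparison explicit:

# Relative wikitext perplexity increase vs. the base instruct model, from the runs above.
ppl = {
    "Meta-Llama-3-8B-Instruct Q6_K": 7.5588,
    "extra_helpfull Q6_K": 9.0920,
    "dolphin-2.9-llama3-8b q5_K_M": 9.9277,
}
base = ppl["Meta-Llama-3-8B-Instruct Q6_K"]
for name, p in ppl.items():
    print(f"{name}: PPL={p:.4f} ({(p / base - 1) * 100:+.1f}% vs base)")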
Will you release Llama 70B as well? That would be amazing!
Probably not, I'm afraid: my GPU is too small and my time too limited. But I'm sure others will jump in with time.
winglian: Is the most recent version of the notebook broken? Running it as-is doesn't seem to ablate the refusals. The intervention generations still refuse, but the fully "merged" orthogonalized ones are more helpful.
Yes, the intervention is broken; I couldn't work out how to get interventions working effectively with baukit. But the merged part seems good, which seems more important.
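For anyone who wants to poke at the intervention path, this is roughly the shape it would need to take with baukit's TraceDict and its edit_output hook (a minimal sketch under my assumptions about that API; model, inputs, and a unit-norm refusal_dir on the right device/dtype are placeholders, not the notebook's actual variables):

from baukit import TraceDict

# Assumed to already exist: model, inputs, refusal_dir (unit-norm, residual-stream sized).
def ablate_direction(output, layer):
    # Decoder layers return a tuple; the hidden states are the first element.
    hidden = output[0] if isinstance(output, tuple) else output
    # Project the refusal direction out of every token position.
    hidden = hidden - (hidden @ refusal_dir).unsqueeze(-1) * refusal_dir
    return (hidden, *output[1:]) if isinstance(output, tuple) else hidden

layer_names = [f"model.layers.{i}" for i in range(model.config.num_hidden_layers)]
with TraceDict(model, layer_names, edit_output=ablate_direction):
    out = model.generate(**inputs, max_new_tokens=64)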
Note that you can just do torch.save(model.state_dict(), FILE_PATH) as with any PyTorch model. - Neel Nanda
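i.e. something along these lines (a minimal sketch; the file path is a placeholder and model is assumed to be the orthogonalized model):

import torch

# Save just the weights, as with any PyTorch model...
torch.save(model.state_dict(), "orthogonalized_state_dict.pt")

# ...and load them back into a freshly instantiated copy of the same architecture.
model.load_state_dict(torch.load("orthogonalized_state_dict.pt", map_location="cpu"))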
I ran into this, bro. Is it because I packaged it wrong?
RuntimeError Traceback (most recent call last)
----> 8 model.save_pretrained("C:/Users/Administrator/Desktop/outputs/")
2575 f"The weights trying to be saved contained shared tensors {error_names} that are mismatching the transformers base configuration. Try saving using safe_serialization=False or remove this tensor sharing.",
RuntimeError: The weights trying to be saved contained shared tensors [{'model.layers.31.self_attn.q_proj.weight', 'model.layers.31.self_attn.o_proj.weight'}, {'model.layers.31.self_attn.k_proj.weight', 'model.layers.31.self_attn.v_proj.weight'}, {'model.layers.31.mlp.gate_proj.weight', 'model.layers.31.mlp.down_proj.weight', 'model.layers.31.mlp.up_proj.weight'}, {'model.layers.31.input_layernorm.weight', 'model.norm.weight', 'model.layers.31.post_attention_layernorm.weight'}] that are mismatching the transformers base configuration. Try saving using safe_serialization=False or remove this tensor sharing.
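The error message itself points at a workaround; something like this may get past the shared-tensor check (a sketch using the same output path as the traceback; whether those tied tensors are actually safe to save this way is a separate question):

# Fall back to pickle-based serialization, which tolerates shared tensors,
# as the RuntimeError suggests.
model.save_pretrained("C:/Users/Administrator/Desktop/outputs/", safe_serialization=False)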
idk, something's not compatible, but it's hard to know what's changed for you.
It might be worth trying this newer lib: https://github.com/FailSpy/abliterator. FailSpy has some great code and has gone further than me at this point.
I solved the problem by running the model on a single card. Thank you for sharing the new library, haha.
Check out the updated script (at the same URL). I converted it to Baukit (which is a simpler library) and made several changes to simplify it. The result now saves directly and appears to encounter fewer issues.
I did introduce some complexity: it now gets the direction for each block, rather than just block N (roughly as sketched below). It seems like Llama might require that, since it probably represents more complex concepts in its residual stream, being smarter than Qwen and Gemma. However, this is just my speculation.
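A rough sketch of the per-block idea, not the notebook's exact code; harmful_acts / harmless_acts are assumed dicts of cached residual-stream activations (layer index -> [n_samples, hidden] tensor):

import torch

def refusal_dirs_per_layer(harmful_acts, harmless_acts):
    # One refusal direction per block: difference of mean activations, normalised.
    dirs = {}
    for layer in harmful_acts:
        d = harmful_acts[layer].mean(dim=0) - harmless_acts[layer].mean(dim=0)
        dirs[layer] = d / d.norm()
    return dirs

def orthogonalize_(weight, direction):
    # In place: stop a weight that writes into the residual stream (e.g. o_proj, down_proj)
    # from writing anything along `direction`: W <- (I - d d^T) W.
    with torch.no_grad():
        weight -= torch.outer(direction, direction @ weight)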
I still haven't run any benchmarks on the result and probably won't have time to. However, I do wonder if it degrades or improves upon baseline performance.
For anyone who enjoys digging deeper into this field, check out these intros:
Related discussions:
To understand why many people (including me) are worried that a superintelligent AI might kill us all, see this intro. There are many orgs working on this that support open source! We want the good ending, not the bad one; join us.