# Convert the ERNIE 4.5 slow (sentencepiece) tokenizer into a fast tokenizer.
from transformers import Ernie4_5TokenizerFast

hf_fast_tok = Ernie4_5TokenizerFast.from_pretrained("baidu/ERNIE-4.5-0.3B-Base-PT", from_slow=True)
hf_fast_tok.model_max_length = 131072

# Drop slow-tokenizer / remote-code kwargs so they don't leak into the saved config.
hf_fast_tok.init_kwargs.pop("auto_map", None)
hf_fast_tok.init_kwargs.pop("use_default_system_prompt", None)
hf_fast_tok.init_kwargs.pop("legacy", None)
hf_fast_tok.init_kwargs.pop("sp_model_kwargs", None)
"""Using https://github.com/vasqu/dia/tree/hf-next""" | |
import dac | |
import soundfile as sf | |
from datasets import Audio, load_dataset | |
from dia.model import Dia | |
# prepare dac |
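# Assumed continuation of "prepare dac": download and load the original
# descript-audio-codec weights, matching the DacModel snippet below.
dac_model_path = dac.utils.download()  # defaults to the 44khz model
dac_model = dac.DAC.load(dac_model_path)

# Assumed usage sketch following the public Dia README; the hf-next branch linked
# above may differ in detail. (Audio/load_dataset are presumably used for an
# audio prompt in the original; they are not exercised here.)
dia = Dia.from_pretrained("nari-labs/Dia-1.6B")
audio = dia.generate("[S1] Hello there. [S2] Hi, this is a Dia test.")
sf.write("dia_output.wav", audio, 44100)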
# Load the transformers DacModel port alongside the original descript-audio-codec
# weights (fetched via the dac package) so the two can be checked against each other.
import dac
import torch
from datasets import Audio, load_dataset
from transformers import AutoProcessor, DacModel

model = DacModel.from_pretrained("descript/dac_44khz")
dac_model_path = dac.utils.download()
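# Assumed continuation: encode the same clip with both implementations and
# compare the discrete codes. The dataset and preprocessing follow the DacModel
# docs example; exact parity also depends on identical padding on both paths.
dac_model = dac.DAC.load(dac_model_path)
dac_model.eval()

processor = AutoProcessor.from_pretrained("descript/dac_44khz")
ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
ds = ds.cast_column("audio", Audio(sampling_rate=processor.sampling_rate))
inputs = processor(
    raw_audio=ds[0]["audio"]["array"],
    sampling_rate=processor.sampling_rate,
    return_tensors="pt",
)

with torch.no_grad():
    hf_codes = model.encode(inputs["input_values"]).audio_codes
    # dac.DAC.encode returns (z, codes, latents, commitment_loss, codebook_loss)
    orig_codes = dac_model.encode(inputs["input_values"])[1]
print(torch.equal(hf_codes, orig_codes))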
# Padding-free setup: DataCollatorWithFlattening flattens a batch into a single
# packed sequence instead of padding each example.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, DataCollatorWithFlattening

model_id = "meta-llama/Llama-3.2-1B"
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.pad_token = tokenizer.eos_token  # Llama defines no pad token; reuse EOS

collator = DataCollatorWithFlattening(return_flash_attn_kwargs=True)
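
# Assumed continuation: with return_flash_attn_kwargs=True the collator packs the
# batch into one flattened sequence and also emits cu_seq_lens_q/k plus
# max_length_q/k, which FlashAttention 2 uses to keep the packed sequences from
# attending to one another. Padding-free collation therefore needs
# attn_implementation="flash_attention_2" (flash-attn installed).
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    attn_implementation="flash_attention_2",
)

features = [{"input_ids": tokenizer(text)["input_ids"]} for text in ("Hello world!", "A second, slightly longer example.")]
batch = collator(features)
print({k: (v.shape if isinstance(v, torch.Tensor) else v) for k, v in batch.items()})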