Skip to content

Instantly share code, notes, and snippets.

@alankrantas
Created April 23, 2025 13:58
Show Gist options
  • Save alankrantas/219d375cecc89aee2fe9f3eda4b6dfe8 to your computer and use it in GitHub Desktop.
Simple Text Generation Streaming with HuggingFace Transformers Pipeline
'''
pip3 install huggingface_hub transformers[torch]
'''

# Model repo ID on the Hugging Face Hub. Gated models require requesting
# access first: https://huggingface.co/docs/hub/models-gated
model = 'your_hf_model_here'

# Hub access token used when downloading the model:
# https://huggingface.co/docs/hub/security-tokens
accessToken = 'your_hf_token_here'

# The user prompt sent to the model.
prompt = 'Tell me a random story.'

# Generation parameters forwarded as keyword arguments to the pipeline call.
config = {
    'max_new_tokens': 512,       # cap on the number of generated tokens
    'temperature': 0.2,          # low temperature -> more focused output
    'top_p': 0.95,               # nucleus-sampling probability threshold
    'top_k': 30,                 # sample only among the 30 most likely tokens
    'repetition_penalty': 1.05,  # mildly discourage repeated phrases
    'do_sample': True,           # enable sampling (needed for the knobs above)
    'return_full_text': False,   # stream only new text, not the prompt echo
}
from huggingface_hub import snapshot_download
from transformers import pipeline, TextIteratorStreamer
import threading, os, sys, gc

gc.enable()

# Local directory the model snapshot is stored in.
path = f'model/{model}'

# Download the model snapshot (skipped if a previous run already fetched it).
if not os.path.isdir(path):
    try:
        snapshot_download(
            repo_id=model,
            repo_type='model',
            local_dir=path,
            token=accessToken,
        )
    except Exception as e:
        print(f'error on downloading {model}: {e}')
        # Bug fix: bare sys.exit() exits with status 0 (success), so a failed
        # download was indistinguishable from success to a calling shell/script.
        sys.exit(1)
# Generate text and stream it to stdout chunk by chunk.
#
# Bug fix: 'thread' is initialized to None up front. Previously, if
# pipeline(...) or TextIteratorStreamer(...) raised, the finally-clause
# thread.join() raised a NameError that masked the original error.
thread = None
try:
    # Build a text-generation pipeline from the locally downloaded model.
    generator = pipeline(
        task='text-generation',
        model=path,
        device=0,  # first CUDA device; presumably a GPU is present — confirm
    )
    # Streamer that yields decoded text fragments as generation proceeds.
    streamer = TextIteratorStreamer(
        tokenizer=generator.tokenizer,
        skip_prompt=True,  # do not echo the prompt back into the stream
    )
    # Run generation on a worker thread so the main thread can consume the
    # streamer iterator concurrently.
    thread = threading.Thread(target=generator, kwargs={
        'text_inputs': [  # chat template - for instruct models only
            [
                {
                    'role': 'system',
                    'content': 'You are a helpful assistant.',
                },
                {
                    'role': 'user',
                    'content': prompt,
                },
            ],
        ],
        **config,
        'streamer': streamer,
    })
    thread.start()
    # Print each decoded fragment as soon as it arrives.
    for text in streamer:
        print(text, end='')
    print('')
except Exception as e:
    print(f'error on generating: {e}')
finally:
    # Wait for the worker thread only if it was actually created.
    if thread is not None:
        thread.join()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment