Skip to content

Instantly share code, notes, and snippets.

@alankrantas
Created April 23, 2025 13:58
Show Gist options
  • Save alankrantas/219d375cecc89aee2fe9f3eda4b6dfe8 to your computer and use it in GitHub Desktop.
Simple Text Generation Streaming with HuggingFace Transformers Pipeline
'''
pip3 install huggingface_hub transformers[torch]
'''

# Model repo ID on the Hugging Face Hub. Gated models require requesting
# access first: https://huggingface.co/docs/hub/models-gated
model = 'your_hf_model_here'

# Hub access token used when downloading the model:
# https://huggingface.co/docs/hub/security-tokens
accessToken = 'your_hf_token_here'

# The user prompt sent to the model.
prompt = 'Tell me a random story.'

# Generation parameters forwarded as keyword arguments to the pipeline call.
config = {
    'max_new_tokens': 512,       # cap on the number of generated tokens
    'temperature': 0.2,          # low temperature -> more focused output
    'top_p': 0.95,               # nucleus-sampling probability threshold
    'top_k': 30,                 # sample only among the 30 most likely tokens
    'repetition_penalty': 1.05,  # mildly discourage repeated phrases
    'do_sample': True,           # enable sampling (needed for the knobs above)
    'return_full_text': False,   # stream only new text, not the prompt echo
}
from huggingface_hub import snapshot_download
from transformers import pipeline, TextIteratorStreamer
import threading, os, sys, gc

gc.enable()

# Local directory the model snapshot is stored in.
path = f'model/{model}'

# Download the model snapshot (skipped if a previous run already fetched it).
if not os.path.isdir(path):
    try:
        snapshot_download(
            repo_id=model,
            repo_type='model',
            local_dir=path,
            token=accessToken,
        )
    except Exception as e:
        print(f'error on downloading {model}: {e}')
        # Bug fix: bare sys.exit() exits with status 0 (success), so a failed
        # download was indistinguishable from success to a calling shell/script.
        sys.exit(1)
# Generate text and stream it to stdout chunk by chunk.
#
# Bug fix: 'thread' is initialized to None up front. Previously, if
# pipeline(...) or TextIteratorStreamer(...) raised, the finally-clause
# thread.join() raised a NameError that masked the original error.
thread = None
try:
    # Build a text-generation pipeline from the locally downloaded model.
    generator = pipeline(
        task='text-generation',
        model=path,
        device=0,  # first CUDA device; presumably a GPU is present — confirm
    )
    # Streamer that yields decoded text fragments as generation proceeds.
    streamer = TextIteratorStreamer(
        tokenizer=generator.tokenizer,
        skip_prompt=True,  # do not echo the prompt back into the stream
    )
    # Run generation on a worker thread so the main thread can consume the
    # streamer iterator concurrently.
    thread = threading.Thread(target=generator, kwargs={
        'text_inputs': [  # chat template - for instruct models only
            [
                {
                    'role': 'system',
                    'content': 'You are a helpful assistant.',
                },
                {
                    'role': 'user',
                    'content': prompt,
                },
            ],
        ],
        **config,
        'streamer': streamer,
    })
    thread.start()
    # Print each decoded fragment as soon as it arrives.
    for text in streamer:
        print(text, end='')
    print('')
except Exception as e:
    print(f'error on generating: {e}')
finally:
    # Wait for the worker thread only if it was actually created.
    if thread is not None:
        thread.join()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment