# msoftware/transformer_batch_test.py

## transformer_batch_test.py
#
# Copyright 2024 by Michael Jentsch
#
# https://www.jentsch.io/vorteile-der-batch-textgenerierung-gegenuber-der-sequenziellen-generierung-mehrerer-texte/
#

import torch
import time
from transformers import AutoTokenizer, AutoModelForCausalLM, StoppingCriteria, BitsAndBytesConfig

# Hugging Face Hub id of the checkpoint under test. It gets its own name so
# that `model` only ever refers to the loaded model object, never to a string.
model_name = "deepseek-ai/deepseek-coder-7b-instruct-v1.5"

# A tokenizer holds no weights, so the model-loading options `device_map` and
# `torch_dtype` have no meaning here and are not passed.
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    trust_remote_code=True)

# Load the weights in half precision; device placement is delegated to
# `device_map="auto"`.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    torch_dtype=torch.float16,
    trust_remote_code=True)

# Put the module into evaluation mode; the script only runs generation.
model.eval()

# Prompt sent to the model for every batch entry. It consists of an assistant
# preamble, an "### Instruction:" section asking for JavaDoc of the embedded
# Java method, and a trailing "### Response:" header that the model continues.
# Everything between the triple quotes (including the tabs in the Java code)
# is part of the runtime string.
prompt = """You are a helpful AI Java programming and documentation assistant. Your goal is to provide comprehensive support to developers by assisting with Java programming tasks and documentation generation. Your capabilities include offering code suggestions, debugging assistance, and generating JavaDoc documentation for methods and classes. Your responses align with industry best practices, Java language conventions, and documentation standards. Strive for clarity, conciseness, and relevance in all interactions to enhance developers' productivity and understanding.
### Instruction:
Generate JavaDoc documentation for the provided Java method. Ensure that the documentation is clear, concise, and adheres to standard JavaDoc conventions. Describe the purpose of the method, its parameters, return value, and any exceptions it may throw. Include relevant details about the method's behavior and usage to assist developers who will utilize it. Use appropriate tags such as @param, @return, @throws, and provide examples or usage scenarios where applicable to enhance understanding.
```java
@Override
	public List<Document> similaritySearch(SearchRequest request) {
		if (request.getFilterExpression() != null) {
			throw new UnsupportedOperationException(
					"The [" + this.getClass() + "] doesn't support metadata filtering!");
		}

		List<Double> userQueryEmbedding = getUserQueryEmbedding(request.getQuery());
		return this.store.values()
			.stream()
			.map(entry -> new Similarity(entry.getId(),
					EmbeddingMath.cosineSimilarity(userQueryEmbedding, entry.getEmbedding())))
			.filter(s -> s.score >= request.getSimilarityThreshold())
			.sorted(Comparator.<Similarity>comparingDouble(s -> s.score).reversed())
			.limit(request.getTopK())
			.map(s -> this.store.get(s.key))
			.toList();
	}
```
### Response:
"""

# Five copies of the same prompt, generated in one batched call so that the
# total duration of the batch can be measured.
input_batch = [prompt] * 5

# A decoder-only model continues from the last position of each row, so any
# padding has to sit on the left. Fall back to EOS when the tokenizer defines
# no pad token, otherwise `padding=True` cannot be honoured.
tokenizer.padding_side = "left"
if tokenizer.pad_token_id is None:
    tokenizer.pad_token = tokenizer.eos_token

# perf_counter() is a monotonic clock, which makes it the right tool for
# measuring a duration (time.time() can jump when the wall clock is adjusted).
start = time.perf_counter()

# Tokenization involves no autograd, so it needs no `no_grad` context.
# Calling the tokenizer directly is the documented replacement for the legacy
# `batch_encode_plus` method.
tokens = tokenizer(
    input_batch,
    padding=True,
    return_tensors="pt").to(model.device)

with torch.no_grad():
    outputs = model.generate(
        tokens["input_ids"],
        # Pass the mask explicitly so padded positions are ignored instead of
        # being guessed from pad_token_id.
        attention_mask=tokens["attention_mask"],
        max_new_tokens=1024,
        do_sample=True,
        top_k=2,
        top_p=0.6,
        temperature=0.3,
        num_return_sequences=1,
        num_beams=1,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.pad_token_id)

# Make sure queued GPU work has finished before the clock is stopped.
if torch.cuda.is_available():
    torch.cuda.synchronize()

end = time.perf_counter()

print(f"Duration: {end - start:10.2f} sec.")
	#
	# Copyright 2024 by Michael Jentsch
	#
	# https://www.jentsch.io/vorteile-der-batch-textgenerierung-gegenuber-der-sequenziellen-generierung-mehrerer-texte/
	#

	import torch
	import time
	from transformers import AutoTokenizer, AutoModelForCausalLM, StoppingCriteria, BitsAndBytesConfig

	# NOTE(review): this tab-indented section repeats the top-level script found
	# earlier in the file; at this indentation it cannot run at module level.
	# Confirm whether it should be removed or moved into a function.

	# Hugging Face Hub id of the checkpoint under test. It gets its own name so
	# that `model` only ever refers to the loaded model object, never to a string.
	model_name = "deepseek-ai/deepseek-coder-7b-instruct-v1.5"

	# A tokenizer holds no weights, so the model-loading options `device_map` and
	# `torch_dtype` have no meaning here and are not passed.
	tokenizer = AutoTokenizer.from_pretrained(
		model_name,
		trust_remote_code=True)

	# Load the weights in half precision; device placement is delegated to
	# `device_map="auto"`.
	model = AutoModelForCausalLM.from_pretrained(
		model_name,
		device_map="auto",
		torch_dtype=torch.float16,
		trust_remote_code=True)

	# Put the module into evaluation mode; the script only runs generation.
	model.eval()

	# Prompt sent to the model for every batch entry. It consists of an assistant
	# preamble, an "### Instruction:" section asking for JavaDoc of the embedded
	# Java method, and a trailing "### Response:" header that the model continues.
	# NOTE(review): in this copy every continuation line of the literal starts
	# with a tab, and that tab is part of the runtime string — confirm this is
	# intended.
	prompt = """You are a helpful AI Java programming and documentation assistant. Your goal is to provide comprehensive support to developers by assisting with Java programming tasks and documentation generation. Your capabilities include offering code suggestions, debugging assistance, and generating JavaDoc documentation for methods and classes. Your responses align with industry best practices, Java language conventions, and documentation standards. Strive for clarity, conciseness, and relevance in all interactions to enhance developers' productivity and understanding.
	### Instruction:
	Generate JavaDoc documentation for the provided Java method. Ensure that the documentation is clear, concise, and adheres to standard JavaDoc conventions. Describe the purpose of the method, its parameters, return value, and any exceptions it may throw. Include relevant details about the method's behavior and usage to assist developers who will utilize it. Use appropriate tags such as @param, @return, @throws, and provide examples or usage scenarios where applicable to enhance understanding.
	```java
	@Override
	public List<Document> similaritySearch(SearchRequest request) {
	if (request.getFilterExpression() != null) {
	throw new UnsupportedOperationException(
	"The [" + this.getClass() + "] doesn't support metadata filtering!");
	}

	List<Double> userQueryEmbedding = getUserQueryEmbedding(request.getQuery());
	return this.store.values()
	.stream()
	.map(entry -> new Similarity(entry.getId(),
	EmbeddingMath.cosineSimilarity(userQueryEmbedding, entry.getEmbedding())))
	.filter(s -> s.score >= request.getSimilarityThreshold())
	.sorted(Comparator.<Similarity>comparingDouble(s -> s.score).reversed())
	.limit(request.getTopK())
	.map(s -> this.store.get(s.key))
	.toList();
	}
	```
	### Response:
	"""

	# Five copies of the same prompt, generated in one batched call so that the
	# total duration of the batch can be measured.
	input_batch = [prompt] * 5

	# A decoder-only model continues from the last position of each row, so any
	# padding has to sit on the left. Fall back to EOS when the tokenizer defines
	# no pad token, otherwise `padding=True` cannot be honoured.
	tokenizer.padding_side = "left"
	if tokenizer.pad_token_id is None:
		tokenizer.pad_token = tokenizer.eos_token

	# perf_counter() is a monotonic clock, which makes it the right tool for
	# measuring a duration (time.time() can jump when the wall clock is adjusted).
	start = time.perf_counter()

	# Tokenization involves no autograd, so it needs no `no_grad` context.
	# Calling the tokenizer directly is the documented replacement for the legacy
	# `batch_encode_plus` method.
	tokens = tokenizer(
		input_batch,
		padding=True,
		return_tensors="pt").to(model.device)

	# The body of the `with` statement must be indented; the original left it at
	# the same level as the `with` line, which is a syntax error.
	with torch.no_grad():
		outputs = model.generate(
			tokens["input_ids"],
			# Pass the mask explicitly so padded positions are ignored instead
			# of being guessed from pad_token_id.
			attention_mask=tokens["attention_mask"],
			max_new_tokens=1024,
			do_sample=True,
			top_k=2,
			top_p=0.6,
			temperature=0.3,
			num_return_sequences=1,
			num_beams=1,
			eos_token_id=tokenizer.eos_token_id,
			pad_token_id=tokenizer.pad_token_id)

	# Make sure queued GPU work has finished before the clock is stopped.
	if torch.cuda.is_available():
		torch.cuda.synchronize()

	end = time.perf_counter()

	print(f"Duration: {end - start:10.2f} sec.")