Skip to content

Instantly share code, notes, and snippets.

@sieu-n
Created July 6, 2023 08:11
Show Gist options
  • Save sieu-n/88542733914f80f780359f5c82c99a62 to your computer and use it in GitHub Desktop.
Save sieu-n/88542733914f80f780359f5c82c99a62 to your computer and use it in GitHub Desktop.
import json
from multiprocessing.pool import ThreadPool
import datasets
from langchain.chat_models import ChatOpenAI
from langchain.prompts import (ChatPromptTemplate, HumanMessagePromptTemplate,
SystemMessagePromptTemplate)
from tqdm import tqdm
from enum import Enum
class Colors(Enum):
    """ANSI escape sequences used to colorize terminal output.

    Each member's value is the raw escape string to emit before the text;
    ``RESET`` is the sequence emitted afterwards to end the coloring.
    """

    RED = "\033[91m"
    GREEN = "\033[92m"
    YELLOW = "\033[93m"
    BLUE = "\033[94m"
    MAGENTA = "\033[95m"
    CYAN = "\033[96m"
    GREY = "\033[90m"
    RESET = "\033[0m"
def print_color(text, color, *args, **kwargs):
    """Print ``text`` wrapped in a color escape code, then reset the color.

    Args:
        text: Value to print; it is interpolated into an f-string.
        color: A ``Colors`` member (its ``.value`` is emitted before the text).
        *args: Extra positional arguments forwarded unchanged to ``print``.
        **kwargs: Keyword arguments forwarded unchanged to ``print``
            (for example ``end=""``).
    """
    print(f"{color.value}{text}{Colors.RESET.value}", *args, **kwargs)
def _translation_prompt(language, extra_guidance=None):
    """Build the two-message chat prompt for translating into ``language``.

    ``extra_guidance`` is an optional sentence inserted between the fixed
    instructions and the fenced source text. The ``{instruction}`` placeholder
    is left in the template to be filled in when the prompt is formatted.
    """
    human_parts = [
        f"Translate the following text to {language}.",
        "Show the answer only.",
    ]
    if extra_guidance is not None:
        human_parts.append(extra_guidance)
    human_parts.append("text=```{instruction}```")

    system_message = SystemMessagePromptTemplate.from_template(
        f"You are a helpful assistant fluent in English and {language}."
    )
    human_message = HumanMessagePromptTemplate.from_template(" ".join(human_parts))
    return ChatPromptTemplate.from_messages([system_message, human_message])


# Prompt variants keyed by name. The "-fluent" variants add a sentence, written
# in the target language, to the human message.
PROMPT_TEMPLATES = {
    "ko": _translation_prompt("Korean"),
    "ko-fluent": _translation_prompt(
        "Korean",
        "말 그대로 번역하지 말고, 의미가 유지되는 한에서 자연스러운 요청으로 번역해줘.",
    ),
    "ja": _translation_prompt("Japanese"),
    "ja-fluent": _translation_prompt(
        "Japanese",
        "このテキストを直訳するのではなく、その意味を保持しつつ、より自然なリクエストに言い換えて翻訳してください",
    ),
}
# --- Run configuration -------------------------------------------------------
# Number of worker threads used for the translation pool.
N_THREADS = 16
# When true, generate() echoes each instruction and its translation.
VERBOSE = True
# Dataset configuration name (not referenced elsewhere in the visible code).
DATASET_NAME = "alpaca_eval"
# Key into PROMPT_TEMPLATES selecting the prompt variant for this run.
cur_prompt = "ja-fluent"
# Destination of the translated dataset; the file name embeds the prompt key.
path = f"./evaluation/output/230705_{cur_prompt}_alpacaeval.json"
def _strip_code_fence(text):
    """Return ``text`` without a wrapping pair of triple-backtick fences.

    The text is returned unchanged unless it both starts and ends with a
    fence and is long enough for the two fences not to overlap.
    """
    fence = "```"
    is_wrapped = (
        len(text) >= 2 * len(fence)
        and text.startswith(fence)
        and text.endswith(fence)
    )
    if is_wrapped:
        return text[len(fence):-len(fence)]
    return text


def generate(sample):
    """Translate one dataset row's instruction using the active prompt.

    Args:
        sample: An ``(index, row)`` pair as produced by ``enumerate``; ``row``
            must provide an ``"instruction"`` entry.

    Returns:
        The model's reply with trailing whitespace removed and, if the reply
        is wrapped in triple backticks, with that wrapping removed.
    """
    idx, row = sample
    instruction = row["instruction"]
    messages = PROMPT_TEMPLATES[cur_prompt].format_messages(instruction=instruction)
    chat = ChatOpenAI(temperature=0, model_name="gpt-4")
    resp = chat.predict_messages(messages).content.rstrip()

    # The prompt wraps the source text in ``` fences, so the reply may come
    # back fenced as well. The check is made on the reply itself: testing the
    # instruction's ending would strip based on the wrong string.
    resp = _strip_code_fence(resp)

    if VERBOSE:
        print_color(f"(#{idx}) Human: ", Colors.GREEN, end="")
        print(instruction)
        print_color(f"(#{idx}) GPT: ", Colors.RED, end="")
        print_color("-------------------------", Colors.GREY)
        print(resp)
        print_color("--------------------------------", Colors.GREY)
    return resp
# NOTE: generate() does not catch API errors, so a failed request propagates to the caller.
if __name__ == "__main__":
    # Script entry point: translate every instruction in the eval split and
    # write the rows, with translated instructions, to ``path`` as JSON.
    import os

    eval_set = datasets.load_dataset("tatsu-lab/alpaca_eval", "alpaca_eval")["eval"]

    with ThreadPool(N_THREADS) as pool:
        # imap yields results in input order, so the translations stay aligned
        # with eval_set; wrapping it in tqdm shows progress as results arrive.
        translations = list(
            tqdm(pool.imap(generate, enumerate(eval_set)), total=len(eval_set))
        )

    # Replace each row's instruction with its translation, keeping all other
    # fields (and the key order) as they were.
    to_save = [
        {**sample, "instruction": translated}
        for sample, translated in zip(eval_set, translations)
    ]

    # Create the output directory up front so the finished run is not lost to
    # a missing folder.
    out_dir = os.path.dirname(path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # ensure_ascii=False writes Korean/Japanese characters verbatim, so the
    # file must be opened as UTF-8 rather than the platform default encoding.
    with open(path, "w", encoding="utf-8") as f:
        json.dump(to_save, f, ensure_ascii=False, indent=4)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment