Created
July 6, 2023 08:11
-
-
Save sieu-n/88542733914f80f780359f5c82c99a62 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json
import os
from enum import Enum
from multiprocessing.pool import ThreadPool

import datasets
from langchain.chat_models import ChatOpenAI
from langchain.prompts import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
)
from tqdm import tqdm
class Colors(Enum):
    """Terminal escape sequences used to colorize console output.

    Each member's ``value`` is the raw escape string to emit before the text;
    ``RESET`` is the sequence emitted afterwards to restore default styling.
    """

    RED = "\033[91m"
    GREEN = "\033[92m"
    YELLOW = "\033[93m"
    BLUE = "\033[94m"
    MAGENTA = "\033[95m"
    CYAN = "\033[96m"
    GREY = "\033[90m"
    RESET = "\033[0m"
# Function to print text with color
def print_color(text, color, *args, **kwargs):
    """Print ``text`` wrapped in a color escape code followed by a reset code.

    Args:
        text: Value to print; it is interpolated into an f-string.
        color: A ``Colors`` member whose ``value`` is emitted before ``text``.
        *args: Extra positional arguments forwarded unchanged to ``print``.
        **kwargs: Keyword arguments forwarded unchanged to ``print``
            (for example ``end=""``).
    """
    print(f"{color.value}{text}{Colors.RESET.value}", *args, **kwargs)
# Chat prompt templates keyed by translation mode. Every template takes a
# single ``instruction`` variable, which is placed between triple backticks.
# The "-fluent" variants append an extra sentence in the target language that
# asks for a natural-sounding request instead of a literal translation.
PROMPT_TEMPLATES = {
    "ko": ChatPromptTemplate.from_messages(
        [
            SystemMessagePromptTemplate.from_template(
                "You are a helpful assistant fluent in English and Korean."
            ),
            HumanMessagePromptTemplate.from_template(
                "Translate the following text to Korean. Show the answer only. text=```{instruction}```"
            ),
        ]
    ),
    "ko-fluent": ChatPromptTemplate.from_messages(
        [
            SystemMessagePromptTemplate.from_template(
                "You are a helpful assistant fluent in English and Korean."
            ),
            # Korean part: "Don't translate literally; translate it as a
            # natural request as long as the meaning is preserved."
            HumanMessagePromptTemplate.from_template(
                "Translate the following text to Korean. Show the answer only. 말 그대로 번역하지 말고, 의미가 유지되는 한에서 자연스러운 요청으로 번역해줘. text=```{instruction}```"
            ),
        ]
    ),
    "ja": ChatPromptTemplate.from_messages(
        [
            SystemMessagePromptTemplate.from_template(
                "You are a helpful assistant fluent in English and Japanese."
            ),
            HumanMessagePromptTemplate.from_template(
                "Translate the following text to Japanese. Show the answer only. text=```{instruction}```"
            ),
        ]
    ),
    "ja-fluent": ChatPromptTemplate.from_messages(
        [
            SystemMessagePromptTemplate.from_template(
                "You are a helpful assistant fluent in English and Japanese."
            ),
            # Japanese part: "Rather than translating this text literally,
            # rephrase it as a more natural request while keeping its meaning."
            HumanMessagePromptTemplate.from_template(
                "Translate the following text to Japanese. Show the answer only. このテキストを直訳するのではなく、その意味を保持しつつ、より自然なリクエストに言い換えて翻訳してください text=```{instruction}```"
            ),
        ]
    ),
}
# Run configuration.
# Key into PROMPT_TEMPLATES selecting which translation prompt is used.
cur_prompt = "ja-fluent"
# NOTE(review): not referenced by the code visible in this file; the dataset
# name is passed to load_dataset as a literal. Confirm before removing.
DATASET_NAME = "alpaca_eval"
# When true, generate() prints each instruction and its translation.
VERBOSE = True
# Size of the ThreadPool used to issue translation requests.
N_THREADS = 16
# Output JSON file; the prompt key is embedded in the file name.
path = f"./evaluation/output/230705_{cur_prompt}_alpacaeval.json"
def _strip_code_fence(text):
    """Return ``text`` without a surrounding pair of triple-backtick fences."""
    fence = "```"
    # Both fences must be on the text itself, and the text must be long enough
    # to hold two separate fences, so a lone "```" is not sliced against itself.
    if (
        len(text) >= 2 * len(fence)
        and text.startswith(fence)
        and text.endswith(fence)
    ):
        return text[len(fence):-len(fence)]
    return text


def generate(sample):
    """Translate one dataset sample's instruction with the selected prompt.

    Args:
        sample: An ``(index, record)`` pair as produced by ``enumerate``;
            ``record["instruction"]`` is the text to translate.

    Returns:
        The model's reply with trailing whitespace removed and, when the reply
        is wrapped in triple backticks, with that wrapper removed.

    Exceptions raised by the chat call are not caught here and propagate to
    the caller.
    """
    idx, sample = sample
    instruction = sample["instruction"]
    prompt = PROMPT_TEMPLATES[cur_prompt].format_messages(instruction=instruction)
    chat = ChatOpenAI(temperature=0, model_name="gpt-4")
    resp = chat.predict_messages(prompt).content.rstrip()
    # The prompt wraps the input in backticks and the model sometimes mirrors
    # that. The check must look at the response on both ends; the original
    # tested the instruction's tail, which missed fenced replies.
    resp = _strip_code_fence(resp)
    if VERBOSE:
        print_color(f"(#{idx}) Human: ", Colors.GREEN, end="")
        print(instruction)
        print_color(f"(#{idx}) GPT: ", Colors.RED, end="")
        print_color("-------------------------", Colors.GREY)
        print(resp)
        print_color("--------------------------------", Colors.GREY)
    return resp
# Script entry point: translate every instruction of the AlpacaEval "eval"
# split and write the records, with translated instructions, to ``path``.
if __name__ == "__main__":
    eval_set = datasets.load_dataset("tatsu-lab/alpaca_eval", "alpaca_eval")["eval"]

    # Create the output directory up front so a missing folder fails before
    # any API requests are made, not after all of them have completed.
    out_dir = os.path.dirname(path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    with ThreadPool(N_THREADS) as pool:
        # imap yields results in input order, so res[i] belongs to eval_set[i];
        # draining it through tqdm drives the progress bar.
        im = pool.imap(generate, enumerate(eval_set))
        res = list(tqdm(im, total=len(eval_set)))

    to_save = []
    for sample, translated in zip(eval_set, res):
        sample["instruction"] = translated
        to_save.append(sample)

    # ensure_ascii=False writes the translated text as raw characters, so the
    # file encoding is pinned to UTF-8 and does not depend on the locale.
    with open(path, "w", encoding="utf-8") as f:
        json.dump(to_save, f, ensure_ascii=False, indent=4)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment