mzbac/gist:b12f322dbeb7c9ef229d10b18d509951 Secret

## gistfile1.txt
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments,BitsAndBytesConfig
from datasets import load_dataset
import json

model_name ="meta-llama/Meta-Llama-3-8B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)

dataset = load_dataset("glaiveai/glaive-function-calling-v2",split="train")

def cleanup(input_string):
    arguments_index = input_string.find('"arguments"')

    if arguments_index == -1:
        return input_string

    start_quote = input_string.find("'", arguments_index)

    if start_quote == -1:
        return input_string

    end_quote = input_string.rfind("'")

    if end_quote == -1 or end_quote <= start_quote:
        return input_string

    arguments_value = input_string[start_quote+1:end_quote]

    output_string = input_string[:start_quote] + arguments_value + input_string[end_quote+1:]

    return output_string

def formatting_prompts_func(example):
    output_texts = []

    for i in range(len(example['system'])):
        messages = [
            {
                "role": "system",
                "content": example['system'][i][len("SYSTEM:"):].strip(),
            },
        ]
        conversations = example['chat'][i].split("<|endoftext|>")
        for message in conversations:
            continue_outer = False
            message = message.strip()
            if message:
                if "USER:" in message:
                    user_content = message.split("ASSISTANT:")[0].strip()
                    messages.append({"role": "user", "content": user_content[5:].strip()})

                    if "ASSISTANT:" in message:
                        assistant_content = message.split("ASSISTANT:")[1].strip()
                        if "<functioncall>" in assistant_content:
                            text = assistant_content.replace("<functioncall>","").strip()
                            json_str = cleanup(text)
                            try:
                                data = json.loads(json_str)
                            except json.JSONDecodeError as e:
                                print(f"0 - Failed to decode JSON: {json_str} - {assistant_content}")
                                continue_outer = True
                                break

                            new_func_text = "<functioncall> "+ json_str
                            messages.append({"role": "assistant", "content": new_func_text})
                        else:
                            messages.append({"role": "assistant", "content": assistant_content})
                elif message.startswith("FUNCTION RESPONSE:"):
                    function_response = message[18:].strip()
                    if "ASSISTANT:" in function_response:
                        function_content, assistant_content = function_response.split("ASSISTANT:")
                        try:
                            data = json.loads(function_content.strip())
                        except json.JSONDecodeError as e:
                            print(f"1 - Failed to decode JSON: {function_content}")
                            continue_outer = True
                            break

                        messages.append({"role": "user", "content": function_content.strip()})
                        messages.append({"role": "assistant", "content": assistant_content.strip()})
                    else:
                        try:
                            data = json.loads(function_response.strip())
                        except json.JSONDecodeError as e:
                            print(f"2 - Failed to decode JSON: {function_response}")
                            continue_outer = True
                            break
                        messages.append({"role": "user", "content": function_response.strip()})
                elif message.startswith("ASSISTANT:"):
                    assistant_content = message.split("ASSISTANT:")[1].strip()
                    if "<functioncall>" in assistant_content:
                        text = assistant_content.replace("<functioncall>","").strip()
                        json_str = cleanup(text)
                        try:
                            data = json.loads(json_str)
                        except json.JSONDecodeError as e:
                            print(f"3 - Failed to decode JSON: {json_str} - {assistant_content}")
                            continue_outer = True
                            break
                        new_func_text = "<functioncall> "+ json_str
                        messages.append({"role": "assistant", "content": new_func_text})
        if continue_outer:
            continue
        text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=False)
        output_texts.append(text)
    del example['system']
    del example['chat']
    return {"text": output_texts}
dataset = dataset.map(formatting_prompts_func, batched=True)
	from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments,BitsAndBytesConfig
	from datasets import load_dataset
	import json

	model_name ="meta-llama/Meta-Llama-3-8B-Instruct"
	tokenizer = AutoTokenizer.from_pretrained(model_name)

	dataset = load_dataset("glaiveai/glaive-function-calling-v2",split="train")

	def cleanup(input_string):
	arguments_index = input_string.find('"arguments"')

	if arguments_index == -1:
	return input_string

	start_quote = input_string.find("'", arguments_index)

	if start_quote == -1:
	return input_string

	end_quote = input_string.rfind("'")

	if end_quote == -1 or end_quote <= start_quote:
	return input_string

	arguments_value = input_string[start_quote+1:end_quote]

	output_string = input_string[:start_quote] + arguments_value + input_string[end_quote+1:]

	return output_string

	def formatting_prompts_func(example):
	output_texts = []

	for i in range(len(example['system'])):
	messages = [
	{
	"role": "system",
	"content": example['system'][i][len("SYSTEM:"):].strip(),
	},
	]
	conversations = example['chat'][i].split("<\|endoftext\|>")
	for message in conversations:
	continue_outer = False
	message = message.strip()
	if message:
	if "USER:" in message:
	user_content = message.split("ASSISTANT:")[0].strip()
	messages.append({"role": "user", "content": user_content[5:].strip()})

	if "ASSISTANT:" in message:
	assistant_content = message.split("ASSISTANT:")[1].strip()
	if "<functioncall>" in assistant_content:
	text = assistant_content.replace("<functioncall>","").strip()
	json_str = cleanup(text)
	try:
	data = json.loads(json_str)
	except json.JSONDecodeError as e:
	print(f"0 - Failed to decode JSON: {json_str} - {assistant_content}")
	continue_outer = True
	break

	new_func_text = "<functioncall> "+ json_str
	messages.append({"role": "assistant", "content": new_func_text})
	else:
	messages.append({"role": "assistant", "content": assistant_content})
	elif message.startswith("FUNCTION RESPONSE:"):
	function_response = message[18:].strip()
	if "ASSISTANT:" in function_response:
	function_content, assistant_content = function_response.split("ASSISTANT:")
	try:
	data = json.loads(function_content.strip())
	except json.JSONDecodeError as e:
	print(f"1 - Failed to decode JSON: {function_content}")
	continue_outer = True
	break

	messages.append({"role": "user", "content": function_content.strip()})
	messages.append({"role": "assistant", "content": assistant_content.strip()})
	else:
	try:
	data = json.loads(function_response.strip())
	except json.JSONDecodeError as e:
	print(f"2 - Failed to decode JSON: {function_response}")
	continue_outer = True
	break
	messages.append({"role": "user", "content": function_response.strip()})
	elif message.startswith("ASSISTANT:"):
	assistant_content = message.split("ASSISTANT:")[1].strip()
	if "<functioncall>" in assistant_content:
	text = assistant_content.replace("<functioncall>","").strip()
	json_str = cleanup(text)
	try:
	data = json.loads(json_str)
	except json.JSONDecodeError as e:
	print(f"3 - Failed to decode JSON: {json_str} - {assistant_content}")
	continue_outer = True
	break
	new_func_text = "<functioncall> "+ json_str
	messages.append({"role": "assistant", "content": new_func_text})
	if continue_outer:
	continue
	text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=False)
	output_texts.append(text)
	del example['system']
	del example['chat']
	return {"text": output_texts}
	dataset = dataset.map(formatting_prompts_func, batched=True)