Last active
October 30, 2023 18:12
-
-
Save alexandrehptavares/e571af2b15676a57c93deb6d931942f2 to your computer and use it in GitHub Desktop.
Fine-tune a GPT model using Langflow
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{"name":"Fine-tune GPT","description":"Building Intelligent Interactions.","data":{"nodes":[{"width":384,"height":623,"id":"ChatOpenAI-do85j","type":"genericNode","position":{"x":-1113.0013036522153,"y":322.9219767794165},"data":{"type":"ChatOpenAI","node":{"template":{"callbacks":{"required":false,"placeholder":"","show":false,"multiline":false,"password":false,"name":"callbacks","advanced":false,"dynamic":false,"info":"","type":"langchain.callbacks.base.BaseCallbackHandler","list":true},"cache":{"required":false,"placeholder":"","show":false,"multiline":false,"password":false,"name":"cache","advanced":false,"dynamic":false,"info":"","type":"bool","list":false},"client":{"required":false,"placeholder":"","show":false,"multiline":false,"password":false,"name":"client","advanced":false,"dynamic":false,"info":"","type":"Any","list":false},"max_retries":{"required":false,"placeholder":"","show":false,"multiline":false,"value":6,"password":false,"name":"max_retries","advanced":false,"dynamic":false,"info":"","type":"int","list":false},"max_tokens":{"required":false,"placeholder":"","show":true,"multiline":false,"password":true,"name":"max_tokens","advanced":false,"dynamic":false,"info":"","type":"int","list":false,"value":""},"metadata":{"required":false,"placeholder":"","show":false,"multiline":false,"password":false,"name":"metadata","advanced":false,"dynamic":false,"info":"","type":"dict","list":false},"model_kwargs":{"required":false,"placeholder":"","show":true,"multiline":false,"password":false,"name":"model_kwargs","advanced":true,"dynamic":false,"info":"","type":"dict","list":false},"model_name":{"required":false,"placeholder":"","show":true,"multiline":false,"value":"gpt-3.5-turbo","password":false,"options":["gpt-3.5-turbo-0613","gpt-3.5-turbo","gpt-3.5-turbo-16k-0613","gpt-3.5-turbo-16k","gpt-4-0613","gpt-4-32k-0613","gpt-4","gpt-4-32k"],"name":"model_name","advanced":false,"dynamic":false,"info":"","type":"str","list":true},"n":{"required":false,"placeholder":"","show":false,"multiline":false,"value":1,"password":false,"name":"n","advanced":false,"dynamic":false,"info":"","type":"int","list":false},"openai_api_base":{"required":false,"placeholder":"","show":true,"multiline":false,"password":false,"name":"openai_api_base","display_name":"OpenAI API Base","advanced":false,"dynamic":false,"info":"\nThe base URL of the OpenAI API. Defaults to https://api.openai.com/v1.\n\nYou can change this to use other APIs like JinaChat, LocalAI and Prem.\n","type":"str","list":false,"value":""},"openai_api_key":{"required":false,"placeholder":"","show":true,"multiline":false,"value":"","password":true,"name":"openai_api_key","display_name":"OpenAI API Key","advanced":false,"dynamic":false,"info":"","type":"str","list":false},"openai_organization":{"required":false,"placeholder":"","show":false,"multiline":false,"password":false,"name":"openai_organization","display_name":"OpenAI Organization","advanced":false,"dynamic":false,"info":"","type":"str","list":false},"openai_proxy":{"required":false,"placeholder":"","show":false,"multiline":false,"password":false,"name":"openai_proxy","display_name":"OpenAI Proxy","advanced":false,"dynamic":false,"info":"","type":"str","list":false},"request_timeout":{"required":false,"placeholder":"","show":false,"multiline":false,"password":false,"name":"request_timeout","advanced":false,"dynamic":false,"info":"","type":"float","list":false},"streaming":{"required":false,"placeholder":"","show":false,"multiline":false,"value":false,"password":false,"name":"streaming","advanced":false,"dynamic":false,"info":"","type":"bool","list":false},"tags":{"required":false,"placeholder":"","show":false,"multiline":false,"password":false,"name":"tags","advanced":false,"dynamic":false,"info":"","type":"str","list":true},"temperature":{"required":false,"placeholder":"","show":true,"multiline":false,"value":0.7,"password":false,"name":"temperature","advanced":false,"dynamic":false,"info":"","type":"float","list":false},"tiktoken_model_name":{"required":false,"placeholder":"","show":false,"multiline":false,"password":false,"name":"tiktoken_model_name","advanced":false,"dynamic":false,"info":"","type":"str","list":false},"verbose":{"required":false,"placeholder":"","show":false,"multiline":false,"value":false,"password":false,"name":"verbose","advanced":false,"dynamic":false,"info":"","type":"bool","list":false},"_type":"ChatOpenAI"},"description":"`OpenAI` Chat large language models API.","base_classes":["BaseChatModel","BaseLanguageModel","ChatOpenAI","BaseLLM"],"display_name":"ChatOpenAI","custom_fields":{},"output_types":[],"documentation":"https://python.langchain.com/docs/modules/model_io/models/chat/integrations/openai","beta":false,"error":null},"id":"ChatOpenAI-do85j"},"selected":false,"positionAbsolute":{"x":-1113.0013036522153,"y":322.9219767794165},"dragging":false},{"width":384,"height":727,"id":"CustomComponent-SNGd7","type":"genericNode","position":{"x":-592.8543094958106,"y":323.07654372746487},"data":{"type":"CustomComponent","node":{"template":{"code":{"dynamic":true,"required":true,"placeholder":"","show":true,"multiline":true,"value":"from typing import Optional\nfrom langflow import CustomComponent\nfrom langflow.template.field.base import TemplateField\nfrom langchain.llms import HuggingFaceEndpoint\nfrom langchain.llms.base import BaseLLM\nimport openai\nimport pickle\nimport pandas as pd\nimport json\nimport time\n\nclass FineTuneGPTComponent(CustomComponent):\n display_name: str = \"Fine Tune GPT Model\"\n description: str = \"Fine Tune a GPT Model with loaded training data. Data must be in .jsonl format or .csv with 'user' and 'assistant' columns. User can specify also the column 'system'.\"\n \n def convert_table_to_openai_format(self, table, model_type, system_content=None):\n dataset = []\n for _, row in table.iterrows():\n if model_type == \"gpt-3.5-turbo\":\n if system_content:\n dataset.append(\n {\"messages\": [\n {\"role\": \"system\", \"content\": system_content},\n {\"role\": \"user\", \"content\": row['user']},\n {\"role\": \"assistant\", \"content\": row['assistant']}\n ]}\n )\n else:\n dataset.append(\n {\"messages\": [\n {\"role\": \"user\", \"content\": row['user']},\n {\"role\": \"assistant\", \"content\": row['assistant']}\n ]}\n )\n elif model_type in [\"babbage-002\", \"davinci-002\"]:\n dataset.append(\n {\"prompt\": row['user'], \"completion\": row['assistant']}\n )\n else:\n raise ValueError(\"Invalid model type! \\n Options: 'gpt-3.5-turbo', 'babbage-002', 'davinci-002'\")\n self.repr_value = \"OK\"\n table.to_csv(\"TESTING.csv\")\n return dataset\n \n def build_config(self):\n return {\n \"training_data_path\": {\n \"display_name\": \"Training data\",\n \"required\": True,\n \"file_types\": [\"jsonl\",\"csv\"],\n \"field_type\": \"file\", \n \"suffixes\": [\".jsonl\",\".csv\"],\n },\n \"system_content\":{\n \"display_name\": \"System Content\",\n \"required\": False, \n },\n \"job_id_save_path\":{\n \"display_name\": \"job ID path\",\n \"required\": True, \n },\n \"n_epochs\":{\n \"display_name\": \"Epochs\",\n \"required\": False, \n },\n \"code\": {\"show\": True},\n }\n \n def build(\n self,\n training_data_path: str,\n LLM: BaseLLM,\n job_id_save_path: str=\"job_id.txt\",\n system_content = None,\n n_epochs = None,\n ) -> BaseLLM:\n \n openai_api_key = LLM.openai_api_key\n model_type = LLM.model_name\n \n try:\n # Check training data format\n file_type = training_data_path.split(\".\")[-1]\n raw_data_path = training_data_path.split(\".\")[0]\n \n if file_type == \"csv\":\n table = pd.read_csv(training_data_path)\n dataset = self.convert_table_to_openai_format(\n table, \n model_type=model_type,\n system_content=system_content,\n ) \n with open(raw_data_path+\".jsonl\", 'w') as outfile:\n for entry in dataset:\n json.dump(entry, outfile)\n outfile.write('\\n')\n elif file_type == \"json\":\n pass\n else:\n raise ValueError(\"Invalid training data format \\nOptions: .jsonl or .csv\")\n \n file_id = openai.File.create(\n file=open(raw_data_path+\".jsonl\", \"rb\"),\n purpose='fine-tune',\n api_key=openai_api_key,\n ).id\n job = openai.FineTuningJob.create(\n training_file=file_id, \n model=model_type,\n api_key=openai_api_key,\n hyperparameters={\"n_epochs\":n_epochs,}\n )\n \n # Save job id\n job_id = job.id\n with open(job_id_save_path,\"wb\") as file:\n pickle.dump(job_id, file)\n \n running = True\n while(running):\n status = openai.FineTuningJob.list_events(\n id=job_id, \n api_key=openai_api_key, \n limit=1\n )\n message = status['data'][0]['message']\n self.repr_value = message\n if message == \"The job has successfully completed\":\n running = False\n time.sleep(5)\n \n except Exception as e:\n #raise ValueError(\"Could not connect to OpeanAI.\") from e\n raise ValueError(e) from e\n self.repr_value = e\n return 0","password":false,"name":"code","advanced":false,"type":"code","list":false},"_type":"CustomComponent","LLM":{"required":true,"placeholder":"","show":true,"multiline":false,"password":false,"name":"LLM","display_name":"LLM","advanced":false,"dynamic":false,"info":"","type":"BaseLLM","list":false},"job_id_save_path":{"required":true,"placeholder":"","show":true,"multiline":false,"value":"job_id.txt","password":false,"name":"job_id_save_path","display_name":"job ID path","advanced":false,"dynamic":false,"info":"","type":"str","list":false},"n_epochs":{"required":false,"placeholder":"","show":true,"multiline":false,"password":false,"name":"n_epochs","display_name":"Epochs","advanced":false,"dynamic":false,"info":"","type":"str","list":false},"system_content":{"required":false,"placeholder":"","show":true,"multiline":false,"password":false,"name":"system_content","display_name":"System Content","advanced":false,"dynamic":false,"info":"","type":"str","list":false},"training_data_path":{"required":true,"placeholder":"","show":true,"multiline":false,"suffixes":[".jsonl",".csv"],"password":false,"name":"training_data_path","display_name":"Training data","advanced":false,"dynamic":false,"info":"","type":"file","list":false,"fileTypes":["jsonl","csv"],"file_path":"C:\\Users\\xande\\AppData\\Local\\langflow\\langflow\\Cache\\e426367d-67aa-413a-8ec6-e0d119340d12\\53ddd5e193322b52ce474aa708aeb88a38eccba3e17694ec264fd80ea444cb14.csv","value":"fine_tune_mdx.csv"}},"description":"Fine Tune a GPT Model with loaded training data. Data must be in .jsonl format or .csv with 'user' and 'assistant' columns. User can specify also the column 'system'.","base_classes":["BaseLanguageModel","BaseLLM"],"display_name":"Fine Tune GPT Model","custom_fields":{"LLM":null,"job_id_save_path":null,"n_epochs":null,"system_content":null,"training_data_path":null},"output_types":[],"documentation":"","beta":true,"error":null},"id":"CustomComponent-SNGd7"},"selected":false,"dragging":false,"positionAbsolute":{"x":-592.8543094958106,"y":323.07654372746487}}],"edges":[{"source":"ChatOpenAI-do85j","target":"CustomComponent-SNGd7","sourceHandle":"ChatOpenAI|ChatOpenAI-do85j|BaseChatModel|BaseLanguageModel|ChatOpenAI|BaseLLM","targetHandle":"BaseLLM|LLM|CustomComponent-SNGd7","id":"reactflow__edge-ChatOpenAI-do85jChatOpenAI|ChatOpenAI-do85j|BaseChatModel|BaseLanguageModel|ChatOpenAI|BaseLLM-CustomComponent-SNGd7BaseLLM|LLM|CustomComponent-SNGd7","style":{"stroke":"#555"},"className":"stroke-gray-900 ","animated":false,"selected":false}],"viewport":{"x":726.1502238317959,"y":-165.92230938004224,"zoom":0.5908314342817417}},"id":"e426367d-67aa-413a-8ec6-e0d119340d12","user_id":"09e0d67d-3765-43ba-9d5f-9dc205202340"} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment