@alexandrehptavares
Last active October 30, 2023 18:12
Fine-tune a GPT model using Langflow

The exported Langflow flow below wires a ChatOpenAI node (which supplies the OpenAI API key and base model name, gpt-3.5-turbo by default) into a custom "Fine Tune GPT Model" component. The component accepts training data as a .jsonl file, or as a .csv with 'user' and 'assistant' columns and an optional system message; it converts the CSV to OpenAI's chat fine-tuning format, uploads the file, starts a fine-tuning job, saves the job ID to disk, and polls the job's events until it reports completion.
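To illustrate the expected input, here is a minimal sketch of how one row of such a CSV maps to a chat-format JSONL entry, mirroring the conversion the component performs for gpt-3.5-turbo. The file name and row contents are hypothetical, not part of the flow:

import json
import pandas as pd

# Hypothetical training table with the columns the component expects.
table = pd.DataFrame([
    {"user": "What is Langflow?",
     "assistant": "A visual framework for building LangChain flows."},
])

system_content = "You are a helpful assistant."  # optional system message

# Each CSV row becomes one chat-format example in the JSONL training file.
with open("training_data.jsonl", "w") as outfile:
    for _, row in table.iterrows():
        entry = {"messages": [
            {"role": "system", "content": system_content},
            {"role": "user", "content": row["user"]},
            {"role": "assistant", "content": row["assistant"]},
        ]}
        json.dump(entry, outfile)
        outfile.write("\n")

For babbage-002 and davinci-002 the component instead emits {"prompt": ..., "completion": ...} pairs for each row.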
{"name":"Fine-tune GPT","description":"Building Intelligent Interactions.","data":{"nodes":[{"width":384,"height":623,"id":"ChatOpenAI-do85j","type":"genericNode","position":{"x":-1113.0013036522153,"y":322.9219767794165},"data":{"type":"ChatOpenAI","node":{"template":{"callbacks":{"required":false,"placeholder":"","show":false,"multiline":false,"password":false,"name":"callbacks","advanced":false,"dynamic":false,"info":"","type":"langchain.callbacks.base.BaseCallbackHandler","list":true},"cache":{"required":false,"placeholder":"","show":false,"multiline":false,"password":false,"name":"cache","advanced":false,"dynamic":false,"info":"","type":"bool","list":false},"client":{"required":false,"placeholder":"","show":false,"multiline":false,"password":false,"name":"client","advanced":false,"dynamic":false,"info":"","type":"Any","list":false},"max_retries":{"required":false,"placeholder":"","show":false,"multiline":false,"value":6,"password":false,"name":"max_retries","advanced":false,"dynamic":false,"info":"","type":"int","list":false},"max_tokens":{"required":false,"placeholder":"","show":true,"multiline":false,"password":true,"name":"max_tokens","advanced":false,"dynamic":false,"info":"","type":"int","list":false,"value":""},"metadata":{"required":false,"placeholder":"","show":false,"multiline":false,"password":false,"name":"metadata","advanced":false,"dynamic":false,"info":"","type":"dict","list":false},"model_kwargs":{"required":false,"placeholder":"","show":true,"multiline":false,"password":false,"name":"model_kwargs","advanced":true,"dynamic":false,"info":"","type":"dict","list":false},"model_name":{"required":false,"placeholder":"","show":true,"multiline":false,"value":"gpt-3.5-turbo","password":false,"options":["gpt-3.5-turbo-0613","gpt-3.5-turbo","gpt-3.5-turbo-16k-0613","gpt-3.5-turbo-16k","gpt-4-0613","gpt-4-32k-0613","gpt-4","gpt-4-32k"],"name":"model_name","advanced":false,"dynamic":false,"info":"","type":"str","list":true},"n":{"required":false,"placeholder":"","show":false,"multiline":false,"value":1,"password":false,"name":"n","advanced":false,"dynamic":false,"info":"","type":"int","list":false},"openai_api_base":{"required":false,"placeholder":"","show":true,"multiline":false,"password":false,"name":"openai_api_base","display_name":"OpenAI API Base","advanced":false,"dynamic":false,"info":"\nThe base URL of the OpenAI API. 
Defaults to https://api.openai.com/v1.\n\nYou can change this to use other APIs like JinaChat, LocalAI and Prem.\n","type":"str","list":false,"value":""},"openai_api_key":{"required":false,"placeholder":"","show":true,"multiline":false,"value":"","password":true,"name":"openai_api_key","display_name":"OpenAI API Key","advanced":false,"dynamic":false,"info":"","type":"str","list":false},"openai_organization":{"required":false,"placeholder":"","show":false,"multiline":false,"password":false,"name":"openai_organization","display_name":"OpenAI Organization","advanced":false,"dynamic":false,"info":"","type":"str","list":false},"openai_proxy":{"required":false,"placeholder":"","show":false,"multiline":false,"password":false,"name":"openai_proxy","display_name":"OpenAI Proxy","advanced":false,"dynamic":false,"info":"","type":"str","list":false},"request_timeout":{"required":false,"placeholder":"","show":false,"multiline":false,"password":false,"name":"request_timeout","advanced":false,"dynamic":false,"info":"","type":"float","list":false},"streaming":{"required":false,"placeholder":"","show":false,"multiline":false,"value":false,"password":false,"name":"streaming","advanced":false,"dynamic":false,"info":"","type":"bool","list":false},"tags":{"required":false,"placeholder":"","show":false,"multiline":false,"password":false,"name":"tags","advanced":false,"dynamic":false,"info":"","type":"str","list":true},"temperature":{"required":false,"placeholder":"","show":true,"multiline":false,"value":0.7,"password":false,"name":"temperature","advanced":false,"dynamic":false,"info":"","type":"float","list":false},"tiktoken_model_name":{"required":false,"placeholder":"","show":false,"multiline":false,"password":false,"name":"tiktoken_model_name","advanced":false,"dynamic":false,"info":"","type":"str","list":false},"verbose":{"required":false,"placeholder":"","show":false,"multiline":false,"value":false,"password":false,"name":"verbose","advanced":false,"dynamic":false,"info":"","type":"bool","list":false},"_type":"ChatOpenAI"},"description":"`OpenAI` Chat large language models API.","base_classes":["BaseChatModel","BaseLanguageModel","ChatOpenAI","BaseLLM"],"display_name":"ChatOpenAI","custom_fields":{},"output_types":[],"documentation":"https://python.langchain.com/docs/modules/model_io/models/chat/integrations/openai","beta":false,"error":null},"id":"ChatOpenAI-do85j"},"selected":false,"positionAbsolute":{"x":-1113.0013036522153,"y":322.9219767794165},"dragging":false},{"width":384,"height":727,"id":"CustomComponent-SNGd7","type":"genericNode","position":{"x":-592.8543094958106,"y":323.07654372746487},"data":{"type":"CustomComponent","node":{"template":{"code":{"dynamic":true,"required":true,"placeholder":"","show":true,"multiline":true,"value":"from typing import Optional\nfrom langflow import CustomComponent\nfrom langflow.template.field.base import TemplateField\nfrom langchain.llms import HuggingFaceEndpoint\nfrom langchain.llms.base import BaseLLM\nimport openai\nimport pickle\nimport pandas as pd\nimport json\nimport time\n\nclass FineTuneGPTComponent(CustomComponent):\n display_name: str = \"Fine Tune GPT Model\"\n description: str = \"Fine Tune a GPT Model with loaded training data. Data must be in .jsonl format or .csv with 'user' and 'assistant' columns. 
User can specify also the column 'system'.\"\n \n def convert_table_to_openai_format(self, table, model_type, system_content=None):\n dataset = []\n for _, row in table.iterrows():\n if model_type == \"gpt-3.5-turbo\":\n if system_content:\n dataset.append(\n {\"messages\": [\n {\"role\": \"system\", \"content\": system_content},\n {\"role\": \"user\", \"content\": row['user']},\n {\"role\": \"assistant\", \"content\": row['assistant']}\n ]}\n )\n else:\n dataset.append(\n {\"messages\": [\n {\"role\": \"user\", \"content\": row['user']},\n {\"role\": \"assistant\", \"content\": row['assistant']}\n ]}\n )\n elif model_type in [\"babbage-002\", \"davinci-002\"]:\n dataset.append(\n {\"prompt\": row['user'], \"completion\": row['assistant']}\n )\n else:\n raise ValueError(\"Invalid model type! \\n Options: 'gpt-3.5-turbo', 'babbage-002', 'davinci-002'\")\n self.repr_value = \"OK\"\n table.to_csv(\"TESTING.csv\")\n return dataset\n \n def build_config(self):\n return {\n \"training_data_path\": {\n \"display_name\": \"Training data\",\n \"required\": True,\n \"file_types\": [\"jsonl\",\"csv\"],\n \"field_type\": \"file\", \n \"suffixes\": [\".jsonl\",\".csv\"],\n },\n \"system_content\":{\n \"display_name\": \"System Content\",\n \"required\": False, \n },\n \"job_id_save_path\":{\n \"display_name\": \"job ID path\",\n \"required\": True, \n },\n \"n_epochs\":{\n \"display_name\": \"Epochs\",\n \"required\": False, \n },\n \"code\": {\"show\": True},\n }\n \n def build(\n self,\n training_data_path: str,\n LLM: BaseLLM,\n job_id_save_path: str=\"job_id.txt\",\n system_content = None,\n n_epochs = None,\n ) -> BaseLLM:\n \n openai_api_key = LLM.openai_api_key\n model_type = LLM.model_name\n \n try:\n # Check training data format\n file_type = training_data_path.split(\".\")[-1]\n raw_data_path = training_data_path.split(\".\")[0]\n \n if file_type == \"csv\":\n table = pd.read_csv(training_data_path)\n dataset = self.convert_table_to_openai_format(\n table, \n model_type=model_type,\n system_content=system_content,\n ) \n with open(raw_data_path+\".jsonl\", 'w') as outfile:\n for entry in dataset:\n json.dump(entry, outfile)\n outfile.write('\\n')\n elif file_type == \"json\":\n pass\n else:\n raise ValueError(\"Invalid training data format \\nOptions: .jsonl or .csv\")\n \n file_id = openai.File.create(\n file=open(raw_data_path+\".jsonl\", \"rb\"),\n purpose='fine-tune',\n api_key=openai_api_key,\n ).id\n job = openai.FineTuningJob.create(\n training_file=file_id, \n model=model_type,\n api_key=openai_api_key,\n hyperparameters={\"n_epochs\":n_epochs,}\n )\n \n # Save job id\n job_id = job.id\n with open(job_id_save_path,\"wb\") as file:\n pickle.dump(job_id, file)\n \n running = True\n while(running):\n status = openai.FineTuningJob.list_events(\n id=job_id, \n api_key=openai_api_key, \n limit=1\n )\n message = status['data'][0]['message']\n self.repr_value = message\n if message == \"The job has successfully completed\":\n running = False\n time.sleep(5)\n \n except Exception as e:\n #raise ValueError(\"Could not connect to OpeanAI.\") from e\n raise ValueError(e) from e\n self.repr_value = e\n return 
0","password":false,"name":"code","advanced":false,"type":"code","list":false},"_type":"CustomComponent","LLM":{"required":true,"placeholder":"","show":true,"multiline":false,"password":false,"name":"LLM","display_name":"LLM","advanced":false,"dynamic":false,"info":"","type":"BaseLLM","list":false},"job_id_save_path":{"required":true,"placeholder":"","show":true,"multiline":false,"value":"job_id.txt","password":false,"name":"job_id_save_path","display_name":"job ID path","advanced":false,"dynamic":false,"info":"","type":"str","list":false},"n_epochs":{"required":false,"placeholder":"","show":true,"multiline":false,"password":false,"name":"n_epochs","display_name":"Epochs","advanced":false,"dynamic":false,"info":"","type":"str","list":false},"system_content":{"required":false,"placeholder":"","show":true,"multiline":false,"password":false,"name":"system_content","display_name":"System Content","advanced":false,"dynamic":false,"info":"","type":"str","list":false},"training_data_path":{"required":true,"placeholder":"","show":true,"multiline":false,"suffixes":[".jsonl",".csv"],"password":false,"name":"training_data_path","display_name":"Training data","advanced":false,"dynamic":false,"info":"","type":"file","list":false,"fileTypes":["jsonl","csv"],"file_path":"C:\\Users\\xande\\AppData\\Local\\langflow\\langflow\\Cache\\e426367d-67aa-413a-8ec6-e0d119340d12\\53ddd5e193322b52ce474aa708aeb88a38eccba3e17694ec264fd80ea444cb14.csv","value":"fine_tune_mdx.csv"}},"description":"Fine Tune a GPT Model with loaded training data. Data must be in .jsonl format or .csv with 'user' and 'assistant' columns. User can specify also the column 'system'.","base_classes":["BaseLanguageModel","BaseLLM"],"display_name":"Fine Tune GPT Model","custom_fields":{"LLM":null,"job_id_save_path":null,"n_epochs":null,"system_content":null,"training_data_path":null},"output_types":[],"documentation":"","beta":true,"error":null},"id":"CustomComponent-SNGd7"},"selected":false,"dragging":false,"positionAbsolute":{"x":-592.8543094958106,"y":323.07654372746487}}],"edges":[{"source":"ChatOpenAI-do85j","target":"CustomComponent-SNGd7","sourceHandle":"ChatOpenAI|ChatOpenAI-do85j|BaseChatModel|BaseLanguageModel|ChatOpenAI|BaseLLM","targetHandle":"BaseLLM|LLM|CustomComponent-SNGd7","id":"reactflow__edge-ChatOpenAI-do85jChatOpenAI|ChatOpenAI-do85j|BaseChatModel|BaseLanguageModel|ChatOpenAI|BaseLLM-CustomComponent-SNGd7BaseLLM|LLM|CustomComponent-SNGd7","style":{"stroke":"#555"},"className":"stroke-gray-900 ","animated":false,"selected":false}],"viewport":{"x":726.1502238317959,"y":-165.92230938004224,"zoom":0.5908314342817417}},"id":"e426367d-67aa-413a-8ec6-e0d119340d12","user_id":"09e0d67d-3765-43ba-9d5f-9dc205202340"}