{
"cells": [
{
"cell_type": "markdown",
"id": "389fcfcf-7862-4ad3-9c62-47b339420dc7",
"metadata": {},
"source": [
"# Comparison of Together.XYZ models with OpenLLM Leaderboard Data"
]
},
{
"cell_type": "markdown",
"id": "f080ecfa-b318-44bb-9f14-ee28ddf6e5e7",
"metadata": {},
"source": [
"NB: Matching performed using fuzzy string matching and is not perfect. Manually compare results where needed."
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "4d31ccbf-dc2f-4b55-9087-8ca5ec882903",
"metadata": {},
"outputs": [],
"source": [
"from pathlib import Path\n",
"import json\n",
"import pandas as pd\n",
"import pydash\n",
"import requests\n",
"from thefuzz import process\n",
"\n",
"pd.options.display.max_rows = 9999\n",
"pd.options.display.max_seq_items = 9999"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "b291c7a1-c698-4613-8565-c60323bdcb43",
"metadata": {},
"outputs": [],
"source": [
"together_models = pd.DataFrame(\n",
" data=json.loads(Path('together_models.json').read_text())\n",
")\n",
"\n",
"openllm_input = pydash.get(\n",
" json.loads(Path('hf_open_llm_leaderboard.json').read_text()),\n",
" 'output.data.0',\n",
")\n",
"\n",
"openllm_input_benchmarks = pd.DataFrame(\n",
" data=openllm_input['data'],\n",
" columns=openllm_input['headers']\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "012f9fc8-0cb2-4e8b-a892-b303489be471",
"metadata": {},
"outputs": [],
"source": [
"def lookup_match(\n",
" *, query: str, data: pd.DataFrame, data_key: str, score_cutoff: float,\n",
"):\n",
" if match := process.extractOne(\n",
" query,\n",
" data[data_key],\n",
" score_cutoff=score_cutoff,\n",
" ):\n",
" choice, score, key = match\n",
" return data.iloc[key]"
]
},
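{
"cell_type": "markdown",
"id": "0f1e2d3c-4b5a-4678-9abc-def012345678",
"metadata": {},
"source": [
"A minimal illustration of the caveat above (hypothetical cell added for clarity, using a hand-picked candidate subset): `process.extractOne` can return a high-scoring match for a model that has no true counterpart among the candidates, e.g. `togethercomputer/llama-2-70b` pairing with `togethercomputer/LLaMA-2-7B-32K` in the results below."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1a2b3c4d-5e6f-4071-8293-a4b5c6d7e8f9",
"metadata": {},
"outputs": [],
"source": [
"# Hypothetical example, not part of the original analysis: the candidate\n",
"# list is a small subset of leaderboard names, and the query has no exact\n",
"# counterpart, yet a match above the 90 cutoff is still returned.\n",
"process.extractOne(\n",
"    'togethercomputer/llama-2-70b',\n",
"    ['togethercomputer/LLaMA-2-7B-32K',\n",
"     'togethercomputer/Llama-2-7B-32K-Instruct'],\n",
"    score_cutoff=90,\n",
")"
]
},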
{
"cell_type": "code",
"execution_count": 4,
"id": "5749fc58-2cfd-4791-845d-745259ce010c",
"metadata": {},
"outputs": [],
"source": [
"results = []\n",
"\n",
"for _, together_model in together_models.iterrows():\n",
" benchmark_match = lookup_match(\n",
" query=together_model['name'],\n",
" data=openllm_input_benchmarks,\n",
" data_key='model_name_for_query',\n",
" score_cutoff=90,\n",
" )\n",
"\n",
" if benchmark_match is None:\n",
" continue\n",
"\n",
" results.append(\n",
" dict(\n",
" together_model=together_model['name'],\n",
" openllm_model=benchmark_match['model_name_for_query'],\n",
" openllm_rank=benchmark_match.name,\n",
" ) | (\n",
" {\n",
" column_name: benchmark_match[column_name]\n",
" for column_name in ['T', 'Average ⬆️', 'ARC', 'HellaSwag', 'MMLU', 'TruthfulQA', 'Winogrande', 'GSM8K', 'Type']\n",
" }\n",
" )\n",
" )"
]
},
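{
"cell_type": "markdown",
"id": "2b3c4d5e-6f70-4182-93a4-b5c6d7e8f901",
"metadata": {},
"source": [
"A possible follow-up check (hypothetical cell, not in the original notebook): list the rows where the matched names differ, since these are the pairings most worth verifying manually."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3c4d5e6f-7081-4293-a4b5-c6d7e8f90123",
"metadata": {},
"outputs": [],
"source": [
"# Surface fuzzy matches that paired two different model names; assumes\n",
"# `results` from the matching loop above.\n",
"review = pd.DataFrame(results)\n",
"review[review['together_model'] != review['openllm_model']]"
]
},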
{
"cell_type": "code",
"execution_count": 5,
"id": "cdf1211b-1f7e-4ba5-8c73-fc792f931ac4",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>together_model</th>\n",
" <th>openllm_model</th>\n",
" <th>openllm_rank</th>\n",
" <th>T</th>\n",
" <th>Average ⬆️</th>\n",
" <th>ARC</th>\n",
" <th>HellaSwag</th>\n",
" <th>MMLU</th>\n",
" <th>TruthfulQA</th>\n",
" <th>Winogrande</th>\n",
" <th>GSM8K</th>\n",
" <th>Type</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>42</th>\n",
" <td>upstage/SOLAR-10.7B-Instruct-v1.0</td>\n",
" <td>upstage/SOLAR-10.7B-Instruct-v1.0</td>\n",
" <td>3</td>\n",
" <td>🟦</td>\n",
" <td>74.20</td>\n",
" <td>71.08</td>\n",
" <td>88.16</td>\n",
" <td>66.21</td>\n",
" <td>71.43</td>\n",
" <td>83.58</td>\n",
" <td>64.75</td>\n",
" <td>RL-tuned</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>NousResearch/Nous-Hermes-2-Yi-34B</td>\n",
" <td>NousResearch/Nous-Hermes-2-Yi-34B</td>\n",
" <td>13</td>\n",
" <td>🔶</td>\n",
" <td>73.74</td>\n",
" <td>66.89</td>\n",
" <td>85.49</td>\n",
" <td>76.70</td>\n",
" <td>60.37</td>\n",
" <td>82.95</td>\n",
" <td>70.05</td>\n",
" <td>fine-tuned</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>mistralai/Mixtral-8x7B-Instruct-v0.1</td>\n",
" <td>mistralai/Mixtral-8x7B-Instruct-v0.1</td>\n",
" <td>21</td>\n",
" <td>⭕</td>\n",
" <td>72.62</td>\n",
" <td>70.22</td>\n",
" <td>87.63</td>\n",
" <td>71.16</td>\n",
" <td>64.58</td>\n",
" <td>81.37</td>\n",
" <td>60.73</td>\n",
" <td>instruction-tuned</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>garage-bAInd/Platypus2-70B-instruct</td>\n",
" <td>garage-bAInd/Platypus2-70B-instruct</td>\n",
" <td>87</td>\n",
" <td>🔶</td>\n",
" <td>69.30</td>\n",
" <td>71.84</td>\n",
" <td>87.94</td>\n",
" <td>70.48</td>\n",
" <td>62.26</td>\n",
" <td>82.72</td>\n",
" <td>40.56</td>\n",
" <td>fine-tuned</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25</th>\n",
" <td>openchat/openchat-3.5-1210</td>\n",
" <td>openchat/openchat-3.5-1210</td>\n",
" <td>98</td>\n",
" <td>🔶</td>\n",
" <td>68.89</td>\n",
" <td>64.93</td>\n",
" <td>84.92</td>\n",
" <td>64.62</td>\n",
" <td>52.15</td>\n",
" <td>80.74</td>\n",
" <td>65.96</td>\n",
" <td>fine-tuned</td>\n",
" </tr>\n",
" <tr>\n",
" <th>43</th>\n",
" <td>mistralai/Mixtral-8x7B-v0.1</td>\n",
" <td>mistralai/Mixtral-8x7B-v0.1</td>\n",
" <td>111</td>\n",
" <td>🟢</td>\n",
" <td>68.42</td>\n",
" <td>66.04</td>\n",
" <td>86.49</td>\n",
" <td>71.82</td>\n",
" <td>46.78</td>\n",
" <td>81.93</td>\n",
" <td>57.47</td>\n",
" <td>pretrained</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22</th>\n",
" <td>mistralai/Mistral-7B-Instruct-v0.2</td>\n",
" <td>mistralai/Mistral-7B-Instruct-v0.2</td>\n",
" <td>187</td>\n",
" <td>⭕</td>\n",
" <td>65.71</td>\n",
" <td>63.14</td>\n",
" <td>84.88</td>\n",
" <td>60.78</td>\n",
" <td>68.26</td>\n",
" <td>77.19</td>\n",
" <td>40.03</td>\n",
" <td>instruction-tuned</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>huggyllama/llama-65b</td>\n",
" <td>huggyllama/llama-65b</td>\n",
" <td>250</td>\n",
" <td>🟢</td>\n",
" <td>62.79</td>\n",
" <td>63.48</td>\n",
" <td>86.09</td>\n",
" <td>63.93</td>\n",
" <td>43.43</td>\n",
" <td>82.56</td>\n",
" <td>37.23</td>\n",
" <td>pretrained</td>\n",
" </tr>\n",
" <tr>\n",
" <th>44</th>\n",
" <td>HuggingFaceH4/zephyr-7b-beta</td>\n",
" <td>HuggingFaceH4/zephyr-7b-beta</td>\n",
" <td>274</td>\n",
" <td>🔶</td>\n",
" <td>61.95</td>\n",
" <td>62.03</td>\n",
" <td>84.36</td>\n",
" <td>61.07</td>\n",
" <td>57.45</td>\n",
" <td>77.74</td>\n",
" <td>29.04</td>\n",
" <td>fine-tuned</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27</th>\n",
" <td>teknium/OpenHermes-2p5-Mistral-7B</td>\n",
" <td>teknium/OpenHermes-2.5-Mistral-7B</td>\n",
" <td>291</td>\n",
" <td>🔶</td>\n",
" <td>61.52</td>\n",
" <td>64.93</td>\n",
" <td>84.18</td>\n",
" <td>63.64</td>\n",
" <td>52.24</td>\n",
" <td>78.06</td>\n",
" <td>26.08</td>\n",
" <td>fine-tuned</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26</th>\n",
" <td>teknium/OpenHermes-2-Mistral-7B</td>\n",
" <td>teknium/OpenHermes-2.5-Mistral-7B</td>\n",
" <td>291</td>\n",
" <td>🔶</td>\n",
" <td>61.52</td>\n",
" <td>64.93</td>\n",
" <td>84.18</td>\n",
" <td>63.64</td>\n",
" <td>52.24</td>\n",
" <td>78.06</td>\n",
" <td>26.08</td>\n",
" <td>fine-tuned</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>WizardLM/WizardLM-70B-V1.0</td>\n",
" <td>WizardLM/WizardLM-70B-V1.0</td>\n",
" <td>299</td>\n",
" <td>⭕</td>\n",
" <td>61.25</td>\n",
" <td>65.44</td>\n",
" <td>84.41</td>\n",
" <td>64.05</td>\n",
" <td>54.81</td>\n",
" <td>80.82</td>\n",
" <td>17.97</td>\n",
" <td>instruction-tuned</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23</th>\n",
" <td>mistralai/Mistral-7B-v0.1</td>\n",
" <td>mistralai/Mistral-7B-v0.1</td>\n",
" <td>314</td>\n",
" <td>🟢</td>\n",
" <td>60.97</td>\n",
" <td>59.98</td>\n",
" <td>83.31</td>\n",
" <td>64.16</td>\n",
" <td>42.15</td>\n",
" <td>78.37</td>\n",
" <td>37.83</td>\n",
" <td>pretrained</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>Open-Orca/Mistral-7B-OpenOrca</td>\n",
" <td>Open-Orca/Mistral-7B-OpenOrca</td>\n",
" <td>342</td>\n",
" <td>🔶</td>\n",
" <td>60.17</td>\n",
" <td>64.08</td>\n",
" <td>83.99</td>\n",
" <td>62.24</td>\n",
" <td>53.05</td>\n",
" <td>77.74</td>\n",
" <td>19.94</td>\n",
" <td>fine-tuned</td>\n",
" </tr>\n",
" <tr>\n",
" <th>59</th>\n",
" <td>huggyllama/llama-30b</td>\n",
" <td>huggyllama/llama-30b</td>\n",
" <td>475</td>\n",
" <td>?</td>\n",
" <td>56.96</td>\n",
" <td>61.43</td>\n",
" <td>84.73</td>\n",
" <td>58.45</td>\n",
" <td>42.27</td>\n",
" <td>80.03</td>\n",
" <td>14.86</td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Austism/chronos-hermes-13b</td>\n",
" <td>Austism/chronos-hermes-13b-v2</td>\n",
" <td>523</td>\n",
" <td>🔶</td>\n",
" <td>56.10</td>\n",
" <td>60.32</td>\n",
" <td>83.21</td>\n",
" <td>55.05</td>\n",
" <td>50.91</td>\n",
" <td>75.37</td>\n",
" <td>11.75</td>\n",
" <td>fine-tuned</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>Undi95/ReMM-SLERP-L2-13B</td>\n",
" <td>Undi95/ReMM-SLERP-L2-13B</td>\n",
" <td>527</td>\n",
" <td>🔶</td>\n",
" <td>56.03</td>\n",
" <td>60.92</td>\n",
" <td>83.56</td>\n",
" <td>55.33</td>\n",
" <td>51.97</td>\n",
" <td>75.22</td>\n",
" <td>9.17</td>\n",
" <td>fine-tuned</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Gryphe/MythoMax-L2-13b</td>\n",
" <td>Gryphe/MythoMax-L2-13b</td>\n",
" <td>531</td>\n",
" <td>🔶</td>\n",
" <td>56.00</td>\n",
" <td>60.92</td>\n",
" <td>83.56</td>\n",
" <td>55.33</td>\n",
" <td>51.97</td>\n",
" <td>75.22</td>\n",
" <td>9.02</td>\n",
" <td>fine-tuned</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>NousResearch/Nous-Hermes-Llama2-13b</td>\n",
" <td>NousResearch/Nous-Hermes-Llama2-13b</td>\n",
" <td>532</td>\n",
" <td>🔶</td>\n",
" <td>55.97</td>\n",
" <td>61.52</td>\n",
" <td>83.29</td>\n",
" <td>55.11</td>\n",
" <td>50.38</td>\n",
" <td>75.45</td>\n",
" <td>10.08</td>\n",
" <td>fine-tuned</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>lmsys/vicuna-13b-v1.5</td>\n",
" <td>lmsys/vicuna-13b-v1.5</td>\n",
" <td>563</td>\n",
" <td>🔶</td>\n",
" <td>55.41</td>\n",
" <td>57.08</td>\n",
" <td>81.24</td>\n",
" <td>56.67</td>\n",
" <td>51.51</td>\n",
" <td>74.66</td>\n",
" <td>11.30</td>\n",
" <td>fine-tuned</td>\n",
" </tr>\n",
" <tr>\n",
" <th>56</th>\n",
" <td>lmsys/vicuna-13b-v1.3</td>\n",
" <td>lmsys/vicuna-13b-v1.5</td>\n",
" <td>563</td>\n",
" <td>🔶</td>\n",
" <td>55.41</td>\n",
" <td>57.08</td>\n",
" <td>81.24</td>\n",
" <td>56.67</td>\n",
" <td>51.51</td>\n",
" <td>74.66</td>\n",
" <td>11.30</td>\n",
" <td>fine-tuned</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>lmsys/vicuna-13b-v1.5-16k</td>\n",
" <td>lmsys/vicuna-13b-v1.5-16k</td>\n",
" <td>596</td>\n",
" <td>🔶</td>\n",
" <td>54.97</td>\n",
" <td>56.74</td>\n",
" <td>80.37</td>\n",
" <td>55.28</td>\n",
" <td>51.96</td>\n",
" <td>72.38</td>\n",
" <td>13.12</td>\n",
" <td>fine-tuned</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21</th>\n",
" <td>mistralai/Mistral-7B-Instruct-v0.1</td>\n",
" <td>mistralai/Mistral-7B-Instruct-v0.1</td>\n",
" <td>597</td>\n",
" <td>⭕</td>\n",
" <td>54.96</td>\n",
" <td>54.52</td>\n",
" <td>75.63</td>\n",
" <td>55.38</td>\n",
" <td>56.28</td>\n",
" <td>73.72</td>\n",
" <td>14.25</td>\n",
" <td>instruction-tuned</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>WizardLM/WizardLM-13B-V1.2</td>\n",
" <td>WizardLM/WizardLM-13B-V1.2</td>\n",
" <td>608</td>\n",
" <td>🔶</td>\n",
" <td>54.76</td>\n",
" <td>59.04</td>\n",
" <td>82.21</td>\n",
" <td>54.64</td>\n",
" <td>47.27</td>\n",
" <td>71.90</td>\n",
" <td>13.50</td>\n",
" <td>fine-tuned</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50</th>\n",
" <td>NousResearch/Nous-Hermes-13b</td>\n",
" <td>NousResearch/Nous-Hermes-13b</td>\n",
" <td>641</td>\n",
" <td>🔶</td>\n",
" <td>54.04</td>\n",
" <td>56.57</td>\n",
" <td>82.11</td>\n",
" <td>50.44</td>\n",
" <td>51.50</td>\n",
" <td>75.30</td>\n",
" <td>8.34</td>\n",
" <td>fine-tuned</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>NousResearch/Nous-Capybara-7B-V1p9</td>\n",
" <td>NousResearch/Nous-Capybara-7B</td>\n",
" <td>723</td>\n",
" <td>🔶</td>\n",
" <td>52.70</td>\n",
" <td>55.29</td>\n",
" <td>80.73</td>\n",
" <td>48.72</td>\n",
" <td>51.13</td>\n",
" <td>73.32</td>\n",
" <td>6.97</td>\n",
" <td>fine-tuned</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <td>lmsys/vicuna-7b-v1.5</td>\n",
" <td>lmsys/vicuna-7b-v1.5</td>\n",
" <td>761</td>\n",
" <td>🔶</td>\n",
" <td>52.06</td>\n",
" <td>53.24</td>\n",
" <td>77.39</td>\n",
" <td>51.04</td>\n",
" <td>50.34</td>\n",
" <td>72.14</td>\n",
" <td>8.19</td>\n",
" <td>fine-tuned</td>\n",
" </tr>\n",
" <tr>\n",
" <th>55</th>\n",
" <td>lmsys/vicuna-7b-v1.3</td>\n",
" <td>lmsys/vicuna-7b-v1.5</td>\n",
" <td>761</td>\n",
" <td>🔶</td>\n",
" <td>52.06</td>\n",
" <td>53.24</td>\n",
" <td>77.39</td>\n",
" <td>51.04</td>\n",
" <td>50.34</td>\n",
" <td>72.14</td>\n",
" <td>8.19</td>\n",
" <td>fine-tuned</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>NousResearch/Nous-Hermes-llama-2-7b</td>\n",
" <td>NousResearch/Nous-Hermes-llama-2-7b</td>\n",
" <td>770</td>\n",
" <td>🔶</td>\n",
" <td>51.87</td>\n",
" <td>55.12</td>\n",
" <td>78.94</td>\n",
" <td>48.34</td>\n",
" <td>49.01</td>\n",
" <td>74.03</td>\n",
" <td>5.76</td>\n",
" <td>fine-tuned</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>NousResearch/Nous-Hermes-Llama2-70b</td>\n",
" <td>NousResearch/Nous-Hermes-llama-2-7b</td>\n",
" <td>770</td>\n",
" <td>🔶</td>\n",
" <td>51.87</td>\n",
" <td>55.12</td>\n",
" <td>78.94</td>\n",
" <td>48.34</td>\n",
" <td>49.01</td>\n",
" <td>74.03</td>\n",
" <td>5.76</td>\n",
" <td>fine-tuned</td>\n",
" </tr>\n",
" <tr>\n",
" <th>57</th>\n",
" <td>huggyllama/llama-13b</td>\n",
" <td>huggyllama/llama-13b</td>\n",
" <td>792</td>\n",
" <td>🟢</td>\n",
" <td>51.33</td>\n",
" <td>56.14</td>\n",
" <td>80.92</td>\n",
" <td>47.61</td>\n",
" <td>39.48</td>\n",
" <td>76.24</td>\n",
" <td>7.58</td>\n",
" <td>pretrained</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>WizardLM/WizardCoder-Python-34B-V1.0</td>\n",
" <td>WizardLM/WizardCoder-Python-34B-V1.0</td>\n",
" <td>822</td>\n",
" <td>⭕</td>\n",
" <td>50.46</td>\n",
" <td>52.13</td>\n",
" <td>74.78</td>\n",
" <td>49.15</td>\n",
" <td>48.85</td>\n",
" <td>68.35</td>\n",
" <td>9.48</td>\n",
" <td>instruction-tuned</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32</th>\n",
" <td>togethercomputer/Llama-2-7B-32K-Instruct</td>\n",
" <td>togethercomputer/Llama-2-7B-32K-Instruct</td>\n",
" <td>834</td>\n",
" <td>🔶</td>\n",
" <td>50.02</td>\n",
" <td>51.11</td>\n",
" <td>78.51</td>\n",
" <td>46.11</td>\n",
" <td>44.86</td>\n",
" <td>73.88</td>\n",
" <td>5.69</td>\n",
" <td>fine-tuned</td>\n",
" </tr>\n",
" <tr>\n",
" <th>41</th>\n",
" <td>togethercomputer/llama-2-7b</td>\n",
" <td>togethercomputer/Llama-2-7B-32K-Instruct</td>\n",
" <td>834</td>\n",
" <td>🔶</td>\n",
" <td>50.02</td>\n",
" <td>51.11</td>\n",
" <td>78.51</td>\n",
" <td>46.11</td>\n",
" <td>44.86</td>\n",
" <td>73.88</td>\n",
" <td>5.69</td>\n",
" <td>fine-tuned</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Nexusflow/NexusRaven-V2-13B</td>\n",
" <td>Nexusflow/NexusRaven-V2-13B</td>\n",
" <td>884</td>\n",
" <td>🔶</td>\n",
" <td>48.21</td>\n",
" <td>45.14</td>\n",
" <td>67.40</td>\n",
" <td>44.88</td>\n",
" <td>44.54</td>\n",
" <td>66.38</td>\n",
" <td>20.92</td>\n",
" <td>fine-tuned</td>\n",
" </tr>\n",
" <tr>\n",
" <th>40</th>\n",
" <td>togethercomputer/llama-2-70b</td>\n",
" <td>togethercomputer/LLaMA-2-7B-32K</td>\n",
" <td>913</td>\n",
" <td>🔶</td>\n",
" <td>47.07</td>\n",
" <td>47.53</td>\n",
" <td>76.14</td>\n",
" <td>43.33</td>\n",
" <td>39.23</td>\n",
" <td>71.90</td>\n",
" <td>4.32</td>\n",
" <td>fine-tuned</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31</th>\n",
" <td>togethercomputer/LLaMA-2-7B-32K</td>\n",
" <td>togethercomputer/LLaMA-2-7B-32K</td>\n",
" <td>913</td>\n",
" <td>🔶</td>\n",
" <td>47.07</td>\n",
" <td>47.53</td>\n",
" <td>76.14</td>\n",
" <td>43.33</td>\n",
" <td>39.23</td>\n",
" <td>71.90</td>\n",
" <td>4.32</td>\n",
" <td>fine-tuned</td>\n",
" </tr>\n",
" <tr>\n",
" <th>51</th>\n",
" <td>huggyllama/llama-7b</td>\n",
" <td>huggyllama/llama-7b</td>\n",
" <td>930</td>\n",
" <td>?</td>\n",
" <td>46.37</td>\n",
" <td>50.94</td>\n",
" <td>77.81</td>\n",
" <td>35.69</td>\n",
" <td>34.33</td>\n",
" <td>71.43</td>\n",
" <td>8.04</td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>28</th>\n",
" <td>togethercomputer/GPT-JT-6B-v1</td>\n",
" <td>togethercomputer/GPT-JT-6B-v1</td>\n",
" <td>987</td>\n",
" <td>🔶</td>\n",
" <td>43.13</td>\n",
" <td>40.87</td>\n",
" <td>67.15</td>\n",
" <td>47.19</td>\n",
" <td>37.07</td>\n",
" <td>65.27</td>\n",
" <td>1.21</td>\n",
" <td>fine-tuned</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30</th>\n",
" <td>togethercomputer/GPT-NeoXT-Chat-Base-20B</td>\n",
" <td>togethercomputer/GPT-NeoXT-Chat-Base-20B</td>\n",
" <td>990</td>\n",
" <td>🔶</td>\n",
" <td>43.02</td>\n",
" <td>45.65</td>\n",
" <td>74.03</td>\n",
" <td>29.92</td>\n",
" <td>34.51</td>\n",
" <td>67.09</td>\n",
" <td>6.90</td>\n",
" <td>fine-tuned</td>\n",
" </tr>\n",
" <tr>\n",
" <th>36</th>\n",
" <td>togethercomputer/RedPajama-INCITE-7B-Instruct</td>\n",
" <td>togethercomputer/RedPajama-INCITE-7B-Instruct</td>\n",
" <td>1006</td>\n",
" <td>🔶</td>\n",
" <td>42.38</td>\n",
" <td>44.11</td>\n",
" <td>72.02</td>\n",
" <td>37.62</td>\n",
" <td>33.96</td>\n",
" <td>64.96</td>\n",
" <td>1.59</td>\n",
" <td>fine-tuned</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29</th>\n",
" <td>togethercomputer/GPT-JT-Moderation-6B</td>\n",
" <td>togethercomputer/GPT-JT-Moderation-6B</td>\n",
" <td>1022</td>\n",
" <td>?</td>\n",
" <td>41.80</td>\n",
" <td>40.53</td>\n",
" <td>67.66</td>\n",
" <td>41.63</td>\n",
" <td>37.33</td>\n",
" <td>62.67</td>\n",
" <td>0.99</td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>49</th>\n",
" <td>EleutherAI/gpt-neox-20b</td>\n",
" <td>EleutherAI/gpt-neox-20b</td>\n",
" <td>1026</td>\n",
" <td>🟢</td>\n",
" <td>41.69</td>\n",
" <td>45.73</td>\n",
" <td>73.45</td>\n",
" <td>25.00</td>\n",
" <td>31.61</td>\n",
" <td>68.90</td>\n",
" <td>5.46</td>\n",
" <td>pretrained</td>\n",
" </tr>\n",
" <tr>\n",
" <th>34</th>\n",
" <td>togethercomputer/RedPajama-INCITE-7B-Base</td>\n",
" <td>togethercomputer/RedPajama-INCITE-7B-Base</td>\n",
" <td>1031</td>\n",
" <td>🟢</td>\n",
" <td>41.49</td>\n",
" <td>46.25</td>\n",
" <td>71.63</td>\n",
" <td>27.68</td>\n",
" <td>33.03</td>\n",
" <td>67.32</td>\n",
" <td>3.03</td>\n",
" <td>pretrained</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53</th>\n",
" <td>OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5</td>\n",
" <td>OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5</td>\n",
" <td>1040</td>\n",
" <td>🔶</td>\n",
" <td>41.31</td>\n",
" <td>45.73</td>\n",
" <td>68.59</td>\n",
" <td>26.82</td>\n",
" <td>37.81</td>\n",
" <td>65.90</td>\n",
" <td>3.03</td>\n",
" <td>fine-tuned</td>\n",
" </tr>\n",
" <tr>\n",
" <th>54</th>\n",
" <td>EleutherAI/gpt-j-6b</td>\n",
" <td>EleutherAI/gpt-j-6b</td>\n",
" <td>1083</td>\n",
" <td>🟢</td>\n",
" <td>40.10</td>\n",
" <td>41.38</td>\n",
" <td>67.54</td>\n",
" <td>26.78</td>\n",
" <td>35.96</td>\n",
" <td>65.98</td>\n",
" <td>2.96</td>\n",
" <td>pretrained</td>\n",
" </tr>\n",
" <tr>\n",
" <th>33</th>\n",
" <td>togethercomputer/Pythia-Chat-Base-7B-v0.16</td>\n",
" <td>togethercomputer/Pythia-Chat-Base-7B</td>\n",
" <td>1094</td>\n",
" <td>🔶</td>\n",
" <td>39.81</td>\n",
" <td>40.02</td>\n",
" <td>68.67</td>\n",
" <td>27.44</td>\n",
" <td>34.63</td>\n",
" <td>64.01</td>\n",
" <td>4.09</td>\n",
" <td>fine-tuned</td>\n",
" </tr>\n",
" <tr>\n",
" <th>38</th>\n",
" <td>togethercomputer/RedPajama-INCITE-Chat-3B-v1</td>\n",
" <td>togethercomputer/RedPajama-INCITE-Chat-3B-v1</td>\n",
" <td>1105</td>\n",
" <td>🔶</td>\n",
" <td>39.53</td>\n",
" <td>42.83</td>\n",
" <td>67.62</td>\n",
" <td>26.23</td>\n",
" <td>34.44</td>\n",
" <td>65.51</td>\n",
" <td>0.53</td>\n",
" <td>fine-tuned</td>\n",
" </tr>\n",
" <tr>\n",
" <th>47</th>\n",
" <td>databricks/dolly-v2-12b</td>\n",
" <td>databricks/dolly-v2-12b</td>\n",
" <td>1108</td>\n",
" <td>🔶</td>\n",
" <td>39.46</td>\n",
" <td>42.41</td>\n",
" <td>72.53</td>\n",
" <td>25.92</td>\n",
" <td>33.83</td>\n",
" <td>60.85</td>\n",
" <td>1.21</td>\n",
" <td>fine-tuned</td>\n",
" </tr>\n",
" <tr>\n",
" <th>35</th>\n",
" <td>togethercomputer/RedPajama-INCITE-7B-Chat</td>\n",
" <td>togethercomputer/RedPajama-INCITE-7B-Chat</td>\n",
" <td>1111</td>\n",
" <td>🔶</td>\n",
" <td>39.37</td>\n",
" <td>42.06</td>\n",
" <td>70.82</td>\n",
" <td>26.94</td>\n",
" <td>36.09</td>\n",
" <td>59.83</td>\n",
" <td>0.45</td>\n",
" <td>fine-tuned</td>\n",
" </tr>\n",
" <tr>\n",
" <th>63</th>\n",
" <td>databricks/dolly-v2-7b</td>\n",
" <td>databricks/dolly-v2-7b</td>\n",
" <td>1115</td>\n",
" <td>🔶</td>\n",
" <td>39.24</td>\n",
" <td>44.54</td>\n",
" <td>69.64</td>\n",
" <td>25.18</td>\n",
" <td>34.88</td>\n",
" <td>60.06</td>\n",
" <td>1.14</td>\n",
" <td>fine-tuned</td>\n",
" </tr>\n",
" <tr>\n",
" <th>39</th>\n",
" <td>togethercomputer/RedPajama-INCITE-Instruct-3B-v1</td>\n",
" <td>togethercomputer/RedPajama-INCITE-Instruct-3B-v1</td>\n",
" <td>1123</td>\n",
" <td>🔶</td>\n",
" <td>39.06</td>\n",
" <td>41.55</td>\n",
" <td>65.48</td>\n",
" <td>25.03</td>\n",
" <td>36.41</td>\n",
" <td>64.48</td>\n",
" <td>1.36</td>\n",
" <td>fine-tuned</td>\n",
" </tr>\n",
" <tr>\n",
" <th>52</th>\n",
" <td>EleutherAI/pythia-12b-v0</td>\n",
" <td>EleutherAI/pythia-12b</td>\n",
" <td>1136</td>\n",
" <td>🟢</td>\n",
" <td>38.82</td>\n",
" <td>39.59</td>\n",
" <td>68.82</td>\n",
" <td>26.76</td>\n",
" <td>31.85</td>\n",
" <td>64.17</td>\n",
" <td>1.74</td>\n",
" <td>pretrained</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45</th>\n",
" <td>EleutherAI/pythia-1b-v0</td>\n",
" <td>EleutherAI/pythia-12b</td>\n",
" <td>1136</td>\n",
" <td>🟢</td>\n",
" <td>38.82</td>\n",
" <td>39.59</td>\n",
" <td>68.82</td>\n",
" <td>26.76</td>\n",
" <td>31.85</td>\n",
" <td>64.17</td>\n",
" <td>1.74</td>\n",
" <td>pretrained</td>\n",
" </tr>\n",
" <tr>\n",
" <th>37</th>\n",
" <td>togethercomputer/RedPajama-INCITE-Base-3B-v1</td>\n",
" <td>togethercomputer/RedPajama-INCITE-Base-3B-v1</td>\n",
" <td>1144</td>\n",
" <td>🟢</td>\n",
" <td>38.54</td>\n",
" <td>40.19</td>\n",
" <td>64.77</td>\n",
" <td>27.03</td>\n",
" <td>33.23</td>\n",
" <td>64.72</td>\n",
" <td>1.29</td>\n",
" <td>pretrained</td>\n",
" </tr>\n",
" <tr>\n",
" <th>46</th>\n",
" <td>EleutherAI/pythia-6.9b</td>\n",
" <td>EleutherAI/pythia-6.7b</td>\n",
" <td>1157</td>\n",
" <td>🟢</td>\n",
" <td>38.06</td>\n",
" <td>40.10</td>\n",
" <td>65.00</td>\n",
" <td>24.64</td>\n",
" <td>32.85</td>\n",
" <td>64.72</td>\n",
" <td>1.06</td>\n",
" <td>pretrained</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>Phind/Phind-CodeLlama-34B-v2</td>\n",
" <td>Phind/Phind-CodeLlama-34B-v2</td>\n",
" <td>1180</td>\n",
" <td>⭕</td>\n",
" <td>36.89</td>\n",
" <td>24.57</td>\n",
" <td>27.60</td>\n",
" <td>25.76</td>\n",
" <td>48.37</td>\n",
" <td>71.82</td>\n",
" <td>23.20</td>\n",
" <td>instruction-tuned</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>Phind/Phind-CodeLlama-34B-Python-v1</td>\n",
" <td>Phind/Phind-CodeLlama-34B-Python-v1</td>\n",
" <td>1198</td>\n",
" <td>🔶</td>\n",
" <td>36.33</td>\n",
" <td>24.66</td>\n",
" <td>29.77</td>\n",
" <td>27.95</td>\n",
" <td>45.27</td>\n",
" <td>68.82</td>\n",
" <td>21.53</td>\n",
" <td>fine-tuned</td>\n",
" </tr>\n",
" <tr>\n",
" <th>58</th>\n",
" <td>HuggingFaceH4/starchat-alpha</td>\n",
" <td>HuggingFaceH4/starchat-alpha</td>\n",
" <td>1218</td>\n",
" <td>?</td>\n",
" <td>35.49</td>\n",
" <td>31.57</td>\n",
" <td>49.43</td>\n",
" <td>30.76</td>\n",
" <td>43.66</td>\n",
" <td>55.09</td>\n",
" <td>2.43</td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>WizardLM/WizardCoder-15B-V1.0</td>\n",
" <td>WizardLM/WizardCoder-15B-V1.0</td>\n",
" <td>1246</td>\n",
" <td>🔶</td>\n",
" <td>34.64</td>\n",
" <td>32.34</td>\n",
" <td>47.20</td>\n",
" <td>29.43</td>\n",
" <td>41.56</td>\n",
" <td>55.17</td>\n",
" <td>2.12</td>\n",
" <td>fine-tuned</td>\n",
" </tr>\n",
" <tr>\n",
" <th>60</th>\n",
" <td>stabilityai/stablelm-base-alpha-7b</td>\n",
" <td>stabilityai/stablelm-base-alpha-7b</td>\n",
" <td>1259</td>\n",
" <td>🟢</td>\n",
" <td>34.37</td>\n",
" <td>32.00</td>\n",
" <td>51.78</td>\n",
" <td>26.21</td>\n",
" <td>40.19</td>\n",
" <td>55.41</td>\n",
" <td>0.61</td>\n",
" <td>pretrained</td>\n",
" </tr>\n",
" <tr>\n",
" <th>61</th>\n",
" <td>stabilityai/stablelm-base-alpha-3b</td>\n",
" <td>stabilityai/stablelm-base-alpha-3b</td>\n",
" <td>1310</td>\n",
" <td>🟢</td>\n",
" <td>31.50</td>\n",
" <td>26.45</td>\n",
" <td>42.24</td>\n",
" <td>25.43</td>\n",
" <td>40.50</td>\n",
" <td>53.91</td>\n",
" <td>0.45</td>\n",
" <td>pretrained</td>\n",
" </tr>\n",
" <tr>\n",
" <th>62</th>\n",
" <td>bigcode/starcoder</td>\n",
" <td>bigcode/tiny_starcoder_py</td>\n",
" <td>1366</td>\n",
" <td>🟢</td>\n",
" <td>29.41</td>\n",
" <td>20.99</td>\n",
" <td>28.77</td>\n",
" <td>26.79</td>\n",
" <td>47.68</td>\n",
" <td>51.22</td>\n",
" <td>0.99</td>\n",
" <td>pretrained</td>\n",
" </tr>\n",
" <tr>\n",
" <th>48</th>\n",
" <td>databricks/dolly-v2-3b</td>\n",
" <td>databricks/dolly-v2-3b</td>\n",
" <td>1462</td>\n",
" <td>🔶</td>\n",
" <td>22.83</td>\n",
" <td>25.26</td>\n",
" <td>26.55</td>\n",
" <td>24.70</td>\n",
" <td>0.00</td>\n",
" <td>59.43</td>\n",
" <td>1.06</td>\n",
" <td>fine-tuned</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" together_model \\\n",
"42 upstage/SOLAR-10.7B-Instruct-v1.0 \n",
"4 NousResearch/Nous-Hermes-2-Yi-34B \n",
"24 mistralai/Mixtral-8x7B-Instruct-v0.1 \n",
"16 garage-bAInd/Platypus2-70B-instruct \n",
"25 openchat/openchat-3.5-1210 \n",
"43 mistralai/Mixtral-8x7B-v0.1 \n",
"22 mistralai/Mistral-7B-Instruct-v0.2 \n",
"17 huggyllama/llama-65b \n",
"44 HuggingFaceH4/zephyr-7b-beta \n",
"27 teknium/OpenHermes-2p5-Mistral-7B \n",
"26 teknium/OpenHermes-2-Mistral-7B \n",
"15 WizardLM/WizardLM-70B-V1.0 \n",
"23 mistralai/Mistral-7B-v0.1 \n",
"8 Open-Orca/Mistral-7B-OpenOrca \n",
"59 huggyllama/llama-30b \n",
"0 Austism/chronos-hermes-13b \n",
"11 Undi95/ReMM-SLERP-L2-13B \n",
"1 Gryphe/MythoMax-L2-13b \n",
"5 NousResearch/Nous-Hermes-Llama2-13b \n",
"19 lmsys/vicuna-13b-v1.5 \n",
"56 lmsys/vicuna-13b-v1.3 \n",
"18 lmsys/vicuna-13b-v1.5-16k \n",
"21 mistralai/Mistral-7B-Instruct-v0.1 \n",
"14 WizardLM/WizardLM-13B-V1.2 \n",
"50 NousResearch/Nous-Hermes-13b \n",
"3 NousResearch/Nous-Capybara-7B-V1p9 \n",
"20 lmsys/vicuna-7b-v1.5 \n",
"55 lmsys/vicuna-7b-v1.3 \n",
"7 NousResearch/Nous-Hermes-llama-2-7b \n",
"6 NousResearch/Nous-Hermes-Llama2-70b \n",
"57 huggyllama/llama-13b \n",
"13 WizardLM/WizardCoder-Python-34B-V1.0 \n",
"32 togethercomputer/Llama-2-7B-32K-Instruct \n",
"41 togethercomputer/llama-2-7b \n",
"2 Nexusflow/NexusRaven-V2-13B \n",
"40 togethercomputer/llama-2-70b \n",
"31 togethercomputer/LLaMA-2-7B-32K \n",
"51 huggyllama/llama-7b \n",
"28 togethercomputer/GPT-JT-6B-v1 \n",
"30 togethercomputer/GPT-NeoXT-Chat-Base-20B \n",
"36 togethercomputer/RedPajama-INCITE-7B-Instruct \n",
"29 togethercomputer/GPT-JT-Moderation-6B \n",
"49 EleutherAI/gpt-neox-20b \n",
"34 togethercomputer/RedPajama-INCITE-7B-Base \n",
"53 OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 \n",
"54 EleutherAI/gpt-j-6b \n",
"33 togethercomputer/Pythia-Chat-Base-7B-v0.16 \n",
"38 togethercomputer/RedPajama-INCITE-Chat-3B-v1 \n",
"47 databricks/dolly-v2-12b \n",
"35 togethercomputer/RedPajama-INCITE-7B-Chat \n",
"63 databricks/dolly-v2-7b \n",
"39 togethercomputer/RedPajama-INCITE-Instruct-3B-v1 \n",
"52 EleutherAI/pythia-12b-v0 \n",
"45 EleutherAI/pythia-1b-v0 \n",
"37 togethercomputer/RedPajama-INCITE-Base-3B-v1 \n",
"46 EleutherAI/pythia-6.9b \n",
"10 Phind/Phind-CodeLlama-34B-v2 \n",
"9 Phind/Phind-CodeLlama-34B-Python-v1 \n",
"58 HuggingFaceH4/starchat-alpha \n",
"12 WizardLM/WizardCoder-15B-V1.0 \n",
"60 stabilityai/stablelm-base-alpha-7b \n",
"61 stabilityai/stablelm-base-alpha-3b \n",
"62 bigcode/starcoder \n",
"48 databricks/dolly-v2-3b \n",
"\n",
" openllm_model openllm_rank T \\\n",
"42 upstage/SOLAR-10.7B-Instruct-v1.0 3 🟦 \n",
"4 NousResearch/Nous-Hermes-2-Yi-34B 13 🔶 \n",
"24 mistralai/Mixtral-8x7B-Instruct-v0.1 21 ⭕ \n",
"16 garage-bAInd/Platypus2-70B-instruct 87 🔶 \n",
"25 openchat/openchat-3.5-1210 98 🔶 \n",
"43 mistralai/Mixtral-8x7B-v0.1 111 🟢 \n",
"22 mistralai/Mistral-7B-Instruct-v0.2 187 ⭕ \n",
"17 huggyllama/llama-65b 250 🟢 \n",
"44 HuggingFaceH4/zephyr-7b-beta 274 🔶 \n",
"27 teknium/OpenHermes-2.5-Mistral-7B 291 🔶 \n",
"26 teknium/OpenHermes-2.5-Mistral-7B 291 🔶 \n",
"15 WizardLM/WizardLM-70B-V1.0 299 ⭕ \n",
"23 mistralai/Mistral-7B-v0.1 314 🟢 \n",
"8 Open-Orca/Mistral-7B-OpenOrca 342 🔶 \n",
"59 huggyllama/llama-30b 475 ? \n",
"0 Austism/chronos-hermes-13b-v2 523 🔶 \n",
"11 Undi95/ReMM-SLERP-L2-13B 527 🔶 \n",
"1 Gryphe/MythoMax-L2-13b 531 🔶 \n",
"5 NousResearch/Nous-Hermes-Llama2-13b 532 🔶 \n",
"19 lmsys/vicuna-13b-v1.5 563 🔶 \n",
"56 lmsys/vicuna-13b-v1.5 563 🔶 \n",
"18 lmsys/vicuna-13b-v1.5-16k 596 🔶 \n",
"21 mistralai/Mistral-7B-Instruct-v0.1 597 ⭕ \n",
"14 WizardLM/WizardLM-13B-V1.2 608 🔶 \n",
"50 NousResearch/Nous-Hermes-13b 641 🔶 \n",
"3 NousResearch/Nous-Capybara-7B 723 🔶 \n",
"20 lmsys/vicuna-7b-v1.5 761 🔶 \n",
"55 lmsys/vicuna-7b-v1.5 761 🔶 \n",
"7 NousResearch/Nous-Hermes-llama-2-7b 770 🔶 \n",
"6 NousResearch/Nous-Hermes-llama-2-7b 770 🔶 \n",
"57 huggyllama/llama-13b 792 🟢 \n",
"13 WizardLM/WizardCoder-Python-34B-V1.0 822 ⭕ \n",
"32 togethercomputer/Llama-2-7B-32K-Instruct 834 🔶 \n",
"41 togethercomputer/Llama-2-7B-32K-Instruct 834 🔶 \n",
"2 Nexusflow/NexusRaven-V2-13B 884 🔶 \n",
"40 togethercomputer/LLaMA-2-7B-32K 913 🔶 \n",
"31 togethercomputer/LLaMA-2-7B-32K 913 🔶 \n",
"51 huggyllama/llama-7b 930 ? \n",
"28 togethercomputer/GPT-JT-6B-v1 987 🔶 \n",
"30 togethercomputer/GPT-NeoXT-Chat-Base-20B 990 🔶 \n",
"36 togethercomputer/RedPajama-INCITE-7B-Instruct 1006 🔶 \n",
"29 togethercomputer/GPT-JT-Moderation-6B 1022 ? \n",
"49 EleutherAI/gpt-neox-20b 1026 🟢 \n",
"34 togethercomputer/RedPajama-INCITE-7B-Base 1031 🟢 \n",
"53 OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 1040 🔶 \n",
"54 EleutherAI/gpt-j-6b 1083 🟢 \n",
"33 togethercomputer/Pythia-Chat-Base-7B 1094 🔶 \n",
"38 togethercomputer/RedPajama-INCITE-Chat-3B-v1 1105 🔶 \n",
"47 databricks/dolly-v2-12b 1108 🔶 \n",
"35 togethercomputer/RedPajama-INCITE-7B-Chat 1111 🔶 \n",
"63 databricks/dolly-v2-7b 1115 🔶 \n",
"39 togethercomputer/RedPajama-INCITE-Instruct-3B-v1 1123 🔶 \n",
"52 EleutherAI/pythia-12b 1136 🟢 \n",
"45 EleutherAI/pythia-12b 1136 🟢 \n",
"37 togethercomputer/RedPajama-INCITE-Base-3B-v1 1144 🟢 \n",
"46 EleutherAI/pythia-6.7b 1157 🟢 \n",
"10 Phind/Phind-CodeLlama-34B-v2 1180 ⭕ \n",
"9 Phind/Phind-CodeLlama-34B-Python-v1 1198 🔶 \n",
"58 HuggingFaceH4/starchat-alpha 1218 ? \n",
"12 WizardLM/WizardCoder-15B-V1.0 1246 🔶 \n",
"60 stabilityai/stablelm-base-alpha-7b 1259 🟢 \n",
"61 stabilityai/stablelm-base-alpha-3b 1310 🟢 \n",
"62 bigcode/tiny_starcoder_py 1366 🟢 \n",
"48 databricks/dolly-v2-3b 1462 🔶 \n",
"\n",
" Average ⬆️ ARC HellaSwag MMLU TruthfulQA Winogrande GSM8K \\\n",
"42 74.20 71.08 88.16 66.21 71.43 83.58 64.75 \n",
"4 73.74 66.89 85.49 76.70 60.37 82.95 70.05 \n",
"24 72.62 70.22 87.63 71.16 64.58 81.37 60.73 \n",
"16 69.30 71.84 87.94 70.48 62.26 82.72 40.56 \n",
"25 68.89 64.93 84.92 64.62 52.15 80.74 65.96 \n",
"43 68.42 66.04 86.49 71.82 46.78 81.93 57.47 \n",
"22 65.71 63.14 84.88 60.78 68.26 77.19 40.03 \n",
"17 62.79 63.48 86.09 63.93 43.43 82.56 37.23 \n",
"44 61.95 62.03 84.36 61.07 57.45 77.74 29.04 \n",
"27 61.52 64.93 84.18 63.64 52.24 78.06 26.08 \n",
"26 61.52 64.93 84.18 63.64 52.24 78.06 26.08 \n",
"15 61.25 65.44 84.41 64.05 54.81 80.82 17.97 \n",
"23 60.97 59.98 83.31 64.16 42.15 78.37 37.83 \n",
"8 60.17 64.08 83.99 62.24 53.05 77.74 19.94 \n",
"59 56.96 61.43 84.73 58.45 42.27 80.03 14.86 \n",
"0 56.10 60.32 83.21 55.05 50.91 75.37 11.75 \n",
"11 56.03 60.92 83.56 55.33 51.97 75.22 9.17 \n",
"1 56.00 60.92 83.56 55.33 51.97 75.22 9.02 \n",
"5 55.97 61.52 83.29 55.11 50.38 75.45 10.08 \n",
"19 55.41 57.08 81.24 56.67 51.51 74.66 11.30 \n",
"56 55.41 57.08 81.24 56.67 51.51 74.66 11.30 \n",
"18 54.97 56.74 80.37 55.28 51.96 72.38 13.12 \n",
"21 54.96 54.52 75.63 55.38 56.28 73.72 14.25 \n",
"14 54.76 59.04 82.21 54.64 47.27 71.90 13.50 \n",
"50 54.04 56.57 82.11 50.44 51.50 75.30 8.34 \n",
"3 52.70 55.29 80.73 48.72 51.13 73.32 6.97 \n",
"20 52.06 53.24 77.39 51.04 50.34 72.14 8.19 \n",
"55 52.06 53.24 77.39 51.04 50.34 72.14 8.19 \n",
"7 51.87 55.12 78.94 48.34 49.01 74.03 5.76 \n",
"6 51.87 55.12 78.94 48.34 49.01 74.03 5.76 \n",
"57 51.33 56.14 80.92 47.61 39.48 76.24 7.58 \n",
"13 50.46 52.13 74.78 49.15 48.85 68.35 9.48 \n",
"32 50.02 51.11 78.51 46.11 44.86 73.88 5.69 \n",
"41 50.02 51.11 78.51 46.11 44.86 73.88 5.69 \n",
"2 48.21 45.14 67.40 44.88 44.54 66.38 20.92 \n",
"40 47.07 47.53 76.14 43.33 39.23 71.90 4.32 \n",
"31 47.07 47.53 76.14 43.33 39.23 71.90 4.32 \n",
"51 46.37 50.94 77.81 35.69 34.33 71.43 8.04 \n",
"28 43.13 40.87 67.15 47.19 37.07 65.27 1.21 \n",
"30 43.02 45.65 74.03 29.92 34.51 67.09 6.90 \n",
"36 42.38 44.11 72.02 37.62 33.96 64.96 1.59 \n",
"29 41.80 40.53 67.66 41.63 37.33 62.67 0.99 \n",
"49 41.69 45.73 73.45 25.00 31.61 68.90 5.46 \n",
"34 41.49 46.25 71.63 27.68 33.03 67.32 3.03 \n",
"53 41.31 45.73 68.59 26.82 37.81 65.90 3.03 \n",
"54 40.10 41.38 67.54 26.78 35.96 65.98 2.96 \n",
"33 39.81 40.02 68.67 27.44 34.63 64.01 4.09 \n",
"38 39.53 42.83 67.62 26.23 34.44 65.51 0.53 \n",
"47 39.46 42.41 72.53 25.92 33.83 60.85 1.21 \n",
"35 39.37 42.06 70.82 26.94 36.09 59.83 0.45 \n",
"63 39.24 44.54 69.64 25.18 34.88 60.06 1.14 \n",
"39 39.06 41.55 65.48 25.03 36.41 64.48 1.36 \n",
"52 38.82 39.59 68.82 26.76 31.85 64.17 1.74 \n",
"45 38.82 39.59 68.82 26.76 31.85 64.17 1.74 \n",
"37 38.54 40.19 64.77 27.03 33.23 64.72 1.29 \n",
"46 38.06 40.10 65.00 24.64 32.85 64.72 1.06 \n",
"10 36.89 24.57 27.60 25.76 48.37 71.82 23.20 \n",
"9 36.33 24.66 29.77 27.95 45.27 68.82 21.53 \n",
"58 35.49 31.57 49.43 30.76 43.66 55.09 2.43 \n",
"12 34.64 32.34 47.20 29.43 41.56 55.17 2.12 \n",
"60 34.37 32.00 51.78 26.21 40.19 55.41 0.61 \n",
"61 31.50 26.45 42.24 25.43 40.50 53.91 0.45 \n",
"62 29.41 20.99 28.77 26.79 47.68 51.22 0.99 \n",
"48 22.83 25.26 26.55 24.70 0.00 59.43 1.06 \n",
"\n",
" Type \n",
"42 RL-tuned \n",
"4 fine-tuned \n",
"24 instruction-tuned \n",
"16 fine-tuned \n",
"25 fine-tuned \n",
"43 pretrained \n",
"22 instruction-tuned \n",
"17 pretrained \n",
"44 fine-tuned \n",
"27 fine-tuned \n",
"26 fine-tuned \n",
"15 instruction-tuned \n",
"23 pretrained \n",
"8 fine-tuned \n",
"59 \n",
"0 fine-tuned \n",
"11 fine-tuned \n",
"1 fine-tuned \n",
"5 fine-tuned \n",
"19 fine-tuned \n",
"56 fine-tuned \n",
"18 fine-tuned \n",
"21 instruction-tuned \n",
"14 fine-tuned \n",
"50 fine-tuned \n",
"3 fine-tuned \n",
"20 fine-tuned \n",
"55 fine-tuned \n",
"7 fine-tuned \n",
"6 fine-tuned \n",
"57 pretrained \n",
"13 instruction-tuned \n",
"32 fine-tuned \n",
"41 fine-tuned \n",
"2 fine-tuned \n",
"40 fine-tuned \n",
"31 fine-tuned \n",
"51 \n",
"28 fine-tuned \n",
"30 fine-tuned \n",
"36 fine-tuned \n",
"29 \n",
"49 pretrained \n",
"34 pretrained \n",
"53 fine-tuned \n",
"54 pretrained \n",
"33 fine-tuned \n",
"38 fine-tuned \n",
"47 fine-tuned \n",
"35 fine-tuned \n",
"63 fine-tuned \n",
"39 fine-tuned \n",
"52 pretrained \n",
"45 pretrained \n",
"37 pretrained \n",
"46 pretrained \n",
"10 instruction-tuned \n",
"9 fine-tuned \n",
"58 \n",
"12 fine-tuned \n",
"60 pretrained \n",
"61 pretrained \n",
"62 pretrained \n",
"48 fine-tuned "
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.DataFrame(results).sort_values(by=['openllm_rank'])"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}