Blaze (Balázs Galambosi) gblazex

## openchat_3.5-Nous.md

      
              1 file
            
          
              1 fork
            
          
              0 comments
            
          
              0 stars
            
          
                gblazex
                / openchat_3.5-Nous.md
            
            
              Created
              January 8, 2024 20:13
            
          
Model
AGIEval
GPT4All
TruthfulQA
Bigbench
Average


openchat_3.5
42.67
72.92
47.27
42.51
51.34


AGIEval


Task
Version
Metric
Value

Stderr


agieval_aqua_rat
0
acc
24.02
±
2.69


acc_norm
24.80
±
2.72


agieval_logiqa_en
0
acc
38.86
±
1.91


## mistral-ft-optimized-1218-Nous.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                gblazex
                / mistral-ft-optimized-1218-Nous.md
            
            
              Created
              January 8, 2024 17:52
            
          
Model
AGIEval
GPT4All
TruthfulQA
Bigbench
Average


mistral-ft-optimized-1218
44.74
75.6
59.89
47.17
56.85


AGIEval


Task
Version
Metric
Value

Stderr


agieval_aqua_rat
0
acc
25.20
±
2.73


acc_norm
24.02
±
2.69


agieval_logiqa_en
0
acc
39.32
±
1.92


## zephyr-7b-beta-Nous.md

      
              1 file
            
          
              1 fork
            
          
              0 comments
            
          
              0 stars
            
          
                gblazex
                / zephyr-7b-beta-Nous.md
            
            
              Created
              January 8, 2024 20:38
            
          
Model
AGIEval
GPT4All
TruthfulQA
Bigbench
Average


zephyr-7b-beta
37.33
71.83
55.1
39.7
50.99


AGIEval


Task
Version
Metric
Value

Stderr


agieval_aqua_rat
0
acc
21.26
±
2.57


acc_norm
20.47
±
2.54


agieval_logiqa_en
0
acc
33.33
±
1.85


## model_outputs.json
[
  {
    "dataset":"helpful_base",
    "instruction":"What are the names of some famous actors that started their careers on Broadway?",
    "output":"1. Meryl Streep\n2. Angela Lansbury\n3. Audra McDonald\n4. Bernadette Peters\n5. Idina Menzel\n6. Patti LuPone\n7. Hugh Jackman\n8. James Earl Jones\n9. Liza Minnelli\n10. Nathan Lane",
    "generator":"dolphin-2.2.1-mistral-7b"
  },
  {
    "dataset":"helpful_base",
    "instruction":"How did US states get their names?",

## neuronovo-7B-v0.2-Nous.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                gblazex
                / neuronovo-7B-v0.2-Nous.md
            
            
              Created
              January 14, 2024 01:42
            
          
Model
AGIEval
GPT4All
TruthfulQA
Bigbench
Average


neuronovo-7B-v0.2
44.95
76.49
71.57
47.48
60.12


AGIEval


Task
Version
Metric
Value

Stderr


agieval_aqua_rat
0
acc
25.98
±
2.76


acc_norm
25.59
±
2.74


agieval_logiqa_en
0
acc
37.48
±
1.90


## distilabeled-Marcoro14-7B-slerp-Nous.md

      
              1 file
            
          
              1 fork
            
          
              0 comments
            
          
              0 stars
            
          
                gblazex
                / distilabeled-Marcoro14-7B-slerp-Nous.md
            
            
              Last active
              January 13, 2024 23:04
            
          
Model
AGIEval
GPT4All
TruthfulQA
Bigbench
Average


distilabeled-Marcoro14-7B-slerp
45.38
76.48
65.68
48.18
58.93


AGIEval


Task
Version
Metric
Value

Stderr


agieval_aqua_rat
0
acc
27.56
±
2.81


acc_norm
25.98
±
2.76


agieval_logiqa_en
0
acc
39.17
±
1.91


## openchat-3.5-1210-Nous.md

      
              1 file
            
          
              1 fork
            
          
              0 comments
            
          
              0 stars
            
          
                gblazex
                / openchat-3.5-1210-Nous.md
            
            
              Created
              January 10, 2024 04:00
            
          
Model
AGIEval
GPT4All
TruthfulQA
Bigbench
Average


openchat-3.5-1210
42.62
72.84
53.21
43.88
53.14


AGIEval


Task
Version
Metric
Value

Stderr


agieval_aqua_rat
0
acc
22.44
±
2.62


acc_norm
24.41
±
2.70


agieval_logiqa_en
0
acc
41.17
±
1.93


## MistralTrix-v1-Nous.md

      
              1 file
            
          
              1 fork
            
          
              0 comments
            
          
              0 stars
            
          
                gblazex
                / MistralTrix-v1-Nous.md
            
            
              Created
              January 10, 2024 03:40
            
          
Model
AGIEval
GPT4All
TruthfulQA
Bigbench
Average


MistralTrix-v1
44.98
76.62
71.44
47.17
60.05


AGIEval


Task
Version
Metric
Value

Stderr


agieval_aqua_rat
0
acc
25.59
±
2.74


acc_norm
24.80
±
2.72


agieval_logiqa_en
0
acc
37.48
±
1.90


## Mistral-7B-Instruct-v0.2-Nous.md

      
              1 file
            
          
              1 fork
            
          
              0 comments
            
          
              0 stars
            
          
                gblazex
                / Mistral-7B-Instruct-v0.2-Nous.md
            
            
              Created
              January 10, 2024 03:10
            
          
Model
AGIEval
GPT4All
TruthfulQA
Bigbench
Average


Mistral-7B-Instruct-v0.2
38.5
71.64
66.82
42.29
54.81


AGIEval


Task
Version
Metric
Value

Stderr


agieval_aqua_rat
0
acc
23.62
±
2.67


acc_norm
22.05
±
2.61


agieval_logiqa_en
0
acc
36.10
±
1.88


## dolphin-2.2.1-mistral-7b-Nous.md

      
              1 file
            
          
              1 fork
            
          
              0 comments
            
          
              0 stars
            
          
                gblazex
                / dolphin-2.2.1-mistral-7b-Nous.md
            
            
              Created
              January 9, 2024 18:16
            
          
Model
AGIEval
GPT4All
TruthfulQA
Bigbench
Average


dolphin-2.2.1-mistral-7b
38.64
72.24
54.09
39.22
51.05


AGIEval


Task
Version
Metric
Value

Stderr


agieval_aqua_rat
0
acc
23.23
±
2.65


acc_norm
21.26
±
2.57


agieval_logiqa_en
0
acc
35.48
±
1.88
Task	Version	Metric	Value		Stderr
agieval_aqua_rat	0	acc	24.02	±	2.69
		acc_norm	24.80	±	2.72
agieval_logiqa_en	0	acc	38.86	±	1.91
Task	Version	Metric	Value		Stderr
agieval_aqua_rat	0	acc	25.20	±	2.73
		acc_norm	24.02	±	2.69
agieval_logiqa_en	0	acc	39.32	±	1.92
Task	Version	Metric	Value		Stderr
agieval_aqua_rat	0	acc	21.26	±	2.57
		acc_norm	20.47	±	2.54
agieval_logiqa_en	0	acc	33.33	±	1.85
	[
	{
	"dataset":"helpful_base",
	"instruction":"What are the names of some famous actors that started their careers on Broadway?",
	"output":"1. Meryl Streep\n2. Angela Lansbury\n3. Audra McDonald\n4. Bernadette Peters\n5. Idina Menzel\n6. Patti LuPone\n7. Hugh Jackman\n8. James Earl Jones\n9. Liza Minnelli\n10. Nathan Lane",
	"generator":"dolphin-2.2.1-mistral-7b"
	},
	{
	"dataset":"helpful_base",
	"instruction":"How did US states get their names?",
Task	Version	Metric	Value		Stderr
agieval_aqua_rat	0	acc	25.98	±	2.76
		acc_norm	25.59	±	2.74
agieval_logiqa_en	0	acc	37.48	±	1.90
Task	Version	Metric	Value		Stderr
agieval_aqua_rat	0	acc	27.56	±	2.81
		acc_norm	25.98	±	2.76
agieval_logiqa_en	0	acc	39.17	±	1.91
Task	Version	Metric	Value		Stderr
agieval_aqua_rat	0	acc	22.44	±	2.62
		acc_norm	24.41	±	2.70
agieval_logiqa_en	0	acc	41.17	±	1.93
Task	Version	Metric	Value		Stderr
agieval_aqua_rat	0	acc	23.62	±	2.67
		acc_norm	22.05	±	2.61
agieval_logiqa_en	0	acc	36.10	±	1.88
Task	Version	Metric	Value		Stderr
agieval_aqua_rat	0	acc	23.23	±	2.65
		acc_norm	21.26	±	2.57
agieval_logiqa_en	0	acc	35.48	±	1.88