Skip to content

Instantly share code, notes, and snippets.

@bdashore3
Created September 14, 2023 03:56
Show Gist options
  • Save bdashore3/41ce50c1889020cfd9dfe79099d99132 to your computer and use it in GitHub Desktop.
Save bdashore3/41ce50c1889020cfd9dfe79099d99132 to your computer and use it in GitHub Desktop.
Pyg-13b-supercot2-measurement
This file has been truncated, but you can view the full file.
{
"measurement": [
{
"key": "model.layers.0.self_attn.q_proj",
"numel": 26214400,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.17529296875,
"total_bits": 57024000.0,
"err": 0.01824951171875,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.37529296875,
"total_bits": 62266880.00000001,
"err": 0.0180511474609375,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.72529296875,
"total_bits": 71441920.0,
"err": 0.009307861328125,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.22529296875,
"total_bits": 84549120.0,
"err": 0.0092620849609375,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.0313232421875,
"total_bits": 79464320.0,
"err": 0.0279388427734375,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.12529296875,
"total_bits": 81927680.0,
"err": 0.01788330078125,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.17529296875,
"total_bits": 83238400.0,
"err": 0.00928497314453125,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.52529296875,
"total_bits": 92413440.0,
"err": 0.00922393798828125,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.662646484375,
"total_bits": 96014080.0,
"err": 0.0094757080078125,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.0313232421875,
"total_bits": 105678720.0,
"err": 0.01071929931640625,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.12529296875,
"total_bits": 108142080.0,
"err": 0.00921630859375,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.0313232421875,
"total_bits": 131893120.0,
"err": 0.00672149658203125,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.22529296875,
"total_bits": 136977920.0,
"err": 0.0059967041015625,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.52529296875,
"total_bits": 144842240.0,
"err": 0.0059967041015625,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.0313232421875,
"total_bits": 158107520.0,
"err": 0.00586700439453125,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.2313232421875,
"total_bits": 163350400.0,
"err": 0.0055694580078125,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.12529296875,
"total_bits": 212999679.99999997,
"err": 0.005756378173828125,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.0.self_attn.k_proj",
"numel": 26214400,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.17529296875,
"total_bits": 57024000.0,
"err": 0.01971435546875,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.37529296875,
"total_bits": 62266880.00000001,
"err": 0.0194091796875,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.72529296875,
"total_bits": 71441920.0,
"err": 0.0091094970703125,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.22529296875,
"total_bits": 84549120.0,
"err": 0.00902557373046875,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.0313232421875,
"total_bits": 79464320.0,
"err": 0.02423095703125,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.12529296875,
"total_bits": 81927680.0,
"err": 0.019195556640625,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.17529296875,
"total_bits": 83238400.0,
"err": 0.00907135009765625,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.52529296875,
"total_bits": 92413440.0,
"err": 0.00899505615234375,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.662646484375,
"total_bits": 96014080.0,
"err": 0.00925445556640625,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.0313232421875,
"total_bits": 105678720.0,
"err": 0.009979248046875,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.12529296875,
"total_bits": 108142080.0,
"err": 0.00896453857421875,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.0313232421875,
"total_bits": 131893120.0,
"err": 0.00547027587890625,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.22529296875,
"total_bits": 136977920.0,
"err": 0.0042877197265625,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.52529296875,
"total_bits": 144842240.0,
"err": 0.0042877197265625,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.0313232421875,
"total_bits": 158107520.0,
"err": 0.00394439697265625,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.2313232421875,
"total_bits": 163350400.0,
"err": 0.00336456298828125,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.12529296875,
"total_bits": 212999679.99999997,
"err": 0.003856658935546875,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.0.self_attn.v_proj",
"numel": 26214400,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.17529296875,
"total_bits": 57024000.0,
"err": 0.06103515625,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.37529296875,
"total_bits": 62266880.00000001,
"err": 0.050872802734375,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.72529296875,
"total_bits": 71441920.0,
"err": 0.033966064453125,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.22529296875,
"total_bits": 84549120.0,
"err": 0.0271453857421875,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.0313232421875,
"total_bits": 79464320.0,
"err": 0.050811767578125,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.12529296875,
"total_bits": 81927680.0,
"err": 0.039276123046875,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.17529296875,
"total_bits": 83238400.0,
"err": 0.0289459228515625,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.52529296875,
"total_bits": 92413440.0,
"err": 0.0223388671875,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.662646484375,
"total_bits": 96014080.0,
"err": 0.0231170654296875,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.0313232421875,
"total_bits": 105678720.0,
"err": 0.02703857421875,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.12529296875,
"total_bits": 108142080.0,
"err": 0.0190277099609375,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.0313232421875,
"total_bits": 131893120.0,
"err": 0.01351165771484375,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.22529296875,
"total_bits": 136977920.0,
"err": 0.007659912109375,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.52529296875,
"total_bits": 144842240.0,
"err": 0.006824493408203125,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.0313232421875,
"total_bits": 158107520.0,
"err": 0.007015228271484375,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.2313232421875,
"total_bits": 163350400.0,
"err": 0.004070281982421875,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.12529296875,
"total_bits": 212999679.99999997,
"err": 0.00460052490234375,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.0.self_attn.o_proj",
"numel": 26214400,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.17529296875,
"total_bits": 57024000.0,
"err": 0.0221405029296875,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.37529296875,
"total_bits": 62266880.00000001,
"err": 0.0130462646484375,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.72529296875,
"total_bits": 71441920.0,
"err": 0.00966644287109375,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.22529296875,
"total_bits": 84549120.0,
"err": 0.009185791015625,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.0313232421875,
"total_bits": 79464320.0,
"err": 0.013214111328125,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.12529296875,
"total_bits": 81927680.0,
"err": 0.0116729736328125,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.17529296875,
"total_bits": 83238400.0,
"err": 0.01013946533203125,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.52529296875,
"total_bits": 92413440.0,
"err": 0.006069183349609375,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.662646484375,
"total_bits": 96014080.0,
"err": 0.00634765625,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.0313232421875,
"total_bits": 105678720.0,
"err": 0.006702423095703125,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.12529296875,
"total_bits": 108142080.0,
"err": 0.005809783935546875,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.0313232421875,
"total_bits": 131893120.0,
"err": 0.0035686492919921875,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.22529296875,
"total_bits": 136977920.0,
"err": 0.003139495849609375,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.52529296875,
"total_bits": 144842240.0,
"err": 0.00274658203125,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.0313232421875,
"total_bits": 158107520.0,
"err": 0.0022945404052734375,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.2313232421875,
"total_bits": 163350400.0,
"err": 0.0020351409912109375,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.12529296875,
"total_bits": 212999679.99999997,
"err": 0.00241851806640625,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.0.mlp.gate_proj",
"numel": 70778880,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.1751085069444445,
"total_bits": 153951744.0,
"err": 0.07269287109375,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.3751085069444446,
"total_bits": 168107520.0,
"err": 0.0677490234375,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.7251085069444443,
"total_bits": 192880128.0,
"err": 0.0596923828125,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.2251085069444443,
"total_bits": 228269568.0,
"err": 0.032470703125,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.031277126736111,
"total_bits": 214550400.0,
"err": 0.036285400390625,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.1251085069444446,
"total_bits": 221191680.0,
"err": 0.0335693359375,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.1751085069444445,
"total_bits": 224730624.0,
"err": 0.032958984375,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.5251085069444446,
"total_bits": 249503232.0,
"err": 0.0294189453125,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.6625542534722224,
"total_bits": 259231488.0,
"err": 0.02789306640625,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.031277126736111,
"total_bits": 285329280.0,
"err": 0.018402099609375,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.125108506944445,
"total_bits": 291970560.0,
"err": 0.01617431640625,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.031277126736111,
"total_bits": 356108160.0,
"err": 0.00928497314453125,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.225108506944444,
"total_bits": 369827328.0,
"err": 0.00860595703125,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.525108506944444,
"total_bits": 391060992.0,
"err": 0.00806427001953125,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.031277126736111,
"total_bits": 426887040.0,
"err": 0.00522613525390625,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.231277126736111,
"total_bits": 441042816.0,
"err": 0.00507354736328125,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.125108506944445,
"total_bits": 575086080.0,
"err": 0.004730224609375,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.0.mlp.up_proj",
"numel": 70778880,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.1751085069444445,
"total_bits": 153951744.0,
"err": 0.1103515625,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.3751085069444446,
"total_bits": 168107520.0,
"err": 0.10302734375,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.7251085069444443,
"total_bits": 192880128.0,
"err": 0.09088134765625,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.2251085069444443,
"total_bits": 228269568.0,
"err": 0.04925537109375,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.031277126736111,
"total_bits": 214550400.0,
"err": 0.054962158203125,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.1251085069444446,
"total_bits": 221191680.0,
"err": 0.05078125,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.1751085069444445,
"total_bits": 224730624.0,
"err": 0.0499267578125,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.5251085069444446,
"total_bits": 249503232.0,
"err": 0.044586181640625,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.6625542534722224,
"total_bits": 259231488.0,
"err": 0.042236328125,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.031277126736111,
"total_bits": 285329280.0,
"err": 0.0277099609375,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.125108506944445,
"total_bits": 291970560.0,
"err": 0.0240631103515625,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.031277126736111,
"total_bits": 356108160.0,
"err": 0.01381683349609375,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.225108506944444,
"total_bits": 369827328.0,
"err": 0.01216888427734375,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.525108506944444,
"total_bits": 391060992.0,
"err": 0.01128387451171875,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.031277126736111,
"total_bits": 426887040.0,
"err": 0.007366180419921875,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.231277126736111,
"total_bits": 441042816.0,
"err": 0.00714111328125,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.125108506944445,
"total_bits": 575086080.0,
"err": 0.0052642822265625,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.0.mlp.down_proj",
"numel": 70778880,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.1715892650462965,
"total_bits": 153702656.0,
"err": 0.0623779296875,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.37529296875,
"total_bits": 168120576.0,
"err": 0.050933837890625,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.727144820601852,
"total_bits": 193024256.0,
"err": 0.031829833984375,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.227144820601852,
"total_bits": 228413696.0,
"err": 0.0253753662109375,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.0313232421875,
"total_bits": 214553664.0,
"err": 0.04730224609375,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.12529296875,
"total_bits": 221204736.0,
"err": 0.041656494140625,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.1715892650462965,
"total_bits": 224481536.0,
"err": 0.02734375,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.523441116898148,
"total_bits": 249385216.0,
"err": 0.0214996337890625,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.6644983362268517,
"total_bits": 259369088.0,
"err": 0.0213775634765625,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.0313232421875,
"total_bits": 285332544.0,
"err": 0.02093505859375,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.12529296875,
"total_bits": 291983616.0,
"err": 0.01885986328125,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.0313232421875,
"total_bits": 356111424.0,
"err": 0.01143646240234375,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.227144820601852,
"total_bits": 369971456.0,
"err": 0.00865936279296875,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.523441116898148,
"total_bits": 390942976.0,
"err": 0.00812530517578125,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.0313232421875,
"total_bits": 426890304.0,
"err": 0.007663726806640625,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.235026945891204,
"total_bits": 441308224.0,
"err": 0.0063018798828125,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.12529296875,
"total_bits": 575099136.0,
"err": 0.00634002685546875,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.1.self_attn.q_proj",
"numel": 26214400,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.17529296875,
"total_bits": 57024000.0,
"err": 0.0157470703125,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.37529296875,
"total_bits": 62266880.00000001,
"err": 0.0124969482421875,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.72529296875,
"total_bits": 71441920.0,
"err": 0.008453369140625,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.22529296875,
"total_bits": 84549120.0,
"err": 0.006744384765625,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.0313232421875,
"total_bits": 79464320.0,
"err": 0.01219940185546875,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.12529296875,
"total_bits": 81927680.0,
"err": 0.00969696044921875,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.17529296875,
"total_bits": 83238400.0,
"err": 0.00732421875,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.52529296875,
"total_bits": 92413440.0,
"err": 0.005523681640625,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.662646484375,
"total_bits": 96014080.0,
"err": 0.005878448486328125,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.0313232421875,
"total_bits": 105678720.0,
"err": 0.006450653076171875,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.12529296875,
"total_bits": 108142080.0,
"err": 0.00478363037109375,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.0313232421875,
"total_bits": 131893120.0,
"err": 0.0033206939697265625,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.22529296875,
"total_bits": 136977920.0,
"err": 0.001903533935546875,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.52529296875,
"total_bits": 144842240.0,
"err": 0.0016984939575195312,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.0313232421875,
"total_bits": 158107520.0,
"err": 0.0017747879028320312,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.2313232421875,
"total_bits": 163350400.0,
"err": 0.0011072158813476562,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.12529296875,
"total_bits": 212999679.99999997,
"err": 0.001140594482421875,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.1.self_attn.k_proj",
"numel": 26214400,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.17529296875,
"total_bits": 57024000.0,
"err": 0.01482391357421875,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.37529296875,
"total_bits": 62266880.00000001,
"err": 0.01154327392578125,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.72529296875,
"total_bits": 71441920.0,
"err": 0.0078277587890625,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.22529296875,
"total_bits": 84549120.0,
"err": 0.00620269775390625,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.0313232421875,
"total_bits": 79464320.0,
"err": 0.01006317138671875,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.12529296875,
"total_bits": 81927680.0,
"err": 0.00884246826171875,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.17529296875,
"total_bits": 83238400.0,
"err": 0.006801605224609375,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.52529296875,
"total_bits": 92413440.0,
"err": 0.00496673583984375,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.662646484375,
"total_bits": 96014080.0,
"err": 0.00506591796875,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.0313232421875,
"total_bits": 105678720.0,
"err": 0.005096435546875,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.12529296875,
"total_bits": 108142080.0,
"err": 0.004230499267578125,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.0313232421875,
"total_bits": 131893120.0,
"err": 0.002567291259765625,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.22529296875,
"total_bits": 136977920.0,
"err": 0.0017156600952148438,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.52529296875,
"total_bits": 144842240.0,
"err": 0.0015039443969726562,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.0313232421875,
"total_bits": 158107520.0,
"err": 0.0013866424560546875,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.2313232421875,
"total_bits": 163350400.0,
"err": 0.0009746551513671875,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.12529296875,
"total_bits": 212999679.99999997,
"err": 0.001010894775390625,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.1.self_attn.v_proj",
"numel": 26214400,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.17529296875,
"total_bits": 57024000.0,
"err": 0.083251953125,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.37529296875,
"total_bits": 62266880.00000001,
"err": 0.0701904296875,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.72529296875,
"total_bits": 71441920.0,
"err": 0.0555419921875,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.22529296875,
"total_bits": 84549120.0,
"err": 0.037200927734375,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.0313232421875,
"total_bits": 79464320.0,
"err": 0.04620361328125,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.12529296875,
"total_bits": 81927680.0,
"err": 0.04241943359375,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.17529296875,
"total_bits": 83238400.0,
"err": 0.03912353515625,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.52529296875,
"total_bits": 92413440.0,
"err": 0.0301055908203125,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.662646484375,
"total_bits": 96014080.0,
"err": 0.0280914306640625,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.0313232421875,
"total_bits": 105678720.0,
"err": 0.02362060546875,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.12529296875,
"total_bits": 108142080.0,
"err": 0.0203399658203125,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.0313232421875,
"total_bits": 131893120.0,
"err": 0.01183319091796875,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.22529296875,
"total_bits": 136977920.0,
"err": 0.009490966796875,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.52529296875,
"total_bits": 144842240.0,
"err": 0.00811004638671875,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.0313232421875,
"total_bits": 158107520.0,
"err": 0.00626373291015625,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.2313232421875,
"total_bits": 163350400.0,
"err": 0.005428314208984375,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.12529296875,
"total_bits": 212999679.99999997,
"err": 0.0044403076171875,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.1.self_attn.o_proj",
"numel": 26214400,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.17529296875,
"total_bits": 57024000.0,
"err": 0.147705078125,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.37529296875,
"total_bits": 62266880.00000001,
"err": 0.09344482421875,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.72529296875,
"total_bits": 71441920.0,
"err": 0.06488037109375,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.22529296875,
"total_bits": 84549120.0,
"err": 0.06329345703125,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.0313232421875,
"total_bits": 79464320.0,
"err": 0.089111328125,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.12529296875,
"total_bits": 81927680.0,
"err": 0.0794677734375,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.17529296875,
"total_bits": 83238400.0,
"err": 0.0704345703125,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.52529296875,
"total_bits": 92413440.0,
"err": 0.04052734375,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.662646484375,
"total_bits": 96014080.0,
"err": 0.04290771484375,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.0313232421875,
"total_bits": 105678720.0,
"err": 0.04595947265625,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.12529296875,
"total_bits": 108142080.0,
"err": 0.0391845703125,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.0313232421875,
"total_bits": 131893120.0,
"err": 0.0236358642578125,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.22529296875,
"total_bits": 136977920.0,
"err": 0.01898193359375,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.52529296875,
"total_bits": 144842240.0,
"err": 0.0154876708984375,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.0313232421875,
"total_bits": 158107520.0,
"err": 0.01372528076171875,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.2313232421875,
"total_bits": 163350400.0,
"err": 0.01151275634765625,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.12529296875,
"total_bits": 212999679.99999997,
"err": 0.01265716552734375,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.1.mlp.gate_proj",
"numel": 70778880,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.1751085069444445,
"total_bits": 153951744.0,
"err": 0.11834716796875,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.3751085069444446,
"total_bits": 168107520.0,
"err": 0.11199951171875,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.7251085069444443,
"total_bits": 192880128.0,
"err": 0.0999755859375,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.2251085069444443,
"total_bits": 228269568.0,
"err": 0.05230712890625,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.031277126736111,
"total_bits": 214550400.0,
"err": 0.0577392578125,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.1251085069444446,
"total_bits": 221191680.0,
"err": 0.053375244140625,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.1751085069444445,
"total_bits": 224730624.0,
"err": 0.052825927734375,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.5251085069444446,
"total_bits": 249503232.0,
"err": 0.04815673828125,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.6625542534722224,
"total_bits": 259231488.0,
"err": 0.045928955078125,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.031277126736111,
"total_bits": 285329280.0,
"err": 0.029205322265625,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.125108506944445,
"total_bits": 291970560.0,
"err": 0.025421142578125,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.031277126736111,
"total_bits": 356108160.0,
"err": 0.0146484375,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.225108506944444,
"total_bits": 369827328.0,
"err": 0.01336669921875,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.525108506944444,
"total_bits": 391060992.0,
"err": 0.0126190185546875,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.031277126736111,
"total_bits": 426887040.0,
"err": 0.008148193359375,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.231277126736111,
"total_bits": 441042816.0,
"err": 0.00800323486328125,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.125108506944445,
"total_bits": 575086080.0,
"err": 0.006740570068359375,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.1.mlp.up_proj",
"numel": 70778880,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.1751085069444445,
"total_bits": 153951744.0,
"err": 0.1494140625,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.3751085069444446,
"total_bits": 168107520.0,
"err": 0.1416015625,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.7251085069444443,
"total_bits": 192880128.0,
"err": 0.1270751953125,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.2251085069444443,
"total_bits": 228269568.0,
"err": 0.0662841796875,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.031277126736111,
"total_bits": 214550400.0,
"err": 0.0728759765625,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.1251085069444446,
"total_bits": 221191680.0,
"err": 0.0673828125,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.1751085069444445,
"total_bits": 224730624.0,
"err": 0.06683349609375,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.5251085069444446,
"total_bits": 249503232.0,
"err": 0.061065673828125,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.6625542534722224,
"total_bits": 259231488.0,
"err": 0.058197021484375,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.031277126736111,
"total_bits": 285329280.0,
"err": 0.036468505859375,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.125108506944445,
"total_bits": 291970560.0,
"err": 0.03173828125,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.031277126736111,
"total_bits": 356108160.0,
"err": 0.01812744140625,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.225108506944444,
"total_bits": 369827328.0,
"err": 0.015869140625,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.525108506944444,
"total_bits": 391060992.0,
"err": 0.01486968994140625,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.031277126736111,
"total_bits": 426887040.0,
"err": 0.00940704345703125,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.231277126736111,
"total_bits": 441042816.0,
"err": 0.00923919677734375,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.125108506944445,
"total_bits": 575086080.0,
"err": 0.00586700439453125,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.1.mlp.down_proj",
"numel": 70778880,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.1715892650462965,
"total_bits": 153702656.0,
"err": 0.1392822265625,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.37529296875,
"total_bits": 168120576.0,
"err": 0.1268310546875,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.727144820601852,
"total_bits": 193024256.0,
"err": 0.10498046875,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.227144820601852,
"total_bits": 228413696.0,
"err": 0.06683349609375,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.0313232421875,
"total_bits": 214553664.0,
"err": 0.0888671875,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.12529296875,
"total_bits": 221204736.0,
"err": 0.080078125,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.1715892650462965,
"total_bits": 224481536.0,
"err": 0.068359375,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.523441116898148,
"total_bits": 249385216.0,
"err": 0.061676025390625,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.6644983362268517,
"total_bits": 259369088.0,
"err": 0.05914306640625,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.0313232421875,
"total_bits": 285332544.0,
"err": 0.0494384765625,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.12529296875,
"total_bits": 291983616.0,
"err": 0.046875,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.0313232421875,
"total_bits": 356111424.0,
"err": 0.022369384765625,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.227144820601852,
"total_bits": 369971456.0,
"err": 0.01727294921875,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.523441116898148,
"total_bits": 390942976.0,
"err": 0.0164031982421875,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.0313232421875,
"total_bits": 426890304.0,
"err": 0.01434326171875,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.235026945891204,
"total_bits": 441308224.0,
"err": 0.011932373046875,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.12529296875,
"total_bits": 575099136.0,
"err": 0.010589599609375,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.2.self_attn.q_proj",
"numel": 26214400,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.17529296875,
"total_bits": 57024000.0,
"err": 0.0352783203125,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.37529296875,
"total_bits": 62266880.00000001,
"err": 0.032318115234375,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.72529296875,
"total_bits": 71441920.0,
"err": 0.0275726318359375,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.22529296875,
"total_bits": 84549120.0,
"err": 0.015869140625,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.0313232421875,
"total_bits": 79464320.0,
"err": 0.020263671875,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.12529296875,
"total_bits": 81927680.0,
"err": 0.0171661376953125,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.17529296875,
"total_bits": 83238400.0,
"err": 0.0161895751953125,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.52529296875,
"total_bits": 92413440.0,
"err": 0.01401519775390625,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.662646484375,
"total_bits": 96014080.0,
"err": 0.0133056640625,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.0313232421875,
"total_bits": 105678720.0,
"err": 0.01025390625,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.12529296875,
"total_bits": 108142080.0,
"err": 0.00821685791015625,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.0313232421875,
"total_bits": 131893120.0,
"err": 0.005153656005859375,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.22529296875,
"total_bits": 136977920.0,
"err": 0.00399017333984375,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.52529296875,
"total_bits": 144842240.0,
"err": 0.0036296844482421875,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.0313232421875,
"total_bits": 158107520.0,
"err": 0.0027408599853515625,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.2313232421875,
"total_bits": 163350400.0,
"err": 0.0023708343505859375,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.12529296875,
"total_bits": 212999679.99999997,
"err": 0.001804351806640625,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.2.self_attn.k_proj",
"numel": 26214400,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.17529296875,
"total_bits": 57024000.0,
"err": 0.02862548828125,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.37529296875,
"total_bits": 62266880.00000001,
"err": 0.026153564453125,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.72529296875,
"total_bits": 71441920.0,
"err": 0.02227783203125,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.22529296875,
"total_bits": 84549120.0,
"err": 0.0128326416015625,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.0313232421875,
"total_bits": 79464320.0,
"err": 0.01509857177734375,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.12529296875,
"total_bits": 81927680.0,
"err": 0.0138092041015625,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.17529296875,
"total_bits": 83238400.0,
"err": 0.01309967041015625,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.52529296875,
"total_bits": 92413440.0,
"err": 0.01129150390625,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.662646484375,
"total_bits": 96014080.0,
"err": 0.01064300537109375,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.0313232421875,
"total_bits": 105678720.0,
"err": 0.00760650634765625,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.12529296875,
"total_bits": 108142080.0,
"err": 0.006557464599609375,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.0313232421875,
"total_bits": 131893120.0,
"err": 0.00380706787109375,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.22529296875,
"total_bits": 136977920.0,
"err": 0.0031986236572265625,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.52529296875,
"total_bits": 144842240.0,
"err": 0.002895355224609375,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.0313232421875,
"total_bits": 158107520.0,
"err": 0.002017974853515625,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.2313232421875,
"total_bits": 163350400.0,
"err": 0.0018644332885742188,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.12529296875,
"total_bits": 212999679.99999997,
"err": 0.0013942718505859375,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.2.self_attn.v_proj",
"numel": 26214400,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.17529296875,
"total_bits": 57024000.0,
"err": 0.1324462890625,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.37529296875,
"total_bits": 62266880.00000001,
"err": 0.12310791015625,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.72529296875,
"total_bits": 71441920.0,
"err": 0.10809326171875,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.22529296875,
"total_bits": 84549120.0,
"err": 0.060577392578125,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.0313232421875,
"total_bits": 79464320.0,
"err": 0.06781005859375,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.12529296875,
"total_bits": 81927680.0,
"err": 0.06268310546875,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.17529296875,
"total_bits": 83238400.0,
"err": 0.0616455078125,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.52529296875,
"total_bits": 92413440.0,
"err": 0.05419921875,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.662646484375,
"total_bits": 96014080.0,
"err": 0.0511474609375,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.0313232421875,
"total_bits": 105678720.0,
"err": 0.034271240234375,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.12529296875,
"total_bits": 108142080.0,
"err": 0.0298004150390625,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.0313232421875,
"total_bits": 131893120.0,
"err": 0.01708984375,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.22529296875,
"total_bits": 136977920.0,
"err": 0.0146636962890625,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.52529296875,
"total_bits": 144842240.0,
"err": 0.01336669921875,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.0313232421875,
"total_bits": 158107520.0,
"err": 0.00881195068359375,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.2313232421875,
"total_bits": 163350400.0,
"err": 0.00847625732421875,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.12529296875,
"total_bits": 212999679.99999997,
"err": 0.00542449951171875,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.2.self_attn.o_proj",
"numel": 26214400,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.17529296875,
"total_bits": 57024000.0,
"err": 0.1640625,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.37529296875,
"total_bits": 62266880.00000001,
"err": 0.13232421875,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.72529296875,
"total_bits": 71441920.0,
"err": 0.09552001953125,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.22529296875,
"total_bits": 84549120.0,
"err": 0.07281494140625,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.0313232421875,
"total_bits": 79464320.0,
"err": 0.09063720703125,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.12529296875,
"total_bits": 81927680.0,
"err": 0.08197021484375,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.17529296875,
"total_bits": 83238400.0,
"err": 0.07757568359375,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.52529296875,
"total_bits": 92413440.0,
"err": 0.0540771484375,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.662646484375,
"total_bits": 96014080.0,
"err": 0.04974365234375,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.0313232421875,
"total_bits": 105678720.0,
"err": 0.04681396484375,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.12529296875,
"total_bits": 108142080.0,
"err": 0.0401611328125,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.0313232421875,
"total_bits": 131893120.0,
"err": 0.024017333984375,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.22529296875,
"total_bits": 136977920.0,
"err": 0.020538330078125,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.52529296875,
"total_bits": 144842240.0,
"err": 0.0173492431640625,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.0313232421875,
"total_bits": 158107520.0,
"err": 0.0141143798828125,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.2313232421875,
"total_bits": 163350400.0,
"err": 0.012969970703125,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.12529296875,
"total_bits": 212999679.99999997,
"err": 0.012542724609375,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.2.mlp.gate_proj",
"numel": 70778880,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.1751085069444445,
"total_bits": 153951744.0,
"err": 0.165283203125,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.3751085069444446,
"total_bits": 168107520.0,
"err": 0.1566162109375,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.7251085069444443,
"total_bits": 192880128.0,
"err": 0.1409912109375,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.2251085069444443,
"total_bits": 228269568.0,
"err": 0.0753173828125,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.031277126736111,
"total_bits": 214550400.0,
"err": 0.08306884765625,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.1251085069444446,
"total_bits": 221191680.0,
"err": 0.0767822265625,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.1751085069444445,
"total_bits": 224730624.0,
"err": 0.07611083984375,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.5251085069444446,
"total_bits": 249503232.0,
"err": 0.0693359375,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.6625542534722224,
"total_bits": 259231488.0,
"err": 0.0665283203125,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.031277126736111,
"total_bits": 285329280.0,
"err": 0.042266845703125,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.125108506944445,
"total_bits": 291970560.0,
"err": 0.03656005859375,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.031277126736111,
"total_bits": 356108160.0,
"err": 0.021148681640625,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.225108506944444,
"total_bits": 369827328.0,
"err": 0.0187835693359375,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.525108506944444,
"total_bits": 391060992.0,
"err": 0.0176239013671875,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.031277126736111,
"total_bits": 426887040.0,
"err": 0.0114593505859375,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.231277126736111,
"total_bits": 441042816.0,
"err": 0.01128387451171875,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.125108506944445,
"total_bits": 575086080.0,
"err": 0.00836181640625,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.2.mlp.up_proj",
"numel": 70778880,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.1751085069444445,
"total_bits": 153951744.0,
"err": 0.2005615234375,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.3751085069444446,
"total_bits": 168107520.0,
"err": 0.190185546875,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.7251085069444443,
"total_bits": 192880128.0,
"err": 0.1715087890625,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.2251085069444443,
"total_bits": 228269568.0,
"err": 0.09149169921875,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.031277126736111,
"total_bits": 214550400.0,
"err": 0.1009521484375,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.1251085069444446,
"total_bits": 221191680.0,
"err": 0.09320068359375,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.1751085069444445,
"total_bits": 224730624.0,
"err": 0.09246826171875,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.5251085069444446,
"total_bits": 249503232.0,
"err": 0.084228515625,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.6625542534722224,
"total_bits": 259231488.0,
"err": 0.08087158203125,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.031277126736111,
"total_bits": 285329280.0,
"err": 0.050994873046875,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.125108506944445,
"total_bits": 291970560.0,
"err": 0.044189453125,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.031277126736111,
"total_bits": 356108160.0,
"err": 0.0254364013671875,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.225108506944444,
"total_bits": 369827328.0,
"err": 0.0221710205078125,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.525108506944444,
"total_bits": 391060992.0,
"err": 0.0207061767578125,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.031277126736111,
"total_bits": 426887040.0,
"err": 0.0133514404296875,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.231277126736111,
"total_bits": 441042816.0,
"err": 0.01311492919921875,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.125108506944445,
"total_bits": 575086080.0,
"err": 0.0083465576171875,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.2.mlp.down_proj",
"numel": 70778880,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.1715892650462965,
"total_bits": 153702656.0,
"err": 0.1937255859375,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.37529296875,
"total_bits": 168120576.0,
"err": 0.177734375,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.727144820601852,
"total_bits": 193024256.0,
"err": 0.1566162109375,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.227144820601852,
"total_bits": 228413696.0,
"err": 0.086669921875,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.0313232421875,
"total_bits": 214553664.0,
"err": 0.099853515625,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.12529296875,
"total_bits": 221204736.0,
"err": 0.091796875,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.1715892650462965,
"total_bits": 224481536.0,
"err": 0.0887451171875,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.523441116898148,
"total_bits": 249385216.0,
"err": 0.0780029296875,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.6644983362268517,
"total_bits": 259369088.0,
"err": 0.07501220703125,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.0313232421875,
"total_bits": 285332544.0,
"err": 0.05047607421875,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.12529296875,
"total_bits": 291983616.0,
"err": 0.04364013671875,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.0313232421875,
"total_bits": 356111424.0,
"err": 0.0252685546875,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.227144820601852,
"total_bits": 369971456.0,
"err": 0.0216064453125,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.523441116898148,
"total_bits": 390942976.0,
"err": 0.01995849609375,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.0313232421875,
"total_bits": 426890304.0,
"err": 0.01361083984375,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.235026945891204,
"total_bits": 441308224.0,
"err": 0.0129241943359375,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.12529296875,
"total_bits": 575099136.0,
"err": 0.00957489013671875,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.3.self_attn.q_proj",
"numel": 26214400,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.17529296875,
"total_bits": 57024000.0,
"err": 0.048248291015625,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.37529296875,
"total_bits": 62266880.00000001,
"err": 0.04449462890625,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.72529296875,
"total_bits": 71441920.0,
"err": 0.03826904296875,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.22529296875,
"total_bits": 84549120.0,
"err": 0.0219879150390625,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.0313232421875,
"total_bits": 79464320.0,
"err": 0.026580810546875,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.12529296875,
"total_bits": 81927680.0,
"err": 0.0236968994140625,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.17529296875,
"total_bits": 83238400.0,
"err": 0.02239990234375,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.52529296875,
"total_bits": 92413440.0,
"err": 0.01953125,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.662646484375,
"total_bits": 96014080.0,
"err": 0.018646240234375,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.0313232421875,
"total_bits": 105678720.0,
"err": 0.0136260986328125,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.12529296875,
"total_bits": 108142080.0,
"err": 0.011383056640625,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.0313232421875,
"total_bits": 131893120.0,
"err": 0.00688934326171875,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.22529296875,
"total_bits": 136977920.0,
"err": 0.0055084228515625,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.52529296875,
"total_bits": 144842240.0,
"err": 0.005031585693359375,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.0313232421875,
"total_bits": 158107520.0,
"err": 0.003665924072265625,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.2313232421875,
"total_bits": 163350400.0,
"err": 0.0032711029052734375,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.12529296875,
"total_bits": 212999679.99999997,
"err": 0.0024318695068359375,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.3.self_attn.k_proj",
"numel": 26214400,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.17529296875,
"total_bits": 57024000.0,
"err": 0.039794921875,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.37529296875,
"total_bits": 62266880.00000001,
"err": 0.03668212890625,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.72529296875,
"total_bits": 71441920.0,
"err": 0.031494140625,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.22529296875,
"total_bits": 84549120.0,
"err": 0.018096923828125,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.0313232421875,
"total_bits": 79464320.0,
"err": 0.0213775634765625,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.12529296875,
"total_bits": 81927680.0,
"err": 0.01947021484375,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.17529296875,
"total_bits": 83238400.0,
"err": 0.0184478759765625,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.52529296875,
"total_bits": 92413440.0,
"err": 0.01605224609375,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.662646484375,
"total_bits": 96014080.0,
"err": 0.01525115966796875,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.0313232421875,
"total_bits": 105678720.0,
"err": 0.01081085205078125,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.12529296875,
"total_bits": 108142080.0,
"err": 0.0092926025390625,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.0313232421875,
"total_bits": 131893120.0,
"err": 0.0054168701171875,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.22529296875,
"total_bits": 136977920.0,
"err": 0.00455474853515625,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.52529296875,
"total_bits": 144842240.0,
"err": 0.00415802001953125,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.0313232421875,
"total_bits": 158107520.0,
"err": 0.0028839111328125,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.2313232421875,
"total_bits": 163350400.0,
"err": 0.00267791748046875,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.12529296875,
"total_bits": 212999679.99999997,
"err": 0.002056121826171875,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.3.self_attn.v_proj",
"numel": 26214400,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.17529296875,
"total_bits": 57024000.0,
"err": 0.177978515625,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.37529296875,
"total_bits": 62266880.00000001,
"err": 0.1658935546875,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.72529296875,
"total_bits": 71441920.0,
"err": 0.14599609375,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.22529296875,
"total_bits": 84549120.0,
"err": 0.0821533203125,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.0313232421875,
"total_bits": 79464320.0,
"err": 0.092529296875,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.12529296875,
"total_bits": 81927680.0,
"err": 0.08489990234375,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.17529296875,
"total_bits": 83238400.0,
"err": 0.08349609375,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.52529296875,
"total_bits": 92413440.0,
"err": 0.073486328125,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.662646484375,
"total_bits": 96014080.0,
"err": 0.0697021484375,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.0313232421875,
"total_bits": 105678720.0,
"err": 0.046875,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.12529296875,
"total_bits": 108142080.0,
"err": 0.0404052734375,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.0313232421875,
"total_bits": 131893120.0,
"err": 0.0233612060546875,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.22529296875,
"total_bits": 136977920.0,
"err": 0.019927978515625,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.52529296875,
"total_bits": 144842240.0,
"err": 0.0181884765625,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.0313232421875,
"total_bits": 158107520.0,
"err": 0.01212310791015625,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.2313232421875,
"total_bits": 163350400.0,
"err": 0.01165008544921875,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.12529296875,
"total_bits": 212999679.99999997,
"err": 0.007381439208984375,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.3.self_attn.o_proj",
"numel": 26214400,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.17529296875,
"total_bits": 57024000.0,
"err": 0.1297607421875,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.37529296875,
"total_bits": 62266880.00000001,
"err": 0.11181640625,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.72529296875,
"total_bits": 71441920.0,
"err": 0.0806884765625,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.22529296875,
"total_bits": 84549120.0,
"err": 0.059112548828125,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.0313232421875,
"total_bits": 79464320.0,
"err": 0.0823974609375,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.12529296875,
"total_bits": 81927680.0,
"err": 0.0704345703125,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.17529296875,
"total_bits": 83238400.0,
"err": 0.061309814453125,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.52529296875,
"total_bits": 92413440.0,
"err": 0.04779052734375,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.662646484375,
"total_bits": 96014080.0,
"err": 0.04315185546875,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.0313232421875,
"total_bits": 105678720.0,
"err": 0.042266845703125,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.12529296875,
"total_bits": 108142080.0,
"err": 0.035125732421875,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.0313232421875,
"total_bits": 131893120.0,
"err": 0.0225372314453125,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.22529296875,
"total_bits": 136977920.0,
"err": 0.01837158203125,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.52529296875,
"total_bits": 144842240.0,
"err": 0.0165863037109375,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.0313232421875,
"total_bits": 158107520.0,
"err": 0.0139007568359375,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.2313232421875,
"total_bits": 163350400.0,
"err": 0.012176513671875,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.12529296875,
"total_bits": 212999679.99999997,
"err": 0.01279449462890625,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.3.mlp.gate_proj",
"numel": 70778880,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.1751085069444445,
"total_bits": 153951744.0,
"err": 0.1842041015625,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.3751085069444446,
"total_bits": 168107520.0,
"err": 0.173828125,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.7251085069444443,
"total_bits": 192880128.0,
"err": 0.156005859375,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.2251085069444443,
"total_bits": 228269568.0,
"err": 0.085693359375,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.031277126736111,
"total_bits": 214550400.0,
"err": 0.0947265625,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.1251085069444446,
"total_bits": 221191680.0,
"err": 0.08758544921875,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.1751085069444445,
"total_bits": 224730624.0,
"err": 0.086669921875,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.5251085069444446,
"total_bits": 249503232.0,
"err": 0.0782470703125,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.6625542534722224,
"total_bits": 259231488.0,
"err": 0.0748291015625,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.031277126736111,
"total_bits": 285329280.0,
"err": 0.04852294921875,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.125108506944445,
"total_bits": 291970560.0,
"err": 0.042266845703125,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.031277126736111,
"total_bits": 356108160.0,
"err": 0.0243377685546875,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.225108506944444,
"total_bits": 369827328.0,
"err": 0.02215576171875,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.525108506944444,
"total_bits": 391060992.0,
"err": 0.020751953125,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.031277126736111,
"total_bits": 426887040.0,
"err": 0.013458251953125,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.231277126736111,
"total_bits": 441042816.0,
"err": 0.0131988525390625,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.125108506944445,
"total_bits": 575086080.0,
"err": 0.01114654541015625,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.3.mlp.up_proj",
"numel": 70778880,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.1751085069444445,
"total_bits": 153951744.0,
"err": 0.2257080078125,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.3751085069444446,
"total_bits": 168107520.0,
"err": 0.2132568359375,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.7251085069444443,
"total_bits": 192880128.0,
"err": 0.19140625,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.2251085069444443,
"total_bits": 228269568.0,
"err": 0.10479736328125,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.031277126736111,
"total_bits": 214550400.0,
"err": 0.11590576171875,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.1251085069444446,
"total_bits": 221191680.0,
"err": 0.10699462890625,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.1751085069444445,
"total_bits": 224730624.0,
"err": 0.10601806640625,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.5251085069444446,
"total_bits": 249503232.0,
"err": 0.09564208984375,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.6625542534722224,
"total_bits": 259231488.0,
"err": 0.09136962890625,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.031277126736111,
"total_bits": 285329280.0,
"err": 0.058868408203125,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.125108506944445,
"total_bits": 291970560.0,
"err": 0.05108642578125,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.031277126736111,
"total_bits": 356108160.0,
"err": 0.0294189453125,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.225108506944444,
"total_bits": 369827328.0,
"err": 0.0258636474609375,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.525108506944444,
"total_bits": 391060992.0,
"err": 0.0240020751953125,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.031277126736111,
"total_bits": 426887040.0,
"err": 0.0156402587890625,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.231277126736111,
"total_bits": 441042816.0,
"err": 0.01531219482421875,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.125108506944445,
"total_bits": 575086080.0,
"err": 0.01067352294921875,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.3.mlp.down_proj",
"numel": 70778880,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.1715892650462965,
"total_bits": 153702656.0,
"err": 0.0265045166015625,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.37529296875,
"total_bits": 168120576.0,
"err": 0.0234375,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.727144820601852,
"total_bits": 193024256.0,
"err": 0.0204925537109375,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.227144820601852,
"total_bits": 228413696.0,
"err": 0.0140228271484375,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.0313232421875,
"total_bits": 214553664.0,
"err": 0.0159759521484375,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.12529296875,
"total_bits": 221204736.0,
"err": 0.01415252685546875,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.1715892650462965,
"total_bits": 224481536.0,
"err": 0.01445770263671875,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.523441116898148,
"total_bits": 249385216.0,
"err": 0.012420654296875,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.6644983362268517,
"total_bits": 259369088.0,
"err": 0.01241302490234375,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.0313232421875,
"total_bits": 285332544.0,
"err": 0.01134490966796875,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.12529296875,
"total_bits": 291983616.0,
"err": 0.00939178466796875,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.0313232421875,
"total_bits": 356111424.0,
"err": 0.00385284423828125,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.227144820601852,
"total_bits": 369971456.0,
"err": 0.0035552978515625,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.523441116898148,
"total_bits": 390942976.0,
"err": 0.00324249267578125,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.0313232421875,
"total_bits": 426890304.0,
"err": 0.0025882720947265625,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.235026945891204,
"total_bits": 441308224.0,
"err": 0.0020313262939453125,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.12529296875,
"total_bits": 575099136.0,
"err": 0.0017747879028320312,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.4.self_attn.q_proj",
"numel": 26214400,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.17529296875,
"total_bits": 57024000.0,
"err": 0.06341552734375,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.37529296875,
"total_bits": 62266880.00000001,
"err": 0.058074951171875,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.72529296875,
"total_bits": 71441920.0,
"err": 0.0499267578125,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.22529296875,
"total_bits": 84549120.0,
"err": 0.028900146484375,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.0313232421875,
"total_bits": 79464320.0,
"err": 0.033447265625,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.12529296875,
"total_bits": 81927680.0,
"err": 0.03082275390625,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.17529296875,
"total_bits": 83238400.0,
"err": 0.0295257568359375,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.52529296875,
"total_bits": 92413440.0,
"err": 0.0254364013671875,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.662646484375,
"total_bits": 96014080.0,
"err": 0.024017333984375,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.0313232421875,
"total_bits": 105678720.0,
"err": 0.01690673828125,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.12529296875,
"total_bits": 108142080.0,
"err": 0.014678955078125,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.0313232421875,
"total_bits": 131893120.0,
"err": 0.00844573974609375,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.22529296875,
"total_bits": 136977920.0,
"err": 0.00714874267578125,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.52529296875,
"total_bits": 144842240.0,
"err": 0.006465911865234375,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.0313232421875,
"total_bits": 158107520.0,
"err": 0.00441741943359375,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.2313232421875,
"total_bits": 163350400.0,
"err": 0.0041351318359375,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.12529296875,
"total_bits": 212999679.99999997,
"err": 0.002986907958984375,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.4.self_attn.k_proj",
"numel": 26214400,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.17529296875,
"total_bits": 57024000.0,
"err": 0.056243896484375,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.37529296875,
"total_bits": 62266880.00000001,
"err": 0.051513671875,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.72529296875,
"total_bits": 71441920.0,
"err": 0.044189453125,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.22529296875,
"total_bits": 84549120.0,
"err": 0.025634765625,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.0313232421875,
"total_bits": 79464320.0,
"err": 0.0298919677734375,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.12529296875,
"total_bits": 81927680.0,
"err": 0.0274505615234375,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.17529296875,
"total_bits": 83238400.0,
"err": 0.0261688232421875,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.52529296875,
"total_bits": 92413440.0,
"err": 0.0225677490234375,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.662646484375,
"total_bits": 96014080.0,
"err": 0.0213470458984375,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.0313232421875,
"total_bits": 105678720.0,
"err": 0.01514434814453125,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.12529296875,
"total_bits": 108142080.0,
"err": 0.01306915283203125,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.0313232421875,
"total_bits": 131893120.0,
"err": 0.007572174072265625,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.22529296875,
"total_bits": 136977920.0,
"err": 0.00634002685546875,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.52529296875,
"total_bits": 144842240.0,
"err": 0.005725860595703125,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.0313232421875,
"total_bits": 158107520.0,
"err": 0.0039520263671875,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.2313232421875,
"total_bits": 163350400.0,
"err": 0.0036792755126953125,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.12529296875,
"total_bits": 212999679.99999997,
"err": 0.00261688232421875,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.4.self_attn.v_proj",
"numel": 26214400,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.17529296875,
"total_bits": 57024000.0,
"err": 0.1845703125,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.37529296875,
"total_bits": 62266880.00000001,
"err": 0.171630859375,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.72529296875,
"total_bits": 71441920.0,
"err": 0.150634765625,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.22529296875,
"total_bits": 84549120.0,
"err": 0.0855712890625,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.0313232421875,
"total_bits": 79464320.0,
"err": 0.0960693359375,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.12529296875,
"total_bits": 81927680.0,
"err": 0.08880615234375,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.17529296875,
"total_bits": 83238400.0,
"err": 0.08709716796875,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.52529296875,
"total_bits": 92413440.0,
"err": 0.0762939453125,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.662646484375,
"total_bits": 96014080.0,
"err": 0.07196044921875,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.0313232421875,
"total_bits": 105678720.0,
"err": 0.0487060546875,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.12529296875,
"total_bits": 108142080.0,
"err": 0.042205810546875,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.0313232421875,
"total_bits": 131893120.0,
"err": 0.024261474609375,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.22529296875,
"total_bits": 136977920.0,
"err": 0.020782470703125,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.52529296875,
"total_bits": 144842240.0,
"err": 0.0188751220703125,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.0313232421875,
"total_bits": 158107520.0,
"err": 0.0124969482421875,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.2313232421875,
"total_bits": 163350400.0,
"err": 0.01200103759765625,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.12529296875,
"total_bits": 212999679.99999997,
"err": 0.007648468017578125,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.4.self_attn.o_proj",
"numel": 26214400,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.17529296875,
"total_bits": 57024000.0,
"err": 0.183349609375,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.37529296875,
"total_bits": 62266880.00000001,
"err": 0.1605224609375,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.72529296875,
"total_bits": 71441920.0,
"err": 0.1302490234375,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.22529296875,
"total_bits": 84549120.0,
"err": 0.0855712890625,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.0313232421875,
"total_bits": 79464320.0,
"err": 0.10113525390625,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.12529296875,
"total_bits": 81927680.0,
"err": 0.0927734375,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.17529296875,
"total_bits": 83238400.0,
"err": 0.08807373046875,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.52529296875,
"total_bits": 92413440.0,
"err": 0.0703125,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.662646484375,
"total_bits": 96014080.0,
"err": 0.06402587890625,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.0313232421875,
"total_bits": 105678720.0,
"err": 0.052459716796875,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.12529296875,
"total_bits": 108142080.0,
"err": 0.045135498046875,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.0313232421875,
"total_bits": 131893120.0,
"err": 0.026580810546875,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.22529296875,
"total_bits": 136977920.0,
"err": 0.0229034423828125,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.52529296875,
"total_bits": 144842240.0,
"err": 0.020050048828125,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.0313232421875,
"total_bits": 158107520.0,
"err": 0.01494598388671875,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.2313232421875,
"total_bits": 163350400.0,
"err": 0.0139312744140625,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.12529296875,
"total_bits": 212999679.99999997,
"err": 0.012451171875,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.4.mlp.gate_proj",
"numel": 70778880,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.1751085069444445,
"total_bits": 153951744.0,
"err": 0.19482421875,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.3751085069444446,
"total_bits": 168107520.0,
"err": 0.1837158203125,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.7251085069444443,
"total_bits": 192880128.0,
"err": 0.1641845703125,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.2251085069444443,
"total_bits": 228269568.0,
"err": 0.0908203125,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.031277126736111,
"total_bits": 214550400.0,
"err": 0.1002197265625,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.1251085069444446,
"total_bits": 221191680.0,
"err": 0.0927734375,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.1751085069444445,
"total_bits": 224730624.0,
"err": 0.09185791015625,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.5251085069444446,
"total_bits": 249503232.0,
"err": 0.0823974609375,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.6625542534722224,
"total_bits": 259231488.0,
"err": 0.07830810546875,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.031277126736111,
"total_bits": 285329280.0,
"err": 0.05084228515625,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.125108506944445,
"total_bits": 291970560.0,
"err": 0.044219970703125,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.031277126736111,
"total_bits": 356108160.0,
"err": 0.0253753662109375,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.225108506944444,
"total_bits": 369827328.0,
"err": 0.0222930908203125,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.525108506944444,
"total_bits": 391060992.0,
"err": 0.020599365234375,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.031277126736111,
"total_bits": 426887040.0,
"err": 0.01329803466796875,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.231277126736111,
"total_bits": 441042816.0,
"err": 0.0130157470703125,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.125108506944445,
"total_bits": 575086080.0,
"err": 0.00891876220703125,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.4.mlp.up_proj",
"numel": 70778880,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.1751085069444445,
"total_bits": 153951744.0,
"err": 0.2364501953125,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.3751085069444446,
"total_bits": 168107520.0,
"err": 0.2227783203125,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.7251085069444443,
"total_bits": 192880128.0,
"err": 0.199462890625,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.2251085069444443,
"total_bits": 228269568.0,
"err": 0.1102294921875,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.031277126736111,
"total_bits": 214550400.0,
"err": 0.12139892578125,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.1251085069444446,
"total_bits": 221191680.0,
"err": 0.11248779296875,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.1751085069444445,
"total_bits": 224730624.0,
"err": 0.11151123046875,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.5251085069444446,
"total_bits": 249503232.0,
"err": 0.09991455078125,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.6625542534722224,
"total_bits": 259231488.0,
"err": 0.09503173828125,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.031277126736111,
"total_bits": 285329280.0,
"err": 0.061553955078125,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.125108506944445,
"total_bits": 291970560.0,
"err": 0.05364990234375,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.031277126736111,
"total_bits": 356108160.0,
"err": 0.030670166015625,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.225108506944444,
"total_bits": 369827328.0,
"err": 0.0266571044921875,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.525108506944444,
"total_bits": 391060992.0,
"err": 0.02459716796875,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.031277126736111,
"total_bits": 426887040.0,
"err": 0.01580810546875,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.231277126736111,
"total_bits": 441042816.0,
"err": 0.01544952392578125,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.125108506944445,
"total_bits": 575086080.0,
"err": 0.009613037109375,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.4.mlp.down_proj",
"numel": 70778880,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.1715892650462965,
"total_bits": 153702656.0,
"err": 0.2342529296875,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.37529296875,
"total_bits": 168120576.0,
"err": 0.212158203125,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.727144820601852,
"total_bits": 193024256.0,
"err": 0.1829833984375,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.227144820601852,
"total_bits": 228413696.0,
"err": 0.10687255859375,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.0313232421875,
"total_bits": 214553664.0,
"err": 0.1224365234375,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.12529296875,
"total_bits": 221204736.0,
"err": 0.11285400390625,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.1715892650462965,
"total_bits": 224481536.0,
"err": 0.109619140625,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.523441116898148,
"total_bits": 249385216.0,
"err": 0.09356689453125,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.6644983362268517,
"total_bits": 259369088.0,
"err": 0.088134765625,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.0313232421875,
"total_bits": 285332544.0,
"err": 0.06219482421875,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.12529296875,
"total_bits": 291983616.0,
"err": 0.053863525390625,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.0313232421875,
"total_bits": 356111424.0,
"err": 0.031097412109375,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.227144820601852,
"total_bits": 369971456.0,
"err": 0.026580810546875,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.523441116898148,
"total_bits": 390942976.0,
"err": 0.02398681640625,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.0313232421875,
"total_bits": 426890304.0,
"err": 0.0166473388671875,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.235026945891204,
"total_bits": 441308224.0,
"err": 0.015777587890625,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.12529296875,
"total_bits": 575099136.0,
"err": 0.01143646240234375,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.5.self_attn.q_proj",
"numel": 26214400,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.17529296875,
"total_bits": 57024000.0,
"err": 0.07391357421875,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.37529296875,
"total_bits": 62266880.00000001,
"err": 0.0677490234375,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.72529296875,
"total_bits": 71441920.0,
"err": 0.058013916015625,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.22529296875,
"total_bits": 84549120.0,
"err": 0.033782958984375,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.0313232421875,
"total_bits": 79464320.0,
"err": 0.0391845703125,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.12529296875,
"total_bits": 81927680.0,
"err": 0.036224365234375,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.17529296875,
"total_bits": 83238400.0,
"err": 0.03448486328125,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.52529296875,
"total_bits": 92413440.0,
"err": 0.029693603515625,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.662646484375,
"total_bits": 96014080.0,
"err": 0.027984619140625,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.0313232421875,
"total_bits": 105678720.0,
"err": 0.01983642578125,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.12529296875,
"total_bits": 108142080.0,
"err": 0.01727294921875,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.0313232421875,
"total_bits": 131893120.0,
"err": 0.00992584228515625,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.22529296875,
"total_bits": 136977920.0,
"err": 0.00844573974609375,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.52529296875,
"total_bits": 144842240.0,
"err": 0.00763702392578125,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.0313232421875,
"total_bits": 158107520.0,
"err": 0.00522613525390625,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.2313232421875,
"total_bits": 163350400.0,
"err": 0.0048675537109375,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.12529296875,
"total_bits": 212999679.99999997,
"err": 0.003681182861328125,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.5.self_attn.k_proj",
"numel": 26214400,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.17529296875,
"total_bits": 57024000.0,
"err": 0.0643310546875,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.37529296875,
"total_bits": 62266880.00000001,
"err": 0.058929443359375,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.72529296875,
"total_bits": 71441920.0,
"err": 0.0504150390625,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.22529296875,
"total_bits": 84549120.0,
"err": 0.0294342041015625,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.0313232421875,
"total_bits": 79464320.0,
"err": 0.03460693359375,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.12529296875,
"total_bits": 81927680.0,
"err": 0.031768798828125,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.17529296875,
"total_bits": 83238400.0,
"err": 0.030059814453125,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.52529296875,
"total_bits": 92413440.0,
"err": 0.0258941650390625,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.662646484375,
"total_bits": 96014080.0,
"err": 0.0245361328125,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.0313232421875,
"total_bits": 105678720.0,
"err": 0.0175323486328125,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.12529296875,
"total_bits": 108142080.0,
"err": 0.01513671875,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.0313232421875,
"total_bits": 131893120.0,
"err": 0.008758544921875,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.22529296875,
"total_bits": 136977920.0,
"err": 0.00732421875,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.52529296875,
"total_bits": 144842240.0,
"err": 0.006622314453125,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.0313232421875,
"total_bits": 158107520.0,
"err": 0.004596710205078125,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.2313232421875,
"total_bits": 163350400.0,
"err": 0.0042572021484375,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.12529296875,
"total_bits": 212999679.99999997,
"err": 0.0031280517578125,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.5.self_attn.v_proj",
"numel": 26214400,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.17529296875,
"total_bits": 57024000.0,
"err": 0.195068359375,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.37529296875,
"total_bits": 62266880.00000001,
"err": 0.180908203125,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.72529296875,
"total_bits": 71441920.0,
"err": 0.1585693359375,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.22529296875,
"total_bits": 84549120.0,
"err": 0.090576171875,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.0313232421875,
"total_bits": 79464320.0,
"err": 0.10198974609375,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.12529296875,
"total_bits": 81927680.0,
"err": 0.093994140625,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.17529296875,
"total_bits": 83238400.0,
"err": 0.09210205078125,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.52529296875,
"total_bits": 92413440.0,
"err": 0.08050537109375,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.662646484375,
"total_bits": 96014080.0,
"err": 0.07598876953125,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.0313232421875,
"total_bits": 105678720.0,
"err": 0.051727294921875,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.12529296875,
"total_bits": 108142080.0,
"err": 0.0447998046875,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.0313232421875,
"total_bits": 131893120.0,
"err": 0.0258026123046875,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.22529296875,
"total_bits": 136977920.0,
"err": 0.02203369140625,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.52529296875,
"total_bits": 144842240.0,
"err": 0.01995849609375,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.0313232421875,
"total_bits": 158107520.0,
"err": 0.0133209228515625,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.2313232421875,
"total_bits": 163350400.0,
"err": 0.01275634765625,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.12529296875,
"total_bits": 212999679.99999997,
"err": 0.0081634521484375,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.5.self_attn.o_proj",
"numel": 26214400,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.17529296875,
"total_bits": 57024000.0,
"err": 0.18115234375,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.37529296875,
"total_bits": 62266880.00000001,
"err": 0.1588134765625,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.72529296875,
"total_bits": 71441920.0,
"err": 0.12445068359375,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.22529296875,
"total_bits": 84549120.0,
"err": 0.08355712890625,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.0313232421875,
"total_bits": 79464320.0,
"err": 0.0982666015625,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.12529296875,
"total_bits": 81927680.0,
"err": 0.0899658203125,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.17529296875,
"total_bits": 83238400.0,
"err": 0.08648681640625,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.52529296875,
"total_bits": 92413440.0,
"err": 0.0679931640625,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.662646484375,
"total_bits": 96014080.0,
"err": 0.06005859375,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.0313232421875,
"total_bits": 105678720.0,
"err": 0.050689697265625,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.12529296875,
"total_bits": 108142080.0,
"err": 0.043731689453125,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.0313232421875,
"total_bits": 131893120.0,
"err": 0.025726318359375,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.22529296875,
"total_bits": 136977920.0,
"err": 0.0225067138671875,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.52529296875,
"total_bits": 144842240.0,
"err": 0.0196533203125,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.0313232421875,
"total_bits": 158107520.0,
"err": 0.01458740234375,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.2313232421875,
"total_bits": 163350400.0,
"err": 0.01374053955078125,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.12529296875,
"total_bits": 212999679.99999997,
"err": 0.01244354248046875,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.5.mlp.gate_proj",
"numel": 70778880,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.1751085069444445,
"total_bits": 153951744.0,
"err": 0.1812744140625,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.3751085069444446,
"total_bits": 168107520.0,
"err": 0.1705322265625,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.7251085069444443,
"total_bits": 192880128.0,
"err": 0.152099609375,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.2251085069444443,
"total_bits": 228269568.0,
"err": 0.0845947265625,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.031277126736111,
"total_bits": 214550400.0,
"err": 0.09375,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.1251085069444446,
"total_bits": 221191680.0,
"err": 0.086669921875,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.1751085069444445,
"total_bits": 224730624.0,
"err": 0.085693359375,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.5251085069444446,
"total_bits": 249503232.0,
"err": 0.0765380859375,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.6625542534722224,
"total_bits": 259231488.0,
"err": 0.07275390625,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.031277126736111,
"total_bits": 285329280.0,
"err": 0.0477294921875,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.125108506944445,
"total_bits": 291970560.0,
"err": 0.04150390625,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.031277126736111,
"total_bits": 356108160.0,
"err": 0.023834228515625,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.225108506944444,
"total_bits": 369827328.0,
"err": 0.02105712890625,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.525108506944444,
"total_bits": 391060992.0,
"err": 0.0194549560546875,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.031277126736111,
"total_bits": 426887040.0,
"err": 0.01264190673828125,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.231277126736111,
"total_bits": 441042816.0,
"err": 0.0123443603515625,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.125108506944445,
"total_bits": 575086080.0,
"err": 0.009002685546875,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.5.mlp.up_proj",
"numel": 70778880,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.1751085069444445,
"total_bits": 153951744.0,
"err": 0.2415771484375,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.3751085069444446,
"total_bits": 168107520.0,
"err": 0.2274169921875,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.7251085069444443,
"total_bits": 192880128.0,
"err": 0.202880859375,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.2251085069444443,
"total_bits": 228269568.0,
"err": 0.11285400390625,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.031277126736111,
"total_bits": 214550400.0,
"err": 0.12493896484375,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.1251085069444446,
"total_bits": 221191680.0,
"err": 0.11553955078125,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.1751085069444445,
"total_bits": 224730624.0,
"err": 0.11431884765625,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.5251085069444446,
"total_bits": 249503232.0,
"err": 0.10205078125,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.6625542534722224,
"total_bits": 259231488.0,
"err": 0.09698486328125,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.031277126736111,
"total_bits": 285329280.0,
"err": 0.06329345703125,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.125108506944445,
"total_bits": 291970560.0,
"err": 0.05511474609375,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.031277126736111,
"total_bits": 356108160.0,
"err": 0.03155517578125,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.225108506944444,
"total_bits": 369827328.0,
"err": 0.0273590087890625,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.525108506944444,
"total_bits": 391060992.0,
"err": 0.025146484375,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.031277126736111,
"total_bits": 426887040.0,
"err": 0.016326904296875,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.231277126736111,
"total_bits": 441042816.0,
"err": 0.01593017578125,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.125108506944445,
"total_bits": 575086080.0,
"err": 0.00989532470703125,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.5.mlp.down_proj",
"numel": 70778880,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.1715892650462965,
"total_bits": 153702656.0,
"err": 0.232421875,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.37529296875,
"total_bits": 168120576.0,
"err": 0.21044921875,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.727144820601852,
"total_bits": 193024256.0,
"err": 0.1802978515625,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.227144820601852,
"total_bits": 228413696.0,
"err": 0.106201171875,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.0313232421875,
"total_bits": 214553664.0,
"err": 0.12213134765625,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.12529296875,
"total_bits": 221204736.0,
"err": 0.1126708984375,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.1715892650462965,
"total_bits": 224481536.0,
"err": 0.10919189453125,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.523441116898148,
"total_bits": 249385216.0,
"err": 0.09271240234375,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.6644983362268517,
"total_bits": 259369088.0,
"err": 0.08721923828125,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.0313232421875,
"total_bits": 285332544.0,
"err": 0.06207275390625,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.12529296875,
"total_bits": 291983616.0,
"err": 0.05389404296875,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.0313232421875,
"total_bits": 356111424.0,
"err": 0.031097412109375,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.227144820601852,
"total_bits": 369971456.0,
"err": 0.02667236328125,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.523441116898148,
"total_bits": 390942976.0,
"err": 0.0240478515625,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.0313232421875,
"total_bits": 426890304.0,
"err": 0.0167236328125,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.235026945891204,
"total_bits": 441308224.0,
"err": 0.0158233642578125,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.12529296875,
"total_bits": 575099136.0,
"err": 0.01187896728515625,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.6.self_attn.q_proj",
"numel": 26214400,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.17529296875,
"total_bits": 57024000.0,
"err": 0.08935546875,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.37529296875,
"total_bits": 62266880.00000001,
"err": 0.0828857421875,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.72529296875,
"total_bits": 71441920.0,
"err": 0.07208251953125,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.22529296875,
"total_bits": 84549120.0,
"err": 0.041351318359375,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.0313232421875,
"total_bits": 79464320.0,
"err": 0.04718017578125,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.12529296875,
"total_bits": 81927680.0,
"err": 0.043609619140625,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.17529296875,
"total_bits": 83238400.0,
"err": 0.042083740234375,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.52529296875,
"total_bits": 92413440.0,
"err": 0.0367431640625,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.662646484375,
"total_bits": 96014080.0,
"err": 0.034698486328125,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.0313232421875,
"total_bits": 105678720.0,
"err": 0.023956298828125,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.12529296875,
"total_bits": 108142080.0,
"err": 0.020843505859375,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.0313232421875,
"total_bits": 131893120.0,
"err": 0.01198577880859375,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.22529296875,
"total_bits": 136977920.0,
"err": 0.0103302001953125,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.52529296875,
"total_bits": 144842240.0,
"err": 0.0094146728515625,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.0313232421875,
"total_bits": 158107520.0,
"err": 0.0063323974609375,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.2313232421875,
"total_bits": 163350400.0,
"err": 0.00598907470703125,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.12529296875,
"total_bits": 212999679.99999997,
"err": 0.00447845458984375,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.6.self_attn.k_proj",
"numel": 26214400,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.17529296875,
"total_bits": 57024000.0,
"err": 0.074951171875,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.37529296875,
"total_bits": 62266880.00000001,
"err": 0.0694580078125,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.72529296875,
"total_bits": 71441920.0,
"err": 0.06036376953125,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.22529296875,
"total_bits": 84549120.0,
"err": 0.03466796875,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.0313232421875,
"total_bits": 79464320.0,
"err": 0.0400390625,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.12529296875,
"total_bits": 81927680.0,
"err": 0.0367431640625,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.17529296875,
"total_bits": 83238400.0,
"err": 0.035247802734375,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.52529296875,
"total_bits": 92413440.0,
"err": 0.030853271484375,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.662646484375,
"total_bits": 96014080.0,
"err": 0.029266357421875,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.0313232421875,
"total_bits": 105678720.0,
"err": 0.0203094482421875,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.12529296875,
"total_bits": 108142080.0,
"err": 0.0175323486328125,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.0313232421875,
"total_bits": 131893120.0,
"err": 0.01016998291015625,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.22529296875,
"total_bits": 136977920.0,
"err": 0.008575439453125,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.52529296875,
"total_bits": 144842240.0,
"err": 0.0078277587890625,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.0313232421875,
"total_bits": 158107520.0,
"err": 0.005329132080078125,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.2313232421875,
"total_bits": 163350400.0,
"err": 0.00502777099609375,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.12529296875,
"total_bits": 212999679.99999997,
"err": 0.00356292724609375,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.6.self_attn.v_proj",
"numel": 26214400,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.17529296875,
"total_bits": 57024000.0,
"err": 0.2110595703125,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.37529296875,
"total_bits": 62266880.00000001,
"err": 0.1968994140625,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.72529296875,
"total_bits": 71441920.0,
"err": 0.1734619140625,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.22529296875,
"total_bits": 84549120.0,
"err": 0.09844970703125,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.0313232421875,
"total_bits": 79464320.0,
"err": 0.1103515625,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.12529296875,
"total_bits": 81927680.0,
"err": 0.101806640625,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.17529296875,
"total_bits": 83238400.0,
"err": 0.09991455078125,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.52529296875,
"total_bits": 92413440.0,
"err": 0.087890625,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.662646484375,
"total_bits": 96014080.0,
"err": 0.08306884765625,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.0313232421875,
"total_bits": 105678720.0,
"err": 0.055938720703125,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.12529296875,
"total_bits": 108142080.0,
"err": 0.048492431640625,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.0313232421875,
"total_bits": 131893120.0,
"err": 0.027862548828125,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.22529296875,
"total_bits": 136977920.0,
"err": 0.02386474609375,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.52529296875,
"total_bits": 144842240.0,
"err": 0.021697998046875,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.0313232421875,
"total_bits": 158107520.0,
"err": 0.01433563232421875,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.2313232421875,
"total_bits": 163350400.0,
"err": 0.01378631591796875,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.12529296875,
"total_bits": 212999679.99999997,
"err": 0.0086669921875,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.6.self_attn.o_proj",
"numel": 26214400,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.17529296875,
"total_bits": 57024000.0,
"err": 0.18505859375,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.37529296875,
"total_bits": 62266880.00000001,
"err": 0.1563720703125,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.72529296875,
"total_bits": 71441920.0,
"err": 0.1229248046875,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.22529296875,
"total_bits": 84549120.0,
"err": 0.08514404296875,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.0313232421875,
"total_bits": 79464320.0,
"err": 0.1029052734375,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.12529296875,
"total_bits": 81927680.0,
"err": 0.09381103515625,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.17529296875,
"total_bits": 83238400.0,
"err": 0.0882568359375,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.52529296875,
"total_bits": 92413440.0,
"err": 0.06683349609375,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.662646484375,
"total_bits": 96014080.0,
"err": 0.0611572265625,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.0313232421875,
"total_bits": 105678720.0,
"err": 0.0528564453125,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.12529296875,
"total_bits": 108142080.0,
"err": 0.0455322265625,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.0313232421875,
"total_bits": 131893120.0,
"err": 0.0265960693359375,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.22529296875,
"total_bits": 136977920.0,
"err": 0.0228118896484375,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.52529296875,
"total_bits": 144842240.0,
"err": 0.0194854736328125,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.0313232421875,
"total_bits": 158107520.0,
"err": 0.01488494873046875,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.2313232421875,
"total_bits": 163350400.0,
"err": 0.01367950439453125,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.12529296875,
"total_bits": 212999679.99999997,
"err": 0.0124664306640625,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.6.mlp.gate_proj",
"numel": 70778880,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.1751085069444445,
"total_bits": 153951744.0,
"err": 0.1761474609375,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.3751085069444446,
"total_bits": 168107520.0,
"err": 0.1651611328125,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.7251085069444443,
"total_bits": 192880128.0,
"err": 0.1473388671875,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.2251085069444443,
"total_bits": 228269568.0,
"err": 0.08233642578125,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.031277126736111,
"total_bits": 214550400.0,
"err": 0.09124755859375,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.1251085069444446,
"total_bits": 221191680.0,
"err": 0.08441162109375,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.1751085069444445,
"total_bits": 224730624.0,
"err": 0.08349609375,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.5251085069444446,
"total_bits": 249503232.0,
"err": 0.0743408203125,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.6625542534722224,
"total_bits": 259231488.0,
"err": 0.07049560546875,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.031277126736111,
"total_bits": 285329280.0,
"err": 0.04638671875,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.125108506944445,
"total_bits": 291970560.0,
"err": 0.04034423828125,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.031277126736111,
"total_bits": 356108160.0,
"err": 0.0231475830078125,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.225108506944444,
"total_bits": 369827328.0,
"err": 0.0202484130859375,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.525108506944444,
"total_bits": 391060992.0,
"err": 0.018646240234375,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.031277126736111,
"total_bits": 426887040.0,
"err": 0.01214599609375,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.231277126736111,
"total_bits": 441042816.0,
"err": 0.0118255615234375,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.125108506944445,
"total_bits": 575086080.0,
"err": 0.00811767578125,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.6.mlp.up_proj",
"numel": 70778880,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.1751085069444445,
"total_bits": 153951744.0,
"err": 0.2440185546875,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.3751085069444446,
"total_bits": 168107520.0,
"err": 0.2294921875,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.7251085069444443,
"total_bits": 192880128.0,
"err": 0.2047119140625,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.2251085069444443,
"total_bits": 228269568.0,
"err": 0.1142578125,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.031277126736111,
"total_bits": 214550400.0,
"err": 0.1265869140625,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.1251085069444446,
"total_bits": 221191680.0,
"err": 0.1170654296875,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.1751085069444445,
"total_bits": 224730624.0,
"err": 0.11572265625,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.5251085069444446,
"total_bits": 249503232.0,
"err": 0.103271484375,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.6625542534722224,
"total_bits": 259231488.0,
"err": 0.097900390625,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.031277126736111,
"total_bits": 285329280.0,
"err": 0.06427001953125,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.125108506944445,
"total_bits": 291970560.0,
"err": 0.055877685546875,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.031277126736111,
"total_bits": 356108160.0,
"err": 0.032073974609375,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.225108506944444,
"total_bits": 369827328.0,
"err": 0.027740478515625,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.525108506944444,
"total_bits": 391060992.0,
"err": 0.0254669189453125,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.031277126736111,
"total_bits": 426887040.0,
"err": 0.016632080078125,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.231277126736111,
"total_bits": 441042816.0,
"err": 0.016204833984375,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.125108506944445,
"total_bits": 575086080.0,
"err": 0.01020050048828125,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.6.mlp.down_proj",
"numel": 70778880,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.1715892650462965,
"total_bits": 153702656.0,
"err": 0.2381591796875,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.37529296875,
"total_bits": 168120576.0,
"err": 0.2149658203125,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.727144820601852,
"total_bits": 193024256.0,
"err": 0.1845703125,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.227144820601852,
"total_bits": 228413696.0,
"err": 0.1090087890625,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.0313232421875,
"total_bits": 214553664.0,
"err": 0.125732421875,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.12529296875,
"total_bits": 221204736.0,
"err": 0.11572265625,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.1715892650462965,
"total_bits": 224481536.0,
"err": 0.112060546875,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.523441116898148,
"total_bits": 249385216.0,
"err": 0.09490966796875,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.6644983362268517,
"total_bits": 259369088.0,
"err": 0.0892333984375,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.0313232421875,
"total_bits": 285332544.0,
"err": 0.0638427734375,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.12529296875,
"total_bits": 291983616.0,
"err": 0.055328369140625,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.0313232421875,
"total_bits": 356111424.0,
"err": 0.032012939453125,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.227144820601852,
"total_bits": 369971456.0,
"err": 0.0273284912109375,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.523441116898148,
"total_bits": 390942976.0,
"err": 0.0245819091796875,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.0313232421875,
"total_bits": 426890304.0,
"err": 0.0171966552734375,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.235026945891204,
"total_bits": 441308224.0,
"err": 0.0162506103515625,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.12529296875,
"total_bits": 575099136.0,
"err": 0.01218414306640625,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.7.self_attn.q_proj",
"numel": 26214400,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.17529296875,
"total_bits": 57024000.0,
"err": 0.08746337890625,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.37529296875,
"total_bits": 62266880.00000001,
"err": 0.08111572265625,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.72529296875,
"total_bits": 71441920.0,
"err": 0.070556640625,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.22529296875,
"total_bits": 84549120.0,
"err": 0.040435791015625,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.0313232421875,
"total_bits": 79464320.0,
"err": 0.0462646484375,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.12529296875,
"total_bits": 81927680.0,
"err": 0.042694091796875,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.17529296875,
"total_bits": 83238400.0,
"err": 0.041107177734375,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.52529296875,
"total_bits": 92413440.0,
"err": 0.0360107421875,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.662646484375,
"total_bits": 96014080.0,
"err": 0.0340576171875,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.0313232421875,
"total_bits": 105678720.0,
"err": 0.0234832763671875,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.12529296875,
"total_bits": 108142080.0,
"err": 0.0204010009765625,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.0313232421875,
"total_bits": 131893120.0,
"err": 0.01174163818359375,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.22529296875,
"total_bits": 136977920.0,
"err": 0.010009765625,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.52529296875,
"total_bits": 144842240.0,
"err": 0.0091094970703125,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.0313232421875,
"total_bits": 158107520.0,
"err": 0.0061492919921875,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.2313232421875,
"total_bits": 163350400.0,
"err": 0.00579833984375,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.12529296875,
"total_bits": 212999679.99999997,
"err": 0.004123687744140625,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.7.self_attn.k_proj",
"numel": 26214400,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.17529296875,
"total_bits": 57024000.0,
"err": 0.07427978515625,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.37529296875,
"total_bits": 62266880.00000001,
"err": 0.06903076171875,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.72529296875,
"total_bits": 71441920.0,
"err": 0.05987548828125,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.22529296875,
"total_bits": 84549120.0,
"err": 0.034332275390625,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.0313232421875,
"total_bits": 79464320.0,
"err": 0.0396728515625,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.12529296875,
"total_bits": 81927680.0,
"err": 0.03662109375,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.17529296875,
"total_bits": 83238400.0,
"err": 0.03497314453125,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.52529296875,
"total_bits": 92413440.0,
"err": 0.03057861328125,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.662646484375,
"total_bits": 96014080.0,
"err": 0.02899169921875,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.0313232421875,
"total_bits": 105678720.0,
"err": 0.020111083984375,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.12529296875,
"total_bits": 108142080.0,
"err": 0.0174713134765625,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.0313232421875,
"total_bits": 131893120.0,
"err": 0.01004791259765625,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.22529296875,
"total_bits": 136977920.0,
"err": 0.0084991455078125,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.52529296875,
"total_bits": 144842240.0,
"err": 0.007740020751953125,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.0313232421875,
"total_bits": 158107520.0,
"err": 0.0052490234375,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.2313232421875,
"total_bits": 163350400.0,
"err": 0.004913330078125,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.12529296875,
"total_bits": 212999679.99999997,
"err": 0.00348663330078125,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.7.self_attn.v_proj",
"numel": 26214400,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.17529296875,
"total_bits": 57024000.0,
"err": 0.207763671875,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.37529296875,
"total_bits": 62266880.00000001,
"err": 0.19384765625,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.72529296875,
"total_bits": 71441920.0,
"err": 0.1707763671875,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.22529296875,
"total_bits": 84549120.0,
"err": 0.09710693359375,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.0313232421875,
"total_bits": 79464320.0,
"err": 0.10906982421875,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.12529296875,
"total_bits": 81927680.0,
"err": 0.10040283203125,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.17529296875,
"total_bits": 83238400.0,
"err": 0.09857177734375,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.52529296875,
"total_bits": 92413440.0,
"err": 0.086669921875,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.662646484375,
"total_bits": 96014080.0,
"err": 0.08197021484375,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.0313232421875,
"total_bits": 105678720.0,
"err": 0.055419921875,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.12529296875,
"total_bits": 108142080.0,
"err": 0.0478515625,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.0313232421875,
"total_bits": 131893120.0,
"err": 0.027618408203125,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.22529296875,
"total_bits": 136977920.0,
"err": 0.0235595703125,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.52529296875,
"total_bits": 144842240.0,
"err": 0.02142333984375,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.0313232421875,
"total_bits": 158107520.0,
"err": 0.01422119140625,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.2313232421875,
"total_bits": 163350400.0,
"err": 0.01361846923828125,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.12529296875,
"total_bits": 212999679.99999997,
"err": 0.00860595703125,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.7.self_attn.o_proj",
"numel": 26214400,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.17529296875,
"total_bits": 57024000.0,
"err": 0.20263671875,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.37529296875,
"total_bits": 62266880.00000001,
"err": 0.1768798828125,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.72529296875,
"total_bits": 71441920.0,
"err": 0.1416015625,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.22529296875,
"total_bits": 84549120.0,
"err": 0.09271240234375,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.0313232421875,
"total_bits": 79464320.0,
"err": 0.11114501953125,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.12529296875,
"total_bits": 81927680.0,
"err": 0.10137939453125,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.17529296875,
"total_bits": 83238400.0,
"err": 0.096923828125,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.52529296875,
"total_bits": 92413440.0,
"err": 0.07684326171875,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.662646484375,
"total_bits": 96014080.0,
"err": 0.0692138671875,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.0313232421875,
"total_bits": 105678720.0,
"err": 0.05731201171875,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.12529296875,
"total_bits": 108142080.0,
"err": 0.04949951171875,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.0313232421875,
"total_bits": 131893120.0,
"err": 0.0289764404296875,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.22529296875,
"total_bits": 136977920.0,
"err": 0.0251922607421875,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.52529296875,
"total_bits": 144842240.0,
"err": 0.0223541259765625,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.0313232421875,
"total_bits": 158107520.0,
"err": 0.0165252685546875,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.2313232421875,
"total_bits": 163350400.0,
"err": 0.01534271240234375,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.12529296875,
"total_bits": 212999679.99999997,
"err": 0.01428985595703125,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.7.mlp.gate_proj",
"numel": 70778880,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.1751085069444445,
"total_bits": 153951744.0,
"err": 0.1634521484375,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.3751085069444446,
"total_bits": 168107520.0,
"err": 0.153564453125,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.7251085069444443,
"total_bits": 192880128.0,
"err": 0.136474609375,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.2251085069444443,
"total_bits": 228269568.0,
"err": 0.0765380859375,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.031277126736111,
"total_bits": 214550400.0,
"err": 0.0849609375,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.1251085069444446,
"total_bits": 221191680.0,
"err": 0.07867431640625,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.1751085069444445,
"total_bits": 224730624.0,
"err": 0.07757568359375,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.5251085069444446,
"total_bits": 249503232.0,
"err": 0.069091796875,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.6625542534722224,
"total_bits": 259231488.0,
"err": 0.0655517578125,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.031277126736111,
"total_bits": 285329280.0,
"err": 0.043212890625,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.125108506944445,
"total_bits": 291970560.0,
"err": 0.037628173828125,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.031277126736111,
"total_bits": 356108160.0,
"err": 0.021575927734375,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.225108506944444,
"total_bits": 369827328.0,
"err": 0.0189361572265625,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.525108506944444,
"total_bits": 391060992.0,
"err": 0.017425537109375,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.031277126736111,
"total_bits": 426887040.0,
"err": 0.0113372802734375,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.231277126736111,
"total_bits": 441042816.0,
"err": 0.0110321044921875,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.125108506944445,
"total_bits": 575086080.0,
"err": 0.007778167724609375,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.7.mlp.up_proj",
"numel": 70778880,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.1751085069444445,
"total_bits": 153951744.0,
"err": 0.2384033203125,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.3751085069444446,
"total_bits": 168107520.0,
"err": 0.22412109375,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.7251085069444443,
"total_bits": 192880128.0,
"err": 0.19970703125,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.2251085069444443,
"total_bits": 228269568.0,
"err": 0.1119384765625,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.031277126736111,
"total_bits": 214550400.0,
"err": 0.1240234375,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.1251085069444446,
"total_bits": 221191680.0,
"err": 0.11474609375,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.1751085069444445,
"total_bits": 224730624.0,
"err": 0.1134033203125,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.5251085069444446,
"total_bits": 249503232.0,
"err": 0.10107421875,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.6625542534722224,
"total_bits": 259231488.0,
"err": 0.09588623046875,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.031277126736111,
"total_bits": 285329280.0,
"err": 0.06317138671875,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.125108506944445,
"total_bits": 291970560.0,
"err": 0.054901123046875,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.031277126736111,
"total_bits": 356108160.0,
"err": 0.031524658203125,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.225108506944444,
"total_bits": 369827328.0,
"err": 0.027618408203125,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.525108506944444,
"total_bits": 391060992.0,
"err": 0.0254058837890625,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.031277126736111,
"total_bits": 426887040.0,
"err": 0.0165557861328125,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.231277126736111,
"total_bits": 441042816.0,
"err": 0.0161285400390625,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.125108506944445,
"total_bits": 575086080.0,
"err": 0.01123046875,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.7.mlp.down_proj",
"numel": 70778880,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.1715892650462965,
"total_bits": 153702656.0,
"err": 0.2279052734375,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.37529296875,
"total_bits": 168120576.0,
"err": 0.2059326171875,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.727144820601852,
"total_bits": 193024256.0,
"err": 0.1761474609375,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.227144820601852,
"total_bits": 228413696.0,
"err": 0.104248046875,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.0313232421875,
"total_bits": 214553664.0,
"err": 0.12139892578125,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.12529296875,
"total_bits": 221204736.0,
"err": 0.11090087890625,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.1715892650462965,
"total_bits": 224481536.0,
"err": 0.107177734375,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.523441116898148,
"total_bits": 249385216.0,
"err": 0.09088134765625,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.6644983362268517,
"total_bits": 259369088.0,
"err": 0.08538818359375,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.0313232421875,
"total_bits": 285332544.0,
"err": 0.06146240234375,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.12529296875,
"total_bits": 291983616.0,
"err": 0.05322265625,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.0313232421875,
"total_bits": 356111424.0,
"err": 0.031005859375,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.227144820601852,
"total_bits": 369971456.0,
"err": 0.026702880859375,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.523441116898148,
"total_bits": 390942976.0,
"err": 0.024139404296875,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.0313232421875,
"total_bits": 426890304.0,
"err": 0.017059326171875,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.235026945891204,
"total_bits": 441308224.0,
"err": 0.0161285400390625,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.12529296875,
"total_bits": 575099136.0,
"err": 0.01287078857421875,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.8.self_attn.q_proj",
"numel": 26214400,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.17529296875,
"total_bits": 57024000.0,
"err": 0.09759521484375,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.37529296875,
"total_bits": 62266880.00000001,
"err": 0.09075927734375,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.72529296875,
"total_bits": 71441920.0,
"err": 0.0791015625,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.22529296875,
"total_bits": 84549120.0,
"err": 0.045166015625,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.0313232421875,
"total_bits": 79464320.0,
"err": 0.051605224609375,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.12529296875,
"total_bits": 81927680.0,
"err": 0.047637939453125,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.17529296875,
"total_bits": 83238400.0,
"err": 0.045928955078125,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.52529296875,
"total_bits": 92413440.0,
"err": 0.040252685546875,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.662646484375,
"total_bits": 96014080.0,
"err": 0.0380859375,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.0313232421875,
"total_bits": 105678720.0,
"err": 0.0261993408203125,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.12529296875,
"total_bits": 108142080.0,
"err": 0.022735595703125,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.0313232421875,
"total_bits": 131893120.0,
"err": 0.01308441162109375,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.22529296875,
"total_bits": 136977920.0,
"err": 0.0111846923828125,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.52529296875,
"total_bits": 144842240.0,
"err": 0.01018524169921875,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.0313232421875,
"total_bits": 158107520.0,
"err": 0.006862640380859375,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.2313232421875,
"total_bits": 163350400.0,
"err": 0.006488800048828125,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.12529296875,
"total_bits": 212999679.99999997,
"err": 0.0046234130859375,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.8.self_attn.k_proj",
"numel": 26214400,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.17529296875,
"total_bits": 57024000.0,
"err": 0.081298828125,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.37529296875,
"total_bits": 62266880.00000001,
"err": 0.0755615234375,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.72529296875,
"total_bits": 71441920.0,
"err": 0.06561279296875,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.22529296875,
"total_bits": 84549120.0,
"err": 0.037628173828125,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.0313232421875,
"total_bits": 79464320.0,
"err": 0.0433349609375,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.12529296875,
"total_bits": 81927680.0,
"err": 0.039947509765625,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.17529296875,
"total_bits": 83238400.0,
"err": 0.0382080078125,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.52529296875,
"total_bits": 92413440.0,
"err": 0.033538818359375,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.662646484375,
"total_bits": 96014080.0,
"err": 0.031768798828125,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.0313232421875,
"total_bits": 105678720.0,
"err": 0.021942138671875,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.12529296875,
"total_bits": 108142080.0,
"err": 0.019073486328125,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.0313232421875,
"total_bits": 131893120.0,
"err": 0.0109710693359375,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.22529296875,
"total_bits": 136977920.0,
"err": 0.0092926025390625,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.52529296875,
"total_bits": 144842240.0,
"err": 0.00848388671875,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.0313232421875,
"total_bits": 158107520.0,
"err": 0.0057373046875,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.2313232421875,
"total_bits": 163350400.0,
"err": 0.005401611328125,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.12529296875,
"total_bits": 212999679.99999997,
"err": 0.0038242340087890625,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.8.self_attn.v_proj",
"numel": 26214400,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.17529296875,
"total_bits": 57024000.0,
"err": 0.2215576171875,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.37529296875,
"total_bits": 62266880.00000001,
"err": 0.20654296875,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.72529296875,
"total_bits": 71441920.0,
"err": 0.18212890625,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.22529296875,
"total_bits": 84549120.0,
"err": 0.10333251953125,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.0313232421875,
"total_bits": 79464320.0,
"err": 0.116455078125,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.12529296875,
"total_bits": 81927680.0,
"err": 0.10699462890625,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.17529296875,
"total_bits": 83238400.0,
"err": 0.10504150390625,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.52529296875,
"total_bits": 92413440.0,
"err": 0.09246826171875,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.662646484375,
"total_bits": 96014080.0,
"err": 0.08758544921875,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.0313232421875,
"total_bits": 105678720.0,
"err": 0.059112548828125,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.12529296875,
"total_bits": 108142080.0,
"err": 0.051025390625,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.0313232421875,
"total_bits": 131893120.0,
"err": 0.0294952392578125,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.22529296875,
"total_bits": 136977920.0,
"err": 0.025054931640625,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.52529296875,
"total_bits": 144842240.0,
"err": 0.0228271484375,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.0313232421875,
"total_bits": 158107520.0,
"err": 0.01519012451171875,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.2313232421875,
"total_bits": 163350400.0,
"err": 0.0145263671875,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.12529296875,
"total_bits": 212999679.99999997,
"err": 0.0091094970703125,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.8.self_attn.o_proj",
"numel": 26214400,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.17529296875,
"total_bits": 57024000.0,
"err": 0.1998291015625,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.37529296875,
"total_bits": 62266880.00000001,
"err": 0.17626953125,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.72529296875,
"total_bits": 71441920.0,
"err": 0.142822265625,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.22529296875,
"total_bits": 84549120.0,
"err": 0.0927734375,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.0313232421875,
"total_bits": 79464320.0,
"err": 0.10797119140625,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.12529296875,
"total_bits": 81927680.0,
"err": 0.09906005859375,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.17529296875,
"total_bits": 83238400.0,
"err": 0.09539794921875,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.52529296875,
"total_bits": 92413440.0,
"err": 0.07635498046875,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.662646484375,
"total_bits": 96014080.0,
"err": 0.06878662109375,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.0313232421875,
"total_bits": 105678720.0,
"err": 0.055816650390625,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.12529296875,
"total_bits": 108142080.0,
"err": 0.048370361328125,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.0313232421875,
"total_bits": 131893120.0,
"err": 0.028289794921875,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.22529296875,
"total_bits": 136977920.0,
"err": 0.0249786376953125,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.52529296875,
"total_bits": 144842240.0,
"err": 0.0219879150390625,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.0313232421875,
"total_bits": 158107520.0,
"err": 0.0161590576171875,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.2313232421875,
"total_bits": 163350400.0,
"err": 0.01532745361328125,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.12529296875,
"total_bits": 212999679.99999997,
"err": 0.01380157470703125,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.8.mlp.gate_proj",
"numel": 70778880,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.1751085069444445,
"total_bits": 153951744.0,
"err": 0.1702880859375,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.3751085069444446,
"total_bits": 168107520.0,
"err": 0.16015625,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.7251085069444443,
"total_bits": 192880128.0,
"err": 0.1424560546875,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.2251085069444443,
"total_bits": 228269568.0,
"err": 0.08001708984375,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.031277126736111,
"total_bits": 214550400.0,
"err": 0.08880615234375,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.1251085069444446,
"total_bits": 221191680.0,
"err": 0.0821533203125,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.1751085069444445,
"total_bits": 224730624.0,
"err": 0.08099365234375,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.5251085069444446,
"total_bits": 249503232.0,
"err": 0.07220458984375,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.6625542534722224,
"total_bits": 259231488.0,
"err": 0.068603515625,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.031277126736111,
"total_bits": 285329280.0,
"err": 0.04534912109375,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.125108506944445,
"total_bits": 291970560.0,
"err": 0.039398193359375,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.031277126736111,
"total_bits": 356108160.0,
"err": 0.02264404296875,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.225108506944444,
"total_bits": 369827328.0,
"err": 0.0200042724609375,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.525108506944444,
"total_bits": 391060992.0,
"err": 0.0184478759765625,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.031277126736111,
"total_bits": 426887040.0,
"err": 0.0120391845703125,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.231277126736111,
"total_bits": 441042816.0,
"err": 0.01171112060546875,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.125108506944445,
"total_bits": 575086080.0,
"err": 0.0087432861328125,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.8.mlp.up_proj",
"numel": 70778880,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.1751085069444445,
"total_bits": 153951744.0,
"err": 0.238037109375,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.3751085069444446,
"total_bits": 168107520.0,
"err": 0.223876953125,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.7251085069444443,
"total_bits": 192880128.0,
"err": 0.199462890625,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.2251085069444443,
"total_bits": 228269568.0,
"err": 0.11163330078125,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.031277126736111,
"total_bits": 214550400.0,
"err": 0.1239013671875,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.1251085069444446,
"total_bits": 221191680.0,
"err": 0.11474609375,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.1751085069444445,
"total_bits": 224730624.0,
"err": 0.1131591796875,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.5251085069444446,
"total_bits": 249503232.0,
"err": 0.1007080078125,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.6625542534722224,
"total_bits": 259231488.0,
"err": 0.095703125,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.031277126736111,
"total_bits": 285329280.0,
"err": 0.0628662109375,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.125108506944445,
"total_bits": 291970560.0,
"err": 0.054718017578125,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.031277126736111,
"total_bits": 356108160.0,
"err": 0.0313720703125,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.225108506944444,
"total_bits": 369827328.0,
"err": 0.0271759033203125,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.525108506944444,
"total_bits": 391060992.0,
"err": 0.0249176025390625,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.031277126736111,
"total_bits": 426887040.0,
"err": 0.0162811279296875,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.231277126736111,
"total_bits": 441042816.0,
"err": 0.01580810546875,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.125108506944445,
"total_bits": 575086080.0,
"err": 0.01004791259765625,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.8.mlp.down_proj",
"numel": 70778880,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.1715892650462965,
"total_bits": 153702656.0,
"err": 0.2406005859375,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.37529296875,
"total_bits": 168120576.0,
"err": 0.2171630859375,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.727144820601852,
"total_bits": 193024256.0,
"err": 0.18603515625,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.227144820601852,
"total_bits": 228413696.0,
"err": 0.1102294921875,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.0313232421875,
"total_bits": 214553664.0,
"err": 0.12744140625,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.12529296875,
"total_bits": 221204736.0,
"err": 0.11712646484375,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.1715892650462965,
"total_bits": 224481536.0,
"err": 0.11322021484375,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.523441116898148,
"total_bits": 249385216.0,
"err": 0.095947265625,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.6644983362268517,
"total_bits": 259369088.0,
"err": 0.090087890625,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.0313232421875,
"total_bits": 285332544.0,
"err": 0.06475830078125,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.12529296875,
"total_bits": 291983616.0,
"err": 0.05621337890625,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.0313232421875,
"total_bits": 356111424.0,
"err": 0.032562255859375,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.227144820601852,
"total_bits": 369971456.0,
"err": 0.0280303955078125,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.523441116898148,
"total_bits": 390942976.0,
"err": 0.025299072265625,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.0313232421875,
"total_bits": 426890304.0,
"err": 0.0176544189453125,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.235026945891204,
"total_bits": 441308224.0,
"err": 0.0166778564453125,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.12529296875,
"total_bits": 575099136.0,
"err": 0.013153076171875,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.9.self_attn.q_proj",
"numel": 26214400,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.17529296875,
"total_bits": 57024000.0,
"err": 0.10589599609375,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.37529296875,
"total_bits": 62266880.00000001,
"err": 0.09832763671875,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.72529296875,
"total_bits": 71441920.0,
"err": 0.08599853515625,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.22529296875,
"total_bits": 84549120.0,
"err": 0.04913330078125,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.0313232421875,
"total_bits": 79464320.0,
"err": 0.056182861328125,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.12529296875,
"total_bits": 81927680.0,
"err": 0.0517578125,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.17529296875,
"total_bits": 83238400.0,
"err": 0.049957275390625,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.52529296875,
"total_bits": 92413440.0,
"err": 0.043853759765625,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.662646484375,
"total_bits": 96014080.0,
"err": 0.041534423828125,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.0313232421875,
"total_bits": 105678720.0,
"err": 0.028594970703125,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.12529296875,
"total_bits": 108142080.0,
"err": 0.0248565673828125,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.0313232421875,
"total_bits": 131893120.0,
"err": 0.01432037353515625,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.22529296875,
"total_bits": 136977920.0,
"err": 0.012451171875,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.52529296875,
"total_bits": 144842240.0,
"err": 0.01142120361328125,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.0313232421875,
"total_bits": 158107520.0,
"err": 0.007640838623046875,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.2313232421875,
"total_bits": 163350400.0,
"err": 0.00724029541015625,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.12529296875,
"total_bits": 212999679.99999997,
"err": 0.005733489990234375,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.9.self_attn.k_proj",
"numel": 26214400,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.17529296875,
"total_bits": 57024000.0,
"err": 0.091064453125,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.37529296875,
"total_bits": 62266880.00000001,
"err": 0.08477783203125,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.72529296875,
"total_bits": 71441920.0,
"err": 0.07391357421875,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.22529296875,
"total_bits": 84549120.0,
"err": 0.042266845703125,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.0313232421875,
"total_bits": 79464320.0,
"err": 0.048583984375,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.12529296875,
"total_bits": 81927680.0,
"err": 0.0447998046875,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.17529296875,
"total_bits": 83238400.0,
"err": 0.042938232421875,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.52529296875,
"total_bits": 92413440.0,
"err": 0.037750244140625,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.662646484375,
"total_bits": 96014080.0,
"err": 0.035858154296875,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.0313232421875,
"total_bits": 105678720.0,
"err": 0.024658203125,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.12529296875,
"total_bits": 108142080.0,
"err": 0.021392822265625,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.0313232421875,
"total_bits": 131893120.0,
"err": 0.0123291015625,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.22529296875,
"total_bits": 136977920.0,
"err": 0.01047515869140625,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.52529296875,
"total_bits": 144842240.0,
"err": 0.0095672607421875,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.0313232421875,
"total_bits": 158107520.0,
"err": 0.0064697265625,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.2313232421875,
"total_bits": 163350400.0,
"err": 0.006092071533203125,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.12529296875,
"total_bits": 212999679.99999997,
"err": 0.004352569580078125,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.9.self_attn.v_proj",
"numel": 26214400,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.17529296875,
"total_bits": 57024000.0,
"err": 0.2197265625,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.37529296875,
"total_bits": 62266880.00000001,
"err": 0.205322265625,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.72529296875,
"total_bits": 71441920.0,
"err": 0.180908203125,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.22529296875,
"total_bits": 84549120.0,
"err": 0.10272216796875,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.0313232421875,
"total_bits": 79464320.0,
"err": 0.1158447265625,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.12529296875,
"total_bits": 81927680.0,
"err": 0.10614013671875,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.17529296875,
"total_bits": 83238400.0,
"err": 0.1041259765625,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.52529296875,
"total_bits": 92413440.0,
"err": 0.0919189453125,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.662646484375,
"total_bits": 96014080.0,
"err": 0.08709716796875,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.0313232421875,
"total_bits": 105678720.0,
"err": 0.058807373046875,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.12529296875,
"total_bits": 108142080.0,
"err": 0.050689697265625,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.0313232421875,
"total_bits": 131893120.0,
"err": 0.029327392578125,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.22529296875,
"total_bits": 136977920.0,
"err": 0.024993896484375,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.52529296875,
"total_bits": 144842240.0,
"err": 0.02276611328125,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.0313232421875,
"total_bits": 158107520.0,
"err": 0.01515960693359375,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.2313232421875,
"total_bits": 163350400.0,
"err": 0.014495849609375,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.12529296875,
"total_bits": 212999679.99999997,
"err": 0.0092620849609375,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.9.self_attn.o_proj",
"numel": 26214400,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.17529296875,
"total_bits": 57024000.0,
"err": 0.2117919921875,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.37529296875,
"total_bits": 62266880.00000001,
"err": 0.1829833984375,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.72529296875,
"total_bits": 71441920.0,
"err": 0.144775390625,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.22529296875,
"total_bits": 84549120.0,
"err": 0.09722900390625,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.0313232421875,
"total_bits": 79464320.0,
"err": 0.11602783203125,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.12529296875,
"total_bits": 81927680.0,
"err": 0.1063232421875,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.17529296875,
"total_bits": 83238400.0,
"err": 0.10089111328125,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.52529296875,
"total_bits": 92413440.0,
"err": 0.078857421875,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.662646484375,
"total_bits": 96014080.0,
"err": 0.07110595703125,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.0313232421875,
"total_bits": 105678720.0,
"err": 0.05963134765625,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.12529296875,
"total_bits": 108142080.0,
"err": 0.05206298828125,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.0313232421875,
"total_bits": 131893120.0,
"err": 0.030303955078125,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.22529296875,
"total_bits": 136977920.0,
"err": 0.02703857421875,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.52529296875,
"total_bits": 144842240.0,
"err": 0.0237579345703125,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.0313232421875,
"total_bits": 158107520.0,
"err": 0.0174407958984375,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.2313232421875,
"total_bits": 163350400.0,
"err": 0.0162811279296875,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.12529296875,
"total_bits": 212999679.99999997,
"err": 0.0160064697265625,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.9.mlp.gate_proj",
"numel": 70778880,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.1751085069444445,
"total_bits": 153951744.0,
"err": 0.175537109375,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.3751085069444446,
"total_bits": 168107520.0,
"err": 0.1650390625,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.7251085069444443,
"total_bits": 192880128.0,
"err": 0.14697265625,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.2251085069444443,
"total_bits": 228269568.0,
"err": 0.08245849609375,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.031277126736111,
"total_bits": 214550400.0,
"err": 0.09161376953125,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.1251085069444446,
"total_bits": 221191680.0,
"err": 0.0848388671875,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.1751085069444445,
"total_bits": 224730624.0,
"err": 0.08355712890625,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.5251085069444446,
"total_bits": 249503232.0,
"err": 0.074462890625,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.6625542534722224,
"total_bits": 259231488.0,
"err": 0.07061767578125,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.031277126736111,
"total_bits": 285329280.0,
"err": 0.04669189453125,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.125108506944445,
"total_bits": 291970560.0,
"err": 0.040679931640625,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.031277126736111,
"total_bits": 356108160.0,
"err": 0.023345947265625,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.225108506944444,
"total_bits": 369827328.0,
"err": 0.020599365234375,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.525108506944444,
"total_bits": 391060992.0,
"err": 0.01898193359375,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.031277126736111,
"total_bits": 426887040.0,
"err": 0.0123748779296875,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.231277126736111,
"total_bits": 441042816.0,
"err": 0.01202392578125,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.125108506944445,
"total_bits": 575086080.0,
"err": 0.00890350341796875,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.9.mlp.up_proj",
"numel": 70778880,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.1751085069444445,
"total_bits": 153951744.0,
"err": 0.241943359375,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.3751085069444446,
"total_bits": 168107520.0,
"err": 0.227294921875,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.7251085069444443,
"total_bits": 192880128.0,
"err": 0.202392578125,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.2251085069444443,
"total_bits": 228269568.0,
"err": 0.11328125,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.031277126736111,
"total_bits": 214550400.0,
"err": 0.1258544921875,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.1251085069444446,
"total_bits": 221191680.0,
"err": 0.11639404296875,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.1751085069444445,
"total_bits": 224730624.0,
"err": 0.1148681640625,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.5251085069444446,
"total_bits": 249503232.0,
"err": 0.10235595703125,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.6625542534722224,
"total_bits": 259231488.0,
"err": 0.0970458984375,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.031277126736111,
"total_bits": 285329280.0,
"err": 0.06390380859375,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.125108506944445,
"total_bits": 291970560.0,
"err": 0.055572509765625,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.031277126736111,
"total_bits": 356108160.0,
"err": 0.0318603515625,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.225108506944444,
"total_bits": 369827328.0,
"err": 0.027557373046875,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.525108506944444,
"total_bits": 391060992.0,
"err": 0.025299072265625,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.031277126736111,
"total_bits": 426887040.0,
"err": 0.0165252685546875,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.231277126736111,
"total_bits": 441042816.0,
"err": 0.016021728515625,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.125108506944445,
"total_bits": 575086080.0,
"err": 0.01016998291015625,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.9.mlp.down_proj",
"numel": 70778880,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.1715892650462965,
"total_bits": 153702656.0,
"err": 0.2476806640625,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.37529296875,
"total_bits": 168120576.0,
"err": 0.2236328125,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.727144820601852,
"total_bits": 193024256.0,
"err": 0.191162109375,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.227144820601852,
"total_bits": 228413696.0,
"err": 0.11370849609375,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.0313232421875,
"total_bits": 214553664.0,
"err": 0.1314697265625,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.12529296875,
"total_bits": 221204736.0,
"err": 0.12091064453125,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.1715892650462965,
"total_bits": 224481536.0,
"err": 0.116943359375,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.523441116898148,
"total_bits": 249385216.0,
"err": 0.09881591796875,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.6644983362268517,
"total_bits": 259369088.0,
"err": 0.09271240234375,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.0313232421875,
"total_bits": 285332544.0,
"err": 0.06707763671875,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.12529296875,
"total_bits": 291983616.0,
"err": 0.058074951171875,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.0313232421875,
"total_bits": 356111424.0,
"err": 0.033660888671875,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.227144820601852,
"total_bits": 369971456.0,
"err": 0.029022216796875,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.523441116898148,
"total_bits": 390942976.0,
"err": 0.026153564453125,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.0313232421875,
"total_bits": 426890304.0,
"err": 0.0184173583984375,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.235026945891204,
"total_bits": 441308224.0,
"err": 0.0173492431640625,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.12529296875,
"total_bits": 575099136.0,
"err": 0.0137939453125,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.10.self_attn.q_proj",
"numel": 26214400,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.17529296875,
"total_bits": 57024000.0,
"err": 0.11126708984375,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.37529296875,
"total_bits": 62266880.00000001,
"err": 0.1036376953125,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.72529296875,
"total_bits": 71441920.0,
"err": 0.0904541015625,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.22529296875,
"total_bits": 84549120.0,
"err": 0.05169677734375,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.0313232421875,
"total_bits": 79464320.0,
"err": 0.058868408203125,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.12529296875,
"total_bits": 81927680.0,
"err": 0.0543212890625,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.17529296875,
"total_bits": 83238400.0,
"err": 0.052490234375,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.52529296875,
"total_bits": 92413440.0,
"err": 0.0460205078125,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.662646484375,
"total_bits": 96014080.0,
"err": 0.0435791015625,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.0313232421875,
"total_bits": 105678720.0,
"err": 0.029937744140625,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.12529296875,
"total_bits": 108142080.0,
"err": 0.0260009765625,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.0313232421875,
"total_bits": 131893120.0,
"err": 0.0149688720703125,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.22529296875,
"total_bits": 136977920.0,
"err": 0.01287078857421875,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.52529296875,
"total_bits": 144842240.0,
"err": 0.011749267578125,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.0313232421875,
"total_bits": 158107520.0,
"err": 0.00787353515625,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.2313232421875,
"total_bits": 163350400.0,
"err": 0.00746917724609375,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.12529296875,
"total_bits": 212999679.99999997,
"err": 0.0054931640625,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.10.self_attn.k_proj",
"numel": 26214400,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.17529296875,
"total_bits": 57024000.0,
"err": 0.09185791015625,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.37529296875,
"total_bits": 62266880.00000001,
"err": 0.08551025390625,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.72529296875,
"total_bits": 71441920.0,
"err": 0.074462890625,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.22529296875,
"total_bits": 84549120.0,
"err": 0.042510986328125,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.0313232421875,
"total_bits": 79464320.0,
"err": 0.048980712890625,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.12529296875,
"total_bits": 81927680.0,
"err": 0.045257568359375,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.17529296875,
"total_bits": 83238400.0,
"err": 0.043243408203125,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.52529296875,
"total_bits": 92413440.0,
"err": 0.0379638671875,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.662646484375,
"total_bits": 96014080.0,
"err": 0.035980224609375,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.0313232421875,
"total_bits": 105678720.0,
"err": 0.0247955322265625,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.12529296875,
"total_bits": 108142080.0,
"err": 0.0215911865234375,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.0313232421875,
"total_bits": 131893120.0,
"err": 0.01241302490234375,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.22529296875,
"total_bits": 136977920.0,
"err": 0.0105438232421875,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.52529296875,
"total_bits": 144842240.0,
"err": 0.009613037109375,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.0313232421875,
"total_bits": 158107520.0,
"err": 0.006500244140625,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.2313232421875,
"total_bits": 163350400.0,
"err": 0.00611114501953125,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.12529296875,
"total_bits": 212999679.99999997,
"err": 0.004367828369140625,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.10.self_attn.v_proj",
"numel": 26214400,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.17529296875,
"total_bits": 57024000.0,
"err": 0.2200927734375,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.37529296875,
"total_bits": 62266880.00000001,
"err": 0.20556640625,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.72529296875,
"total_bits": 71441920.0,
"err": 0.1806640625,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.22529296875,
"total_bits": 84549120.0,
"err": 0.1026611328125,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.0313232421875,
"total_bits": 79464320.0,
"err": 0.11590576171875,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.12529296875,
"total_bits": 81927680.0,
"err": 0.10687255859375,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.17529296875,
"total_bits": 83238400.0,
"err": 0.10418701171875,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.52529296875,
"total_bits": 92413440.0,
"err": 0.09185791015625,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.662646484375,
"total_bits": 96014080.0,
"err": 0.0867919921875,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.0313232421875,
"total_bits": 105678720.0,
"err": 0.05877685546875,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.12529296875,
"total_bits": 108142080.0,
"err": 0.050994873046875,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.0313232421875,
"total_bits": 131893120.0,
"err": 0.029296875,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.22529296875,
"total_bits": 136977920.0,
"err": 0.024993896484375,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.52529296875,
"total_bits": 144842240.0,
"err": 0.02276611328125,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.0313232421875,
"total_bits": 158107520.0,
"err": 0.01512908935546875,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.2313232421875,
"total_bits": 163350400.0,
"err": 0.01445770263671875,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.12529296875,
"total_bits": 212999679.99999997,
"err": 0.00933074951171875,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.10.self_attn.o_proj",
"numel": 26214400,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.17529296875,
"total_bits": 57024000.0,
"err": 0.2244873046875,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.37529296875,
"total_bits": 62266880.00000001,
"err": 0.1995849609375,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.72529296875,
"total_bits": 71441920.0,
"err": 0.1636962890625,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.22529296875,
"total_bits": 84549120.0,
"err": 0.10394287109375,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.0313232421875,
"total_bits": 79464320.0,
"err": 0.12164306640625,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.12529296875,
"total_bits": 81927680.0,
"err": 0.11199951171875,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.17529296875,
"total_bits": 83238400.0,
"err": 0.1072998046875,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.52529296875,
"total_bits": 92413440.0,
"err": 0.08740234375,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.662646484375,
"total_bits": 96014080.0,
"err": 0.079833984375,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.0313232421875,
"total_bits": 105678720.0,
"err": 0.0628662109375,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.12529296875,
"total_bits": 108142080.0,
"err": 0.0545654296875,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.0313232421875,
"total_bits": 131893120.0,
"err": 0.031829833984375,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.22529296875,
"total_bits": 136977920.0,
"err": 0.0281219482421875,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.52529296875,
"total_bits": 144842240.0,
"err": 0.02508544921875,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.0313232421875,
"total_bits": 158107520.0,
"err": 0.01806640625,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.2313232421875,
"total_bits": 163350400.0,
"err": 0.0170440673828125,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.12529296875,
"total_bits": 212999679.99999997,
"err": 0.0157470703125,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.10.mlp.gate_proj",
"numel": 70778880,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.1751085069444445,
"total_bits": 153951744.0,
"err": 0.170654296875,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.3751085069444446,
"total_bits": 168107520.0,
"err": 0.160400390625,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.7251085069444443,
"total_bits": 192880128.0,
"err": 0.1424560546875,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.2251085069444443,
"total_bits": 228269568.0,
"err": 0.08026123046875,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.031277126736111,
"total_bits": 214550400.0,
"err": 0.08935546875,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.1251085069444446,
"total_bits": 221191680.0,
"err": 0.08251953125,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.1751085069444445,
"total_bits": 224730624.0,
"err": 0.08135986328125,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.5251085069444446,
"total_bits": 249503232.0,
"err": 0.072265625,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.6625542534722224,
"total_bits": 259231488.0,
"err": 0.06866455078125,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.031277126736111,
"total_bits": 285329280.0,
"err": 0.04559326171875,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.125108506944445,
"total_bits": 291970560.0,
"err": 0.039642333984375,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.031277126736111,
"total_bits": 356108160.0,
"err": 0.0228271484375,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.225108506944444,
"total_bits": 369827328.0,
"err": 0.0201873779296875,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.525108506944444,
"total_bits": 391060992.0,
"err": 0.018646240234375,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.031277126736111,
"total_bits": 426887040.0,
"err": 0.01224517822265625,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.231277126736111,
"total_bits": 441042816.0,
"err": 0.01190185546875,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.125108506944445,
"total_bits": 575086080.0,
"err": 0.0090789794921875,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.10.mlp.up_proj",
"numel": 70778880,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.1751085069444445,
"total_bits": 153951744.0,
"err": 0.2392578125,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.3751085069444446,
"total_bits": 168107520.0,
"err": 0.224609375,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.7251085069444443,
"total_bits": 192880128.0,
"err": 0.199951171875,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.2251085069444443,
"total_bits": 228269568.0,
"err": 0.112060546875,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.031277126736111,
"total_bits": 214550400.0,
"err": 0.12457275390625,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.1251085069444446,
"total_bits": 221191680.0,
"err": 0.11505126953125,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.1751085069444445,
"total_bits": 224730624.0,
"err": 0.11370849609375,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.5251085069444446,
"total_bits": 249503232.0,
"err": 0.10101318359375,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.6625542534722224,
"total_bits": 259231488.0,
"err": 0.09576416015625,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.031277126736111,
"total_bits": 285329280.0,
"err": 0.063232421875,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.125108506944445,
"total_bits": 291970560.0,
"err": 0.054931640625,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.031277126736111,
"total_bits": 356108160.0,
"err": 0.031524658203125,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.225108506944444,
"total_bits": 369827328.0,
"err": 0.0272674560546875,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.525108506944444,
"total_bits": 391060992.0,
"err": 0.024993896484375,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.031277126736111,
"total_bits": 426887040.0,
"err": 0.016357421875,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.231277126736111,
"total_bits": 441042816.0,
"err": 0.015899658203125,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.125108506944445,
"total_bits": 575086080.0,
"err": 0.01006317138671875,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.10.mlp.down_proj",
"numel": 70778880,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.1715892650462965,
"total_bits": 153702656.0,
"err": 0.2509765625,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.37529296875,
"total_bits": 168120576.0,
"err": 0.22705078125,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.727144820601852,
"total_bits": 193024256.0,
"err": 0.194091796875,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.227144820601852,
"total_bits": 228413696.0,
"err": 0.11529541015625,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.0313232421875,
"total_bits": 214553664.0,
"err": 0.1322021484375,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.12529296875,
"total_bits": 221204736.0,
"err": 0.121826171875,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.1715892650462965,
"total_bits": 224481536.0,
"err": 0.11834716796875,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.523441116898148,
"total_bits": 249385216.0,
"err": 0.10015869140625,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.6644983362268517,
"total_bits": 259369088.0,
"err": 0.0938720703125,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.0313232421875,
"total_bits": 285332544.0,
"err": 0.0673828125,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.12529296875,
"total_bits": 291983616.0,
"err": 0.058380126953125,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.0313232421875,
"total_bits": 356111424.0,
"err": 0.03387451171875,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.227144820601852,
"total_bits": 369971456.0,
"err": 0.029266357421875,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.523441116898148,
"total_bits": 390942976.0,
"err": 0.0263671875,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.0313232421875,
"total_bits": 426890304.0,
"err": 0.01849365234375,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.235026945891204,
"total_bits": 441308224.0,
"err": 0.0175628662109375,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.12529296875,
"total_bits": 575099136.0,
"err": 0.013641357421875,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.11.self_attn.q_proj",
"numel": 26214400,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.17529296875,
"total_bits": 57024000.0,
"err": 0.1094970703125,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.37529296875,
"total_bits": 62266880.00000001,
"err": 0.101806640625,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.72529296875,
"total_bits": 71441920.0,
"err": 0.08880615234375,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.22529296875,
"total_bits": 84549120.0,
"err": 0.05078125,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.0313232421875,
"total_bits": 79464320.0,
"err": 0.0579833984375,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.12529296875,
"total_bits": 81927680.0,
"err": 0.05352783203125,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.17529296875,
"total_bits": 83238400.0,
"err": 0.051605224609375,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.52529296875,
"total_bits": 92413440.0,
"err": 0.045318603515625,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.662646484375,
"total_bits": 96014080.0,
"err": 0.042877197265625,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.0313232421875,
"total_bits": 105678720.0,
"err": 0.02947998046875,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.12529296875,
"total_bits": 108142080.0,
"err": 0.0255889892578125,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.0313232421875,
"total_bits": 131893120.0,
"err": 0.0147247314453125,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.22529296875,
"total_bits": 136977920.0,
"err": 0.01256561279296875,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.52529296875,
"total_bits": 144842240.0,
"err": 0.01146697998046875,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.0313232421875,
"total_bits": 158107520.0,
"err": 0.00772857666015625,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.2313232421875,
"total_bits": 163350400.0,
"err": 0.007297515869140625,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.12529296875,
"total_bits": 212999679.99999997,
"err": 0.0052032470703125,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.11.self_attn.k_proj",
"numel": 26214400,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.17529296875,
"total_bits": 57024000.0,
"err": 0.09210205078125,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.37529296875,
"total_bits": 62266880.00000001,
"err": 0.08575439453125,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.72529296875,
"total_bits": 71441920.0,
"err": 0.07464599609375,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.22529296875,
"total_bits": 84549120.0,
"err": 0.042694091796875,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.0313232421875,
"total_bits": 79464320.0,
"err": 0.0491943359375,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.12529296875,
"total_bits": 81927680.0,
"err": 0.045440673828125,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.17529296875,
"total_bits": 83238400.0,
"err": 0.04345703125,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.52529296875,
"total_bits": 92413440.0,
"err": 0.038177490234375,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.662646484375,
"total_bits": 96014080.0,
"err": 0.036224365234375,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.0313232421875,
"total_bits": 105678720.0,
"err": 0.0249176025390625,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.12529296875,
"total_bits": 108142080.0,
"err": 0.0217132568359375,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.0313232421875,
"total_bits": 131893120.0,
"err": 0.01245880126953125,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.22529296875,
"total_bits": 136977920.0,
"err": 0.01055145263671875,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.52529296875,
"total_bits": 144842240.0,
"err": 0.0096435546875,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.0313232421875,
"total_bits": 158107520.0,
"err": 0.006519317626953125,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.2313232421875,
"total_bits": 163350400.0,
"err": 0.006130218505859375,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.12529296875,
"total_bits": 212999679.99999997,
"err": 0.004314422607421875,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.11.self_attn.v_proj",
"numel": 26214400,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.17529296875,
"total_bits": 57024000.0,
"err": 0.2216796875,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.37529296875,
"total_bits": 62266880.00000001,
"err": 0.2069091796875,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.72529296875,
"total_bits": 71441920.0,
"err": 0.181884765625,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.22529296875,
"total_bits": 84549120.0,
"err": 0.10357666015625,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.0313232421875,
"total_bits": 79464320.0,
"err": 0.1170654296875,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.12529296875,
"total_bits": 81927680.0,
"err": 0.10772705078125,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.17529296875,
"total_bits": 83238400.0,
"err": 0.1051025390625,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.52529296875,
"total_bits": 92413440.0,
"err": 0.092529296875,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.662646484375,
"total_bits": 96014080.0,
"err": 0.08734130859375,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.0313232421875,
"total_bits": 105678720.0,
"err": 0.059326171875,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.12529296875,
"total_bits": 108142080.0,
"err": 0.0513916015625,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.0313232421875,
"total_bits": 131893120.0,
"err": 0.02960205078125,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.22529296875,
"total_bits": 136977920.0,
"err": 0.02520751953125,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.52529296875,
"total_bits": 144842240.0,
"err": 0.0229644775390625,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.0313232421875,
"total_bits": 158107520.0,
"err": 0.015289306640625,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.2313232421875,
"total_bits": 163350400.0,
"err": 0.01457977294921875,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.12529296875,
"total_bits": 212999679.99999997,
"err": 0.00942230224609375,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.11.self_attn.o_proj",
"numel": 26214400,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.17529296875,
"total_bits": 57024000.0,
"err": 0.2191162109375,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.37529296875,
"total_bits": 62266880.00000001,
"err": 0.1954345703125,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.72529296875,
"total_bits": 71441920.0,
"err": 0.16015625,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.22529296875,
"total_bits": 84549120.0,
"err": 0.10125732421875,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.0313232421875,
"total_bits": 79464320.0,
"err": 0.12371826171875,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.12529296875,
"total_bits": 81927680.0,
"err": 0.11322021484375,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.17529296875,
"total_bits": 83238400.0,
"err": 0.1053466796875,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.52529296875,
"total_bits": 92413440.0,
"err": 0.08642578125,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.662646484375,
"total_bits": 96014080.0,
"err": 0.080078125,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.0313232421875,
"total_bits": 105678720.0,
"err": 0.0634765625,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.12529296875,
"total_bits": 108142080.0,
"err": 0.05499267578125,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.0313232421875,
"total_bits": 131893120.0,
"err": 0.03192138671875,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.22529296875,
"total_bits": 136977920.0,
"err": 0.027191162109375,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.52529296875,
"total_bits": 144842240.0,
"err": 0.024444580078125,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.0313232421875,
"total_bits": 158107520.0,
"err": 0.0176544189453125,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.2313232421875,
"total_bits": 163350400.0,
"err": 0.0160064697265625,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.12529296875,
"total_bits": 212999679.99999997,
"err": 0.01493072509765625,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.11.mlp.gate_proj",
"numel": 70778880,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.1751085069444445,
"total_bits": 153951744.0,
"err": 0.171875,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.3751085069444446,
"total_bits": 168107520.0,
"err": 0.16162109375,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.7251085069444443,
"total_bits": 192880128.0,
"err": 0.143310546875,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.2251085069444443,
"total_bits": 228269568.0,
"err": 0.08087158203125,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.031277126736111,
"total_bits": 214550400.0,
"err": 0.09002685546875,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.1251085069444446,
"total_bits": 221191680.0,
"err": 0.08331298828125,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.1751085069444445,
"total_bits": 224730624.0,
"err": 0.08197021484375,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.5251085069444446,
"total_bits": 249503232.0,
"err": 0.07275390625,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.6625542534722224,
"total_bits": 259231488.0,
"err": 0.069091796875,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.031277126736111,
"total_bits": 285329280.0,
"err": 0.045989990234375,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.125108506944445,
"total_bits": 291970560.0,
"err": 0.0400390625,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.031277126736111,
"total_bits": 356108160.0,
"err": 0.02301025390625,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.225108506944444,
"total_bits": 369827328.0,
"err": 0.0204620361328125,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.525108506944444,
"total_bits": 391060992.0,
"err": 0.01885986328125,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.031277126736111,
"total_bits": 426887040.0,
"err": 0.0123443603515625,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.231277126736111,
"total_bits": 441042816.0,
"err": 0.01197052001953125,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.125108506944445,
"total_bits": 575086080.0,
"err": 0.00933837890625,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.11.mlp.up_proj",
"numel": 70778880,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.1751085069444445,
"total_bits": 153951744.0,
"err": 0.2381591796875,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.3751085069444446,
"total_bits": 168107520.0,
"err": 0.2236328125,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.7251085069444443,
"total_bits": 192880128.0,
"err": 0.1986083984375,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.2251085069444443,
"total_bits": 228269568.0,
"err": 0.1116943359375,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.031277126736111,
"total_bits": 214550400.0,
"err": 0.124267578125,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.1251085069444446,
"total_bits": 221191680.0,
"err": 0.11480712890625,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.1751085069444445,
"total_bits": 224730624.0,
"err": 0.11322021484375,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.5251085069444446,
"total_bits": 249503232.0,
"err": 0.10040283203125,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.6625542534722224,
"total_bits": 259231488.0,
"err": 0.09527587890625,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.031277126736111,
"total_bits": 285329280.0,
"err": 0.06298828125,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.125108506944445,
"total_bits": 291970560.0,
"err": 0.054840087890625,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.031277126736111,
"total_bits": 356108160.0,
"err": 0.03143310546875,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.225108506944444,
"total_bits": 369827328.0,
"err": 0.0272216796875,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.525108506944444,
"total_bits": 391060992.0,
"err": 0.0249481201171875,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.031277126736111,
"total_bits": 426887040.0,
"err": 0.016265869140625,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.231277126736111,
"total_bits": 441042816.0,
"err": 0.015777587890625,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.125108506944445,
"total_bits": 575086080.0,
"err": 0.01018524169921875,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.11.mlp.down_proj",
"numel": 70778880,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.1715892650462965,
"total_bits": 153702656.0,
"err": 0.254150390625,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.37529296875,
"total_bits": 168120576.0,
"err": 0.229736328125,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.727144820601852,
"total_bits": 193024256.0,
"err": 0.19580078125,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.227144820601852,
"total_bits": 228413696.0,
"err": 0.1168212890625,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.0313232421875,
"total_bits": 214553664.0,
"err": 0.13427734375,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.12529296875,
"total_bits": 221204736.0,
"err": 0.12384033203125,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.1715892650462965,
"total_bits": 224481536.0,
"err": 0.1201171875,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.523441116898148,
"total_bits": 249385216.0,
"err": 0.10137939453125,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.6644983362268517,
"total_bits": 259369088.0,
"err": 0.0947265625,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.0313232421875,
"total_bits": 285332544.0,
"err": 0.068603515625,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.12529296875,
"total_bits": 291983616.0,
"err": 0.059478759765625,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.0313232421875,
"total_bits": 356111424.0,
"err": 0.03448486328125,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.227144820601852,
"total_bits": 369971456.0,
"err": 0.0296478271484375,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.523441116898148,
"total_bits": 390942976.0,
"err": 0.026611328125,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.0313232421875,
"total_bits": 426890304.0,
"err": 0.0188140869140625,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.235026945891204,
"total_bits": 441308224.0,
"err": 0.0178375244140625,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.12529296875,
"total_bits": 575099136.0,
"err": 0.01373291015625,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.12.self_attn.q_proj",
"numel": 26214400,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.17529296875,
"total_bits": 57024000.0,
"err": 0.1148681640625,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.37529296875,
"total_bits": 62266880.00000001,
"err": 0.10687255859375,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.72529296875,
"total_bits": 71441920.0,
"err": 0.09344482421875,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.22529296875,
"total_bits": 84549120.0,
"err": 0.053375244140625,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.0313232421875,
"total_bits": 79464320.0,
"err": 0.060760498046875,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.12529296875,
"total_bits": 81927680.0,
"err": 0.05621337890625,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.17529296875,
"total_bits": 83238400.0,
"err": 0.054229736328125,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.52529296875,
"total_bits": 92413440.0,
"err": 0.047637939453125,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.662646484375,
"total_bits": 96014080.0,
"err": 0.04498291015625,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.0313232421875,
"total_bits": 105678720.0,
"err": 0.030853271484375,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.12529296875,
"total_bits": 108142080.0,
"err": 0.026824951171875,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.0313232421875,
"total_bits": 131893120.0,
"err": 0.01541900634765625,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.22529296875,
"total_bits": 136977920.0,
"err": 0.01316070556640625,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.52529296875,
"total_bits": 144842240.0,
"err": 0.01200103759765625,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.0313232421875,
"total_bits": 158107520.0,
"err": 0.0080413818359375,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.2313232421875,
"total_bits": 163350400.0,
"err": 0.0076141357421875,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.12529296875,
"total_bits": 212999679.99999997,
"err": 0.0052947998046875,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.12.self_attn.k_proj",
"numel": 26214400,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.17529296875,
"total_bits": 57024000.0,
"err": 0.09600830078125,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.37529296875,
"total_bits": 62266880.00000001,
"err": 0.0894775390625,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.72529296875,
"total_bits": 71441920.0,
"err": 0.0780029296875,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.22529296875,
"total_bits": 84549120.0,
"err": 0.04461669921875,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.0313232421875,
"total_bits": 79464320.0,
"err": 0.051300048828125,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.12529296875,
"total_bits": 81927680.0,
"err": 0.047454833984375,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.17529296875,
"total_bits": 83238400.0,
"err": 0.045379638671875,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.52529296875,
"total_bits": 92413440.0,
"err": 0.039886474609375,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.662646484375,
"total_bits": 96014080.0,
"err": 0.0377197265625,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.0313232421875,
"total_bits": 105678720.0,
"err": 0.0259552001953125,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.12529296875,
"total_bits": 108142080.0,
"err": 0.02264404296875,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.0313232421875,
"total_bits": 131893120.0,
"err": 0.012969970703125,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.22529296875,
"total_bits": 136977920.0,
"err": 0.01099395751953125,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.52529296875,
"total_bits": 144842240.0,
"err": 0.01003265380859375,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.0313232421875,
"total_bits": 158107520.0,
"err": 0.006778717041015625,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.2313232421875,
"total_bits": 163350400.0,
"err": 0.00637054443359375,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.12529296875,
"total_bits": 212999679.99999997,
"err": 0.004428863525390625,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.12.self_attn.v_proj",
"numel": 26214400,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.17529296875,
"total_bits": 57024000.0,
"err": 0.2298583984375,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.37529296875,
"total_bits": 62266880.00000001,
"err": 0.2144775390625,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.72529296875,
"total_bits": 71441920.0,
"err": 0.1881103515625,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.22529296875,
"total_bits": 84549120.0,
"err": 0.10736083984375,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.0313232421875,
"total_bits": 79464320.0,
"err": 0.12158203125,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.12529296875,
"total_bits": 81927680.0,
"err": 0.11187744140625,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.17529296875,
"total_bits": 83238400.0,
"err": 0.10906982421875,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.52529296875,
"total_bits": 92413440.0,
"err": 0.09600830078125,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.662646484375,
"total_bits": 96014080.0,
"err": 0.09063720703125,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.0313232421875,
"total_bits": 105678720.0,
"err": 0.061737060546875,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.12529296875,
"total_bits": 108142080.0,
"err": 0.053497314453125,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.0313232421875,
"total_bits": 131893120.0,
"err": 0.0308074951171875,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.22529296875,
"total_bits": 136977920.0,
"err": 0.0262451171875,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.52529296875,
"total_bits": 144842240.0,
"err": 0.0239410400390625,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.0313232421875,
"total_bits": 158107520.0,
"err": 0.0159912109375,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.2313232421875,
"total_bits": 163350400.0,
"err": 0.0152435302734375,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.12529296875,
"total_bits": 212999679.99999997,
"err": 0.0100555419921875,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.12.self_attn.o_proj",
"numel": 26214400,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.17529296875,
"total_bits": 57024000.0,
"err": 0.230224609375,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.37529296875,
"total_bits": 62266880.00000001,
"err": 0.206298828125,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.72529296875,
"total_bits": 71441920.0,
"err": 0.171142578125,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.22529296875,
"total_bits": 84549120.0,
"err": 0.10699462890625,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.0313232421875,
"total_bits": 79464320.0,
"err": 0.125244140625,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.12529296875,
"total_bits": 81927680.0,
"err": 0.1146240234375,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.17529296875,
"total_bits": 83238400.0,
"err": 0.1099853515625,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.52529296875,
"total_bits": 92413440.0,
"err": 0.0904541015625,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.662646484375,
"total_bits": 96014080.0,
"err": 0.08355712890625,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.0313232421875,
"total_bits": 105678720.0,
"err": 0.064208984375,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.12529296875,
"total_bits": 108142080.0,
"err": 0.0555419921875,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.0313232421875,
"total_bits": 131893120.0,
"err": 0.0323486328125,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.22529296875,
"total_bits": 136977920.0,
"err": 0.0283203125,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.52529296875,
"total_bits": 144842240.0,
"err": 0.0252227783203125,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.0313232421875,
"total_bits": 158107520.0,
"err": 0.0180206298828125,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.2313232421875,
"total_bits": 163350400.0,
"err": 0.016998291015625,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.12529296875,
"total_bits": 212999679.99999997,
"err": 0.01508331298828125,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.12.mlp.gate_proj",
"numel": 70778880,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.1751085069444445,
"total_bits": 153951744.0,
"err": 0.1766357421875,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.3751085069444446,
"total_bits": 168107520.0,
"err": 0.165771484375,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.7251085069444443,
"total_bits": 192880128.0,
"err": 0.147216796875,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.2251085069444443,
"total_bits": 228269568.0,
"err": 0.08319091796875,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.031277126736111,
"total_bits": 214550400.0,
"err": 0.0926513671875,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.1251085069444446,
"total_bits": 221191680.0,
"err": 0.08563232421875,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.1751085069444445,
"total_bits": 224730624.0,
"err": 0.0843505859375,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.5251085069444446,
"total_bits": 249503232.0,
"err": 0.0748291015625,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.6625542534722224,
"total_bits": 259231488.0,
"err": 0.07098388671875,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.031277126736111,
"total_bits": 285329280.0,
"err": 0.04730224609375,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.125108506944445,
"total_bits": 291970560.0,
"err": 0.04119873046875,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.031277126736111,
"total_bits": 356108160.0,
"err": 0.0236968994140625,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.225108506944444,
"total_bits": 369827328.0,
"err": 0.0210723876953125,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.525108506944444,
"total_bits": 391060992.0,
"err": 0.019439697265625,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.031277126736111,
"total_bits": 426887040.0,
"err": 0.0127410888671875,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.231277126736111,
"total_bits": 441042816.0,
"err": 0.01236724853515625,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.125108506944445,
"total_bits": 575086080.0,
"err": 0.0097198486328125,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.12.mlp.up_proj",
"numel": 70778880,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.1751085069444445,
"total_bits": 153951744.0,
"err": 0.24169921875,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.3751085069444446,
"total_bits": 168107520.0,
"err": 0.22705078125,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.7251085069444443,
"total_bits": 192880128.0,
"err": 0.20166015625,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.2251085069444443,
"total_bits": 228269568.0,
"err": 0.11334228515625,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.031277126736111,
"total_bits": 214550400.0,
"err": 0.126220703125,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.1251085069444446,
"total_bits": 221191680.0,
"err": 0.11669921875,
"qparams": {
"group_size": 32,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.05:4b/0.95:3b 32g s4",
"bpw": 3.1751085069444445,
"total_bits": 224730624.0,
"err": 0.11505126953125,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.4:4b/0.6:3b 32g s4",
"bpw": 3.5251085069444446,
"total_bits": 249503232.0,
"err": 0.10205078125,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "0.6:4b/0.4:3b 64g s4",
"bpw": 3.6625542534722224,
"total_bits": 259231488.0,
"err": 0.09674072265625,
"qparams": {
"group_size": 64,
"bits": [
4,
3
],
"bits_prop": [
0.6,
0.4
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 128g s4",
"bpw": 4.031277126736111,
"total_bits": 285329280.0,
"err": 0.06414794921875,
"qparams": {
"group_size": 128,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:4b 32g s4",
"bpw": 4.125108506944445,
"total_bits": 291970560.0,
"err": 0.0556640625,
"qparams": {
"group_size": 32,
"bits": [
4
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:5b 128g s4",
"bpw": 5.031277126736111,
"total_bits": 356108160.0,
"err": 0.032012939453125,
"qparams": {
"group_size": 128,
"bits": [
5
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:6b/0.9:5b 32g s4",
"bpw": 5.225108506944444,
"total_bits": 369827328.0,
"err": 0.0276031494140625,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "0.4:6b/0.6:5b 32g s4",
"bpw": 5.525108506944444,
"total_bits": 391060992.0,
"err": 0.0252532958984375,
"qparams": {
"group_size": 32,
"bits": [
6,
5
],
"bits_prop": [
0.4,
0.6
],
"scale_bits": 4
}
},
{
"desc": "1.0:6b 128g s4",
"bpw": 6.031277126736111,
"total_bits": 426887040.0,
"err": 0.0166015625,
"qparams": {
"group_size": 128,
"bits": [
6
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "0.1:8b/0.9:6b 128g s4",
"bpw": 6.231277126736111,
"total_bits": 441042816.0,
"err": 0.016082763671875,
"qparams": {
"group_size": 128,
"bits": [
8,
6
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:8b 32g s4",
"bpw": 8.125108506944445,
"total_bits": 575086080.0,
"err": 0.01020050048828125,
"qparams": {
"group_size": 32,
"bits": [
8
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
}
]
},
{
"key": "model.layers.12.mlp.down_proj",
"numel": 70778880,
"options": [
{
"desc": "0.05:3b/0.95:2b 32g s4",
"bpw": 2.1715892650462965,
"total_bits": 153702656.0,
"err": 0.258544921875,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.05,
0.95
],
"scale_bits": 4
}
},
{
"desc": "0.25:3b/0.75:2b 32g s4",
"bpw": 2.37529296875,
"total_bits": 168120576.0,
"err": 0.2333984375,
"qparams": {
"group_size": 32,
"bits": [
3,
2
],
"bits_prop": [
0.25,
0.75
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4",
"bpw": 2.727144820601852,
"total_bits": 193024256.0,
"err": 0.198486328125,
"qparams": {
"group_size": 32,
"bits": [
4,
3,
2
],
"bits_prop": [
0.1,
0.4,
0.5
],
"scale_bits": 4
}
},
{
"desc": "0.1:4b/0.9:3b 32g s4",
"bpw": 3.227144820601852,
"total_bits": 228413696.0,
"err": 0.118896484375,
"qparams": {
"group_size": 32,
"bits": [
4,
3
],
"bits_prop": [
0.1,
0.9
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 128g s4",
"bpw": 3.0313232421875,
"total_bits": 214553664.0,
"err": 0.136474609375,
"qparams": {
"group_size": 128,
"bits": [
3
],
"bits_prop": [
1.0
],
"scale_bits": 4
}
},
{
"desc": "1.0:3b 32g s4",
"bpw": 3.12529296875,
"total_bits": 221204736.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment