Skip to content

Instantly share code, notes, and snippets.

@daskol
Created March 28, 2023 12:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save daskol/7969f5fa24e3a7ac4027641683e807a9 to your computer and use it in GitHub Desktop.
Save daskol/7969f5fa24e3a7ac4027641683e807a9 to your computer and use it in GitHub Desktop.
Post-Training Quantization for llama.cpp in Python
import numpy as np
def pack(xs, dtype='q4_0'):
    """Quantize an array to 4-bit codes and pack two codes per byte.

    The input is flattened and scaled so that its largest absolute value
    maps to 7. Each element is rounded to the nearest integer level and
    stored as ``level + 8``, an unsigned 4-bit code. Even-indexed elements
    occupy the low nibble of a byte and odd-indexed elements the high one.

    Args:
        xs: Array-like of real numbers with an even number of elements.
            It is converted to float32 before quantization.
        dtype: Quantization scheme; only ``'q4_0'`` is supported.

    Returns:
        One-dimensional int8 array of ``xs.size // 2`` packed bytes followed
        by an 8-byte footer: float32 ``amax`` (largest absolute value) and
        float32 ``magnitude`` (the quantization step ``amax / 7``), both in
        native byte order.

    Raises:
        AssertionError: If ``dtype`` is not ``'q4_0'`` or the number of
            elements is odd.
    """
    assert dtype == 'q4_0', 'Only quantized int4 type is supported.'
    # Work in float32 regardless of the input dtype so that the footer is
    # always exactly 8 bytes (two float32 values).
    xs = np.asarray(xs, dtype=np.float32).ravel()
    assert xs.size % 2 == 0, 'Only arrays of even length.'

    # Estimate magnitude of array elements and its inverse. An empty input
    # has no maximum, so treat it as all-zero.
    amax = np.abs(xs).max() if xs.size else np.float32(0)
    magnitude = np.float32(amax / np.float32(0b0111))
    precision = np.float32(1) / magnitude if magnitude else np.float32(0)

    # Quantize elements. Round to the nearest level (plain truncation would
    # lose up to a whole step) and clip so every code fits into 4 bits.
    levels = np.clip(np.rint(xs * precision), -8, 7).astype(np.int8)
    codes = (levels + 8).astype(np.uint8)

    # Pack pairs of codes in unsigned arithmetic to avoid relying on signed
    # overflow when shifting into the high nibble.
    packed = codes[::2] | (codes[1::2] << 4)

    # Append magnitude to the end of int8 array for unpacking.
    footer = np.float32(amax).tobytes() + magnitude.tobytes()
    trailer = np.frombuffer(footer, np.int8)
    return np.hstack([packed.view(np.int8), trailer])
def unpack(xs: np.ndarray):
    """Restore float32 values from a byte sequence of packed 4-bit codes.

    The last 8 bytes of ``xs`` are read as two float32 values; the second
    one is the quantization step. Every remaining byte carries two codes:
    the low nibble decodes to the even-indexed element and the high nibble
    to the odd-indexed one, each as ``step * (code - 8)``.

    Args:
        xs: One-dimensional int8 (or uint8) array ending with the 8-byte
            footer described above.

    Returns:
        One-dimensional float32 array with two elements per packed byte.

    Raises:
        AssertionError: If ``xs`` is not one-dimensional, is not made of
            single-byte integers, or is shorter than the footer.
    """
    assert xs.ndim == 1, 'Only int8 sequences are supported.'
    assert xs.dtype in (np.int8, np.uint8), \
        'Only int8 sequences are supported.'
    assert xs.size >= 8, 'Too short array.'

    # Treat the data as unsigned bytes: masking a signed int8 array with
    # 0xf0 does not fit int8 and fails under NumPy 2 promotion rules.
    raw = xs.view(np.uint8)

    # Restore magnitude of quantization (second float32 of the footer).
    _, magnitude = np.frombuffer(raw[-8:].tobytes(), np.float32)

    # Restore sequence elements to array with stride 2 (interleaving).
    body = raw[:-8]
    zs = np.empty((body.size, 2), np.float32)
    # Convert codes to float before subtracting the offset; doing it in
    # uint8 would wrap around for codes below 8.
    zs[:, 0] = magnitude * ((body & 0x0f).astype(np.float32) - 8)
    zs[:, 1] = magnitude * ((body >> 4).astype(np.float32) - 8)

    # Flatten array in order to restore sequence of elements.
    return zs.reshape(-1)
def test_pack_unpack():
    """Round-trip an integer ramp through pack/unpack and check the error.

    Prints the original, packed and restored arrays together with the
    absolute and relative errors, then asserts that no element is off by
    more than one quantization step.
    """
    # Deterministic ramp centred on zero: -50, -49, ..., 49.
    xs = np.arange(100)
    xs -= xs.size // 2
    print('original')
    print(xs)
    print('packed')
    ys = pack(xs.astype(np.float32))
    print(ys)
    print('unpacked')
    zs = unpack(ys)
    print(zs)
    print('absolute errors')
    aerr = zs - xs
    print(aerr)
    print('relative error')
    rerr = np.linalg.norm(aerr) / np.linalg.norm(xs)
    print(rerr)
    # The quantization step is the largest absolute value divided by 7;
    # allow a small slack for float32 rounding in the scale factor.
    step = np.abs(xs).max() / 0b0111
    assert zs.shape == xs.shape
    assert np.abs(aerr).max() <= step * (1 + 1e-3)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment