# aminnj/embeddingsearch.py

## embeddingsearch.py
import functools
import time

import jax
import jax.numpy as jnp
import numpy as np
from sentence_transformers import SentenceTransformer

# Load the all-MiniLM-L6-v2 sentence encoder and move it to the CPU.
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
model = model.to("cpu")

# Toy corpus of ten sentences; it is replicated below to simulate a large
# database.
sentences = [
    "This is the first sentence.",
    "Here is another sentence.",
    "Cats are red",
    "Dogs are blue",
    "Venus is square",
    "Math is hard",
    "English is also hard",
    "Need another sentence",
    "Almost there.",
    "Tenth sentence is the last one",
]

# Number of times the toy corpus is repeated (10 sentences -> 1M rows).
NUM_COPIES = 100_000

# Binary-quantized embeddings; the view below treats them as uint8 data
# (presumably one bit per embedding dimension, packed -- confirm against the
# sentence-transformers documentation).
dbvecs = model.encode(sentences, precision="ubinary")

# Reinterpret every 4 consecutive uint8s as one uint32. The byte size of a
# vector is unchanged, but it has 4x fewer elements, so the XOR/popcount in
# the search touches 4x fewer words per row.
# uint64 words would be preferable, but the original author notes that JAX
# does not support them (NOTE(review): 64-bit dtypes are disabled in JAX
# unless jax_enable_x64 is set -- confirm).
dbvecs = dbvecs.view("uint32")

dbvecs = jnp.array(dbvecs)

# Simulate having 1M vectors to search. jnp.tile repeats the rows in the same
# order as stacking NUM_COPIES copies, without first building a 100,000-element
# Python list of arrays.
dbvecs = jnp.tile(dbvecs, (NUM_COPIES, 1))
sentences = sentences * NUM_COPIES

@functools.partial(jax.jit, static_argnames=["k", "recall_target"])
def get_nearest_k(qvec, dbvecs, k=5, recall_target=0.95):
    """Find the k rows of ``dbvecs`` closest to ``qvec`` in Hamming distance.

    Args:
        qvec: Integer array holding the packed query bits. It is XOR-ed
            against ``dbvecs`` (presumably shaped ``(1, words)`` -- confirm
            against the caller).
        dbvecs: Integer array of packed database vectors, reduced over its
            last axis (presumably shaped ``(n, words)``).
        k: Number of neighbours to return. Static under ``jax.jit``.
        recall_target: Forwarded to ``jax.lax.approx_max_k``. Static under
            ``jax.jit``.

    Returns:
        A ``(dists, indices)`` tuple: the float32 Hamming distances of the
        selected rows and their positions in ``dbvecs``.
    """
    # XOR leaves a 1 wherever query and database bits disagree; counting the
    # set bits of every word and summing over the last axis yields the
    # Hamming distance per database row.
    mismatched_bits = jax.lax.population_count(
        jax.lax.bitwise_xor(qvec, dbvecs)
    )
    hamming = jnp.sum(mismatched_bits, axis=-1).astype(jnp.float32)

    # The original author found the min variant slow, so the search runs as a
    # max over negated distances and the sign is flipped back on return.
    neg_best, best_idx = jax.lax.approx_max_k(
        jnp.negative(hamming), k=k, recall_target=recall_target
    )
    return jnp.negative(neg_best), best_idx

# Encode the query with the same "ubinary" precision and uint32 view as the
# database vectors so the two can be XOR-ed word by word.
t0 = time.perf_counter()
qvec = model.encode(["Difficult school subject"], precision="ubinary")
qvec = qvec.view("uint32")
qvec = jnp.array(qvec)
t1 = time.perf_counter()
print(f"Encoded query string into vector in {(t1-t0)*1000:.1f}ms")

# Warmup: jax.jit compiles once per input shape, so warm up on the full
# database (not a slice of it); otherwise the timed call below would include
# tracing and compilation for the new shape.
jax.block_until_ready(get_nearest_k(qvec, dbvecs))

# JAX dispatches work asynchronously; block on the outputs so the timer
# measures the search itself rather than just the dispatch.
t0 = time.perf_counter()
dists, indices = jax.block_until_ready(get_nearest_k(qvec, dbvecs))
t1 = time.perf_counter()
print(f"Searched {len(dbvecs)} vectors in {(t1-t0)*1000:.1f}ms")

# First row: the matched sentences; second row: their Hamming distances.
print(
    np.vstack([np.array(sentences)[indices], dists])
)
# Sample output (timings are machine-dependent):
"""
Encoded query string into vector in 12.8ms
Searched 1000000 vectors in 15.5ms
[['Math is hard' 'Math is hard' 'Math is hard' 'Math is hard' 'Math is hard']
 ['114.0' '114.0' '114.0' '114.0' '114.0']]
"""
	import functools
	import jax
	import numpy as np
	import jax.numpy as jnp
	from sentence_transformers import SentenceTransformer

	# Load the all-MiniLM-L6-v2 sentence encoder and move it to the CPU.
	model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
	model = model.to("cpu")

	# Toy corpus of ten sentences; it is replicated below to simulate a large
	# database.
	sentences = [
	    "This is the first sentence.",
	    "Here is another sentence.",
	    "Cats are red",
	    "Dogs are blue",
	    "Venus is square",
	    "Math is hard",
	    "English is also hard",
	    "Need another sentence",
	    "Almost there.",
	    "Tenth sentence is the last one",
	]

	# Number of times the toy corpus is repeated (10 sentences -> 1M rows).
	NUM_COPIES = 100_000

	# Binary-quantized embeddings; the view below treats them as uint8 data
	# (presumably one bit per embedding dimension, packed -- confirm against
	# the sentence-transformers documentation).
	dbvecs = model.encode(sentences, precision="ubinary")

	# Reinterpret every 4 consecutive uint8s as one uint32. The byte size of a
	# vector is unchanged, but it has 4x fewer elements, so the XOR/popcount
	# in the search touches 4x fewer words per row.
	# uint64 words would be preferable, but the original author notes that JAX
	# does not support them (NOTE(review): 64-bit dtypes are disabled in JAX
	# unless jax_enable_x64 is set -- confirm).
	dbvecs = dbvecs.view("uint32")

	dbvecs = jnp.array(dbvecs)

	# Simulate having 1M vectors to search. jnp.tile repeats the rows in the
	# same order as stacking NUM_COPIES copies, without first building a
	# 100,000-element Python list of arrays.
	dbvecs = jnp.tile(dbvecs, (NUM_COPIES, 1))
	sentences = sentences * NUM_COPIES

	@functools.partial(jax.jit, static_argnames=["k", "recall_target"])
	def get_nearest_k(qvec, dbvecs, k=5, recall_target=0.95):
	    """Return the k rows of ``dbvecs`` nearest to ``qvec`` by Hamming distance.

	    Args:
	        qvec: Integer array of packed query bits, XOR-ed against ``dbvecs``
	            (presumably shaped ``(1, words)`` -- confirm against the caller).
	        dbvecs: Integer array of packed database vectors, reduced over its
	            last axis (presumably shaped ``(n, words)``).
	        k: Number of neighbours to return. Static under ``jax.jit``.
	        recall_target: Forwarded to ``jax.lax.approx_max_k``. Static under
	            ``jax.jit``.

	    Returns:
	        A ``(dists, indices)`` tuple: float32 Hamming distances of the
	        selected rows and their positions in ``dbvecs``.
	    """
	    # Bits that differ between query and database survive the XOR.
	    xor_result = jax.lax.bitwise_xor(qvec, dbvecs)

	    # Compute the population count (number of 1 bits) and sum along the
	    # last axis to get one Hamming distance per database row.
	    dists = jax.lax.population_count(xor_result).sum(axis=-1)
	    dists = dists.astype(jnp.float32)

	    # min was slow for the original author, so use max with flipped
	    # distances and flip the sign back on return.
	    dists, indices = jax.lax.approx_max_k(-dists, k=k, recall_target=recall_target)
	    return -dists, indices

	# Encode the query with the same "ubinary" precision and uint32 view as
	# the database vectors so the two can be XOR-ed word by word.
	t0 = time.perf_counter()
	qvec = model.encode(["Difficult school subject"], precision="ubinary")
	qvec = qvec.view("uint32")
	qvec = jnp.array(qvec)
	t1 = time.perf_counter()
	print(f"Encoded query string into vector in {(t1-t0)*1000:.1f}ms")

	# Warmup: jax.jit compiles once per input shape, so warm up on the full
	# database (not a slice of it); otherwise the timed call below would
	# include tracing and compilation for the new shape.
	jax.block_until_ready(get_nearest_k(qvec, dbvecs))

	# JAX dispatches work asynchronously; block on the outputs so the timer
	# measures the search itself rather than just the dispatch.
	t0 = time.perf_counter()
	dists, indices = jax.block_until_ready(get_nearest_k(qvec, dbvecs))
	t1 = time.perf_counter()
	print(f"Searched {len(dbvecs)} vectors in {(t1-t0)*1000:.1f}ms")

	# First row: the matched sentences; second row: their Hamming distances.
	print(
	    np.vstack([np.array(sentences)[indices], dists])
	)
	# Sample output (timings are machine-dependent):
	"""
	Encoded query string into vector in 12.8ms
	Searched 1000000 vectors in 15.5ms
	[['Math is hard' 'Math is hard' 'Math is hard' 'Math is hard' 'Math is hard']
	['114.0' '114.0' '114.0' '114.0' '114.0']]
	"""