This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import pyarrow.parquet as pq | |
from sklearn.neighbors import NearestNeighbors | |
# load data/%d-en.parquet files into a single numpy matrix | |
# vector dimensions are 1024 | |
# load data | |
tbls = [] | |
for i in range(10): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pyarrow.parquet as pq | |
import numpy as np | |
DATA_SETS =[ | |
{"name": "wiki768", "files": [ | |
"train-00000-of-00004-1a1932c9ca1c7152.parquet", | |
"train-00001-of-00004-f4a4f5540ade14b4.parquet", | |
"train-00002-of-00004-ff770df3ab420d14.parquet", | |
"train-00003-of-00004-85b3dbbc960e92ec.parquet", |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import os | |
import subprocess | |
import benchUtil | |
import constants | |
LUCENE_CHECKOUT = 'lucene_candidate' | |
# test parameters. This script will run KnnGraphTester on every combination of these parameters |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import org.openjdk.jmh.annotations.Benchmark; | |
import org.openjdk.jmh.annotations.BenchmarkMode; | |
import org.openjdk.jmh.annotations.Fork; | |
import org.openjdk.jmh.annotations.Measurement; | |
import org.openjdk.jmh.annotations.Mode; | |
import org.openjdk.jmh.annotations.OutputTimeUnit; | |
import org.openjdk.jmh.annotations.Param; | |
import org.openjdk.jmh.annotations.Scope; | |
import org.openjdk.jmh.annotations.Setup; | |
import org.openjdk.jmh.annotations.State; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import loaders | |
from sentence_transformers import SentenceTransformer | |
# Load queries, qrels, etc. and create embeddings for the queries | |
queries = loaders.load_jsonl(jsonl_path=Path("./data/queries.jsonl")) | |
embedding_model = SentenceTransformer(model_id, device="mps") | |
query_embeddings = embedding_model.encode([d['text'] for d in queries]) | |
query_embeddings = query_embeddings.tolist() | |
query_and_embeddings = [dict(item, **{'embedding': embedding}) for (item, embedding) in zip(queries, query_embeddings)] | |
qrels = loaders.load_beir_qrels(qrels_file=Path("./data/qrels/test.tsv")) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
------------------------------------------------------ | |
_______ __ _____ | |
/ ____(_)___ ____ _/ / / ___/_________ ________ | |
/ /_ / / __ \/ __ `/ / \__ \/ ___/ __ \/ ___/ _ \ | |
/ __/ / / / / / /_/ / / ___/ / /__/ /_/ / / / __/ | |
/_/ /_/_/ /_/\__,_/_/ /____/\___/\____/_/ \___/ | |
------------------------------------------------------ | |
| Metric | Task | Baseline | Contender | Diff | Unit | Diff % | | |
|--------------------------------------------------------------:|---------------------------------------------:|---------------:|---------------:|-------------:|-------:|---------:| |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package org.apache.pylucene.codecs; | |
import org.apache.lucene.codecs.lucene95.Lucene95Codec; | |
import org.apache.lucene.codecs.KnnVectorsFormat; | |
public class PyLucene95Codec extends Lucene95Codec { | |
private long pythonObject; | |
public void pythonExtension(long pythonObject){ | |
this.pythonObject = pythonObject; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package org.apache.pylucene.codecs; | |
import org.apache.lucene.codecs.lucene94.Lucene94Codec; | |
import org.apache.lucene.codecs.KnnVectorsFormat; | |
public class PyLucene94Codec extends Lucene94Codec { | |
private long pythonObject; | |
public void pythonExtension(long pythonObject){ | |
this.pythonObject = pythonObject; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
ann-benchmarks interface for Apache Lucene. | |
""" | |
import sklearn.preprocessing | |
import numpy as np | |
from struct import Struct | |
import lucene |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import org.openjdk.jmh.annotations.Benchmark; | |
import org.openjdk.jmh.annotations.BenchmarkMode; | |
import org.openjdk.jmh.annotations.Fork; | |
import org.openjdk.jmh.annotations.Level; | |
import org.openjdk.jmh.annotations.Measurement; | |
import org.openjdk.jmh.annotations.Mode; | |
import org.openjdk.jmh.annotations.OutputTimeUnit; | |
import org.openjdk.jmh.annotations.Param; | |
import org.openjdk.jmh.annotations.Scope; | |
import org.openjdk.jmh.annotations.Setup; |
NewerOlder