Skip to content

Instantly share code, notes, and snippets.

View jobergum's full-sized avatar
🎯
Focusing

Jo Kristian Bergum jobergum

🎯
Focusing
View GitHub Profile
vespa-fbench -n 1 -c 0 -q /tmp/query.txt -p 5 -s 120 localhost 8080
*** HTTP keep-alive statistics ***
connection reuse count -- 7297
***************** Benchmark Summary *****************
clients: 1
ran for: 24 seconds
cycle time: 0 ms
lower response limit: 0 bytes
skipped requests: 0
<engine>
<proton>
<tuning>
<searchnode>
<summary>
<io><read>directio</read></io>
<store>
<cache>
<maxsize-percent>5</maxsize-percent>
<compression><type>none</type></compression>
schema passage {
document passage {
field doc_id type long {} #duplicated for every passage extracted from doc
field title type string {} #duplicated for every passage from doc
field passage type string {}
field embedding type tensor<float>(x[768]) {}
}
rank-profile hybrid {
inputs {
query(q): tensor<float>(x[768])
@jobergum
jobergum / create_feed.py
Last active January 19, 2022 21:39
Vespa grouping performance question
import json
import numpy as np
for i in range(0,10000000):
doc = {
"put": "id:foo:grouping::%i" % i,
"fields": {
"a1": np.random.randint(0,10),
"a2": np.random.randint(0,10),
"a3": np.random.randint(0,10),
{
trace: {
children: [
{
message: "Using query profile 'default' of type 'root'"
},
{
children: [
{
timestamp: 0,
<?xml version="1.0" encoding="utf-8" ?>
<services version="1.0">
<admin version="2.0">
<!-- Logserver where logs are forwarded from all nodes -->
<logserver hostalias="admin0" />
<!-- Configserver stores state in zookeeper -->
<configservers>
<configserver hostalias="admin1" />
<configserver hostalias="admin2" />
<configserver hostalias="admin3" />
{
"yql": "select id from vector where ([{\"targetHits\": 100, \"approximate\":true}]nearestNeighbor(doc_embedding,q1_embedding));",
"hits": 100,
"ranking.features.query(q1_embedding)": [0.21,0.12,....],
"ranking.features.query(q2_embedding)": [0.21,0.12,....],
"ranking.features.query(threshold)": 0.01,
"ranking.profile": "threshold-something-else"
}
<container id='feed' version='1.0'>
<document-api/>
<nodes>
<node hostalias="feednode1"/>
<node hostalias="feednode2"/>
<node hostalias="feednode3"/>
</nodes>
</container>
<container id='default' version='1.0'>
import torch
from transformers import BertPreTrainedModel
from transformers import BertModel
class SentenceEncoder(BertPreTrainedModel):
def __init__(self,config):
super().__init__(config)
self.bert = BertModel(config)
self.init_weights()
[
{
"question": "who ran the fastest 40 yard dash in the nfl",
"answers": [
"Jakeem Grant",
"John Ross"
],
"ctxs": [
{
"id": 6036353,