Skip to content

Commit 31cb1c6

Browse files
committed
Adding minimal scaling tutorial Sample app
1 parent 2657359 commit 31cb1c6

4 files changed

Lines changed: 254 additions & 0 deletions

File tree

scaling-tutorial/.vespaignore

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
# Paths listed here are left out of the deployed application package.
# Format reference: https://docs.vespa.ai/en/reference/vespaignore.html
.DS_Store
.gitignore
README.md
ext/
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
"""Convert the raw passage corpus into Vespa JSONL feed files.

Reads ``ext/corpus.jsonl`` (one JSON object per line with ``docid``, ``text``
and ``title`` keys) and writes one Vespa ``put`` operation per document to
several output files: the full corpus plus size-capped subsets containing the
first 500000, 50000 and 1000 documents respectively.
"""
import json

# (output path, maximum number of documents); None means "no cap".
OUTPUTS = (
    ("ext/corpus_transformed_full.jsonl", None),
    ("ext/corpus_transformed_500000.jsonl", 500000),
    ("ext/corpus_transformed_50000.jsonl", 50000),
    ("ext/corpus_transformed_1000.jsonl", 1000),
)


def to_feed_operation(doc):
    """Return the Vespa ``put`` operation (as a dict) for one corpus record.

    ``doc`` must contain the keys ``docid``, ``text`` and ``title``;
    a missing key raises ``KeyError``.
    """
    doc_id = doc["docid"]
    return {
        "put": f"id:msmarco:passage::{doc_id}",
        "fields": {
            "text": doc["text"],
            "title": doc["title"],
            "id": doc_id,
        },
    }


def write_subsets(infile, sinks):
    """Stream ``infile`` and write each transformed record to the sinks.

    ``sinks`` is an iterable of ``(writable, limit)`` pairs. A record at
    zero-based position ``i`` is written to a sink only when its ``limit`` is
    ``None`` or ``i < limit``, so each sink receives at most ``limit`` records.
    """
    sinks = list(sinks)
    for index, line in enumerate(infile):
        # Serialize once and reuse the same string for every sink.
        record = json.dumps(to_feed_operation(json.loads(line))) + "\n"
        for outfile, limit in sinks:
            if limit is None or index < limit:
                outfile.write(record)


def main():
    """Open the corpus and all output files, then write the feed files."""
    (full_path, full_limit), (p500k, l500k), (p50k, l50k), (p1k, l1k) = OUTPUTS
    with (
        open("ext/corpus.jsonl", "r") as infile,
        open(full_path, "w") as outfile_full,
        open(p500k, "w") as outfile_500000,
        open(p50k, "w") as outfile_50000,
        open(p1k, "w") as outfile_1000,
    ):
        write_subsets(
            infile,
            (
                (outfile_full, full_limit),
                (outfile_500000, l500k),
                (outfile_50000, l50k),
                (outfile_1000, l1k),
            ),
        )


if __name__ == "__main__":
    main()
Lines changed: 163 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
# Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.

# Schema for the "passage" document type. Documents carry an id, a title and a
# text; token ids and embeddings are derived from the text at indexing time.
# The rank profiles below combine BM25, e5 vector closeness, ColBERT MaxSim and
# an ONNX cross-encoder.
schema passage {

    document passage {

        field id type string {
            indexing: summary | attribute
        }
        # The feed files produced for this application include a "title" field
        # in every put operation, so it must be declared here; otherwise the
        # feed is rejected for referencing an undefined field. Stored as
        # summary only, so matching and ranking are unaffected.
        field title type string {
            indexing: summary
        }
        field text type string {
            indexing: summary | index
            index: enable-bm25
        }
    }

    # Unqualified query terms search the text field only.
    fieldset default {
        fields: text
    }

    # Fields below are declared outside the document: they are computed from
    # "text" by the indexing expressions, not supplied in the feed.

    field text_token_ids type tensor<float>(d0[64]) {
        # hf tokenizer - token ids used by cross-encoder
        indexing: input text | embed tokenizer | attribute
        attribute: paged
    }

    field e5_embedding type tensor<bfloat16>(x[384]) {
        # Using the e5 embedding model defined in services.xml
        indexing: input text | embed e5_embedding_model | attribute | index
        attribute {
            distance-metric: angular
        }
        # override default hnsw settings
        index {
            hnsw {
                max-links-per-node: 32
                neighbors-to-explore-at-insert: 400
            }
        }
    }

    field colbert_embeddings type tensor<int8>(dt{}, x[16]) {
        # No index - used for ranking, not retrieval
        indexing: input text | embed colbert_embedding_model | attribute
        attribute: paged
    }

    # Cross-encoder model; its three inputs are bound to the rank-profile
    # functions of the same names (see e5-colbert-cross-encoder-rrf).
    onnx-model ranker {
        file: models/model.onnx
        input input_ids: input_ids
        input attention_mask: attention_mask
        input token_type_ids: token_type_ids
        gpu-device: 0
    }

    # Lexical ranking only.
    rank-profile bm25 {
        first-phase {
            expression: bm25(text)
        }
    }

    # Vector ranking: closeness between query(q) and the e5 embedding.
    rank-profile e5-similarity {
        inputs {
            query(q) tensor<float>(x[384])
        }
        first-phase {
            expression: closeness(field, e5_embedding)
        }
    }

    # e5 closeness in first phase (inherited), ColBERT MaxSim re-ranking of the
    # top 100 hits in second phase.
    rank-profile e5-colbert inherits e5-similarity {
        inputs {
            query(qt) tensor<float>(qt{},x[128])
            query(q) tensor<float>(x[384])
        }
        # Cosine of the angular distance to the e5 embedding.
        function cos_sim() {
            expression: cos(distance(field, e5_embedding))
        }
        # ColBERT MaxSim: per query token (qt), take the maximum dot product
        # over document tokens (dt), then sum over query tokens. unpack_bits
        # expands the int8 x[16] cells to match the x[128] query tensor.
        function max_sim() {
            expression {
                sum(
                    reduce(
                        sum(
                            query(qt) * unpack_bits(attribute(colbert_embeddings)), x
                        ),
                        max, dt
                    ),
                    qt
                )
            }
        }

        second-phase {
            rerank-count: 100
            expression: max_sim()
        }
        match-features: max_sim() cos_sim()
    }

    rank-profile bm25-colbert inherits e5-colbert {
        # Overrides the first-phase expression of the e5-colbert rank-profile
        first-phase {
            expression: bm25(text)
        }
    }

    # Same as e5-colbert, plus a global phase that fuses the two signals with
    # reciprocal rank fusion over the top 200 hits.
    rank-profile e5-colbert-rrf inherits e5-colbert {
        global-phase {
            rerank-count: 200
            expression: reciprocal_rank(cos_sim) + reciprocal_rank(max_sim)
        }
        match-features: max_sim() cos_sim()
    }

    # Three-stage profile: e5 cosine (first phase), ColBERT MaxSim on the top
    # 1000 (second phase), then reciprocal rank fusion of e5, ColBERT and the
    # cross-encoder over the top 12 hits (global phase).
    rank-profile e5-colbert-cross-encoder-rrf {
        inputs {
            query(q) tensor<float>(x[384])
            query(qt) tensor<float>(qt{},x[128])
            query(query_token_ids) tensor<float>(d0[32])
        }
        # Cross-encoder inputs: query and document token ids combined into a
        # sequence of length 96.
        function input_ids() {
            expression: tokenInputIds(96, query(query_token_ids), attribute(text_token_ids))
        }
        function token_type_ids() {
            expression: tokenTypeIds(96, query(query_token_ids), attribute(text_token_ids))
        }
        function attention_mask() {
            expression: tokenAttentionMask(96, query(query_token_ids), attribute(text_token_ids))
        }
        # Same MaxSim computation as max_sim() in e5-colbert.
        function colbert_max_sim() {
            expression {
                sum(
                    reduce(
                        sum(
                            query(qt) * unpack_bits(attribute(colbert_embeddings)), x
                        ),
                        max, dt
                    ),
                    qt
                )
            }
        }
        function e5_cos_sim() {
            expression: cos(distance(field, e5_embedding))
        }
        # Cell [0, 0] of the ranker model output is used as the score.
        function cross_encoder() {
            expression: onnx(ranker){d0:0,d1:0}
        }
        first-phase {
            expression: e5_cos_sim
        }
        second-phase {
            rerank-count: 1000
            expression: colbert_max_sim()
        }
        global-phase {
            rerank-count: 12
            expression {
                reciprocal_rank(e5_cos_sim) +
                reciprocal_rank(colbert_max_sim) +
                reciprocal_rank(cross_encoder)
            }
        }
        match-features: colbert_max_sim e5_cos_sim
    }
}

scaling-tutorial/services.xml

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
<?xml version="1.0" encoding="utf-8" ?>
<!-- Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -->
<services version="1.0" xmlns:deploy="vespa" xmlns:preprocess="properties" minimum-required-vespa-version="8.311.28">

    <!-- Stateless container cluster: serves queries and the document API, and hosts the embedder components. -->
    <container id="default" version="1.0">

        <nodes deploy:environment="dev" count="1">
            <resources vcpu="1.0" memory="8Gb" architecture="arm64" storage-type="local" disk="59Gb"/>
        </nodes>

        <search/>
        <document-api/>

        <!-- e5 text embedder, referenced by the schema as "e5_embedding_model".
             Docs: https://docs.vespa.ai/en/embedding.html#huggingface-embedder -->
        <component id="e5_embedding_model" type="hugging-face-embedder">
            <transformer-model url="https://huggingface.co/intfloat/e5-small-v2/resolve/main/model.onnx"/>
            <tokenizer-model url="https://huggingface.co/intfloat/e5-small-v2/raw/main/tokenizer.json"/>
            <prepend>
                <query>query:</query>
                <document>passage:</document>
            </prepend>
        </component>

        <!-- ColBERT embedder, referenced by the schema as "colbert_embedding_model".
             Docs: https://docs.vespa.ai/en/embedding.html#colbert-embedder -->
        <component id="colbert_embedding_model" type="colbert-embedder">
            <transformer-model url="https://huggingface.co/colbert-ir/colbertv2.0/resolve/main/model.onnx"/>
            <tokenizer-model url="https://huggingface.co/colbert-ir/colbertv2.0/raw/main/tokenizer.json"/>
        </component>

        <!-- Tokenizer component, referenced by the schema as "tokenizer".
             Docs: https://docs.vespa.ai/en/reference/embedding-reference.html#huggingface-tokenizer-embedder -->
        <component id="tokenizer" type="hugging-face-tokenizer">
            <model path="models/tokenizer.json"/>
        </component>

    </container>

    <!-- Content cluster storing and indexing "passage" documents. -->
    <content id="msmarco" version="1.0">
        <min-redundancy>1</min-redundancy>
        <documents>
            <document mode="index" type="passage"/>
        </documents>
        <nodes count="1">
            <resources vcpu="1.0" memory="8Gb" architecture="arm64" storage-type="local" disk="59Gb"/>
        </nodes>
        <engine>
            <proton>
                <tuning>
                    <searchnode>
                        <requestthreads>
                            <persearch>4</persearch>
                        </requestthreads>
                        <feeding>
                            <concurrency>1.0</concurrency>
                        </feeding>
                    </searchnode>
                </tuning>
            </proton>
        </engine>
    </content>

</services>

0 commit comments

Comments
 (0)