Adding minimal scaling tutorial Sample app

kaibor737 · kaibor737 · commit 31cb1c6f9152 · 2026-04-20T10:56:47.000+02:00
diff --git a/scaling-tutorial/.vespaignore b/scaling-tutorial/.vespaignore
@@ -0,0 +1,6 @@
+# This file excludes unnecessary files from the application package. See
+# https://docs.vespa.ai/en/reference/vespaignore.html for more information.
+.DS_Store
+.gitignore
+README.md
+ext/
diff --git a/scaling-tutorial/ext/transform_ms_marco.py b/scaling-tutorial/ext/transform_ms_marco.py
@@ -0,0 +1,24 @@
+import json
+
+with (
+    open("ext/corpus.jsonl", "r") as infile, 
+    open("ext/corpus_transformed_full.jsonl", "w") as outfile_full,
+    open("ext/corpus_transformed_500000.jsonl", "w") as outfile_500000,
+    open("ext/corpus_transformed_50000.jsonl", "w") as outfile_50000,
+    open("ext/corpus_transformed_1000.jsonl", "w") as outfile_1000,
+    ):
+    for line in infile:
+        doc = json.loads(line)
+        doc_id = doc["docid"]
+        transformed = {
+            "put": f"id:msmarco:passage::{doc_id}",
+            "fields": {
+                "text": doc["text"],
+                "title": doc["title"],
+                "id": doc_id,
+            },
+        }
+        outfile_full.write(json.dumps(transformed) + "\n")
+        outfile_500000.write(json.dumps(transformed) + "\n")
+        outfile_50000.write(json.dumps(transformed) + "\n")
+        outfile_1000.write(json.dumps(transformed) + "\n")
diff --git a/scaling-tutorial/schemas/passage.sd b/scaling-tutorial/schemas/passage.sd
@@ -0,0 +1,163 @@
+# Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+schema passage {
+
+  # Fields supplied by the feed. ext/transform_ms_marco.py writes "id",
+  # "title" and "text" for every passage, so all three must be declared here;
+  # a put that carries an undeclared field is rejected unless the document API
+  # is configured to ignore undefined fields.
+  document passage {
+
+    # Passage identifier, kept as an attribute and returned in summaries.
+    field id type string {
+      indexing: summary | attribute
+    }
+    # Passage title as emitted by the transform script. Indexed and returned
+    # in summaries; it is not part of the default fieldset and no rank
+    # profile reads it.
+    field title type string {
+      indexing: summary | index
+    }
+    # Passage body: indexed with BM25 enabled (used by the bm25 rank
+    # profiles) and the input for the embedding fields declared below.
+    field text type string {
+      indexing: summary | index
+      index: enable-bm25
+    }
+  }
+  # Fieldset named "default"; it lists only the text field.
+  fieldset default {
+    fields: text
+  }
+
+  # Declared outside the document block: the value is derived at indexing time
+  # from the text field through the "tokenizer" component (services.xml) and
+  # stored as a 64-cell float tensor attribute. The cross-encoder rank profile
+  # reads it via attribute(text_token_ids).
+  field text_token_ids type tensor<float>(d0[64])  {
+    # Hugging Face tokenizer output: token ids used by the cross-encoder
+    indexing: input text | embed tokenizer | attribute 
+    attribute: paged
+  }
+
+  # 384-cell bfloat16 embedding of the text field, produced at indexing time by
+  # the e5_embedding_model component. Stored as an attribute and indexed with
+  # HNSW using the angular distance metric; the e5 rank profiles read it via
+  # closeness(field, e5_embedding) and distance(field, e5_embedding).
+  field e5_embedding type tensor<bfloat16>(x[384]) {
+    # Using the e5 embedding model defined in services.xml
+    indexing: input text | embed e5_embedding_model | attribute | index
+    attribute {
+      distance-metric: angular
+    }
+    index { # override default hnsw settings
+      hnsw {
+        max-links-per-node: 32
+        neighbors-to-explore-at-insert: 400
+      } 
+    }
+  }
+
+  # Embeddings of the text field produced by the colbert_embedding_model
+  # component: a mapped dimension dt with 16 int8 cells per entry. The rank
+  # profiles expand it with unpack_bits() before multiplying it with the
+  # x[128] query tensor query(qt).
+  field colbert_embeddings type tensor<int8>(dt{}, x[16]) {
+    # No index - used for ranking, not retrieval
+    indexing: input text | embed colbert_embedding_model | attribute
+    attribute: paged
+  }
+
+  # ONNX model evaluated as onnx(ranker) by the cross_encoder function of the
+  # e5-colbert-cross-encoder-rrf rank profile. Each model input is bound to
+  # the rank-profile function of the same name declared in that profile.
+  onnx-model ranker {
+    file: models/model.onnx
+    input input_ids: input_ids
+    input attention_mask: attention_mask
+    input token_type_ids: token_type_ids
+    # NOTE(review): requests GPU device 0 - confirm the behavior on nodes
+    # without a GPU (services.xml does not request one).
+    gpu-device: 0
+  }
+
+  # Lexical baseline: the first-phase score is the BM25 score of the text
+  # field (BM25 is enabled on that field in the document definition).
+  rank-profile bm25 {
+    first-phase {
+      expression: bm25(text)
+    }
+  }
+
+  # Dense baseline: declares the 384-cell float query tensor query(q) and
+  # scores hits in first phase by closeness to the e5_embedding field.
+  rank-profile e5-similarity {
+    inputs {
+      query(q) tensor<float>(x[384])
+    }
+    first-phase {
+      expression: closeness(field, e5_embedding)
+    } 
+  }
+
+  # Builds on e5-similarity (no first-phase is declared here) and re-ranks
+  # with a ColBERT-style MaxSim score in second phase.
+  rank-profile e5-colbert inherits e5-similarity {
+    inputs {
+      query(q) tensor<float>(x[384])
+      query(qt) tensor<float>(qt{},x[128])
+    }
+
+    # For every query token (qt): dot product over x against each unpacked
+    # document token vector, maximum over document tokens (dt); the maxima
+    # are then summed over the query tokens.
+    function max_sim() {
+      expression: sum(reduce(sum(query(qt) * unpack_bits(attribute(colbert_embeddings)), x), max, dt), qt)
+    }
+
+    # Cosine of the distance reported for the e5_embedding field (that field
+    # uses the angular distance metric).
+    function cos_sim() {
+      expression: cos(distance(field, e5_embedding))
+    }
+
+    second-phase {
+      rerank-count: 100
+      expression: max_sim()
+    }
+
+    match-features: max_sim() cos_sim()
+  }
+
+  # Same as e5-colbert, but with BM25 over the text field as the first-phase
+  # score.
+  rank-profile bm25-colbert inherits e5-colbert {
+    # Overrides the first-phase expression of the e5-colbert rank-profile
+    first-phase {
+      expression: bm25(text)
+    }
+  }
+
+  # Extends e5-colbert with a global phase (rerank-count 200) whose score is
+  # the sum of reciprocal_rank over the cos_sim and max_sim functions declared
+  # in e5-colbert.
+  rank-profile e5-colbert-rrf inherits e5-colbert {
+    global-phase {
+      rerank-count: 200
+      expression: reciprocal_rank(cos_sim) + reciprocal_rank(max_sim)
+    }
+    match-features: max_sim() cos_sim()
+  }
+
+  # Stand-alone profile (it inherits nothing): e5 cosine similarity in first
+  # phase, ColBERT MaxSim in second phase, and a global phase that fuses both
+  # with the output of the "ranker" ONNX model through reciprocal rank.
+  rank-profile e5-colbert-cross-encoder-rrf {
+    inputs {
+      query(query_token_ids) tensor<float>(d0[32])
+      query(qt) tensor<float>(qt{},x[128])
+      query(q) tensor<float>(x[384])
+    }
+
+    # The three inputs of the "ranker" ONNX model, each built from the query
+    # token ids and the document's text_token_ids attribute. 96 is passed as
+    # the first argument (presumably the combined sequence length: 32 query
+    # ids + 64 document ids - confirm against the Vespa reference).
+    function input_ids() {
+      expression: tokenInputIds(96, query(query_token_ids), attribute(text_token_ids))
+    }
+    function attention_mask() {
+      expression: tokenAttentionMask(96, query(query_token_ids), attribute(text_token_ids))
+    }
+    function token_type_ids() {
+      expression: tokenTypeIds(96, query(query_token_ids), attribute(text_token_ids))
+    }
+
+    # Cell {d0:0,d1:0} of the ONNX model output.
+    function cross_encoder() {
+      expression: onnx(ranker){d0:0,d1:0}
+    }
+
+    # Cosine of the distance reported for the e5_embedding field.
+    function e5_cos_sim() {
+      expression: cos(distance(field, e5_embedding))
+    }
+
+    # For every query token (qt): dot product over x against each unpacked
+    # document token vector, maximum over document tokens (dt); the maxima
+    # are then summed over the query tokens.
+    function colbert_max_sim() {
+      expression: sum(reduce(sum(query(qt) * unpack_bits(attribute(colbert_embeddings)), x), max, dt), qt)
+    }
+
+    first-phase {
+      expression: e5_cos_sim
+    }
+
+    second-phase {
+      rerank-count: 1000
+      expression: colbert_max_sim()
+    }
+
+    global-phase {
+      rerank-count: 12
+      expression: reciprocal_rank(e5_cos_sim) + reciprocal_rank(colbert_max_sim) + reciprocal_rank(cross_encoder)
+    }
+
+    match-features: colbert_max_sim e5_cos_sim
+  }
+}
diff --git a/scaling-tutorial/services.xml b/scaling-tutorial/services.xml
@@ -0,0 +1,61 @@
+<?xml version="1.0" encoding="utf-8" ?>
+<!-- Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -->
+<services version="1.0" xmlns:deploy="vespa" xmlns:preprocess="properties" minimum-required-vespa-version="8.311.28">
+
+  <!-- Container cluster: search and document API endpoints plus the embedder
+       and tokenizer components referenced by schemas/passage.sd. -->
+  <container id="default" version="1.0">
+
+    <!-- Node count and resources; this element applies to the dev environment only. -->
+    <nodes deploy:environment="dev" count="1">
+      <resources vcpu="1.0" memory="8Gb" architecture="arm64" storage-type="local" disk="59Gb"/>
+    </nodes>
+
+    <search/>
+    <document-api/>
+
+    <!-- Produces the e5_embedding field.
+         See https://docs.vespa.ai/en/embedding.html#huggingface-embedder -->
+    <component id="e5_embedding_model" type="hugging-face-embedder">
+      <transformer-model url="https://huggingface.co/intfloat/e5-small-v2/resolve/main/model.onnx"/>
+      <tokenizer-model url="https://huggingface.co/intfloat/e5-small-v2/raw/main/tokenizer.json"/>
+      <prepend>
+        <query>query:</query>
+        <document>passage:</document>
+      </prepend>
+    </component>
+
+    <!-- Produces the colbert_embeddings field.
+         See https://docs.vespa.ai/en/embedding.html#colbert-embedder -->
+    <component id="colbert_embedding_model" type="colbert-embedder">
+      <transformer-model url="https://huggingface.co/colbert-ir/colbertv2.0/resolve/main/model.onnx"/>
+      <tokenizer-model url="https://huggingface.co/colbert-ir/colbertv2.0/raw/main/tokenizer.json"/>
+    </component>
+
+    <!-- Produces the text_token_ids field from a tokenizer file in the application package.
+         See https://docs.vespa.ai/en/reference/embedding-reference.html#huggingface-tokenizer-embedder -->
+    <component id="tokenizer" type="hugging-face-tokenizer">
+      <model path="models/tokenizer.json"/>
+    </component>
+
+  </container>
+
+  <!-- Content cluster "msmarco": holds the passage document type in index
+       mode on a single node, with min-redundancy set to 1. -->
+  <content id="msmarco" version="1.0">
+    <min-redundancy>1</min-redundancy>
+    <documents>
+      <document mode="index" type="passage"/>
+    </documents>
+    <nodes count="1">
+      <resources vcpu="1.0" memory="8Gb" architecture="arm64" storage-type="local" disk="59Gb"/>
+    </nodes> 
+    <engine>
+      <proton>
+        <tuning>
+          <searchnode>
+            <!-- NOTE(review): persearch is 4 while the node is sized at
+                 vcpu="1.0"; confirm this is intended for the starting
+                 configuration. -->
+            <requestthreads>
+              <persearch>4</persearch>
+            </requestthreads>
+            <!-- Feeding concurrency set to 1.0. -->
+            <feeding>
+              <concurrency>1.0</concurrency>
+            </feeding>
+          </searchnode>
+        </tuning>
+      </proton>
+    </engine>
+  </content>
+
+</services>