Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 31 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ All the database client supported
| pinecone | `pip install vectordb-bench[pinecone]` |
| weaviate | `pip install vectordb-bench[weaviate]` |
| elastic, aliyun_elasticsearch| `pip install vectordb-bench[elastic]` |
| pgvector, pgvectorscale, pgdiskann, alloydb | `pip install vectordb-bench[pgvector]` |
| pgvector, pgvectorscale, pgdiskann, alloydb, vectorchord | `pip install vectordb-bench[pgvector]` |
| pgvecto.rs | `pip install vectordb-bench[pgvecto_rs]` |
| redis | `pip install vectordb-bench[redis]` |
| memorydb | `pip install vectordb-bench[memorydb]` |
Expand Down Expand Up @@ -86,6 +86,7 @@ Options:
Commands:
pgvectorhnsw
pgvectorivfflat
vectorchordrq
test
weaviate
```
Expand Down Expand Up @@ -179,6 +180,34 @@ Options:
--help Show this message and exit.
```

### Run VectorChord (vchordrq) from command line

VectorChord is a PostgreSQL extension for scalable vector similarity search; its `vchordrq` index combines IVF partitioning with RaBitQ quantization.
It is fully compatible with pgvector data types and is designed to deliver faster queries and index builds than pgvector.

```shell
vectordbbench vectorchordrq \
--user-name postgres --password '<password>' \
--host localhost --port 5432 --db-name vectordb \
--case-type Performance1536D50K \
--lists 1000 --probes 10 --epsilon 1.9 \
--spherical-centroids --build-threads 8 \
--max-parallel-workers 15
```

Key VectorChord-specific options:

| Option | Description |
|--------|-------------|
| `--lists` | Number of IVF lists for vchordrq index |
| `--probes` | Number of probes during search (default: 10) |
| `--epsilon` | Reranking precision factor, 0.0-4.0 (default: 1.9) |
| `--residual-quantization` | Enable residual quantization |
| `--spherical-centroids` | L2-normalize centroids (recommended for cosine/IP) |
| `--build-threads` | Number of threads for index building (1-255) |
| `--degree-of-parallelism` | Degree of parallelism for index build (1-256) |
| `--max-parallel-workers` | Sets max_parallel_workers & max_parallel_maintenance_workers |
| `--max-scan-tuples` | Max tuples to scan before stopping (-1 for unlimited) |

### Run awsopensearch from command line

```shell
Expand Down Expand Up @@ -756,7 +785,7 @@ Now we can only run one task at the same time.
### Code Structure
![image](https://github.com/zilliztech/VectorDBBench/assets/105927039/8c06512e-5419-4381-b084-9c93aed59639)
### Client
Our client module is designed with flexibility and extensibility in mind, aiming to integrate APIs from different systems seamlessly. As of now, it supports Milvus, Zilliz Cloud, Elastic Search, Pinecone, Qdrant Cloud, Weaviate Cloud, PgVector, Redis, Chroma, CockroachDB, etc. Stay tuned for more options, as we are consistently working on extending our reach to other systems.
Our client module is designed with flexibility and extensibility in mind, aiming to integrate APIs from different systems seamlessly. As of now, it supports Milvus, Zilliz Cloud, Elastic Search, Pinecone, Qdrant Cloud, Weaviate Cloud, PgVector, VectorChord, Redis, Chroma, CockroachDB, etc. Stay tuned for more options, as we are consistently working on extending our reach to other systems.
### Benchmark Cases
We've developed lots of comprehensive benchmark cases to test vector databases' various capabilities, each designed to give you a different piece of the puzzle. These cases are categorized into four main types:
#### Capacity Case
Expand Down
14 changes: 14 additions & 0 deletions vectordb_bench/backend/clients/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ class DB(Enum):
Zvec = "Zvec"
Endee = "Endee"
Lindorm = "Lindorm"
VectorChord = "VectorChord"
PolarDB = "PolarDB"

@property
Expand Down Expand Up @@ -247,6 +248,10 @@ def init_cls(self) -> type[VectorDB]: # noqa: PLR0911, PLR0912, C901, PLR0915

return LindormVector

if self == DB.VectorChord:
from .vectorchord.vectorchord import VectorChord

return VectorChord
if self == DB.PolarDB:
from .polardb.polardb import PolarDB

Expand Down Expand Up @@ -441,6 +446,10 @@ def config_cls(self) -> type[DBConfig]: # noqa: PLR0911, PLR0912, C901, PLR0915

return LindormConfig

if self == DB.VectorChord:
from .vectorchord.config import VectorChordConfig

return VectorChordConfig
if self == DB.PolarDB:
from .polardb.config import PolarDBConfig

Expand Down Expand Up @@ -617,6 +626,11 @@ def case_config_cls( # noqa: C901, PLR0911, PLR0912, PLR0915

return _lindorm_vector_case_config.get(index_type)

if self == DB.VectorChord:
from .vectorchord.config import _vectorchord_case_config

return _vectorchord_case_config.get(index_type)

# DB.Pinecone, DB.Redis
return EmptyDBCaseConfig

Expand Down
2 changes: 2 additions & 0 deletions vectordb_bench/backend/clients/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ class IndexType(StrEnum):
GPU_IVF_PQ = "GPU_IVF_PQ"
GPU_CAGRA = "GPU_CAGRA"
SCANN = "scann"
VCHORDRQ = "vchordrq"
VCHORDG = "vchordg"
SCANN_MILVUS = "SCANN_MILVUS"
Hologres_HGraph = "HGraph"
Hologres_Graph = "Graph"
Expand Down
Empty file.
267 changes: 267 additions & 0 deletions vectordb_bench/backend/clients/vectorchord/cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,267 @@
import os
from typing import Annotated, Unpack

import click
from pydantic import SecretStr

from vectordb_bench.backend.clients import DB

from ....cli.cli import (
CommonTypedDict,
cli,
click_parameter_decorators_from_typed_dict,
run,
)


class VectorChordTypedDict(CommonTypedDict):
    """Options shared by every VectorChord CLI command.

    Each field pairs the Python type of the parsed value with the
    ``click.option`` that declares the corresponding command-line flag.
    The index-specific TypedDicts in this module subclass this one and are
    passed to ``click_parameter_decorators_from_typed_dict``.
    """

    # --- Postgres connection ---------------------------------------------
    user_name: Annotated[
        str,
        click.option("--user-name", required=True, type=str, help="Db username"),
    ]
    # Not required on the command line: when the flag is omitted the default
    # callable reads $POSTGRES_PASSWORD, falling back to an empty string.
    password: Annotated[
        str,
        click.option(
            "--password",
            default=lambda: os.getenv("POSTGRES_PASSWORD", ""),
            show_default="$POSTGRES_PASSWORD",
            type=str,
            help="Postgres database password",
        ),
    ]

    host: Annotated[
        str,
        click.option("--host", required=True, type=str, help="Db host"),
    ]
    port: Annotated[
        int,
        click.option(
            "--port",
            required=False,
            default=5432,
            show_default=True,
            type=int,
            help="Postgres database port",
        ),
    ]
    db_name: Annotated[
        str,
        click.option("--db-name", required=True, type=str, help="Db name"),
    ]

    # --- Settings common to all VectorChord index types --------------------
    # No default is declared, so the parsed value is None when the flag is
    # not given.
    max_parallel_workers: Annotated[
        int | None,
        click.option(
            "--max-parallel-workers",
            required=False,
            type=int,
            help="Sets the maximum number of parallel workers for index creation",
        ),
    ]
    quantization_type: Annotated[
        str | None,
        click.option(
            "--quantization-type",
            default="vector",
            show_default=True,
            type=click.Choice(["vector", "halfvec", "rabitq8", "rabitq4"]),
            help="Quantization type for vectors",
        ),
    ]


class VectorChordRQTypedDict(VectorChordTypedDict):
    """CLI options for benchmarking a VectorChord ``vchordrq`` index.

    Extends the shared VectorChord options with the index-build and search
    settings exposed for ``vchordrq``.

    Options whose help text advertises a valid range are declared with
    ``click.IntRange`` / ``click.FloatRange`` so that an out-of-range value
    is rejected while the command line is parsed, rather than being passed
    on to the case config and failing later in the run. Options declared
    without a default are ``None`` when the flag is omitted.
    """

    lists: Annotated[
        int | None,
        click.option(
            "--lists",
            type=int,
            help="Number of IVF lists for vchordrq index",
        ),
    ]
    probes: Annotated[
        int | None,
        click.option(
            "--probes",
            type=int,
            help="Number of probes during search",
            default=10,
            show_default=True,
        ),
    ]
    epsilon: Annotated[
        float | None,
        click.option(
            "--epsilon",
            # Enforce the 0.0-4.0 range promised by the help text.
            type=click.FloatRange(0.0, 4.0),
            help="Reranking precision factor (0.0-4.0, higher is more accurate but slower)",
            default=1.9,
            show_default=True,
        ),
    ]
    residual_quantization: Annotated[
        bool,
        click.option(
            "--residual-quantization/--no-residual-quantization",
            type=bool,
            help="Enable residual quantization for improved accuracy",
            default=False,
            show_default=True,
        ),
    ]
    rerank_in_table: Annotated[
        bool,
        click.option(
            "--rerank-in-table/--no-rerank-in-table",
            type=bool,
            help="Read vectors from table instead of storing in index (saves storage, degrades query performance)",
            default=False,
            show_default=True,
        ),
    ]
    spherical_centroids: Annotated[
        bool,
        click.option(
            "--spherical-centroids/--no-spherical-centroids",
            type=bool,
            help="L2-normalize centroids during K-means (recommended for cosine/IP)",
            default=False,
            show_default=True,
        ),
    ]
    build_threads: Annotated[
        int | None,
        click.option(
            "--build-threads",
            # Enforce the 1-255 range promised by the help text.
            type=click.IntRange(1, 255),
            help="Number of threads for index building (range: 1-255)",
        ),
    ]
    degree_of_parallelism: Annotated[
        int | None,
        click.option(
            "--degree-of-parallelism",
            # Enforce the 1-256 range promised by the help text.
            type=click.IntRange(1, 256),
            help="Degree of parallelism for index build (range: 1-256, default: 32)",
        ),
    ]
    max_scan_tuples: Annotated[
        int | None,
        click.option(
            "--max-scan-tuples",
            # -1 is the documented "unlimited" sentinel; anything lower is invalid.
            type=click.IntRange(min=-1),
            help="Max tuples to scan before stopping (-1 for unlimited)",
        ),
    ]


@cli.command()
@click_parameter_decorators_from_typed_dict(VectorChordRQTypedDict)
def VectorChordRQ(
    **parameters: Unpack[VectorChordRQTypedDict],
):
    # NOTE: documented with comments rather than a docstring on purpose --
    # click would surface a docstring as the command's help text.
    #
    # Entry point for the VectorChord vchordrq command: builds the connection
    # config and the vchordrq case config from the parsed options, then hands
    # both to the shared `run` helper.

    # Imported here rather than at module top, so the config module is only
    # loaded when the command is actually invoked.
    from .config import VectorChordConfig, VectorChordRQConfig

    # Connection settings; user name and password are wrapped in SecretStr.
    connection_config = VectorChordConfig(
        db_label=parameters["db_label"],
        user_name=SecretStr(parameters["user_name"]),
        password=SecretStr(parameters["password"]),
        host=parameters["host"],
        port=parameters["port"],
        db_name=parameters["db_name"],
    )

    # Parsed options forwarded one-to-one (same key, same value) to the
    # vchordrq case config.
    case_option_names = (
        "quantization_type",
        "lists",
        "probes",
        "epsilon",
        "residual_quantization",
        "rerank_in_table",
        "spherical_centroids",
        "build_threads",
        "degree_of_parallelism",
        "max_scan_tuples",
        "max_parallel_workers",
    )
    case_config = VectorChordRQConfig(**{name: parameters[name] for name in case_option_names})

    # The full option dict is forwarded as well, alongside the two configs.
    run(
        db=DB.VectorChord,
        db_config=connection_config,
        db_case_config=case_config,
        **parameters,
    )


class VectorChordGraphTypedDict(VectorChordTypedDict):
    """CLI options for benchmarking a VectorChord graph index.

    Extends the shared VectorChord options with the graph-build and search
    settings consumed by the ``VectorChordGraph`` command.

    Options whose help text states the permitted values are declared with
    ``click.IntRange`` so that an invalid value is rejected while the
    command line is parsed. Options declared without a default are ``None``
    when the flag is omitted; the "(default: N)" notes in their help text
    describe the value used in that case.
    """

    m: Annotated[
        int | None,
        click.option(
            "--m",
            type=int,
            help="Max neighbors per vertex (default: 32)",
        ),
    ]
    ef_construction: Annotated[
        int | None,
        click.option(
            "--ef-construction",
            type=int,
            help="Dynamic list size during insertion (default: 64)",
        ),
    ]
    bits: Annotated[
        int | None,
        click.option(
            "--bits",
            # Only 1 and 2 are valid according to the help text.
            type=click.IntRange(1, 2),
            help="RaBitQ quantization ratio (1 or 2, default: 2)",
        ),
    ]
    ef_search: Annotated[
        int | None,
        click.option(
            "--ef-search",
            type=int,
            # show_default=True already appends the default to the help
            # output, so it is not repeated in the help string.
            help="Dynamic list size for search",
            default=64,
            show_default=True,
        ),
    ]
    beam_search: Annotated[
        int | None,
        click.option(
            "--beam-search",
            type=int,
            help="Batch vertex access width during search (default: 1)",
        ),
    ]
    max_scan_tuples: Annotated[
        int | None,
        click.option(
            "--max-scan-tuples",
            # -1 is the documented "unlimited" sentinel; anything lower is invalid.
            type=click.IntRange(min=-1),
            help="Max tuples to scan before stopping (-1 for unlimited)",
        ),
    ]


@cli.command()
@click_parameter_decorators_from_typed_dict(VectorChordGraphTypedDict)
def VectorChordGraph(
    **parameters: Unpack[VectorChordGraphTypedDict],
):
    # NOTE: documented with comments rather than a docstring on purpose --
    # click would surface a docstring as the command's help text.
    #
    # Entry point for the VectorChord graph-index command: builds the
    # connection config and the graph case config from the parsed options,
    # then hands both to the shared `run` helper.

    # Imported here rather than at module top, so the config module is only
    # loaded when the command is actually invoked.
    from .config import VectorChordConfig, VectorChordGraphConfig

    # Connection settings; user name and password are wrapped in SecretStr.
    connection_config = VectorChordConfig(
        db_label=parameters["db_label"],
        user_name=SecretStr(parameters["user_name"]),
        password=SecretStr(parameters["password"]),
        host=parameters["host"],
        port=parameters["port"],
        db_name=parameters["db_name"],
    )

    # Parsed options forwarded one-to-one (same key, same value) to the
    # graph case config.
    case_option_names = (
        "quantization_type",
        "m",
        "ef_construction",
        "bits",
        "ef_search",
        "beam_search",
        "max_parallel_workers",
        "max_scan_tuples",
    )
    case_config = VectorChordGraphConfig(**{name: parameters[name] for name in case_option_names})

    # The full option dict is forwarded as well, alongside the two configs.
    run(
        db=DB.VectorChord,
        db_config=connection_config,
        db_case_config=case_config,
        **parameters,
    )
Loading
Loading