suitenumerique · StephanMeijer · May 5, 2026 · May 5, 2026
diff --git a/docs/env.md b/docs/env.md
@@ -13,8 +13,6 @@ These are the environment variables you can set for the `find-backend` container
 | API_USERS_LIST_THROTTLE_RATE_SUSTAINED          | Throttle rate for api                                                                                                       | 180/hour                                                                |
 | CACHES_DEFAULT_TIMEOUT                          | Cache default timeout                                                                                                       | 30                                                                      |
 | CACHES_KEY_PREFIX                               | The prefix used to every cache keys.                                                                                        | docs                                                                    |
-| CHUNK_SIZE                                      | approximate number of characters of document content chunks                                                                 | 512                                                                     |
-| CHUNK_OVERLAP                                   | approximate number of characters of document content overlapping                                                            | 50                                                                      |
 | DB_ENGINE                                       | Engine to use for database connections                                                                                      | django.db.backends.postgresql_psycopg2                                  |
 | DB_HOST                                         | Host of the database                                                                                                        | localhost                                                               |
 | DB_NAME                                         | Name of the database                                                                                                        | impress                                                                 |
@@ -41,16 +39,9 @@ These are the environment variables you can set for the `find-backend` container
 | DJANGO_SECRET_KEY                               | Secret key                                                                                                                  |                                                                         |
 | DJANGO_SERVER_TO_SERVER_API_TOKENS              |                                                                                                                             | []                                                                      |
 | DOCUMENT_IMAGE_MAX_SIZE                         | Maximum size of document in bytes                                                                                           | 10485760                                                                |
-| EMBEDDING_API_KEY                               | API key of the embedding api                                                                                                |                                                                         |
-| EMBEDDING_API_MODEL_NAME                        | Name of the embedding model used on the api                                                                                 | embeddings-small                                                        |
-| EMBEDDING_API_PATH                              | URL of the embedding api                                                                                                    |                                                                         |
-| EMBEDDING_DIMENSION                             | Size of the embedding vector                                                                                                | 1024                                                                    |
-| EMBEDDING_REQUEST_TIMEOUT                       | time out in seconds of the embedding requests                                                                               | 10                                                                      |
 | FRONTEND_CSS_URL                                | To add a external css file to the app                                                                                       |                                                                         |
 | FRONTEND_HOMEPAGE_FEATURE_ENABLED               | Frontend feature flag to display the homepage                                                                               | false                                                                   |
 | FRONTEND_THEME                                  | Frontend theme to use                                                                                                       |                                                                         |
-| HYBRID_SEARCH_ENABLED                           | Flag to enable hybrid (an then semantic) search                                                                             | True                                                                    |
-| HYBRID_SEARCH_WEIGHTS                           | Weights used in the weighted sum of the hybrid search score                                                                 | [0.3, 0.7]                                                              |
 | LANGUAGE_DETECTION_CONFIDENCE_THRESHOLD         | Language detection confidence threshold                                                                                     | 0.75                                                                    |
 | LOGGING_LEVEL_LOGGERS_APP                       | Application logging level. options are "DEBUG", "INFO", "WARN", "ERROR", "CRITICAL"                                         | INFO                                                                    |
 | LOGGING_LEVEL_LOGGERS_ROOT                      | Default logging level. options are "DEBUG", "INFO", "WARN", "ERROR", "CRITICAL"                                             | INFO                                                                    |

diff --git a/docs/setup-indexer.md b/docs/setup-indexer.md
@@ -43,39 +43,6 @@ This threshold can be controlled with LANGUAGE_DETECTION_CONFIDENCE_THRESHOLD en
 LANGUAGE_DETECTION_CONFIDENCE_THRESHOLD=0.75
 ```
 
-### Semantic search
-
-Find offers a semantic search feature. You can either use pure full-text search or a hybrid full-text + semantic search. To enable the hybrid search, add the following settings. 
-
-```python
-# Enable flag
-HYBRID_SEARCH_ENABLED = True
-
-# weighted sum: full_text_weight, semantic_search_weight
-HYBRID_SEARCH_WEIGHTS = 0.7,0.3
-
-# Embedding
-CHUNK_SIZE=512
-CHUNK_OVERLAP=50
-EMBEDDING_API_PATH = https://embedding.api.example.com/full/path/
-EMBEDDING_API_KEY = your-embedding-api-key
-EMBEDDING_REQUEST_TIMEOUT = 10
-EMBEDDING_API_MODEL_NAME = embedding-api-model-name
-EMBEDDING_DIMENSION = 1024
-```
-
-The hybrid search computes a score for full-text and semantic search and combines them through a weighted sum. HYBRID_SEARCH_WEIGHTS contains the weights of full-text and semantic respectively. 
-
-You need to use an embedding api similar to https://albert.api.etalab.gouv.fr/documentation#tag/Embeddings/operation/embeddings_v1_embeddings_post. 
-
-### document chunking
-
-The indexing process embeds documents by converting their content into vector representations (embeddings). When a document exceeds the character dimension defined by CHUNK_SIZE, it's divided into smaller segments (chunks), with each chunk embedded independently. Each chunk must be smaller than the embedding model's context window . 
-
-The chunking algorithm works recursively. It attempts to create the largest possible segments first, then subdivides them further if they still exceed the size limit defined by CHUNK_SIZE. Segments can share overlapping content between them (set CHUNK_OVERLAP=0 to disable overlapping). 
-
-During the search, the matching of a document is the matching of its best chunk.
-
 ## trigrams
 
 Find uses trigrams to improve the robustness of the full text search engine to spelling variations and errors. It can be configured by two environment variables. 

diff --git a/env.d/development/common.dist b/env.d/development/common.dist
@@ -50,12 +50,3 @@ OIDC_RS_SIGN_ALGO=RS256
 
 OIDC_RS_BACKEND_CLASS="core.authentication.FinderResourceServerBackend"
 OIDC_RS_ENCRYPTION_KEY_TYPE="RSA"
-
-# Hybrid Search settings
-HYBRID_SEARCH_ENABLED=True
-EMBEDDING_API_KEY=ThisIsAnExampleKeyForDevPurposeOnly
-EMBEDDING_API_PATH=https://albert.api.etalab.gouv.fr/v1/embeddings
-
-## Multi-embedding: chunk documents and embed each chunk
-CHUNK_SIZE=512
-CHUNK_OVERLAP=50
diff --git a/src/backend/core/admin.py b/src/backend/core/admin.py
@@ -1,12 +1,8 @@
 """Admin config for find's core app"""
 
-from django.contrib import admin, messages
-from django.shortcuts import redirect, render
-from django.urls import path, reverse
-
-from core.management.commands.create_search_pipeline import (
-    ensure_search_pipeline_exists,
-)
+from django.contrib import admin
+from django.shortcuts import render
+from django.urls import path
 
 from . import selftests_builtin  # pylint: disable=unused-import
 from .models import Service
@@ -22,35 +18,6 @@ class ServiceAdmin(admin.ModelAdmin):
     list_filter = ("is_active", "created_at")
     ordering = ("-created_at",)
     readonly_fields = ("created_at", "token")
-    change_list_template = "admin/core/services/change_list.html"
-
-    def get_urls(self):
-        urls = super().get_urls()
-        custom_urls = [
-            path(
-                "ensure-search-pipeline/",
-                self.admin_site.admin_view(self.ensure_search_pipeline_view),
-                name="core_service_ensure_search_pipeline",
-            ),
-        ]
-        return custom_urls + urls
-
-    def ensure_search_pipeline_view(self, request):
-        """Run the management command function to assert the pipeline exists."""
-        changelist_url = reverse("admin:core_service_changelist")
-
-        try:
-            ensure_search_pipeline_exists()
-        except Exception as exc:  # noqa: BLE001# pylint: disable=broad-exception-caught
-            self.message_user(
-                request, f"Failed to ensure search pipeline: {exc}", messages.ERROR
-            )
-        else:
-            self.message_user(
-                request, "Search pipeline presence insured", messages.SUCCESS
-            )
-
-        return redirect(changelist_url)
 
 
 def selftest_view(request):

diff --git a/src/backend/core/apps.py b/src/backend/core/apps.py
@@ -3,13 +3,6 @@
 from django.apps import AppConfig
 from django.utils.translation import gettext_lazy as _
 
-from core.management.commands.create_search_pipeline import (
-    ensure_search_pipeline_exists,
-)
-from core.services.opensearch import (
-    check_hybrid_search_enabled,
-)
-
 
 class CoreConfig(AppConfig):
     """Configuration class for the Find core app."""
@@ -19,8 +12,4 @@ class CoreConfig(AppConfig):
     verbose_name = _("Find core application")
 
     def ready(self):
-        """
-        Ensure search pipeline exists if hybrid search is enabled.
-        """
-        if check_hybrid_search_enabled():
-            ensure_search_pipeline_exists()
+        """Intentionally a no-op: no start-up hooks are required."""
diff --git a/src/backend/core/enums.py b/src/backend/core/enums.py
@@ -13,16 +13,6 @@ class ReachEnum(str, Enum):
     RESTRICTED = "restricted"
 
 
-# Search type
-
-
-class SearchTypeEnum(str, Enum):
-    """Search type options"""
-
-    HYBRID = "hybrid"
-    FULL_TEXT = "full-text"
-
-
 # Fields
 
 CREATED_AT = "created_at"

diff --git a/src/backend/core/management/commands/create_search_pipeline.py b/src/backend/core/management/commands/create_search_pipeline.py
diff --git a/src/backend/core/management/commands/reindex_with_embedding.py b/src/backend/core/management/commands/reindex_with_embedding.py
diff --git a/src/backend/core/schemas.py b/src/backend/core/schemas.py
@@ -18,7 +18,6 @@
 )
 
 from . import enums
-from .services.opensearch import check_hybrid_search_enabled
 
 
 class DocumentSchema(BaseModel):
@@ -119,22 +118,6 @@ class SearchQueryParametersSchema(BaseModel):
     order_by: Optional[Literal[enums.ORDER_BY_OPTIONS]] = Field(default=enums.RELEVANCE)
     order_direction: Optional[Literal["asc", "desc"]] = Field(default="desc")
     nb_results: Optional[conint(ge=1, le=300)] = Field(default=50)
-    search_type: Optional[enums.SearchTypeEnum] = None
-
-    @model_validator(mode="after")
-    def set_default_search_type(self):
-        """
-        Set default search_type dynamically.
-        If search_type is not provided, it will be set to hybrid if it is configured
-        and fall back on full text otherwise.
-        """
-        if self.search_type is None:
-            self.search_type = (
-                enums.SearchTypeEnum.HYBRID
-                if check_hybrid_search_enabled()
-                else enums.SearchTypeEnum.FULL_TEXT
-            )
-        return self
 
 
 class DeleteDocumentsSchema(BaseModel):