Skip to content

Commit d340d06

Browse files
authored
Merge pull request #36598 from vespa-engine/toregge/add-document-id-provider-interface-used-by-document-id-dfw
Add IDocumentIdProvider, used by DocumentIdDFW to get document id string
2 parents e55cae6 + c245b61 commit d340d06

19 files changed

Lines changed: 158 additions & 30 deletions

File tree

searchcore/src/tests/proton/documentdb/configurer_test.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -223,7 +223,8 @@ Fixture::initViewSet(ViewSet &views)
223223
views._service.write().field_writer(), views._service.write().shared(), views._hwInfo);
224224
auto summaryMgr = make_shared<SummaryManager>
225225
(_summaryExecutor, search::LogDocumentStore::Config(), search::GrowStrategy(), BASE_DIR,
226-
TuneFileSummary(), views._fileHeaderContext,views._noTlSyncer, search::IBucketizer::SP());
226+
TuneFileSummary(), views._fileHeaderContext,views._noTlSyncer, search::IBucketizer::SP(),
227+
std::shared_ptr<const search::IDocumentIdProvider>());
227228
auto metaStore = make_shared<DocumentMetaStoreContext>(make_shared<bucketdb::BucketDBOwner>());
228229
auto indexWriter = std::make_shared<IndexWriter>(indexMgr);
229230
auto attrWriter = std::make_shared<AttributeWriter>(attrMgr);

searchcore/src/vespa/searchcore/proton/docsummary/summarymanager.cpp

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@ SummaryManager::SummarySetup::
9191
SummarySetup(const std::string & baseDir, const SummaryConfig & summaryCfg,
9292
const JuniperrcConfig & juniperCfg,
9393
search::IAttributeManager::SP attributeMgr, search::IDocumentStore::SP docStore,
94+
std::shared_ptr<const search::IDocumentIdProvider> document_id_provider,
9495
std::shared_ptr<const DocumentTypeRepo> repo,
9596
const search::index::Schema& schema)
9697
: _docsumWriter(),
@@ -99,6 +100,7 @@ SummarySetup(const std::string & baseDir, const SummaryConfig & summaryCfg,
99100
_juniperConfig(),
100101
_attributeMgr(std::move(attributeMgr)),
101102
_docStore(std::move(docStore)),
103+
_document_id_provider(std::move(document_id_provider)),
102104
_repo(std::move(repo))
103105
{
104106
_juniperConfig = std::make_unique<juniper::Juniper>(&_juniperProps, _wordFolder.get());
@@ -126,6 +128,9 @@ SummaryManager::SummarySetup::createDocsumStore()
126128
return std::make_unique<DocumentStoreAdapter>(*_docStore, *_repo);
127129
}
128130

131+
// Returns the document id provider that was handed to this SummarySetup at
// construction time (a copy of the stored shared pointer).
// The pointer may be empty when the owner supplied no provider.
std::shared_ptr<const search::IDocumentIdProvider> SummaryManager::SummarySetup::get_document_id_provider() const noexcept {
132+
return _document_id_provider;
133+
}
129134

130135
ISummaryManager::ISummarySetup::SP
131136
SummaryManager::createSummarySetup(const SummaryConfig & summaryCfg,
@@ -134,16 +139,18 @@ SummaryManager::createSummarySetup(const SummaryConfig & summaryCfg,
134139
const search::index::Schema& schema)
135140
{
136141
return std::make_shared<SummarySetup>(_baseDir, summaryCfg,
137-
juniperCfg, attributeMgr, _docStore, repo, schema);
142+
juniperCfg, attributeMgr, _docStore, _document_id_provider, repo, schema);
138143
}
139144

140145
SummaryManager::SummaryManager(vespalib::Executor &shared_executor, const LogDocumentStore::Config & storeConfig,
141146
const search::GrowStrategy & growStrategy, const std::string &baseDir,
142147
const TuneFileSummary &tuneFileSummary,
143148
const FileHeaderContext &fileHeaderContext, search::transactionlog::SyncProxy &tlSyncer,
144-
search::IBucketizer::SP bucketizer)
149+
search::IBucketizer::SP bucketizer,
150+
std::shared_ptr<const search::IDocumentIdProvider> document_id_provider)
145151
: _baseDir(baseDir),
146-
_docStore()
152+
_docStore(),
153+
_document_id_provider(std::move(document_id_provider))
147154
{
148155
_docStore = std::make_shared<LogDocumentStore>(shared_executor, baseDir, storeConfig, growStrategy, tuneFileSummary,
149156
fileHeaderContext, tlSyncer, std::move(bucketizer));

searchcore/src/vespa/searchcore/proton/docsummary/summarymanager.h

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,13 +28,15 @@ class SummaryManager : public ISummaryManager
2828
std::unique_ptr<juniper::Juniper> _juniperConfig;
2929
search::IAttributeManager::SP _attributeMgr;
3030
search::IDocumentStore::SP _docStore;
31+
std::shared_ptr<const search::IDocumentIdProvider> _document_id_provider;
3132
const std::shared_ptr<const document::DocumentTypeRepo> _repo;
3233
public:
3334
SummarySetup(const std::string & baseDir,
3435
const SummaryConfig & summaryCfg,
3536
const JuniperrcConfig & juniperCfg,
3637
search::IAttributeManager::SP attributeMgr,
3738
search::IDocumentStore::SP docStore,
39+
std::shared_ptr<const search::IDocumentIdProvider> document_id_provider,
3840
std::shared_ptr<const document::DocumentTypeRepo> repo,
3941
const search::index::Schema& schema);
4042

@@ -45,11 +47,13 @@ class SummaryManager : public ISummaryManager
4547

4648
const search::IAttributeManager * getAttributeManager() const override { return _attributeMgr.get(); }
4749
const juniper::Juniper * getJuniper() const override { return _juniperConfig.get(); }
50+
[[nodiscard]] std::shared_ptr<const search::IDocumentIdProvider> get_document_id_provider() const noexcept override;
4851
};
4952

5053
private:
5154
std::string _baseDir;
52-
std::shared_ptr<search::IDocumentStore> _docStore;
55+
std::shared_ptr<search::IDocumentStore> _docStore;
56+
std::shared_ptr<const search::IDocumentIdProvider> _document_id_provider;
5357

5458
public:
5559
using SP = std::shared_ptr<SummaryManager>;
@@ -60,7 +64,8 @@ class SummaryManager : public ISummaryManager
6064
const search::TuneFileSummary &tuneFileSummary,
6165
const search::common::FileHeaderContext &fileHeaderContext,
6266
search::transactionlog::SyncProxy &tlSyncer,
63-
std::shared_ptr<search::IBucketizer> bucketizer);
67+
std::shared_ptr<search::IBucketizer> bucketizer,
68+
std::shared_ptr<const search::IDocumentIdProvider> document_id_provider);
6469
~SummaryManager() override;
6570

6671
void putDocument(uint64_t syncToken, search::DocumentIdT lid, const document::Document & doc);

searchcore/src/vespa/searchcore/proton/docsummary/summarymanagerinitializer.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ SummaryManagerInitializer(const search::GrowStrategy &grow,
1717
const search::common::FileHeaderContext &fileHeaderContext,
1818
search::transactionlog::SyncProxy &tlSyncer,
1919
IBucketizerSP bucketizer,
20+
std::shared_ptr<const search::IDocumentIdProvider> document_id_provider,
2021
std::shared_ptr<SummaryManager::SP> result)
2122
: proton::initializer::InitializerTask(),
2223
_grow(grow),
@@ -28,6 +29,7 @@ SummaryManagerInitializer(const search::GrowStrategy &grow,
2829
_fileHeaderContext(fileHeaderContext),
2930
_tlSyncer(tlSyncer),
3031
_bucketizer(std::move(bucketizer)),
32+
_document_id_provider(std::move(document_id_provider)),
3133
_result(std::move(result))
3234
{ }
3335

@@ -42,7 +44,7 @@ SummaryManagerInitializer::run()
4244
MemoryUsageLogger::log("start load document store", _subDbName);
4345
*_result = std::make_shared<SummaryManager>
4446
(_shared_executor, _storeCfg, _grow, _baseDir,
45-
_tuneFile, _fileHeaderContext, _tlSyncer, _bucketizer);
47+
_tuneFile, _fileHeaderContext, _tlSyncer, _bucketizer, _document_id_provider);
4648
EventLogger::loadDocumentStoreComplete(_subDbName, timer.elapsed());
4749
MemoryUsageLogger::log("finish load document store", _subDbName);
4850
}

searchcore/src/vespa/searchcore/proton/docsummary/summarymanagerinitializer.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ class SummaryManagerInitializer : public initializer::InitializerTask
2424
const search::common::FileHeaderContext &_fileHeaderContext;
2525
search::transactionlog::SyncProxy &_tlSyncer;
2626
const IBucketizerSP _bucketizer;
27+
std::shared_ptr<const search::IDocumentIdProvider> _document_id_provider;
2728
std::shared_ptr<SummaryManager::SP> _result;
2829

2930
public:
@@ -39,6 +40,7 @@ class SummaryManagerInitializer : public initializer::InitializerTask
3940
const search::common::FileHeaderContext & fileHeaderContext,
4041
search::transactionlog::SyncProxy &tlSyncer,
4142
IBucketizerSP bucketizer,
43+
std::shared_ptr<const search::IDocumentIdProvider> document_id_provider,
4244
std::shared_ptr<SummaryManager::SP> result);
4345
~SummaryManagerInitializer() override;
4446
void run() override;

searchcore/src/vespa/searchcore/proton/server/storeonlydocsubdb.cpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -231,13 +231,15 @@ createSummaryManagerInitializer(const search::LogDocumentStore::Config & storeCf
231231
const AllocStrategy& alloc_strategy,
232232
const search::TuneFileSummary &tuneFile,
233233
search::IBucketizer::SP bucketizer,
234+
std::shared_ptr<const search::IDocumentIdProvider> document_id_provider,
234235
std::shared_ptr<SummaryManager::SP> result) const
235236
{
236237
GrowStrategy grow = alloc_strategy.get_grow_strategy();
237238
std::string baseDir(_baseDir + "/summary");
238239
return std::make_shared<SummaryManagerInitializer>
239240
(grow, baseDir, getSubDbName(), _writeService.shared(),
240-
storeCfg, tuneFile, _fileHeaderContext, _tlSyncer, std::move(bucketizer), std::move(result));
241+
storeCfg, tuneFile, _fileHeaderContext, _tlSyncer, std::move(bucketizer),
242+
std::move(document_id_provider), std::move(result));
241243
}
242244

243245
void
@@ -321,10 +323,13 @@ StoreOnlyDocSubDB::createInitializer(const DocumentDBConfig &configSnapshot, Ser
321323
configSnapshot.getTuneFileDocumentDBSP()->_attr,
322324
result->writableResult().writableDocumentMetaStore());
323325
result->addDocumentMetaStoreInitTask(dmsInitTask);
326+
auto dms = result->result().documentMetaStore()->documentMetaStore();
327+
std::shared_ptr<const search::IDocumentIdProvider> document_id_provider; // No document id provider yet.
324328
auto summaryTask = createSummaryManagerInitializer(createStoreConfig(configSnapshot.getStoreConfig(), _subDbType),
325329
alloc_strategy,
326330
configSnapshot.getTuneFileDocumentDBSP()->_summary,
327-
result->result().documentMetaStore()->documentMetaStore(),
331+
std::move(dms),
332+
std::move(document_id_provider),
328333
result->writableResult().writableSummaryManager());
329334
result->addDependency(summaryTask);
330335
summaryTask->addDependency(dmsInitTask);

searchcore/src/vespa/searchcore/proton/server/storeonlydocsubdb.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,7 @@ class StoreOnlyDocSubDB : public DocSubDB
167167
const AllocStrategy& alloc_strategy,
168168
const search::TuneFileSummary &tuneFile,
169169
search::IBucketizer::SP bucketizer,
170+
std::shared_ptr<const search::IDocumentIdProvider> document_id_provider,
170171
std::shared_ptr<SummaryManager::SP> result) const;
171172

172173
void setupSummaryManager(SummaryManager::SP summaryManager);
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
2+
3+
#pragma once
4+
5+
#include <cstdint>
6+
#include <string>
7+
8+
namespace search {
9+
10+
/*
11+
* This class provides a document id string view for the document with the given lid (local document id).
12+
* An empty string view indicates that the document id is not available.
13+
*/
14+
class IDocumentIdProvider {
15+
public:
16+
// Virtual destructor so implementations can be destroyed through this interface.
virtual ~IDocumentIdProvider() = default;
17+
// Returns the document id string for the given lid, or an empty view when
// the document id is not available.
// NOTE(review): the returned view presumably refers to storage owned by the
// implementation - confirm how long it stays valid before holding on to it.
[[nodiscard]] virtual std::string_view get_document_id_string_view(uint32_t lid) const noexcept = 0;
18+
};
19+
20+
}

searchsummary/src/tests/docsummary/document_id_dfw/document_id_dfw_test.cpp

Lines changed: 61 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#include <vespa/document/fieldvalue/document.h>
66
#include <vespa/document/repo/newconfigbuilder.h>
77
#include <vespa/document/repo/documenttyperepo.h>
8+
#include <vespa/searchlib/common/i_document_id_provider.h>
89
#include <vespa/searchlib/common/matching_elements.h>
910
#include <vespa/searchsummary/docsummary/docsumstate.h>
1011
#include <vespa/searchsummary/docsummary/docsum_store_document.h>
@@ -14,7 +15,7 @@
1415
#include <vespa/searchsummary/docsummary/summary_elements_selector.h>
1516
#include <vespa/vespalib/data/slime/slime.h>
1617
#include <vespa/vespalib/gtest/gtest.h>
17-
#include <iostream>
18+
#include <map>
1819
#include <memory>
1920

2021
using document::Document;
@@ -40,6 +41,7 @@ namespace {
4041

4142
const int32_t doc_type_id = 787121340;
4243
const std::string doc_type_name = "test";
44+
const std::string provided_document_id("id:provided:test::1");
4345

4446
std::unique_ptr<const DocumentTypeRepo>
4547
make_doc_type_repo()
@@ -55,21 +57,51 @@ struct MyGetDocsumsStateCallback : GetDocsumsStateCallback {
5557
std::unique_ptr<MatchingElements> fill_matching_elements(const MatchingElementsFields &) override { abort(); }
5658
};
5759

60+
// Test implementation of search::IDocumentIdProvider backed by an in-memory
// map from lid to document id string.
class MyDocumentIdProvider : public search::IDocumentIdProvider {
61+
// Known document ids, keyed by lid.
std::map<uint32_t, std::string> _ids;
62+
public:
63+
MyDocumentIdProvider();
64+
~MyDocumentIdProvider() override;
65+
// Looks up the document id for lid in _ids; an empty view means "not found".
[[nodiscard]] std::string_view get_document_id_string_view(uint32_t lid) const noexcept override;
66+
};
67+
68+
// Creates a provider that knows exactly one document: lid 1 maps to
// provided_document_id. All other lids are unknown.
MyDocumentIdProvider::MyDocumentIdProvider()
69+
: search::IDocumentIdProvider(),
70+
_ids()
71+
{
72+
_ids.emplace(1, provided_document_id);
73+
}
74+
75+
// Out-of-line defaulted destructor; nothing beyond member destruction is needed.
MyDocumentIdProvider::~MyDocumentIdProvider() = default;
76+
77+
// Returns a view of the document id stored for lid, or an empty view when
// lid is not present in _ids.
// The returned view refers to the std::string held inside the map.
std::string_view MyDocumentIdProvider::get_document_id_string_view(uint32_t lid) const noexcept {
78+
auto itr = _ids.find(lid);
79+
if (itr != _ids.end()) {
80+
return itr->second;
81+
} else {
82+
// Unknown lid: signal "not available" with an empty view.
return {};
83+
}
84+
}
85+
86+
}
87+
5888
// Test fixture for DocumentIdDFW. Holds the summary field name, the result
// config, the document type repo and an optional document id provider that
// write() hands to the writer under test.
class DocumentIdDFWTest : public ::testing::Test
5989
{
6090
std::string _field_name;
6191
// View over _field_name, used as the slime field key.
vespalib::Memory _field_name_view;
6292
std::unique_ptr<ResultConfig> _result_config;
6393
std::unique_ptr<const DocumentTypeRepo> _repo;
6494
const DocumentType* _document_type;
95+
// Empty by default; set by enable_document_id_provider().
std::shared_ptr<const search::IDocumentIdProvider> _document_id_provider;
6596

6697
protected:
6798
DocumentIdDFWTest();
6899
~DocumentIdDFWTest() override;
69100

70101
std::unique_ptr<IDocsumStoreDocument> make_docsum_store_document(const std::string &id);
71-
vespalib::Slime write(const IDocsumStoreDocument* doc);
102+
Slime write(uint32_t lid, const IDocsumStoreDocument* doc);
72103
vespalib::Memory get_field_name_view() const noexcept { return _field_name_view; }
104+
// Installs a MyDocumentIdProvider so that subsequent write() calls construct
// the writer with a non-empty provider.
void enable_document_id_provider() { _document_id_provider = std::make_shared<MyDocumentIdProvider>(); }
73105
};
74106

75107
DocumentIdDFWTest::DocumentIdDFWTest()
@@ -78,7 +110,8 @@ DocumentIdDFWTest::DocumentIdDFWTest()
78110
_field_name_view(_field_name.data(), _field_name.size()),
79111
_result_config(std::make_unique<ResultConfig>()),
80112
_repo(make_doc_type_repo()),
81-
_document_type(_repo->getDocumentType(doc_type_name))
113+
_document_type(_repo->getDocumentType(doc_type_name)),
114+
_document_id_provider()
82115
{
83116
auto* cfg = _result_config->addResultClass("default", 0);
84117
cfg->addConfigEntry(_field_name);
@@ -96,41 +129,60 @@ DocumentIdDFWTest::make_docsum_store_document(const std::string& id)
96129
}
97130

98131
// Runs DocumentIdDFW::insert_field for the given lid and docsum store document
// and returns the resulting slime. The slime's top-level value is an object;
// the writer inserts through an ObjectInserter keyed on the fixture's field name.
// The writer is constructed with the fixture's current document id provider,
// which is empty unless enable_document_id_provider() has been called.
vespalib::Slime
99-
DocumentIdDFWTest::write(const IDocsumStoreDocument* doc)
132+
DocumentIdDFWTest::write(uint32_t lid, const IDocsumStoreDocument* doc)
100133
{
101134
Slime slime;
102135
SlimeInserter top_inserter(slime);
103136
Cursor & docsum = top_inserter.insertObject();
104137
ObjectInserter field_inserter(docsum, _field_name_view);
105-
DocumentIdDFW writer;
138+
DocumentIdDFW writer(_document_id_provider);
106139
MyGetDocsumsStateCallback callback;
107140
GetDocsumsState state(callback);
108-
writer.insert_field(0, doc, state, ElementIds::select_all(), field_inserter);
141+
writer.insert_field(lid, doc, state, ElementIds::select_all(), field_inserter);
109142
return slime;
110143
}
111144

112145
// No document id provider is enabled. With a docsum store document that has
// id "id::test::0", the written field must be valid and equal to that id.
TEST_F(DocumentIdDFWTest, insert_document_id)
113146
{
114147
std::string id("id::test::0");
115148
auto doc = make_docsum_store_document(id);
116-
auto slime = write(doc.get());
149+
auto slime = write(1, doc.get());
117150
EXPECT_TRUE(slime.get()[get_field_name_view()].valid());
118151
EXPECT_EQ(id, slime.get()[get_field_name_view()].asString().make_string());
119152
}
120153

121154
// No document id provider is enabled and the docsum store document wraps a
// null Document: the field is expected to be absent from the output.
TEST_F(DocumentIdDFWTest, insert_document_id_no_document_doc)
122155
{
123156
auto doc = std::make_unique<DocsumStoreDocument>(std::unique_ptr<Document>());
124-
auto slime = write(doc.get());
157+
auto slime = write(1, doc.get());
125158
EXPECT_FALSE(slime.get()[get_field_name_view()].valid());
126159
}
127160

128161
// No document id provider is enabled and no docsum store document is passed
// (nullptr): the field is expected to be absent from the output.
TEST_F(DocumentIdDFWTest, insert_document_id_no_docsum_store_doc)
129162
{
130-
auto slime = write(nullptr);
163+
auto slime = write(1, nullptr);
131164
EXPECT_FALSE(slime.get()[get_field_name_view()].valid());
132165
}
133166

167+
// The docsum store document wraps a null Document, but a document id provider
// is enabled: for lid 1 (known to the provider) the field must equal
// provided_document_id; for lid 2 (unknown to the provider) the field must be absent.
TEST_F(DocumentIdDFWTest, insert_document_id_no_document_doc_but_document_id_provider)
168+
{
169+
enable_document_id_provider();
170+
auto doc = std::make_unique<DocsumStoreDocument>(std::unique_ptr<Document>());
171+
auto slime = write(1, doc.get());
172+
EXPECT_TRUE(slime.get()[get_field_name_view()].valid());
173+
EXPECT_EQ(provided_document_id, slime.get()[get_field_name_view()].asString().make_string());
174+
slime = write(2, doc.get());
175+
EXPECT_FALSE(slime.get()[get_field_name_view()].valid());
176+
}
177+
178+
// No docsum store document is passed (nullptr), but a document id provider is
// enabled: for lid 1 (known to the provider) the field must equal
// provided_document_id; for lid 2 (unknown to the provider) the field must be absent.
TEST_F(DocumentIdDFWTest, insert_document_id_no_docsum_store_doc_but_document_id_provider)
179+
{
180+
enable_document_id_provider();
181+
auto slime = write(1, nullptr);
182+
EXPECT_TRUE(slime.get()[get_field_name_view()].valid());
183+
EXPECT_EQ(provided_document_id, slime.get()[get_field_name_view()].asString().make_string());
184+
slime = write(2, nullptr);
185+
EXPECT_FALSE(slime.get()[get_field_name_view()].valid());
134186
}
135187

136188
GTEST_MAIN_RUN_ALL_TESTS()

searchsummary/src/vespa/searchsummary/docsummary/docsum_field_writer_factory.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ DocsumFieldWriterFactory::create_docsum_field_writer(const std::string& field_na
129129
throw_if_nullptr(fieldWriter, command);
130130
}
131131
} else if (command == command::documentid) {
132-
fieldWriter = std::make_unique<DocumentIdDFW>();
132+
fieldWriter = std::make_unique<DocumentIdDFW>(_env.get_document_id_provider());
133133
} else {
134134
throw IllegalArgumentException("Unknown command '" + command + "'.");
135135
}

0 commit comments

Comments
 (0)