@@ -29,6 +29,7 @@ constexpr const char* k_tables_name = "tables";
// Names of catalog tables. In the visible code, the columns/indexes/schemas/meta
// names (together with k_tables_name) are combined with a database name through
// join_db_path() inside ensure_db_catalog() to locate the per-database tables.
// NOTE(review): k_databases_name is not used in the visible code; presumably it
// names the shared (cluster-wide) table described below — confirm.
2929constexpr const char * k_columns_name = " columns" ;
3030constexpr const char * k_indexes_name = " indexes" ;
3131constexpr const char * k_meta_name = " meta" ;
32+ constexpr const char * k_schemas_name = " schemas" ;
3233constexpr const char * k_databases_name = " databases" ;
3334
3435// Shared (cluster-wide) path: {root}/__deeplake_catalog/{name}
@@ -184,6 +185,17 @@ deeplake_api::catalog_table_schema make_indexes_schema()
184185 return schema;
185186}
186187
188+ deeplake_api::catalog_table_schema make_schemas_schema ()
189+ {
190+ deeplake_api::catalog_table_schema schema;
191+ schema.add (" schema_name" , deeplake_core::type::text (codecs::compression::null))
192+ .add (" owner" , deeplake_core::type::text (codecs::compression::null))
193+ .add (" state" , deeplake_core::type::text (codecs::compression::null))
194+ .add (" updated_at" , deeplake_core::type::generic (nd::type::scalar (nd::dtype::int64)))
195+ .set_primary_key (" schema_name" );
196+ return schema;
197+ }
198+
187199deeplake_api::catalog_table_schema make_meta_schema ()
188200{
189201 deeplake_api::catalog_table_schema schema;
@@ -272,32 +284,35 @@ int64_t ensure_db_catalog(const std::string& root_path, const std::string& db_na
272284 const auto tables_path = join_db_path (root_path, db_name, k_tables_name);
273285 const auto columns_path = join_db_path (root_path, db_name, k_columns_name);
274286 const auto indexes_path = join_db_path (root_path, db_name, k_indexes_name);
287+ const auto schemas_path = join_db_path (root_path, db_name, k_schemas_name);
275288 const auto meta_path = join_db_path (root_path, db_name, k_meta_name);
276289
277290 try {
278- // Launch all 4 per-database catalog table creation in parallel
291+ // Launch all 5 per-database catalog table creation in parallel
279292 icm::vector<async::promise<std::shared_ptr<deeplake_api::catalog_table>>> promises;
280- promises.reserve (4 );
293+ promises.reserve (5 );
281294 promises.push_back (
282295 deeplake_api::open_or_create_catalog_table (tables_path, make_tables_schema (), icm::string_map<>(creds)));
283296 promises.push_back (
284297 deeplake_api::open_or_create_catalog_table (columns_path, make_columns_schema (), icm::string_map<>(creds)));
285298 promises.push_back (
286299 deeplake_api::open_or_create_catalog_table (indexes_path, make_indexes_schema (), icm::string_map<>(creds)));
300+ promises.push_back (
301+ deeplake_api::open_or_create_catalog_table (schemas_path, make_schemas_schema (), icm::string_map<>(creds)));
287302 promises.push_back (
288303 deeplake_api::open_or_create_catalog_table (meta_path, make_meta_schema (), icm::string_map<>(creds)));
289304
290305 auto results = async::combine (std::move (promises)).get_future ().get ();
291- if (results.size () != 4 ) {
306+ if (results.size () != 5 ) {
292307 elog (ERROR,
293- " Failed to initialize per-db catalog at %s/%s: expected 4 catalog tables, got %zu" ,
308+ " Failed to initialize per-db catalog at %s/%s: expected 5 catalog tables, got %zu" ,
294309 root_path.c_str (),
295310 db_name.c_str (),
296311 static_cast <size_t >(results.size ()));
297312 }
298313
299- // Initialize per-db meta table if empty (index 3 is meta)
300- auto & meta_table = results[3 ];
314+ // Initialize per-db meta table if empty (index 4 is meta)
315+ auto & meta_table = results[4 ];
301316 if (meta_table) {
302317 auto snapshot = meta_table->read ().get_future ().get ();
303318 if (snapshot.row_count () == 0 ) {
@@ -447,6 +462,73 @@ std::vector<index_meta> load_indexes(const std::string&, const std::string&, icm
447462 return {};
448463}
449464
465+ std::vector<schema_meta> load_schemas (const std::string& root_path, const std::string& db_name, icm::string_map<> creds)
466+ {
467+ std::vector<schema_meta> out;
468+ try {
469+ auto table = open_db_catalog_table (root_path, db_name, k_schemas_name, std::move (creds));
470+ if (!table) {
471+ return out;
472+ }
473+ auto snapshot = table->read ().get_future ().get ();
474+ if (snapshot.row_count () == 0 ) {
475+ return out;
476+ }
477+
478+ std::unordered_map<std::string, schema_meta> latest;
479+ for (const auto & row : snapshot.rows ()) {
480+ auto schema_name_it = row.find (" schema_name" );
481+ auto state_it = row.find (" state" );
482+ if (schema_name_it == row.end () || state_it == row.end ()) {
483+ continue ;
484+ }
485+
486+ schema_meta meta;
487+ meta.schema_name = deeplake_api::array_to_string (schema_name_it->second );
488+ meta.state = deeplake_api::array_to_string (state_it->second );
489+ auto owner_it = row.find (" owner" );
490+ if (owner_it != row.end ()) {
491+ meta.owner = deeplake_api::array_to_string (owner_it->second );
492+ }
493+ auto updated_it = row.find (" updated_at" );
494+ if (updated_it != row.end ()) {
495+ auto updated_vec = load_int64_vector (updated_it->second );
496+ meta.updated_at = updated_vec.empty () ? 0 : updated_vec.front ();
497+ }
498+
499+ auto it = latest.find (meta.schema_name );
500+ if (it == latest.end () || it->second .updated_at <= meta.updated_at ) {
501+ latest[meta.schema_name ] = std::move (meta);
502+ }
503+ }
504+
505+ out.reserve (latest.size ());
506+ for (auto & [_, meta] : latest) {
507+ if (meta.state == " ready" ) {
508+ out.push_back (std::move (meta));
509+ }
510+ }
511+ return out;
512+ } catch (const std::exception& e) {
513+ elog (DEBUG1, " Failed to load catalog schemas for db '%s': %s (may be old catalog)" , db_name.c_str (), e.what ());
514+ return out;
515+ } catch (...) {
516+ elog (DEBUG1, " Failed to load catalog schemas for db '%s': unknown error (may be old catalog)" , db_name.c_str ());
517+ return out;
518+ }
519+ }
520+
521+ void upsert_schema (const std::string& root_path, const std::string& db_name, icm::string_map<> creds, const schema_meta& meta)
522+ {
523+ auto table = open_db_catalog_table (root_path, db_name, k_schemas_name, std::move (creds));
524+ icm::string_map<nd::array> row;
525+ row[" schema_name" ] = nd::adapt (meta.schema_name );
526+ row[" owner" ] = nd::adapt (meta.owner );
527+ row[" state" ] = nd::adapt (meta.state );
528+ row[" updated_at" ] = nd::adapt (meta.updated_at == 0 ? now_ms () : meta.updated_at );
529+ table->upsert (std::move (row)).get_future ().get ();
530+ }
531+
450532std::pair<std::vector<table_meta>, std::vector<column_meta>>
451533load_tables_and_columns (const std::string& root_path, const std::string& db_name, icm::string_map<> creds)
452534{
0 commit comments