Skip to content

Commit 37caace

Browse files
committed
feat: add Database.merge() and sqlite-utils merge command
Implements the ability to merge tables from one or more source SQLite databases into a destination database, as requested in #491.

Python API: db.merge([src1, src2], alter=True, replace=False, ignore=False, tables=None)
- sources can be Database objects or file paths
- Tables not in dest are created; existing tables have the source rows inserted into them
- alter=True adds missing columns to existing destination tables
- replace=True overwrites rows with matching primary keys
- ignore=True skips rows with conflicting primary keys
- tables= limits which tables are merged
- Virtual tables and their shadow tables are automatically skipped

CLI: sqlite-utils merge combined.db one.db two.db [options]
- Supports --alter, --replace, --ignore, --pk, --table, --load-extension

Closes #491
1 parent 8d74ffc commit 37caace

File tree

4 files changed

+418
-0
lines changed

4 files changed

+418
-0
lines changed

docs/cli.rst

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1057,6 +1057,45 @@ That will look for SpatiaLite in a set of predictable locations. To load it from
10571057
10581058
sqlite-utils create-database empty.db --init-spatialite --load-extension /path/to/spatialite.so
10591059
1060+
.. _cli_merge:
1061+
1062+
Merging databases
1063+
=================
1064+
1065+
Use ``sqlite-utils merge`` to merge tables from one or more source databases into a destination database.
1066+
1067+
Tables that do not exist in the destination are created. For tables that already exist, the rows from each source table are inserted into the existing destination table.
1068+
1069+
.. code-block:: bash
1070+
1071+
sqlite-utils merge combined.db one.db two.db
1072+
1073+
To automatically add missing columns to existing destination tables, use ``--alter``:
1074+
1075+
.. code-block:: bash
1076+
1077+
sqlite-utils merge combined.db one.db two.db --alter
1078+
1079+
To replace rows that have conflicting primary keys, use ``--replace``:
1080+
1081+
.. code-block:: bash
1082+
1083+
sqlite-utils merge combined.db one.db two.db --replace
1084+
1085+
To skip rows that have conflicting primary keys, use ``--ignore``:
1086+
1087+
.. code-block:: bash
1088+
1089+
sqlite-utils merge combined.db one.db two.db --ignore
1090+
1091+
To merge only specific tables, use ``--table`` (can be specified multiple times):
1092+
1093+
.. code-block:: bash
1094+
1095+
sqlite-utils merge combined.db one.db two.db --table mytable
1096+
1097+
Virtual tables (such as FTS indexes) and their shadow tables are automatically skipped.
1098+
10601099
.. _cli_inserting_data:
10611100

10621101
Inserting JSON data

sqlite_utils/cli.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1541,6 +1541,63 @@ def create_database(path, enable_wal, init_spatialite, load_extension):
15411541
db.vacuum()
15421542

15431543

1544+
@cli.command(name="merge")
@click.argument(
    "path",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    required=True,
)
@click.argument(
    "sources",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False, exists=True),
    nargs=-1,
    required=True,
)
@click.option("pks", "--pk", help="Column to use as primary key", multiple=True)
@click.option(
    "--alter",
    is_flag=True,
    help="Alter destination tables to add any missing columns",
)
@click.option(
    "--replace",
    is_flag=True,
    help="Replace rows with matching primary keys",
)
@click.option(
    "--ignore",
    is_flag=True,
    help="Ignore rows with conflicting primary keys",
)
@click.option(
    "tables",
    "--table",
    help="Specific tables to merge (can be specified multiple times)",
    multiple=True,
)
@load_extension_option
def merge_cmd(path, sources, pks, alter, replace, ignore, tables, load_extension):
    """
    Merge tables from one or more SOURCE databases into a DEST database.

    Tables that do not exist in DEST are created. Tables that already exist
    have their rows inserted. Use --alter to add missing columns automatically.

    Example:

    \b
    sqlite-utils merge combined.db one.db two.db
    sqlite-utils merge combined.db one.db two.db --alter
    sqlite-utils merge combined.db one.db two.db --replace --table mytable
    """
    # NOTE: the docstring above is the text click prints for --help, so it is
    # runtime output rather than free-form documentation.
    db = sqlite_utils.Database(path)
    _register_db_for_cleanup(db)
    _load_extensions(db, load_extension)

    # ``multiple=True`` options arrive as tuples; an empty tuple means the
    # option was never given, which Database.merge() expects as None.
    merge_kwargs = {
        "pk": list(pks) or None,
        "alter": alter,
        "replace": replace,
        "ignore": ignore,
        "tables": list(tables) or None,
    }
    try:
        db.merge(sources, **merge_kwargs)
    except OperationalError as e:
        # Report SQLite operational failures as a clean CLI error message.
        raise click.ClickException(str(e))
1599+
1600+
15441601
@cli.command(name="create-table")
15451602
@click.argument(
15461603
"path",

sqlite_utils/db.py

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -535,6 +535,77 @@ def attach(self, alias: str, filepath: Union[str, pathlib.Path]) -> None:
535535
).strip()
536536
self.execute(attach_sql)
537537

538+
def merge(
    self,
    sources: Iterable[Union[str, pathlib.Path, "Database"]],
    *,
    pk: Optional[Any] = None,
    alter: bool = False,
    replace: bool = False,
    ignore: bool = False,
    tables: Optional[Iterable[str]] = None,
) -> "Database":
    """
    Merge tables from one or more source databases into this database.

    Every selected source table has its rows passed to ``insert_all()`` on
    the destination table of the same name, so creating a table that does
    not exist yet is delegated to ``insert_all()``. Use ``alter=True`` to
    automatically add any missing columns to existing destination tables.

    Virtual tables (e.g. FTS indexes) in source databases are skipped, as
    are tables whose name starts with ``<virtual table name>_`` (treated as
    shadow tables of that virtual table).

    :param sources: One or more source databases. Each item may be a
        ``Database`` instance, or a path to a SQLite database file. Databases
        opened here from a path are closed again once they have been merged;
        ``Database`` instances passed in by the caller are left open.
    :param pk: Primary key column(s) to use for all merged tables: a single
        column name or a sequence of column names. When ``None`` (or empty),
        each source table's own primary key(s) are used.
    :param alter: Add any missing columns to existing destination tables.
    :param replace: Replace rows whose primary key already exists in the
        destination table.
    :param ignore: Skip rows whose primary key already exists in the
        destination table.
    :param tables: If provided, only merge these named tables. Tables
        listed here that do not exist in a particular source are silently
        skipped.
    :return: ``self`` (the destination database).
    """
    # Normalise ``pk`` once. A bare string is a single column name and must
    # not be indexed or iterated character by character.
    explicit_pk: Any
    if pk is None or isinstance(pk, str):
        explicit_pk = pk or None
    else:
        pk_columns = list(pk)
        if not pk_columns:
            explicit_pk = None
        elif len(pk_columns) == 1:
            explicit_pk = pk_columns[0]
        else:
            explicit_pk = pk_columns

    # Materialise ``tables`` a single time so that a generator or other
    # one-shot iterable still applies to every source, not just the first.
    requested_tables = list(tables) if tables is not None else None

    for source in sources:
        opened_here = isinstance(source, (str, pathlib.Path))
        source_db = Database(source) if opened_here else source
        try:
            source_table_names = source_db.table_names()
            known_tables = set(source_table_names)
            # Collect virtual table names so their shadow tables can be
            # skipped too.
            virtual_table_names = {
                name
                for name in source_table_names
                if source_db.table(name).virtual_table_using is not None
            }
            # str.startswith() accepts a tuple; an empty tuple never matches.
            shadow_prefixes = tuple(vt + "_" for vt in virtual_table_names)

            names_to_merge = (
                source_table_names if requested_tables is None else requested_tables
            )
            for table_name in names_to_merge:
                if table_name not in known_tables:
                    continue
                # Skip virtual tables (e.g. FTS indexes).
                if table_name in virtual_table_names:
                    continue
                # Skip shadow tables created by virtual tables
                # (e.g. docs_fts_data).
                if table_name.startswith(shadow_prefixes):
                    continue

                source_table = source_db.table(table_name)
                if explicit_pk is not None:
                    effective_pk: Any = explicit_pk
                elif source_table.use_rowid:
                    effective_pk = None
                else:
                    source_pks = source_table.pks
                    effective_pk = (
                        source_pks[0] if len(source_pks) == 1 else source_pks
                    )
                self[table_name].insert_all(
                    source_table.rows,
                    pk=effective_pk,
                    alter=alter,
                    replace=replace,
                    ignore=ignore,
                )
        finally:
            # Only close connections this method opened itself.
            if opened_here:
                source_db.close()
    return self
608+
538609
def query(
539610
self, sql: str, params: Optional[Union[Sequence, Dict[str, Any]]] = None
540611
) -> Generator[dict, None, None]:

0 commit comments

Comments
 (0)