Skip to content

Commit 117fdba

Browse files
OL facets - PR3 - migrate data to facet tables
Signed-off-by: Pawel Leszczynski <leszczynski.pawel@gmail.com>
1 parent 595fb8a commit 117fdba

9 files changed

Lines changed: 1175 additions & 0 deletions

File tree

CHANGELOG.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,13 @@
22

33
## [Unreleased](https://github.com/MarquezProject/marquez/compare/0.29.0...HEAD)
44

5+
### Added
6+
7+
* Split the `lineage_events` table into `dataset_facets`, `run_facets`, and `job_facets` tables. [`2152`](https://github.com/MarquezProject/marquez/pull/2152)
8+
[@wslulciuc](https://github.com/wslulciuc), [@pawel-big-lebowski](https://github.com/pawel-big-lebowski)
9+
* Performance improvement with a migration procedure that requires manual steps if the database has more than 100K lineage events.
10+
* Please read the [migration readme](https://github.com/MarquezProject/marquez/blob/main/api/src/main/resources/marquez/db/migration/V55__readme.md) for more details about the database migration.
11+
512
## [0.29.0](https://github.com/MarquezProject/marquez/compare/0.28.0...0.29.0) - 2022-12-19
613

714
### Added

api/src/main/java/marquez/MarquezApp.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
import marquez.api.filter.JobRedirectFilter;
3030
import marquez.cli.MetadataCommand;
3131
import marquez.cli.SeedCommand;
32+
import marquez.cli.V57MigrationCommand;
3233
import marquez.common.Utils;
3334
import marquez.db.DbMigration;
3435
import marquez.logging.LoggingMdcFilter;
@@ -149,6 +150,12 @@ public void registerResources(
149150
}
150151
}
151152

153+
@Override
154+
protected void addDefaultCommands(Bootstrap<MarquezConfig> bootstrap) {
155+
bootstrap.addCommand(new V57MigrationCommand<>(this));
156+
super.addDefaultCommands(bootstrap);
157+
}
158+
152159
private MarquezContext buildMarquezContext(
153160
MarquezConfig config, Environment env, ManagedDataSource source) {
154161
final JdbiFactory factory = new JdbiFactory();
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
/*
2+
* Copyright 2018-2022 contributors to the Marquez project
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package marquez.cli;
7+
8+
import io.dropwizard.Application;
9+
import io.dropwizard.cli.EnvironmentCommand;
10+
import io.dropwizard.db.DataSourceFactory;
11+
import io.dropwizard.db.ManagedDataSource;
12+
import io.dropwizard.jdbi3.JdbiFactory;
13+
import io.dropwizard.setup.Environment;
14+
import javax.sql.DataSource;
15+
import lombok.extern.slf4j.Slf4j;
16+
import marquez.db.migrations.V57_1__BackfillFacets;
17+
import net.sourceforge.argparse4j.inf.Namespace;
18+
import net.sourceforge.argparse4j.inf.Subparser;
19+
import org.jdbi.v3.core.Jdbi;
20+
import org.jdbi.v3.jackson2.Jackson2Plugin;
21+
import org.jdbi.v3.postgres.PostgresPlugin;
22+
import org.jdbi.v3.sqlobject.SqlObjectPlugin;
23+
24+
/**
25+
* A command to manually run V55 database migration. This migration requires a heavy DB operation
26+
* which can be done asynchronously (with limited API downtime) due to separate migration command.
27+
*
28+
* <p>Please refer to @link marquez/db/migration/V55__readme.md for more details.
29+
*/
30+
@Slf4j
31+
public class V57MigrationCommand<MarquezConfig> extends EnvironmentCommand<marquez.MarquezConfig> {
32+
33+
private static final String COMMAND_NAME = "v55_migrate";
34+
private static final String COMMAND_DESCRIPTION =
35+
"""
36+
A command to manually run V55 database migration.
37+
Please refer to https://github.com/MarquezProject/marquez/blob/main/api/src/main/resources/marquez/db/migration/V55__readme.md for more details.
38+
""";
39+
40+
/**
41+
* Creates a new environment command.
42+
*
43+
* @param application the application providing this command
44+
*/
45+
public V57MigrationCommand(Application<marquez.MarquezConfig> application) {
46+
super(application, COMMAND_NAME, COMMAND_DESCRIPTION);
47+
}
48+
49+
@Override
50+
public void configure(Subparser subparser) {
51+
subparser
52+
.addArgument("--chunkSize")
53+
.dest("chunkSize")
54+
.type(Integer.class)
55+
.required(false)
56+
.setDefault(V57_1__BackfillFacets.DEFAULT_CHUNK_SIZE)
57+
.help("amount of lineage_events rows processed in a single SQL query and transaction.");
58+
addFileArgument(subparser);
59+
}
60+
61+
@Override
62+
protected void run(
63+
Environment environment, Namespace namespace, marquez.MarquezConfig configuration)
64+
throws Exception {
65+
log.info("Running v55 migration command");
66+
67+
final DataSourceFactory sourceFactory = configuration.getDataSourceFactory();
68+
final DataSource source = sourceFactory.build(environment.metrics(), "MarquezApp-source");
69+
70+
final JdbiFactory factory = new JdbiFactory();
71+
final Jdbi jdbi =
72+
factory
73+
.build(
74+
environment,
75+
configuration.getDataSourceFactory(),
76+
(ManagedDataSource) source,
77+
"postgresql-command")
78+
.installPlugin(new SqlObjectPlugin())
79+
.installPlugin(new PostgresPlugin())
80+
.installPlugin(new Jackson2Plugin());
81+
82+
V57_1__BackfillFacets migration = new V57_1__BackfillFacets();
83+
migration.setTriggeredByCommand(true);
84+
migration.setJdbi(jdbi);
85+
migration.setChunkSize(namespace.getInt("chunkSize"));
86+
migration.migrate(null);
87+
88+
log.info("Migration finished successfully");
89+
}
90+
}

0 commit comments

Comments
 (0)