Skip to content

Commit 2909864

Browse files
dataset symlinks provided (#2087)
Signed-off-by: Pawel Leszczynski <leszczynski.pawel@gmail.com> Signed-off-by: Pawel Leszczynski <leszczynski.pawel@gmail.com>
1 parent bb3d163 commit 2909864

16 files changed

Lines changed: 348 additions & 29 deletions

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
## [Unreleased](https://github.com/MarquezProject/marquez/compare/0.26.0...HEAD)
44
### Fixed
55
* Add support for `parentRun` facet as reported by older Airflow OpenLineage versions [@collado-mike](https://github.com/collado-mike)
6+
* Implemented the dataset symlink feature, which allows providing multiple names for a dataset and adds edges to the lineage graph based on symlinks [`#2066`](https://github.com/MarquezProject/marquez/pull/2066) [@pawel-big-lebowski](https://github.com/pawel-big-lebowski)
7+
68
## [0.26.0](https://github.com/MarquezProject/marquez/compare/0.25.0...0.26.0) - 2022-09-15
79

810
### Added

api/src/main/java/marquez/db/BaseDao.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,9 @@ public interface BaseDao extends SqlObject {
3030
@CreateSqlObject
3131
NamespaceDao createNamespaceDao();
3232

33+
@CreateSqlObject
34+
DatasetSymlinkDao createDatasetSymlinkDao();
35+
3336
@CreateSqlObject
3437
RunDao createRunDao();
3538

api/src/main/java/marquez/db/Columns.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,9 @@ private Columns() {}
8080
public static final String FIELD_UUIDS = "field_uuids";
8181
public static final String LIFECYCLE_STATE = "lifecycle_state";
8282

83+
/* DATASET SYMLINK ROW COLUMNS */
84+
public static final String IS_PRIMARY = "is_primary";
85+
8386
/* STREAM VERSION ROW COLUMNS */
8487
public static final String SCHEMA_LOCATION = "schema_location";
8588

api/src/main/java/marquez/db/DatasetDao.java

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import marquez.db.mappers.DatasetMapper;
2424
import marquez.db.mappers.DatasetRowMapper;
2525
import marquez.db.models.DatasetRow;
26+
import marquez.db.models.DatasetSymlinkRow;
2627
import marquez.db.models.DatasetVersionRow;
2728
import marquez.db.models.NamespaceRow;
2829
import marquez.db.models.SourceRow;
@@ -73,8 +74,7 @@ void updateLastModifiedAt(
7374
WITH selected_datasets AS (
7475
SELECT d.*
7576
FROM datasets_view d
76-
WHERE d.namespace_name = :namespaceName
77-
AND d.name = :datasetName
77+
WHERE CAST((:namespaceName, :datasetName) AS DATASET_NAME) = ANY(d.dataset_symlinks)
7878
), dataset_runs AS (
7979
SELECT d.uuid, d.name, d.namespace_name, dv.run_uuid, dv.lifecycle_state, event_time, event
8080
FROM selected_datasets d
@@ -229,7 +229,7 @@ INSERT INTO datasets (
229229
:description,
230230
:isDeleted,
231231
false
232-
) ON CONFLICT (namespace_uuid, name)
232+
) ON CONFLICT (uuid)
233233
DO UPDATE SET
234234
type = EXCLUDED.type,
235235
updated_at = EXCLUDED.updated_at,
@@ -275,7 +275,7 @@ DatasetRow upsert(
275275
+ ":sourceName, "
276276
+ ":name, "
277277
+ ":physicalName) "
278-
+ "ON CONFLICT (namespace_uuid, name) "
278+
+ "ON CONFLICT (uuid) "
279279
+ "DO UPDATE SET "
280280
+ "type = EXCLUDED.type, "
281281
+ "updated_at = EXCLUDED.updated_at, "
@@ -296,8 +296,10 @@ DatasetRow upsert(
296296
"""
297297
UPDATE datasets
298298
SET is_hidden = true
299-
WHERE namespace_name = :namespaceName
300-
AND name = :name
299+
FROM dataset_symlinks, namespaces
300+
WHERE dataset_symlinks.dataset_uuid = datasets.uuid
301+
AND namespaces.uuid = dataset_symlinks.namespace_uuid
302+
AND namespaces.name=:namespaceName AND dataset_symlinks.name=:name
301303
RETURNING *
302304
""")
303305
Optional<DatasetRow> delete(String namespaceName, String name);
@@ -310,6 +312,10 @@ default Dataset upsertDatasetMeta(
310312
createNamespaceDao()
311313
.upsertNamespaceRow(
312314
UUID.randomUUID(), now, namespaceName.getValue(), DEFAULT_NAMESPACE_OWNER);
315+
DatasetSymlinkRow symlinkRow =
316+
createDatasetSymlinkDao()
317+
.upsertDatasetSymlinkRow(
318+
UUID.randomUUID(), datasetName.getValue(), namespaceRow.getUuid(), true, null, now);
313319
SourceRow sourceRow =
314320
createSourceDao()
315321
.upsertOrDefault(
@@ -318,13 +324,12 @@ default Dataset upsertDatasetMeta(
318324
now,
319325
datasetMeta.getSourceName().getValue(),
320326
"");
321-
UUID newDatasetUuid = UUID.randomUUID();
322327
DatasetRow datasetRow;
323328

324329
if (datasetMeta.getDescription().isPresent()) {
325330
datasetRow =
326331
upsert(
327-
newDatasetUuid,
332+
symlinkRow.getUuid(),
328333
datasetMeta.getType(),
329334
now,
330335
namespaceRow.getUuid(),
@@ -338,7 +343,7 @@ default Dataset upsertDatasetMeta(
338343
} else {
339344
datasetRow =
340345
upsert(
341-
newDatasetUuid,
346+
symlinkRow.getUuid(),
342347
datasetMeta.getType(),
343348
now,
344349
namespaceRow.getUuid(),
@@ -349,7 +354,8 @@ default Dataset upsertDatasetMeta(
349354
datasetMeta.getPhysicalName().getValue());
350355
}
351356

352-
updateDatasetMetric(namespaceName, datasetMeta.getType(), newDatasetUuid, datasetRow.getUuid());
357+
updateDatasetMetric(
358+
namespaceName, datasetMeta.getType(), symlinkRow.getUuid(), datasetRow.getUuid());
353359

354360
TagDao tagDao = createTagDao();
355361
List<DatasetTagMapping> datasetTagMappings = new ArrayList<>();
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
/*
2+
* Copyright 2018-2022 contributors to the Marquez project
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package marquez.db;
7+
8+
import java.time.Instant;
9+
import java.util.Optional;
10+
import java.util.UUID;
11+
import marquez.db.mappers.DatasetSymlinksRowMapper;
12+
import marquez.db.models.DatasetSymlinkRow;
13+
import org.jdbi.v3.sqlobject.config.RegisterRowMapper;
14+
import org.jdbi.v3.sqlobject.statement.SqlQuery;
15+
import org.jdbi.v3.sqlobject.statement.SqlUpdate;
16+
17+
@RegisterRowMapper(DatasetSymlinksRowMapper.class)
18+
public interface DatasetSymlinkDao extends BaseDao {
19+
20+
default DatasetSymlinkRow upsertDatasetSymlinkRow(
21+
UUID uuid, String name, UUID namespaceUuid, boolean isPrimary, String type, Instant now) {
22+
doUpsertDatasetSymlinkRow(uuid, name, namespaceUuid, isPrimary, type, now);
23+
return findDatasetSymlinkByNamespaceUuidAndName(namespaceUuid, name).orElseThrow();
24+
}
25+
26+
@SqlQuery("SELECT * FROM dataset_symlinks WHERE namespace_uuid = :namespaceUuid and name = :name")
27+
Optional<DatasetSymlinkRow> findDatasetSymlinkByNamespaceUuidAndName(
28+
UUID namespaceUuid, String name);
29+
30+
@SqlUpdate(
31+
"""
32+
INSERT INTO dataset_symlinks (
33+
dataset_uuid,
34+
name,
35+
namespace_uuid,
36+
is_primary,
37+
type,
38+
created_at,
39+
updated_at
40+
) VALUES (
41+
:uuid,
42+
:name,
43+
:namespaceUuid,
44+
:isPrimary,
45+
:type,
46+
:now,
47+
:now)
48+
ON CONFLICT (name, namespace_uuid) DO NOTHING""")
49+
void doUpsertDatasetSymlinkRow(
50+
UUID uuid, String name, UUID namespaceUuid, boolean isPrimary, String type, Instant now);
51+
}

api/src/main/java/marquez/db/OpenLineageDao.java

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
import marquez.db.mappers.LineageEventMapper;
3434
import marquez.db.models.DatasetFieldRow;
3535
import marquez.db.models.DatasetRow;
36+
import marquez.db.models.DatasetSymlinkRow;
3637
import marquez.db.models.DatasetVersionRow;
3738
import marquez.db.models.JobContextRow;
3839
import marquez.db.models.JobRow;
@@ -120,6 +121,7 @@ default UpdateLineageRow updateMarquezModel(LineageEvent event, ObjectMapper map
120121

121122
default UpdateLineageRow updateBaseMarquezModel(LineageEvent event, ObjectMapper mapper) {
122123
NamespaceDao namespaceDao = createNamespaceDao();
124+
DatasetSymlinkDao datasetSymlinkDao = createDatasetSymlinkDao();
123125
DatasetDao datasetDao = createDatasetDao();
124126
SourceDao sourceDao = createSourceDao();
125127
JobDao jobDao = createJobDao();
@@ -316,6 +318,7 @@ default UpdateLineageRow updateBaseMarquezModel(LineageEvent event, ObjectMapper
316318
runUuid,
317319
true,
318320
namespaceDao,
321+
datasetSymlinkDao,
319322
sourceDao,
320323
datasetDao,
321324
datasetVersionDao,
@@ -337,6 +340,7 @@ default UpdateLineageRow updateBaseMarquezModel(LineageEvent event, ObjectMapper
337340
runUuid,
338341
false,
339342
namespaceDao,
343+
datasetSymlinkDao,
340344
sourceDao,
341345
datasetDao,
342346
datasetVersionDao,
@@ -532,6 +536,7 @@ default DatasetRecord upsertLineageDataset(
532536
UUID runUuid,
533537
boolean isInput,
534538
NamespaceDao namespaceDao,
539+
DatasetSymlinkDao datasetSymlinkDao,
535540
SourceDao sourceDao,
536541
DatasetDao datasetDao,
537542
DatasetVersionDao datasetVersionDao,
@@ -568,6 +573,35 @@ default DatasetRecord upsertLineageDataset(
568573
formatNamespaceName(ds.getNamespace()),
569574
DEFAULT_NAMESPACE_OWNER);
570575

576+
DatasetSymlinkRow symlink =
577+
datasetSymlinkDao.upsertDatasetSymlinkRow(
578+
UUID.randomUUID(),
579+
formatDatasetName(ds.getName()),
580+
dsNamespace.getUuid(),
581+
true,
582+
null,
583+
now);
584+
585+
Optional.ofNullable(ds.getFacets())
586+
.map(facets -> facets.getSymlinks())
587+
.ifPresent(
588+
el ->
589+
el.getIdentifiers().stream()
590+
.forEach(
591+
id ->
592+
datasetSymlinkDao.doUpsertDatasetSymlinkRow(
593+
symlink.getUuid(),
594+
id.getName(),
595+
namespaceDao
596+
.upsertNamespaceRow(
597+
UUID.randomUUID(),
598+
now,
599+
id.getNamespace(),
600+
DEFAULT_NAMESPACE_OWNER)
601+
.getUuid(),
602+
false,
603+
id.getType(),
604+
now)));
571605
String dslifecycleState =
572606
Optional.ofNullable(ds.getFacets())
573607
.map(DatasetFacets::getLifecycleStateChange)
@@ -576,7 +610,7 @@ default DatasetRecord upsertLineageDataset(
576610

577611
DatasetRow datasetRow =
578612
datasetDao.upsert(
579-
UUID.randomUUID(),
613+
symlink.getUuid(),
580614
getDatasetType(ds),
581615
now,
582616
datasetNamespace.getUuid(),
@@ -609,7 +643,7 @@ default DatasetRecord upsertLineageDataset(
609643
dsNamespace.getName(),
610644
source.getName(),
611645
dsRow.getPhysicalName(),
612-
dsRow.getName(),
646+
symlink.getName(),
613647
dslifecycleState,
614648
fields,
615649
runUuid)
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
/*
2+
* Copyright 2018-2022 contributors to the Marquez project
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package marquez.db.mappers;
7+
8+
import static marquez.db.Columns.booleanOrDefault;
9+
import static marquez.db.Columns.stringOrNull;
10+
import static marquez.db.Columns.stringOrThrow;
11+
import static marquez.db.Columns.timestampOrThrow;
12+
import static marquez.db.Columns.uuidOrThrow;
13+
14+
import java.sql.ResultSet;
15+
import java.sql.SQLException;
16+
import lombok.NonNull;
17+
import marquez.db.Columns;
18+
import marquez.db.models.DatasetSymlinkRow;
19+
import org.jdbi.v3.core.mapper.RowMapper;
20+
import org.jdbi.v3.core.statement.StatementContext;
21+
22+
public class DatasetSymlinksRowMapper implements RowMapper<DatasetSymlinkRow> {
23+
24+
@Override
25+
public DatasetSymlinkRow map(@NonNull ResultSet results, @NonNull StatementContext context)
26+
throws SQLException {
27+
return new DatasetSymlinkRow(
28+
uuidOrThrow(results, Columns.DATASET_UUID),
29+
stringOrThrow(results, Columns.NAME),
30+
uuidOrThrow(results, Columns.NAMESPACE_UUID),
31+
stringOrNull(results, Columns.TYPE),
32+
booleanOrDefault(results, Columns.IS_PRIMARY, false),
33+
timestampOrThrow(results, Columns.CREATED_AT),
34+
timestampOrThrow(results, Columns.UPDATED_AT));
35+
}
36+
}
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
/*
2+
* Copyright 2018-2022 contributors to the Marquez project
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package marquez.db.models;
7+
8+
import java.time.Instant;
9+
import java.util.Optional;
10+
import java.util.UUID;
11+
import javax.annotation.Nullable;
12+
import lombok.AllArgsConstructor;
13+
import lombok.EqualsAndHashCode;
14+
import lombok.Getter;
15+
import lombok.NonNull;
16+
import lombok.Value;
17+
18+
@AllArgsConstructor
19+
@EqualsAndHashCode
20+
@Value
21+
public class DatasetSymlinkRow {
22+
@NonNull UUID uuid;
23+
@NonNull String name;
24+
@NonNull UUID namespaceUuid;
25+
@Nullable String type;
26+
@NonNull boolean isPrimary;
27+
@Getter @NonNull private final Instant createdAt;
28+
@Getter @NonNull private final Instant updatedAt;
29+
30+
public Optional<String> getType() {
31+
return Optional.ofNullable(type);
32+
}
33+
}

0 commit comments

Comments
 (0)