Skip to content

Commit a026783

Browse files
OL facets - PR1 - create & feed new tables while not reading them
Signed-off-by: Pawel Leszczynski <leszczynski.pawel@gmail.com>
1 parent 88e00a9 commit a026783

32 files changed

Lines changed: 1926 additions & 46 deletions

api/src/main/java/marquez/db/BaseDao.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,4 +56,13 @@ public interface BaseDao extends SqlObject {
5656

5757
@CreateSqlObject
5858
ColumnLineageDao createColumnLineageDao();
59+
60+
@CreateSqlObject
61+
DatasetFacetsDao createDatasetFacetsDao();
62+
63+
@CreateSqlObject
64+
JobFacetsDao createJobFacetsDao();
65+
66+
@CreateSqlObject
67+
RunFacetsDao createRunFacetsDao();
5968
}

api/src/main/java/marquez/db/Columns.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import lombok.extern.slf4j.Slf4j;
2626
import marquez.common.Utils;
2727
import org.postgresql.util.PGInterval;
28+
import org.postgresql.util.PGobject;
2829

2930
@Slf4j
3031
public final class Columns {
@@ -299,4 +300,17 @@ public static ImmutableMap<String, String> mapOrNull(final ResultSet results, fi
299300
final String mapAsString = results.getString(column);
300301
return Utils.fromJson(mapAsString, new TypeReference<>() {});
301302
}
303+
304+
public static PGobject toPgObject(@NonNull final Object object) {
305+
final PGobject jsonObject = new PGobject();
306+
jsonObject.setType("jsonb");
307+
final String json = Utils.toJson(object);
308+
try {
309+
jsonObject.setValue(json);
310+
} catch (SQLException e) {
311+
log.error("Error when ...", e);
312+
return null;
313+
}
314+
return jsonObject;
315+
}
302316
}
Lines changed: 215 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,215 @@
1+
/*
2+
* Copyright 2018-2022 contributors to the Marquez project
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package marquez.db;
7+
8+
import static marquez.db.Columns.toPgObject;
9+
10+
import java.time.Instant;
11+
import java.util.Arrays;
12+
import java.util.Map;
13+
import java.util.Optional;
14+
import java.util.Set;
15+
import java.util.UUID;
16+
import java.util.stream.Collectors;
17+
import lombok.NonNull;
18+
import marquez.service.models.LineageEvent;
19+
import org.jdbi.v3.sqlobject.statement.SqlUpdate;
20+
import org.jdbi.v3.sqlobject.transaction.Transaction;
21+
import org.postgresql.util.PGobject;
22+
23+
/** The DAO for {@code dataset} facets. */
24+
public interface DatasetFacetsDao {
25+
/**
 * The kind of a dataset facet. {@link #UNKNOWN} is assigned to facets that have no matching
 * {@code DatasetFacet} constant.
 */
enum Type {
  DATASET,
  INPUT,
  OUTPUT,
  UNKNOWN
}
32+
33+
/* An {@code enum} used to determine the dataset facet. */
34+
enum DatasetFacet implements Facet {
35+
DOCUMENTATION(Type.DATASET, "documentation") {
36+
Optional<PGobject> toPgObject(LineageEvent.DatasetFacets facets) {
37+
return Optional.ofNullable(facets.getDocumentation()).map(this::toPgObject);
38+
}
39+
},
40+
SCHEMA(Type.DATASET, "schema") {
41+
Optional<PGobject> toPgObject(LineageEvent.DatasetFacets facets) {
42+
return Optional.ofNullable(facets.getSchema()).map(this::toPgObject);
43+
}
44+
},
45+
DATASOURCE(Type.DATASET, "dataSource") {
46+
Optional<PGobject> toPgObject(LineageEvent.DatasetFacets facets) {
47+
return Optional.ofNullable(facets.getDataSource()).map(this::toPgObject);
48+
}
49+
},
50+
DESCRIPTION(Type.DATASET, "description") {
51+
Optional<PGobject> toPgObject(LineageEvent.DatasetFacets facets) {
52+
return Optional.ofNullable(facets.getDescription()).map(this::toPgObject);
53+
}
54+
},
55+
LIFECYCLE_STATE_CHANGE(Type.DATASET, "lifecycleStateChange") {
56+
Optional<PGobject> toPgObject(LineageEvent.DatasetFacets facets) {
57+
return Optional.ofNullable(facets.getLifecycleStateChange()).map(this::toPgObject);
58+
}
59+
},
60+
VERSION(Type.DATASET, "version"),
61+
COLUMN_LINEAGE(Type.DATASET, "columnLineage") {
62+
Optional<PGobject> toPgObject(LineageEvent.DatasetFacets facets) {
63+
return Optional.ofNullable(facets.getColumnLineage()).map(this::toPgObject);
64+
}
65+
},
66+
OWNERSHIP(Type.DATASET, "ownership"),
67+
DATA_QUALITY_METRICS(Type.INPUT, "dataQualityMetrics"),
68+
DATA_QUALITY_ASSERTIONS(Type.INPUT, "dataQualityAssertions"),
69+
OUTPUT_STATISTICS(Type.OUTPUT, "outputStatistics");
70+
71+
// whenever adding new types, please mind migration script in
72+
// marquez.db.migrations.V55_5__BackfillFacets
73+
74+
final Type type;
75+
final String name;
76+
77+
DatasetFacet(@NonNull final Type type, @NonNull final String name) {
78+
this.type = type;
79+
this.name = name;
80+
}
81+
82+
Type getType() {
83+
return type;
84+
}
85+
86+
public String getName() {
87+
return name;
88+
}
89+
90+
/** ... */
91+
public static DatasetFacet fromName(@NonNull final String name) {
92+
for (final DatasetFacet facet : DatasetFacet.values()) {
93+
if (facet.getName().equalsIgnoreCase(name)) {
94+
return facet;
95+
}
96+
}
97+
return null;
98+
}
99+
100+
Optional<PGobject> toPgObject(LineageEvent.DatasetFacets runFacet) {
101+
return propertiesToPgObject(runFacet.getAdditionalFacets());
102+
}
103+
}
104+
105+
/**
 * Inserts a single row into the {@code dataset_facets} table.
 *
 * @param uuid value for the {@code uuid} column
 * @param createdAt value for the {@code created_at} column
 * @param datasetUuid value for the {@code dataset_uuid} column
 * @param runUuid value for the {@code run_uuid} column
 * @param lineageEventTime value for the {@code lineage_event_time} column
 * @param lineageEventType value for the {@code lineage_event_type} column
 * @param type value for the {@code type} column
 * @param name value for the {@code name} column
 * @param facet value for the {@code facet} column; {@code insertDatasetFacetsFor} passes a
 *     {@code jsonb} object here
 */
@SqlUpdate(
    """
    INSERT INTO dataset_facets (
    uuid,
    created_at,
    dataset_uuid,
    run_uuid,
    lineage_event_time,
    lineage_event_type,
    type,
    name,
    facet
    ) VALUES (
    :uuid,
    :createdAt,
    :datasetUuid,
    :runUuid,
    :lineageEventTime,
    :lineageEventType,
    :type,
    :name,
    :facet
    )
    """)
void insertDatasetFacet(
    UUID uuid,
    Instant createdAt,
    UUID datasetUuid,
    UUID runUuid,
    Instant lineageEventTime,
    String lineageEventType,
    Type type,
    String name,
    PGobject facet);
150+
151+
/**
152+
* @param datasetUuid
153+
* @param runUuid
154+
* @param lineageEventTime
155+
* @param lineageEventType
156+
* @param datasetFacets
157+
*/
158+
@Transaction
159+
default void insertDatasetFacetsFor(
160+
@NonNull UUID datasetUuid,
161+
@NonNull UUID runUuid,
162+
@NonNull Instant lineageEventTime,
163+
@NonNull String lineageEventType,
164+
@NonNull LineageEvent.DatasetFacets datasetFacets) {
165+
final Instant now = Instant.now();
166+
167+
// insert rows for known facets
168+
Arrays.stream(DatasetFacet.values())
169+
.collect(Collectors.toMap(DatasetFacet::getName, facet -> facet.toPgObject(datasetFacets)))
170+
.entrySet()
171+
.stream()
172+
.filter(entry -> entry.getValue().isPresent())
173+
.forEach(
174+
entry ->
175+
insertDatasetFacet(
176+
UUID.randomUUID(),
177+
now,
178+
datasetUuid,
179+
runUuid,
180+
lineageEventTime,
181+
lineageEventType,
182+
DatasetFacet.fromName(entry.getKey()).getType(),
183+
entry.getKey(),
184+
entry.getValue().get()));
185+
186+
// insert undefined facets
187+
Optional.ofNullable(datasetFacets.getAdditionalFacets()).stream()
188+
.map(Map::entrySet)
189+
.flatMap(Set::stream)
190+
.filter(entry -> Optional.ofNullable(DatasetFacet.fromName(entry.getKey())).isEmpty())
191+
.forEach(
192+
entry ->
193+
insertDatasetFacet(
194+
UUID.randomUUID(),
195+
now,
196+
datasetUuid,
197+
runUuid,
198+
lineageEventTime,
199+
lineageEventType,
200+
Type.UNKNOWN,
201+
entry.getKey(),
202+
toPgObject(Facet.asJson(entry.getKey(), entry.getValue()))));
203+
}
204+
205+
/**
 * Value holder whose components mirror, in order, the parameters of {@code insertDatasetFacet}.
 *
 * <p>NOTE(review): presumably models one row of the {@code dataset_facets} table; nothing in
 * this DAO constructs or returns it yet, so confirm the intended use.
 */
record DatasetFacetRow(
    UUID uuid,
    Instant createdAt,
    UUID datasetUuid,
    UUID runUuid,
    Instant lineageEventTime,
    String lineageEventType,
    DatasetFacetsDao.Type type,
    String name,
    PGobject facet) {}
215+
}
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
/*
2+
* Copyright 2018-2022 contributors to the Marquez project
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
package marquez.db;
6+
7+
import com.fasterxml.jackson.databind.node.ObjectNode;
8+
import java.util.Map;
9+
import java.util.Optional;
10+
import lombok.NonNull;
11+
import marquez.common.Utils;
12+
import org.postgresql.util.PGobject;
13+
14+
public interface Facet {
15+
16+
/** ... */
17+
static ObjectNode asJson(@NonNull final String facetName, @NonNull Object facetValue) {
18+
final ObjectNode facetAsJson = Utils.getMapper().createObjectNode();
19+
facetAsJson.putPOJO(facetName, facetValue);
20+
return facetAsJson;
21+
}
22+
23+
String getName();
24+
25+
default Optional<PGobject> propertiesToPgObject(Map<String, Object> additionalProperties) {
26+
return Optional.ofNullable(additionalProperties)
27+
.filter(map -> map.containsKey(getName()))
28+
.map(key -> toPgObject(additionalProperties.get(getName())));
29+
}
30+
31+
default PGobject toPgObject(Object o) {
32+
return Columns.toPgObject(Facet.asJson(getName(), o));
33+
}
34+
}

0 commit comments

Comments
 (0)