Skip to content

Commit 9cfdd83

Browse files
OL facets - PR1 - create & feed new tables while not reading them
Signed-off-by: Pawel Leszczynski <leszczynski.pawel@gmail.com>
1 parent 88e00a9 commit 9cfdd83

30 files changed

Lines changed: 1804 additions & 46 deletions

api/src/main/java/marquez/db/BaseDao.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,4 +56,13 @@ public interface BaseDao extends SqlObject {
5656

5757
@CreateSqlObject
5858
ColumnLineageDao createColumnLineageDao();
59+
60+
@CreateSqlObject
61+
DatasetFacetsDao createDatasetFacetsDao();
62+
63+
@CreateSqlObject
64+
JobFacetsDao createJobFacetsDao();
65+
66+
@CreateSqlObject
67+
RunFacetsDao createRunFacetsDao();
5968
}

api/src/main/java/marquez/db/Columns.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import lombok.extern.slf4j.Slf4j;
2626
import marquez.common.Utils;
2727
import org.postgresql.util.PGInterval;
28+
import org.postgresql.util.PGobject;
2829

2930
@Slf4j
3031
public final class Columns {
@@ -299,4 +300,17 @@ public static ImmutableMap<String, String> mapOrNull(final ResultSet results, fi
299300
final String mapAsString = results.getString(column);
300301
return Utils.fromJson(mapAsString, new TypeReference<>() {});
301302
}
303+
304+
public static PGobject toPgObject(@NonNull final Object object) {
305+
final PGobject jsonObject = new PGobject();
306+
jsonObject.setType("jsonb");
307+
final String json = Utils.toJson(object);
308+
try {
309+
jsonObject.setValue(json);
310+
} catch (SQLException e) {
311+
log.error("Error when ...", e);
312+
return null;
313+
}
314+
return jsonObject;
315+
}
302316
}
Lines changed: 284 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,284 @@
1+
/*
2+
* Copyright 2018-2022 contributors to the Marquez project
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package marquez.db;
7+
8+
import static marquez.db.Columns.toPgObject;
9+
10+
import com.fasterxml.jackson.databind.node.ObjectNode;
11+
import java.time.Instant;
12+
import java.util.Optional;
13+
import java.util.UUID;
14+
import lombok.NonNull;
15+
import marquez.common.Utils;
16+
import marquez.service.models.LineageEvent;
17+
import org.jdbi.v3.sqlobject.statement.SqlUpdate;
18+
import org.jdbi.v3.sqlobject.transaction.Transaction;
19+
import org.postgresql.util.PGobject;
20+
21+
/** The DAO for {@code dataset} facets. */
22+
public interface DatasetFacetsDao {
23+
/**
 * Category recorded with every dataset facet row (bound to the {@code type} column by {@code
 * insertDatasetFacet}). Each {@code Facet} constant carries one of {@code DATASET}, {@code INPUT}
 * or {@code OUTPUT}; {@code UNKNOWN} is used by {@code insertDatasetFacetsFor} for additional
 * facets whose name matches no {@code Facet} constant.
 */
enum Type {
  DATASET,
  INPUT,
  OUTPUT,
  UNKNOWN
}
30+
31+
/* An {@code enum} used to determine the dataset facet. */
32+
enum Facet {
33+
DOCUMENTATION(Type.DATASET, "documentation"),
34+
SCHEMA(Type.DATASET, "schema"),
35+
DATASOURCE(Type.DATASET, "dataSource"),
36+
DESCRIPTION(Type.DATASET, "description"),
37+
LIFECYCLE_STATE_CHANGE(Type.DATASET, "lifecycleStateChange"),
38+
VERSION(Type.DATASET, "version"),
39+
COLUMN_LINEAGE(Type.DATASET, "columnLineage"),
40+
OWNERSHIP(Type.DATASET, "ownership"),
41+
DATA_QUALITY_METRICS(Type.INPUT, "dataQualityMetrics"),
42+
DATA_QUALITY_ASSERTIONS(Type.INPUT, "dataQualityAssertions"),
43+
OUTPUT_STATISTICS(Type.OUTPUT, "outputStatistics");
44+
45+
// whenever adding new types, please mind migration script in
46+
// marquez.db.migrations.V55_5__BackfillFacets
47+
48+
final Type type;
49+
final String name;
50+
51+
Facet(@NonNull final Type type, @NonNull final String name) {
52+
this.type = type;
53+
this.name = name;
54+
}
55+
56+
Type getType() {
57+
return type;
58+
}
59+
60+
String getName() {
61+
return name;
62+
}
63+
64+
/** ... */
65+
public static Facet fromName(@NonNull final String name) {
66+
for (final Facet facet : Facet.values()) {
67+
if (facet.getName().equalsIgnoreCase(name)) {
68+
return facet;
69+
}
70+
}
71+
return null;
72+
}
73+
74+
/** ... */
75+
public ObjectNode asJson(@NonNull Object facetValue) {
76+
return asJson(name, facetValue);
77+
}
78+
79+
/** ... */
80+
public static ObjectNode asJson(@NonNull final String facetName, @NonNull Object facetValue) {
81+
final ObjectNode facetAsJson = Utils.getMapper().createObjectNode();
82+
facetAsJson.putPOJO(facetName, facetValue);
83+
return facetAsJson;
84+
}
85+
}
86+
87+
/**
88+
* @param uuid
89+
* @param createdAt
90+
* @param datasetUuid
91+
* @param runUuid
92+
* @param lineageEventTime
93+
* @param lineageEventType
94+
* @param type
95+
* @param name
96+
* @param facet
97+
*/
98+
@SqlUpdate(
99+
"""
100+
INSERT INTO dataset_facets (
101+
uuid,
102+
created_at,
103+
dataset_uuid,
104+
run_uuid,
105+
lineage_event_time,
106+
lineage_event_type,
107+
type,
108+
name,
109+
facet
110+
) VALUES (
111+
:uuid,
112+
:createdAt,
113+
:datasetUuid,
114+
:runUuid,
115+
:lineageEventTime,
116+
:lineageEventType,
117+
:type,
118+
:name,
119+
:facet
120+
)
121+
""")
122+
void insertDatasetFacet(
123+
UUID uuid,
124+
Instant createdAt,
125+
UUID datasetUuid,
126+
UUID runUuid,
127+
Instant lineageEventTime,
128+
String lineageEventType,
129+
Type type,
130+
String name,
131+
PGobject facet);
132+
133+
/**
134+
* @param datasetUuid
135+
* @param runUuid
136+
* @param lineageEventTime
137+
* @param lineageEventType
138+
* @param datasetFacets
139+
*/
140+
@Transaction
141+
default void insertDatasetFacetsFor(
142+
@NonNull UUID datasetUuid,
143+
@NonNull UUID runUuid,
144+
@NonNull Instant lineageEventTime,
145+
@NonNull String lineageEventType,
146+
@NonNull LineageEvent.DatasetFacets datasetFacets) {
147+
final Instant now = Instant.now();
148+
149+
// Add ...
150+
Optional.ofNullable(datasetFacets.getDocumentation())
151+
.ifPresent(
152+
documentation ->
153+
insertDatasetFacet(
154+
UUID.randomUUID(),
155+
now,
156+
datasetUuid,
157+
runUuid,
158+
lineageEventTime,
159+
lineageEventType,
160+
Facet.DOCUMENTATION.getType(),
161+
Facet.DOCUMENTATION.getName(),
162+
toPgObject(Facet.DOCUMENTATION.asJson(documentation))));
163+
164+
// Add ...
165+
Optional.ofNullable(datasetFacets.getSchema())
166+
.ifPresent(
167+
schema ->
168+
insertDatasetFacet(
169+
UUID.randomUUID(),
170+
now,
171+
datasetUuid,
172+
runUuid,
173+
lineageEventTime,
174+
lineageEventType,
175+
Facet.SCHEMA.getType(),
176+
Facet.SCHEMA.getName(),
177+
toPgObject(Facet.SCHEMA.asJson(schema))));
178+
179+
// Add ...
180+
Optional.ofNullable(datasetFacets.getDataSource())
181+
.ifPresent(
182+
datasource ->
183+
insertDatasetFacet(
184+
UUID.randomUUID(),
185+
now,
186+
datasetUuid,
187+
runUuid,
188+
lineageEventTime,
189+
lineageEventType,
190+
Facet.DATASOURCE.getType(),
191+
Facet.DATASOURCE.getName(),
192+
toPgObject(Facet.DATASOURCE.asJson(datasource))));
193+
194+
// Add ...
195+
Optional.ofNullable(datasetFacets.getDescription())
196+
.ifPresent(
197+
description ->
198+
insertDatasetFacet(
199+
UUID.randomUUID(),
200+
now,
201+
datasetUuid,
202+
runUuid,
203+
lineageEventTime,
204+
lineageEventType,
205+
Facet.DESCRIPTION.getType(),
206+
Facet.DESCRIPTION.getName(),
207+
toPgObject(Facet.DESCRIPTION.asJson(description))));
208+
209+
// Add ...
210+
Optional.ofNullable(datasetFacets.getLifecycleStateChange())
211+
.ifPresent(
212+
lifecycleStateChange ->
213+
insertDatasetFacet(
214+
UUID.randomUUID(),
215+
now,
216+
datasetUuid,
217+
runUuid,
218+
lineageEventTime,
219+
lineageEventType,
220+
Facet.LIFECYCLE_STATE_CHANGE.getType(),
221+
Facet.LIFECYCLE_STATE_CHANGE.getName(),
222+
toPgObject(Facet.LIFECYCLE_STATE_CHANGE.asJson(lifecycleStateChange))));
223+
224+
// Add ...
225+
Optional.ofNullable(datasetFacets.getColumnLineage())
226+
.ifPresent(
227+
lifecycleStateChange ->
228+
insertDatasetFacet(
229+
UUID.randomUUID(),
230+
now,
231+
datasetUuid,
232+
runUuid,
233+
lineageEventTime,
234+
lineageEventType,
235+
Facet.COLUMN_LINEAGE.getType(),
236+
Facet.COLUMN_LINEAGE.getName(),
237+
toPgObject(Facet.COLUMN_LINEAGE.asJson(lifecycleStateChange))));
238+
239+
// Add ..
240+
Optional.ofNullable(datasetFacets.getAdditionalFacets())
241+
.ifPresent(
242+
additional ->
243+
additional.forEach(
244+
(name, facet) -> {
245+
Optional.ofNullable(Facet.fromName(name))
246+
.ifPresentOrElse(
247+
(x) -> {
248+
insertDatasetFacet(
249+
UUID.randomUUID(),
250+
now,
251+
datasetUuid,
252+
runUuid,
253+
lineageEventTime,
254+
lineageEventType,
255+
x.getType(),
256+
x.getName(),
257+
toPgObject(x.asJson(facet)));
258+
},
259+
() -> {
260+
insertDatasetFacet(
261+
UUID.randomUUID(),
262+
now,
263+
datasetUuid,
264+
runUuid,
265+
lineageEventTime,
266+
lineageEventType,
267+
Type.UNKNOWN,
268+
name,
269+
toPgObject(Facet.asJson(name, facet)));
270+
});
271+
}));
272+
}
273+
274+
record DatasetFacetRow(
275+
UUID uuid,
276+
Instant createdAt,
277+
UUID datasetUuid,
278+
UUID runUuid,
279+
Instant lineageEventTime,
280+
String lineageEventType,
281+
DatasetFacetsDao.Type type,
282+
String name,
283+
PGobject facet) {}
284+
}

api/src/main/java/marquez/db/LineageDao.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@
1414
import marquez.db.mappers.JobDataMapper;
1515
import marquez.db.mappers.JobRowMapper;
1616
import marquez.db.mappers.RunMapper;
17-
import marquez.db.models.DatasetData;
18-
import marquez.db.models.JobData;
17+
import marquez.service.models.DatasetData;
18+
import marquez.service.models.JobData;
1919
import marquez.service.models.Run;
2020
import org.jdbi.v3.sqlobject.config.RegisterRowMapper;
2121
import org.jdbi.v3.sqlobject.customizer.BindList;

0 commit comments

Comments
 (0)