Skip to content

Commit 52ebaa3

Browse files
add new lightweight current runs query
Signed-off-by: Prachi Mishra <prachi.mishra@astronomer.io>
1 parent 12702f3 commit 52ebaa3

6 files changed

Lines changed: 119 additions & 12 deletions

File tree

api/src/main/java/marquez/api/OpenLineageResource.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ private int determineStatusCode(Throwable e) {
9696
public Response getLineage(
9797
@QueryParam("nodeId") @NotNull NodeId nodeId,
9898
@QueryParam("depth") @DefaultValue(DEFAULT_DEPTH) int depth) {
99-
return Response.ok(lineageService.lineage(nodeId, depth)).build();
99+
return Response.ok(lineageService.lineage(nodeId, depth, true)).build();
100100
}
101101

102102
@Timed

api/src/main/java/marquez/db/LineageDao.java

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,12 +91,52 @@ WHERE ds.uuid IN (<dsUuids>)""")
9191
LIMIT 1""")
9292
Optional<UUID> getJobFromInputOrOutput(String datasetName, String namespaceName);
9393

94+
@SqlQuery(
95+
"WITH latest_runs AS (\n"
96+
+ " SELECT DISTINCT on(r.job_name, r.namespace_name) r.*, jv.version\n"
97+
+ " FROM runs_view r\n"
98+
+ " INNER JOIN job_versions jv ON jv.uuid=r.job_version_uuid\n"
99+
+ " INNER JOIN jobs_view j ON j.uuid=jv.job_uuid\n"
100+
+ " WHERE j.uuid in (<jobUuid>) OR j.symlink_target_uuid IN (<jobUuid>)\n"
101+
+ " ORDER BY r.job_name, r.namespace_name, created_at DESC\n"
102+
+ ")\n"
103+
+ "SELECT r.*, ra.args, ctx.context, f.facets,\n"
104+
+ " r.version AS job_version, ri.input_versions, ro.output_versions\n"
105+
+ " from latest_runs AS r\n"
106+
+ "LEFT JOIN run_args AS ra ON ra.uuid = r.run_args_uuid\n"
107+
+ "LEFT JOIN job_contexts AS ctx ON r.job_context_uuid = ctx.uuid\n"
108+
+ "LEFT JOIN LATERAL (\n"
109+
+ " SELECT le.run_uuid, JSON_AGG(event->'run'->'facets') AS facets\n"
110+
+ " FROM lineage_events le\n"
111+
+ " WHERE le.run_uuid=r.uuid\n"
112+
+ " GROUP BY le.run_uuid\n"
113+
+ ") AS f ON r.uuid=f.run_uuid\n"
114+
+ "LEFT JOIN LATERAL (\n"
115+
+ " SELECT im.run_uuid,\n"
116+
+ " JSON_AGG(json_build_object('namespace', dv.namespace_name,\n"
117+
+ " 'name', dv.dataset_name,\n"
118+
+ " 'version', dv.version)) AS input_versions\n"
119+
+ " FROM runs_input_mapping im\n"
120+
+ " INNER JOIN dataset_versions dv on im.dataset_version_uuid = dv.uuid\n"
121+
+ " WHERE im.run_uuid=r.uuid\n"
122+
+ " GROUP BY im.run_uuid\n"
123+
+ ") ri ON ri.run_uuid=r.uuid\n"
124+
+ "LEFT JOIN LATERAL (\n"
125+
+ " SELECT run_uuid, JSON_AGG(json_build_object('namespace', namespace_name,\n"
126+
+ " 'name', dataset_name,\n"
127+
+ " 'version', version)) AS output_versions\n"
128+
+ " FROM dataset_versions\n"
129+
+ " WHERE run_uuid=r.uuid\n"
130+
+ " GROUP BY run_uuid\n"
131+
+ ") ro ON ro.run_uuid=r.uuid")
132+
List<Run> getCurrentRunsWithFacets(@BindList Collection<UUID> jobUuid);
133+
94134
@SqlQuery(
95135
"SELECT DISTINCT on(r.job_name, r.namespace_name) r.*, jv.version as job_version\n"
96136
+ " FROM runs_view r\n"
97137
+ " INNER JOIN job_versions jv ON jv.uuid=r.job_version_uuid\n"
98138
+ " INNER JOIN jobs_view j ON j.uuid=jv.job_uuid\n"
99139
+ " WHERE j.uuid in (<jobUuid>) OR j.symlink_target_uuid IN (<jobUuid>)\n"
100-
+ " ORDER BY r.job_name, r.namespace_name, created_at DESC")
140+
+ " ORDER BY r.job_name, r.namespace_name, created_at DESC\n")
101141
List<Run> getCurrentRuns(@BindList Collection<UUID> jobUuid);
102142
}

api/src/main/java/marquez/service/LineageService.java

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ public LineageService(LineageDao delegate, JobDao jobDao) {
4646
this.jobDao = jobDao;
4747
}
4848

49-
public Lineage lineage(NodeId nodeId, int depth) {
49+
public Lineage lineage(NodeId nodeId, int depth, boolean withRunFacets) {
5050
Optional<UUID> optionalUUID = getJobUuid(nodeId);
5151
if (optionalUUID.isEmpty()) {
5252
throw new NodeIdNotFoundException("Could not find node");
@@ -56,7 +56,11 @@ public Lineage lineage(NodeId nodeId, int depth) {
5656
Set<JobData> jobData = getLineage(Collections.singleton(job), depth);
5757

5858
List<Run> runs =
59-
getCurrentRuns(jobData.stream().map(JobData::getUuid).collect(Collectors.toSet()));
59+
withRunFacets
60+
? getCurrentRunsWithFacets(
61+
jobData.stream().map(JobData::getUuid).collect(Collectors.toSet()))
62+
: getCurrentRuns(jobData.stream().map(JobData::getUuid).collect(Collectors.toSet()));
63+
6064
// todo fix runtime
6165
for (JobData j : jobData) {
6266
if (j.getLatestRun().isEmpty()) {

api/src/test/java/marquez/api/OpenLineageResourceTest.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
import static org.junit.jupiter.api.Assertions.assertEquals;
99
import static org.mockito.ArgumentMatchers.any;
10+
import static org.mockito.ArgumentMatchers.anyBoolean;
1011
import static org.mockito.ArgumentMatchers.anyInt;
1112
import static org.mockito.Mockito.mock;
1213
import static org.mockito.Mockito.when;
@@ -41,7 +42,7 @@ class OpenLineageResourceTest {
4142
OpenLineageResourceTest.class.getResourceAsStream("/lineage/node.json"),
4243
new TypeReference<>() {});
4344
LINEAGE = new Lineage(ImmutableSortedSet.of(testNode));
44-
when(lineageService.lineage(any(NodeId.class), anyInt())).thenReturn(LINEAGE);
45+
when(lineageService.lineage(any(NodeId.class), anyInt(), anyBoolean())).thenReturn(LINEAGE);
4546

4647
ServiceFactory serviceFactory =
4748
ApiTestUtils.mockServiceFactory(Map.of(LineageService.class, lineageService));

api/src/test/java/marquez/db/LineageDaoTest.java

Lines changed: 52 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -773,7 +773,7 @@ public void testGetCurrentRuns() {
773773
Stream.of(writeJob.getJob().getUuid()), newRows.stream().map(JobLineage::getId))
774774
.collect(Collectors.toSet());
775775

776-
List<Run> currentRuns = lineageDao.getCurrentRuns(jobids);
776+
List<Run> currentRuns = lineageDao.getCurrentRunsWithFacets(jobids);
777777

778778
// assert the job does exist
779779
assertThat(currentRuns)
@@ -790,7 +790,7 @@ public void testGetCurrentRunsWithFailedJob() {
790790

791791
Set<UUID> jobids = Collections.singleton(writeJob.getJob().getUuid());
792792

793-
List<Run> currentRuns = lineageDao.getCurrentRuns(jobids);
793+
List<Run> currentRuns = lineageDao.getCurrentRunsWithFacets(jobids);
794794

795795
// assert the job does exist
796796
assertThat(currentRuns)
@@ -799,6 +799,56 @@ public void testGetCurrentRunsWithFailedJob() {
799799
.contains(writeJob.getRun().getUuid());
800800
}
801801

802+
@Test
803+
public void testGetCurrentRunsWithFacetsGetsLatestRun() {
804+
for (int i = 0; i < 5; i++) {
805+
LineageTestUtils.createLineageRow(
806+
openLineageDao,
807+
"writeJob",
808+
"COMPLETE",
809+
jobFacet,
810+
Arrays.asList(),
811+
Arrays.asList(dataset));
812+
}
813+
814+
List<JobLineage> newRows =
815+
writeDownstreamLineage(
816+
openLineageDao,
817+
new LinkedList<>(
818+
Arrays.asList(
819+
new DatasetConsumerJob("readJob", 3, Optional.of("outputData2")),
820+
new DatasetConsumerJob("downstreamJob", 1, Optional.empty()))),
821+
jobFacet,
822+
dataset);
823+
UpdateLineageRow writeJob =
824+
LineageTestUtils.createLineageRow(
825+
openLineageDao, "writeJob", "FAIL", jobFacet, Arrays.asList(), Arrays.asList(dataset));
826+
827+
Set<UUID> expectedRunIds =
828+
Stream.concat(
829+
Stream.of(writeJob.getRun().getUuid()), newRows.stream().map(JobLineage::getRunId))
830+
.collect(Collectors.toSet());
831+
Set<UUID> jobids =
832+
Stream.concat(
833+
Stream.of(writeJob.getJob().getUuid()), newRows.stream().map(JobLineage::getId))
834+
.collect(Collectors.toSet());
835+
836+
List<Run> currentRuns = lineageDao.getCurrentRunsWithFacets(jobids);
837+
838+
// assert that the returned runs are exactly the expected runs, including the most recently created writeJob run
839+
assertThat(currentRuns)
840+
.hasSize(expectedRunIds.size())
841+
.extracting(r -> r.getId().getValue())
842+
.containsAll(expectedRunIds);
843+
844+
// assert that run_args, output versions, and run facets are fetched from the dao (input versions are not checked here).
845+
for (Run run : currentRuns) {
846+
assertThat(run.getArgs()).hasSize(2);
847+
assertThat(run.getOutputVersions()).hasSize(1);
848+
assertThat(run.getFacets()).hasSize(1);
849+
}
850+
}
851+
802852
@Test
803853
public void testGetCurrentRunsGetsLatestRun() {
804854
for (int i = 0; i < 5; i++) {

api/src/test/java/marquez/service/LineageServiceTest.java

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,8 @@ public void testLineage() {
124124
dataset);
125125
String jobName = writeJob.getJob().getName();
126126
Lineage lineage =
127-
lineageService.lineage(NodeId.of(new NamespaceName(NAMESPACE), new JobName(jobName)), 2);
127+
lineageService.lineage(
128+
NodeId.of(new NamespaceName(NAMESPACE), new JobName(jobName)), 2, true);
128129

129130
// 1 writeJob + 1 commonDataset
130131
// 20 readJob + 20 outputData
@@ -158,6 +159,9 @@ public void testLineage() {
158159
runAssert
159160
.extracting(Run::getInputVersions, InstanceOfAssertFactories.list(DatasetVersionId.class))
160161
.hasSize(0);
162+
runAssert
163+
.extracting(Run::getOutputVersions, InstanceOfAssertFactories.list(DatasetVersionId.class))
164+
.hasSize(1);
161165

162166
// check the output edges for the commonDataset node
163167
assertThat(lineage.getGraph())
@@ -229,7 +233,8 @@ public void testLineageWithDeletedDataset() {
229233

230234
String jobName = writeJob.getJob().getName();
231235
Lineage lineage =
232-
lineageService.lineage(NodeId.of(new NamespaceName(NAMESPACE), new JobName(jobName)), 2);
236+
lineageService.lineage(
237+
NodeId.of(new NamespaceName(NAMESPACE), new JobName(jobName)), 2, true);
233238

234239
// 1 writeJob + 0 commonDataset is hidden
235240
// 20 readJob + 20 outputData
@@ -263,6 +268,9 @@ public void testLineageWithDeletedDataset() {
263268
runAssert
264269
.extracting(Run::getInputVersions, InstanceOfAssertFactories.list(DatasetVersionId.class))
265270
.hasSize(0);
271+
runAssert
272+
.extracting(Run::getOutputVersions, InstanceOfAssertFactories.list(DatasetVersionId.class))
273+
.hasSize(1);
266274

267275
// check the output edges for the commonDataset node
268276
assertThat(lineage.getGraph())
@@ -275,7 +283,8 @@ public void testLineageWithDeletedDataset() {
275283
jobDao.delete(NAMESPACE, "downstreamJob0<-outputData<-readJob0<-commonDataset");
276284

277285
lineage =
278-
lineageService.lineage(NodeId.of(new NamespaceName(NAMESPACE), new JobName(jobName)), 2);
286+
lineageService.lineage(
287+
NodeId.of(new NamespaceName(NAMESPACE), new JobName(jobName)), 2, true);
279288

280289
// 1 writeJob + 0 commonDataset is hidden
281290
// 20 readJob + 20 outputData
@@ -305,7 +314,9 @@ public void testLineageWithNoDatasets() {
305314
openLineageDao, "writeJob", "COMPLETE", jobFacet, Arrays.asList(), Arrays.asList());
306315
Lineage lineage =
307316
lineageService.lineage(
308-
NodeId.of(new NamespaceName(NAMESPACE), new JobName(writeJob.getJob().getName())), 5);
317+
NodeId.of(new NamespaceName(NAMESPACE), new JobName(writeJob.getJob().getName())),
318+
5,
319+
true);
309320
assertThat(lineage.getGraph())
310321
.hasSize(1)
311322
.first()
@@ -356,7 +367,8 @@ public void testLineageWithWithCycle() {
356367
lineageService.lineage(
357368
NodeId.of(
358369
new NamespaceName(NAMESPACE), new JobName(intermediateJob.getJob().getName())),
359-
5);
370+
5,
371+
true);
360372
assertThat(lineage.getGraph()).extracting(Node::getId).hasSize(6);
361373
ObjectAssert<Node> datasetNode =
362374
assertThat(lineage.getGraph())

0 commit comments

Comments
 (0)