Skip to content

Commit 65d57f7

Browse files
authored
Latest runs for jobs (#2901)
* Latest runs coming down. * Adding query param for job listing. * Editing tests. * Bringing in jobs without a run state for tests. * Spotless * Moving to list for lastRunStates. Signed-off-by: phixMe <peter.hicks@pdtechsolutions.com> --------- Signed-off-by: phixMe <peter.hicks@pdtechsolutions.com>
1 parent 1d90f18 commit 65d57f7

9 files changed

Lines changed: 84 additions & 33 deletions

File tree

api/src/main/java/marquez/api/JobResource.java

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212
import com.codahale.metrics.annotation.Timed;
1313
import com.fasterxml.jackson.annotation.JsonProperty;
1414
import java.net.URI;
15+
import java.util.ArrayList;
16+
import java.util.Collections;
1517
import java.util.List;
1618
import javax.validation.Valid;
1719
import javax.validation.constraints.Min;
@@ -42,6 +44,7 @@
4244
import marquez.common.models.JobName;
4345
import marquez.common.models.NamespaceName;
4446
import marquez.common.models.RunId;
47+
import marquez.common.models.RunState;
4548
import marquez.common.models.TagName;
4649
import marquez.common.models.Version;
4750
import marquez.db.JobFacetsDao;
@@ -164,11 +167,19 @@ public Response listJobVersions(
164167
@Produces(APPLICATION_JSON)
165168
public Response list(
166169
@PathParam("namespace") NamespaceName namespaceName,
170+
@QueryParam("lastRunStates") List<RunState> lastRunStates,
167171
@QueryParam("limit") @DefaultValue("100") @Min(value = 0) int limit,
168172
@QueryParam("offset") @DefaultValue("0") @Min(value = 0) int offset) {
169173
throwIfNotExists(namespaceName);
170174

171-
final List<Job> jobs = jobService.findAllWithRun(namespaceName.getValue(), limit, offset);
175+
// default to all run states if not specified
176+
if (lastRunStates.isEmpty()) {
177+
lastRunStates = new ArrayList<>();
178+
Collections.addAll(lastRunStates, RunState.values());
179+
}
180+
181+
final List<Job> jobs =
182+
jobService.findAllWithRun(namespaceName.getValue(), lastRunStates, limit, offset);
172183
final int totalCount = jobService.countFor(namespaceName.getValue());
173184
return Response.ok(new ResultsPage<>("jobs", jobs, totalCount)).build();
174185
}

api/src/main/java/marquez/db/JobDao.java

Lines changed: 36 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -20,24 +20,28 @@
2020
import marquez.common.models.JobName;
2121
import marquez.common.models.JobType;
2222
import marquez.common.models.NamespaceName;
23+
import marquez.common.models.RunState;
2324
import marquez.db.JobVersionDao.IoType;
2425
import marquez.db.JobVersionDao.JobDataset;
2526
import marquez.db.JobVersionDao.JobDatasetMapper;
2627
import marquez.db.mappers.JobMapper;
2728
import marquez.db.mappers.JobRowMapper;
29+
import marquez.db.mappers.RunMapper;
2830
import marquez.db.models.JobRow;
2931
import marquez.db.models.NamespaceRow;
3032
import marquez.service.models.Job;
3133
import marquez.service.models.JobMeta;
3234
import marquez.service.models.Run;
3335
import org.jdbi.v3.sqlobject.config.RegisterRowMapper;
36+
import org.jdbi.v3.sqlobject.customizer.BindList;
3437
import org.jdbi.v3.sqlobject.statement.SqlQuery;
3538
import org.jdbi.v3.sqlobject.statement.SqlUpdate;
3639
import org.postgresql.util.PGobject;
3740

3841
@RegisterRowMapper(JobRowMapper.class)
3942
@RegisterRowMapper(JobMapper.class)
4043
@RegisterRowMapper(JobDatasetMapper.class)
44+
@RegisterRowMapper(RunMapper.class)
4145
public interface JobDao extends BaseDao {
4246

4347
@SqlQuery(
@@ -138,14 +142,10 @@ default Optional<Job> findWithDatasetsAndRun(String namespaceName, String jobNam
138142
Optional<Job> job = findJobByName(namespaceName, jobName);
139143
job.ifPresent(
140144
j -> {
141-
Optional<Run> run = createRunDao().findByLatestJob(namespaceName, jobName);
142-
run.ifPresentOrElse(
143-
r -> this.setJobData(r, j),
144-
() ->
145-
this.setJobData(
146-
createJobVersionDao()
147-
.findCurrentInputOutputDatasetsFor(namespaceName, jobName),
148-
j));
145+
List<Run> runs = createRunDao().findByLatestJob(namespaceName, jobName, 1, 0);
146+
this.setJobData(runs, j);
147+
this.setJobDataset(
148+
createJobVersionDao().findCurrentInputOutputDatasetsFor(namespaceName, jobName), j);
149149
});
150150
return job;
151151
}
@@ -244,10 +244,18 @@ job_tags as (
244244
ON f.run_uuid = jv.latest_run_uuid
245245
LEFT OUTER JOIN job_tags jt
246246
ON j.uuid = jt.uuid
247+
LEFT JOIN runs r
248+
ON r.uuid = jv.latest_run_uuid
249+
WHERE
250+
(r.current_run_state IN (<lastRunStates>) OR r.uuid IS NULL)
247251
ORDER BY
248252
j.name
249253
""")
250-
List<Job> findAll(String namespaceName, int limit, int offset);
254+
List<Job> findAll(
255+
String namespaceName,
256+
@BindList("lastRunStates") List<RunState> lastRunStates,
257+
int limit,
258+
int offset);
251259

252260
@SqlQuery("SELECT count(*) FROM jobs_view AS j WHERE symlink_target_uuid IS NULL")
253261
int count();
@@ -271,18 +279,19 @@ job_tags as (
271279
+ "AND symlink_target_uuid IS NULL")
272280
int countFor(String namespaceName);
273281

274-
default List<Job> findAllWithRun(String namespaceName, int limit, int offset) {
282+
default List<Job> findAllWithRun(
283+
String namespaceName, List<RunState> lastRunStates, int limit, int offset) {
275284
RunDao runDao = createRunDao();
276-
return findAll(namespaceName, limit, offset).stream()
285+
return findAll(namespaceName, lastRunStates, limit, offset).stream()
277286
.peek(
278-
j ->
279-
runDao
280-
.findByLatestJob(namespaceName, j.getName().getValue())
281-
.ifPresent(run -> this.setJobData(run, j)))
282-
.collect(Collectors.toList());
287+
j -> {
288+
List<Run> runs = runDao.findByLatestJob(namespaceName, j.getName().getValue(), 10, 0);
289+
this.setJobData(runs, j);
290+
})
291+
.toList();
283292
}
284293

285-
default void setJobData(List<JobDataset> datasets, Job j) {
294+
default void setJobDataset(List<JobDataset> datasets, Job j) {
286295
Optional.of(
287296
datasets.stream()
288297
.filter(d -> d.ioType().equals(IoType.INPUT))
@@ -304,19 +313,25 @@ default void setJobData(List<JobDataset> datasets, Job j) {
304313
.ifPresent(s -> j.setOutputs(s));
305314
}
306315

307-
default void setJobData(Run run, Job j) {
308-
j.setLatestRun(run);
316+
default void setJobData(List<Run> runs, Job j) {
317+
if (runs.isEmpty()) {
318+
return;
319+
}
320+
321+
Run latestRun = runs.get(0);
322+
j.setLatestRun(latestRun);
323+
j.setLatestRuns(runs);
309324
DatasetVersionDao datasetVersionDao = createDatasetVersionDao();
310325
j.setInputs(
311-
datasetVersionDao.findInputDatasetVersionsFor(run.getId().getValue()).stream()
326+
datasetVersionDao.findInputDatasetVersionsFor(latestRun.getId().getValue()).stream()
312327
.map(
313328
ds ->
314329
new DatasetId(
315330
NamespaceName.of(ds.getNamespaceName()),
316331
DatasetName.of(ds.getDatasetName())))
317332
.collect(Collectors.toSet()));
318333
j.setOutputs(
319-
datasetVersionDao.findOutputDatasetVersionsFor(run.getId().getValue()).stream()
334+
datasetVersionDao.findOutputDatasetVersionsFor(latestRun.getId().getValue()).stream()
320335
.map(
321336
ds ->
322337
new DatasetId(

api/src/main/java/marquez/db/RunDao.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -510,15 +510,15 @@ default RunRow upsertRunMeta(
510510
@SqlQuery(
511511
BASE_FIND_RUN_SQL
512512
+ """
513-
WHERE r.uuid=(
513+
WHERE r.uuid IN (
514514
SELECT r.uuid FROM runs_view r
515515
INNER JOIN jobs_view j ON j.namespace_name=r.namespace_name AND j.name=r.job_name
516516
WHERE j.namespace_name=:namespace AND (j.name=:jobName OR j.name=ANY(j.aliases))
517517
ORDER BY transitioned_at DESC
518-
LIMIT 1
518+
LIMIT :limit OFFSET :offset
519519
)
520520
""")
521-
Optional<Run> findByLatestJob(String namespace, String jobName);
521+
List<Run> findByLatestJob(String namespace, String jobName, int limit, int offset);
522522

523523
@Builder
524524
record RunUpsert(

api/src/main/java/marquez/db/mappers/JobMapper.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ public Job map(@NonNull ResultSet results, @NonNull StatementContext context)
7171
// Latest Run is resolved in the JobDao. This can be brought in via a join and
7272
// a jsonb but custom deserializers will need to be introduced
7373
null,
74+
null,
7475
facetsOrNull,
7576
uuidOrNull(results, Columns.CURRENT_VERSION_UUID),
7677
getLabels(facetsOrNull),

api/src/main/java/marquez/service/models/Job.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import com.google.common.collect.ImmutableSet;
1111
import java.net.URL;
1212
import java.time.Instant;
13+
import java.util.List;
1314
import java.util.Optional;
1415
import java.util.Set;
1516
import java.util.UUID;
@@ -43,6 +44,7 @@ public final class Job {
4344
@Nullable private final URL location;
4445
@Nullable private final String description;
4546
@Nullable @Setter private Run latestRun;
47+
@Nullable @Setter private List<Run> latestRuns;
4648
@Getter private final ImmutableMap<String, Object> facets;
4749
@Nullable private UUID currentVersion;
4850
@Getter @Nullable private ImmutableList<String> labels;
@@ -62,6 +64,7 @@ public Job(
6264
@Nullable final URL location,
6365
@Nullable final String description,
6466
@Nullable final Run latestRun,
67+
@Nullable final List<Run> latestRuns,
6568
@Nullable final ImmutableMap<String, Object> facets,
6669
@Nullable UUID currentVersion,
6770
@Nullable ImmutableList<String> labels,
@@ -80,6 +83,7 @@ public Job(
8083
this.location = location;
8184
this.description = description;
8285
this.latestRun = latestRun;
86+
this.latestRuns = latestRuns;
8387
this.facets = (facets == null) ? ImmutableMap.of() : facets;
8488
this.currentVersion = currentVersion;
8589
this.labels = (labels == null) ? ImmutableList.of() : labels;
@@ -98,6 +102,10 @@ public Optional<Run> getLatestRun() {
98102
return Optional.ofNullable(latestRun);
99103
}
100104

105+
public Optional<List<Run>> getLatestRuns() {
106+
return Optional.ofNullable(latestRuns);
107+
}
108+
101109
public Optional<UUID> getCurrentVersion() {
102110
return Optional.ofNullable(currentVersion);
103111
}

api/src/test/java/marquez/db/JobDaoTest.java

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,13 @@
1717
import com.fasterxml.jackson.databind.ObjectMapper;
1818
import java.sql.SQLException;
1919
import java.time.Instant;
20+
import java.util.ArrayList;
21+
import java.util.Collections;
2022
import java.util.List;
2123
import java.util.Optional;
2224
import java.util.UUID;
2325
import marquez.common.models.JobType;
26+
import marquez.common.models.RunState;
2427
import marquez.db.models.DbModelGenerator;
2528
import marquez.db.models.JobRow;
2629
import marquez.db.models.NamespaceRow;
@@ -106,7 +109,10 @@ public void testFindAll() {
106109
JobRow anotherJobSameNamespace =
107110
createJobWithoutSymlinkTarget(jdbi, namespace, "anotherJob", "a random other job");
108111

109-
List<Job> jobs = jobDao.findAll(namespace.getName(), 10, 0);
112+
List<RunState> runStates = new ArrayList<>();
113+
Collections.addAll(runStates, RunState.values());
114+
115+
List<Job> jobs = jobDao.findAll(namespace.getName(), runStates, 10, 0);
110116

111117
// the symlinked job isn't present in the response - only the symlink target and the job with
112118
// no symlink

api/src/test/java/marquez/db/RunDaoTest.java

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -165,10 +165,9 @@ public void testFindByLatestJob() {
165165
TreeSet<RunRow> sortedRuns =
166166
new TreeSet<>(Comparator.comparing(RunRow::getUpdatedAt).reversed());
167167
sortedRuns.addAll(runs);
168-
Optional<Run> byLatestJob = runDao.findByLatestJob(jobRow.getNamespaceName(), jobRow.getName());
168+
Run byLatestJob =
169+
runDao.findByLatestJob(jobRow.getNamespaceName(), jobRow.getName(), 1, 0).get(0);
169170
assertThat(byLatestJob)
170-
.isPresent()
171-
.get()
172171
.hasFieldOrPropertyWithValue("id", new RunId(sortedRuns.first().getUuid()));
173172

174173
JobRow newTargetJob =
@@ -183,10 +182,11 @@ public void testFindByLatestJob() {
183182
jobMeta.getDescription().orElse(null));
184183

185184
// get the latest run for the *newTargetJob*. It should be the same as the old job's latest run
186-
byLatestJob = runDao.findByLatestJob(newTargetJob.getNamespaceName(), newTargetJob.getName());
185+
byLatestJob =
186+
runDao
187+
.findByLatestJob(newTargetJob.getNamespaceName(), newTargetJob.getName(), 1, 0)
188+
.get(0);
187189
assertThat(byLatestJob)
188-
.isPresent()
189-
.get()
190190
.hasFieldOrPropertyWithValue("id", new RunId(sortedRuns.first().getUuid()));
191191
}
192192

api/src/test/java/marquez/service/LineageServiceTest.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import static org.assertj.core.api.Assertions.assertThat;
1212
import static org.junit.Assert.assertTrue;
1313

14+
import java.util.ArrayList;
1415
import java.util.Arrays;
1516
import java.util.Collections;
1617
import java.util.HashMap;
@@ -28,6 +29,7 @@
2829
import marquez.common.models.JobName;
2930
import marquez.common.models.NamespaceName;
3031
import marquez.common.models.OutputDatasetVersion;
32+
import marquez.common.models.RunState;
3133
import marquez.db.DatasetDao;
3234
import marquez.db.JobDao;
3335
import marquez.db.LineageDao;
@@ -205,7 +207,10 @@ && jobNameEquals(n, "downstreamJob0<-outputData<-readJob0<-commonDataset"))
205207
new NamespaceName(NAMESPACE),
206208
new DatasetName("outputData<-readJob0<-commonDataset")));
207209

208-
List<Job> jobs = jobDao.findAllWithRun(NAMESPACE, 1000, 0);
210+
List<RunState> runStates = new ArrayList<>();
211+
Collections.addAll(runStates, RunState.values());
212+
213+
List<Job> jobs = jobDao.findAllWithRun(NAMESPACE, runStates, 1000, 0);
209214
jobs =
210215
jobs.stream()
211216
.filter(j -> j.getName().getValue().contains("newDownstreamJob"))

spec/openapi.yml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1596,6 +1596,10 @@ components:
15961596
type: string
15971597
latestRun:
15981598
$ref: '#/components/schemas/Run'
1599+
latestRuns:
1600+
type: array
1601+
items:
1602+
$ref: '#/components/schemas/Run'
15991603
facets:
16001604
$ref: '#/components/schemas/JobFacets'
16011605
currentVersion:
@@ -1617,6 +1621,7 @@ components:
16171621
context: {'SQL': "SELECT * FROM mytable;"}
16181622
description: My first job!
16191623
latestRun: null
1624+
latestRuns: []
16201625
facets: {}
16211626
currentVersion: "b1d626a2-6d3a-475e-9ecf-943176d4a8c6"
16221627

0 commit comments

Comments
 (0)