77
88import com .google .common .base .Functions ;
99import com .google .common .collect .ImmutableSet ;
10+ import com .google .common .collect .ImmutableSortedSet ;
1011import com .google .common .collect .Maps ;
1112import java .util .Collections ;
1213import java .util .HashMap ;
2021import java .util .UUID ;
2122import java .util .stream .Collectors ;
2223import java .util .stream .Stream ;
24+ import lombok .NonNull ;
2325import lombok .extern .slf4j .Slf4j ;
2426import marquez .common .models .DatasetId ;
2527import marquez .common .models .JobId ;
@@ -48,14 +50,30 @@ public LineageService(LineageDao delegate, JobDao jobDao) {
4850
4951 // TODO make input parameters easily extendable if adding more options like 'withJobFacets'
5052 public Lineage lineage (NodeId nodeId , int depth , boolean withRunFacets ) {
53+ log .debug ("Attempting to get lineage for node '{}' with depth '{}'" , nodeId .getValue (), depth );
5154 Optional <UUID > optionalUUID = getJobUuid (nodeId );
5255 if (optionalUUID .isEmpty ()) {
53- throw new NodeIdNotFoundException ("Could not find node" );
56+ log .warn (
57+ "Failed to get job associated with node '{}', returning orphan graph..." ,
58+ nodeId .getValue ());
59+ return toLineageWithOrphanDataset (nodeId .asDatasetId ());
5460 }
5561 UUID job = optionalUUID .get ();
56-
62+ log . debug ( "Attempting to get lineage for job '{}'" , job );
5763 Set <JobData > jobData = getLineage (Collections .singleton (job ), depth );
5864
65+ // Ensure job data is not empty, an empty set cannot be passed to LineageDao.getCurrentRuns() or
66+ // LineageDao.getCurrentRunsWithFacets().
67+ if (jobData .isEmpty ()) {
68+ // Log warning, then return an orphan lineage graph; a graph should contain at most one
69+ // job->dataset relationship.
70+ log .warn (
71+ "Failed to get lineage for job '{}' associated with node '{}', returning orphan graph..." ,
72+ job ,
73+ nodeId .getValue ());
74+ return toLineageWithOrphanDataset (nodeId .asDatasetId ());
75+ }
76+
5977 List <Run > runs =
6078 withRunFacets
6179 ? getCurrentRunsWithFacets (
@@ -81,10 +99,26 @@ public Lineage lineage(NodeId nodeId, int depth, boolean withRunFacets) {
8199 if (!datasetIds .isEmpty ()) {
82100 datasets .addAll (this .getDatasetData (datasetIds ));
83101 }
102+ if (nodeId .isDatasetType ()
103+ && datasets .stream ().noneMatch (n -> n .getId ().equals (nodeId .asDatasetId ()))) {
104+ log .warn (
105+ "Found jobs {} which no longer share lineage with dataset '{}' - discarding" ,
106+ jobData .stream ().map (JobData ::getId ).toList (),
107+ nodeId .getValue ());
108+ return toLineageWithOrphanDataset (nodeId .asDatasetId ());
109+ }
84110
85111 return toLineage (jobData , datasets );
86112 }
87113
/**
 * Builds a {@link Lineage} whose graph consists of a single dataset node and nothing else.
 *
 * <p>Used by {@code lineage(...)} as a fallback result when no job can be resolved for the
 * requested node, when the lineage query for the resolved job returns no job data, or when the
 * requested dataset is absent from the datasets collected for the resolved jobs.
 *
 * @param datasetId identifier (namespace + name) of the dataset to look up; annotated with
 *     Lombok's {@code @NonNull}
 * @return a lineage wrapping one dataset node, whose node ID is derived from the ID of the
 *     dataset data that was looked up
 */
114+ private Lineage toLineageWithOrphanDataset (@ NonNull DatasetId datasetId ) {
// Look the dataset up by its namespace and name values.
// NOTE(review): the result is dereferenced below without any absence check - confirm that
// getDatasetData(...) cannot return null for a dataset that does not exist.
115+ final DatasetData datasetData =
116+ getDatasetData (datasetId .getNamespace ().getValue (), datasetId .getName ().getValue ());
// Wrap the single dataset node in an immutable sorted set and hand it to Lineage.
117+ return new Lineage (
118+ ImmutableSortedSet .of (
119+ Node .dataset ().data (datasetData ).id (NodeId .of (datasetData .getId ())).build ()));
120+ }
121+
88122 private Lineage toLineage (Set <JobData > jobData , Set <DatasetData > datasets ) {
89123 Set <Node > nodes = new LinkedHashSet <>();
90124 // build mapping for later
@@ -214,7 +248,8 @@ public Optional<UUID> getJobUuid(NodeId nodeId) {
214248 return getJobFromInputOrOutput (
215249 datasetId .getName ().getValue (), datasetId .getNamespace ().getValue ());
216250 } else {
217- throw new NodeIdNotFoundException ("Node must be a dataset node or job node" );
251+ throw new NodeIdNotFoundException (
252+ String .format ("Node '%s' must be of type dataset or job!" , nodeId .getValue ()));
218253 }
219254 }
220255}
0 commit comments