5858import marquez .service .models .LineageEvent .LifecycleStateChangeFacet ;
5959import marquez .service .models .LineageEvent .NominalTimeRunFacet ;
6060import marquez .service .models .LineageEvent .ParentRunFacet ;
61+ import marquez .service .models .LineageEvent .Run ;
6162import marquez .service .models .LineageEvent .RunFacet ;
6263import marquez .service .models .LineageEvent .SchemaDatasetFacet ;
6364import marquez .service .models .LineageEvent .SchemaField ;
@@ -150,24 +151,12 @@ default UpdateLineageRow updateBaseMarquezModel(LineageEvent event, ObjectMapper
150151 DEFAULT_NAMESPACE_OWNER );
151152 bag .setNamespace (namespace );
152153
153- String description =
154- Optional .ofNullable (event .getJob ().getFacets ())
155- .map (JobFacet ::getDocumentation )
156- .map (DocumentationJobFacet ::getDescription )
157- .orElse (null );
158-
159154 Map <String , String > context = buildJobContext (event );
160155 JobContextRow jobContext =
161156 jobContextDao .upsert (
162157 UUID .randomUUID (), now , Utils .toJson (context ), Utils .checksumFor (context ));
163158 bag .setJobContext (jobContext );
164159
165- String location =
166- Optional .ofNullable (event .getJob ().getFacets ())
167- .flatMap (f -> Optional .ofNullable (f .getSourceCodeLocation ()))
168- .flatMap (s -> Optional .ofNullable (s .getUrl ()))
169- .orElse (null );
170-
171160 Instant nominalStartTime =
172161 Optional .ofNullable (event .getRun ().getFacets ())
173162 .flatMap (f -> Optional .ofNullable (f .getNominalTime ()))
@@ -181,75 +170,26 @@ default UpdateLineageRow updateBaseMarquezModel(LineageEvent event, ObjectMapper
181170 .map (t -> t .withZoneSameInstant (ZoneId .of ("UTC" )).toInstant ())
182171 .orElse (null );
183172
184- Logger log = LoggerFactory .getLogger (OpenLineageDao .class );
185173 Optional <ParentRunFacet > parentRun =
186- Optional .ofNullable (event .getRun ())
187- .map (LineageEvent .Run ::getFacets )
188- .map (RunFacet ::getParent );
189-
174+ Optional .ofNullable (event .getRun ()).map (Run ::getFacets ).map (RunFacet ::getParent );
190175 Optional <UUID > parentUuid = parentRun .map (Utils ::findParentRunUuid );
191- Optional <JobRow > parentJob =
192- parentUuid .map (
193- uuid ->
194- findParentJobRow (
195- event ,
196- namespace ,
197- jobContext ,
198- location ,
199- nominalStartTime ,
200- nominalEndTime ,
201- log ,
202- parentRun .get (),
203- uuid ));
204176
205- // construct the simple name of the job by removing the parent prefix plus the dot '.' separator
206- String jobName =
207- parentJob
208- .map (
209- p -> {
210- if (event .getJob ().getName ().startsWith (p .getName () + '.' )) {
211- return event .getJob ().getName ().substring (p .getName ().length () + 1 );
212- } else {
213- return event .getJob ().getName ();
214- }
215- })
216- .orElse (event .getJob ().getName ());
217- log .debug (
218- "Calculated job name {} from job {} with parent {}" ,
219- jobName ,
220- event .getJob ().getName (),
221- parentJob .map (JobRow ::getName ));
222177 JobRow job =
223- parentJob
224- .map (
225- parent ->
226- jobDao .upsertJob (
227- UUID .randomUUID (),
228- parent .getUuid (),
229- getJobType (event .getJob ()),
230- now ,
231- namespace .getUuid (),
232- namespace .getName (),
233- jobName ,
234- description ,
235- jobContext .getUuid (),
236- location ,
237- null ,
238- jobDao .toJson (toDatasetId (event .getInputs ()), mapper )))
178+ runDao
179+ .findJobRowByRunUuid (runToUuid (event .getRun ().getRunId ()))
239180 .orElseGet (
240181 () ->
241- jobDao .upsertJob (
242- UUID .randomUUID (),
243- getJobType (event .getJob ()),
182+ buildJobFromEvent (
183+ event ,
184+ mapper ,
185+ jobDao ,
244186 now ,
245- namespace .getUuid (),
246- namespace .getName (),
247- jobName ,
248- description ,
249- jobContext .getUuid (),
250- location ,
251- null ,
252- jobDao .toJson (toDatasetId (event .getInputs ()), mapper )));
187+ namespace ,
188+ jobContext ,
189+ nominalStartTime ,
190+ nominalEndTime ,
191+ parentRun ));
192+
253193 bag .setJob (job );
254194
255195 Map <String , String > runArgsMap = createRunArgs (event );
@@ -277,8 +217,8 @@ default UpdateLineageRow updateBaseMarquezModel(LineageEvent event, ObjectMapper
277217 runStateType ,
278218 now ,
279219 namespace .getName (),
280- jobName ,
281- location ,
220+ job . getName () ,
221+ job . getLocation () ,
282222 jobContext .getUuid ());
283223 } else {
284224 run =
@@ -294,8 +234,8 @@ default UpdateLineageRow updateBaseMarquezModel(LineageEvent event, ObjectMapper
294234 nominalEndTime ,
295235 namespace .getUuid (),
296236 namespace .getName (),
297- jobName ,
298- location ,
237+ job . getName () ,
238+ job . getLocation () ,
299239 jobContext .getUuid ());
300240 }
301241 bag .setRun (run );
@@ -363,6 +303,93 @@ default UpdateLineageRow updateBaseMarquezModel(LineageEvent event, ObjectMapper
363303 return bag ;
364304 }
365305
306+ private JobRow buildJobFromEvent (
307+ LineageEvent event ,
308+ ObjectMapper mapper ,
309+ JobDao jobDao ,
310+ Instant now ,
311+ NamespaceRow namespace ,
312+ JobContextRow jobContext ,
313+ Instant nominalStartTime ,
314+ Instant nominalEndTime ,
315+ Optional <ParentRunFacet > parentRun ) {
316+ Logger log = LoggerFactory .getLogger (OpenLineageDao .class );
317+ String description =
318+ Optional .ofNullable (event .getJob ().getFacets ())
319+ .map (JobFacet ::getDocumentation )
320+ .map (DocumentationJobFacet ::getDescription )
321+ .orElse (null );
322+
323+ String location =
324+ Optional .ofNullable (event .getJob ().getFacets ())
325+ .flatMap (f -> Optional .ofNullable (f .getSourceCodeLocation ()))
326+ .flatMap (s -> Optional .ofNullable (s .getUrl ()))
327+ .orElse (null );
328+
329+ Optional <UUID > parentUuid = parentRun .map (Utils ::findParentRunUuid );
330+ Optional <JobRow > parentJob =
331+ parentUuid .map (
332+ uuid ->
333+ findParentJobRow (
334+ event ,
335+ namespace ,
336+ jobContext ,
337+ location ,
338+ nominalStartTime ,
339+ nominalEndTime ,
340+ log ,
341+ parentRun .get (),
342+ uuid ));
343+
344+ // construct the simple name of the job by removing the parent prefix plus the dot '.' separator
345+ String jobName =
346+ parentJob
347+ .map (
348+ p -> {
349+ if (event .getJob ().getName ().startsWith (p .getName () + '.' )) {
350+ return event .getJob ().getName ().substring (p .getName ().length () + 1 );
351+ } else {
352+ return event .getJob ().getName ();
353+ }
354+ })
355+ .orElse (event .getJob ().getName ());
356+ log .debug (
357+ "Calculated job name {} from job {} with parent {}" ,
358+ jobName ,
359+ event .getJob ().getName (),
360+ parentJob .map (JobRow ::getName ));
361+ return parentJob
362+ .map (
363+ parent ->
364+ jobDao .upsertJob (
365+ UUID .randomUUID (),
366+ parent .getUuid (),
367+ getJobType (event .getJob ()),
368+ now ,
369+ namespace .getUuid (),
370+ namespace .getName (),
371+ jobName ,
372+ description ,
373+ jobContext .getUuid (),
374+ location ,
375+ null ,
376+ jobDao .toJson (toDatasetId (event .getInputs ()), mapper )))
377+ .orElseGet (
378+ () ->
379+ jobDao .upsertJob (
380+ UUID .randomUUID (),
381+ getJobType (event .getJob ()),
382+ now ,
383+ namespace .getUuid (),
384+ namespace .getName (),
385+ jobName ,
386+ description ,
387+ jobContext .getUuid (),
388+ location ,
389+ null ,
390+ jobDao .toJson (toDatasetId (event .getInputs ()), mapper )));
391+ }
392+
366393 private JobRow findParentJobRow (
367394 LineageEvent event ,
368395 NamespaceRow namespace ,
0 commit comments