2323import java .util .UUID ;
2424import java .util .stream .Collectors ;
2525import java .util .stream .Stream ;
26+ import javax .annotation .Nullable ;
2627import marquez .common .Utils ;
2728import marquez .common .models .DatasetId ;
2829import marquez .common .models .DatasetName ;
@@ -235,7 +236,8 @@ default UpdateLineageRow updateMarquezModel(JobEvent event, ObjectMapper mapper)
235236 namespace ,
236237 null ,
237238 null ,
238- Optional .empty ());
239+ Optional .empty (),
240+ null );
239241
240242 bag .setJob (job );
241243
@@ -304,6 +306,8 @@ default UpdateLineageRow updateBaseMarquezModel(LineageEvent event, ObjectMapper
304306 Optional .ofNullable (event .getRun ()).map (Run ::getFacets ).map (RunFacet ::getParent );
305307 Optional <UUID > parentUuid = parentRun .map (Utils ::findParentRunUuid );
306308
309+ final UUID runUuid = runToUuid (event .getRun ().getRunId ());
310+
307311 JobRow job =
308312 buildJobFromEvent (
309313 event .getJob (),
@@ -316,7 +320,8 @@ default UpdateLineageRow updateBaseMarquezModel(LineageEvent event, ObjectMapper
316320 namespace ,
317321 nominalStartTime ,
318322 nominalEndTime ,
319- parentRun );
323+ parentRun ,
324+ runUuid );
320325
321326 bag .setJob (job );
322327
@@ -327,7 +332,6 @@ default UpdateLineageRow updateBaseMarquezModel(LineageEvent event, ObjectMapper
327332 UUID .randomUUID (), now , Utils .toJson (runArgsMap ), Utils .checksumFor (runArgsMap ));
328333 bag .setRunArgs (runArgs );
329334
330- final UUID runUuid = runToUuid (event .getRun ().getRunId ());
331335 RunRow run ;
332336 RunUpsert .RunUpsertBuilder runUpsertBuilder =
333337 RunUpsert .builder ()
@@ -509,7 +513,8 @@ private JobRow buildJobFromEvent(
509513 NamespaceRow namespace ,
510514 Instant nominalStartTime ,
511515 Instant nominalEndTime ,
512- Optional <ParentRunFacet > parentRun ) {
516+ Optional <ParentRunFacet > parentRun ,
517+ @ Nullable UUID runUuid ) {
513518 Logger log = LoggerFactory .getLogger (OpenLineageDao .class );
514519 String description =
515520 Optional .ofNullable (job .getFacets ())
@@ -523,10 +528,10 @@ private JobRow buildJobFromEvent(
523528 .flatMap (s -> Optional .ofNullable (s .getUrl ()))
524529 .orElse (null );
525530
526- Optional <UUID > parentUuid = parentRun .map (Utils ::findParentRunUuid );
531+ Optional <UUID > parentRunUuid = parentRun .map (Utils ::findParentRunUuid );
527532 Optional <JobRow > parentJob =
528- parentUuid .map (
529- uuid ->
533+ parentRunUuid .map (
534+ parentRunUuidFound ->
530535 findParentJobRow (
531536 job ,
532537 eventTime ,
@@ -537,7 +542,7 @@ private JobRow buildJobFromEvent(
537542 nominalEndTime ,
538543 log ,
539544 parentRun .get (),
540- uuid ));
545+ parentRunUuidFound ));
541546
542547 // construct the simple name of the job by removing the parent prefix plus the dot '.' separator
543548 String jobName =
@@ -570,7 +575,8 @@ private JobRow buildJobFromEvent(
570575 description ,
571576 location ,
572577 null ,
573- jobDao .toJson (toDatasetId (inputs ), mapper )))
578+ jobDao .toJson (toDatasetId (inputs ), mapper ),
579+ parent .getCurrentRunUuid ().orElse (null )))
574580 .orElseGet (
575581 () ->
576582 jobDao .upsertJob (
@@ -583,7 +589,8 @@ private JobRow buildJobFromEvent(
583589 description ,
584590 location ,
585591 null ,
586- jobDao .toJson (toDatasetId (inputs ), mapper )));
592+ jobDao .toJson (toDatasetId (inputs ), mapper ),
593+ runUuid ));
587594 }
588595
589596 private JobRow findParentJobRow (
@@ -596,15 +603,15 @@ private JobRow findParentJobRow(
596603 Instant nominalEndTime ,
597604 Logger log ,
598605 ParentRunFacet facet ,
599- UUID uuid ) {
606+ UUID parentRunUuid ) {
600607 try {
601608 log .debug ("Found parent run event {}" , facet );
602609 PGobject inputs = new PGobject ();
603610 inputs .setType ("json" );
604611 inputs .setValue ("[]" );
605612 JobRow parentJobRow =
606613 createRunDao ()
607- .findJobRowByRunUuid (uuid )
614+ .findJobRowByRunUuid (parentRunUuid )
608615 .map (
609616 j -> {
610617 String parentJobName =
@@ -617,18 +624,20 @@ private JobRow findParentJobRow(
617624 } else {
618625 // Addresses an Airflow integration bug that generated conflicting run UUIDs
619626 // for DAGs that had the same name, but ran in different namespaces.
620- UUID parentRunUuid =
627+ UUID parentRunUuidNoConflict =
621628 Utils .toNameBasedUuid (
622- facet .getJob ().getNamespace (), parentJobName , uuid .toString ());
629+ facet .getJob ().getNamespace (),
630+ parentJobName ,
631+ parentRunUuid .toString ());
623632 log .warn (
624633 "Parent Run id {} has a different job name '{}.{}' from facet '{}.{}'. "
625634 + "Assuming Run UUID conflict and generating a new UUID {}" ,
626- uuid ,
635+ parentRunUuid ,
627636 j .getNamespaceName (),
628637 j .getName (),
629638 facet .getJob ().getNamespace (),
630639 facet .getJob ().getName (),
631- parentRunUuid );
640+ parentRunUuidNoConflict );
632641 return createParentJobRunRecord (
633642 job ,
634643 eventTime ,
@@ -637,7 +646,7 @@ private JobRow findParentJobRow(
637646 location ,
638647 nominalStartTime ,
639648 nominalEndTime ,
640- parentRunUuid ,
649+ parentRunUuidNoConflict ,
641650 facet ,
642651 inputs );
643652 }
@@ -652,7 +661,7 @@ private JobRow findParentJobRow(
652661 location ,
653662 nominalStartTime ,
654663 nominalEndTime ,
655- uuid ,
664+ parentRunUuid ,
656665 facet ,
657666 inputs ));
658667 log .debug ("Found parent job record {}" , parentJobRow );
@@ -670,7 +679,7 @@ private JobRow createParentJobRunRecord(
670679 String location ,
671680 Instant nominalStartTime ,
672681 Instant nominalEndTime ,
673- UUID uuid ,
682+ UUID parentRunUuid ,
674683 ParentRunFacet facet ,
675684 PGobject inputs ) {
676685 Instant now = eventTime .withZoneSameInstant (ZoneId .of ("UTC" )).toInstant ();
@@ -691,7 +700,8 @@ private JobRow createParentJobRunRecord(
691700 null ,
692701 location ,
693702 null ,
694- inputs );
703+ inputs ,
704+ parentRunUuid );
695705 log .info ("Created new parent job record {}" , newParentJobRow );
696706
697707 RunArgsRow argsRow =
@@ -702,7 +712,7 @@ private JobRow createParentJobRunRecord(
702712 RunDao runDao = createRunDao ();
703713 RunRow newRow =
704714 runDao .upsert (
705- uuid ,
715+ parentRunUuid ,
706716 null ,
707717 facet .getRun ().getRunId (),
708718 now ,
@@ -719,14 +729,14 @@ private JobRow createParentJobRunRecord(
719729 log .info ("Created new parent run record {}" , newRow );
720730
721731 runState
722- .map (rs -> createRunStateDao ().upsert (UUID .randomUUID (), now , uuid , rs ))
732+ .map (rs -> createRunStateDao ().upsert (UUID .randomUUID (), now , parentRunUuid , rs ))
723733 .ifPresent (
724734 runStateRow -> {
725735 UUID runStateUuid = runStateRow .getUuid ();
726736 if (RunState .valueOf (runStateRow .getState ()).isDone ()) {
727- runDao .updateEndState (uuid , now , runStateUuid );
737+ runDao .updateEndState (parentRunUuid , now , runStateUuid );
728738 } else {
729- runDao .updateStartState (uuid , now , runStateUuid );
739+ runDao .updateStartState (parentRunUuid , now , runStateUuid );
730740 }
731741 });
732742
0 commit comments