|
22 | 22 | import marquez.db.ColumnLineageDao; |
23 | 23 | import marquez.db.DatasetFieldDao; |
24 | 24 | import marquez.db.models.ColumnLineageNodeData; |
| 25 | +import marquez.service.models.ColumnLineage; |
| 26 | +import marquez.service.models.ColumnLineageInputField; |
| 27 | +import marquez.service.models.Dataset; |
25 | 28 | import marquez.service.models.Edge; |
26 | 29 | import marquez.service.models.Lineage; |
27 | 30 | import marquez.service.models.Node; |
28 | 31 | import marquez.service.models.NodeId; |
| 32 | +import org.apache.commons.lang3.tuple.Pair; |
29 | 33 |
|
30 | 34 | @Slf4j |
31 | 35 | public class ColumnLineageService extends DelegatingDaos.DelegatingColumnLineageDao { |
@@ -125,4 +129,51 @@ List<UUID> getColumnNodeUuids(NodeId nodeId) { |
125 | 129 | } |
126 | 130 | return columnNodeUuids; |
127 | 131 | } |
| 132 | + |
| 133 | + public void enrichWithColumnLineage(List<Dataset> datasets) { |
| 134 | + if (datasets.isEmpty()) { |
| 135 | + return; |
| 136 | + } |
| 137 | + |
| 138 | + Set<ColumnLineageNodeData> lineageRowsForDatasets = |
| 139 | + getLineageRowsForDatasets( |
| 140 | + datasets.stream() |
| 141 | + .map(d -> Pair.of(d.getNamespace().getValue(), d.getName().getValue())) |
| 142 | + .collect(Collectors.toList())); |
| 143 | + |
| 144 | + Map<Dataset, List<ColumnLineage>> datasetLineage = new HashMap<>(); |
| 145 | + lineageRowsForDatasets.stream() |
| 146 | + .forEach( |
| 147 | + nodeData -> { |
| 148 | + Dataset dataset = |
| 149 | + datasets.stream() |
| 150 | + .filter(d -> d.getNamespace().getValue().equals(nodeData.getNamespace())) |
| 151 | + .filter(d -> d.getName().getValue().equals(nodeData.getName())) |
| 152 | + .findAny() |
| 153 | + .get(); |
| 154 | + |
| 155 | + if (!datasetLineage.containsKey(dataset)) { |
| 156 | + datasetLineage.put(dataset, new LinkedList<>()); |
| 157 | + } |
| 158 | + datasetLineage |
| 159 | + .get(dataset) |
| 160 | + .add( |
| 161 | + ColumnLineage.builder() |
| 162 | + .name(nodeData.getField()) |
| 163 | + .transformationDescription(nodeData.getTransformationDescription()) |
| 164 | + .transformationType(nodeData.getTransformationType()) |
| 165 | + .inputFields( |
| 166 | + nodeData.getInputFields().stream() |
| 167 | + .map( |
| 168 | + f -> |
| 169 | + new ColumnLineageInputField( |
| 170 | + f.getNamespace(), f.getName(), f.getField())) |
| 171 | + .collect(Collectors.toList())) |
| 172 | + .build()); |
| 173 | + }); |
| 174 | + |
| 175 | + datasets.stream() |
| 176 | + .filter(dataset -> datasetLineage.containsKey(dataset)) |
| 177 | + .forEach(dataset -> dataset.setColumnLineage(datasetLineage.get(dataset))); |
| 178 | + } |
128 | 179 | } |
0 commit comments