|
5 | 5 |
|
6 | 6 | package marquez.db; |
7 | 7 |
|
| 8 | +import static org.jdbi.v3.sqlobject.customizer.BindList.EmptyHandling.NULL_STRING; |
| 9 | + |
8 | 10 | import java.time.Instant; |
9 | 11 | import java.util.Collections; |
10 | 12 | import java.util.List; |
| 13 | +import java.util.Set; |
11 | 14 | import java.util.UUID; |
12 | 15 | import java.util.stream.Collectors; |
13 | 16 | import marquez.db.mappers.ColumnLevelLineageRowMapper; |
| 17 | +import marquez.db.mappers.ColumnLineageNodeDataMapper; |
14 | 18 | import marquez.db.models.ColumnLevelLineageRow; |
| 19 | +import marquez.db.models.ColumnLineageNodeData; |
15 | 20 | import org.apache.commons.lang3.tuple.Pair; |
16 | 21 | import org.jdbi.v3.sqlobject.config.RegisterRowMapper; |
17 | 22 | import org.jdbi.v3.sqlobject.customizer.BindBeanList; |
| 23 | +import org.jdbi.v3.sqlobject.customizer.BindList; |
18 | 24 | import org.jdbi.v3.sqlobject.statement.SqlQuery; |
19 | 25 | import org.jdbi.v3.sqlobject.statement.SqlUpdate; |
20 | 26 |
|
21 | 27 | @RegisterRowMapper(ColumnLevelLineageRowMapper.class) |
| 28 | +@RegisterRowMapper(ColumnLineageNodeDataMapper.class) |
22 | 29 | public interface ColumnLevelLineageDao extends BaseDao { |
23 | 30 |
|
24 | 31 | default List<ColumnLevelLineageRow> upsertColumnLevelLineageRow( |
@@ -90,4 +97,59 @@ void doUpsertColumnLevelLineageRow( |
90 | 97 | }, |
91 | 98 | value = "values") |
92 | 99 | ColumnLevelLineageRow... rows); |
| 100 | + |
| 101 | + @SqlQuery( |
| 102 | + """ |
| 103 | + WITH RECURSIVE |
| 104 | + dataset_fields_view AS ( |
| 105 | + SELECT d.namespace_name as namespace_name, d.name as dataset_name, df.name as field_name, df.type, df.uuid |
| 106 | + FROM dataset_fields df |
| 107 | + INNER JOIN datasets_view d ON d.uuid = df.dataset_uuid |
| 108 | + ), |
| 109 | + column_lineage_recursive AS ( |
| 110 | + SELECT *, 0 as depth |
| 111 | + FROM column_level_lineage |
| 112 | + WHERE output_dataset_field_uuid IN (<datasetFieldUuids>) AND created_at <= :createdAtUntil |
| 113 | + UNION |
| 114 | + SELECT |
| 115 | + upstream_node.output_dataset_version_uuid, |
| 116 | + upstream_node.output_dataset_field_uuid, |
| 117 | + upstream_node.input_dataset_version_uuid, |
| 118 | + upstream_node.input_dataset_field_uuid, |
| 119 | + upstream_node.transformation_description, |
| 120 | + upstream_node.transformation_type, |
| 121 | + upstream_node.created_at, |
| 122 | + upstream_node.updated_at, |
| 123 | + node.depth + 1 as depth |
| 124 | + FROM column_level_lineage upstream_node, column_lineage_recursive node |
| 125 | + WHERE node.input_dataset_field_uuid = upstream_node.output_dataset_field_uuid |
| 126 | + AND node.depth < :depth |
| 127 | + ) |
| 128 | + SELECT |
| 129 | + output_fields.namespace_name, |
| 130 | + output_fields.dataset_name, |
| 131 | + output_fields.field_name, |
| 132 | + output_fields.type, |
| 133 | + ARRAY_AGG(ARRAY[input_fields.namespace_name, input_fields.dataset_name, input_fields.field_name]) AS inputFields, |
| 134 | + clr.transformation_description, |
| 135 | + clr.transformation_type, |
| 136 | + clr.created_at, |
| 137 | + clr.updated_at |
| 138 | + FROM column_lineage_recursive clr |
| 139 | + INNER JOIN dataset_fields_view output_fields ON clr.output_dataset_field_uuid = output_fields.uuid -- hidden datasets will be filtered |
| 140 | + LEFT JOIN dataset_fields_view input_fields ON clr.input_dataset_field_uuid = input_fields.uuid |
| 141 | + GROUP BY |
| 142 | + output_fields.namespace_name, |
| 143 | + output_fields.dataset_name, |
| 144 | + output_fields.field_name, |
| 145 | + output_fields.type, |
| 146 | + clr.transformation_description, |
| 147 | + clr.transformation_type, |
| 148 | + clr.created_at, |
| 149 | + clr.updated_at |
| 150 | + """) |
| 151 | + Set<ColumnLineageNodeData> getLineage( |
| 152 | + int depth, |
| 153 | + @BindList(onEmpty = NULL_STRING) List<UUID> datasetFieldUuids, |
| 154 | + Instant createdAtUntil); |
93 | 155 | } |
0 commit comments