2525import marquez .db .mappers .DatasetVersionRowMapper ;
2626import marquez .db .mappers .ExtendedDatasetVersionRowMapper ;
2727import marquez .db .models .DatasetFieldRow ;
28+ import marquez .db .models .DatasetRow ;
2829import marquez .db .models .DatasetVersionRow ;
2930import marquez .db .models .ExtendedDatasetVersionRow ;
3031import marquez .db .models .TagRow ;
@@ -47,7 +48,7 @@ public interface DatasetVersionDao extends BaseDao {
4748
4849 @ Transaction
4950 default DatasetVersionRow upsertDatasetVersion (
50- UUID datasetUuid ,
51+ DatasetRow datasetRow ,
5152 Instant now ,
5253 String namespaceName ,
5354 String datasetName ,
@@ -56,14 +57,38 @@ default DatasetVersionRow upsertDatasetVersion(
5657 TagDao tagDao = createTagDao ();
5758 DatasetFieldDao datasetFieldDao = createDatasetFieldDao ();
5859
60+ List <DatasetFieldRow > datasetFields = new ArrayList <>();
61+ List <DatasetFieldTag > datasetFieldTags = new ArrayList <>();
62+ for (Field field : datasetMeta .getFields ()) {
63+ DatasetFieldRow datasetFieldRow =
64+ datasetFieldDao .upsert (
65+ UUID .randomUUID (),
66+ now ,
67+ field .getName ().getValue (),
68+ field .getType (),
69+ field .getDescription ().orElse (null ),
70+ datasetRow .getUuid ());
71+ datasetFields .add (datasetFieldRow );
72+ for (TagName tagName : field .getTags ()) {
73+ TagRow tag = tagDao .upsert (UUID .randomUUID (), now , tagName .getValue ());
74+ datasetFieldTags .add (new DatasetFieldTag (datasetFieldRow .getUuid (), tag .getUuid (), now ));
75+ }
76+ }
77+ datasetFieldDao .updateTags (datasetFieldTags );
78+
5979 Version version = Utils .newDatasetVersionFor (namespaceName , datasetName , datasetMeta );
6080 UUID newDatasetVersionUuid = UUID .randomUUID ();
81+ UUID datasetSchemaVersionUuid =
82+ createDatasetSchemaVersionDao ()
83+ .upsertSchemaVersion (datasetRow , datasetFields , now )
84+ .getValue ();
6185 DatasetVersionRow datasetVersionRow =
6286 upsert (
6387 newDatasetVersionUuid ,
6488 now ,
65- datasetUuid ,
89+ datasetRow . getUuid () ,
6690 version .getValue (),
91+ datasetSchemaVersionUuid ,
6792 datasetMeta .getRunId ().map (RunId ::getValue ).orElse (null ),
6893 toPgObjectFields (datasetMeta .getFields ()),
6994 namespaceName ,
@@ -84,29 +109,13 @@ default DatasetVersionRow upsertDatasetVersion(
84109 }
85110
86111 List <DatasetFieldMapping > datasetFieldMappings = new ArrayList <>();
87- List <DatasetFieldTag > datasetFieldTag = new ArrayList <>();
88-
89- for (Field field : datasetMeta .getFields ()) {
90- DatasetFieldRow datasetFieldRow =
91- datasetFieldDao .upsert (
92- UUID .randomUUID (),
93- now ,
94- field .getName ().getValue (),
95- field .getType (),
96- field .getDescription ().orElse (null ),
97- datasetUuid );
98- for (TagName tagName : field .getTags ()) {
99- TagRow tag = tagDao .upsert (UUID .randomUUID (), now , tagName .getValue ());
100- datasetFieldTag .add (new DatasetFieldTag (datasetFieldRow .getUuid (), tag .getUuid (), now ));
101- }
112+ for (DatasetFieldRow datasetFieldRow : datasetFields ) {
102113 datasetFieldMappings .add (
103114 new DatasetFieldMapping (datasetVersionRow .getUuid (), datasetFieldRow .getUuid ()));
104115 }
105-
106116 datasetFieldDao .updateFieldMapping (datasetFieldMappings );
107- datasetFieldDao .updateTags (datasetFieldTag );
108117
109- createDatasetDao ().updateVersion (datasetUuid , now , datasetVersionRow .getUuid ());
118+ createDatasetDao ().updateVersion (datasetRow . getUuid () , now , datasetVersionRow .getUuid ());
110119 return datasetVersionRow ;
111120 }
112121
@@ -166,8 +175,8 @@ WITH selected_dataset_versions AS (
166175 LEFT JOIN dataset_facets_view df ON df.dataset_version_uuid = dv.uuid
167176 )
168177 SELECT d.type, d.name, d.physical_name, d.namespace_name, d.source_name, d.description, dv.lifecycle_state,\s
169- dv.created_at, dv.version, dv.fields, dv.run_uuid AS createdByRunUuid, sv.schema_location ,
170- t.tags, f.facets
178+ dv.created_at, dv.version, dv.dataset_schema_version_uuid, dv.fields, dv.run_uuid AS createdByRunUuid,
179+ sv.schema_location, t.tags, f.facets
171180 FROM selected_dataset_versions dv
172181 LEFT JOIN datasets_view d ON d.uuid = dv.dataset_uuid
173182 LEFT JOIN stream_versions AS sv ON sv.dataset_version_uuid = dv.uuid
@@ -197,8 +206,8 @@ WITH selected_dataset_versions AS (
197206 LEFT JOIN dataset_facets_view df ON df.dataset_version_uuid = dv.uuid AND (df.type ILIKE 'dataset' OR df.type ILIKE 'unknown' OR df.type ILIKE 'input')
198207 )
199208 SELECT d.type, d.name, d.physical_name, d.namespace_name, d.source_name, d.description, dv.lifecycle_state,\s
200- dv.created_at, dv.version, dv.fields, dv.run_uuid AS createdByRunUuid, sv.schema_location ,
201- t.tags, f.facets
209+ dv.created_at, dv.version, dv.dataset_schema_version_uuid, dv.fields, dv.run_uuid AS createdByRunUuid,
210+ sv.schema_location, t.tags, f.facets
202211 FROM selected_dataset_versions dv
203212 LEFT JOIN datasets_view d ON d.uuid = dv.dataset_uuid
204213 LEFT JOIN stream_versions AS sv ON sv.dataset_version_uuid = dv.uuid
@@ -246,8 +255,8 @@ default Optional<DatasetVersion> findByWithRun(UUID version) {
246255 """
247256 WITH dataset_info AS (
248257 SELECT d.type, d.name, d.physical_name, d.namespace_name, d.source_name, d.description, dv.lifecycle_state,
249- dv.created_at, dv.version, dv.fields, dv.run_uuid AS createdByRunUuid, sv.schema_location ,
250- t.tags, f.facets, f.lineage_event_time, f.dataset_version_uuid, facet_name
258+ dv.created_at, dv.version, dv.dataset_schema_version_uuid, dv.fields, dv.run_uuid AS createdByRunUuid,
259+ sv.schema_location, t.tags, f.facets, f.lineage_event_time, f.dataset_version_uuid, facet_name
251260 FROM dataset_versions dv
252261 LEFT JOIN datasets_view d ON d.uuid = dv.dataset_uuid
253262 LEFT JOIN stream_versions AS sv ON sv.dataset_version_uuid = dv.uuid
@@ -272,12 +281,12 @@ LEFT JOIN (
272281 )
273282 SELECT
274283 type, name, physical_name, namespace_name, source_name, description, lifecycle_state,
275- created_at, version, fields, createdByRunUuid, schema_location,
284+ created_at, version, dataset_schema_version_uuid, fields, createdByRunUuid, schema_location,
276285 tags, dataset_version_uuid,
277286 JSONB_AGG(facets ORDER BY lineage_event_time ASC) AS facets
278287 FROM dataset_info
279288 GROUP BY type, name, physical_name, namespace_name, source_name, description, lifecycle_state,
280- created_at, version, fields, createdByRunUuid, schema_location,
289+ created_at, version, dataset_schema_version_uuid, fields, createdByRunUuid, schema_location,
281290 tags, dataset_version_uuid
282291 ORDER BY created_at DESC
283292 """ )
@@ -302,9 +311,9 @@ default List<DatasetVersion> findAllWithRun(
302311
303312 @ SqlQuery (
304313 "INSERT INTO dataset_versions "
305- + "(uuid, created_at, dataset_uuid, version, run_uuid, fields, namespace_name, dataset_name, lifecycle_state) "
314+ + "(uuid, created_at, dataset_uuid, version, dataset_schema_version_uuid, run_uuid, fields, namespace_name, dataset_name, lifecycle_state) "
306315 + "VALUES "
307- + "(:uuid, :now, :datasetUuid, :version, :runUuid, :fields, :namespaceName, :datasetName, :lifecycleState) "
316+ + "(:uuid, :now, :datasetUuid, :version, :schemaVersionUuid, :runUuid, :fields, :namespaceName, :datasetName, :lifecycleState) "
308317 + "ON CONFLICT(version) "
309318 + "DO UPDATE SET "
310319 + "run_uuid = EXCLUDED.run_uuid "
@@ -314,6 +323,7 @@ DatasetVersionRow upsert(
314323 Instant now ,
315324 UUID datasetUuid ,
316325 UUID version ,
326+ UUID schemaVersionUuid ,
317327 UUID runUuid ,
318328 PGobject fields ,
319329 String namespaceName ,
0 commit comments