Skip to content

Commit df02468

Browse files
authored
Merge pull request #14 from gdcc/13-drafts
handle drafts
2 parents bc17752 + 1011d4e commit df02468

10 files changed

Lines changed: 560 additions & 4 deletions

File tree

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ venv
44
src/test/resources/minimal/out/croissant.json
55
src/test/resources/max/out/croissant.json
66
src/test/resources/cars/out/croissant.json
7+
src/test/resources/draft/out/croissant.json
78
release.properties
89
pom.xml.releaseBackup
910
.vscode

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
0.1.4
22
=====
33

4+
- Handle drafts. See https://github.com/gdcc/exporter-croissant/pull/14
45
- Switch to parent pom. See https://github.com/gdcc/exporter-croissant/issues/5
56
- Tweaked spotless (code formatting) config. See https://github.com/gdcc/exporter-croissant/pull/12
67

src/main/java/io/gdcc/spi/export/croissant/CroissantExporter.java

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,10 @@ public void exportDataset(ExportDataProvider dataProvider, OutputStream outputSt
151151
job.add("description", datasetSchemaDotOrg.getJsonString("description"));
152152
job.add("keywords", datasetSchemaDotOrg.getJsonArray("keywords"));
153153
job.add("license", datasetSchemaDotOrg.getString("license"));
154-
job.add("datePublished", datasetSchemaDotOrg.getString("datePublished"));
154+
String datePublished = datasetSchemaDotOrg.getString("datePublished", null);
155+
if (datePublished != null) {
156+
job.add("datePublished", datasetSchemaDotOrg.getString("datePublished"));
157+
}
155158
job.add("dateModified", datasetSchemaDotOrg.getString("dateModified"));
156159
job.add(
157160
"includedInDataCatalog",
@@ -388,7 +391,11 @@ private String getBibtex(
388391
String identifier = datasetJson.getString("identifier");
389392

390393
JsonObject oreDescribes = datasetORE.getJsonObject("ore:describes");
391-
String publicationYear = oreDescribes.getString("schema:datePublished").substring(0, 4);
394+
String publicationYear = null;
395+
String publicationDate = oreDescribes.getString("schema:datePublished", null);
396+
if (publicationDate != null) {
397+
publicationYear = publicationDate.substring(0, 4);
398+
}
392399

393400
JsonArray creatorArray = datasetSchemaDotOrg.getJsonArray("creator");
394401
List<String> creators = new ArrayList<>();
@@ -403,11 +410,17 @@ private String getBibtex(
403410
String pidAsUrl = oreDescribes.getString("@id");
404411

405412
StringBuilder sb = new StringBuilder();
406-
sb.append("@data{").append(identifier).append("_").append(publicationYear).append(",");
413+
if (publicationYear != null) {
414+
sb.append("@data{").append(identifier).append("_").append(publicationYear).append(",");
415+
} else {
416+
sb.append("@data{").append(identifier).append(",");
417+
}
407418
sb.append("author = {").append(creatorsFormatted).append("},");
408419
sb.append("publisher = {").append(publisher).append("},");
409420
sb.append("title = {").append(title).append("},");
410-
sb.append("year = {").append(publicationYear).append("},");
421+
if (publicationYear != null) {
422+
sb.append("year = {").append(publicationYear).append("},");
423+
}
411424
sb.append("url = {").append(pidAsUrl).append("}");
412425
sb.append("}");
413426
return sb.toString();

src/test/java/io/gdcc/spi/export/croissant/CroissantExporterTest.java

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@ public class CroissantExporterTest {
3838
static ExportDataProvider dataProviderCars;
3939
static OutputStream outputStreamJunk;
4040
static ExportDataProvider dataProviderJunk;
41+
static OutputStream outputStreamDraft;
42+
static ExportDataProvider dataProviderDraft;
4143

4244
@BeforeAll
4345
public static void setUp() {
@@ -285,6 +287,67 @@ public String getDataCiteXml() {
285287
}
286288
}
287289
};
290+
291+
outputStreamDraft = new ByteArrayOutputStream();
292+
dataProviderDraft =
293+
new ExportDataProvider() {
294+
@Override
295+
public JsonObject getDatasetJson() {
296+
String pathToJsonFile = "src/test/resources/draft/in/datasetJson.json";
297+
try (JsonReader jsonReader =
298+
Json.createReader(new FileReader(pathToJsonFile))) {
299+
return jsonReader.readObject();
300+
} catch (FileNotFoundException ex) {
301+
return null;
302+
}
303+
}
304+
305+
@Override
306+
public JsonObject getDatasetORE() {
307+
String pathToJsonFile = "src/test/resources/draft/in/datasetORE.json";
308+
try (JsonReader jsonReader =
309+
Json.createReader(new FileReader(pathToJsonFile))) {
310+
return jsonReader.readObject();
311+
} catch (FileNotFoundException ex) {
312+
return null;
313+
}
314+
}
315+
316+
@Override
317+
public JsonArray getDatasetFileDetails() {
318+
String pathToJsonFile =
319+
"src/test/resources/draft/in/datasetFileDetails.json";
320+
try (JsonReader jsonReader =
321+
Json.createReader(new FileReader(pathToJsonFile))) {
322+
return jsonReader.readArray();
323+
} catch (FileNotFoundException ex) {
324+
return null;
325+
}
326+
}
327+
328+
@Override
329+
public JsonObject getDatasetSchemaDotOrg() {
330+
String pathToJsonFile =
331+
"src/test/resources/draft/in/datasetSchemaDotOrg.json";
332+
try (JsonReader jsonReader =
333+
Json.createReader(new FileReader(pathToJsonFile))) {
334+
return jsonReader.readObject();
335+
} catch (FileNotFoundException ex) {
336+
return null;
337+
}
338+
}
339+
340+
@Override
341+
public String getDataCiteXml() {
342+
try {
343+
return Files.readString(
344+
Paths.get("src/test/resources/draft/in/dataCiteXml.xml"),
345+
StandardCharsets.UTF_8);
346+
} catch (IOException ex) {
347+
return null;
348+
}
349+
}
350+
};
288351
}
289352

290353
@Test
@@ -394,6 +457,19 @@ public void testExportDatasetJunk() throws Exception {
394457
assertEquals(prettyPrint(expected), prettyPrint(outputStreamJunk.toString()));
395458
}
396459

460+
@Test
461+
public void testExportDatasetDraft() throws Exception {
462+
exporter.exportDataset(dataProviderDraft, outputStreamDraft);
463+
String actual = outputStreamDraft.toString();
464+
writeCroissantFile(actual, "draft");
465+
String expected =
466+
Files.readString(
467+
Paths.get("src/test/resources/draft/expected/draft-croissant.json"),
468+
StandardCharsets.UTF_8);
469+
JSONAssert.assertEquals(expected, actual, true);
470+
assertEquals(prettyPrint(expected), prettyPrint(outputStreamDraft.toString()));
471+
}
472+
397473
private void writeCroissantFile(String actual, String name) throws IOException {
398474
Path dir = Files.createDirectories(Paths.get("src/test/resources/" + name + "/out"));
399475
Path out = Paths.get(dir + "/croissant.json");
src/test/resources/draft/expected/draft-croissant.json

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
{
2+
"@context": {
3+
"@language": "en",
4+
"@vocab": "https://schema.org/",
5+
"citeAs": "cr:citeAs",
6+
"column": "cr:column",
7+
"conformsTo": "dct:conformsTo",
8+
"cr": "http://mlcommons.org/croissant/",
9+
"rai": "http://mlcommons.org/croissant/RAI/",
10+
"data": {
11+
"@id": "cr:data",
12+
"@type": "@json"
13+
},
14+
"dataType": {
15+
"@id": "cr:dataType",
16+
"@type": "@vocab"
17+
},
18+
"dct": "http://purl.org/dc/terms/",
19+
"examples": {
20+
"@id": "cr:examples",
21+
"@type": "@json"
22+
},
23+
"extract": "cr:extract",
24+
"field": "cr:field",
25+
"fileProperty": "cr:fileProperty",
26+
"fileObject": "cr:fileObject",
27+
"fileSet": "cr:fileSet",
28+
"format": "cr:format",
29+
"includes": "cr:includes",
30+
"isLiveDataset": "cr:isLiveDataset",
31+
"jsonPath": "cr:jsonPath",
32+
"key": "cr:key",
33+
"md5": "cr:md5",
34+
"parentField": "cr:parentField",
35+
"path": "cr:path",
36+
"recordSet": "cr:recordSet",
37+
"references": "cr:references",
38+
"regex": "cr:regex",
39+
"repeated": "cr:repeated",
40+
"replace": "cr:replace",
41+
"sc": "https://schema.org/",
42+
"separator": "cr:separator",
43+
"source": "cr:source",
44+
"subField": "cr:subField",
45+
"transform": "cr:transform",
46+
"wd": "https://www.wikidata.org/wiki/"
47+
},
48+
"@type": "sc:Dataset",
49+
"conformsTo": "http://mlcommons.org/croissant/1.0",
50+
"name": "Draft Dataset",
51+
"url": "https://doi.org/10.5072/FK2/OO7TEP",
52+
"creator": [
53+
{
54+
"@type": "Person",
55+
"givenName": "Draft",
56+
"familyName": "Punk",
57+
"affiliation": {
58+
"@type": "Organization",
59+
"name": "French house"
60+
},
61+
"name": "Punk, Draft"
62+
}
63+
],
64+
"description": "This dataset hasn't been published yet.",
65+
"keywords": [
66+
"Other"
67+
],
68+
"license": "http://creativecommons.org/publicdomain/zero/1.0",
69+
"dateModified": "",
70+
"includedInDataCatalog": {
71+
"@type": "DataCatalog",
72+
"name": "Root",
73+
"url": "http://localhost:8080"
74+
},
75+
"publisher": {
76+
"@type": "Organization",
77+
"name": "Root"
78+
},
79+
"version": "DRAFT",
80+
"citeAs": "@data{FK2/OO7TEP,author = {Punk, Draft},publisher = {Root},title = {Draft Dataset},url = {https://doi.org/10.5072/FK2/OO7TEP}}",
81+
"distribution": [
82+
{
83+
"@type": "cr:FileObject",
84+
"@id": "data.txt",
85+
"name": "data.txt",
86+
"encodingFormat": "text/plain",
87+
"md5": "050644e853fdfe46a3707695ba2fe736",
88+
"contentSize": "18",
89+
"description": "",
90+
"contentUrl": "http://localhost:8080/api/access/datafile/4"
91+
}
92+
]
93+
}
src/test/resources/draft/in/dataCiteXml.xml

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<resource xmlns="http://datacite.org/schema/kernel-4" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://datacite.org/schema/kernel-4 http://schema.datacite.org/meta/kernel-4.5/metadata.xsd">
3+
<identifier identifierType="DOI">10.5072/FK2/OO7TEP</identifier>
4+
<creators>
5+
<creator>
6+
<creatorName nameType="Personal">Punk, Draft</creatorName>
7+
<givenName>Draft</givenName>
8+
<familyName>Punk</familyName>
9+
<affiliation>French house</affiliation>
10+
</creator>
11+
</creators>
12+
<titles>
13+
<title>Draft Dataset</title>
14+
</titles>
15+
<publisher>Root</publisher>
16+
<publicationYear>2025</publicationYear>
17+
<subjects>
18+
<subject>Other</subject>
19+
</subjects>
20+
<contributors>
21+
<contributor contributorType="ContactPerson">
22+
<contributorName nameType="Personal">Admin, Dataverse</contributorName>
23+
<givenName>Dataverse</givenName>
24+
<familyName>Admin</familyName>
25+
<affiliation>Dataverse.org</affiliation>
26+
</contributor>
27+
</contributors>
28+
<dates>
29+
<date dateType="Submitted">2025-04-14</date>
30+
</dates>
31+
<resourceType resourceTypeGeneral="Dataset"/>
32+
<sizes>
33+
<size>18</size>
34+
</sizes>
35+
<formats>
36+
<format>text/plain</format>
37+
</formats>
38+
<version>DRAFT</version>
39+
<rightsList>
40+
<rights rightsURI="info:eu-repo/semantics/openAccess"/>
41+
<rights rightsURI="http://creativecommons.org/publicdomain/zero/1.0" rightsIdentifier="CC0-1.0" rightsIdentifierScheme="SPDX" schemeURI="https://spdx.org/licenses/" xml:lang="en">Creative Commons CC0 1.0 Universal Public Domain Dedication.</rights>
42+
</rightsList>
43+
<descriptions>
44+
<description descriptionType="Abstract">This dataset hasn&amp;apos;t been published yet.</description>
45+
</descriptions>
46+
</resource>
src/test/resources/draft/in/datasetFileDetails.json

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
[
2+
{
3+
"id": 4,
4+
"persistentId": "",
5+
"filename": "data.txt",
6+
"contentType": "text/plain",
7+
"friendlyType": "Plain Text",
8+
"filesize": 18,
9+
"storageIdentifier": "local://196347bdb85-7b4820f8e4ef",
10+
"rootDataFileId": -1,
11+
"md5": "050644e853fdfe46a3707695ba2fe736",
12+
"checksum": {
13+
"type": "MD5",
14+
"value": "050644e853fdfe46a3707695ba2fe736"
15+
},
16+
"tabularData": false,
17+
"creationDate": "2025-04-14",
18+
"fileAccessRequest": false,
19+
"restricted": false,
20+
"fileMetadataId": 1,
21+
"varGroups": []
22+
}
23+
]

0 commit comments

Comments
 (0)