diff --git a/doc/release-notes/12346-DataCite-xml-updates.md b/doc/release-notes/12346-DataCite-xml-updates.md new file mode 100644 index 00000000000..4ad541925ee --- /dev/null +++ b/doc/release-notes/12346-DataCite-xml-updates.md @@ -0,0 +1,9 @@ +This release updates the DataCite XML metadata format to: +- indicate compliance with the version 4.7 schema, +- add support for specifying a 'Translator' contributor, +- add a valueURI attribute to a subject element when a value exists in the keywordTermURI field, +- add a language element when a dataset has one language defined in its Citation block metadata, +- accept dates of the form YYYY or YYYY-MM in the timePeriodCovered and dateOfCollection fields, and +- avoid sending the word 'null' as part of a date range when the start or end date is unspecified. + +Because this release adds Translator to the contributor types allowed in the citation block, installations must reload the citation metadata block to get the new option. \ No newline at end of file diff --git a/scripts/api/data/metadatablocks/citation.tsv b/scripts/api/data/metadatablocks/citation.tsv index b6bed2b9c5b..b0d2fefef37 100644 --- a/scripts/api/data/metadatablocks/citation.tsv +++ b/scripts/api/data/metadatablocks/citation.tsv @@ -129,8 +129,9 @@ contributorType Research Group 11 contributorType Rights Holder 12 contributorType Sponsor 13 - contributorType Supervisor 14 - contributorType Work Package Leader 15 + contributorType Translator 14 + contributorType Supervisor 15 + contributorType Work Package Leader 16 contributorType Other 16 authorIdentifierScheme ORCID 0 authorIdentifierScheme ROR 1 diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index 1d14b89e11a..b10690abab1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ 
-7,8 +7,10 @@ import java.net.URI; import java.net.URISyntaxException; import java.net.URL; -import java.text.ParseException; -import java.text.SimpleDateFormat; +import java.time.LocalDate; +import java.time.Year; +import java.time.YearMonth; +import java.time.format.DateTimeParseException; import java.util.ArrayList; import java.util.Arrays; import java.util.Date; @@ -24,31 +26,16 @@ import javax.xml.stream.XMLStreamException; import javax.xml.stream.XMLStreamWriter; +import edu.harvard.iq.dataverse.*; import org.apache.commons.lang3.StringUtils; import org.apache.commons.text.StringEscapeUtils; import org.ocpsoft.common.util.Strings; -import edu.harvard.iq.dataverse.AlternativePersistentIdentifier; -import edu.harvard.iq.dataverse.DataFile; -import edu.harvard.iq.dataverse.Dataset; -import edu.harvard.iq.dataverse.DatasetAuthor; -import edu.harvard.iq.dataverse.DatasetField; -import edu.harvard.iq.dataverse.DatasetFieldCompoundValue; -import edu.harvard.iq.dataverse.DatasetFieldConstant; -import edu.harvard.iq.dataverse.DatasetFieldServiceBean; -import edu.harvard.iq.dataverse.DatasetRelPublication; -import edu.harvard.iq.dataverse.DatasetVersion; -import edu.harvard.iq.dataverse.DvObject; -import edu.harvard.iq.dataverse.ExternalIdentifier; -import edu.harvard.iq.dataverse.FileMetadata; -import edu.harvard.iq.dataverse.GlobalId; -import edu.harvard.iq.dataverse.TermsOfUseAndAccess; import edu.harvard.iq.dataverse.api.Util; import edu.harvard.iq.dataverse.dataset.DatasetType; import edu.harvard.iq.dataverse.dataset.DatasetUtil; import edu.harvard.iq.dataverse.license.License; import edu.harvard.iq.dataverse.pidproviders.AbstractPidProvider; -import edu.harvard.iq.dataverse.pidproviders.PidProvider; import edu.harvard.iq.dataverse.pidproviders.PidUtil; import edu.harvard.iq.dataverse.pidproviders.handle.HandlePidProvider; import edu.harvard.iq.dataverse.pidproviders.perma.PermaLinkPidProvider; @@ -67,9 +54,9 @@ public class XmlMetadataTemplate { private static 
final Logger logger = Logger.getLogger(XmlMetadataTemplate.class.getName()); public static final String XML_NAMESPACE = "http://datacite.org/schema/kernel-4"; - public static final String XML_SCHEMA_LOCATION = "http://datacite.org/schema/kernel-4 http://schema.datacite.org/meta/kernel-4.5/metadata.xsd"; + public static final String XML_SCHEMA_LOCATION = "http://datacite.org/schema/kernel-4 http://schema.datacite.org/meta/kernel-4.7/metadata.xsd"; public static final String XML_XSI = "http://www.w3.org/2001/XMLSchema-instance"; - public static final String XML_SCHEMA_VERSION = "4.5"; + public static final String XML_SCHEMA_VERSION = "4.7"; private DoiMetadata doiMetadata; @@ -359,6 +346,7 @@ private void writeSubjects(XMLStreamWriter xmlw, DvObject dvObject) throws XMLSt String keyword = null; String scheme = null; String schemeUri = null; + String valueUri = null; for (DatasetField subField : keywordFieldValue.getChildDatasetFields()) { switch (subField.getDatasetFieldType().getName()) { @@ -371,6 +359,9 @@ private void writeSubjects(XMLStreamWriter xmlw, DvObject dvObject) throws XMLSt case DatasetFieldConstant.keywordVocabURI: schemeUri = subField.getValue(); break; + case DatasetFieldConstant.keywordTermURI: + valueUri = subField.getValue(); + break; } } if (StringUtils.isNotBlank(keyword)) { @@ -381,6 +372,9 @@ private void writeSubjects(XMLStreamWriter xmlw, DvObject dvObject) throws XMLSt if (StringUtils.isNotBlank(schemeUri)) { attributesMap.put("schemeURI", schemeUri); } + if (StringUtils.isNotBlank(valueUri)) { + attributesMap.put("valueURI", valueUri); + } subjectsCreated = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "subjects", subjectsCreated); XmlWriterUtil.writeFullElementWithAttributes(xmlw, "subject", attributesMap, StringEscapeUtils.escapeXml10(keyword)); } @@ -425,7 +419,8 @@ private void writeSubjects(XMLStreamWriter xmlw, DvObject dvObject) throws XMLSt * 7, Contributor (with optional given name, family name, name identifier and * affiliation 
sub-properties) * - * @see #writeContributorElement(javax.xml.stream.XMLStreamWriter, + * @see #writeEntityElements(javax.xml.stream.XMLStreamWriter, + * java.lang.String, java.lang.String, jakarta.json.JsonObject, * java.lang.String, java.lang.String, java.lang.String) * * @param xmlw @@ -570,7 +565,7 @@ private void writeContributors(XMLStreamWriter xmlw, DvObject dvObject) throws X //List from https://schema.datacite.org/meta/kernel-4/include/datacite-contributorType-v4.xsd private Set contributorTypes = new HashSet<>(Arrays.asList("ContactPerson", "DataCollector", "DataCurator", "DataManager", "Distributor", "Editor", "HostingInstitution", "Other", "Producer", "ProjectLeader", "ProjectManager", "ProjectMember", "RegistrationAgency", "RegistrationAuthority", - "RelatedPerson", "ResearchGroup", "RightsHolder", "Researcher", "Sponsor", "Supervisor", "WorkPackageLeader")); + "RelatedPerson", "ResearchGroup", "RightsHolder", "Researcher", "Sponsor", "Supervisor", "Translator", "WorkPackageLeader")); private String getCanonicalContributorType(String contributorType) { if(StringUtils.isBlank(contributorType) || !contributorTypes.contains(contributorType)) { @@ -758,17 +753,17 @@ private void writeDates(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStrea for (DatasetField subField : collectionDateFieldValue.getChildDatasetFields()) { switch (subField.getDatasetFieldType().getName()) { case DatasetFieldConstant.dateOfCollectionStart: - startDate = subField.getValue(); + startDate = subField.getValue().trim(); break; case DatasetFieldConstant.dateOfCollectionEnd: - endDate = subField.getValue(); + endDate = subField.getValue().trim(); break; } } - // Minimal clean-up - useful? Parse/format would remove unused chars, and an - // exception would clear the date so we don't send nonsense - startDate = cleanUpDate(startDate); - endDate = cleanUpDate(endDate); + // Verify valid date format + + startDate = isValidYearMonthOrDay(startDate) ? 
startDate:""; + endDate = isValidYearMonthOrDay(endDate) ? endDate:""; if (StringUtils.isNotBlank(startDate) || StringUtils.isNotBlank(endDate)) { datesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "dates", datesWritten); attributes.put("dateType", "Collected"); @@ -784,17 +779,16 @@ private void writeDates(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStrea for (DatasetField subField : timePeriodFieldValue.getChildDatasetFields()) { switch (subField.getDatasetFieldType().getName()) { case DatasetFieldConstant.timePeriodCoveredStart: - startDate = subField.getValue(); + startDate = subField.getValue().trim(); break; case DatasetFieldConstant.timePeriodCoveredEnd: - endDate = subField.getValue(); + endDate = subField.getValue().trim(); break; } } - // Minimal clean-up - useful? Parse/format would remove unused chars, and an - // exception would clear the date so we don't send nonsense - startDate = cleanUpDate(startDate); - endDate = cleanUpDate(endDate); + // Verify valid date format + startDate = isValidYearMonthOrDay(startDate) ? startDate:""; + endDate = isValidYearMonthOrDay(endDate) ? 
endDate:""; if (StringUtils.isNotBlank(startDate) || StringUtils.isNotBlank(endDate)) { datesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "dates", datesWritten); attributes.put("dateType", "Other"); @@ -808,26 +802,67 @@ private void writeDates(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStrea } } - private String cleanUpDate(String date) { - String newDate = null; - if (!StringUtils.isBlank(date)) { - try { - SimpleDateFormat sdf = Util.getDateFormat(); - Date start = sdf.parse(date); - newDate = sdf.format(start); - } catch (ParseException e) { - logger.warning("Could not parse date: " + date); + /** Checks for yyyy, yyyy-MM, or yyyy-MM-dd format + * @param value + * @return true if valid date format, false otherwise + */ + private boolean isValidYearMonthOrDay(String value) { + if (StringUtils.isBlank(value)) { + return false; + } + + try { + if (value.matches("\\d{4}")) { + Year.parse(value); + return true; + } + + if (value.matches("\\d{4}-\\d{2}")) { + YearMonth.parse(value); + return true; } + + if (value.matches("\\d{4}-\\d{2}-\\d{2}")) { + LocalDate.parse(value); + return true; + } + } catch (DateTimeParseException e) { + return false; } - return newDate; + + return false; } // 9, Language (MA), language private void writeLanguage(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { - // Currently not supported. Spec indicates one 'primary' language. Could send - // the first entry in DatasetFieldConstant.language or send iff there is only - // one entry, and/or default to the machine's default lang, or the dataverse metadatalang? + // Spec indicates one 'primary' language. Sending a language iff there is only + // one citation mdb language entry (Could send first entry if there are several and/or default to the machine's default lang, or use the dataset's metadatalang?) 
+ if (dvObject.isInstanceofDataFile()) { + dvObject = dvObject.getOwner(); + } + if (!(dvObject instanceof Dataset dataset)) { + return; + } + + DatasetVersion dv = dataset.getLatestVersionForCopy(); + if (dv == null) { + return; + } + Optional dsf = dv.getDatasetFields().stream().filter(f -> f.getDatasetFieldType().getName().equals(DatasetFieldConstant.language)).findFirst(); + if (dsf.isPresent()) { + String languageIdentifier = null; + List controlledVocabularyValues = dsf.get().getControlledVocabularyValues(); + if (controlledVocabularyValues != null && controlledVocabularyValues.size() == 1) { + ControlledVocabularyValue cvv = controlledVocabularyValues.get(0); + languageIdentifier = cvv.getIdentifier(); + } + // 'Not applicable' has no identifier - we want to skip it. + if (StringUtils.isNotBlank(languageIdentifier)) { + XmlWriterUtil.writeFullElement(xmlw, "language", StringEscapeUtils.escapeXml10(languageIdentifier)); + } + } return; + } // 10, ResourceType (with mandatory general type @@ -1569,7 +1604,7 @@ private void writeFundingReferences(XMLStreamWriter xmlw, DvObject dvObject) thr funder = jo.getString("termName"); } } - + xmlw.writeStartElement("fundingReference"); // XmlWriterUtil.writeFullElement(xmlw, "funderName", StringEscapeUtils.escapeXml10(funder)); if (isROR) { diff --git a/src/main/java/propertyFiles/citation.properties b/src/main/java/propertyFiles/citation.properties index 00f25740a19..5e6c3dd56f4 100644 --- a/src/main/java/propertyFiles/citation.properties +++ b/src/main/java/propertyFiles/citation.properties @@ -295,6 +295,7 @@ controlledvocabulary.contributorType.research_group=Research Group controlledvocabulary.contributorType.rights_holder=Rights Holder controlledvocabulary.contributorType.sponsor=Sponsor controlledvocabulary.contributorType.supervisor=Supervisor +controlledvocabulary.contributorType.translator=Translator controlledvocabulary.contributorType.work_package_leader=Work Package Leader 
controlledvocabulary.contributorType.other=Other controlledvocabulary.authorIdentifierScheme.orcid=ORCID diff --git a/src/test/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/XmlMetadataTemplateTest.java b/src/test/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/XmlMetadataTemplateTest.java index 14d8dfb13a8..1855553c1b6 100644 --- a/src/test/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/XmlMetadataTemplateTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/XmlMetadataTemplateTest.java @@ -164,8 +164,85 @@ public void testDataCiteXMLCreation() throws IOException { testDatasetField.setDatasetVersion(dv); testDatasetField.setDatasetFieldType(primitiveDSFType); testDatasetField.setSingleValue("First Title"); + + DatasetFieldType keywordType = new DatasetFieldType(DatasetFieldConstant.keyword, FieldType.NONE, true); + DatasetFieldType keywordValueType = new DatasetFieldType(DatasetFieldConstant.keywordValue, FieldType.TEXT, false); + DatasetFieldType keywordTermURIType = new DatasetFieldType(DatasetFieldConstant.keywordTermURI, FieldType.URL, false); + + DatasetField keywordField = new DatasetField(); + keywordField.setDatasetVersion(dv); + keywordField.setDatasetFieldType(keywordType); + + DatasetFieldCompoundValue compoundValue = new DatasetFieldCompoundValue(); + compoundValue.setParentDatasetField(keywordField); + + DatasetField valField = new DatasetField(); + valField.setDatasetFieldType(keywordValueType); + DatasetFieldValue val = new DatasetFieldValue(); + val.setDatasetField(valField); + val.setValue("Keyword1"); + valField.setDatasetFieldValues(new ArrayList<>(List.of(val))); + + DatasetField uriField = new DatasetField(); + uriField.setDatasetFieldType(keywordTermURIType); + DatasetFieldValue uriVal = new DatasetFieldValue(); + uriVal.setDatasetField(uriField); + uriVal.setValue("https://example.com/keyword1"); + uriField.setDatasetFieldValues(new ArrayList<>(List.of(uriVal))); + + 
compoundValue.setChildDatasetFields(new ArrayList<>(List.of(valField, uriField))); + keywordField.setDatasetFieldCompoundValues(new ArrayList<>(List.of(compoundValue))); + List fields = new ArrayList<>(); fields.add(testDatasetField); + fields.add(keywordField); + + DatasetFieldType contributorTypeFieldType = new DatasetFieldType(DatasetFieldConstant.contributor, + DatasetFieldType.FieldType.TEXT, false); + DatasetFieldType contributorNameFieldType = new DatasetFieldType(DatasetFieldConstant.contributorName, + DatasetFieldType.FieldType.TEXT, false); + DatasetFieldType contributorRoleFieldType = new DatasetFieldType(DatasetFieldConstant.contributorType, + DatasetFieldType.FieldType.TEXT, false); + + DatasetField translatorField = new DatasetField(); + translatorField.setDatasetVersion(dv); + translatorField.setDatasetFieldType(contributorTypeFieldType); + DatasetFieldCompoundValue translatorValue = new DatasetFieldCompoundValue(); + + DatasetField translatorName = new DatasetField(); + translatorName.setDatasetVersion(dv); + translatorName.setDatasetFieldType(contributorNameFieldType); + translatorName.setSingleValue("Translator Name"); + + DatasetField translatorRole = new DatasetField(); + translatorRole.setDatasetVersion(dv); + translatorRole.setDatasetFieldType(contributorRoleFieldType); + translatorRole.setSingleValue("Translator"); + + List translatorChildren = new ArrayList<>(); + translatorChildren.add(translatorName); + translatorChildren.add(translatorRole); + translatorValue.setChildDatasetFields(translatorChildren); + + List translatorValues = new ArrayList<>(); + translatorValues.add(translatorValue); + translatorField.setDatasetFieldCompoundValues(translatorValues); + fields.add(translatorField); + + DatasetFieldType languageFieldType = new DatasetFieldType(DatasetFieldConstant.language, + DatasetFieldType.FieldType.TEXT, false); + DatasetField languageField = new DatasetField(); + languageField.setDatasetVersion(dv); + 
languageField.setDatasetFieldType(languageFieldType); + languageField.setSingleValue("en"); + ControlledVocabularyValue languageCvv = new ControlledVocabularyValue(); + languageCvv.setId(1L); + languageCvv.setIdentifier("en"); + languageCvv.setStrValue("English"); + languageCvv.setDatasetFieldType(languageFieldType); + languageField.setControlledVocabularyValues(List.of(languageCvv)); + fields.add(languageField); + dv.setDatasetFields(fields); ArrayList dsvs = new ArrayList<>(); dsvs.add(0, dv); @@ -200,6 +277,11 @@ public void testDataCiteXMLCreation() throws IOException { assertEquals("ROR", XmlPath.from(xml).getString("resource.creators.creator[3].nameIdentifier.@nameIdentifierScheme")); assertEquals("https://ror.org", XmlPath.from(xml).getString("resource.creators.creator[3].nameIdentifier.@schemeURI")); assertEquals("Dataverse", XmlPath.from(xml).getString("resource.publisher")); + assertEquals("Keyword1", XmlPath.from(xml).getString("resource.subjects.subject")); + assertEquals("https://example.com/keyword1", XmlPath.from(xml).getString("resource.subjects.subject.@valueURI")); + assertEquals("Translator", XmlPath.from(xml).getString("resource.contributors.contributor[0].@contributorType")); + assertEquals("Translator Name", XmlPath.from(xml).getString("resource.contributors.contributor[0].contributorName")); + assertEquals("en", XmlPath.from(xml).getString("resource.language")); dv.setVersionNumber(1L); dv.setMinorVersionNumber(0l); @@ -260,7 +342,8 @@ public void testDataCiteXMLCreationAllFields() throws IOException { d.setDatasetType(dType); String xml = DOIDataCiteRegisterService.getMetadataFromDvObject(dv.getDataset().getGlobalId().asString(), new DataCitation(dv).getDataCiteMetadata(), dv.getDataset()); - System.out.println("Output from dataset-all-defaults is " + xml); + assertTrue(xml.contains("valueURI=\"http://keywordTermURI1.org\"")); + assertTrue(xml.contains("valueURI=\"http://keywordTermURI2.org\"")); try { StreamSource source = new 
StreamSource(new StringReader(xml)); source.setSystemId("DataCite XML for test dataset");