Skip to content

Commit 3ac96f3

Browse files
committed
Register unversioned DOI alongside versioned on Crossref submissions
1 parent 57319df commit 3ac96f3

4 files changed

Lines changed: 140 additions & 5 deletions

File tree

api/crossref/views.py

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@
77

88
from api.crossref.permissions import RequestComesFromMailgun
99
from osf.models import Preprint, NotificationTypeEnum
10+
from osf.models.base import Guid
1011
from website import settings
1112
from website.preprints.tasks import mint_doi_on_crossref_fail
1213

@@ -48,6 +49,12 @@ def post(self, request):
4849
if record.get('status').lower() == 'success' and doi:
4950
msg = record.find('msg').text
5051
created = bool(msg == 'Successfully added')
52+
_, version = Guid.split_guid(guid) if guid else (None, None)
53+
if not version:
54+
logger.info(f'Unversioned DOI confirmed by CrossRef (no identifier update needed): {doi}')
55+
dois_processed += 1
56+
continue
57+
5158
legacy_doi = preprint.get_identifier(category='legacy_doi')
5259
if created or legacy_doi:
5360
# Sets preprint_doi_created and saves the preprint

api_tests/crossref/views/test_crossref_email_response.py

Lines changed: 43 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -220,3 +220,46 @@ def test_confirmation_marks_legacy_doi_as_deleted(self, app, url, preprint):
220220
app.post(url, context_data)
221221

222222
assert preprint.identifiers.get(category='legacy_doi').deleted
223+
224+
def test_unversioned_doi_confirmation_skips_identifier_update(self, app, url, preprint):
    """A batch confirming both DOIs must leave the stored 'doi' identifier versioned.

    The Crossref diagnostic posted here reports success for the versioned DOI
    and for the unversioned (base guid) DOI; afterwards the preprint's 'doi'
    identifier is expected to still hold the versioned value, and no
    notification is expected to be sent.
    """
    versioned_doi = settings.DOI_FORMAT.format(
        prefix=preprint.provider.doi_prefix,
        guid=preprint._id,
    )
    preprint.set_identifier_value(category='doi', value=versioned_doi)

    # The base guid id carries no _vN suffix.
    unversioned_doi = settings.DOI_FORMAT.format(
        prefix=preprint.provider.doi_prefix,
        guid=preprint.get_guid()._id,
    )

    diagnostic_template = """
<?xml version="1.0" encoding="UTF-8"?>
<doi_batch_diagnostic status="completed" sp="cs3.crossref.org">
<submission_id>1390675999</submission_id>
<batch_id>{batch_id}</batch_id>
<record_diagnostic status="Success">
<doi>{versioned_doi}</doi>
<msg>Successfully updated</msg>
</record_diagnostic>
<record_diagnostic status="Success">
<doi>{unversioned_doi}</doi>
<msg>Successfully added</msg>
</record_diagnostic>
<batch_data>
<record_count>2</record_count>
<success_count>2</success_count>
<warning_count>0</warning_count>
<failure_count>0</failure_count>
</batch_data>
</doi_batch_diagnostic>
"""
    crossref_response = diagnostic_template.format(
        batch_id=preprint._id,
        versioned_doi=versioned_doi,
        unversioned_doi=unversioned_doi,
    )

    payload = self.make_mailgun_payload(crossref_response=crossref_response)
    with capture_notifications(expect_none=True):
        app.post(url, payload)

    preprint.reload()
    assert preprint.get_identifier_value('doi') == versioned_doi

tests/identifiers/test_crossref.py

Lines changed: 63 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -95,6 +95,18 @@ def test_crossref_build_doi(self, crossref_client, preprint):
9595

9696
assert crossref_client.build_doi(preprint) == settings.DOI_FORMAT.format(prefix=doi_prefix, guid=preprint._id)
9797

98+
def test_crossref_build_unversioned_doi(self, crossref_client, preprint):
    """build_unversioned_doi formats DOI_FORMAT with the provider prefix and base guid id."""
    unversioned = settings.DOI_FORMAT.format(
        prefix=preprint.provider.doi_prefix,
        guid=preprint.get_guid()._id,
    )

    assert crossref_client.build_unversioned_doi(preprint) == unversioned
    # The expected value itself must not carry a version marker.
    assert '_v' not in unversioned
105+
106+
def test_crossref_build_unversioned_doi_matches_base_guid_not_versioned(self, crossref_client, preprint, preprint_version):
    """The unversioned DOI differs from a version's own DOI and is the same for both fixtures."""
    doi_of_version = crossref_client.build_doi(preprint_version)
    unversioned_from_version = crossref_client.build_unversioned_doi(preprint_version)
    unversioned_from_preprint = crossref_client.build_unversioned_doi(preprint)

    assert doi_of_version != unversioned_from_version
    assert unversioned_from_preprint == unversioned_from_version
109+
98110
def test_crossref_build_doi_versioned(self, crossref_client, preprint_version):
99111
doi_prefix = preprint_version.provider.doi_prefix
100112

@@ -338,6 +350,57 @@ def test_metadata_for_non_included_relation(self, crossref_client, preprint):
338350
root_without_relation = lxml.etree.fromstring(xml_without_relation)
339351
assert root_without_relation.find('.//{%s}intra_work_relation' % crossref.CROSSREF_RELATIONS) is None
340352

353+
def test_metadata_includes_unversioned_doi_entry(self, crossref_client, preprint):
    """With include_unversioned_doi=True the batch holds a second <posted_content>.

    The first entry must carry the DOI from build_doi, the second the DOI from
    build_unversioned_doi and a <resource> of settings.DOMAIN + base guid id.
    """
    namespace = crossref.CROSSREF_NAMESPACE
    doi_path = './/{%s}doi' % namespace

    metadata = crossref_client.build_metadata(preprint, include_unversioned_doi=True)
    entries = lxml.etree.fromstring(metadata).findall('.//{%s}posted_content' % namespace)
    assert len(entries) == 2
    versioned_entry, unversioned_entry = entries

    dois_in_versioned = versioned_entry.findall(doi_path)
    assert len(dois_in_versioned) == 1
    assert dois_in_versioned[0].text == crossref_client.build_doi(preprint)

    dois_in_unversioned = unversioned_entry.findall(doi_path)
    assert len(dois_in_unversioned) == 1
    unversioned_text = dois_in_unversioned[0].text
    assert unversioned_text == crossref_client.build_unversioned_doi(preprint)
    assert '_v' not in unversioned_text

    resource = unversioned_entry.find('.//{%s}resource' % namespace)
    assert resource.text == settings.DOMAIN + preprint.get_guid()._id
372+
373+
def test_metadata_unversioned_doi_uses_latest_version_metadata(self, crossref_client, preprint, preprint_version):
    """The unversioned entry's <resource> points at settings.DOMAIN + base guid id.

    NOTE(review): despite the name, only the entry count and the resource URL
    are asserted here; nothing compares the entry against ``preprint_version``
    fields -- confirm whether a latest-version assertion was intended.
    """
    namespace = crossref.CROSSREF_NAMESPACE
    metadata = crossref_client.build_metadata(preprint, include_unversioned_doi=True)

    entries = lxml.etree.fromstring(metadata).findall('.//{%s}posted_content' % namespace)
    assert len(entries) == 2

    resource = entries[1].find('.//{%s}resource' % namespace)
    expected_url = settings.DOMAIN + preprint.get_guid()._id
    assert resource.text == expected_url
383+
384+
def test_metadata_unversioned_doi_has_no_version_relations(self, crossref_client, preprint, preprint_version):
    """The unversioned <posted_content> must not contain an 'isVersionOf' relation."""
    metadata = crossref_client.build_metadata(preprint_version, include_unversioned_doi=True)
    entries = lxml.etree.fromstring(metadata).findall(
        './/{%s}posted_content' % crossref.CROSSREF_NAMESPACE
    )

    # The second entry is the one appended for the unversioned DOI.
    relation_types = [
        relation.get('relationship-type')
        for relation in entries[1].findall('.//{%s}intra_work_relation' % crossref.CROSSREF_RELATIONS)
    ]
    assert 'isVersionOf' not in relation_types
396+
397+
def test_metadata_bulk_does_not_include_unversioned_doi(self, crossref_client, preprint, preprint_version):
    """A list passed to build_metadata yields exactly one <posted_content> per preprint."""
    batch = [preprint, preprint_version]
    root = lxml.etree.fromstring(crossref_client.build_metadata(batch))

    entries = root.findall('.//{%s}posted_content' % crossref.CROSSREF_NAMESPACE)
    assert len(entries) == 2  # one per preprint, no extras
403+
341404
def test_metadata_for_affiliated_institutions(self, crossref_client, preprint):
342405
institution = InstitutionFactory()
343406
institution.ror_uri = 'http://ror.org/WHATisITgoodFOR/'

website/identifiers/clients/crossref.py

Lines changed: 27 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -41,13 +41,18 @@ def build_doi(self, preprint):
4141
prefix = preprint.provider.doi_prefix
4242
return settings.DOI_FORMAT.format(prefix=prefix, guid=preprint._id)
4343

44-
def build_metadata(self, preprint, include_relation=True):
44+
def build_unversioned_doi(self, preprint):
    """Return the DOI for ``preprint`` built from its base guid id.

    Formats ``settings.DOI_FORMAT`` with the provider's ``doi_prefix`` and the
    ``_id`` of ``preprint.get_guid()`` (instead of ``preprint._id``).

    :param preprint: the preprint to build the unversioned DOI for
    """
    return settings.DOI_FORMAT.format(
        prefix=preprint.provider.doi_prefix,
        guid=preprint.get_guid()._id,
    )
47+
48+
def build_metadata(self, preprint, include_relation=True, include_unversioned_doi=False):
4549
"""Return the crossref metadata XML document for a given preprint as a string for DOI minting purposes
4650
4751
:param preprint: the preprint, or list of preprints to build metadata for
4852
"""
4953
if isinstance(preprint, (list, QuerySet)):
5054
preprints = preprint
55+
include_unversioned_doi = False # not supported for bulk batches
5156
else:
5257
preprints = [preprint]
5358

@@ -74,6 +79,9 @@ def build_metadata(self, preprint, include_relation=True):
7479
for preprint in preprints:
7580
body.append(self.build_posted_content(preprint, element, include_relation))
7681

82+
if include_unversioned_doi:
83+
body.append(self.build_unversioned_posted_content(preprints[0], element))
84+
7785
root = element.doi_batch(
7886
head,
7987
body,
@@ -82,10 +90,12 @@ def build_metadata(self, preprint, include_relation=True):
8290
root.attrib['{%s}schemaLocation' % XSI] = CROSSREF_SCHEMA_LOCATION
8391
return lxml.etree.tostring(root)
8492

85-
def build_posted_content(self, preprint, element, include_relation):
93+
def build_posted_content(self, preprint, element, include_relation, doi_override=None, resource_override=None):
8694
"""Build the <posted_content> element for a single preprint
8795
preprint - preprint to build posted_content for
8896
element - namespace element to use when building parts of the XML structure
97+
doi_override - if provided, use this DOI value instead of the preprint's own DOI
98+
resource_override - if provided, use this URL as the <resource> instead of the default
8999
"""
90100
from osf.models import SpamStatus
91101

@@ -157,15 +167,27 @@ def build_posted_content(self, preprint, element, include_relation):
157167
posted_content.append(relations_program)
158168

159169
minted_doi = preprint.get_identifier_value('doi')
160-
doi = minted_doi or self.build_doi(preprint)
170+
doi = doi_override or minted_doi or self.build_doi(preprint)
171+
resource_url = resource_override if resource_override is not None else settings.DOMAIN + preprint._id
161172
doi_data = [
162173
element.doi(doi),
163-
element.resource(settings.DOMAIN + preprint._id)
174+
element.resource(resource_url)
164175
]
165176
posted_content.append(element.doi_data(*doi_data))
166177

167178
return posted_content
168179

180+
def build_unversioned_posted_content(self, preprint, element):
    """Build the <posted_content> element deposited under the unversioned DOI

    The metadata comes from the referent of the preprint's guid; relations are
    left out, the DOI is the one from build_unversioned_doi, and the <resource>
    is settings.DOMAIN followed by the base guid id.

    preprint - preprint whose guid referent is used to build posted_content
    element - namespace element to use when building parts of the XML structure
    """
    referent = preprint.get_guid().referent
    landing_url = settings.DOMAIN + referent.get_guid()._id
    overrides = dict(
        doi_override=self.build_unversioned_doi(referent),
        resource_override=landing_url,
    )
    return self.build_posted_content(referent, element, include_relation=False, **overrides)
190+
169191
def _process_crossref_name(self, contributor):
170192
# Adapted from logic used in `api/citations/utils.py`
171193
# If the user has a family and given name, use those
@@ -249,7 +271,7 @@ def _build_url(self, **query):
249271

250272
def create_identifier(self, preprint, category, include_relation=True):
251273
if category == 'doi':
252-
metadata = self.build_metadata(preprint, include_relation)
274+
metadata = self.build_metadata(preprint, include_relation, include_unversioned_doi=True)
253275
doi = self.build_doi(preprint)
254276
username, password = self.get_credentials()
255277
logger.info(f'Sending metadata for DOI {doi}:\n{metadata}')

0 commit comments

Comments
 (0)