Skip to content

Commit 2e2fe8a

Browse files
authored
Merge pull request #11607 from sh-andriy/feature/ENG-10103_Update-Datacite-client-to-ensure-ROR-compat
[ENG-10103] | feat(osf): Add DataCite client tests for ROR funder identifier support
2 parents e352e09 + a1c0a66 commit 2e2fe8a

5 files changed

Lines changed: 167 additions & 14 deletions

File tree

osf/metadata/schemas/datacite.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -473,6 +473,9 @@
473473
"ROR",
474474
"Other"
475475
]
476+
},
477+
"schemeURI": {
478+
"$ref": "#/definitions/uri"
476479
}
477480
},
478481
"additionalProperties": false,

osf/metadata/serializers/datacite/datacite_tree_walker.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -195,12 +195,12 @@ def _identifier_type_and_value(self, identifier: str):
195195
return ('URL', identifier)
196196
logger.warning('skipping non-IRI-shaped identifier "%s"', identifier)
197197

198-
def _funder_identifier_type(self, identifier: str):
198+
def _funder_identifier_type_and_scheme(self, identifier: str):
    """Classify a funder identifier by its IRI prefix.

    Returns a `(funderIdentifierType, schemeURI)` pair:

    * identifiers starting with `DxDOI` or `DOI` are reported as
      'Crossref Funder ID' with the Crossref funder-registry URL;
    * identifiers starting with `ROR` are reported as 'ROR', using the
      `ROR` prefix itself (as a string) for the scheme URI;
    * anything else is 'Other' with an empty scheme URI.
    """
    _looks_like_doi = any(
        identifier.startswith(_doi_prefix)
        for _doi_prefix in (DxDOI, DOI)
    )
    if _looks_like_doi:
        _type_and_scheme = ('Crossref Funder ID', 'https://www.crossref.org/services/funder-registry/')
    elif identifier.startswith(ROR):
        _type_and_scheme = ('ROR', str(ROR))
    else:
        # empty scheme URI signals "no known scheme" to the caller
        _type_and_scheme = ('Other', '')
    return _type_and_scheme
204204

205205
def _get_name_type(self, agent_iri):
206206
if (agent_iri, RDF.type, FOAF.Person) in self.basket:
@@ -312,13 +312,15 @@ def _funding_reference(self, fundrefs_el, funder, funding_award=None):
312312
_fundref_el = self.visit(fundrefs_el, 'fundingReference')
313313
self.visit(_fundref_el, 'funderName', text=next(self.basket[funder:FOAF.name], ''))
314314
_funder_identifier = next(self.basket[funder:DCTERMS.identifier], '')
315+
_funder_id_type, _funder_scheme_uri = self._funder_identifier_type_and_scheme(_funder_identifier)
316+
_funder_id_attrib = {'funderIdentifierType': _funder_id_type}
317+
if _funder_scheme_uri:
318+
_funder_id_attrib['schemeURI'] = _funder_scheme_uri
315319
self.visit(
316320
_fundref_el,
317321
'funderIdentifier',
318322
text=_funder_identifier,
319-
attrib={
320-
'funderIdentifierType': self._funder_identifier_type(_funder_identifier),
321-
},
323+
attrib=_funder_id_attrib,
322324
)
323325
if funding_award is not None:
324326
self.visit(

osf_tests/metadata/expected_metadata_files/project_full.datacite.json

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,8 @@
5656
"awardTitle": "because reasons",
5757
"funderIdentifier": {
5858
"funderIdentifier": "https://doi.org/10.$$$$",
59-
"funderIdentifierType": "Crossref Funder ID"
59+
"funderIdentifierType": "Crossref Funder ID",
60+
"schemeURI": "https://www.crossref.org/services/funder-registry/"
6061
},
6162
"funderName": "Mx. Moneypockets"
6263
},
@@ -68,14 +69,16 @@
6869
"awardTitle": "because reasons!",
6970
"funderIdentifier": {
7071
"funderIdentifier": "https://doi.org/10.$$$$",
71-
"funderIdentifierType": "Crossref Funder ID"
72+
"funderIdentifierType": "Crossref Funder ID",
73+
"schemeURI": "https://www.crossref.org/services/funder-registry/"
7274
},
7375
"funderName": "Mx. Moneypockets"
7476
},
7577
{
7678
"funderIdentifier": {
7779
"funderIdentifier": "https://ror.org/0example",
78-
"funderIdentifierType": "ROR"
80+
"funderIdentifierType": "ROR",
81+
"schemeURI": "https://ror.org/"
7982
},
8083
"funderName": "Caring Fan"
8184
}

osf_tests/metadata/expected_metadata_files/project_full.datacite.xml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,19 +38,19 @@
3838
<fundingReferences>
3939
<fundingReference>
4040
<funderName>Mx. Moneypockets</funderName>
41-
<funderIdentifier funderIdentifierType="Crossref Funder ID">https://doi.org/10.$$$$</funderIdentifier>
41+
<funderIdentifier funderIdentifierType="Crossref Funder ID" schemeURI="https://www.crossref.org/services/funder-registry/">https://doi.org/10.$$$$</funderIdentifier>
4242
<awardNumber awardURI="https://moneypockets.example/millions">10000000</awardNumber>
4343
<awardTitle>because reasons</awardTitle>
4444
</fundingReference>
4545
<fundingReference>
4646
<funderName>Mx. Moneypockets</funderName>
47-
<funderIdentifier funderIdentifierType="Crossref Funder ID">https://doi.org/10.$$$$</funderIdentifier>
47+
<funderIdentifier funderIdentifierType="Crossref Funder ID" schemeURI="https://www.crossref.org/services/funder-registry/">https://doi.org/10.$$$$</funderIdentifier>
4848
<awardNumber awardURI="https://moneypockets.example/millions-more">2000000</awardNumber>
4949
<awardTitle>because reasons!</awardTitle>
5050
</fundingReference>
5151
<fundingReference>
5252
<funderName>Caring Fan</funderName>
53-
<funderIdentifier funderIdentifierType="ROR">https://ror.org/0example</funderIdentifier>
53+
<funderIdentifier funderIdentifierType="ROR" schemeURI="https://ror.org/">https://ror.org/0example</funderIdentifier>
5454
</fundingReference>
5555
</fundingReferences>
5656
<relatedIdentifiers>

tests/identifiers/test_datacite.py

Lines changed: 146 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from django.utils import timezone
77

88
from framework.auth import Auth
9-
from osf.models import Outcome
9+
from osf.models import GuidMetadataRecord, Outcome
1010
from osf.utils.outcomes import ArtifactTypes
1111
from osf_tests.factories import AuthUserFactory, IdentifierFactory, RegistrationFactory
1212
from tests.base import OsfTestCase
@@ -300,6 +300,151 @@ def test_datacite_format_related_resources__ignores_inactive_resources(self, dat
300300
]
301301
_assert_unordered_list_of_dicts_equal(metadata_dict['relatedIdentifiers'], expected_relationships)
302302

303+
def _set_funding_info(self, registration, funding_info):
    """Assign `funding_info` to the registration's guid metadata record and save it.

    The record is looked up by `registration._id`.
    """
    record = GuidMetadataRecord.objects.for_guid(registration._id)
    record.funding_info = funding_info
    record.save()
307+
308+
def test_datacite_funding_references_with_ror_identifier_xml(self, registration, datacite_client):
    """XML metadata for one ROR funder carries its name, identifier, type and schemeURI."""
    ror_funder = {
        'funder_name': 'National Science Foundation',
        'funder_identifier': 'https://ror.org/021nxhr62',
        'funder_identifier_type': 'ROR',
    }
    self._set_funding_info(registration, [ror_funder])

    serialized = datacite_client.build_metadata(registration)
    xml_parser = lxml.etree.XMLParser(ns_clean=True, recover=True, encoding='utf-8')
    document = lxml.etree.fromstring(serialized, parser=xml_parser)
    datacite_ns = schema40.ns[None]

    def qualified(local_name):
        # element lookups need the `{namespace}tag` form
        return f'{{{datacite_ns}}}{local_name}'

    container = document.find(qualified('fundingReferences'))
    funding_elements = container.findall(qualified('fundingReference'))
    assert len(funding_elements) == 1
    only_reference = funding_elements[0]

    name_el = only_reference.find(qualified('funderName'))
    assert name_el.text == 'National Science Foundation'

    identifier_el = only_reference.find(qualified('funderIdentifier'))
    assert identifier_el.text == 'https://ror.org/021nxhr62'
    assert identifier_el.attrib['funderIdentifierType'] == 'ROR'
    assert identifier_el.attrib['schemeURI'] == 'https://ror.org/'
332+
333+
def test_datacite_funding_references_with_ror_identifier_json(self, registration, datacite_client):
    """Non-XML (`as_xml=False`) metadata for one ROR funder carries name, identifier, type and schemeURI."""
    ror_funder = {
        'funder_name': 'National Science Foundation',
        'funder_identifier': 'https://ror.org/021nxhr62',
        'funder_identifier_type': 'ROR',
    }
    self._set_funding_info(registration, [ror_funder])

    built = datacite_client.build_metadata(registration, as_xml=False)
    references = built['fundingReferences']
    assert len(references) == 1

    reference = references[0]
    assert str(reference['funderName']) == 'National Science Foundation'

    identifier = reference['funderIdentifier']
    assert identifier['funderIdentifier'] == 'https://ror.org/021nxhr62'
    assert identifier['funderIdentifierType'] == 'ROR'
    assert identifier['schemeURI'] == 'https://ror.org/'
349+
350+
def test_datacite_funding_references_with_crossref_funder_id(self, registration, datacite_client):
    """XML metadata for a DOI-identified funder is typed 'Crossref Funder ID' and keeps its award number."""
    crossref_funder = {
        'funder_name': 'Mx. Moneypockets',
        'funder_identifier': 'https://doi.org/10.13039/100000001',
        'funder_identifier_type': 'Crossref Funder ID',
        'award_number': '10000000',
        'award_uri': 'https://moneypockets.example/millions',
        'award_title': 'because reasons',
    }
    self._set_funding_info(registration, [crossref_funder])

    serialized = datacite_client.build_metadata(registration)
    xml_parser = lxml.etree.XMLParser(ns_clean=True, recover=True, encoding='utf-8')
    document = lxml.etree.fromstring(serialized, parser=xml_parser)
    datacite_ns = schema40.ns[None]

    def qualified(local_name):
        # element lookups need the `{namespace}tag` form
        return f'{{{datacite_ns}}}{local_name}'

    container = document.find(qualified('fundingReferences'))
    funding_elements = container.findall(qualified('fundingReference'))
    assert len(funding_elements) == 1
    only_reference = funding_elements[0]

    identifier_el = only_reference.find(qualified('funderIdentifier'))
    assert identifier_el.text == 'https://doi.org/10.13039/100000001'
    assert identifier_el.attrib['funderIdentifierType'] == 'Crossref Funder ID'
    assert identifier_el.attrib['schemeURI'] == 'https://www.crossref.org/services/funder-registry/'

    award_number_el = only_reference.find(qualified('awardNumber'))
    assert award_number_el.text == '10000000'
377+
378+
def test_datacite_funding_references_mixed_ror_and_crossref(self, registration, datacite_client):
    """A DOI funder and a ROR funder on the same registration each get their own type and schemeURI."""
    crossref_funder = {
        'funder_name': 'Mx. Moneypockets',
        'funder_identifier': 'https://doi.org/10.13039/100000001',
        'funder_identifier_type': 'Crossref Funder ID',
        'award_number': '10000000',
        'award_uri': 'https://moneypockets.example/millions',
        'award_title': 'because reasons',
    }
    ror_funder = {
        'funder_name': 'National Science Foundation',
        'funder_identifier': 'https://ror.org/021nxhr62',
        'funder_identifier_type': 'ROR',
    }
    self._set_funding_info(registration, [crossref_funder, ror_funder])

    built = datacite_client.build_metadata(registration, as_xml=False)
    references = built['fundingReferences']
    assert len(references) == 2

    # Index by funder name so the assertions do not rely on output order
    by_funder_name = {}
    for reference in references:
        by_funder_name[str(reference['funderName'])] = reference

    crossref_reference = by_funder_name['Mx. Moneypockets']
    crossref_identifier = crossref_reference['funderIdentifier']
    assert crossref_identifier['funderIdentifier'] == 'https://doi.org/10.13039/100000001'
    assert crossref_identifier['funderIdentifierType'] == 'Crossref Funder ID'
    assert crossref_identifier['schemeURI'] == 'https://www.crossref.org/services/funder-registry/'
    assert crossref_reference['awardNumber']['awardNumber'] == '10000000'

    ror_reference = by_funder_name['National Science Foundation']
    ror_identifier = ror_reference['funderIdentifier']
    assert ror_identifier['funderIdentifier'] == 'https://ror.org/021nxhr62'
    assert ror_identifier['funderIdentifierType'] == 'ROR'
    assert ror_identifier['schemeURI'] == 'https://ror.org/'
411+
412+
def test_datacite_funding_references_ror_with_award_info(self, registration, datacite_client):
    """XML metadata for a ROR funder with award details keeps the award number and title."""
    ror_funder_with_award = {
        'funder_name': 'National Institutes of Health',
        'funder_identifier': 'https://ror.org/01cwqze88',
        'funder_identifier_type': 'ROR',
        'award_number': 'R01-GM123456',
        'award_uri': 'https://reporter.nih.gov/project-details/123456',
        'award_title': 'Studying important things',
    }
    self._set_funding_info(registration, [ror_funder_with_award])

    serialized = datacite_client.build_metadata(registration)
    xml_parser = lxml.etree.XMLParser(ns_clean=True, recover=True, encoding='utf-8')
    document = lxml.etree.fromstring(serialized, parser=xml_parser)
    datacite_ns = schema40.ns[None]

    def qualified(local_name):
        # element lookups need the `{namespace}tag` form
        return f'{{{datacite_ns}}}{local_name}'

    container = document.find(qualified('fundingReferences'))
    funding_elements = container.findall(qualified('fundingReference'))
    assert len(funding_elements) == 1
    only_reference = funding_elements[0]

    identifier_el = only_reference.find(qualified('funderIdentifier'))
    assert identifier_el.text == 'https://ror.org/01cwqze88'
    assert identifier_el.attrib['funderIdentifierType'] == 'ROR'
    assert identifier_el.attrib['schemeURI'] == 'https://ror.org/'

    award_number_el = only_reference.find(qualified('awardNumber'))
    assert award_number_el.text == 'R01-GM123456'

    award_title_el = only_reference.find(qualified('awardTitle'))
    assert award_title_el.text == 'Studying important things'
442+
443+
def test_datacite_funding_references_no_funding_info(self, registration, datacite_client):
    """Without any funding info set, the non-XML metadata has no funding references."""
    # No funding info is set here; the key may be absent or hold an empty list
    built = datacite_client.build_metadata(registration, as_xml=False)
    funding_references = built.get('fundingReferences', [])
    assert funding_references == []
447+
303448

304449
@pytest.mark.django_db
305450
class TestDataCiteViews(OsfTestCase):

0 commit comments

Comments
 (0)