Skip to content

Commit 1a7c0a4

Browse files
authored
Merge branch 'IQSS:develop' into develop
2 parents 8d74e18 + f0cc9c8 commit 1a7c0a4

9 files changed

Lines changed: 74 additions & 26 deletions

File tree

.github/workflows/deploy_beta_testing.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ jobs:
4545
- uses: actions/checkout@v3
4646

4747
- name: Download war artifact
48-
uses: actions/download-artifact@v3
48+
uses: actions/download-artifact@v4.1.7
4949
with:
5050
name: built-app
5151
path: ./

.github/workflows/maven_unit_test.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ jobs:
107107
cache: maven
108108

109109
# Get the build output from the unit test job
110-
- uses: actions/download-artifact@v3
110+
- uses: actions/download-artifact@v4.1.7
111111
with:
112112
name: java-artifacts
113113
- run: |
@@ -140,7 +140,7 @@ jobs:
140140
cache: maven
141141

142142
# Get the build output from the integration test job
143-
- uses: actions/download-artifact@v3
143+
- uses: actions/download-artifact@v4.1.7
144144
with:
145145
name: java-reportdir
146146
- run: tar -xvf java-reportdir.tar
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
MDC Citation retrieval with the PID settings has been fixed.
2+
DOI parsing in Dataverse is now case-insensitive, improving interaction with services that may change the case of DOIs.
3+
Warnings related to managed/excluded PID lists for PID providers have been reduced.

src/main/java/edu/harvard/iq/dataverse/api/MakeDataCountApi.java

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@
1919
import java.io.IOException;
2020
import java.io.InputStream;
2121
import java.net.HttpURLConnection;
22+
import java.net.MalformedURLException;
23+
import java.net.URI;
24+
import java.net.URISyntaxException;
2225
import java.net.URL;
2326
import java.util.Iterator;
2427
import java.util.List;
@@ -152,10 +155,17 @@ public Response updateCitationsForDataset(@PathParam("id") String id) throws IOE
152155
// DataCite wants "doi=", not "doi:".
153156
String authorityPlusIdentifier = persistentId.replaceFirst("doi:", "");
154157
// Request max page size and then loop to handle multiple pages
155-
URL url = new URL(JvmSettings.DATACITE_REST_API_URL.lookup() +
158+
URL url = null;
159+
try {
160+
url = new URI(JvmSettings.DATACITE_REST_API_URL.lookup(pidProvider.getId()) +
156161
"/events?doi=" +
157162
authorityPlusIdentifier +
158-
"&source=crossref&page[size]=1000");
163+
"&source=crossref&page[size]=1000").toURL();
164+
} catch (URISyntaxException e) {
165+
//Nominally this means a config error/ bad DATACITE_REST_API_URL for this provider
166+
logger.warning("Unable to create URL for " + persistentId + ", pidProvider " + pidProvider.getId());
167+
return error(Status.INTERNAL_SERVER_ERROR, "Unable to create DataCite URL to retrieve citations.");
168+
}
159169
logger.fine("Retrieving Citations from " + url.toString());
160170
boolean nextPage = true;
161171
JsonArrayBuilder dataBuilder = Json.createArrayBuilder();
@@ -178,7 +188,12 @@ public Response updateCitationsForDataset(@PathParam("id") String id) throws IOE
178188
dataBuilder.add(iter.next());
179189
}
180190
if (links.containsKey("next")) {
181-
url = new URL(links.getString("next"));
191+
try {
192+
url = new URI(links.getString("next")).toURL();
193+
} catch (URISyntaxException e) {
194+
logger.warning("Unable to create URL from DataCite response: " + links.getString("next"));
195+
return error(Status.INTERNAL_SERVER_ERROR, "Unable to retrieve all results from DataCite");
196+
}
182197
} else {
183198
nextPage = false;
184199
}
@@ -187,7 +202,7 @@ public Response updateCitationsForDataset(@PathParam("id") String id) throws IOE
187202
JsonArray allData = dataBuilder.build();
188203
List<DatasetExternalCitations> datasetExternalCitations = datasetExternalCitationsService.parseCitations(allData);
189204
/*
190-
* ToDo: If this is the only source of citations, we should remove all the existing ones for the dataset and repopuate them.
205+
* ToDo: If this is the only source of citations, we should remove all the existing ones for the dataset and repopulate them.
191206
* As is, this call doesn't remove old citations if there are now none (legacy issue if we decide to stop counting certain types of citation
192207
* as we've done for 'hasPart').
193208
* If there are some, this call individually checks each one and if a matching item exists, it removes it and adds it back. Faster and better to delete all and

src/main/java/edu/harvard/iq/dataverse/makedatacount/DatasetExternalCitationsServiceBean.java

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@
77

88
import edu.harvard.iq.dataverse.Dataset;
99
import edu.harvard.iq.dataverse.DatasetServiceBean;
10+
import edu.harvard.iq.dataverse.GlobalId;
11+
import edu.harvard.iq.dataverse.pidproviders.PidUtil;
12+
1013
import java.util.ArrayList;
1114
import java.util.Arrays;
1215
import java.util.List;
@@ -40,7 +43,8 @@ public class DatasetExternalCitationsServiceBean implements java.io.Serializable
4043
Arrays.asList(
4144
"cites",
4245
"references",
43-
"supplements"));
46+
"supplements",
47+
"is-supplement-to"));
4448
static ArrayList<String> outboundRelationships = new ArrayList<String>(
4549
Arrays.asList(
4650
"is-cited-by",
@@ -59,22 +63,21 @@ public List<DatasetExternalCitations> parseCitations(JsonArray citations) {
5963
if (inboundRelationships.contains(relationship)) {
6064
Dataset localDs = null;
6165
if (objectUri.contains("doi")) {
62-
String globalId = objectUri.replace("https://", "").replace("doi.org/", "doi:").toUpperCase().replace("DOI:", "doi:");
63-
localDs = datasetService.findByGlobalId(globalId);
66+
localDs = datasetService.findByGlobalId(objectUri);
6467
exCit.setDataset(localDs);
6568
}
6669
exCit.setCitedByUrl(subjectUri);
67-
70+
6871
if (localDs != null && !exCit.getCitedByUrl().isEmpty()) {
6972
datasetExternalCitations.add(exCit);
7073
}
7174
}
7275
if (outboundRelationships.contains(relationship)) {
7376
Dataset localDs = null;
7477
if (subjectUri.contains("doi")) {
75-
String globalId = subjectUri.replace("https://", "").replace("doi.org/", "doi:").toUpperCase().replace("DOI:", "doi:");
76-
localDs = datasetService.findByGlobalId(globalId);
78+
localDs = datasetService.findByGlobalId(subjectUri);
7779
exCit.setDataset(localDs);
80+
7881
}
7982
exCit.setCitedByUrl(objectUri);
8083

src/main/java/edu/harvard/iq/dataverse/pidproviders/AbstractPidProvider.java

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -36,9 +36,9 @@ public abstract class AbstractPidProvider implements PidProvider {
3636

3737
private String datafilePidFormat = null;
3838

39-
private HashSet<String> managedSet;
39+
protected HashSet<String> managedSet = new HashSet<String>();
4040

41-
private HashSet<String> excludedSet;
41+
protected HashSet<String> excludedSet = new HashSet<String>();
4242

4343
private String id;
4444
private String label;
@@ -47,8 +47,6 @@ protected AbstractPidProvider(String id, String label, String protocol) {
4747
this.id = id;
4848
this.label = label;
4949
this.protocol = protocol;
50-
this.managedSet = new HashSet<String>();
51-
this.excludedSet = new HashSet<String>();
5250
}
5351

5452
protected AbstractPidProvider(String id, String label, String protocol, String authority, String shoulder,
@@ -60,8 +58,12 @@ protected AbstractPidProvider(String id, String label, String protocol, String a
6058
this.shoulder = shoulder;
6159
this.identifierGenerationStyle = identifierGenerationStyle;
6260
this.datafilePidFormat = datafilePidFormat;
63-
this.managedSet = new HashSet<String>(Arrays.asList(managedList.split(",\\s")));
64-
this.excludedSet = new HashSet<String>(Arrays.asList(excludedList.split(",\\s")));
61+
if(!managedList.isEmpty()) {
62+
this.managedSet.addAll(Arrays.asList(managedList.split(",\\s")));
63+
}
64+
if(!excludedList.isEmpty()) {
65+
this.excludedSet.addAll(Arrays.asList(excludedList.split(",\\s")));
66+
}
6567
if (logger.isLoggable(Level.FINE)) {
6668
Iterator<String> iter = managedSet.iterator();
6769
while (iter.hasNext()) {
@@ -313,10 +315,17 @@ protected GlobalId parsePersistentId(String protocol, String identifierString) {
313315
}
314316

315317
public GlobalId parsePersistentId(String protocol, String authority, String identifier) {
318+
return parsePersistentId(protocol, authority, identifier, false);
319+
}
320+
321+
public GlobalId parsePersistentId(String protocol, String authority, String identifier, boolean isCaseInsensitive) {
316322
logger.fine("Parsing: " + protocol + ":" + authority + getSeparator() + identifier + " in " + getId());
317323
if (!PidProvider.isValidGlobalId(protocol, authority, identifier)) {
318324
return null;
319325
}
326+
if(isCaseInsensitive) {
327+
identifier = identifier.toUpperCase();
328+
}
320329
// Check authority/identifier if this is a provider that manages specific
321330
// identifiers
322331
// /is not one of the unmanaged providers that has null authority
@@ -333,7 +342,7 @@ public GlobalId parsePersistentId(String protocol, String authority, String iden
333342
logger.fine("managed in " + getId() + ": " + getManagedSet().contains(cleanIdentifier));
334343
logger.fine("excluded from " + getId() + ": " + getExcludedSet().contains(cleanIdentifier));
335344

336-
if (!(((authority.equals(getAuthority()) && identifier.startsWith(getShoulder()))
345+
if (!(((authority.equals(getAuthority()) && identifier.startsWith(getShoulder().toUpperCase()))
337346
|| getManagedSet().contains(cleanIdentifier)) && !getExcludedSet().contains(cleanIdentifier))) {
338347
return null;
339348
}

src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/AbstractDOIProvider.java

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package edu.harvard.iq.dataverse.pidproviders.doi;
22

33
import java.util.Arrays;
4+
import java.util.HashSet;
45
import java.util.Map;
56
import java.util.logging.Level;
67
import java.util.logging.Logger;
@@ -26,9 +27,24 @@ public abstract class AbstractDOIProvider extends AbstractPidProvider {
2627

2728
/**
 * Creates a DOI provider configured with an authority, shoulder and
 * managed/excluded PID lists.
 *
 * All arguments are handed to the superclass constructor, with
 * {@code DOI_PROTOCOL} supplied as the protocol. Afterwards the inherited
 * {@code managedSet} and {@code excludedSet} are replaced by the result of
 * {@code clean(...)}, which upper-cases the part after the DOI protocol prefix
 * and drops entries that do not start with that prefix.
 *
 * @param id                        provider id
 * @param label                     provider label
 * @param providerAuthority         authority passed to the superclass
 * @param providerShoulder          shoulder passed to the superclass
 * @param identifierGenerationStyle identifier generation style passed to the superclass
 * @param datafilePidFormat         datafile PID format passed to the superclass
 * @param managedList               managed PID list passed to the superclass
 * @param excludedList              excluded PID list passed to the superclass
 */
public AbstractDOIProvider(String id, String label, String providerAuthority, String providerShoulder, String identifierGenerationStyle, String datafilePidFormat, String managedList, String excludedList) {
    super(id, label, DOI_PROTOCOL, providerAuthority, providerShoulder, identifierGenerationStyle, datafilePidFormat, managedList, excludedList);
    // Swap in case-insensitive (upper-cased) versions of the sets populated by the superclass.
    this.managedSet = clean(this.managedSet, "managed");
    this.excludedSet = clean(this.excludedSet, "excluded");
}
3034

31-
//For Unmanged provider
35+
private HashSet<String> clean(HashSet<String> originalSet, String setName) {
36+
HashSet<String> cleanSet = new HashSet<String>();
37+
for(String entry: originalSet) {
38+
if(entry.startsWith(DOI_PROTOCOL)) {
39+
cleanSet.add(DOI_PROTOCOL + entry.substring(DOI_PROTOCOL.length()).toUpperCase());
40+
} else {
41+
logger.warning("Non-DOI found in " + setName + " set of pidProvider id: " + getId() + ": " + entry + ". Entry is being dropped.");
42+
}
43+
}
44+
return cleanSet;
45+
}
46+
47+
//For Unmanaged provider
3248
/**
 * Creates a DOI provider from an identifier and a label only.
 *
 * Delegates to the three-argument superclass constructor, supplying
 * {@code DOI_PROTOCOL} as the protocol; no authority, shoulder or
 * managed/excluded lists are passed.
 *
 * @param name  value passed to the superclass as the provider id
 * @param label provider label
 */
public AbstractDOIProvider(String name, String label) {
    // 'name' fills the superclass's 'id' slot.
    super(name, label, DOI_PROTOCOL);
}
@@ -67,7 +83,7 @@ public GlobalId parsePersistentId(String protocol, String authority, String iden
6783
if (!DOI_PROTOCOL.equals(protocol)) {
6884
return null;
6985
}
70-
return super.parsePersistentId(protocol, authority, identifier);
86+
return super.parsePersistentId(protocol, authority, identifier, true);
7187
}
7288

7389
public String getUrlPrefix() {

src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DataCiteDOIProvider.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -226,8 +226,7 @@ protected String getProviderKeyName() {
226226

227227
@Override
228228
public String getProviderType() {
229-
// TODO Auto-generated method stub
230-
return null;
229+
return TYPE;
231230
}
232231

233232
public String getMdsUrl() {

src/test/java/edu/harvard/iq/dataverse/pidproviders/PidUtilTest.java

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -250,9 +250,12 @@ public void testDOIParsing() throws IOException {
250250
assertEquals(pid1String, pid3.asString());
251251
assertEquals("dc1", pid3.getProviderId());
252252

253-
String pid4String = "doi:10.5072/FK3ABCDEF";
253+
//Also test case insensitive
254+
String pid4String = "doi:10.5072/fk3ABCDEF";
254255
GlobalId pid4 = PidUtil.parseAsGlobalID(pid4String);
255-
assertEquals(pid4String, pid4.asString());
256+
// Lower case is recognized by converting to upper case internally, so we need to test vs. the upper case identifier
257+
// I.e. we are verifying that the lower case string is parsed the same as the upper case string, both give an internal upper case PID representation
258+
assertEquals("doi:10.5072/FK3ABCDEF", pid4.asString());
256259
assertEquals("dc2", pid4.getProviderId());
257260

258261
String pid5String = "doi:10.5072/FK2ABCDEF";

0 commit comments

Comments
 (0)