Skip to content

Commit 6658f45

Browse files
authored
Switching to the DataCite REST API for retrieving registration metadata (IQSS#12270)
* A quick implementation of using DataCite REST API for retrieving DOI metadata from DataCite. This is to address an apparent issue with UTF8 characters when relying on the MDS API used traditionally. (IQSS#12070) * Removing the old-style constructors that are no longer needed. Removing the old-style constructors that are no longer needed. IQSS#12070 * whitespaces IQSS#12070 * a release note for IQSS#12070 * some last touches. IQSS#12070 * cosmetic - cleaning up a dev. comment IQSS#12070 * cosmetic^2 - fixes a typo in a cosmetic comment fix IQSS#12070
1 parent 80fd9f7 commit 6658f45

4 files changed

Lines changed: 96 additions & 16 deletions

File tree

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
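Incremental improvements have been made to the process of registering dataset metadata with DataCite: the currently registered metadata is now retrieved via the DataCite REST API (with the MDS API as a fallback), which addresses an issue with UTF-8 characters. If your instance uses DataCite, please make sure a valid DataCite REST API URL is configured, since it is now required.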
2+
3+
The relevant JVM option is `dataverse.pid.*.datacite.rest-api-url` if the recommended, new-style PID configuration is used, or `doi.dataciterestapiurlstring` if the legacy settings are in place. In the latter case, however, this may be a good occasion to switch to the new configuration setup.
4+
5+
For production instances using registered DataCite authorities, the URL should be:
6+
7+
```<jvm-options>-Ddataverse.pid.<providername>.datacite.rest-api-url=https://api.datacite.org</jvm-options>```
8+
9+
Or, for test and development instances:
10+
11+
```<jvm-options>-Ddataverse.pid.<providername>.datacite.rest-api-url=https://api.test.datacite.org</jvm-options>```

src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DOIDataCiteRegisterService.java

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,8 @@ public class DOIDataCiteRegisterService {
4242
// A singleton, since it and the httpClient in it can be reused.
4343
private DataCiteRESTfullClient client=null;
4444

45-
public DOIDataCiteRegisterService(String url, String username, String password) {
46-
client = new DataCiteRESTfullClient(url, username, password);
45+
public DOIDataCiteRegisterService(String url, String restApiUrl, String username, String password) {
46+
client = new DataCiteRESTfullClient(url, restApiUrl, username, password);
4747
}
4848

4949
/**
@@ -80,13 +80,14 @@ public String registerIdentifier(String identifier, Map<String, String> metadata
8080

8181
public String reRegisterIdentifier(String identifier, Map<String, String> metadata, DvObject dvObject) throws IOException {
8282
String retString = "";
83-
String numericIdentifier = identifier.substring(identifier.indexOf(":") + 1);
83+
// "bare identifier" is the canonical pid with the "doi:" prefix stripped
84+
String bareIdentifier = identifier.substring(identifier.indexOf(":") + 1);
8485
String xmlMetadata = getMetadataFromDvObject(identifier, metadata, dvObject);
8586
String target = metadata.get("_target");
8687
String currentMetadata = null;
8788
boolean hasDifferences = false;
8889
try {
89-
currentMetadata = client.getMetadata(numericIdentifier);
90+
currentMetadata = client.getMetadata(bareIdentifier);
9091
Diff myDiff = DiffBuilder.compare(xmlMetadata).withTest(currentMetadata).ignoreWhitespace().checkForSimilar()
9192
.build();
9293
hasDifferences = myDiff.hasDifferences();
@@ -96,7 +97,7 @@ public String reRegisterIdentifier(String identifier, Map<String, String> metada
9697
}
9798
}
9899
} catch (RuntimeException e) {
99-
logger.log(Level.INFO, "DOI " + numericIdentifier + " not registered with DataCite, registering now.");
100+
logger.log(Level.INFO, "DOI " + bareIdentifier + " not registered with DataCite, registering now.");
100101
hasDifferences = true;
101102
}
102103

@@ -106,13 +107,13 @@ public String reRegisterIdentifier(String identifier, Map<String, String> metada
106107
String currentUrl = null;
107108
try {
108109
//May get a 204 if the DOI is still draft
109-
currentUrl = client.getUrl(numericIdentifier);
110+
currentUrl = client.getUrl(bareIdentifier);
110111
} catch (RuntimeException ex) {
111-
logger.fine("Error getting Url for " + numericIdentifier + ": " + ex.getMessage());
112+
logger.fine("Error getting Url for " + bareIdentifier + ": " + ex.getMessage());
112113
}
113114
if (!target.equals(currentUrl)) {
114115
logger.info("Updating target URL to " + target);
115-
client.postUrl(numericIdentifier, target);
116+
client.postUrl(bareIdentifier, target);
116117
retString = retString + "url:\\r" + target;
117118

118119
}

src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DataCiteDOIProvider.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ public DataCiteDOIProvider(String id, String label, String providerAuthority, St
5959
this.apiUrl = apiUrl;
6060
this.username = username;
6161
this.password = password;
62-
doiDataCiteRegisterService = new DOIDataCiteRegisterService(mdsUrl, username, password);
62+
doiDataCiteRegisterService = new DOIDataCiteRegisterService(mdsUrl, apiUrl, username, password);
6363
}
6464

6565
@Override
@@ -349,7 +349,7 @@ public boolean updateIdentifier(DvObject dvObject) {
349349
logger.info(identifier + "updated: " + updated );
350350
return true;
351351
} else {
352-
logger.info("No updated needed for " + identifier);
352+
logger.info("No update needed for " + identifier);
353353
return false; //No update needed
354354
}
355355
} catch (Exception e) {

src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DataCiteRESTfullClient.java

Lines changed: 74 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
import java.io.Closeable;
1010
import java.io.IOException;
11+
import java.util.Base64;
1112

1213
import java.util.logging.Level;
1314
import java.util.logging.Logger;
@@ -26,10 +27,9 @@
2627
import org.apache.http.impl.client.BasicCredentialsProvider;
2728
import org.apache.http.impl.client.CloseableHttpClient;
2829
import org.apache.http.impl.client.HttpClients;
29-
30-
31-
3230
import org.apache.http.util.EntityUtils;
31+
import edu.harvard.iq.dataverse.util.json.JsonUtil;
32+
import jakarta.json.JsonObject;
3333

3434
/**
3535
* DataCiteRESTfullClient
@@ -46,12 +46,14 @@ public class DataCiteRESTfullClient implements Closeable {
4646
private static final long RETRY_DELAY_MS = 10000; // 10 seconds
4747

4848
private String url;
49+
private String restApiUrl;
4950
private CloseableHttpClient httpClient;
5051
private HttpClientContext context;
5152
private String encoding = "utf-8";
52-
53-
public DataCiteRESTfullClient(String url, String username, String password) {
53+
54+
public DataCiteRESTfullClient(String url, String restApiUrl, String username, String password) {
5455
this.url = url;
56+
this.restApiUrl = restApiUrl;
5557
context = HttpClientContext.create();
5658
CredentialsProvider credsProvider = new BasicCredentialsProvider();
5759
credsProvider.setCredentials(new AuthScope(null, -1), new UsernamePasswordCredentials(username, password));
@@ -192,6 +194,13 @@ public String postUrl(String doi, String url) throws IOException {
192194
* @return
193195
*/
194196
public String getMetadata(String doi) {
197+
// Try obtaining the metadata using the new, REST API:
198+
try {
199+
return getMetadataViaRestApi(doi);
200+
} catch (RuntimeException rex) {
201+
logger.warning("Failed to getMetadata via REST API for doi " + doi +", falling back to MDS");
202+
}
203+
195204
HttpGet httpGet = new HttpGet(this.url + "/metadata/" + doi);
196205
httpGet.setHeader("Accept", "application/xml");
197206
try {
@@ -209,6 +218,58 @@ public String getMetadata(String doi) {
209218
}
210219
}
211220

221+
/**
222+
* getMetadataViaRestApi
223+
* Obtains registration metadata using the DataCite REST API instead of MDS.
224+
* This solves some known issues (notably with UTF-8 characters); see #12070 for details.
225+
*
226+
* @param doi
227+
* @return
228+
*/
229+
public String getMetadataViaRestApi(String doi) {
230+
HttpGet httpGet = new HttpGet(this.restApiUrl + "/dois/" + doi);
231+
232+
try {
233+
HttpResponse response = executeWithRetry(httpGet, "getMetadataViaRestApi");
234+
String restApiRawData = EntityUtils.toString(response.getEntity(), encoding);
235+
236+
logger.fine("REST API raw data: " + restApiRawData);
237+
238+
if (response.getStatusLine().getStatusCode() != 200) {
239+
String errMsg = "getMetadataViaRestApi, Response: " + response.getStatusLine().getStatusCode() + ", " + restApiRawData;
240+
logger.log(Level.SEVERE, errMsg);
241+
throw new RuntimeException(errMsg);
242+
}
243+
244+
JsonObject restApiJson = JsonUtil.getJsonObject(restApiRawData);
245+
String xmlEncoded = null;
246+
247+
JsonObject restApiJsonData = restApiJson.getJsonObject("data");
248+
if (restApiJsonData != null) {
249+
JsonObject restApiJsonAttributes = restApiJsonData.getJsonObject("attributes");
250+
if (restApiJsonAttributes != null) {
251+
xmlEncoded = restApiJsonAttributes.getString("xml");
252+
}
253+
}
254+
logger.fine("encoded XML entry: " + xmlEncoded);
255+
256+
String metadata = null; // what we want to return, registration metadata in the XML format
257+
258+
if (xmlEncoded != null) {
259+
// Stripping any newlines below may be unnecessary - it is likely
260+
// always returned as a continuous string; but shouldn't hurt
261+
// either.
262+
metadata = new String(Base64.getDecoder().decode(xmlEncoded.replaceAll("[\\r\\n]", "")), encoding);
263+
}
264+
265+
logger.fine("decoded XML metadata: " + metadata);
266+
return metadata;
267+
} catch (IOException ioe) {
268+
logger.log(Level.SEVERE, "IOException in getMetadataViaRestApi", ioe);
269+
throw new RuntimeException("IOException in getMetadataViaRestAPi", ioe);
270+
}
271+
}
272+
212273
/**
213274
* testDOIExists
214275
*
@@ -270,9 +331,16 @@ public String inactiveDataset(String doi) {
270331
}
271332
}
272333

334+
/**
335+
* The main() method can be used to test the functionality on the command
336+
* line outside of Dataverse.
337+
* Uncomment and modify the code below as needed.
338+
* @param args
339+
* @throws Exception
340+
*/
273341
public static void main(String[] args) throws Exception {
274342
String doi = "10.5072/DVN/274533";
275-
DataCiteRESTfullClient client = new DataCiteRESTfullClient("https://mds.test.datacite.org", "DATACITE_TEST_USERNAME", "DATACITE_TEST_PASSWORD");
343+
DataCiteRESTfullClient client = new DataCiteRESTfullClient("https://mds.test.datacite.org", "https://api.test.datacite.org", "DATACITE_TEST_USERNAME", "DATACITE_TEST_PASSWORD");
276344
// System.out.println(client.getUrl(doi));
277345
// System.out.println(client.getMetadata(doi));
278346
// System.out.println(client.postMetadata(readAndClose("C:/Users/luopc/Desktop/datacite.xml", "utf-8")));

0 commit comments

Comments
 (0)