Skip to content

Commit a6b0505

Browse files
committed
reuse stream supplier, update archivers to send oversized files
1 parent 637b2e3 commit a6b0505

5 files changed

Lines changed: 244 additions & 65 deletions

File tree

src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java

Lines changed: 51 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -7,16 +7,24 @@
77
import edu.harvard.iq.dataverse.authorization.users.ApiToken;
88
import edu.harvard.iq.dataverse.engine.command.DataverseRequest;
99
import edu.harvard.iq.dataverse.engine.command.RequiredPermissions;
10+
import edu.harvard.iq.dataverse.util.bagit.BagGenerator;
11+
import edu.harvard.iq.dataverse.util.bagit.OREMap;
12+
1013
import static edu.harvard.iq.dataverse.settings.SettingsServiceBean.Key.DuraCloudContext;
1114
import static edu.harvard.iq.dataverse.settings.SettingsServiceBean.Key.DuraCloudHost;
1215
import static edu.harvard.iq.dataverse.settings.SettingsServiceBean.Key.DuraCloudPort;
1316
import edu.harvard.iq.dataverse.workflow.step.Failure;
1417
import edu.harvard.iq.dataverse.workflow.step.WorkflowStepResult;
1518

19+
import java.io.File;
20+
import java.io.FileOutputStream;
1621
import java.io.IOException;
22+
import java.io.InputStream;
1723
import java.io.PipedInputStream;
1824
import java.io.PipedOutputStream;
1925
import java.nio.charset.StandardCharsets;
26+
import java.nio.file.Files;
27+
import java.nio.file.Path;
2028
import java.security.DigestInputStream;
2129
import java.security.MessageDigest;
2230
import java.security.NoSuchAlgorithmException;
@@ -96,6 +104,8 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t
96104
statusObject.add(DatasetVersion.ARCHIVAL_STATUS, DatasetVersion.ARCHIVAL_STATUS_FAILURE);
97105
statusObject.add(DatasetVersion.ARCHIVAL_STATUS_MESSAGE, "Bag not transferred");
98106

107+
Path tempBagFile = null;
108+
99109
try {
100110
/*
101111
* If there is a failure in creating a space, it is likely that a prior version
@@ -161,20 +171,38 @@ public void run() {
161171
// Add BagIt ZIP file
162172
// Although DuraCloud uses SHA-256 internally, its API uses MD5 to verify the
163173
// transfer
174+
Path bagFile = null;
175+
164176

165-
messageDigest = MessageDigest.getInstance("MD5");
166-
try (PipedInputStream in = new PipedInputStream(100000);
167-
DigestInputStream digestInputStream2 = new DigestInputStream(in, messageDigest)) {
168-
Thread bagThread = startBagThread(dv, in, digestInputStream2, dataciteXml, token);
169-
checksum = store.addContent(spaceName, fileName, digestInputStream2, -1l, null, null, null);
170-
bagThread.join();
171-
if (success) {
172-
logger.fine("Content: " + fileName + " added with checksum: " + checksum);
173-
localchecksum = Hex.encodeHexString(digestInputStream2.getMessageDigest().digest());
177+
tempBagFile = Files.createTempFile("dataverse-bag-", ".zip");
178+
logger.fine("Creating bag in temporary file: " + tempBagFile.toString());
179+
180+
BagGenerator bagger = new BagGenerator(new OREMap(dv, false), dataciteXml);
181+
bagger.setAuthenticationKey(token.getTokenString());
182+
// Generate bag to temporary file using the provided ore JsonObject
183+
try (FileOutputStream fos = new FileOutputStream(tempBagFile.toFile())) {
184+
if (!bagger.generateBag(fos)) {
185+
throw new IOException("Bag generation failed");
174186
}
175-
if (!success || !checksum.equals(localchecksum)) {
187+
}
188+
189+
// Store BagIt file
190+
long bagSize = Files.size(tempBagFile);
191+
logger.fine("Bag created successfully, size: " + bagSize + " bytes");
192+
193+
// Now upload the bag file
194+
messageDigest = MessageDigest.getInstance("MD5");
195+
try (InputStream is = Files.newInputStream(bagFile);
196+
DigestInputStream bagDigestInputStream = new DigestInputStream(is, messageDigest)) {
197+
checksum = store.addContent(spaceName, fileName, bagDigestInputStream, bagFile.toFile().length(), "application/zip", null, null);
198+
localchecksum = Hex.encodeHexString(bagDigestInputStream.getMessageDigest().digest());
199+
200+
if (checksum != null && checksum.equals(localchecksum)) {
201+
logger.fine("Content: " + fileName + " added with checksum: " + checksum);
202+
success = true;
203+
} else {
176204
logger.severe("Failure on " + fileName);
177-
logger.severe(success ? checksum + " not equal to " + localchecksum : "failed to transfer to DuraCloud");
205+
logger.severe(checksum + " not equal to " + localchecksum);
178206
try {
179207
store.deleteContent(spaceName, fileName);
180208
store.deleteContent(spaceName, baseFileName + "_datacite.xml");
@@ -185,9 +213,6 @@ public void run() {
185213
"DuraCloud Submission Failure: incomplete archive transfer");
186214
}
187215
}
188-
189-
logger.fine("DuraCloud Submission step: Content Transferred");
190-
191216
// Document the location of dataset archival copy location (actually the URL
192217
// where you can
193218
// view it as an admin)
@@ -223,8 +248,20 @@ public void run() {
223248
return new Failure("Unable to create DuraCloud space with name: " + baseFileName, mesg);
224249
} catch (NoSuchAlgorithmException e) {
225250
logger.severe("MD5 MessageDigest not available!");
251+
} catch (Exception e) {
252+
logger.warning(e.getLocalizedMessage());
253+
e.printStackTrace();
254+
return new Failure("Error in transferring file to DuraCloud",
255+
"DuraCloud Submission Failure: internal error");
226256
}
227257
finally {
258+
if (tempBagFile != null) {
259+
try {
260+
Files.deleteIfExists(tempBagFile);
261+
} catch (IOException e) {
262+
logger.warning("Failed to delete temporary bag file: " + tempBagFile + " : " + e.getMessage());
263+
}
264+
}
228265
dv.setArchivalCopyLocation(statusObject.build().toString());
229266
}
230267
} else {

0 commit comments

Comments (0)