Skip to content

Commit 637b2e3

Browse files
committed
Update to track non-zipped files, add method
1 parent 62a03b2 commit 637b2e3

1 file changed

Lines changed: 31 additions & 19 deletions

File tree

src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java

Lines changed: 31 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -640,8 +640,8 @@ private void processAllFiles(List<FileEntry> sortedFiles)
640640
for (FileEntry entry : sortedFiles) {
641641
// Extract all needed information from the JsonObject reference
642642
JsonObject child = entry.jsonObject;
643-
String dataUrl = child.get(JsonLDTerm.schemaOrg("sameAs").getLabel()).getAsString();
644-
String childTitle = child.get(JsonLDTerm.schemaOrg("name").getLabel()).getAsString();
643+
644+
String childTitle = entry.getChildTitle();
645645

646646
// Check for duplicate titles
647647
if (titles.contains(childTitle)) {
@@ -651,12 +651,7 @@ private void processAllFiles(List<FileEntry> sortedFiles)
651651
titles.add(childTitle);
652652
}
653653

654-
// Build full path using stored currentPath
655-
String childPath = entry.currentPath + childTitle;
656-
JsonElement directoryLabel = child.get(JsonLDTerm.DVCore("directoryLabel").getLabel());
657-
if (directoryLabel != null) {
658-
childPath = entry.currentPath + directoryLabel.getAsString() + "/" + childTitle;
659-
}
654+
String childPath= entry.getChildPath(childTitle);
660655

661656
// Get hash if exists
662657
String childHash = null;
@@ -675,6 +670,7 @@ private void processAllFiles(List<FileEntry> sortedFiles)
675670
}
676671

677672
resourceUsed[entry.resourceIndex] = true;
673+
String dataUrl = entry.getDataUrl();
678674

679675
try {
680676
if ((childHash == null) | ignorehashes) {
@@ -716,11 +712,9 @@ private void processAllFiles(List<FileEntry> sortedFiles)
716712
logger.fine("Adding " + childPath + " with hash " + childHash + " to checksumMap");
717713
checksumMap.put(childPath, childHash);
718714
}
719-
720715
// Add file to bag or fetch file
721716
if (!addToZip(entry.size)) {
722717
if(createHoleyBag) {
723-
dataUrl = suppressDownloadCounts(dataUrl);
724718
logger.fine("Adding to fetch file: " + childPath + " from " + dataUrl +
725719
" (size: " + entry.size + " bytes)");
726720
addToFetchFile(dataUrl, entry.size, childPath);
@@ -1300,10 +1294,7 @@ InputStreamSupplier getInputStreamSupplier(final String uriString) {
13001294
return new InputStreamSupplier() {
13011295
public InputStream get() {
13021296
try {
1303-
// Adding gbrecs to suppress counting this access as a download (archiving is not a download indicating scientific use)
1304-
String modifiedUriString = suppressDownloadCounts(uriString);
1305-
URI uri = new URI(modifiedUriString);
1306-
logger.finest("Final URI used (with gbrecs param): " + modifiedUriString);
1297+
URI uri = new URI(uriString);
13071298
int tries = 0;
13081299
while (tries < 5) {
13091300

@@ -1396,10 +1387,7 @@ public void close() throws IOException {
13961387
};
13971388
}
13981389

1399-
private String suppressDownloadCounts(String uriString ) {
1400-
// Adding gbrecs to suppress counting this access as a download (archiving is not a download indicating scientific use)
1401-
return uriString + (uriString.contains("?") ? "&" : "?") + "gbrecs=true";
1402-
}
1390+
14031391

14041392
public List<FileEntry> getOversizedFiles() {
14051393
return oversizedFiles;
@@ -1456,7 +1444,7 @@ public static void setNumConnections(int numConnections) {
14561444
}
14571445

14581446
// Inner class to hold file information before processing
1459-
private static class FileEntry implements Comparable<FileEntry> {
1447+
public static class FileEntry implements Comparable<FileEntry> {
14601448
final long size;
14611449
final JsonObject jsonObject; // Direct reference, not a copy
14621450
final String currentPath; // Parent directory path
@@ -1469,6 +1457,30 @@ private static class FileEntry implements Comparable<FileEntry> {
14691457
this.resourceIndex = resourceIndex;
14701458
}
14711459

1460+
/**
 * Returns the URL used to retrieve this file's bytes.
 *
 * Reads the string stored in {@code jsonObject} under the label of the
 * schema.org "sameAs" term and passes it through
 * {@code suppressDownloadCounts}, so the returned value always carries the
 * {@code gbrecs=true} query parameter.
 *
 * NOTE(review): there is no null check on the looked-up element, so this
 * presumably fails with a NullPointerException when the entry has no
 * "sameAs" member - confirm that every file entry is guaranteed to have one.
 *
 * @return the "sameAs" URL with {@code gbrecs=true} appended
 */
public String getDataUrl() {
1461+
return suppressDownloadCounts(jsonObject.get(JsonLDTerm.schemaOrg("sameAs").getLabel()).getAsString());
1462+
}
1463+
1464+
/**
 * Returns this file's title: the string stored in {@code jsonObject} under
 * the label of the schema.org "name" term.
 *
 * NOTE(review): there is no null check on the looked-up element - confirm
 * that every file entry is guaranteed to have a "name" member.
 *
 * @return the value of the "name" member as a string
 */
public String getChildTitle() {
1465+
return jsonObject.get(JsonLDTerm.schemaOrg("name").getLabel()).getAsString();
1466+
}
1467+
1468+
/**
 * Builds the full path for this file from the stored parent path.
 *
 * When {@code jsonObject} has a member under the label of the Dataverse core
 * "directoryLabel" term, the result is
 * {@code currentPath + directoryLabel + "/" + title}; otherwise it is
 * {@code currentPath + title}.
 *
 * @param title the file name to place at the end of the path; the visible
 *              caller passes the value of {@code getChildTitle()}
 * @return the concatenated path (no separator is inserted between
 *         {@code currentPath} and what follows it)
 */
public String getChildPath(String title) {
1469+
// Build full path using stored currentPath
1470+
String childPath = currentPath + title;
1471+
JsonElement directoryLabel = jsonObject.get(JsonLDTerm.DVCore("directoryLabel").getLabel());
1472+
// Only entries that carry a directory label get the extra path segment
1473+
if (directoryLabel != null) {
1473+
childPath = currentPath + directoryLabel.getAsString() + "/" + title;
1474+
}
1475+
return childPath;
1476+
}
1477+
1478+
/**
 * Appends the {@code gbrecs=true} query parameter to the given URI string.
 *
 * Uses "&amp;" as the separator when the string already contains a "?" and
 * "?" otherwise.
 *
 * NOTE(review): the parameter is appended to the end of the string
 * unconditionally - a URI that already contains {@code gbrecs=true} or ends
 * in a "#fragment" is not treated specially; confirm such inputs cannot occur.
 *
 * @param uriString the URI to extend; must not be null
 * @return {@code uriString} with {@code gbrecs=true} appended
 */
private String suppressDownloadCounts(String uriString) {
1479+
// Adding gbrecs to suppress counting this access as a download (archiving is
1480+
// not a download indicating scientific use)
1481+
return uriString + (uriString.contains("?") ? "&" : "?") + "gbrecs=true";
1482+
}
1483+
14721484
@Override
14731485
public int compareTo(FileEntry other) {
14741486
return Long.compare(this.size, other.size);

0 commit comments

Comments
 (0)