Skip to content
Merged
Show file tree
Hide file tree
Changes from 26 commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
b1a88b4
file pid reservation
qqmyers Oct 2, 2020
94bda80
Merge remote-tracking branch 'IQSS/develop' into IQSS/7068-Reserve_fi…
qqmyers Oct 15, 2020
7f90269
add file pid reservation step to publish
qqmyers Oct 15, 2020
77bd653
comment change
qqmyers Oct 16, 2020
d76a738
check if file PIDs used once, use constants - per comments
qqmyers Oct 30, 2020
e0b975e
Merge remote-tracking branch 'IQSS/develop' into
qqmyers Oct 30, 2020
98a7a83
adding release note
djbrooke Oct 30, 2020
2ec3bac
Merge branch 'IQSS/7068-Reserve_file_PIDs' of https://github.com/Qual…
djbrooke Oct 30, 2020
8a41176
release notes, API doc update
djbrooke Oct 30, 2020
4d9b497
reflecting datasets and files for the PID endpoint
djbrooke Oct 30, 2020
979d402
removing release note about pre-reg for file PIDs as this is not supp…
djbrooke Nov 3, 2020
1e567dc
Merge remote-tracking branch 'IQSS/develop' into IQSS/7068-Reserve_fi…
qqmyers Nov 3, 2020
cdace4f
file pid pre-reservation
qqmyers Nov 3, 2020
c34e522
Merge remote-tracking branch 'IQSS/develop' into IQSS/7068-Reserve_fi…
qqmyers Nov 4, 2020
91862bd
Merge branch 'IQSS/7068-Reserve_file_PIDs' of https://github.com/Qual…
qqmyers Nov 4, 2020
fb017bf
avoid problem when GlobalIDServiceBean implicitly merges
qqmyers Nov 4, 2020
931af58
update theDataset
qqmyers Nov 4, 2020
988b1d9
noting that PID reservation can cause old timeouts to be too short
qqmyers Nov 4, 2020
5ae4e27
more specifics
qqmyers Nov 4, 2020
99194ec
Merge remote-tracking branch 'IQSS/develop' into IQSS/7068-Reserve_fi…
qqmyers Nov 9, 2020
7c42ca8
Merge remote-tracking branch 'IQSS/develop' into IQSS/7383_file_pid_r…
qqmyers Nov 9, 2020
94b86ca
Merge branch 'IQSS/7383_file_pid_reservation_api_call' into IQSS/7068…
qqmyers Nov 9, 2020
18e6add
Merge remote-tracking branch 'IQSS/develop' into
qqmyers Feb 23, 2021
28774b4
Merge remote-tracking branch 'IQSS/develop' into
qqmyers Apr 7, 2021
de8ff38
Merge remote-tracking branch 'IQSS/develop' into IQSS/7068-Reserve_fi…
qqmyers Apr 13, 2021
682858d
Merge remote-tracking branch 'IQSS/develop' into
qqmyers Mar 27, 2024
e3b01d5
release note update
qqmyers Mar 27, 2024
c5b490c
cleanup reformatting
qqmyers Mar 27, 2024
3c194db
further cleanup
qqmyers Mar 27, 2024
afefed4
Merge remote-tracking branch 'IQSS/develop' into IQSS/7068-Reserve_fi…
qqmyers Mar 27, 2024
9a4ddd3
Merge remote-tracking branch 'IQSS/develop' into IQSS/7068-Reserve_fi…
qqmyers Apr 23, 2024
da87bd1
set createTime earlier
qqmyers Apr 23, 2024
404ccff
Merge remote-tracking branch 'IQSS/develop' into
qqmyers Jul 19, 2024
0fd4ee1
Merge remote-tracking branch 'IQSS/develop' into IQSS/7068-Reserve_fi…
qqmyers Aug 14, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions doc/release-notes/7068-reserve-file-pids.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
## Release Highlights

### Pre-Publish File DOI Reservation with DataCite

Dataverse installations using DataCite will be able to reserve the persistent identifiers for files with DataCite ahead of publishing time. This allows the file DOI to be reserved earlier in the data sharing process and makes the step of publishing datasets simpler and less error-prone. Note that reserving file DOIs can slow uploads with large numbers of files, so administrators may need to adjust timeouts (specifically any Apache "``ProxyPass / ajp://localhost:8009/ timeout=``" setting in the recommended Dataverse configuration).

## Major Use Cases

- Users will have DOIs reserved for their files upon dataset creation instead of at publish time. (Issue #7068, PR #7334)
2 changes: 1 addition & 1 deletion doc/sphinx-guides/source/api/native-api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4889,7 +4889,7 @@ The fully expanded example above (without environment variables) looks like this
Reserve a PID
~~~~~~~~~~~~~

Reserved a PID for a dataset. A superuser API token is required.
Reserve a PID for a dataset if not yet registered, and, if FilePIDs are enabled, reserve any file PIDs that are not yet registered. A superuser API token is required.

.. note:: See :ref:`curl-examples-and-environment-variables` if you are unfamiliar with the use of export below.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1238,14 +1238,6 @@ public List<Long> selectFilesWithMissingOriginalSizes() {
}


/**
* Check that a identifier entered by the user is unique (not currently used
* for any other study in this Dataverse Network). Also check for duplicate
* in the remote PID service if needed
* @param datafileId
* @param storageLocation
* @return {@code true} iff the global identifier is unique.
*/
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not related to the PR - this just doesn't fit the method below which doesn't involve PIDs

public void finalizeFileDelete(Long dataFileId, String storageLocation) throws IOException {
// Verify that the DataFile no longer exists:
if (find(dataFileId) != null) {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package edu.harvard.iq.dataverse.engine.command.impl;

import edu.harvard.iq.dataverse.DataFile;
import edu.harvard.iq.dataverse.Dataset;
import edu.harvard.iq.dataverse.DatasetField;
import edu.harvard.iq.dataverse.DatasetVersion;
Expand All @@ -15,9 +16,11 @@
import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException;
import edu.harvard.iq.dataverse.pidproviders.PidProvider;
import edu.harvard.iq.dataverse.pidproviders.PidUtil;
import edu.harvard.iq.dataverse.pidproviders.doi.fake.FakeDOIProvider;
import edu.harvard.iq.dataverse.util.BundleUtil;

import java.sql.Timestamp;
import java.util.Arrays;
import java.util.Date;
import java.util.Set;
import java.util.logging.Level;
Expand Down Expand Up @@ -167,13 +170,12 @@ protected void registerExternalIdentifier(Dataset theDataset, CommandContext ctx
} while (pidProvider.alreadyRegistered(theDataset) && attempts <= FOOLPROOF_RETRIAL_ATTEMPTS_LIMIT);
}
if(!retry) {
logger.warning("Reserving PID for: " + getDataset().getId() + " during publication failed.");
throw new IllegalCommandException(BundleUtil.getStringFromBundle("publishDatasetCommand.pidNotReserved"), this);
logger.warning("Reserving PID for: " + getDataset().getId() + " failed.");
throw new CommandExecutionException(BundleUtil.getStringFromBundle("abstractDatasetCommand.pidNotReserved", Arrays.asList(theDataset.getIdentifier())), this);
}
if(attempts > FOOLPROOF_RETRIAL_ATTEMPTS_LIMIT) {
//Didn't work - we existed the loop with too many tries
throw new CommandExecutionException("This dataset may not be published because its identifier is already in use by another dataset; "
+ "gave up after " + attempts + " attempts. Current (last requested) identifier: " + theDataset.getIdentifier(), this);
throw new CommandExecutionException(BundleUtil.getStringFromBundle("abstractDatasetCommand.pidReservationRetryExceeded", Arrays.asList(Integer.toString(attempts), theDataset.getIdentifier())), this);
}
}
// Invariant: Dataset identifier does not exist in the remote registry
Expand All @@ -186,6 +188,9 @@ protected void registerExternalIdentifier(Dataset theDataset, CommandContext ctx
}

} catch (Throwable e) {
if (e instanceof CommandException) {
throw (CommandException) e;
}
throw new CommandException(BundleUtil.getStringFromBundle("dataset.publish.error", pidProvider.getProviderInformation()), this);
}
} else {
Expand Down Expand Up @@ -215,6 +220,73 @@ protected Timestamp getTimestamp() {
return timestamp;
}

/**
 * Reserves persistent identifiers for the files of a dataset when reservation
 * ahead of publication applies.
 * <p>
 * Reservation is attempted only when all of the following hold, checked in
 * this order: the effective PID generator for this command's dataset does not
 * register at publish time, file PIDs are enabled for the collection owning
 * this command's dataset, and the generator can create PIDs like the
 * dataset's global id. When they hold, every file of {@code theDataset} whose
 * identifier is not yet flagged as registered is handed to
 * {@code registerFileExternalIdentifier} with retries enabled.
 *
 * @param theDataset the dataset whose files are inspected
 * @param ctxt       the command context used to look up the PID generator and
 *                   system configuration
 * @param b          not read by this method
 * @throws CommandException if reserving a file identifier fails
 */
protected void registerFilePidsIfNeeded(Dataset theDataset, CommandContext ctxt, boolean b) throws CommandException {
    PidProvider filePidProvider = ctxt.dvObjects().getEffectivePidGenerator(getDataset());

    // Guard clauses, evaluated in the same order as a short-circuiting && chain.
    if (filePidProvider.registerWhenPublished()) {
        return;
    }
    if (!ctxt.systemConfig().isFilePIDsEnabledForCollection(getDataset().getOwner())) {
        return;
    }
    if (!filePidProvider.canCreatePidsLike(getDataset().getGlobalId())) {
        return;
    }

    for (DataFile candidate : theDataset.getFiles()) {
        logger.fine(candidate.getId() + " is registered?: " + candidate.isIdentifierRegistered());
        if (candidate.isIdentifierRegistered()) {
            // Already reserved/registered - leave it alone.
            continue;
        }
        // pre-register a persistent id
        registerFileExternalIdentifier(candidate, filePidProvider, ctxt, true);
    }
}

/**
 * Reserves (pre-registers) a persistent identifier for a single file with the
 * given provider.
 * <p>
 * Nothing is done when the file's identifier is already flagged as registered.
 * Otherwise, if the provider reports the file's current identifier as already
 * in use, new identifiers are generated (only when {@code retry} is true)
 * until an unused one is found or the retry limit is exhausted. The identifier
 * is then created with the provider and, on success, the file's global id
 * create time and registered flag are set.
 *
 * @param dataFile    the file whose identifier should be reserved
 * @param pidProvider the provider used to check, generate and create the
 *                    identifier
 * @param ctxt        the command context (not read by this method)
 * @param retry       whether a new identifier may be generated when the
 *                    current one is already in use; forced to {@code false}
 *                    for a {@link FakeDOIProvider}
 * @throws IllegalCommandException   if no provider is supplied
 * @throws CommandExecutionException if the identifier is in use and either
 *                                   retrying is disabled or every retry also
 *                                   produced an identifier that is in use
 * @throws CommandException          if any other failure occurs while checking
 *                                   or generating identifiers
 */
private void registerFileExternalIdentifier(DataFile dataFile, PidProvider pidProvider, CommandContext ctxt, boolean retry) throws CommandException {

    if (pidProvider == null) {
        // Without a provider there is no registry to reserve the identifier with.
        throw new IllegalCommandException("This datafile may not have a PID because its id registry service is not supported.", this);
    }
    if (dataFile.isIdentifierRegistered()) {
        // Already reserved/registered: nothing to do.
        return;
    }

    if (pidProvider instanceof FakeDOIProvider) {
        retry = false; // No reason to allow a retry with the FakeProvider (even if it allows
                       // pre-registration someday), so set false for efficiency
    }
    try {
        if (pidProvider.alreadyRegistered(dataFile)) {
            if (!retry) {
                logger.warning("Reserving File PID for: " + getDataset().getId() + ", fileId: " + dataFile.getId() + " failed.");
                throw new CommandExecutionException(BundleUtil.getStringFromBundle("abstractDatasetCommand.filePidNotReserved", Arrays.asList(getDataset().getIdentifier())), this);
            }
            int attempts = 0;
            boolean identifierInUse;
            do {
                pidProvider.generatePid(dataFile);
                logger.log(Level.INFO, "Attempting to register external identifier for datafile {0} (trying: {1}).",
                        new Object[] { dataFile.getId(), dataFile.getIdentifier() });
                attempts++;
                identifierInUse = pidProvider.alreadyRegistered(dataFile);
            } while (identifierInUse && attempts <= FOOLPROOF_RETRIAL_ATTEMPTS_LIMIT);
            if (identifierInUse) {
                // Didn't work - we exited the loop with too many tries. Testing the last
                // lookup result (rather than the attempt counter) keeps a success on the
                // final permitted attempt from being reported as a failure.
                throw new CommandExecutionException("This file may not have a PID reserved because its identifier is already in use by another object; "
                        + "gave up after " + attempts + " attempts. Current (last requested) identifier: " + dataFile.getIdentifier(), this);
            }
        }
        // Invariant: DataFile identifier does not exist in the remote registry
        try {
            pidProvider.createIdentifier(dataFile);
            dataFile.setGlobalIdCreateTime(getTimestamp());
            dataFile.setIdentifierRegistered(true);
        } catch (Throwable ex) {
            // Best effort: a failed reservation is only logged, and the file is
            // left without its registered flag set.
            logger.info("Call to globalIdServiceBean.createIdentifier failed: " + ex);
        }

    } catch (Throwable e) {
        if (e instanceof CommandException) {
            throw (CommandException) e;
        }
        // The rethrown exception does not carry the cause, so record it here.
        logger.log(Level.WARNING, "Unexpected failure while reserving PID for fileId: " + dataFile.getId(), e);
        throw new CommandException(BundleUtil.getStringFromBundle("file.register.error", pidProvider.getProviderInformation()), this);
    }

}

protected void checkSystemMetadataKeyIfNeeded(DatasetVersion newVersion, DatasetVersion persistedVersion) throws IllegalCommandException {
Set<MetadataBlock> changedMDBs = DatasetVersionDifference.getBlocksWithChanges(newVersion, persistedVersion);
for (MetadataBlock mdb : changedMDBs) {
Expand Down
Loading