diff --git a/doc/release-notes/7619-restricted-summary-starts.md b/doc/release-notes/7619-restricted-summary-starts.md new file mode 100644 index 00000000000..e1c20f3bde2 --- /dev/null +++ b/doc/release-notes/7619-restricted-summary-starts.md @@ -0,0 +1,14 @@ +Restricted Files and DDI "dataDscr" Information (Summary Statistics, Variable Names, Variable Labels) + +In previous releases, DDI "dataDscr" information (summary statistics, variable names, and variable labels, sometimes known as "variable metadata") for tabular files that were ingested successfully was available even if the files were restricted. This has been changed in the following ways: + +- At the dataset level, DDI exports no longer show "dataDscr" information for restricted files. There is only one version of this export and it is the version that's suitable for public consumption with the "dataDscr" information hidden for restricted files. +- Similarly, at the dataset level, the DDI HTML Codebook no longer shows "dataDscr" information for restricted files. +- At the file level, "dataDscr" information is no longer publicly available for restricted files. In practice, it was only possible to get this publicly via API (the download/access button was hidden). +- At the file level, "dataDscr" (variable metadata) information can still be downloaded for restricted files if you have access to download the file.
+ +After upgrading, you should re-export all datasets so that cached DDI exports containing summary statistics for restricted files are replaced with DDI exports fit for public consumption: + + curl http://localhost:8080/api/admin/metadata/reExportAll + +For details on this operation, see https://guides.dataverse.org/en/5.4/admin/metadataexport.html diff --git a/doc/sphinx-guides/source/user/dataset-management.rst b/doc/sphinx-guides/source/user/dataset-management.rst index 8a853449488..1d5f1344c6c 100755 --- a/doc/sphinx-guides/source/user/dataset-management.rst +++ b/doc/sphinx-guides/source/user/dataset-management.rst @@ -179,8 +179,6 @@ Additional download options available for tabular data (found in the same drop-d - Data File Citation (currently in either RIS, EndNote XML, or BibTeX format); - All of the above, as a zipped bundle. -See also :ref:`restricted-tabular-files`. - Astronomy (FITS) ---------------- @@ -214,19 +212,6 @@ When you restrict a file it cannot be downloaded unless permission has been gran See also :ref:`terms-of-access` and :ref:`permissions`. -.. _restricted-tabular-files: - -Restricted Tabular Files ------------------------- - -Restricted tabular files are treated differently than other file types in that the following information is exposed when the files are published: - -- The name of columns (variables). -- The "label" (description) of columns. -- Summary statistics (max, min, mean, etc.) of columns. - -This information can been seen from the file landing page if you click "Export Metada" and then "DDI". Depending on your installation, the information above may also be available from :doc:`/admin/external-tools`.
- Edit Files ========== diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Access.java b/src/main/java/edu/harvard/iq/dataverse/api/Access.java index 281f0a76ce1..2cd7e0dd122 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Access.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Access.java @@ -450,6 +450,24 @@ public String tabularDatafileMetadataDDI(@PathParam("fileId") String fileId, @Q throw new BadRequestException("tabular data required"); } + if (dataFile.isRestricted()) { + boolean hasPermissionToDownloadFile = false; + DataverseRequest dataverseRequest; + try { + dataverseRequest = createDataverseRequest(findUserOrDie()); + } catch (WrappedResponse ex) { + throw new BadRequestException("cannot find user", ex); + } + if (dataverseRequest != null && dataverseRequest.getUser() instanceof GuestUser) { + // A guest here may really be a logged-in UI user; try to get a non-GuestUser from the session. + dataverseRequest = dvRequestService.getDataverseRequest(); + } + hasPermissionToDownloadFile = permissionService.requestOn(dataverseRequest, dataFile).has(Permission.DownloadFile); + if (!hasPermissionToDownloadFile) { + throw new BadRequestException("no permission to download file"); + } + } + response.setHeader("Content-disposition", "attachment; filename=\"dataverse_files.zip\""); FileMetadata fm = null; diff --git a/src/main/java/edu/harvard/iq/dataverse/api/errorhandlers/WebApplicationExceptionHandler.java b/src/main/java/edu/harvard/iq/dataverse/api/errorhandlers/WebApplicationExceptionHandler.java index b6229e58192..5f28bfd0afc 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/errorhandlers/WebApplicationExceptionHandler.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/errorhandlers/WebApplicationExceptionHandler.java @@ -43,8 +43,11 @@ public Response toResponse(WebApplicationException ex) { switch (ex.getResponse().getStatus()) { // BadRequest case 400: + // It's strange to have these "startsWith" conditionals here. They both come from Access.java.
+ if ( (ex.getMessage()+"").toLowerCase().startsWith("tabular data required")) { jrb.message(BundleUtil.getStringFromBundle("access.api.exception.metadata.not.available.for.nontabular.file")); + } else if ((ex.getMessage() + "").toLowerCase().startsWith("no permission to download file")) { + jrb.message(BundleUtil.getStringFromBundle("access.api.exception.metadata.restricted.no.permission")); } else { jrb.message("Bad Request. The API request cannot be completed with the parameters supplied. Please check your code for typos, or consult our API guide at http://guides.dataverse.org."); jrb.request(request); diff --git a/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java b/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java index dd797a55539..1ce1a2119a7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java @@ -1388,6 +1388,17 @@ public static void createDataDscr(XMLStreamWriter xmlw, DatasetVersion datasetVe for (FileMetadata fileMetadata : datasetVersion.getFileMetadatas()) { DataFile dataFile = fileMetadata.getDataFile(); + /** + * Previously (in Dataverse 5.3 and below) the dataDscr section was + * included for restricted files but that meant that summary + * statistics were exposed. (To get at these statistics, API users + * should instead use the "Data Variable Metadata Access" endpoint.) + * These days we skip each restricted file (and keep processing the rest) to avoid this exposure.
+ */ + if (dataFile != null && dataFile.isRestricted()) { + continue; + } + if (dataFile != null && dataFile.isTabularData()) { if (!tabularData) { xmlw.writeStartElement("dataDscr"); diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index 4d95322caf2..8352e66c92d 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -2354,6 +2354,7 @@ access.api.requestList.fileNotFound=Could not find datafile with id {0}. access.api.requestList.noKey=You must provide a key to get list of access requests for a file. access.api.requestList.noRequestsFound=There are no access requests for this file {0}. access.api.exception.metadata.not.available.for.nontabular.file=This type of metadata is only available for tabular files. +access.api.exception.metadata.restricted.no.permission=You do not have permission to download this file. access.api.exception.version.not.found=Could not find requested dataset version. #permission diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java index fb7914561bc..04d7c77d4fb 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java @@ -36,7 +36,10 @@ import edu.harvard.iq.dataverse.authorization.groups.impl.builtin.AuthenticatedUsers; import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.SystemConfig; +import java.io.File; import java.io.IOException; +import java.nio.file.Path; +import java.nio.file.Paths; import java.util.ArrayList; import java.util.HashMap; import javax.json.Json; @@ -53,6 +56,7 @@ import static javax.ws.rs.core.Response.Status.OK; import static javax.ws.rs.core.Response.Status.UNAUTHORIZED; import static junit.framework.Assert.assertEquals; +import org.hamcrest.CoreMatchers; import static org.hamcrest.CoreMatchers.equalTo; import static
org.hamcrest.CoreMatchers.nullValue; import org.junit.After; @@ -2041,4 +2045,123 @@ public void testLinkingDatasets() { */ } + /** + * In this test we are restricting a file and testing "export DDI" at the + * dataset level as well as getting the DDI at the file level. + * + * Export at the dataset level is always the public version. + * + * At the file level, you can still get summary statistics (and friends, + * "dataDscr") if you have access to download the file. If you don't have + * access, you get an error. + */ + @Test + public void testRestrictFileExportDdi() throws IOException { + + Response createUser = UtilIT.createRandomUser(); + createUser.prettyPrint(); + String authorUsername = UtilIT.getUsernameFromResponse(createUser); + String authorApiToken = UtilIT.getApiTokenFromResponse(createUser); + + Response createDataverse = UtilIT.createRandomDataverse(authorApiToken); + createDataverse.prettyPrint(); + createDataverse.then().assertThat() + .statusCode(CREATED.getStatusCode()); + String dataverseAlias = UtilIT.getAliasFromResponse(createDataverse); + + Response createDataset = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, authorApiToken); + createDataset.prettyPrint(); + createDataset.then().assertThat() + .statusCode(CREATED.getStatusCode()); + + Integer datasetId = UtilIT.getDatasetIdFromResponse(createDataset); + String datasetPid = JsonPath.from(createDataset.asString()).getString("data.persistentId"); + + Path pathToFile = Paths.get(java.nio.file.Files.createTempDirectory(null) + File.separator + "data.csv"); + String contentOfCsv = "" + + "name,pounds,species\n" + + "Marshall,40,dog\n" + + "Tiger,17,cat\n" + + "Panther,21,cat\n"; + java.nio.file.Files.write(pathToFile, contentOfCsv.getBytes(java.nio.charset.StandardCharsets.UTF_8)); + + Response uploadFile = UtilIT.uploadFileViaNative(datasetId.toString(), pathToFile.toString(), authorApiToken); + uploadFile.prettyPrint(); + uploadFile.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.files[0].label",
equalTo("data.csv")); + + String fileId = JsonPath.from(uploadFile.body().asString()).getString("data.files[0].dataFile.id"); + + assertTrue("Failed test if Ingest Lock exceeds max duration " + pathToFile, UtilIT.sleepForLock(datasetId.longValue(), "Ingest", authorApiToken, UtilIT.MAXIMUM_INGEST_LOCK_DURATION)); + + Response restrictFile = UtilIT.restrictFile(fileId, true, authorApiToken); + restrictFile.prettyPrint(); + restrictFile.then().assertThat().statusCode(OK.getStatusCode()); + + Response publishDataverse = UtilIT.publishDataverseViaNativeApi(dataverseAlias, authorApiToken); + publishDataverse.then().assertThat().statusCode(OK.getStatusCode()); + Response publishDataset = UtilIT.publishDatasetViaNativeApi(datasetPid, "major", authorApiToken); + publishDataset.then().assertThat().statusCode(OK.getStatusCode()); + + // We're testing export here, which is at dataset level. + // Guest/public version + Response exportByGuest = UtilIT.exportDataset(datasetPid, "ddi"); + exportByGuest.prettyPrint(); + exportByGuest.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("codeBook.fileDscr.fileTxt.fileName", equalTo("data.tab")); + + // Here we are asserting that dataDscr is empty. TODO: Do this in REST Assured. + String dataDscrForGuest = XmlPath.from(exportByGuest.asString()).getString("codeBook.dataDscr"); + Assert.assertEquals("", dataDscrForGuest); + + // Author export (has access) + Response exportByAuthor = UtilIT.exportDataset(datasetPid, "ddi", authorApiToken); + exportByAuthor.prettyPrint(); + exportByAuthor.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("codeBook.fileDscr.fileTxt.fileName", equalTo("data.tab")); + + // dataDscr is empty even for the author: the dataset-level export is always the public version. TODO: Do this in REST Assured. + String dataDscrForAuthor = XmlPath.from(exportByAuthor.asString()).getString("codeBook.dataDscr"); + Assert.assertEquals("", dataDscrForAuthor); + + // Now we are testing file-level retrieval.
+ // The author has access to a restricted file and gets all the metadata. + Response fileMetadataDdiAuthor = UtilIT.getFileMetadata(fileId, "ddi", authorApiToken); + fileMetadataDdiAuthor.prettyPrint(); + fileMetadataDdiAuthor.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("codeBook.fileDscr.fileTxt.fileName", equalTo("data.tab")) + // The dataDscr section must be present because the author can download the restricted file. + // The names of all these variables (name, pounds, species) should be visible. + .body("codeBook.dataDscr", CoreMatchers.not(equalTo(null))) + // The variables are expected in the same order as the columns of the uploaded CSV. + .body("codeBook.dataDscr.var[0].@name", equalTo("name")) + .body("codeBook.dataDscr.var[1].@name", equalTo("pounds")) + // This is an example of a summary stat (max) that should be visible. + .body("codeBook.dataDscr.var[1].sumStat.find { it.@type == 'max' }", equalTo("40.0")) + .body("codeBook.dataDscr.var[2].@name", equalTo("species")); + + Response createUserNoAuth = UtilIT.createRandomUser(); + createUserNoAuth.prettyPrint(); + String usernameNoAuth = UtilIT.getUsernameFromResponse(createUserNoAuth); + String apiTokenNoAuth = UtilIT.getApiTokenFromResponse(createUserNoAuth); + + // Users with no access to the restricted file are blocked. + Response fileMetadataDdiUserNoAuth = UtilIT.getFileMetadata(fileId, "ddi", apiTokenNoAuth); + fileMetadataDdiUserNoAuth.prettyPrint(); + fileMetadataDdiUserNoAuth.then().assertThat() + .statusCode(BAD_REQUEST.getStatusCode()) + .body("message", equalTo("You do not have permission to download this file.")); + + // Guest users (not logged in) are also blocked.
+ Response fileMetadataDdiGuest = UtilIT.getFileMetadata(fileId, "ddi"); + fileMetadataDdiGuest.prettyPrint(); + fileMetadataDdiGuest.then().assertThat() + .statusCode(BAD_REQUEST.getStatusCode()) + .body("message", equalTo("You do not have permission to download this file.")); + } + } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index 2b6bbaf0caa..f3ff8f8fae4 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -834,6 +834,22 @@ static Response getFileMetadata(String fileIdOrPersistentId, String optionalForm .get("/api/access/datafile/" + idInPath + "/metadata" + optionalFormatInPath + "?key=" + apiToken + optionalQueryParam); } + static Response getFileMetadata(String fileIdOrPersistentId, String optionalFormat) { + String idInPath = fileIdOrPersistentId; // Assume it's a number. + String optionalQueryParam = ""; // If idOrPersistentId is a number we'll just put it in the path.
+ if (!NumberUtils.isNumber(fileIdOrPersistentId)) { + idInPath = ":persistentId"; + optionalQueryParam = "?persistentId=" + fileIdOrPersistentId; + } + String optionalFormatInPath = ""; + if (optionalFormat != null) { + optionalFormatInPath = "/" + optionalFormat; + } + return given() + .urlEncodingEnabled(false) + .get("/api/access/datafile/" + idInPath + "/metadata" + optionalFormatInPath + optionalQueryParam); + } + static Response testIngest(String fileName, String fileType) { return given() .get("/api/ingest/test/file?fileName=" + fileName + "&fileType=" + fileType); @@ -1634,11 +1650,19 @@ static Response uploadProvFreeForm(String idOrPersistentId, JsonObject jsonObjec // } // return requestSpecification.delete("/api/files/" + idInPath + "/prov-freeform" + optionalQueryParam); // } + static Response exportDataset(String datasetPersistentId, String exporter) { + return exportDataset(datasetPersistentId, exporter, null); + } static Response exportDataset(String datasetPersistentId, String exporter, String apiToken) { // http://localhost:8080/api/datasets/export?exporter=dataverse_json&persistentId=doi%3A10.5072/FK2/W6WIMQ - return given() - .header(API_TOKEN_HTTP_HEADER, apiToken) + RequestSpecification requestSpecification = given(); + if (apiToken != null) { + requestSpecification = given() + .header(UtilIT.API_TOKEN_HTTP_HEADER, apiToken); + } + return requestSpecification + // The API token header is only attached above when a token was supplied. // .get("/api/datasets/:persistentId/export" + "?persistentId=" + datasetPersistentId + "&exporter=" + exporter); .get("/api/datasets/export" + "?persistentId=" + datasetPersistentId + "&exporter=" + exporter); }