Skip to content
Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -617,6 +617,12 @@
<version>${smallrye-mpconfig.version}</version>
<scope>test</scope>
</dependency>
<!-- https://mvnrepository.com/artifact/edu.kit.datamanager/ro-crate-java -->
<dependency>
<groupId>edu.kit.datamanager</groupId>
<artifactId>ro-crate-java</artifactId>
<version>1.1.0-rc.1</version>
</dependency>
</dependencies>
<build>
<!-- <testResources>
Expand Down
104 changes: 104 additions & 0 deletions src/main/java/edu/harvard/iq/dataverse/export/RoCrateExporter.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
package edu.harvard.iq.dataverse.export;

import com.google.auto.service.AutoService;
import edu.harvard.iq.dataverse.DatasetServiceBean;
import edu.harvard.iq.dataverse.rocrate.RoCrateManager;
import edu.harvard.iq.dataverse.util.BundleUtil;
import io.gdcc.spi.export.ExportDataProvider;
import io.gdcc.spi.export.ExportException;
import io.gdcc.spi.export.Exporter;
import jakarta.ws.rs.core.MediaType;

import java.io.FileInputStream;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Locale;
import java.util.Optional;
import jakarta.enterprise.inject.spi.CDI;

/**
* Exports the dataset metadata as an RO-Crate JSON using the metadatablocks of the dataset as the schema
* of the RO-Crate. This means that all data present in the dataset in Dataverse is exported as is, without
* mapping it to the Schema.org vocabulary, which is the default schema for RO-Crates.
*
* This class has been extracted from the ARP project (https://science-research-data.hu/en) in the frame of
* FAIR-IMPACT's 1st Open Call "Enabling FAIR Signposting and RO-Crate for content/metadata discovery and consumption".
*
* @author Balázs E. Pataki <balazs.pataki@sztaki.hu>, SZTAKI, Department of Distributed Systems, https://dsd.sztaki.hu
* @author Norbert Finta <norbert.finta@sztaki.hu>, SZTAKI, Department of Distributed Systems, https://dsd.sztaki.hu
* @version 1.0
*/
@AutoService(Exporter.class)
public class RoCrateExporter implements Exporter
{
    /**
     * Writes the RO-Crate JSON of the dataset version described by the export data provider to the output stream.
     * If no RO-Crate file exists yet at the path reported by the RoCrateManager, it is created first.
     *
     * @param exportDataProvider supplies the JSON representation of the dataset being exported
     * @param outputStream       the stream the RO-Crate file content is copied to; it is not closed here
     * @throws ExportException if the dataset or its version cannot be found, or if creating or reading
     *                         the RO-Crate fails (the original exception is attached as the cause)
     */
    @Override
    public void exportDataset(ExportDataProvider exportDataProvider, OutputStream outputStream) throws ExportException
    {
        // The exporter is not loaded as an EJB by the ExporterService but is initialized directly, which means
        // we cannot inject EJB-s with the usual @EJB annotation. Instead, we look them up at runtime
        var roCrateManager = CDI.current().select(RoCrateManager.class).get();
        var datasetService = CDI.current().select(DatasetServiceBean.class).get();

        // exportDataProvider doesn't provide the Dataset object only the JSON representation, so we get this to
        // read the ID of the Dataset and then load it via the datasetService
        var json = exportDataProvider.getDatasetJson();
        var datasetId = Long.valueOf(json.get("id").toString());
        var ds = datasetService.find(datasetId);
        if (ds == null) {
            throw new ExportException("Cannot export RO-Crate: dataset with id " + datasetId + " was not found");
        }

        // Get the version specified in the json. Note: it is actually always the latest version, one cannot
        // export metadata of older versions.
        var versionId = Long.valueOf(json.getJsonObject("datasetVersion").get("id").toString());
        var version = ds.getVersionFromId(versionId);
        if (version == null) {
            throw new ExportException("Cannot export RO-Crate: version with id " + versionId
                    + " was not found in dataset with id " + datasetId);
        }

        // Now we can create the RO-Crate using the roCrateManager
        String roCratePath = roCrateManager.getRoCratePath(version);
        var roCrateFile = Paths.get(roCratePath);
        if (!Files.exists(roCrateFile)) {
            try {
                roCrateManager.createOrUpdateRoCrate(version);
            } catch (Exception e) {
                // Keep the cause so the original stack trace is not lost
                throw new ExportException(e.getMessage(), e);
            }
        }

        try {
            // Stream the file content to the caller's output stream; the stream is left open for the caller
            Files.copy(roCrateFile, outputStream);
        } catch (Exception e) {
            throw new ExportException(e.getMessage(), e);
        }
    }

    /**
     * @return the format name of this exporter, "rocrate"
     */
    @Override
    public String getFormatName()
    {
        return "rocrate";
    }

    /**
     * @param locale the locale used to look up the label in the resource bundle
     * @return the bundle label for the export button, or "RO-Crate" when the bundle lookup yields null
     */
    @Override
    public String getDisplayName(Locale locale)
    {
        String displayName = BundleUtil.getStringFromBundle("dataset.exportBtn.itemLabel.rocrate", locale);
        return Optional.ofNullable(displayName).orElse("RO-Crate");
    }

    /**
     * @return always true
     */
    @Override
    public Boolean isHarvestable()
    {
        return true;
    }

    /**
     * @return always true
     */
    @Override
    public Boolean isAvailableToUsers()
    {
        return true;
    }

    /**
     * @return the JSON media type, since the export is written as RO-Crate JSON
     */
    @Override
    public String getMediaType()
    {
        return MediaType.APPLICATION_JSON;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
package edu.harvard.iq.dataverse.rocrate;

import edu.harvard.iq.dataverse.Dataset;
import edu.harvard.iq.dataverse.MetadataBlock;
import edu.harvard.iq.dataverse.MetadataBlockServiceBean;
import edu.kit.datamanager.ro_crate.entities.data.RootDataEntity;
import jakarta.ejb.EJB;
import jakarta.ejb.Stateless;
import jakarta.inject.Named;

import java.util.List;
import java.util.stream.Collectors;

/**
* Manages RO-Crate conformsTo values based on Dataverse metadatablocks.
*
* This class has been extracted from the ARP project (https://science-research-data.hu/en) in the frame of
* FAIR-IMPACT's 1st Open Call "Enabling FAIR Signposting and RO-Crate for content/metadata discovery and consumption".
*
* @author Balázs E. Pataki <balazs.pataki@sztaki.hu>, SZTAKI, Department of Distributed Systems, https://dsd.sztaki.hu
* @author Norbert Finta <norbert.finta@sztaki.hu>, SZTAKI, Department of Distributed Systems, https://dsd.sztaki.hu
* @version 1.0
*/
@Stateless
@Named
public class DefaultRoCrateConformsToIdProvider implements RoCrateConformsToIdProvider
{
    @EJB
    protected MetadataBlockServiceBean metadataBlockSvc;

    /** Prefix prepended to a metadata block name to form its conformsTo identifier. */
    private static final String ID_PREFIX = "https://dataverse.org/metadatablocks/";

    /**
     * Builds one conformsTo identifier per metadata block of the dataset's owner by prepending
     * the identifier prefix to the metadata block name.
     *
     * @param dataset        the dataset whose owner's metadata blocks are used
     * @param rootDataEntity the root data entity of the RO-Crate; not used by this implementation
     * @return the generated identifiers, in the order the owner returns its metadata blocks
     */
    @Override
    public List<String> generateConformsToIds(Dataset dataset, RootDataEntity rootDataEntity)
    {
        return dataset.getOwner().getMetadataBlocks().stream()
                .map(metadataBlock -> ID_PREFIX + metadataBlock.getName())
                .collect(Collectors.toList());
    }

    /**
     * Looks up the metadata blocks referenced by the given conformsTo identifiers. The metadata block
     * name is the part of the identifier that follows the identifier prefix.
     *
     * Identifiers that are null or do not start with the prefix were not generated by this provider
     * and are skipped, instead of being cut at an arbitrary position (or failing with a
     * StringIndexOutOfBoundsException when they are shorter than the prefix).
     *
     * @param ids the conformsTo identifiers to resolve
     * @return the result of the name lookup for every identifier carrying the prefix
     */
    @Override
    public List<MetadataBlock> findMetadataBlockForConformsToIds(List<String> ids)
    {
        return ids.stream()
                .filter(id -> id != null && id.startsWith(ID_PREFIX))
                .map(id -> metadataBlockSvc.findByName(id.substring(ID_PREFIX.length())))
                .collect(Collectors.toList());
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
package edu.harvard.iq.dataverse.rocrate;

import edu.harvard.iq.dataverse.DatasetFieldCompoundValue;
import jakarta.ejb.Stateless;
import jakarta.inject.Named;

import java.util.stream.Collectors;

/**
* Helps to generate a value for the "name" field of RO-Crate objects for a Dataverse compound field. This
* RoCrateNameProvider implementation generates a string value similar to the Metadata tab of the Dataverse dataset
* UI.
*
* This class has been extracted from the ARP project (https://science-research-data.hu/en) in the frame of
* FAIR-IMPACT's 1st Open Call "Enabling FAIR Signposting and RO-Crate for content/metadata discovery and consumption".
*
* @author Balázs E. Pataki <balazs.pataki@sztaki.hu>, SZTAKI, Department of Distributed Systems, https://dsd.sztaki.hu
* @author Norbert Finta <norbert.finta@sztaki.hu>, SZTAKI, Department of Distributed Systems, https://dsd.sztaki.hu
* @version 1.0
*/
@Stateless
@Named
public class DefaultRoCrateNameProvider implements RoCrateNameProvider
{
    /**
     * Joins the display values of the compound value with single spaces. A result longer than
     * 80 characters is cut to its first 77 characters followed by "...".
     *
     * @param compoundValue the compound field value whose display values form the name
     * @return the joined, possibly shortened, name
     */
    @Override
    public String generateRoCrateName(DatasetFieldCompoundValue compoundValue)
    {
        var joinedDisplayValues = compoundValue.getDisplayValueMap().values().stream()
                .collect(Collectors.joining(" "));

        // Short enough: use the joined text unchanged
        if (joinedDisplayValues.length() <= 80) {
            return joinedDisplayValues;
        }

        // Too long: keep the first 77 characters and mark the cut with an ellipsis
        return joinedDisplayValues.substring(0, 77) + "...";
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
package edu.harvard.iq.dataverse.rocrate;

import edu.harvard.iq.dataverse.Dataset;
import edu.harvard.iq.dataverse.MetadataBlock;
import edu.kit.datamanager.ro_crate.entities.data.RootDataEntity;

import java.util.List;

/**
* Manages RO-Crate conformsTo values.
*
* This class has been extracted from the ARP project (https://science-research-data.hu/en) in the frame of
* FAIR-IMPACT's 1st Open Call "Enabling FAIR Signposting and RO-Crate for content/metadata discovery and consumption".
*
* @author Balázs E. Pataki <balazs.pataki@sztaki.hu>, SZTAKI, Department of Distributed Systems, https://dsd.sztaki.hu
* @author Norbert Finta <norbert.finta@sztaki.hu>, SZTAKI, Department of Distributed Systems, https://dsd.sztaki.hu
* @version 1.0
*/

public interface RoCrateConformsToIdProvider
{
/**
 * Generates the conformsTo identifiers for a dataset.
 *
 * @param dataset the dataset the identifiers are generated for
 * @param rootDataEntity the root data entity of the RO-Crate (NOTE(review): presumably the entity the
 *                       identifiers are attached to; confirm against the caller)
 * @return the list of conformsTo identifiers
 */
List<String> generateConformsToIds(Dataset dataset, RootDataEntity rootDataEntity);

/**
 * Finds the metadata blocks that correspond to the given conformsTo identifiers.
 *
 * @param ids the conformsTo identifiers to resolve
 * @return the list of metadata blocks found for the identifiers
 */
List<MetadataBlock> findMetadataBlockForConformsToIds(List<String> ids);
}
Loading