Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions easyDataverse/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
from .dataset import Dataset # noqa: F401
from .dataverse import Dataverse # noqa: F401
from .license import CustomLicense, License # noqa: F401
import nest_asyncio

# Names re-exported as the package's public API.
__all__ = ["Dataset", "Dataverse", "CustomLicense", "License"]

# Applied once at import time, so it affects every process that imports this
# package. nest_asyncio patches asyncio to permit re-entrant event loops.
# NOTE(review): presumably needed so the library's async helpers can be driven
# from an already-running loop (e.g. a notebook) — confirm the global patch is
# intended for all importers.
nest_asyncio.apply()

# Package version string.
__version__ = "0.4.3"
16 changes: 12 additions & 4 deletions easyDataverse/dataset.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import json
import os
from json import dumps
from typing import Dict, List, Optional
from typing import Dict, List, Optional, Union

import nob
import xmltodict
Expand All @@ -11,7 +11,7 @@
from dvuploader import File, add_directory

from easyDataverse.base import DataverseBase
from easyDataverse.license import License
from easyDataverse.license import CustomLicense, License
from easyDataverse.uploader import update_dataset, upload_to_dataverse
from easyDataverse.utils import YAMLDumper

Expand All @@ -34,7 +34,8 @@ class Dataset(BaseModel):
validate_assignment=True,
)

license: License = Field(
license: Union[License, CustomLicense, None] = Field(
default=None,
description="The license of the dataset.",
)

Expand Down Expand Up @@ -182,10 +183,17 @@ def dataverse_dict(self) -> dict:
for block in self.metadatablocks.values():
blocks.update(block.dataverse_dict())

if isinstance(self.license, License):
terms = {"license": self.license.name}
elif isinstance(self.license, CustomLicense):
terms = self.license.model_dump(by_alias=True, exclude={"name"})
else:
terms = {}

return {
"datasetVersion": {
"license": self.license.name,
"metadataBlocks": blocks,
**terms,
}
}

Expand Down
26 changes: 19 additions & 7 deletions easyDataverse/dataverse.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from urllib import parse

import httpx
from easyDataverse.license import License
from easyDataverse.license import CustomLicense, License
from rich.panel import Panel
from rich.progress import Progress, SpinnerColumn, TextColumn
from anytree import Node, findall_by_attr
Expand Down Expand Up @@ -372,10 +372,22 @@ def load_dataset(

# Fetch and extract data
remote_ds = self._fetch_dataset(pid, version)
dataset.license = self.licenses[remote_ds.data.latestVersion.license.name] # type: ignore
dataset.p_id = remote_ds.data.latestVersion.datasetPersistentId # type: ignore
blocks = remote_ds.data.latestVersion.metadataBlocks # type: ignore
files = remote_ds.data.latestVersion.files # type: ignore

# Get the latest version data
latest_version = remote_ds.data.latestVersion # type: ignore

# Handle license information
if hasattr(latest_version, "license") and latest_version.license:
dataset.license = self.licenses.get(latest_version.license.name)
else:
# Try to create a custom license from available fields
custom_license = CustomLicense(**latest_version)
if custom_license.model_dump(exclude_none=True):
dataset.license = custom_license

dataset.p_id = latest_version.datasetPersistentId # type: ignore
blocks = latest_version.metadataBlocks # type: ignore
files = latest_version.files # type: ignore

# Process metadatablocks and files
self._construct_block_classes(blocks, dataset)
Expand Down Expand Up @@ -554,7 +566,7 @@ def _extract_data(self, fields: List, tree: Node):
dvtype = node.typeClass

if dvtype.lower() == "compound":
data[name] = self._process_compound(field.value, tree)
data[name] = self._process_compound(field.value, node)
else:
data[name] = field.value
else:
Expand All @@ -571,7 +583,7 @@ def _process_compound(self, compound, tree):
self._extract_data(list(entry.values()), tree) for entry in compound
]

return self._extract_data(compound, tree)
return self._extract_data(compound.values(), tree)

# ! Importers
def dataset_from_json(self, handler: IO) -> Dataset:
Expand Down
2 changes: 1 addition & 1 deletion easyDataverse/downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ async def _download_file(

return File(
filepath=local_path,
file_id=str(file_id),
file_id=str(file_id), # type: ignore
**file,
)

Expand Down
64 changes: 63 additions & 1 deletion easyDataverse/license.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from typing import Optional
from urllib import parse
from pydantic import BaseModel, Field
from pydantic import BaseModel, ConfigDict, Field
import httpx


Expand Down Expand Up @@ -68,3 +69,64 @@ def fetch_by_name(cls, name: str, server_url: str) -> "License":
return next(filter(lambda x: x["name"] == name, licenses))
except StopIteration:
raise Exception(f"License '{name}' not found at '{server_url}'")


class CustomLicense(BaseModel):
    """Custom terms of use attached to a Dataverse dataset.

    Every field is an optional free-text string that defaults to ``None``.
    Because ``populate_by_name`` is enabled, each field may be supplied
    either under its Python attribute name (``terms_of_use``) or under its
    alias (``termsOfUse``).
    """

    model_config = ConfigDict(populate_by_name=True)

    terms_of_use: Optional[str] = Field(
        None,
        alias="termsOfUse",
        description="The terms of use of the dataset.",
    )
    confidentiality_declaration: Optional[str] = Field(
        None,
        alias="confidentialityDeclaration",
        description="The confidentiality declaration of the dataset.",
    )
    special_permissions: Optional[str] = Field(
        None,
        alias="specialPermissions",
        description="Special permissions for the dataset.",
    )
    restrictions: Optional[str] = Field(
        None,
        alias="restrictions",
        description="Restrictions applied to the dataset.",
    )
    citation_requirements: Optional[str] = Field(
        None,
        alias="citationRequirements",
        description="Requirements for citing the dataset.",
    )
    depositor_requirements: Optional[str] = Field(
        None,
        alias="depositorRequirements",
        description="Requirements for depositors of the dataset.",
    )
    conditions: Optional[str] = Field(
        None,
        alias="conditions",
        description="Conditions for using the dataset.",
    )
    disclaimer: Optional[str] = Field(
        None,
        alias="disclaimer",
        description="Disclaimer for the dataset.",
    )
61 changes: 61 additions & 0 deletions tests/integration/test_dataset_creation.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from easyDataverse.dataset import Dataset

from easyDataverse.dataverse import Dataverse
from easyDataverse.license import CustomLicense


class TestDatasetCreation:
Expand Down Expand Up @@ -130,6 +131,66 @@ def test_creation_other_license(

assert self.sort_citation(dataset) == minimal_upload_other_license

@pytest.mark.integration
def test_creation_custom_terms_of_use(
    self,
    credentials,
):
    """Upload a dataset with custom terms of use and verify they survive a reload.

    The dataset is created with a fully populated ``CustomLicense``, uploaded
    to the ``root`` dataverse, fetched again by its persistent identifier, and
    each custom-terms field is compared against the value that was sent.
    """
    # Arrange
    server_url, token = credentials
    client = Dataverse(
        server_url=server_url,
        api_token=token,
    )
    custom_terms = CustomLicense(
        termsOfUse="This is a custom terms of use",
        confidentialityDeclaration="This is a custom confidentiality declaration",
        specialPermissions="This is a custom special permissions",
        restrictions="This is a custom restrictions",
        citationRequirements="This is a custom citation requirements",
        depositorRequirements="This is a custom depositor requirements",
        conditions="This is a custom conditions",
        disclaimer="This is a custom disclaimer",
    )

    # Act
    draft = client.create_dataset()
    draft.license = custom_terms

    # Minimal citation metadata for the upload
    citation = draft.citation
    citation.title = "My dataset"
    citation.subject = ["Other"]
    citation.add_author(name="John Doe")
    citation.add_ds_description(
        value="This is a description of the dataset",
        date="2024",
    )
    citation.add_dataset_contact(
        name="John Doe",
        email="john@doe.com",
    )

    pid = draft.upload(dataverse_name="root")

    # Re-fetch the dataset from the server
    reloaded = client.load_dataset(pid)

    # Assert: the reloaded license carries the custom terms
    assert isinstance(reloaded.license, CustomLicense)
    terms = reloaded.license
    assert terms.terms_of_use == "This is a custom terms of use"
    assert terms.special_permissions == "This is a custom special permissions"
    assert terms.restrictions == "This is a custom restrictions"
    assert terms.citation_requirements == "This is a custom citation requirements"
    assert terms.conditions == "This is a custom conditions"
    assert terms.disclaimer == "This is a custom disclaimer"
    assert (
        terms.confidentiality_declaration
        == "This is a custom confidentiality declaration"
    )
    assert terms.depositor_requirements == "This is a custom depositor requirements"


@pytest.mark.integration
def test_tab_ingest_disabled(
self,
credentials,
Expand Down
3 changes: 0 additions & 3 deletions tests/unit/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,8 @@


class TestBase:

@pytest.mark.unit
def test_template(self):

# Arrange
class Child(DataverseBase):
bar: Optional[str] = Field(
Expand All @@ -18,7 +16,6 @@ class Child(DataverseBase):
)

class Test(DataverseBase):

foo: Optional[str] = Field(
default=None,
alias="Foo",
Expand Down