Skip to content

Commit 95288a2

Browse files
author
Michael Fritzsche
committed
added first e2e harvest test for SoftwareMetadata
1 parent a68f2ae commit 95288a2

6 files changed

Lines changed: 132 additions & 98 deletions

File tree

src/hermes/commands/__init__.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,12 @@
88
# "unused import" errors.
99
# flake8: noqa
1010

11-
from hermes.commands.base import HermesHelpCommand
12-
from hermes.commands.base import HermesVersionCommand
13-
from hermes.commands.clean.base import HermesCleanCommand
14-
from hermes.commands.init.base import HermesInitCommand
15-
from hermes.commands.curate.base import HermesCurateCommand
11+
# from hermes.commands.base import HermesHelpCommand
12+
# from hermes.commands.base import HermesVersionCommand
13+
# from hermes.commands.clean.base import HermesCleanCommand
14+
# from hermes.commands.init.base import HermesInitCommand
15+
# from hermes.commands.curate.base import HermesCurateCommand
1616
from hermes.commands.harvest.base import HermesHarvestCommand
17-
from hermes.commands.process.base import HermesProcessCommand
18-
from hermes.commands.deposit.base import HermesDepositCommand
19-
from hermes.commands.postprocess.base import HermesPostprocessCommand
17+
# from hermes.commands.process.base import HermesProcessCommand
18+
# from hermes.commands.deposit.base import HermesDepositCommand
19+
# from hermes.commands.postprocess.base import HermesPostprocessCommand

src/hermes/commands/base.py

Lines changed: 26 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -9,19 +9,20 @@
99
import logging
1010
import pathlib
1111
from importlib import metadata
12-
from typing import Dict, Optional, Type
12+
from typing import Type, Union
1313

1414
import toml
1515
from pydantic import BaseModel
1616
from pydantic_settings import BaseSettings, SettingsConfigDict
1717

1818

19-
class _HermesSettings(BaseSettings):
19+
20+
class HermesSettings(BaseSettings):
2021
"""Root class for HERMES configuration model."""
2122

2223
model_config = SettingsConfigDict(env_file_encoding='utf-8')
2324

24-
logging: Dict = {}
25+
logging: dict = {}
2526

2627

2728
class HermesCommand(abc.ABC):
@@ -31,7 +32,7 @@ class HermesCommand(abc.ABC):
3132
"""
3233

3334
command_name: str = ""
34-
settings_class: Type = _HermesSettings
35+
settings_class: Type = HermesSettings
3536

3637
def __init__(self, parser: argparse.ArgumentParser):
3738
"""Initialize a new instance of any HERMES command.
@@ -45,28 +46,27 @@ def __init__(self, parser: argparse.ArgumentParser):
4546
self.log = logging.getLogger(f"hermes.{self.command_name}")
4647
self.errors = []
4748

48-
@classmethod
49-
def init_plugins(cls):
49+
def init_plugins(self):
5050
"""Collect and initialize the plugins available for the HERMES command."""
5151

5252
# Collect all entry points for this group (i.e., all valid plug-ins for the step)
53-
entry_point_group = f"hermes.{cls.command_name}"
54-
group_plugins = {
55-
entry_point.name: entry_point.load()
56-
for entry_point in metadata.entry_points(group=entry_point_group)
57-
}
58-
59-
# Collect the plug-in specific configurations
60-
cls.derive_settings_class({
61-
plugin_name: plugin_class.settings_class
62-
for plugin_name, plugin_class in group_plugins.items()
63-
if hasattr(plugin_class, "settings_class") and plugin_class.settings_class is not None
64-
})
53+
entry_point_group = f"hermes.{self.command_name}"
54+
group_plugins = {}
55+
group_settings = {}
56+
57+
for entry_point in metadata.entry_points(group=entry_point_group):
58+
plugin_cls = entry_point.load()
59+
60+
group_plugins[entry_point.name] = plugin_cls
61+
if hasattr(plugin_cls, 'settings_class') and plugin_cls.settings_class is not None:
62+
group_settings[entry_point.name] = plugin_cls.settings_class
63+
64+
self.derive_settings_class(group_settings)
6565

6666
return group_plugins
6767

6868
@classmethod
69-
def derive_settings_class(cls, setting_types: Dict[str, Type]) -> None:
69+
def derive_settings_class(cls, setting_types: dict[str, Type]) -> None:
7070
"""Build a new Pydantic data model class for configuration.
7171
7272
This will create a new class that includes all settings from the plugins available.
@@ -131,13 +131,10 @@ def init_command_parser(self, command_parser: argparse.ArgumentParser) -> None:
131131

132132
def load_settings(self, args: argparse.Namespace):
    """Read the TOML configuration file and populate the settings of this command.

    The file is expected at ``args.path / args.config``. Its content is validated against
    ``HermesCommand.settings_class``; the validated model is stored in ``self.root_settings``
    and its attribute named after ``self.command_name`` is stored in ``self.settings``.

    :param args: Parsed command line arguments providing ``path`` and ``config``.
    """
    config_file = args.path / args.config
    raw_config = toml.load(config_file)

    root = HermesCommand.settings_class.model_validate(raw_config)
    self.root_settings = root
    self.settings = getattr(root, self.command_name)
141138

142139
def patch_settings(self, args: argparse.Namespace):
143140
"""Process command line options for the settings."""
@@ -164,7 +161,9 @@ def __call__(self, args: argparse.Namespace):
164161
class HermesPlugin(abc.ABC):
165162
"""Base class for all HERMES plugins."""
166163

167-
settings_class: Optional[Type] = None
164+
pluing_node = None
165+
166+
settings_class: Union[Type, None] = None
168167

169168
@abc.abstractmethod
170169
def __call__(self, command: HermesCommand) -> None:
@@ -202,27 +201,3 @@ def __call__(self, args: argparse.Namespace) -> None:
202201
# Otherwise, simply show the general help and exit (cleanly).
203202
self.parser.print_help()
204203
self.parser.exit()
205-
206-
def load_settings(self, args: argparse.Namespace):
207-
"""No settings are needed for the help command."""
208-
pass
209-
210-
211-
class HermesVersionSettings(BaseModel):
212-
"""Intentionally empty settings class for the version command."""
213-
pass
214-
215-
216-
class HermesVersionCommand(HermesCommand):
217-
"""Show HERMES version and exit."""
218-
219-
command_name = "version"
220-
settings_class = HermesVersionSettings
221-
222-
def load_settings(self, args: argparse.Namespace):
223-
"""Pass loading settings as not necessary for this command."""
224-
pass
225-
226-
def __call__(self, args: argparse.Namespace) -> None:
227-
self.log.info(metadata.version("hermes"))
228-
self.parser.exit()

src/hermes/commands/harvest/base.py

Lines changed: 21 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,13 @@
55
# SPDX-FileContributor: Michael Meinel
66

77
import argparse
8-
import typing as t
9-
from datetime import datetime
108

119
from pydantic import BaseModel
1210

1311
from hermes.commands.base import HermesCommand, HermesPlugin
14-
from hermes.model.context import HermesContext, HermesHarvestContext
15-
from hermes.model.error import HermesValidationError, HermesMergeError
12+
from hermes.model.context_manager import HermesContext
13+
from hermes.model.error import HermesValidationError
14+
from hermes.model import SoftwareMetadata
1615

1716

1817
class HermesHarvestPlugin(HermesPlugin):
@@ -21,11 +20,11 @@ class HermesHarvestPlugin(HermesPlugin):
2120
TODO: describe the harvesting process and how this is mapped to this plugin.
2221
"""
2322

24-
def __call__(self, command: HermesCommand) -> t.Tuple[t.Dict, t.Dict]:
23+
def __call__(self, command: HermesCommand) -> tuple[SoftwareMetadata, dict]:
2524
pass
2625

2726

28-
class _HarvestSettings(BaseModel):
27+
class HarvestSettings(BaseModel):
2928
"""Generic harvesting settings."""
3029

3130
sources: list[str] = []
@@ -35,32 +34,31 @@ class HermesHarvestCommand(HermesCommand):
3534
""" Harvest metadata from configured sources. """
3635

3736
command_name = "harvest"
38-
settings_class = _HarvestSettings
37+
settings_class = HarvestSettings
3938

4039
def __call__(self, args: argparse.Namespace) -> None:
    """Run every configured harvester plugin and store its result in the HERMES context.

    For each name in ``self.settings.sources`` the matching plugin class is looked up in
    ``self.plugins``, instantiated and called with this command. The metadata it returns is
    written to the plugin's entry of the context under the keys ``codemeta``, ``context``
    and ``expanded``.

    Unknown plugin names and ``HermesValidationError`` raised by a plugin are logged and
    collected in ``self.errors``; harvesting continues with the next source.

    :param args: Parsed command line arguments; kept in ``self.args`` for use by the plugins.
    """
    self.args = args

    # Initialize the harvest cache directory here to indicate the step ran
    ctx = HermesContext()
    ctx.prepare_step('harvest')

    for plugin_name in self.settings.sources:
        try:
            plugin_cls = self.plugins[plugin_name]
        except KeyError as e:
            # A misconfigured source name must not abort the remaining harvesters.
            self.log.error("Plugin '%s' not found.", plugin_name)
            self.errors.append(e)
            continue

        try:
            # Load plugin and run the harvester
            plugin_func = plugin_cls()
            # Harvest plugins return a (metadata, tags) tuple; only the metadata is stored.
            harvested_data, _tags = plugin_func(self)

            with ctx[plugin_name] as plugin_ctx:
                plugin_ctx["codemeta"] = harvested_data.compact()
                plugin_ctx["context"] = {"@context": harvested_data.full_context}
                plugin_ctx["expanded"] = harvested_data.ld_value

        except HermesValidationError as e:
            self.log.error("Error while executing %s: %s", plugin_name, e)
            self.errors.append(e)

    ctx.finalize_step('harvest')

src/hermes/commands/harvest/cff.py

Lines changed: 17 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -9,16 +9,16 @@
99
import logging
1010
import pathlib
1111
import urllib.request
12-
import typing as t
1312

1413
from pydantic import BaseModel
1514
from ruamel.yaml import YAML
1615
import jsonschema
1716
from cffconvert import Citation
17+
from typing import Any, Union
1818

19-
from hermes.model.context import ContextPath
20-
from hermes.model.errors import HermesValidationError
19+
from hermes.model.error import HermesValidationError
2120
from hermes.commands.harvest.base import HermesHarvestPlugin, HermesHarvestCommand
21+
from hermes.model import SoftwareMetadata
2222

2323

2424
# TODO: should this be configurable via a CLI option?
@@ -35,7 +35,7 @@ class CffHarvestSettings(BaseModel):
3535
class CffHarvestPlugin(HermesHarvestPlugin):
3636
settings_class = CffHarvestSettings
3737

38-
def __call__(self, command: HermesHarvestCommand) -> t.Tuple[t.Dict, t.Dict]:
38+
def __call__(self, command: HermesHarvestCommand) -> tuple[SoftwareMetadata, dict]:
3939
# Get source files
4040
cff_file = self._get_single_cff(command.args.path)
4141
if not cff_file:
@@ -44,23 +44,24 @@ def __call__(self, command: HermesHarvestCommand) -> t.Tuple[t.Dict, t.Dict]:
4444

4545
# Read the content
4646
cff_data = cff_file.read_text()
47-
48-
# Validate the content to be correct CFF
4947
cff_dict = self._load_cff_from_file(cff_data)
5048

51-
if command.settings.cff.enable_validation and not self._validate(cff_file, cff_dict):
52-
raise HermesValidationError(cff_file)
49+
if command.settings.cff.enable_validation:
50+
# Validate the content to be correct CFF
51+
if not self._validate(cff_file, cff_dict):
52+
raise HermesValidationError(cff_file)
5353

5454
# Convert to CodeMeta using cffconvert
5555
codemeta_dict = self._convert_cff_to_codemeta(cff_data)
56-
# TODO Replace the following temp patch for #112 once there is a new cffconvert version with cffconvert#309
57-
codemeta_dict = self._patch_author_emails(cff_dict, codemeta_dict)
5856
if "version" in codemeta_dict:
5957
codemeta_dict["version"] = str(codemeta_dict["version"]) # Convert Version to string
6058

61-
return codemeta_dict, {'local_path': str(cff_file)}
59+
# TODO Replace the following temp patch for #112 once there is a new cffconvert version with cffconvert#309
60+
codemeta_dict = self._patch_author_emails(cff_dict, codemeta_dict)
61+
ld_codemeta = SoftwareMetadata(codemeta_dict, extra_vocabs={'legalName': {'@id': "http://schema.org/name"}})
62+
return ld_codemeta, {}
6263

63-
def _load_cff_from_file(self, cff_data: str) -> t.Any:
64+
def _load_cff_from_file(self, cff_data: str) -> Any:
6465
yaml = YAML(typ='safe')
6566
yaml.constructor.yaml_constructors[u'tag:yaml.org,2002:timestamp'] = yaml.constructor.yaml_constructors[
6667
u'tag:yaml.org,2002:str']
@@ -73,11 +74,11 @@ def _patch_author_emails(self, cff: dict, codemeta: dict) -> dict:
7374
codemeta["author"][i]["email"] = author["email"]
7475
return codemeta
7576

76-
def _convert_cff_to_codemeta(self, cff_data: str) -> t.Any:
77+
def _convert_cff_to_codemeta(self, cff_data: str) -> Any:
    """Convert raw CFF text to CodeMeta using cffconvert and return the parsed JSON."""
    citation = Citation(cff_data)
    return json.loads(citation.as_codemeta())
7980

80-
def _validate(self, cff_file: pathlib.Path, cff_dict: t.Dict) -> bool:
81+
def _validate(self, cff_file: pathlib.Path, cff_dict: dict) -> bool:
8182
audit_log = logging.getLogger('audit.cff')
8283

8384
cff_schema_url = f'https://citation-file-format.github.io/{_CFF_VERSION}/schema.json'
@@ -93,7 +94,7 @@ def _validate(self, cff_file: pathlib.Path, cff_dict: t.Dict) -> bool:
9394
audit_log.warning('!!! warning "%s is not valid according to <%s>"', cff_file, cff_schema_url)
9495

9596
for error in errors:
96-
path = ContextPath.make(error.absolute_path or ['root'])
97+
path = error.absolute_path or ['root']
9798
audit_log.info(' Invalid input for `%s`.', str(path))
9899
audit_log.info(' !!! message "%s"', error.message)
99100
audit_log.debug(' !!! value "%s"', error.instance)
@@ -108,7 +109,7 @@ def _validate(self, cff_file: pathlib.Path, cff_dict: t.Dict) -> bool:
108109
audit_log.info('- Found valid Citation File Format file at: %s', cff_file)
109110
return True
110111

111-
def _get_single_cff(self, path: pathlib.Path) -> t.Optional[pathlib.Path]:
112+
def _get_single_cff(self, path: pathlib.Path) -> Union[pathlib.Path, None]:
112113
# Find CFF files in directories and subdirectories
113114
cff_file = path / 'CITATION.cff'
114115
if cff_file.exists():

src/hermes/model/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,8 @@
22
#
33
# SPDX-License-Identifier: Apache-2.0
44

5+
# This is an interface file that only provides a public interface, hence linter is disabled to avoid
6+
# "unused import" errors.
7+
# flake8: noqa
8+
59
from hermes.model.api import SoftwareMetadata
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
import pytest
2+
from hermes.commands.harvest.cff import CffHarvestPlugin, CffHarvestSettings
3+
from hermes.model import SoftwareMetadata
4+
5+
6+
@pytest.mark.parametrize(
7+
"cff, res",
8+
[
9+
(
10+
"""cff-version: 1.2.0
11+
title: Temp\nmessage: >-
12+
If you use this software, please cite it using the
13+
metadata from this file.
14+
type: software
15+
authors:
16+
- given-names: Max
17+
family-names: Mustermann
18+
email: max@muster.mann""",
19+
SoftwareMetadata({
20+
"@type": "SoftwareSourceCode",
21+
"schema:author": {
22+
"@list": [{
23+
"@type": "Person",
24+
"email": ["max@muster.mann"],
25+
"familyName": ["Mustermann"],
26+
"givenName": ["Max"]
27+
}]
28+
},
29+
"schema:name": ["Temp"]
30+
})
31+
)
32+
]
33+
)
34+
def test_cff_harvest(tmp_path, cff, res):
35+
class Args:
36+
def __init__(self, path):
37+
self.path = path
38+
39+
class Settings:
40+
def __init__(self, cff_settings):
41+
self.cff = cff_settings
42+
43+
class Command:
44+
def __init__(self, args, settings):
45+
self.args = args
46+
self.settings = settings
47+
48+
command = Command(Args(tmp_path), Settings(CffHarvestSettings()))
49+
50+
cff_file = tmp_path / "CITATION.cff"
51+
cff_file.write_text(cff)
52+
53+
result = CffHarvestPlugin().__call__(command)
54+
# FIXME: update to compare the SoftwareMetadata objects instead of the data_dicts
55+
# after merge with refactor/data-model and/or refactor/423-implement-public-api
56+
assert result[0].data_dict == res.data_dict

0 commit comments

Comments
 (0)