|
| 1 | +"""Tracker for missing @id references during database ingestion.""" |
| 2 | + |
| 3 | +from dataclasses import dataclass, field |
| 4 | +from typing import List, Optional |
| 5 | + |
| 6 | +from esgvoc.core.exceptions import EsgvocMissingLinksError |
| 7 | + |
| 8 | + |
@dataclass
class MissingLinkInfo:
    """Record describing a single @id reference that could not be resolved.

    Attributes:
        ingestion_context: Context where the missing link was found
            (e.g., 'universe' or 'project:cmip7').
        current_term: The URI/path of the term being processed when the
            missing link was found.
        string_value: The string value that could not be resolved.
        expected_uri: The full URI that was expected to exist.
        local_path: The local path that was tried.
        property_name: The property name where the missing link was found,
            or None when it is not available.
    """

    ingestion_context: str
    current_term: str
    string_value: str
    expected_uri: str
    local_path: str
    property_name: Optional[str] = None
| 30 | + |
| 31 | + |
@dataclass
class MissingLinksTracker:
    """Collect unresolved @id references found during database ingestion.

    Entries accumulate in ``missing_links`` through ``add`` or
    ``add_from_params``. After ingestion the caller can print them with
    ``print_summary`` and/or convert them into an exception with
    ``check_and_raise``. The tracker is meant to be used when the
    ``fail_on_missing_links`` option is enabled; it does not read that
    option itself.
    """

    # Every missing link recorded so far, in insertion order.
    missing_links: List[MissingLinkInfo] = field(default_factory=list)

    def add(self, link: MissingLinkInfo) -> None:
        """Record one missing link.

        Args:
            link: The missing-link description to append to the tracker.
        """
        self.missing_links.append(link)

    def add_from_params(
        self,
        ingestion_context: str,
        current_term: str,
        string_value: str,
        expected_uri: str,
        local_path: str,
        property_name: Optional[str] = None,
    ) -> None:
        """Build a MissingLinkInfo from individual values and record it.

        Args:
            ingestion_context: Context where the missing link was found.
            current_term: Term being processed when the link was found.
            string_value: The value that could not be resolved.
            expected_uri: The full URI that was expected to exist.
            local_path: The local path that was tried.
            property_name: Property holding the reference, if known.
        """
        self.add(
            MissingLinkInfo(
                ingestion_context=ingestion_context,
                current_term=current_term,
                string_value=string_value,
                expected_uri=expected_uri,
                local_path=local_path,
                property_name=property_name,
            )
        )

    def has_missing_links(self) -> bool:
        """Return True when at least one missing link has been recorded."""
        return bool(self.missing_links)

    def check_and_raise(self) -> None:
        """Raise if any missing links were recorded; otherwise do nothing.

        Raises:
            EsgvocMissingLinksError: Constructed from a snapshot of the
                recorded links when the tracker is not empty.
        """
        if self.has_missing_links():
            # Pass a copy, not the live list: clear() mutates the list in
            # place, so sharing it would let a later clear() (e.g. in a
            # finally block) wipe the data carried by the exception.
            raise EsgvocMissingLinksError(list(self.missing_links))

    def print_summary(self) -> bool:
        """Print a report of all missing links to standard output.

        Returns:
            True if at least one missing link was printed, False if the
            tracker was empty (in which case nothing is printed).
        """
        if not self.has_missing_links():
            return False

        report = [f"\nFound {len(self.missing_links)} unresolved @id reference(s):"]
        for link in self.missing_links:
            report.append(f" - Context: {link.ingestion_context}")
            report.append(f" Term: {link.current_term}")
            # The property line is skipped when the name is None or empty.
            if link.property_name:
                report.append(f" Property: {link.property_name}")
            report.append(f" Missing ID: {link.string_value}")
            report.append(f" Expected URI: {link.expected_uri}")
        # Trailing empty entry yields the blank line that ends the report.
        report.append("")
        print("\n".join(report))
        return True

    def clear(self) -> None:
        """Remove every recorded missing link (the list is emptied in place)."""
        self.missing_links.clear()
0 commit comments