|
11 | 11 | fetch_api_graph(ontology, term_id, apis=None, locales=None) |
12 | 12 | process_skos_source(key, source, config_file=None, locales=None) |
13 | 13 | match_snomed(url, config_file=MENU_CONFIG) |
| 14 | + match_ontology_term(url, config_file=MENU_CONFIG) |
14 | 15 | """ |
15 | 16 |
|
16 | 17 | import json |
@@ -476,3 +477,104 @@ def match_snomed(url, config_file=MENU_CONFIG): |
476 | 477 | print(f"Added source '{key}' (title={title!r}, version={version!r}, " |
477 | 478 | f"description={'set' if description else 'not available'}) to {config_file}") |
478 | 479 | return True |
| 480 | + |
| 481 | + |
# Matches a bare CURIE such as ENVO:00010483 or GO:0008150: a prefix that
# starts with a letter, a colon, then a local ID made of word characters.
_CURIE_INPUT_PAT = re.compile(r'^([A-Za-z][A-Za-z0-9]*):(\w+)$')
# Matches OBO shorthand with an underscore separator and a purely numeric
# local part: ENVO_00010483, GO_0008150.
_OBO_SHORTHAND_PAT = re.compile(r'^([A-Za-z][A-Za-z0-9]*)_(\d+)$')
# Matches an OBO Foundry IRI: http(s)://purl.obolibrary.org/obo/PREFIX_localid.
# Anchored at both ends, like the two patterns above, so that an IRI followed
# by extra text (a further path segment, query string, fragment, ...) is
# rejected instead of being silently truncated to PREFIX_localid.
_OBO_IRI_INPUT_PAT = re.compile(
    r'^https?://purl\.obolibrary\.org/obo/([A-Za-z][A-Za-z0-9]*)_(\w+)$'
)
| 488 | + |
| 489 | + |
def _find_api_for_prefix(prefix, apis):
    """Return ``(api_name, api_conf)`` for the first API that lists *prefix*.

    Args:
        prefix: Ontology prefix to look up (e.g. ``"ENVO"``); compared
            case-insensitively.
        apis: Mapping of API name to its configuration mapping, or ``None``.
            Each configuration may carry an ``"ontologies"`` entry, either a
            list of prefixes or a single prefix string.

    Returns:
        The name and configuration of the first API (in mapping order) whose
        ``ontologies`` contains *prefix*.  When none does, ``("ols", conf)``
        where ``conf`` is ``apis["ols"]`` or an empty dict if that entry is
        missing or empty.
    """
    apis = apis or {}
    wanted = prefix.upper()
    for name, conf in apis.items():
        # An API declared with no body parses to None (or some other
        # non-mapping); it cannot claim any ontology, so skip it instead of
        # failing on ``.get``.
        if not isinstance(conf, dict):
            continue
        ontologies = conf.get("ontologies") or []
        if isinstance(ontologies, str):
            # A bare string is one prefix, not a sequence of characters.
            ontologies = [ontologies]
        # str() keeps non-string list members (numbers, booleans) from
        # raising on ``.upper()``.
        if any(str(item).upper() == wanted for item in ontologies):
            return name, conf
    return "ols", apis.get("ols") or {}
| 502 | + |
| 503 | + |
def _parse_ontology_term_ref(url):
    """Split *url* into ``(prefix, term_id)``; ``(None, None)`` if unrecognised.

    The accepted spellings are tried in this order: bare CURIE, OBO
    shorthand, OBO Foundry IRI.
    """
    found = _CURIE_INPUT_PAT.match(url) or _OBO_SHORTHAND_PAT.match(url)
    if found:
        return found.group(1), found.group(2)
    found = _OBO_IRI_INPUT_PAT.match(url)
    if found:
        # NOTE(review): only the IRI form upper-cases the prefix; the CURIE
        # and shorthand forms keep the caller's casing, so "envo:1" and the
        # equivalent IRI yield different source keys.  Confirm whether that
        # asymmetry is intended before normalising.
        return found.group(1).upper(), found.group(2)
    return None, None


def match_ontology_term(url, config_file=MENU_CONFIG):
    """Register *url* as a source if it names an ontology term.

    Accepted forms:
        ENVO:00010483                                 (bare CURIE, colon separator)
        ENVO_00010483                                 (OBO shorthand, underscore + numeric ID)
        http://purl.obolibrary.org/obo/ENVO_00010483  (OBO Foundry IRI)

    The prefix is looked up in the ``apis`` block of the configuration file
    to find which configured API handles the ontology (falling back to
    ``ols`` when none claims it).  That API is recorded under
    ``reachable_from`` for later ``-l`` expansion; the term label,
    description and ontology version are always fetched through the OLS4
    helpers, regardless of which API is recorded.

    Args:
        url: The term reference given by the user.
        config_file: Path of the YAML configuration to read and update.

    Returns:
        ``False`` if *url* is not one of the accepted forms (nothing is read
        or written).  ``True`` if it was handled: either a new source entry
        was written to *config_file*, or an entry with the same key already
        existed and was left untouched.
    """
    prefix, term_id = _parse_ontology_term_ref(url)
    if not prefix:
        return False

    with open(config_file) as config_stream:
        config = yaml.safe_load(config_stream) or {}

    apis = config.get("apis") or {}
    api_name, api_conf = _find_api_for_prefix(prefix, apis)

    key = f"{prefix}_{term_id}"
    curie = f"{prefix}:{term_id}"

    # ``sources:`` with no value parses to None, which ``.get(k, {})`` would
    # hand back as-is; normalise it the same way ``apis`` is normalised above.
    existing_sources = config.get("sources") or {}
    if key in existing_sources:
        print(f" Skipping {url}: source key '{key}' already exists in {config_file}",
              file=sys.stderr)
        return True

    api_type = "sparql" if _get_type_conf(api_conf, "sparql") else "rest"

    # Always use OLS4 for the initial label/description lookup — it is public
    # and free, and avoids auth issues with BioPortal or SPARQL endpoints.
    # The api_name/api_type in the source entry controls -l routing only.
    ols_conf = apis.get("ols") or {}
    iri_base = resolve_ols4_iri_base(prefix, ols_conf)
    concept_iri = iri_base + term_id

    print(f" Fetching OLS4 ontology metadata for {prefix} ...")
    meta = _fetch_ols4_ontology_meta(prefix, ols_conf)
    version = meta.get("version") or None

    print(f" Fetching OLS4 term info for {concept_iri} ...")
    term_info = _fetch_ols4_term_info(prefix, concept_iri, ols_conf)
    # Fall back to the source key when the term has no label.
    title = term_info["label"] or key
    description = term_info["description"] or None

    entry = make_source_entry(key, concept_iri, "OntologyAPI", "json",
                              title=title, version=version, description=description)
    entry["prefixes"] = {prefix: iri_base}
    entry["reachable_from"] = {
        "api": {api_name: {"type": api_type}},
        "source_nodes": [curie],
        "include_self": True,
    }

    # Replace a missing or null ``sources`` block with a fresh mapping before
    # inserting; ``setdefault`` alone would return the stored None.
    if not config.get("sources"):
        config["sources"] = {}
    config["sources"][key] = entry
    write_config(config, config_file)
    print(f"Added source '{key}' (api={api_name}, title={title!r}) to {config_file}")
    print(f" Run: menu_manager.py -l {key} to expand the hierarchy via {api_name}")
    return True
0 commit comments