jmchilton
diff --git a/‎lib/galaxy/model/dataset_collections/types/collection_semantics.yml‎
Lines changed: 184 additions & 0 deletions b/‎lib/galaxy/model/dataset_collections/types/collection_semantics.yml‎
Lines changed: 184 additions & 0 deletions
diff --git a/‎lib/galaxy/model/dataset_collections/types/semantics.py‎
Lines changed: 89 additions & 2 deletions b/‎lib/galaxy/model/dataset_collections/types/semantics.py‎
Lines changed: 89 additions & 2 deletions
diff --git a/‎lib/galaxy/tool_util/workflow_state/connection_types.py‎
Lines changed: 33 additions & 7 deletions b/‎lib/galaxy/tool_util/workflow_state/connection_types.py‎
Lines changed: 33 additions & 7 deletions
diff --git a/‎lib/galaxy/tool_util/workflow_state/connection_validation.py‎
Lines changed: 12 additions & 5 deletions b/‎lib/galaxy/tool_util/workflow_state/connection_validation.py‎
Lines changed: 12 additions & 5 deletions
@@ -52,12 +52,36 @@ class WorkflowRuntimeTest(BaseModel):
     framework_test: Optional[str] = None
 
 
+class AlgebraCaseRef(BaseModel):
+    """Pointer into connection_type_cases.yml. Algebra cases describe the
+    pure type calculus (can_match / can_map_over / compatible /
+    effective_map_over) used by every surface that reasons about connection
+    validity. They are NOT a property of any one surface - they cover the
+    shared library underneath the editor, the workflow invocation system,
+    and the format2 validator.
+
+    A reference is resolved by its exact ``(op, output, input)`` triple, so
+    all three fields must equal an entry declared in the cases file."""
+
+    # Name of the algebra operation the referenced case exercises.
+    op: Literal["can_match", "can_map_over", "compatible", "effective_map_over"]
+    # Output-side value of the referenced case; None matches a case whose
+    # ``output`` key is absent or YAML null.
+    output: Optional[str] = None
+    # Input-side value of the referenced case; same None convention as output.
+    input: Optional[str] = None
+
+
+class WorkflowFormatValidationTest(BaseModel):
+    """Tracks format2-level validation coverage - specifically, whether the
+    format2 connection validator has an end-to-end fixture exercising this
+    example. Analogous to workflow_editor/workflow_runtime: scoped to one
+    execution surface. Algebra coverage is tracked separately under
+    ``tests.algebra`` because it cross-cuts every surface."""
+
+    # Fixture name without extension: the reference check looks for
+    # ``<fixture>.gxwf.yml`` inside connection_workflows_dir(). When left as
+    # None (or empty) no file check is performed for the example.
+    fixture: Optional[str] = None
+
+
 class ExampleTests(BaseModel):
+    # Test references attached to one example, one optional field per
+    # surface; extra="forbid" makes unknown keys a validation error.
     model_config = ConfigDict(extra="forbid")
 
     tool_runtime: Optional[ToolRuntimeTest] = None
     workflow_runtime: Optional[WorkflowRuntimeTest] = None
     workflow_editor: Optional[str] = None
+    # Format2 connection-validator fixture reference for this example.
+    workflow_format_validation: Optional[WorkflowFormatValidationTest] = None
+    # References into connection_type_cases.yml, checked by validate_algebra_refs.
+    algebra: Optional[list[AlgebraCaseRef]] = None
 
 
 class DatasetsDeclaration(BaseModel):
@@ -374,20 +398,83 @@ def main(argv=None) -> None:
     generate_docs()
 
 
-def _load_examples() -> list["Example"]:
+def _workflow_state_test_dir() -> str:
+    """Return ``test/unit/tool_util/workflow_state`` joined onto galaxy_directory()."""
+    relative_parts = ("test", "unit", "tool_util", "workflow_state")
+    return os.path.join(galaxy_directory(), *relative_parts)
+
+
+def connection_workflows_dir() -> str:
+    """Return the ``connection_workflows`` directory inside the workflow_state test dir."""
+    parent = _workflow_state_test_dir()
+    return os.path.join(parent, "connection_workflows")
+
+
+def connection_type_cases_path() -> str:
+    """Return the path of ``connection_type_cases.yml`` inside the workflow_state test dir."""
+    parent = _workflow_state_test_dir()
+    return os.path.join(parent, "connection_type_cases.yml")
+
+
+def load_examples() -> list["Example"]:
+    """Load the examples declared in collection_semantics.yml.
+
+    The YAML is read as a package resource of
+    ``galaxy.model.dataset_collections.types``, validated through
+    ``YAMLRootModel``, and only the ``ExampleEntry`` items are kept; the
+    returned list holds their ``.example`` objects in document order."""
     semantics_yaml = yaml.safe_load(
         resource_string("galaxy.model.dataset_collections.types", "collection_semantics.yml")
     )
     root = YAMLRootModel.model_validate(semantics_yaml)
     return [e.example for e in root.root if isinstance(e, ExampleEntry)]
 
 
+# Back-compat alias: the loader used to be private (``_load_examples``);
+# keep the old name bound so existing references keep working.
+_load_examples = load_examples
+
+
 def check() -> list[str]:
+    """Run every reference validator over the loaded examples.
+
+    Returns the concatenated error messages of all validators, in the order
+    the validators are called; an empty list means nothing was reported."""
-    examples = _load_examples()
+    examples = load_examples()
     errors: list[str] = []
     errors.extend(validate_api_test_refs(examples))
     errors.extend(validate_tool_refs(examples))
     errors.extend(validate_workflow_editor_refs(examples))
+    errors.extend(validate_workflow_format_validation_refs(examples))
+    errors.extend(validate_algebra_refs(examples))
+    return errors
+
+
+def _load_type_case_keys() -> set[tuple[str, Optional[str], Optional[str]]]:
+    """Return the ``(op, output, input)`` triples declared in connection_type_cases.yml.
+
+    ``output`` and ``input`` are read with ``.get``, so a missing key and a
+    key set to YAML ``null`` (unquoted ``NULL`` deserializes the same way)
+    both come back as Python ``None``; callers match on ``None`` directly.
+
+    Returns an empty set when the cases file does not exist or is empty.
+    Raises ``KeyError`` if an entry has no ``op`` key.
+    """
+    try:
+        # Read as UTF-8 explicitly rather than relying on the locale's
+        # default encoding, which differs between platforms.
+        with open(connection_type_cases_path(), encoding="utf-8") as f:
+            cases = yaml.safe_load(f) or []
+    except FileNotFoundError:
+        # A missing cases file is a supported state: nothing is declared.
+        return set()
+    return {(c["op"], c.get("output"), c.get("input")) for c in cases}
+
+
+def validate_workflow_format_validation_refs(examples: list["Example"]) -> list[str]:
+    """Check that every referenced format-validation fixture file exists.
+
+    For each example that names a ``workflow_format_validation`` fixture,
+    ``<fixture>.gxwf.yml`` must be present in connection_workflows_dir().
+    Examples without tests, without the section, or without a fixture name
+    are ignored. Returns one message per missing file.
+    """
+    base_dir = connection_workflows_dir()
+    problems: list[str] = []
+    for example in examples:
+        tests = example.tests
+        spec = tests.workflow_format_validation if tests else None
+        fixture = spec.fixture if spec else None
+        if not fixture:
+            continue
+        if os.path.exists(os.path.join(base_dir, f"{fixture}.gxwf.yml")):
+            continue
+        problems.append(f"[{example.label}] workflow_format_validation fixture not found: {fixture}.gxwf.yml")
+    return problems
+
+
+def validate_algebra_refs(examples: list["Example"]) -> list[str]:
+    """Check that every ``tests.algebra`` reference names a declared case.
+
+    Each reference is looked up by its ``(op, output, input)`` triple among
+    the cases loaded from connection_type_cases.yml. When that file is not
+    present the check is skipped and no errors are reported. Returns one
+    message per unresolved reference.
+    """
+    errors: list[str] = []
+    if not os.path.exists(connection_type_cases_path()):
+        # No cases file to validate against: report nothing.
+        return errors
+    known_cases = _load_type_case_keys()
+    for example in examples:
+        refs = example.tests.algebra if example.tests else None
+        for ref in refs or ():
+            if (ref.op, ref.output, ref.input) in known_cases:
+                continue
+            errors.append(
+                f"[{example.label}] algebra entry not found in "
+                f"connection_type_cases.yml: "
+                f"op={ref.op} output={ref.output!r} input={ref.input!r}"
+            )
     return errors
 
 
 
@@ -4,9 +4,9 @@
 (NULL_COLLECTION_TYPE, ANY_COLLECTION_TYPE) and free functions for
 connection validation (can_match, can_map_over, effective_map_over).
 
-The base class methods (can_match_type, has_subcollections_of_type,
-effective_collection_type) do the real work — this module handles
-sentinel dispatch and the dataset (non-collection) case.
+The base class methods (accepts, can_map_over, effective_collection_type)
+do the real work — this module handles sentinel dispatch and the dataset
+(non-collection) case.
 """
 
 from typing import (
@@ -72,7 +72,8 @@ def can_match(
 ) -> bool:
     """Can output directly satisfy input (no mapping)?
 
-    Convention: can_match(output, input) — matches TS inputType.canMatch(outputType).
+    Convention: can_match(output, input) — wraps the asymmetric edge check
+    input_type.accepts(output) with sentinel handling.
     """
     if output is NULL_COLLECTION_TYPE or input_type is NULL_COLLECTION_TYPE:
         return False
@@ -83,7 +84,7 @@ def can_match(
     assert isinstance(input_type, CollectionTypeDescription)
     assert isinstance(output, CollectionTypeDescription)
     for variant in _split_collection_type(input_type):
-        if variant.can_match_type(output):
+        if variant.accepts(output):
             return True
     return False
 
@@ -108,11 +109,36 @@ def can_map_over(
         return True
     assert isinstance(input_type, CollectionTypeDescription)
     for variant in _split_collection_type(input_type):
-        if output.has_subcollections_of_type(variant):
+        if output.can_map_over(variant):
             return True
     return False
 
 
+def compatible(
+    a: CollectionTypeOrSentinel,
+    b: CollectionTypeOrSentinel,
+) -> bool:
+    """Symmetric check: could a and b drive a common map-over as siblings?
+
+    Sentinels are resolved here before delegating to the symmetric base
+    method ``CollectionTypeDescription.compatible``:
+
+    - NULL pairs only with NULL (both NULL -> True, exactly one -> False);
+    - otherwise, if either side is ANY the answer is True;
+    - otherwise the result is ``a.compatible(b)``.
+
+    Swapping the arguments must not change the answer. Intended for sites
+    resolving sibling map-over contributions where neither side is the
+    input slot — pre-rebase code used asymmetric can_match here and
+    produced order-dependent results.
+    """
+    a_is_null = a is NULL_COLLECTION_TYPE
+    b_is_null = b is NULL_COLLECTION_TYPE
+    if a_is_null or b_is_null:
+        # At least one NULL: compatible only when both are.
+        return a_is_null and b_is_null
+    if any(side is ANY_COLLECTION_TYPE for side in (a, b)):
+        return True
+    # Both sentinels are excluded above; narrow for the type checker.
+    assert isinstance(a, CollectionTypeDescription)
+    assert isinstance(b, CollectionTypeDescription)
+    return a.compatible(b)
+
+
 def is_list_like(ctd: CollectionTypeDescription) -> bool:
     """Is this a list-like collection type (eligible for multi-data reduction)?
 
@@ -142,7 +168,7 @@ def effective_map_over(
     assert isinstance(input_type, CollectionTypeDescription)
     # Find the matching variant for comma-separated types
     for variant in _split_collection_type(input_type):
-        if output.has_subcollections_of_type(variant):
+        if output.can_map_over(variant):
             effective = output.effective_collection_type(variant)
             return COLLECTION_TYPE_DESCRIPTION_FACTORY.for_collection_type(effective)
     return None
@@ -37,7 +37,9 @@
 from .connection_types import (
     ANY_COLLECTION_TYPE,
     can_match,
+    collection_type_rank,
     CollectionTypeOrSentinel,
+    compatible,
     effective_map_over,
     is_list_like,
     NULL_COLLECTION_TYPE,
@@ -268,7 +270,7 @@ def _ok(mapping: Optional[str] = None):
                 # Simple list -> multi-data: full reduction, no map-over
                 # Deeper (list:list, list:paired, etc.) -> multi-data: outer levels map over
                 inner_list = COLLECTION_TYPE_DESCRIPTION_FACTORY.for_collection_type("list")
-                if source_type.has_subcollections_of_type(inner_list):
+                if source_type.can_map_over(inner_list):
                     remaining = source_type.effective_collection_type(inner_list)
                     return _ok(mapping=remaining)
                 return _ok()
@@ -302,18 +304,23 @@ def _resolve_step_map_over(
 ) -> Optional[CollectionTypeDescription]:
     """Resolve effective map-over from all connection contributions.
 
-    All non-None map-over types must be identical. If any disagree,
-    report an error (the step can't satisfy both map-over structures).
+    Pairs of non-None contributions are checked with the symmetric
+    ``compatible`` so order of arrival of sibling inputs doesn't change the
+    answer (matches TS ``mappingConstraints``). The resolved map-over is
+    the highest-rank compatible type — TS picks "most specific" the same
+    way.
     """
     non_none = [c for c in contributions if c is not None]
     if not non_none:
         return None
 
     best = non_none[0]
     for ctd in non_none[1:]:
-        if ctd.collection_type != best.collection_type:
+        if not compatible(best, ctd):
             step_result.errors.append(f"Incompatible map-over types: {best.collection_type} vs {ctd.collection_type}")
-            return best  # return something, error recorded
+            return best
+        if collection_type_rank(ctd) > collection_type_rank(best):
+            best = ctd
 
     return best