diff --git a/client/src/components/Workflow/Editor/modules/collectionTypeDescription.test.ts b/client/src/components/Workflow/Editor/modules/collectionTypeDescription.test.ts new file mode 100644 index 000000000000..e785a9011910 --- /dev/null +++ b/client/src/components/Workflow/Editor/modules/collectionTypeDescription.test.ts @@ -0,0 +1,74 @@ +import { describe, expect, it } from "vitest"; + +import { + ANY_COLLECTION_TYPE_DESCRIPTION, + CollectionTypeDescription, + NULL_COLLECTION_TYPE_DESCRIPTION, +} from "./collectionTypeDescription"; + +const ct = (collectionType: string) => new CollectionTypeDescription(collectionType); + +describe("accepts (asymmetric subtype check)", () => { + it("same type accepts itself", () => { + expect(ct("list").accepts(ct("list"))).toBe(true); + expect(ct("paired").accepts(ct("paired"))).toBe(true); + expect(ct("list:paired").accepts(ct("list:paired"))).toBe(true); + }); + + it("paired_or_unpaired requirement is satisfied by paired candidate (not vice versa)", () => { + expect(ct("paired_or_unpaired").accepts(ct("paired"))).toBe(true); + expect(ct("paired").accepts(ct("paired_or_unpaired"))).toBe(false); + expect(ct("list:paired_or_unpaired").accepts(ct("list:paired"))).toBe(true); + expect(ct("list:paired").accepts(ct("list:paired_or_unpaired"))).toBe(false); + }); + + it("list requirement is satisfied by sample_sheet candidate (not vice versa)", () => { + expect(ct("list").accepts(ct("sample_sheet"))).toBe(true); + expect(ct("sample_sheet").accepts(ct("list"))).toBe(false); + expect(ct("list:paired").accepts(ct("sample_sheet:paired"))).toBe(true); + expect(ct("sample_sheet:paired").accepts(ct("list:paired"))).toBe(false); + }); + + it("disjoint types do not accept each other", () => { + expect(ct("paired").accepts(ct("list"))).toBe(false); + expect(ct("list").accepts(ct("paired"))).toBe(false); + }); + + it("ANY accepts any non-null collection", () => { + expect(ANY_COLLECTION_TYPE_DESCRIPTION.accepts(ct("list"))).toBe(true); + expect(ANY_COLLECTION_TYPE_DESCRIPTION.accepts(NULL_COLLECTION_TYPE_DESCRIPTION)).toBe(false); + }); + + it("NULL accepts nothing", () => { + expect(NULL_COLLECTION_TYPE_DESCRIPTION.accepts(ct("list"))).toBe(false); + expect(NULL_COLLECTION_TYPE_DESCRIPTION.accepts(ANY_COLLECTION_TYPE_DESCRIPTION)).toBe(false); + }); +}); + +describe("compatible (symmetric sibling-matching check)", () => { + it("is symmetric for subtype pairs", () => { + // sample_sheet <-> list + expect(ct("list").compatible(ct("sample_sheet"))).toBe(true); + expect(ct("sample_sheet").compatible(ct("list"))).toBe(true); + expect(ct("list:paired").compatible(ct("sample_sheet:paired"))).toBe(true); + expect(ct("sample_sheet:paired").compatible(ct("list:paired"))).toBe(true); + + // paired <-> paired_or_unpaired + expect(ct("paired").compatible(ct("paired_or_unpaired"))).toBe(true); + expect(ct("paired_or_unpaired").compatible(ct("paired"))).toBe(true); + expect(ct("list:paired").compatible(ct("list:paired_or_unpaired"))).toBe(true); + expect(ct("list:paired_or_unpaired").compatible(ct("list:paired"))).toBe(true); + }); + + it("same type is compatible with itself", () => { + expect(ct("list").compatible(ct("list"))).toBe(true); + expect(ct("paired").compatible(ct("paired"))).toBe(true); + }); + + it("disjoint types are not compatible (either order)", () => { + expect(ct("paired").compatible(ct("list"))).toBe(false); + expect(ct("list").compatible(ct("paired"))).toBe(false); + expect(ct("list:paired").compatible(ct("list:list"))).toBe(false); + expect(ct("list:list").compatible(ct("list:paired"))).toBe(false); + }); +}); diff --git a/client/src/components/Workflow/Editor/modules/collectionTypeDescription.ts b/client/src/components/Workflow/Editor/modules/collectionTypeDescription.ts index 3932fc77c490..bcb69218ce60 100644 --- a/client/src/components/Workflow/Editor/modules/collectionTypeDescription.ts +++ b/client/src/components/Workflow/Editor/modules/collectionTypeDescription.ts @@ -1,4 +1,21 @@ -/* classes for reasoning about collection map over state +/* Classes for reasoning about collection map-over state and the + compatibility algebra. + + Two operations on collection types: + - accepts(other): asymmetric. input_type.accepts(output_type) is true + iff an output of type other can be connected to an input slot of + type this. Used at workflow-editor edge validation. + - compatible(other): symmetric. True iff this and other match such + that they could drive a common map-over over sibling inputs of one + tool. Used for sibling map-over state checks (neither side is the + input slot), e.g. mappingConstraints in terminals.ts. + + Mirrors the Python implementation in + lib/galaxy/model/dataset_collections/type_description.py — keep in sync. + See lib/galaxy/model/dataset_collections/types/collection_semantics.yml + "Type Compatibility Algebra" for the lattice diagram and worked examples. + + Type variants: null: not a collection? NULL_COLLECTION_TYPE_DESCRIPTION: also not a collection. Is there any difference with null ? ANY_COLLECTION_TYPE_DESCRIPTION: collection, but will never be mapped over (input has no collection_type) @@ -9,7 +26,8 @@ export interface CollectionTypeDescriptor { isCollection: boolean; collectionType: string | null; rank: number; - canMatch(other: CollectionTypeDescriptor): boolean; + accepts(other: CollectionTypeDescriptor): boolean; + compatible(other: CollectionTypeDescriptor): boolean; canMapOver(other: CollectionTypeDescriptor): boolean; append(other: CollectionTypeDescriptor): CollectionTypeDescriptor; equal(other: CollectionTypeDescriptor | null): boolean; @@ -21,7 +39,10 @@ export const NULL_COLLECTION_TYPE_DESCRIPTION: CollectionTypeDescriptor = { isCollection: false, collectionType: null, rank: 0, - canMatch: function (_other) { + accepts: function (_other) { + return false; + }, + compatible: function (_other) { return false; }, canMapOver: function () { @@ -45,7 +66,10 @@ export const ANY_COLLECTION_TYPE_DESCRIPTION: CollectionTypeDescriptor = { isCollection: true, collectionType: "any", rank: -1, - canMatch: function (other) { + accepts: function (other) { + return NULL_COLLECTION_TYPE_DESCRIPTION !== other; + }, + compatible: function (other) { return NULL_COLLECTION_TYPE_DESCRIPTION !== other; }, canMapOver: function () { @@ -95,7 +119,13 @@ export class CollectionTypeDescription implements CollectionTypeDescriptor { } return new CollectionTypeDescription(`${this.collectionType}:${other.collectionType}`); } - canMatch(other: CollectionTypeDescriptor) { + /** + * Asymmetric direct-edge check: does an input slot of type ``this`` + * accept an output of type ``other``? Convention: + * ``input_type.accepts(output_type)``. Used at workflow-editor edge + * validation. + */ + accepts(other: CollectionTypeDescriptor) { const otherCollectionType = other.collectionType; if (other === NULL_COLLECTION_TYPE_DESCRIPTION) { return false; @@ -103,6 +133,16 @@ export class CollectionTypeDescription implements CollectionTypeDescriptor { if (other === ANY_COLLECTION_TYPE_DESCRIPTION) { return true; } + // sample_sheet asymmetry: a sample_sheet input needs column metadata + // that a plain-list output cannot provide. Check raw types before + // normalization (which otherwise equates the two). + if ( + this.collectionType.startsWith("sample_sheet") && + otherCollectionType && + !otherCollectionType.startsWith("sample_sheet") + ) { + return false; + } const normalizedThis = normalizeCollectionType(this.collectionType); const normalizedOther = otherCollectionType ? normalizeCollectionType(otherCollectionType) : null; if (normalizedOther === "paired" && normalizedThis == "paired_or_unpaired") { @@ -120,10 +160,25 @@ export class CollectionTypeDescription implements CollectionTypeDescriptor { } return normalizedOther == normalizedThis; } + /** + * Symmetric sibling-matching check: do ``this`` and ``other`` match + * such that they could drive a common map-over over sibling inputs of + * a single tool? Used for sibling map-over state checks + * (terminals.ts mappingConstraints). + */ + compatible(other: CollectionTypeDescriptor) { + return this.accepts(other) || other.accepts(this); + } canMapOver(other: CollectionTypeDescriptor) { if (!other.collectionType || other.collectionType === "any") { return false; } + // sample_sheet asymmetry: a sample_sheet input requires sample_sheet + // column metadata; a plain-list output cannot be mapped over it. + // ``this`` is the output, ``other`` is the input. + if (other.collectionType.startsWith("sample_sheet") && !this.collectionType.startsWith("sample_sheet")) { + return false; + } const normalizedThis = normalizeCollectionType(this.collectionType); const normalizedOther = normalizeCollectionType(other.collectionType); if (this.rank <= other.rank) { diff --git a/client/src/components/Workflow/Editor/modules/terminals.test.ts b/client/src/components/Workflow/Editor/modules/terminals.test.ts index 5396a6a8b9f2..ac343954024a 100644 --- a/client/src/components/Workflow/Editor/modules/terminals.test.ts +++ b/client/src/components/Workflow/Editor/modules/terminals.test.ts @@ -700,7 +700,7 @@ describe("canAccept", () => { expect(dataIn.canAccept(collectionOut).canAccept).toBe(true); expect(dataIn.mapOver).toEqual(NULL_COLLECTION_TYPE_DESCRIPTION); }); - it("accepts sample_sheet -> list connection (canMatch)", () => { + it("accepts sample_sheet -> list connection (accepts)", () => { const collectionOut = terminals["sample_sheet input"]!["output"] as OutputCollectionTerminal; const dataIn = terminals["list collection input"]!["input1"] as InputCollectionTerminal; expect(dataIn.mapOver).toBe(NULL_COLLECTION_TYPE_DESCRIPTION); @@ -716,7 +716,7 @@ describe("canAccept", () => { dataIn.connect(collectionOut); expect(dataIn.mapOver).toEqual({ collectionType: "sample_sheet", isCollection: true, rank: 1 }); }); - it("accepts sample_sheet:paired -> list:paired connection (canMatch)", () => { + it("accepts sample_sheet:paired -> list:paired connection (accepts)", () => { const collectionOut = terminals["sample_sheet:paired input"]!["output"] as OutputCollectionTerminal; const dataIn = terminals["list:paired collection input"]!["input1"] as InputCollectionTerminal; expect(dataIn.mapOver).toBe(NULL_COLLECTION_TYPE_DESCRIPTION); @@ -740,7 +740,7 @@ describe("canAccept", () => { dataIn.connect(collectionOut); expect(dataIn.mapOver).toEqual({ collectionType: "sample_sheet", isCollection: true, rank: 1 }); }); - it("accepts sample_sheet:paired_or_unpaired -> list:paired_or_unpaired connection (canMatch)", () => { + it("accepts sample_sheet:paired_or_unpaired -> list:paired_or_unpaired connection (accepts)", () => { const collectionOut = terminals["sample_sheet:paired_or_unpaired input"]!["output"] as OutputCollectionTerminal; const dataIn = terminals["list:paired_or_unpaired collection input"]!["f1"] as InputTerminal; expect(dataIn.mapOver).toBe(NULL_COLLECTION_TYPE_DESCRIPTION); diff --git a/client/src/components/Workflow/Editor/modules/terminals.ts b/client/src/components/Workflow/Editor/modules/terminals.ts index cb4a0cc7d12e..fd262262eec9 100644 --- a/client/src/components/Workflow/Editor/modules/terminals.ts +++ b/client/src/components/Workflow/Editor/modules/terminals.ts @@ -501,19 +501,21 @@ export class InputTerminal extends BaseInputTerminal { ); } } - if (mapOver.isCollection && mapOver.canMatch(otherCollectionType)) { + if (mapOver.isCollection && mapOver.accepts(otherCollectionType)) { return this._producesAcceptableDatatypeAndOptionalness(other); } else if ( this.multiple && - new CollectionTypeDescription("list").append(this.mapOver).canMatch(otherCollectionType) + new CollectionTypeDescription("list").append(this.mapOver).accepts(otherCollectionType) ) { // This handles the special case of a list input being connected to a multiple="true" data input. // Nested lists would be correctly mapped over by the above condition. return this._producesAcceptableDatatypeAndOptionalness(other); } else { // Need to check if this would break constraints... + // Sibling map-over states: use symmetric ``compatible`` so order + // of arrival of sibling inputs doesn't change the answer. const mappingConstraints = this._mappingConstraints(); - if (mappingConstraints.every(otherCollectionType.canMatch.bind(otherCollectionType))) { + if (mappingConstraints.every((constraint) => constraint.compatible(otherCollectionType))) { return this._producesAcceptableDatatypeAndOptionalness(other); } else { if (mapOver.isCollection) { @@ -619,8 +621,8 @@ export class InputCollectionTerminal extends BaseInputTerminal { } _effectiveMapOver(otherCollectionType: CollectionTypeDescriptor) { const collectionTypes = this.collectionTypes; - const canMatch = collectionTypes.some((collectionType) => collectionType.canMatch(otherCollectionType)); - if (!canMatch) { + const directlyAccepted = collectionTypes.some((collectionType) => collectionType.accepts(otherCollectionType)); + if (!directlyAccepted) { for (const collectionTypeIndex in collectionTypes) { const collectionType = collectionTypes[collectionTypeIndex]!; @@ -643,12 +645,16 @@ export class InputCollectionTerminal extends BaseInputTerminal { if (otherCollectionType.isCollection) { const effectiveCollectionTypes = this._effectiveCollectionTypes(); const mapOver = this.mapOver; - const canMatch = effectiveCollectionTypes.some((effectiveCollectionType, i) => { - if (!effectiveCollectionType.canMatch(otherCollectionType)) { + // Defense-in-depth: ``accepts`` carries the sample_sheet asymmetry + // guard, but only when the receiver type itself starts with + // "sample_sheet". A non-null ``localMapOver`` could in principle + // produce an effective type like "list:sample_sheet" that hides + // the guard from ``accepts``. Re-check against the raw declared + // input type to be safe — matches the structure HEAD had inline. + const accepted = effectiveCollectionTypes.some((effectiveCollectionType, i) => { + if (!effectiveCollectionType.accepts(otherCollectionType)) { return false; } - // sample_sheet asymmetry: sample_sheet input requires sample_sheet output, - // but sample_sheet output can satisfy list input. const rawInputType = this.collectionTypes[i]?.collectionType; if ( rawInputType?.startsWith("sample_sheet") && @@ -658,7 +664,7 @@ export class InputCollectionTerminal extends BaseInputTerminal { } return true; }); - if (canMatch) { + if (accepted) { // Only way a direct match... return this._producesAcceptableDatatypeAndOptionalness(other); // Otherwise we need to mapOver @@ -675,26 +681,14 @@ export class InputCollectionTerminal extends BaseInputTerminal { "Can't map over this input with output collection type - this step has outputs defined constraining the mapping of this tool. Disconnect outputs and retry.", ); } - } else if ( - this.collectionTypes.some((collectionType) => { - if (!otherCollectionType.canMapOver(collectionType)) { - return false; - } - // sample_sheet asymmetry: same as canMatch guard above - if ( - collectionType.collectionType?.startsWith("sample_sheet") && - !otherCollectionType.collectionType?.startsWith("sample_sheet") - ) { - return false; - } - return true; - }) - ) { + } else if (this.collectionTypes.some((collectionType) => otherCollectionType.canMapOver(collectionType))) { // we're not mapped over - but hey maybe we could be... lets check. const effectiveMapOver = this._effectiveMapOver(otherCollectionType); // Need to check if this would break constraints... + // Sibling map-over states: use symmetric ``compatible`` so order + // of arrival of sibling inputs doesn't change the answer. const mappingConstraints = this._mappingConstraints(); - if (mappingConstraints.every((d) => effectiveMapOver.canMatch(d))) { + if (mappingConstraints.every((d) => d.compatible(effectiveMapOver))) { return this._producesAcceptableDatatypeAndOptionalness(other); } else { return new ConnectionAcceptable( @@ -857,10 +851,13 @@ export class OutputCollectionTerminal extends BaseOutputTerminal { const otherCollectionType = inputTerminal._otherCollectionType(outputTerminal); // we need to find which of the possible input collection types is connected if ("collectionTypes" in inputTerminal) { - // collection_type_source must point at input collection terminal + // collection_type_source must point at input collection terminal. + // Direction here is input_type.accepts(output_type): the + // receiver is the declared input collection type; the + // argument is the connected output's shape. const connectedCollectionType = inputTerminal.collectionTypes.find( (collectionType) => - otherCollectionType.canMatch(collectionType) || + collectionType.accepts(otherCollectionType) || otherCollectionType.canMapOver(collectionType), ); if (connectedCollectionType) { diff --git a/lib/galaxy/model/dataset_collections/matching.py b/lib/galaxy/model/dataset_collections/matching.py index 600c11892a83..cdf9f0f58add 100644 --- a/lib/galaxy/model/dataset_collections/matching.py +++ b/lib/galaxy/model/dataset_collections/matching.py @@ -62,7 +62,7 @@ def __attempt_add_to_linked_match( self.collections[input_name] = hdca self.subcollection_types[input_name] = subcollection_type else: - if not self.linked_structure.can_match(structure): + if not self.linked_structure.compatible_shape(structure): raise exceptions.MessageException(CANNOT_MATCH_ERROR_MESSAGE) self.collections[input_name] = hdca self.subcollection_types[input_name] = subcollection_type diff --git a/lib/galaxy/model/dataset_collections/query.py b/lib/galaxy/model/dataset_collections/query.py index 3c66b11e3ff5..1e559eff573a 100644 --- a/lib/galaxy/model/dataset_collections/query.py +++ b/lib/galaxy/model/dataset_collections/query.py @@ -63,7 +63,7 @@ def direct_match(self, hdca: HdcaLike) -> bool: collection_type_descriptions = self.collection_type_descriptions if collection_type_descriptions is not None: for collection_type_description in collection_type_descriptions: - matches = collection_type_description.can_match_type(hdca.collection.collection_type) + matches = collection_type_description.accepts(hdca.collection.collection_type) if matches: return True return False @@ -78,6 +78,8 @@ def can_map_over(self, hdca: HdcaLike): hdca_collection_type = hdca.collection.collection_type for collection_type_description in collection_type_descriptions: # See note about the way this is sorted above. - if collection_type_description.is_subcollection_of_type(hdca_collection_type): + factory = collection_type_description.collection_type_description_factory + hdca_type = factory.for_collection_type(hdca_collection_type) + if hdca_type.can_map_over(collection_type_description): return collection_type_description return False diff --git a/lib/galaxy/model/dataset_collections/structure.py b/lib/galaxy/model/dataset_collections/structure.py index 32366409bb52..a4cf1e5a3d2e 100644 --- a/lib/galaxy/model/dataset_collections/structure.py +++ b/lib/galaxy/model/dataset_collections/structure.py @@ -139,8 +139,14 @@ def get_element(collection): def is_leaf(self): return False - def can_match(self, other_structure): - if not self.collection_type_description.can_match_type(other_structure.collection_type_description): + def compatible_shape(self, other_structure): + """Symmetric sibling-matching check. + + Both sides have already passed connection-time edge validation; + here we only compare shape. Uses ``compatible`` (not ``accepts``) + so order of arrival does not change the answer. + """ + if not self.collection_type_description.compatible(other_structure.collection_type_description): return False if len(self.children) != len(other_structure.children): @@ -151,7 +157,7 @@ def can_match(self, other_structure): if my_child[1].is_leaf != other_child[1].is_leaf: return False - if not my_child[1].is_leaf and not my_child[1].can_match(other_child[1]): + if not my_child[1].is_leaf and not my_child[1].compatible_shape(other_child[1]): return False return True @@ -263,7 +269,7 @@ def get_structure( elements below ``leaf_subcollection_type`` are treated as leaves. """ if leaf_subcollection_type: - if not collection_type_description.has_subcollections_of_type(leaf_subcollection_type): + if not collection_type_description.can_map_over(leaf_subcollection_type): # The described collection IS the leaf subcollection (no deeper # structure to strip). Don't enumerate its elements; just record # the type so multiply() can combine it with the mapping structure. diff --git a/lib/galaxy/model/dataset_collections/type_description.py b/lib/galaxy/model/dataset_collections/type_description.py index 7c7f6950a405..d233fb614282 100644 --- a/lib/galaxy/model/dataset_collections/type_description.py +++ b/lib/galaxy/model/dataset_collections/type_description.py @@ -1,3 +1,27 @@ +"""Collection type descriptions and the compatibility algebra. + +Three operations on collection types, each answering a distinct question: + +- ``accepts(other)``: asymmetric direct-edge check. True iff an output + collection of type ``other`` can be connected to an input slot whose + declared type is ``self``. Used at workflow-editor edge validation. + Convention: ``input_type.accepts(output_type)``. +- ``compatible(other)``: symmetric sibling-matching check. True iff two + collection types match such that they could drive a common map-over + over sibling inputs of one tool. Used where neither side is the input + and order of arrival must not change the answer. +- ``can_map_over(other)``: asymmetric nesting check. True iff ``self`` has + proper subcollections of type ``other`` — i.e. ``self`` can be mapped + over to feed a slot expecting ``other``. Convention: + ``output_type.can_map_over(input_type)``. + +The TypeScript equivalents live in +``client/src/components/Workflow/Editor/modules/collectionTypeDescription.ts`` +and must stay in sync (``accepts`` / ``compatible`` / ``canMapOver``). See +``types/collection_semantics.yml`` "Type Compatibility Algebra" for the +lattice diagram and worked examples. +""" + import re from typing import ( Optional, @@ -65,7 +89,7 @@ def effective_collection_type(self, subcollection_type): if hasattr(subcollection_type, "collection_type"): subcollection_type = subcollection_type.collection_type - if not self.has_subcollections_of_type(subcollection_type): + if not self.can_map_over(subcollection_type): raise ValueError(f"Cannot compute effective subcollection type of {subcollection_type} over {self}") if subcollection_type == "single_datasets": @@ -101,17 +125,30 @@ def effective_collection_type(self, subcollection_type): return self.collection_type[: -(len(subcollection_type) + 1)] - def has_subcollections_of_type(self, other_collection_type) -> bool: - """Take in another type (either flat string or another - CollectionTypeDescription) and determine if this collection contains - subcollections matching that type. + def can_map_over(self, other_collection_type) -> bool: + """Asymmetric nesting check: can this collection be mapped over to + feed an input requiring ``other_collection_type``? + + Convention: ``output.can_map_over(input)``. True iff ``self`` has + proper subcollections matching ``other`` — a type is not considered + to map over itself (that's a direct edge, handled by ``accepts``). - The way this is used in map/reduce it seems to make the most sense - for this to return True if these subtypes are proper (i.e. a type - is not considered to have subcollections of its own type). + Mirrors TypeScript ``CollectionTypeDescription.canMapOver``. Naming + kept parallel across languages because both encode the same + operational question at workflow-editor connection time. """ if hasattr(other_collection_type, "collection_type"): other_collection_type = other_collection_type.collection_type + # sample_sheet asymmetry: a sample_sheet input can only be fed by a + # sample_sheet output (a plain-list output lacks the column metadata + # the input expects). ``self`` is the output being mapped over; + # ``other`` is the input collection type. Check before normalization + # (which equates sample_sheet and list). + # Duplicates the asymmetry encoded in ``accepts`` — load-bearing for + # ``multiply`` / ``effective_collection_type`` map-over arithmetic. + # Removing this guard is a separate refactor; see follow-up issue. + if other_collection_type.startswith("sample_sheet") and not self.collection_type.startswith("sample_sheet"): + return False collection_type = _normalize_collection_type(self.collection_type) other_collection_type = _normalize_collection_type(other_collection_type) if collection_type == other_collection_type: @@ -138,14 +175,24 @@ def has_subcollections_of_type(self, other_collection_type) -> bool: return True return False - def is_subcollection_of_type(self, other_collection_type): - if not hasattr(other_collection_type, "collection_type"): - other_collection_type = self.collection_type_description_factory.for_collection_type(other_collection_type) - return other_collection_type.has_subcollections_of_type(self) + def accepts(self, other_collection_type) -> bool: + """Asymmetric direct-edge check: does an input slot of type ``self`` + accept an output of type ``other_collection_type``? + + Convention: ``input_type.accepts(output_type)``. Used at + workflow-editor edge validation. For sibling-matching (where + neither side is the input slot), use ``compatible`` instead. - def can_match_type(self, other_collection_type) -> bool: + See ``types/collection_semantics.yml`` "Type Compatibility Algebra". + """ if hasattr(other_collection_type, "collection_type"): other_collection_type = other_collection_type.collection_type + # sample_sheet asymmetry: a sample_sheet input is only satisfied by a + # sample_sheet output — a plain-list output lacks the column metadata + # the sample_sheet input expects. Check before normalization (which + # otherwise equates the two). + if self.collection_type.startswith("sample_sheet") and not other_collection_type.startswith("sample_sheet"): + return False collection_type = _normalize_collection_type(self.collection_type) other_collection_type = _normalize_collection_type(other_collection_type) if other_collection_type == collection_type: @@ -161,9 +208,25 @@ def can_match_type(self, other_collection_type) -> bool: if other_collection_type == as_paired_list: return True - # can we push this to the type registry somehow? return False + def compatible(self, other_collection_type) -> bool: + """Symmetric sibling-matching check: do ``self`` and ``other`` match + such that they could drive a common map-over over sibling inputs of + a single tool? + + Implemented as ``self.accepts(other) or other.accepts(self)``. Used + at sibling-matching sites (Python ``Tree.compatible_shape`` at + runtime; TS ``mappingConstraints`` at connection time) where + neither side is the input slot and order of arrival should not + change the answer. + + See ``types/collection_semantics.yml`` "Type Compatibility Algebra". + """ + if not hasattr(other_collection_type, "collection_type"): + other_collection_type = self.collection_type_description_factory.for_collection_type(other_collection_type) + return self.accepts(other_collection_type) or other_collection_type.accepts(self) + def subcollection_type_description(self): if not self.__has_subcollections: raise ValueError(f"Cannot generate subcollection type description for flat type {self.collection_type}") diff --git a/lib/galaxy/model/dataset_collections/types/collection_semantics.yml b/lib/galaxy/model/dataset_collections/types/collection_semantics.yml index 650e39e8427b..734b5551d921 100644 --- a/lib/galaxy/model/dataset_collections/types/collection_semantics.yml +++ b/lib/galaxy/model/dataset_collections/types/collection_semantics.yml @@ -1041,7 +1041,7 @@ tests: workflow_runtime: framework_test: "collection_semantics_cat_collection_1" - workflow_editor: "accepts sample_sheet -> list connection (canMatch)" + workflow_editor: "accepts sample_sheet -> list connection (accepts)" - doc: | Sub-collection mapping works the same as for ``list`` composites. A @@ -1096,7 +1096,7 @@ tests: workflow_runtime: framework_test: "collection_semantics_list_paired_0" - workflow_editor: "accepts sample_sheet:paired -> list:paired connection (canMatch)" + workflow_editor: "accepts sample_sheet:paired -> list:paired connection (accepts)" - doc: | The ``paired_or_unpaired`` integration rules carry over from ``list`` to @@ -1183,7 +1183,7 @@ tests: workflow_runtime: framework_test: "collection_semantics_list_paired_or_unpaired_1" - workflow_editor: "accepts sample_sheet:paired_or_unpaired -> list:paired_or_unpaired connection (canMatch)" + workflow_editor: "accepts sample_sheet:paired_or_unpaired -> list:paired_or_unpaired connection (accepts)" - doc: | The type matching is asymmetric: a ``sample_sheet`` output can satisfy a ``list`` @@ -1248,3 +1248,104 @@ workflow_runtime: framework_test: "collection_semantics_cat_sample_sheet_0" workflow_editor: "accepts sample_sheet -> sample_sheet connection" + +- doc: "## Type Compatibility Algebra" +- doc: | + This section is for implementers. It describes the three operations that + answer "do these collection types fit together?" — used at workflow + editor connection time and at runtime when sibling inputs are matched + under a common map-over. + + ### The lattice + + The base types form a small subtype lattice: + + ``` + list paired_or_unpaired + | | + sample_sheet paired + ``` + + Edges are subtype relations. A `sample_sheet` value carries column + metadata that a `list` does not, so it can be substituted where a + `list` is required (information is preserved); the reverse is not safe. + A `paired` value always has two elements; `paired_or_unpaired` admits + 1 or 2, so a `paired` can be substituted where `paired_or_unpaired` is + required, but not the reverse. + + Nesting composes. `list:paired_or_unpaired` is a supertype of both + `list:paired` and `list` (the latter via the "single-dataset wrapped + as unpaired" interpretation), so it has two incomparable subtypes. + + ### Three operations + + | Operation | Symmetry | Question | Used at | + |---|---|---|---| + | `accepts(other)` | Asymmetric | Does an input slot of type `self` accept an output of type `other`? | Workflow-editor edge validation | + | `compatible(other)` | Symmetric | Do `self` and `other` match such that they could drive a common map-over over sibling inputs of one tool? | Sibling-matching: matching sibling HDCAs / sibling map-over states under a common mapping | + | `can_map_over(other)` | Asymmetric | Does `self` have proper subcollections of type `other` — i.e. can `self` be mapped over to feed an `other` slot? | Connection-time map-over decisions; runtime `effective_collection_type` arithmetic | + + Conventions: `input_type.accepts(output_type)` for direct edges; + `output_type.can_map_over(input_type)` for map-over. `accepts` and + `can_map_over` differ in that `accepts` is the direct-edge case + where ranks already align, while `can_map_over` is the strict nesting + case where the output has *more* rank than the input. `compatible` + is symmetric and either side may go first. + + `compatible(a, b)` is implemented as `a.accepts(b) or b.accepts(a)`. + + ### Where each is used + + - `accepts` is called at single-edge validation: connecting one output + to one input. The asymmetry between input and output sides matters + here. Examples: `connection_types.can_match`, + `query.HistoryQuery.direct_match`, and the workflow-editor input + attachment paths in `terminals.ts`. + + - `compatible` is called when two collections must drive a common + map-over as siblings. Neither side is the input slot; both are + concrete shapes (observed HDCA shapes at runtime, sibling map-over + states at connection time). Order of arrival must not change the + answer. Examples: Python `Tree.compatible_shape` (matching.py, + execute.py) and the `mappingConstraints` checks in `terminals.ts`. + + - `can_map_over` is called when deciding whether an output of higher + rank can drive a map-over into a lower-rank input — for instance, a + `list:paired` output feeding a `paired` input by iterating the outer + list. The Python and TypeScript names match + (`can_map_over` / `canMapOver`) because it is the same operational + question in both layers. + + Routing a sibling-matching question through `accepts` instead of + `compatible` produces order-dependent behavior — which sibling input + arrived first changes whether the workflow validates. This was a real + bug in earlier revisions of both the Python and TypeScript code. + + ### Worked examples + + - `paired.accepts(paired_or_unpaired)` is `False`. A 1-element + paired_or_unpaired output cannot be connected to an input slot + that strictly requires a pair. Test: `test_paired_accepts_relation`. + + - `paired.compatible(paired_or_unpaired)` is `True`. If both observed + sibling collections happen to align in cardinality, they match for + sibling iteration; cardinality checking happens later at the + children level. Test: + `test_paired_and_paired_or_unpaired_match_symmetric`. + + - `sample_sheet.accepts(list)` is `False`; `list.accepts(sample_sheet)` + is `True`. Tests: + `test_sample_sheet_accepts_relation`, workflow editor + `"rejects list -> sample_sheet connection (asymmetry)"`. + + - `sample_sheet.compatible(list)` and `list.compatible(sample_sheet)` + are both `True`. Tests: `test_compatible`, + `test_tree_compatible_shape_sample_sheet_list_symmetric`. + + ### Cross-language synchronization + + The Python implementation lives in + `lib/galaxy/model/dataset_collections/type_description.py`. The + TypeScript implementation lives in + `client/src/components/Workflow/Editor/modules/collectionTypeDescription.ts`. + Both must stay in sync; method names and conventions are identical. diff --git a/lib/galaxy/tools/execute.py b/lib/galaxy/tools/execute.py index 6df94dbd5954..b0ddd6c1649e 100644 --- a/lib/galaxy/tools/execute.py +++ b/lib/galaxy/tools/execute.py @@ -572,7 +572,7 @@ def _structure_for_output(self, trans, tool_output): _structure = structure.for_dataset_collection( source_collection.collection, collection_type_description=collection_type_description ) - if structure.can_match(_structure): + if structure.compatible_shape(_structure): structure = _structure return structure diff --git a/test/unit/data/dataset_collections/test_matching.py b/test/unit/data/dataset_collections/test_matching.py index 0528166f1ee0..ce2cd513bbd0 100644 --- a/test/unit/data/dataset_collections/test_matching.py +++ b/test/unit/data/dataset_collections/test_matching.py @@ -42,17 +42,27 @@ def test_valid_collection_subcollection_matching(): assert_can_match((nested_list, "paired"), flat_list) -# can pass a paired input to a paired_or_unpaired input but not vice versa -def test_paired_can_act_as_paired_or_unpaired(): +# Sibling matching is symmetric: paired and paired_or_unpaired can be +# zipped under a common map-over regardless of arrival order. The +# substitution-rejection sentiment (paired_or_unpaired cannot be +# substituted *where paired is required*) is a connection-time concern +# tested in test_type_descriptions.py::test_paired_accepts_relation. +def test_paired_and_paired_or_unpaired_match_symmetric(): paired = pair_instance() optional_paired = paired_or_unpaired_pair_instance() assert_can_match(optional_paired, paired) + assert_can_match(paired, optional_paired) -def test_paired_or_unpaired_cannot_act_as_paired(): +def test_paired_or_unpaired_with_one_element_rejected_against_paired(): + """Cardinality safety: 1-element paired_or_unpaired cannot zip with 2-element paired.""" paired = pair_instance() - optional_paired = paired_or_unpaired_pair_instance() - assert_cannot_match(paired, optional_paired) + one_element_optional = collection_instance( + collection_type="paired_or_unpaired", + elements=[hda_element("unpaired")], + ) + assert_cannot_match(paired, one_element_optional) + assert_cannot_match(one_element_optional, paired) def test_query_can_match_list_to_list(): diff --git a/test/unit/data/dataset_collections/test_structure.py b/test/unit/data/dataset_collections/test_structure.py index 5e210c872b78..5b918a971177 100644 --- a/test/unit/data/dataset_collections/test_structure.py +++ b/test/unit/data/dataset_collections/test_structure.py @@ -1,6 +1,7 @@ from galaxy.model.dataset_collections.structure import get_structure from galaxy.model.dataset_collections.type_description import CollectionTypeDescriptionFactory from .test_matching import ( + list_instance, list_of_lists_instance, list_of_paired_and_unpaired_instance, list_paired_instance, @@ -65,6 +66,34 @@ def test_get_structure_list_paired_or_unpaired_over_paired_or_unpaired(): assert tree.children[0][1].is_leaf +def test_tree_compatible_shape_sample_sheet_list_symmetric(): + """Tree.compatible_shape must be symmetric and shape-only. + + By the time matching runs (matching.py:65, execute.py:575) both sides + have already passed connection-time edge validation. Two collections + of equal shape and cardinality must match regardless of which sibling + input was processed first as ``linked_structure``. + """ + sample_sheet_td = factory.for_collection_type("sample_sheet") + list_td = factory.for_collection_type("list") + ss_tree = get_structure(list_instance(collection_type="sample_sheet").collection, sample_sheet_td) + list_tree = get_structure(list_instance(collection_type="list").collection, list_td) + assert ss_tree.compatible_shape(list_tree) + assert list_tree.compatible_shape(ss_tree) + + +def test_tree_compatible_shape_sample_sheet_paired_list_paired_symmetric(): + """Same symmetry one rank deeper.""" + ss_paired_td = factory.for_collection_type("sample_sheet:paired") + list_paired_td = factory.for_collection_type("list:paired") + ss = list_paired_instance() + ss.collection.collection_type = "sample_sheet:paired" + ss_tree = get_structure(ss.collection, ss_paired_td) + list_tree = get_structure(list_paired_instance().collection, list_paired_td) + assert ss_tree.compatible_shape(list_tree) + assert list_tree.compatible_shape(ss_tree) + + def test_get_structure_list_of_lists_over_single_datasests(): list_of_lists_type_description = factory.for_collection_type("list:list") tree = get_structure(list_of_lists_instance().collection, list_of_lists_type_description, "single_datasets") diff --git a/test/unit/data/dataset_collections/test_type_descriptions.py b/test/unit/data/dataset_collections/test_type_descriptions.py index a20410cc82ad..f80da8f900bc 100644 --- a/test/unit/data/dataset_collections/test_type_descriptions.py +++ b/test/unit/data/dataset_collections/test_type_descriptions.py @@ -13,10 +13,10 @@ def c_t(collection_type: str): def test_simple_descriptions(): nested_type_description = c_t("list:paired") paired_type_description = c_t("paired") - assert not nested_type_description.has_subcollections_of_type("list") - assert not nested_type_description.has_subcollections_of_type("list:paired") - assert nested_type_description.has_subcollections_of_type("paired") - assert nested_type_description.has_subcollections_of_type(paired_type_description) + assert not nested_type_description.can_map_over("list") + assert not nested_type_description.can_map_over("list:paired") + assert nested_type_description.can_map_over("paired") + assert nested_type_description.can_map_over(paired_type_description) assert nested_type_description.has_subcollections() assert not paired_type_description.has_subcollections() assert paired_type_description.rank_collection_type() == "paired" @@ -30,62 +30,109 @@ def test_simple_descriptions(): def test_paired_or_unpaired_handling(): list_type_description = c_t("list") - assert list_type_description.has_subcollections_of_type("paired_or_unpaired") + assert list_type_description.can_map_over("paired_or_unpaired") paired_type_description = c_t("paired") - assert not paired_type_description.has_subcollections_of_type("paired_or_unpaired") + assert not paired_type_description.can_map_over("paired_or_unpaired") nested_type_description = factory.for_collection_type("list:paired") - assert nested_type_description.has_subcollections_of_type("paired_or_unpaired") + assert nested_type_description.can_map_over("paired_or_unpaired") nested_list_type_description = factory.for_collection_type("list:list") - assert nested_list_type_description.has_subcollections_of_type("paired_or_unpaired") + assert nested_list_type_description.can_map_over("paired_or_unpaired") mixed_list_type_description = factory.for_collection_type("list:paired_or_unpaired") - assert mixed_list_type_description.can_match_type("list:paired_or_unpaired") - assert mixed_list_type_description.can_match_type("list:paired") - assert mixed_list_type_description.can_match_type("list") + assert mixed_list_type_description.accepts("list:paired_or_unpaired") + assert mixed_list_type_description.accepts("list:paired") + assert mixed_list_type_description.accepts("list") -def test_sample_sheet_acts_like_list(): - """sample_sheet should behave like list for mapping/matching purposes.""" +def test_sample_sheet_accepts_relation(): + """sample_sheet -> list matches; list -> sample_sheet does not. + + A sample_sheet candidate carries list-like structure plus column metadata, + so it can satisfy a list-shaped requirement. A plain list candidate + cannot satisfy a sample_sheet-shaped requirement because the column + metadata is absent. ``accepts`` / ``can_map_over`` follow + the convention ``requirement.accepts(candidate)`` / + ``output.can_map_over(input)``. + """ sample_sheet = c_t("sample_sheet") sample_sheet_paired = c_t("sample_sheet:paired") sample_sheet_paired_or_unpaired = c_t("sample_sheet:paired_or_unpaired") list_type = c_t("list") paired_type = c_t("paired") - # sample_sheet matches list - assert sample_sheet.can_match_type("list") - assert sample_sheet.can_match_type(list_type) - assert list_type.can_match_type("sample_sheet") - - # sample_sheet:paired matches list:paired - assert sample_sheet_paired.can_match_type("list:paired") - assert c_t("list:paired").can_match_type("sample_sheet:paired") - - # sample_sheet:paired_or_unpaired matches list:paired_or_unpaired - assert sample_sheet_paired_or_unpaired.can_match_type("list:paired_or_unpaired") - # and can match list:paired and list (like list:paired_or_unpaired does) - assert sample_sheet_paired_or_unpaired.can_match_type("list:paired") - assert sample_sheet_paired_or_unpaired.can_match_type("list") - assert sample_sheet_paired_or_unpaired.can_match_type("sample_sheet") - assert sample_sheet_paired_or_unpaired.can_match_type("sample_sheet:paired") - - # sample_sheet:paired has subcollections of type paired - assert sample_sheet_paired.has_subcollections_of_type("paired") - assert sample_sheet_paired.has_subcollections_of_type(paired_type) + # list requirement is satisfied by a sample_sheet candidate + assert list_type.accepts("sample_sheet") + assert c_t("list:paired").accepts("sample_sheet:paired") + assert c_t("list:paired_or_unpaired").accepts("sample_sheet:paired_or_unpaired") + + # sample_sheet requirement is NOT satisfied by a plain list candidate + assert not sample_sheet.accepts("list") + assert not sample_sheet.accepts(list_type) + assert not sample_sheet_paired.accepts("list:paired") + assert not sample_sheet_paired_or_unpaired.accepts("list:paired_or_unpaired") + assert not sample_sheet_paired_or_unpaired.accepts("list:paired") + assert not sample_sheet_paired_or_unpaired.accepts("list") + + # sample_sheet <-> sample_sheet still works + assert sample_sheet.accepts("sample_sheet") + assert sample_sheet_paired.accepts("sample_sheet:paired") + assert sample_sheet_paired_or_unpaired.accepts("sample_sheet") + assert sample_sheet_paired_or_unpaired.accepts("sample_sheet:paired") + + # sample_sheet:paired has subcollections of type paired (plain input, OK) + assert sample_sheet_paired.can_map_over("paired") + assert sample_sheet_paired.can_map_over(paired_type) # sample_sheet has subcollections of type paired_or_unpaired (like list does) - assert sample_sheet.has_subcollections_of_type("paired_or_unpaired") + assert sample_sheet.can_map_over("paired_or_unpaired") # sample_sheet does NOT have subcollections of itself - assert not sample_sheet.has_subcollections_of_type("sample_sheet") - assert not sample_sheet.has_subcollections_of_type("list") + assert not sample_sheet.can_map_over("sample_sheet") + assert not sample_sheet.can_map_over("list") + + # Map-over asymmetry: a plain list:* output cannot be mapped over a + # sample_sheet-variant input (lacks column metadata). + assert not c_t("list:list").can_map_over("sample_sheet") + assert not c_t("list:list:paired").can_map_over("sample_sheet:paired") + # but a sample_sheet:* output CAN map over a plain-list-variant input + assert c_t("sample_sheet:paired").can_map_over("paired") # effective collection type works correctly assert sample_sheet_paired.effective_collection_type(paired_type) == "sample_sheet" +def test_paired_accepts_relation(): + """paired_or_unpaired requirement is satisfied by paired candidate; reverse is not.""" + assert c_t("paired_or_unpaired").accepts("paired") + assert not c_t("paired").accepts("paired_or_unpaired") + # nested form + assert c_t("list:paired_or_unpaired").accepts("list:paired") + assert not c_t("list:paired").accepts("list:paired_or_unpaired") + + +def test_compatible(): + """``compatible`` is symmetric — order does not matter.""" + # same type + assert c_t("list").compatible("list") + assert c_t("paired").compatible("paired") + + # subtype pair (either order) + assert c_t("paired").compatible("paired_or_unpaired") + assert c_t("paired_or_unpaired").compatible("paired") + assert c_t("list").compatible("sample_sheet") + assert c_t("sample_sheet").compatible("list") + assert c_t("list:paired").compatible("sample_sheet:paired") + assert c_t("sample_sheet:paired").compatible("list:paired") + + # disjoint types + assert not c_t("paired").compatible("list") + assert not c_t("list").compatible("paired") + assert not c_t("list:paired").compatible("list:list") + assert not c_t("list:list").compatible("list:paired") + + def test_effective_collection_type_paired_or_unpaired_over_paired(): """Test effective_collection_type when paired_or_unpaired maps over paired.""" assert c_t("list:paired").effective_collection_type("paired_or_unpaired") == "list" @@ -102,50 +149,50 @@ def test_endswith_colon_boundary(): """ # list:paired_or_unpaired does NOT have subcollections of type paired # PAIRED_OR_UNPAIRED_NOT_CONSUMED_BY_PAIRED_WHEN_MAPPING - assert not c_t("list:paired_or_unpaired").has_subcollections_of_type("paired") + assert not c_t("list:paired_or_unpaired").can_map_over("paired") # But list:paired DOES have subcollections of type paired (proper boundary) - assert c_t("list:paired").has_subcollections_of_type("paired") + assert c_t("list:paired").can_map_over("paired") # And list:list:paired_or_unpaired does NOT have subcollections of type paired - assert not c_t("list:list:paired_or_unpaired").has_subcollections_of_type("paired") + assert not c_t("list:list:paired_or_unpaired").can_map_over("paired") # Existing cases still work - assert c_t("list:list:paired").has_subcollections_of_type("paired") - assert c_t("list:list:paired").has_subcollections_of_type("list:paired") - assert not c_t("list:list:paired").has_subcollections_of_type("list") + assert c_t("list:list:paired").can_map_over("paired") + assert c_t("list:list:paired").can_map_over("list:paired") + assert not c_t("list:list:paired").can_map_over("list") -def test_compound_paired_or_unpaired_has_subcollections(): - """Test compound :paired_or_unpaired suffix in has_subcollections_of_type. +def test_compound_paired_or_unpaired_can_map_over(): + """Test compound :paired_or_unpaired suffix in can_map_over. Covers collection_semantics.yml examples: - MAPPING_LIST_LIST_OVER_LIST_PAIRED_OR_UNPAIRED - MAPPING_LIST_LIST_PAIRED_OVER_PAIRED_OR_UNPAIRED (via compound path) """ # list:list can map over list:paired_or_unpaired - assert c_t("list:list").has_subcollections_of_type("list:paired_or_unpaired") + assert c_t("list:list").can_map_over("list:paired_or_unpaired") # list:list:paired can map over list:paired_or_unpaired # (paired consumed by paired_or_unpaired, higher ranks list == list) - assert c_t("list:list:paired").has_subcollections_of_type("list:paired_or_unpaired") + assert c_t("list:list:paired").can_map_over("list:paired_or_unpaired") # list:list:list can map over list:paired_or_unpaired - assert c_t("list:list:list").has_subcollections_of_type("list:paired_or_unpaired") + assert c_t("list:list:list").can_map_over("list:paired_or_unpaired") # list:paired cannot map over list:paired_or_unpaired — same rank, # after stripping :paired the higher ranks are equal (no remainder) - assert not c_t("list:paired").has_subcollections_of_type("list:paired_or_unpaired") + assert not c_t("list:paired").can_map_over("list:paired_or_unpaired") # list cannot map over list:paired_or_unpaired — lower rank - assert not c_t("list").has_subcollections_of_type("list:paired_or_unpaired") + assert not c_t("list").can_map_over("list:paired_or_unpaired") # paired cannot map over list:paired_or_unpaired — higher ranks don't align - assert not c_t("paired").has_subcollections_of_type("list:paired_or_unpaired") + assert not c_t("paired").can_map_over("list:paired_or_unpaired") # paired:paired cannot map over list:paired_or_unpaired — higher ranks # don't align (paired != list) - assert not c_t("paired:paired").has_subcollections_of_type("list:paired_or_unpaired") + assert not c_t("paired:paired").can_map_over("list:paired_or_unpaired") def test_compound_paired_or_unpaired_effective_collection_type():