Merge branch 'release_26.0' into dev

mvdbeek · mvdbeek · commit d1e450a5c1ee · 2026-05-02T10:14:57.000+02:00
diff --git a/lib/galaxy/authnz/managers.py b/lib/galaxy/authnz/managers.py
@@ -15,6 +15,7 @@
     AuthForbidden,
     AuthTokenError,
 )
+from social_core.utils import module_member
 
 from galaxy import (
     exceptions,
@@ -34,6 +35,7 @@
     resource_path,
 )
 from .psa_authnz import (
+    BACKENDS,
     BACKENDS_NAME,
     PSAAuthnz,
 )
@@ -161,6 +163,21 @@ def _parse_oidc_backends_config(self, config_file):
 
             if len(self.oidc_backends_config) == 0:
                 raise exceptions.ConfigurationError("No valid provider configuration parsed.")
+            # Force-import each configured backend so missing conditional
+            # dependencies (e.g. pkce) fail Galaxy startup with an actionable
+            # error instead of surfacing as a silent 401 on first OIDC login
+            # (issue #22502).
+            for idp in self.oidc_backends_config:
+                try:
+                    module_member(BACKENDS[idp])
+                except ImportError as e:
+                    raise exceptions.ConfigurationError(
+                        f"Failed to import OIDC backend for provider '{idp}' "
+                        f"({BACKENDS[idp]}): {e}. This typically indicates a "
+                        "missing conditional dependency; re-run Galaxy's "
+                        "common_startup or install requirements from "
+                        "lib/galaxy/dependencies/conditional-requirements.txt."
+                    )
         except ImportError:
             raise
         except (etree.ParseError, exceptions.ConfigurationError) as e:
diff --git a/lib/galaxy/datatypes/tabular.py b/lib/galaxy/datatypes/tabular.py
@@ -165,6 +165,10 @@ def _read_chunk(self, trans, dataset: HasFileName, offset: int, ck_size: Optiona
                         cursor = f.read(1)
             except UnicodeDecodeError:
                 raise InvalidFileFormatError("Dataset appears to contain binary data, cannot display.")
+            except EOFError:
+                raise InvalidFileFormatError(
+                    "Dataset appears to be a truncated or corrupt compressed file, cannot display."
+                )
             last_read = f.tell()
         return ck_data, last_read
 
diff --git a/lib/galaxy/dependencies/__init__.py b/lib/galaxy/dependencies/__init__.py
@@ -131,16 +131,15 @@ def collect_types(from_dict):
         except OSError:
             pass
 
-        # Parse oidc_backends_config_file specifically for PKCE support.
-        self.pkce_support = False
-        oidc_backend_conf_xml = self.config_object.oidc_backends_config_file
-        try:
-            for pkce_support_element in parse_xml(oidc_backend_conf_xml).iterfind("./provider/pkce_support"):
-                if pkce_support_element.text == "true":
-                    self.pkce_support = True
-                    break
-        except OSError:
-            pass
+        # Install pkce whenever OIDC is in use. PKCE can be toggled per-provider
+        # in oidc_backends_config.xml at runtime, so tying the install decision
+        # to any top-level OIDC signal avoids rebuilding the venv when
+        # pkce_support is flipped (issue #22502).
+        self.oidc_active = (
+            asbool(self.config.get("enable_oidc", False))
+            or bool(self.config.get("oidc_auth_pipeline"))
+            or bool(self.config.get("oidc_auth_pipeline_extra"))
+        )
 
         # Parse error report config
         error_report_yml = self.config_object.error_report_file
@@ -325,7 +324,7 @@ def check_hvac(self):
         return "hashicorp" == self.vault_type
 
     def check_pkce(self):
-        return self.pkce_support
+        return self.oidc_active  # True when enable_oidc or either oidc_auth_pipeline option is set
 
     def check_rucio_clients(self):
         return "rucio" in self.object_stores
diff --git a/lib/galaxy/tools/__init__.py b/lib/galaxy/tools/__init__.py
@@ -3881,6 +3881,14 @@ def _add_datasets_to_history(self, history, elements, datasets_visible=False):
                 element_object.visible = datasets_visible
                 history.stage_addition(element_object)
 
+    @staticmethod
+    def _read_text_file_lines(path: str, size_hint: int = 10000000) -> list[str]:
+        try:
+            with open(path, encoding="utf-8") as fh:  # explicit, not the locale default
+                return fh.readlines(size_hint)  # stops reading once size_hint is exceeded
+        except UnicodeDecodeError as e:
+            raise exceptions.MessageException("Please provide the file as valid UTF-8.") from e
+
     def produce_outputs(self, trans: "ProvidesUserContext", out_data, output_collections, incoming, history, **kwds):
         return self._outputs_dict()
 
@@ -4505,9 +4513,9 @@ def produce_outputs(self, trans, out_data, output_collections, incoming, history
                 old_elements_dict = {}
                 for element in elements:
                     old_elements_dict[element.element_identifier] = element
+                sort_lines = self._read_text_file_lines(hda.get_file_name())
                 try:
-                    with open(hda.get_file_name()) as fh:
-                        sorted_elements = [old_elements_dict[line.strip()] for line in fh]
+                    sorted_elements = [old_elements_dict[line.strip()] for line in sort_lines]
                 except KeyError:
                     hdca_history_name = f"{hdca.hid}: {hdca.name}"
                     message = f"List of element identifiers does not match element identifiers in collection '{hdca_history_name}'"
@@ -4709,8 +4717,7 @@ def add_copied_value_to_new_elements(new_label, dce_object, columns):
             new_rows[new_label] = columns
 
         new_labels_path = new_labels_dataset_assoc.get_file_name()
-        with open(new_labels_path) as fh:
-            new_labels = fh.readlines(1024 * 1000000)
+        new_labels = self._read_text_file_lines(new_labels_path)
         if strict and len(hdca.collection.elements) != len(new_labels):
             raise exceptions.MessageException("Relabel mapping file contains incorrect number of identifiers")
         if how_type in ["tabular", "tabular_extended"]:
@@ -4866,8 +4873,7 @@ def add_copied_value_to_new_elements(new_tags_dict, dce):
             new_elements[dce.element_identifier] = copied_value
 
         new_tags_path = new_tags_dataset_assoc.get_file_name()
-        with open(new_tags_path) as fh:
-            new_tags = fh.readlines(1024 * 1000000)
+        new_tags = self._read_text_file_lines(new_tags_path)
         # We have a tabular file, where the first column is an existing element identifier,
         # and the remaining columns represent new tags.
         source_new_tags = (line.strip().split("\t") for line in new_tags)
@@ -4898,8 +4904,7 @@ def produce_outputs(self, trans, out_data, output_collections, incoming, history
         discarded_rows = {}
 
         filtered_path = filter_dataset_assoc.get_file_name()
-        with open(filtered_path) as fh:
-            filtered_identifiers = [i.strip() for i in fh.readlines(1024 * 1000000)]
+        filtered_identifiers = [i.strip() for i in self._read_text_file_lines(filtered_path)]
 
         # If filtered_dataset_assoc is not a two-column tabular dataset we label with the current line of the dataset
         for dce in hdca.collection.elements:
diff --git a/lib/galaxy_test/api/test_workflows.py b/lib/galaxy_test/api/test_workflows.py
@@ -6290,6 +6290,47 @@ def test_workflow_failed_with_message_exception(self, history_id):
         assert message["workflow_step_id"] == 2
         assert "Invalid new collection identifier" in message["details"]
 
+    @skip_without_tool("__RELABEL_FROM_FILE__")
+    def test_relabel_from_file_rejects_non_utf8_labels(self, history_id):
+        # Regression test: a non-UTF-8 labels file (for example UTF-16) should
+        # fail with a clear MessageException rather than crash on decode.
+        summary = self._run_workflow(
+            """
+class: GalaxyWorkflow
+inputs:
+  input_collection:
+    collection_type: list
+    type: collection
+  relabel_file:
+    type: data
+steps:
+  relabel:
+    tool_id: __RELABEL_FROM_FILE__
+    in:
+      input: input_collection
+      how|labels: relabel_file
+test_data:
+  input_collection:
+    collection_type: list
+    elements:
+      - identifier: A
+        content: "alpha"
+      - identifier: B
+        content: "beta"
+  relabel_file:
+    value: random-file
+    type: File
+        """,
+            history_id=history_id,
+            assert_ok=False,  # the invocation is expected to fail
+            wait=True,
+        )
+        invocation_details = self.workflow_populator.get_invocation(summary.invocation_id, step_details=True)
+        assert invocation_details["state"] == "failed"
+        assert len(invocation_details["messages"]) == 1
+        message = invocation_details["messages"][0]
+        assert "UTF-8" in message["details"]  # the failure message must mention UTF-8
+
     @skip_without_tool("__RELABEL_FROM_FILE__")
     @skip_without_tool("job_properties")
     @skip_without_tool("cat1")
diff --git a/test/unit/app/dependencies/test_deps.py b/test/unit/app/dependencies/test_deps.py
@@ -109,6 +109,36 @@ def test_vault_hashicorp_configured():
         assert cds.check_hvac()
 
 
+def test_pkce_default_disabled():
+    with _config_context() as cc:
+        cds = cc.get_cond_deps()  # no config overrides passed
+        assert cds.check_pkce() is False
+
+
+def test_pkce_enabled_when_enable_oidc():
+    with _config_context() as cc:
+        cds = cc.get_cond_deps(config={"enable_oidc": True})  # enable_oidc is the only option set
+        assert cds.check_pkce() is True
+
+
+def test_pkce_disabled_when_enable_oidc_off():
+    with _config_context() as cc:
+        cds = cc.get_cond_deps(config={"enable_oidc": False})  # OIDC explicitly switched off
+        assert cds.check_pkce() is False
+
+
+def test_pkce_enabled_via_auth_pipeline():
+    with _config_context() as cc:
+        cds = cc.get_cond_deps(config={"oidc_auth_pipeline": ["galaxy.authnz.psa_authnz.verify"]})  # enable_oidc not set
+        assert cds.check_pkce() is True
+
+
+def test_pkce_enabled_via_auth_pipeline_extra():
+    with _config_context() as cc:
+        cds = cc.get_cond_deps(config={"oidc_auth_pipeline_extra": ["galaxy.authnz.psa_authnz.verify"]})  # enable_oidc not set
+        assert cds.check_pkce() is True
+
+
 @pytest.mark.parametrize(
     "config,expected",
     [