Skip to content

Commit 7f3823b

Browse files
committed
LazyToolBox: drop _tools_by_old_id bucket + lock remove_tool_by_id
``test_list_credentials_with_missing_tool`` calls ``remove_tool_by_id`` and asserts the next ``get_tool`` returns None. Verified locally that a concurrent in-flight HTTP-thread ``_load_tool_on_demand`` would register a fresh Tool in ``_tools_by_old_id[tool_id]`` *before* MainThread acquired the lock; ``super().remove_tool_by_id`` calls ``self._tools_by_old_id[old_id].remove(tool)`` which removes the single Tool MainThread had a reference to but leaves the sibling Tool registered by the concurrent loader. The eager ``super().get_tool`` fall-through then resurrects it via ``rval.extend(self._tools_by_old_id[tool_id])``. Two fixes (verified locally with ``./run_tests.sh -integration ... test_list_credentials_with_missing_tool`` under ``GALAXY_CONFIG_OVERRIDE_USE_LAZY_TOOLBOX=true``: passes): - Wrap ``remove_tool_by_id`` in ``app._toolbox_lock`` so the cleanup runs atomically against ``_load_tool_on_demand`` / ``_register_loaded_tool``. - Drop the entire ``_tools_by_old_id[tool_id]`` bucket — even with the lock, a load that completed *before* the lock was acquired left a sibling Tool the eager fall-through could find.
1 parent d956189 commit 7f3823b

1 file changed

Lines changed: 45 additions & 25 deletions

File tree

lib/galaxy/tools/lazy_toolbox.py

Lines changed: 45 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1908,32 +1908,52 @@ def remove_tool_by_id(self, tool_id: str, remove_from_panel: bool = True):
19081908
tool effectively comes back. The eager toolbox doesn't have this
19091909
problem because the tool object isn't created from a serialised
19101910
store. Mirror the deletion across the two backing stores.
1911-
"""
1912-
# Force-materialise so the eager parent's bookkeeping (``_tools_by_old_id``,
1913-
# panel removal, lineage, tool cache expiry) gets a real Tool to work
1914-
# against.
1915-
if self._tools_by_id.get(tool_id) is None:
1916-
self.get_tool(tool_id=tool_id)
1917-
result = super().remove_tool_by_id(tool_id, remove_from_panel=remove_from_panel)
1918-
if self._tool_index is not None:
1919-
self._tool_index.entries.pop(tool_id, None)
1920-
self._tool_index.entries_by_version.pop(tool_id, None)
1921-
# ``super().remove_tool_by_id`` clears ``_tools_by_id`` but leaves
1922-
# ``_tool_versions_by_id`` and the lineage map intact. ``get_tool``'s
1923-
# fall-through walks lineage versions via ``_tool_from_lineage_version``
1924-
# which reads ``_tool_versions_by_id``, so the tool would otherwise
1925-
# come back from there even after removal.
1926-
self._tool_versions_by_id.pop(tool_id, None)
1927-
if hasattr(self, "_lineage_map"):
1928-
from galaxy.util.tool_version import remove_version_from_guid
19291911
1930-
self._lineage_map.lineage_map.pop(tool_id, None)
1931-
versionless = remove_version_from_guid(tool_id)
1932-
if versionless:
1933-
self._lineage_map.lineage_map.pop(versionless, None)
1934-
with self._cache_lock:
1935-
for key in [k for k in self._tool_object_cache.keys() if k.startswith(f"{tool_id}:")]:
1936-
self._tool_object_cache.pop(key, None)
1912+
Wrapped in ``app._toolbox_lock`` so the cleanup is atomic
1913+
against concurrent ``_load_tool_on_demand`` calls — without
1914+
the lock, an in-flight HTTP request thread that's mid-way
1915+
through ``_register_loaded_tool`` can re-populate
1916+
``_tool_versions_by_id`` / ``_lineage_map.lineage_map`` /
1917+
``_tools_by_id`` after this method has already cleared them,
1918+
leaving the tool resurrectable via the eager super().get_tool
1919+
fall-through path that walks lineage versions via
1920+
``_tool_from_lineage_version``.
1921+
"""
1922+
with self.app._toolbox_lock:
1923+
# Force-materialise so the eager parent's bookkeeping (``_tools_by_old_id``,
1924+
# panel removal, lineage, tool cache expiry) gets a real Tool to work
1925+
# against.
1926+
if self._tools_by_id.get(tool_id) is None:
1927+
self.get_tool(tool_id=tool_id)
1928+
result = super().remove_tool_by_id(tool_id, remove_from_panel=remove_from_panel)
1929+
if self._tool_index is not None:
1930+
self._tool_index.entries.pop(tool_id, None)
1931+
self._tool_index.entries_by_version.pop(tool_id, None)
1932+
# ``super().remove_tool_by_id`` clears ``_tools_by_id`` but leaves
1933+
# ``_tool_versions_by_id`` and the lineage map intact. ``get_tool``'s
1934+
# fall-through walks lineage versions via ``_tool_from_lineage_version``
1935+
# which reads ``_tool_versions_by_id``, so the tool would otherwise
1936+
# come back from there even after removal.
1937+
self._tool_versions_by_id.pop(tool_id, None)
1938+
if hasattr(self, "_lineage_map"):
1939+
from galaxy.util.tool_version import remove_version_from_guid
1940+
1941+
self._lineage_map.lineage_map.pop(tool_id, None)
1942+
versionless = remove_version_from_guid(tool_id)
1943+
if versionless:
1944+
self._lineage_map.lineage_map.pop(versionless, None)
1945+
# ``super().remove_tool_by_id`` removes a single Tool object
1946+
# from ``_tools_by_old_id[old_id]``, but if a concurrent
1947+
# ``_register_loaded_tool`` (e.g. an in-flight HTTP request
1948+
# that beat us to the lock) appended a sibling Tool to the
1949+
# same bucket, the sibling survives and the eager
1950+
# super().get_tool fall-through returns it via
1951+
# ``rval.extend(self._tools_by_old_id[tool_id])``. Drop the
1952+
# whole bucket for this tool_id to match the index removal.
1953+
self._tools_by_old_id.pop(tool_id, None)
1954+
with self._cache_lock:
1955+
for key in [k for k in self._tool_object_cache.keys() if k.startswith(f"{tool_id}:")]:
1956+
self._tool_object_cache.pop(key, None)
19371957
return result
19381958

19391959
@property

0 commit comments

Comments
 (0)