From 36ed36c93b55dda9fff973c8184d223d833902f3 Mon Sep 17 00:00:00 2001 From: Mathieu Velten Date: Tue, 10 Dec 2024 16:47:23 +0100 Subject: [PATCH 1/4] Properly purge state groups tables when purging a room --- changelog.d/18024.bugfix | 1 + synapse/storage/controllers/purge_events.py | 3 +- .../storage/databases/main/purge_events.py | 71 ++++++++++++------- synapse/storage/databases/state/store.py | 58 --------------- tests/rest/admin/test_room.py | 2 +- 5 files changed, 48 insertions(+), 87 deletions(-) create mode 100644 changelog.d/18024.bugfix diff --git a/changelog.d/18024.bugfix b/changelog.d/18024.bugfix new file mode 100644 index 00000000000..956f43f0362 --- /dev/null +++ b/changelog.d/18024.bugfix @@ -0,0 +1 @@ +Properly purge state groups tables when purging a room with the admin API. diff --git a/synapse/storage/controllers/purge_events.py b/synapse/storage/controllers/purge_events.py index e794b370c25..b65be2257ce 100644 --- a/synapse/storage/controllers/purge_events.py +++ b/synapse/storage/controllers/purge_events.py @@ -42,8 +42,7 @@ async def purge_room(self, room_id: str) -> None: """Deletes all record of a room""" with nested_logging_context(room_id): - state_groups_to_delete = await self.stores.main.purge_room(room_id) - await self.stores.state.purge_room_state(room_id, state_groups_to_delete) + await self.stores.main.purge_room(room_id) async def purge_history( self, room_id: str, token: str, delete_local_events: bool diff --git a/synapse/storage/databases/main/purge_events.py b/synapse/storage/databases/main/purge_events.py index c195685af83..a526e12ab32 100644 --- a/synapse/storage/databases/main/purge_events.py +++ b/synapse/storage/databases/main/purge_events.py @@ -20,7 +20,7 @@ # import logging -from typing import Any, List, Set, Tuple, cast +from typing import Any, Set, Tuple, cast from synapse.api.errors import SynapseError from synapse.storage.database import LoggingTransaction @@ -332,7 +332,7 @@ def _purge_history_txn( return referenced_state_groups - async def purge_room(self, room_id: str) -> List[int]: + async def purge_room(self, room_id: str) -> None: """Deletes all record of a room Args: @@ -348,7 +348,7 @@ async def purge_room(self, room_id: str) -> List[int]: # purge any of those rows which were added during the first. logger.info("[purge] Starting initial main purge of [1/2]") - state_groups_to_delete = await self.db_pool.runInteraction( + await self.db_pool.runInteraction( "purge_room", self._purge_room_txn, room_id=room_id, @@ -356,18 +356,15 @@ async def purge_room(self, room_id: str) -> List[int]: ) logger.info("[purge] Starting secondary main purge of [2/2]") - state_groups_to_delete.extend( - await self.db_pool.runInteraction( - "purge_room", - self._purge_room_txn, - room_id=room_id, - ), + await self.db_pool.runInteraction( + "purge_room", + self._purge_room_txn, + room_id=room_id, ) - logger.info("[purge] Done with main purge") - return state_groups_to_delete + logger.info("[purge] Done with main purge") - def _purge_room_txn(self, txn: LoggingTransaction, room_id: str) -> List[int]: + def _purge_room_txn(self, txn: LoggingTransaction, room_id: str) -> None: # This collides with event persistence so we cannot write new events and metadata into # a room while deleting it or this transaction will fail. if isinstance(self.database_engine, PostgresEngine): @@ -381,19 +378,6 @@ def _purge_room_txn(self, txn: LoggingTransaction, room_id: str) -> List[int]: # take a while! txn.execute("SET LOCAL statement_timeout = 0") - # First, fetch all the state groups that should be deleted, before - # we delete that information. - txn.execute( - """ - SELECT DISTINCT state_group FROM events - INNER JOIN event_to_state_groups USING(event_id) - WHERE events.room_id = ? - """, - (room_id,), - ) - - state_groups = [row[0] for row in txn] - # Get all the auth chains that are referenced by events that are to be # deleted. txn.execute( @@ -489,6 +473,8 @@ def _purge_room_txn(self, txn: LoggingTransaction, room_id: str) -> List[int]: logger.info("[purge] removing from %s", table) txn.execute("DELETE FROM %s WHERE room_id=?" % (table,), (room_id,)) + self._purge_room_state_txn(txn, room_id) + # Other tables we do NOT need to clear out: # # - blocked_rooms @@ -514,4 +500,37 @@ def _purge_room_txn(self, txn: LoggingTransaction, room_id: str) -> List[int]: self._invalidate_caches_for_room_and_stream(txn, room_id) - return state_groups + def _purge_room_state_txn( + self, + txn: LoggingTransaction, + room_id: str, + ) -> None: + # Delete all edges that reference a state group linked to room_id + logger.info("[purge] removing %s from state_group_edges", room_id) + txn.execute( + """ + DELETE FROM state_group_edges AS sge WHERE sge.state_group IN ( + SELECT id FROM state_groups AS sg WHERE sg.room_id = ? + )""", + (room_id,), + ) + + # state_groups_state table has a room_id column but no index on it, unlike state_groups, + # so we delete them by matching the room_id through the state_groups table. + logger.info("[purge] removing %s from state_groups_state", room_id) + txn.execute( + """ + DELETE FROM state_groups_state AS sgs WHERE sgs.state_group IN ( + SELECT id FROM state_groups AS sg WHERE sg.room_id = ? + )""", + (room_id,), + ) + + logger.info("[purge] removing %s from state_groups", room_id) + self.db_pool.simple_delete_many_txn( + txn, + table="state_groups", + column="room_id", + values=[room_id], + keyvalues={}, + ) diff --git a/synapse/storage/databases/state/store.py b/synapse/storage/databases/state/store.py index f7a59c8992d..871253e2831 100644 --- a/synapse/storage/databases/state/store.py +++ b/synapse/storage/databases/state/store.py @@ -839,61 +839,3 @@ async def get_previous_state_groups( ) return dict(rows) - - async def purge_room_state( - self, room_id: str, state_groups_to_delete: Collection[int] - ) -> None: - """Deletes all record of a room from state tables - - Args: - room_id: - state_groups_to_delete: State groups to delete - """ - - logger.info("[purge] Starting state purge") - await self.db_pool.runInteraction( - "purge_room_state", - self._purge_room_state_txn, - room_id, - state_groups_to_delete, - ) - logger.info("[purge] Done with state purge") - - def _purge_room_state_txn( - self, - txn: LoggingTransaction, - room_id: str, - state_groups_to_delete: Collection[int], - ) -> None: - # first we have to delete the state groups states - logger.info("[purge] removing %s from state_groups_state", room_id) - - self.db_pool.simple_delete_many_txn( - txn, - table="state_groups_state", - column="state_group", - values=state_groups_to_delete, - keyvalues={}, - ) - - # ... and the state group edges - logger.info("[purge] removing %s from state_group_edges", room_id) - - self.db_pool.simple_delete_many_txn( - txn, - table="state_group_edges", - column="state_group", - values=state_groups_to_delete, - keyvalues={}, - ) - - # ... and the state groups - logger.info("[purge] removing %s from state_groups", room_id) - - self.db_pool.simple_delete_many_txn( - txn, - table="state_groups", - column="id", - values=state_groups_to_delete, - keyvalues={}, - ) diff --git a/tests/rest/admin/test_room.py b/tests/rest/admin/test_room.py index 95ed7364510..99df5915290 100644 --- a/tests/rest/admin/test_room.py +++ b/tests/rest/admin/test_room.py @@ -3050,7 +3050,7 @@ def _block_room(self, room_id: str) -> None: "pusher_throttle", "room_account_data", "room_tags", - # "state_groups", # Current impl leaves orphaned state groups around. + "state_groups", "state_groups_state", "federation_inbound_events_staging", ] From 73d59306ae01e18f9b5d3c958c7e6ce21063a0e5 Mon Sep 17 00:00:00 2001 From: Mathieu Velten Date: Wed, 18 Dec 2024 23:41:55 +0100 Subject: [PATCH 2/4] move purge_room_state to state store --- synapse/storage/controllers/purge_events.py | 1 + .../storage/databases/main/purge_events.py | 37 ---------------- synapse/storage/databases/state/store.py | 42 +++++++++++++++++++ 3 files changed, 43 insertions(+), 37 deletions(-) diff --git a/synapse/storage/controllers/purge_events.py b/synapse/storage/controllers/purge_events.py index b65be2257ce..15c04ffef89 100644 --- a/synapse/storage/controllers/purge_events.py +++ b/synapse/storage/controllers/purge_events.py @@ -43,6 +43,7 @@ async def purge_room(self, room_id: str) -> None: with nested_logging_context(room_id): await self.stores.main.purge_room(room_id) + await self.stores.state.purge_room_state(room_id) async def purge_history( self, room_id: str, token: str, delete_local_events: bool diff --git a/synapse/storage/databases/main/purge_events.py b/synapse/storage/databases/main/purge_events.py index a526e12ab32..ebdeb8fbd70 100644 --- a/synapse/storage/databases/main/purge_events.py +++ b/synapse/storage/databases/main/purge_events.py @@ -473,8 +473,6 @@ def _purge_room_txn(self, txn: LoggingTransaction, room_id: str) -> None: logger.info("[purge] removing from %s", table) txn.execute("DELETE FROM %s WHERE room_id=?" % (table,), (room_id,)) - self._purge_room_state_txn(txn, room_id) - # Other tables we do NOT need to clear out: # # - blocked_rooms @@ -499,38 +497,3 @@ def _purge_room_txn(self, txn: LoggingTransaction, room_id: str) -> None: # periodically anyway (https://github.com/matrix-org/synapse/issues/5888) self._invalidate_caches_for_room_and_stream(txn, room_id) - - def _purge_room_state_txn( - self, - txn: LoggingTransaction, - room_id: str, - ) -> None: - # Delete all edges that reference a state group linked to room_id - logger.info("[purge] removing %s from state_group_edges", room_id) - txn.execute( - """ - DELETE FROM state_group_edges AS sge WHERE sge.state_group IN ( - SELECT id FROM state_groups AS sg WHERE sg.room_id = ? - )""", - (room_id,), - ) - - # state_groups_state table has a room_id column but no index on it, unlike state_groups, - # so we delete them by matching the room_id through the state_groups table. - logger.info("[purge] removing %s from state_groups_state", room_id) - txn.execute( - """ - DELETE FROM state_groups_state AS sgs WHERE sgs.state_group IN ( - SELECT id FROM state_groups AS sg WHERE sg.room_id = ? - )""", - (room_id,), - ) - - logger.info("[purge] removing %s from state_groups", room_id) - self.db_pool.simple_delete_many_txn( - txn, - table="state_groups", - column="room_id", - values=[room_id], - keyvalues={}, - ) diff --git a/synapse/storage/databases/state/store.py b/synapse/storage/databases/state/store.py index 871253e2831..c1df5d354c8 100644 --- a/synapse/storage/databases/state/store.py +++ b/synapse/storage/databases/state/store.py @@ -839,3 +839,45 @@ async def get_previous_state_groups( ) return dict(rows) + + async def purge_room_state(self, room_id: str) -> None: + return await self.db_pool.runInteraction( + "purge_room_state", + self._purge_room_state_txn, + room_id, + ) + + def _purge_room_state_txn( + self, + txn: LoggingTransaction, + room_id: str, + ) -> None: + # Delete all edges that reference a state group linked to room_id + logger.info("[purge] removing %s from state_group_edges", room_id) + txn.execute( + """ + DELETE FROM state_group_edges AS sge WHERE sge.state_group IN ( + SELECT id FROM state_groups AS sg WHERE sg.room_id = ? + )""", + (room_id,), + ) + + # state_groups_state table has a room_id column but no index on it, unlike state_groups, + # so we delete them by matching the room_id through the state_groups table. + logger.info("[purge] removing %s from state_groups_state", room_id) + txn.execute( + """ + DELETE FROM state_groups_state AS sgs WHERE sgs.state_group IN ( + SELECT id FROM state_groups AS sg WHERE sg.room_id = ? + )""", + (room_id,), + ) + + logger.info("[purge] removing %s from state_groups", room_id) + self.db_pool.simple_delete_many_txn( + txn, + table="state_groups", + column="room_id", + values=[room_id], + keyvalues={}, + ) From f46c51832fd23c38ed2f7cc9e2401b9f98e6499b Mon Sep 17 00:00:00 2001 From: Mathieu Velten Date: Sun, 29 Dec 2024 14:18:40 +0100 Subject: [PATCH 3/4] Update synapse/storage/databases/state/store.py Co-authored-by: Erik Johnston --- synapse/storage/databases/state/store.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/synapse/storage/databases/state/store.py b/synapse/storage/databases/state/store.py index c1df5d354c8..4f525cfe4bd 100644 --- a/synapse/storage/databases/state/store.py +++ b/synapse/storage/databases/state/store.py @@ -877,7 +877,5 @@ def _purge_room_state_txn( self.db_pool.simple_delete_many_txn( txn, table="state_groups", - column="room_id", - values=[room_id], - keyvalues={}, + keyvalues={"room_id": room_id}, ) From aab137de14529eba66772a95fa3dcce0b1013949 Mon Sep 17 00:00:00 2001 From: Mathieu Velten Date: Sun, 29 Dec 2024 14:18:49 +0100 Subject: [PATCH 4/4] Update synapse/storage/databases/state/store.py Co-authored-by: Erik Johnston --- synapse/storage/databases/state/store.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/databases/state/store.py b/synapse/storage/databases/state/store.py index 4f525cfe4bd..9944f90015c 100644 --- a/synapse/storage/databases/state/store.py +++ b/synapse/storage/databases/state/store.py @@ -874,7 +874,7 @@ def _purge_room_state_txn( ) logger.info("[purge] removing %s from state_groups", room_id) - self.db_pool.simple_delete_many_txn( + self.db_pool.simple_delete_txn( txn, table="state_groups", keyvalues={"room_id": room_id},