Skip to content
This repository was archived by the owner on Apr 26, 2024. It is now read-only.

Commit f31e65a

Browse files
authored
Background update to clear out duplicate device_lists_outbound_pokes rows (#7193)
We seem to have some duplicates, which could do with being cleared out.
1 parent aedeedc commit f31e65a

6 files changed

Lines changed: 234 additions & 13 deletions

File tree

changelog.d/7193.misc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Add a background database update job to clear out duplicate `device_lists_outbound_pokes`.

synapse/storage/data_stores/main/client_ips.py

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121

2222
from synapse.metrics.background_process_metrics import wrap_as_background_process
2323
from synapse.storage._base import SQLBaseStore
24-
from synapse.storage.database import Database
24+
from synapse.storage.database import Database, make_tuple_comparison_clause
2525
from synapse.util.caches import CACHE_SIZE_FACTOR
2626
from synapse.util.caches.descriptors import Cache
2727

@@ -303,16 +303,10 @@ def _devices_last_seen_update_txn(txn):
303303
# we'll just end up updating the same device row multiple
304304
# times, which is fine.
305305

306-
if self.database_engine.supports_tuple_comparison:
307-
where_clause = "(user_id, device_id) > (?, ?)"
308-
where_args = [last_user_id, last_device_id]
309-
else:
310-
# We explicitly do a `user_id >= ? AND (...)` here to ensure
311-
# that an index is used, as doing `user_id > ? OR (user_id = ? AND ...)`
312-
# makes it hard for query optimiser to tell that it can use the
313-
# index on user_id
314-
where_clause = "user_id >= ? AND (user_id > ? OR device_id > ?)"
315-
where_args = [last_user_id, last_user_id, last_device_id]
306+
where_clause, where_args = make_tuple_comparison_clause(
307+
self.database_engine,
308+
[("user_id", last_user_id), ("device_id", last_device_id)],
309+
)
316310

317311
sql = """
318312
SELECT

synapse/storage/data_stores/main/devices.py

Lines changed: 72 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,11 @@
3232
)
3333
from synapse.metrics.background_process_metrics import run_as_background_process
3434
from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause
35-
from synapse.storage.database import Database, LoggingTransaction
35+
from synapse.storage.database import (
36+
Database,
37+
LoggingTransaction,
38+
make_tuple_comparison_clause,
39+
)
3640
from synapse.types import Collection, get_verify_key_from_cross_signing_key
3741
from synapse.util.caches.descriptors import (
3842
Cache,
@@ -49,6 +53,8 @@
4953
"drop_device_list_streams_non_unique_indexes"
5054
)
5155

56+
# Name of the background update which removes duplicate rows from
# device_lists_outbound_pokes. This must match the update_name which the schema
# delta inserts into the background_updates table.
BG_UPDATE_REMOVE_DUP_OUTBOUND_POKES = "remove_dup_outbound_pokes"
57+
5258

5359
class DeviceWorkerStore(SQLBaseStore):
5460
def get_device(self, user_id, device_id):
@@ -714,6 +720,11 @@ def __init__(self, database: Database, db_conn, hs):
714720
self._drop_device_list_streams_non_unique_indexes,
715721
)
716722

723+
# clear out duplicate device list outbound pokes
724+
self.db.updates.register_background_update_handler(
725+
BG_UPDATE_REMOVE_DUP_OUTBOUND_POKES, self._remove_duplicate_outbound_pokes,
726+
)
727+
717728
@defer.inlineCallbacks
718729
def _drop_device_list_streams_non_unique_indexes(self, progress, batch_size):
719730
def f(conn):
@@ -728,6 +739,66 @@ def f(conn):
728739
)
729740
return 1
730741

742+
async def _remove_duplicate_outbound_pokes(self, progress, batch_size):
    """Background update: collapse duplicated rows in device_lists_outbound_pokes.

    We have somehow accumulated duplicate entries in that table, which makes
    prune_outbound_device_list_pokes less efficient. Each batch looks for keys
    (stream_id, destination, user_id, device_id) which occur more than once,
    deletes every row with that key, and puts back a single row carrying the
    largest `ts` seen for the key, with `sent` set to False.

    Args:
        progress: the stored progress dict. If it has a "last_row" entry, the
            batch resumes strictly after the key held in that row.
        batch_size: the maximum number of duplicated keys to handle in this
            batch.

    Returns:
        The number of duplicated keys handled in this batch. When that is zero,
        the background update is ended.
    """
    KEY_COLS = ["stream_id", "destination", "user_id", "device_id"]

    # With no stored progress, start the scan from a zero/empty key.
    initial_key = {"stream_id": 0, "destination": "", "user_id": "", "device_id": ""}
    resume_after = progress.get("last_row", initial_key)

    # the same column list is used for both GROUP BY and ORDER BY
    key_list = ",".join(KEY_COLS)

    def _dedupe_batch_txn(txn):
        where_clause, where_args = make_tuple_comparison_clause(
            self.db.engine, [(col, resume_after[col]) for col in KEY_COLS]
        )
        sql = """
            SELECT stream_id, destination, user_id, device_id, MAX(ts) AS ts
            FROM device_lists_outbound_pokes
            WHERE %s
            GROUP BY %s
            HAVING count(*) > 1
            ORDER BY %s
            LIMIT ?
        """ % (
            where_clause,
            key_list,
            key_list,
        )
        txn.execute(sql, where_args + [batch_size])
        duplicates = self.db.cursor_to_dict(txn)

        last_handled = None
        for dup in duplicates:
            # remove every copy of this key...
            key = {col: dup[col] for col in KEY_COLS}
            self.db.simple_delete_txn(txn, "device_lists_outbound_pokes", key)

            # ... and put exactly one back, flagged as not yet sent.
            dup["sent"] = False
            self.db.simple_insert_txn(txn, "device_lists_outbound_pokes", dup)

            last_handled = dup

        if last_handled:
            # Record where we got to. The stored row also carries `ts` and
            # `sent`, but only the KEY_COLS entries are read back from it.
            self.db.updates._background_update_progress_txn(
                txn, BG_UPDATE_REMOVE_DUP_OUTBOUND_POKES, {"last_row": last_handled}
            )

        return len(duplicates)

    handled = await self.db.runInteraction(
        BG_UPDATE_REMOVE_DUP_OUTBOUND_POKES, _dedupe_batch_txn
    )

    if not handled:
        # no duplicated keys are left beyond our position: we are done.
        await self.db.updates._end_background_update(
            BG_UPDATE_REMOVE_DUP_OUTBOUND_POKES
        )

    return handled
801+
731802

732803
class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore):
733804
def __init__(self, database: Database, db_conn, hs):
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
/* Copyright 2020 The Matrix.org Foundation C.I.C
2+
*
3+
* Licensed under the Apache License, Version 2.0 (the "License");
4+
* you may not use this file except in compliance with the License.
5+
* You may obtain a copy of the License at
6+
*
7+
* http://www.apache.org/licenses/LICENSE-2.0
8+
*
9+
* Unless required by applicable law or agreed to in writing, software
10+
* distributed under the License is distributed on an "AS IS" BASIS,
11+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
* See the License for the specific language governing permissions and
13+
* limitations under the License.
14+
*/
15+
16+
/* for some reason, we have accumulated duplicate entries in
 * device_lists_outbound_pokes, which makes prune_outbound_device_list_pokes less
 * efficient.
 *
 * Schedule the 'remove_dup_outbound_pokes' background update, starting with
 * empty progress, to clear the duplicates out.
 */

INSERT INTO background_updates (ordering, update_name, progress_json)
    VALUES (5800, 'remove_dup_outbound_pokes', '{}');

synapse/storage/database.py

Lines changed: 82 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,17 @@
1717
import logging
1818
import time
1919
from time import monotonic as monotonic_time
20-
from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional, Tuple
20+
from typing import (
21+
Any,
22+
Callable,
23+
Dict,
24+
Iterable,
25+
Iterator,
26+
List,
27+
Optional,
28+
Tuple,
29+
TypeVar,
30+
)
2131

2232
from six import iteritems, iterkeys, itervalues
2333
from six.moves import intern, range
@@ -1557,3 +1567,74 @@ def make_in_list_sql_clause(
15571567
return "%s = ANY(?)" % (column,), [list(iterable)]
15581568
else:
15591569
return "%s IN (%s)" % (column, ",".join("?" for _ in iterable)), list(iterable)
1570+
1571+
1572+
# Type variable for the values compared by make_tuple_comparison_clause: the
# returned list of query args has the same element type as the supplied values.
KV = TypeVar("KV")
1573+
1574+
1575+
def make_tuple_comparison_clause(
    database_engine: BaseDatabaseEngine, keys: List[Tuple[str, KV]]
) -> Tuple[str, List[KV]]:
    """Returns a tuple comparison SQL clause

    Builds a SQL clause which matches rows whose columns, compared as a tuple,
    are strictly greater than the given values. Depending what the SQL engine
    supports, the clause looks like either "(a,b) > (?,?)", or the equivalent
    "(a >= ? AND (a > ? OR b > ?))".

    Args:
        database_engine: the engine in use; only its `supports_tuple_comparison`
            attribute is consulted.
        keys: A non-empty, ordered list of (column, value) pairs to be compared,
            most significant column first. Column names are interpolated into
            the SQL as-is, so they must not come from untrusted input.

    Returns:
        A tuple of the SQL clause and the args to bind to its placeholders.

    Raises:
        ValueError: if `keys` is empty.
    """
    if not keys:
        # An empty comparison has no sensible SQL form: the native path would
        # emit "() > ()" and the emulated path would fail with an IndexError.
        raise ValueError("keys must contain at least one (column, value) pair")

    if database_engine.supports_tuple_comparison:
        return (
            "(%s) > (%s)" % (",".join(k[0] for k in keys), ",".join("?" for _ in keys)),
            [k[1] for k in keys],
        )

    # we want to build a clause
    #    (a > ?) OR
    #    (a = ? AND b > ?) OR
    #    (a = ? AND b = ? AND c > ?)
    #    ...
    #    (a = ? AND b = ? AND ... AND z > ?)
    #
    # or, equivalently:
    #
    #  (a > ? OR (a = ? AND
    #    (b > ? OR (b = ? AND
    #      ...
    #        (y > ? OR (y = ? AND
    #          z > ?
    #        ))
    #      ...
    #    ))
    #  ))
    #
    # which itself is equivalent to (and apparently easier for the query optimiser):
    #
    #  (a >= ? AND (a > ? OR
    #    (b >= ? AND (b > ? OR
    #      ...
    #        (y >= ? AND (y > ? OR
    #          z > ?
    #        ))
    #      ...
    #    ))
    #  ))

    leading = keys[:-1]
    last_column, last_value = keys[-1]

    fragments = []
    args = []  # type: List[KV]

    # every column but the last opens one "(col >= ? AND (col > ? OR " level,
    # and binds its value twice
    for column, value in leading:
        fragments.append("(%s >= ? AND (%s > ? OR " % (column, column))
        args.extend([value, value])

    # the innermost comparison, on the least significant column
    fragments.append("%s > ?" % (last_column,))
    args.append(last_value)

    # close the two brackets opened by each leading column
    fragments.append("))" * len(leading))

    return "".join(fragments), args

tests/storage/test_database.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
# -*- coding: utf-8 -*-
2+
# Copyright 2020 The Matrix.org Foundation C.I.C.
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
from synapse.storage.database import make_tuple_comparison_clause
17+
from synapse.storage.engines import BaseDatabaseEngine
18+
19+
from tests import unittest
20+
21+
22+
def _stub_db_engine(**kwargs) -> BaseDatabaseEngine:
    """Build a BaseDatabaseEngine instance, circumventing the abc mechanism.

    A throwaway subclass is created from a copy of BaseDatabaseEngine's
    namespace and its set of abstract methods is emptied, so that it can be
    instantiated. Each keyword argument is set as an attribute on that subclass
    before it is instantiated.
    """
    namespace = dict(BaseDatabaseEngine.__dict__)
    stub_cls = type("TestBaseDatabaseEngine", (BaseDatabaseEngine,), namespace)

    # defeat the abc mechanism: nothing is left marked as abstract
    stub_cls.__abstractmethods__ = set()

    for attr_name, attr_value in kwargs.items():
        setattr(stub_cls, attr_name, attr_value)

    return stub_cls(None, None)
35+
36+
37+
class TupleComparisonClauseTestCase(unittest.TestCase):
    """Checks the SQL built by make_tuple_comparison_clause for both engine kinds."""

    def test_native_tuple_comparison(self):
        """An engine with tuple comparison gets a single tuple comparison."""
        engine = _stub_db_engine(supports_tuple_comparison=True)
        keys = [("a", 1), ("b", 2)]

        sql, params = make_tuple_comparison_clause(engine, keys)

        self.assertEqual(sql, "(a,b) > (?,?)")
        self.assertEqual(params, [1, 2])

    def test_emulated_tuple_comparison(self):
        """An engine without tuple comparison gets the nested >= / > form."""
        engine = _stub_db_engine(supports_tuple_comparison=False)
        keys = [("a", 1), ("b", 2), ("c", 3)]
        expected_sql = "(a >= ? AND (a > ? OR (b >= ? AND (b > ? OR c > ?))))"

        sql, params = make_tuple_comparison_clause(engine, keys)

        self.assertEqual(sql, expected_sql)
        # every column but the last binds its value twice
        self.assertEqual(params, [1, 1, 2, 2, 3])

0 commit comments

Comments
 (0)