Add a unique index to `state_group_edges` to prevent duplicates being accidentally introduced and the consequential impact to performance. (#12687)

This commit is contained in:
reivilibre 2022-05-19 14:16:49 +01:00 committed by GitHub
parent f16ec055cc
commit 66a5f6c400
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 139 additions and 0 deletions

1
changelog.d/12687.bugfix Normal file
View File

@ -0,0 +1 @@
Add a unique index to `state_group_edges` to prevent duplicates being accidentally introduced and the consequential impact to performance.

View File

@ -89,6 +89,96 @@ process, for example:
dpkg -i matrix-synapse-py3_1.3.0+stretch1_amd64.deb
```
# Upgrading to v1.60.0
## Adding a new unique index to `state_group_edges` could fail if your database is corrupted
This release of Synapse will add a unique index to the `state_group_edges` table, in order
to prevent accidentally introducing duplicate information (for example, because a database
backup was restored multiple times).
Duplicate rows being present in this table could cause drastic performance problems; see
[issue 11779](https://github.com/matrix-org/synapse/issues/11779) for more details.
If your Synapse database already has had duplicate rows introduced into this table,
this could fail, with either of these errors:
**On Postgres:**
```
synapse.storage.background_updates - 623 - INFO - background_updates-0 - Adding index state_group_edges_unique_idx to state_group_edges
synapse.storage.background_updates - 282 - ERROR - background_updates-0 - Error doing update
...
psycopg2.errors.UniqueViolation: could not create unique index "state_group_edges_unique_idx"
DETAIL: Key (state_group, prev_state_group)=(2, 1) is duplicated.
```
(The numbers may be different.)
**On SQLite:**
```
synapse.storage.background_updates - 623 - INFO - background_updates-0 - Adding index state_group_edges_unique_idx to state_group_edges
synapse.storage.background_updates - 282 - ERROR - background_updates-0 - Error doing update
...
sqlite3.IntegrityError: UNIQUE constraint failed: state_group_edges.state_group, state_group_edges.prev_state_group
```
<details>
<summary><b>Expand this section for steps to resolve this problem</b></summary>
### On Postgres
Connect to your database with `psql`.
```sql
BEGIN;
DELETE FROM state_group_edges WHERE (ctid, state_group, prev_state_group) IN (
SELECT row_id, state_group, prev_state_group
FROM (
SELECT
ctid AS row_id,
MIN(ctid) OVER (PARTITION BY state_group, prev_state_group) AS min_row_id,
state_group,
prev_state_group
FROM state_group_edges
) AS t1
WHERE row_id <> min_row_id
);
COMMIT;
```
### On SQLite
At the command-line, use `sqlite3 path/to/your-homeserver-database.db`:
```sql
BEGIN;
DELETE FROM state_group_edges WHERE (rowid, state_group, prev_state_group) IN (
SELECT row_id, state_group, prev_state_group
FROM (
SELECT
rowid AS row_id,
MIN(rowid) OVER (PARTITION BY state_group, prev_state_group) AS min_row_id,
state_group,
prev_state_group
FROM state_group_edges
)
WHERE row_id <> min_row_id
);
COMMIT;
```
### For more details
[This comment on issue 11779](https://github.com/matrix-org/synapse/issues/11779#issuecomment-1131545970)
has queries that can be used to check a database for this problem in advance.
</details>
# Upgrading to v1.59.0
## Device name lookup over federation has been disabled by default

View File

@ -535,6 +535,7 @@ class BackgroundUpdater:
where_clause: Optional[str] = None,
unique: bool = False,
psql_only: bool = False,
replaces_index: Optional[str] = None,
) -> None:
"""Helper for store classes to do a background index addition
@ -554,6 +555,8 @@ class BackgroundUpdater:
unique: true to make a UNIQUE index
psql_only: true to only create this index on psql databases (useful
for virtual sqlite tables)
replaces_index: The name of an index that this index replaces.
The named index will be dropped upon completion of the new index.
"""
def create_index_psql(conn: Connection) -> None:
@ -585,6 +588,12 @@ class BackgroundUpdater:
}
logger.debug("[SQL] %s", sql)
c.execute(sql)
if replaces_index is not None:
# We drop the old index as the new index has now been created.
sql = f"DROP INDEX IF EXISTS {replaces_index}"
logger.debug("[SQL] %s", sql)
c.execute(sql)
finally:
conn.set_session(autocommit=False) # type: ignore
@ -613,6 +622,12 @@ class BackgroundUpdater:
logger.debug("[SQL] %s", sql)
c.execute(sql)
if replaces_index is not None:
# We drop the old index as the new index has now been created.
sql = f"DROP INDEX IF EXISTS {replaces_index}"
logger.debug("[SQL] %s", sql)
c.execute(sql)
if isinstance(self.db_pool.engine, engines.PostgresEngine):
runner: Optional[Callable[[Connection], None]] = create_index_psql
elif psql_only:

View File

@ -195,6 +195,7 @@ class StateBackgroundUpdateStore(StateGroupBackgroundUpdateStore):
STATE_GROUP_DEDUPLICATION_UPDATE_NAME = "state_group_state_deduplication"
STATE_GROUP_INDEX_UPDATE_NAME = "state_group_state_type_index"
STATE_GROUPS_ROOM_INDEX_UPDATE_NAME = "state_groups_room_id_idx"
STATE_GROUP_EDGES_UNIQUE_INDEX_UPDATE_NAME = "state_group_edges_unique_idx"
def __init__(
self,
@ -217,6 +218,21 @@ class StateBackgroundUpdateStore(StateGroupBackgroundUpdateStore):
columns=["room_id"],
)
# `state_group_edges` can cause severe performance issues if duplicate
# rows are introduced, which can accidentally be done by well-meaning
# server admins when trying to restore a database dump, etc.
# See https://github.com/matrix-org/synapse/issues/11779.
# Introduce a unique index to guard against that.
self.db_pool.updates.register_background_index_update(
self.STATE_GROUP_EDGES_UNIQUE_INDEX_UPDATE_NAME,
index_name="state_group_edges_unique_idx",
table="state_group_edges",
columns=["state_group", "prev_state_group"],
unique=True,
# The old index was on (state_group) and was not unique.
replaces_index="state_group_edges_idx",
)
async def _background_deduplicate_state(
self, progress: dict, batch_size: int
) -> int:

View File

@ -0,0 +1,17 @@
/* Copyright 2022 The Matrix.org Foundation C.I.C
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
INSERT INTO background_updates (ordering, update_name, progress_json) VALUES
(7008, 'state_group_edges_unique_idx', '{}');