Add a unique index to `state_group_edges` to prevent duplicates being accidentally introduced and the consequential impact to performance. (#12687)
This commit is contained in:
parent
f16ec055cc
commit
66a5f6c400
|
@ -0,0 +1 @@
|
|||
Add a unique index to `state_group_edges` to prevent duplicates being accidentally introduced and the consequential impact to performance.
|
|
@ -89,6 +89,96 @@ process, for example:
|
|||
dpkg -i matrix-synapse-py3_1.3.0+stretch1_amd64.deb
|
||||
```
|
||||
|
||||
# Upgrading to v1.60.0
|
||||
|
||||
## Adding a new unique index to `state_group_edges` could fail if your database is corrupted
|
||||
|
||||
This release of Synapse will add a unique index to the `state_group_edges` table, in order
|
||||
to prevent accidentally introducing duplicate information (for example, because a database
|
||||
backup was restored multiple times).
|
||||
|
||||
Duplicate rows being present in this table could cause drastic performance problems; see
|
||||
[issue 11779](https://github.com/matrix-org/synapse/issues/11779) for more details.
|
||||
|
||||
If your Synapse database already has had duplicate rows introduced into this table,
|
||||
this could fail, with either of these errors:
|
||||
|
||||
|
||||
**On Postgres:**
|
||||
```
|
||||
synapse.storage.background_updates - 623 - INFO - background_updates-0 - Adding index state_group_edges_unique_idx to state_group_edges
|
||||
synapse.storage.background_updates - 282 - ERROR - background_updates-0 - Error doing update
|
||||
...
|
||||
psycopg2.errors.UniqueViolation: could not create unique index "state_group_edges_unique_idx"
|
||||
DETAIL: Key (state_group, prev_state_group)=(2, 1) is duplicated.
|
||||
```
|
||||
(The numbers may be different.)
|
||||
|
||||
**On SQLite:**
|
||||
```
|
||||
synapse.storage.background_updates - 623 - INFO - background_updates-0 - Adding index state_group_edges_unique_idx to state_group_edges
|
||||
synapse.storage.background_updates - 282 - ERROR - background_updates-0 - Error doing update
|
||||
...
|
||||
sqlite3.IntegrityError: UNIQUE constraint failed: state_group_edges.state_group, state_group_edges.prev_state_group
|
||||
```
|
||||
|
||||
|
||||
<details>
|
||||
<summary><b>Expand this section for steps to resolve this problem</b></summary>
|
||||
|
||||
### On Postgres
|
||||
|
||||
Connect to your database with `psql`.
|
||||
|
||||
```sql
|
||||
BEGIN;
|
||||
DELETE FROM state_group_edges WHERE (ctid, state_group, prev_state_group) IN (
|
||||
SELECT row_id, state_group, prev_state_group
|
||||
FROM (
|
||||
SELECT
|
||||
ctid AS row_id,
|
||||
MIN(ctid) OVER (PARTITION BY state_group, prev_state_group) AS min_row_id,
|
||||
state_group,
|
||||
prev_state_group
|
||||
FROM state_group_edges
|
||||
) AS t1
|
||||
WHERE row_id <> min_row_id
|
||||
);
|
||||
COMMIT;
|
||||
```
|
||||
|
||||
|
||||
### On SQLite
|
||||
|
||||
At the command-line, use `sqlite3 path/to/your-homeserver-database.db`:
|
||||
|
||||
```sql
|
||||
BEGIN;
|
||||
DELETE FROM state_group_edges WHERE (rowid, state_group, prev_state_group) IN (
|
||||
SELECT row_id, state_group, prev_state_group
|
||||
FROM (
|
||||
SELECT
|
||||
rowid AS row_id,
|
||||
MIN(rowid) OVER (PARTITION BY state_group, prev_state_group) AS min_row_id,
|
||||
state_group,
|
||||
prev_state_group
|
||||
FROM state_group_edges
|
||||
)
|
||||
WHERE row_id <> min_row_id
|
||||
);
|
||||
COMMIT;
|
||||
```
|
||||
|
||||
|
||||
### For more details
|
||||
|
||||
[This comment on issue 11779](https://github.com/matrix-org/synapse/issues/11779#issuecomment-1131545970)
|
||||
has queries that can be used to check a database for this problem in advance.
|
||||
|
||||
</details>
|
||||
|
||||
|
||||
|
||||
# Upgrading to v1.59.0
|
||||
|
||||
## Device name lookup over federation has been disabled by default
|
||||
|
|
|
@ -535,6 +535,7 @@ class BackgroundUpdater:
|
|||
where_clause: Optional[str] = None,
|
||||
unique: bool = False,
|
||||
psql_only: bool = False,
|
||||
replaces_index: Optional[str] = None,
|
||||
) -> None:
|
||||
"""Helper for store classes to do a background index addition
|
||||
|
||||
|
@ -554,6 +555,8 @@ class BackgroundUpdater:
|
|||
unique: true to make a UNIQUE index
|
||||
psql_only: true to only create this index on psql databases (useful
|
||||
for virtual sqlite tables)
|
||||
replaces_index: The name of an index that this index replaces.
|
||||
The named index will be dropped upon completion of the new index.
|
||||
"""
|
||||
|
||||
def create_index_psql(conn: Connection) -> None:
|
||||
|
@ -585,6 +588,12 @@ class BackgroundUpdater:
|
|||
}
|
||||
logger.debug("[SQL] %s", sql)
|
||||
c.execute(sql)
|
||||
|
||||
if replaces_index is not None:
|
||||
# We drop the old index as the new index has now been created.
|
||||
sql = f"DROP INDEX IF EXISTS {replaces_index}"
|
||||
logger.debug("[SQL] %s", sql)
|
||||
c.execute(sql)
|
||||
finally:
|
||||
conn.set_session(autocommit=False) # type: ignore
|
||||
|
||||
|
@ -613,6 +622,12 @@ class BackgroundUpdater:
|
|||
logger.debug("[SQL] %s", sql)
|
||||
c.execute(sql)
|
||||
|
||||
if replaces_index is not None:
|
||||
# We drop the old index as the new index has now been created.
|
||||
sql = f"DROP INDEX IF EXISTS {replaces_index}"
|
||||
logger.debug("[SQL] %s", sql)
|
||||
c.execute(sql)
|
||||
|
||||
if isinstance(self.db_pool.engine, engines.PostgresEngine):
|
||||
runner: Optional[Callable[[Connection], None]] = create_index_psql
|
||||
elif psql_only:
|
||||
|
|
|
@ -195,6 +195,7 @@ class StateBackgroundUpdateStore(StateGroupBackgroundUpdateStore):
|
|||
STATE_GROUP_DEDUPLICATION_UPDATE_NAME = "state_group_state_deduplication"
|
||||
STATE_GROUP_INDEX_UPDATE_NAME = "state_group_state_type_index"
|
||||
STATE_GROUPS_ROOM_INDEX_UPDATE_NAME = "state_groups_room_id_idx"
|
||||
STATE_GROUP_EDGES_UNIQUE_INDEX_UPDATE_NAME = "state_group_edges_unique_idx"
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
|
@ -217,6 +218,21 @@ class StateBackgroundUpdateStore(StateGroupBackgroundUpdateStore):
|
|||
columns=["room_id"],
|
||||
)
|
||||
|
||||
# `state_group_edges` can cause severe performance issues if duplicate
|
||||
# rows are introduced, which can accidentally be done by well-meaning
|
||||
# server admins when trying to restore a database dump, etc.
|
||||
# See https://github.com/matrix-org/synapse/issues/11779.
|
||||
# Introduce a unique index to guard against that.
|
||||
self.db_pool.updates.register_background_index_update(
|
||||
self.STATE_GROUP_EDGES_UNIQUE_INDEX_UPDATE_NAME,
|
||||
index_name="state_group_edges_unique_idx",
|
||||
table="state_group_edges",
|
||||
columns=["state_group", "prev_state_group"],
|
||||
unique=True,
|
||||
# The old index was on (state_group) and was not unique.
|
||||
replaces_index="state_group_edges_idx",
|
||||
)
|
||||
|
||||
async def _background_deduplicate_state(
|
||||
self, progress: dict, batch_size: int
|
||||
) -> int:
|
||||
|
|
|
@ -0,0 +1,17 @@
|
|||
/* Copyright 2022 The Matrix.org Foundation C.I.C
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
INSERT INTO background_updates (ordering, update_name, progress_json) VALUES
|
||||
(7008, 'state_group_edges_unique_idx', '{}');
|
Loading…
Reference in New Issue