Add a unique index to `state_group_edges` to prevent duplicates being accidentally introduced and the consequential impact to performance. (#12687)
This commit is contained in:
parent
f16ec055cc
commit
66a5f6c400
|
@ -0,0 +1 @@
|
||||||
|
Add a unique index to `state_group_edges` to prevent duplicates being accidentally introduced and the consequential impact to performance.
|
|
@ -89,6 +89,96 @@ process, for example:
|
||||||
dpkg -i matrix-synapse-py3_1.3.0+stretch1_amd64.deb
|
dpkg -i matrix-synapse-py3_1.3.0+stretch1_amd64.deb
|
||||||
```
|
```
|
||||||
|
|
||||||
|
# Upgrading to v1.60.0
|
||||||
|
|
||||||
|
## Adding a new unique index to `state_group_edges` could fail if your database is corrupted
|
||||||
|
|
||||||
|
This release of Synapse will add a unique index to the `state_group_edges` table, in order
|
||||||
|
to prevent accidentally introducing duplicate information (for example, because a database
|
||||||
|
backup was restored multiple times).
|
||||||
|
|
||||||
|
Duplicate rows being present in this table could cause drastic performance problems; see
|
||||||
|
[issue 11779](https://github.com/matrix-org/synapse/issues/11779) for more details.
|
||||||
|
|
||||||
|
If your Synapse database already has had duplicate rows introduced into this table,
|
||||||
|
this could fail, with either of these errors:
|
||||||
|
|
||||||
|
|
||||||
|
**On Postgres:**
|
||||||
|
```
|
||||||
|
synapse.storage.background_updates - 623 - INFO - background_updates-0 - Adding index state_group_edges_unique_idx to state_group_edges
|
||||||
|
synapse.storage.background_updates - 282 - ERROR - background_updates-0 - Error doing update
|
||||||
|
...
|
||||||
|
psycopg2.errors.UniqueViolation: could not create unique index "state_group_edges_unique_idx"
|
||||||
|
DETAIL: Key (state_group, prev_state_group)=(2, 1) is duplicated.
|
||||||
|
```
|
||||||
|
(The numbers may be different.)
|
||||||
|
|
||||||
|
**On SQLite:**
|
||||||
|
```
|
||||||
|
synapse.storage.background_updates - 623 - INFO - background_updates-0 - Adding index state_group_edges_unique_idx to state_group_edges
|
||||||
|
synapse.storage.background_updates - 282 - ERROR - background_updates-0 - Error doing update
|
||||||
|
...
|
||||||
|
sqlite3.IntegrityError: UNIQUE constraint failed: state_group_edges.state_group, state_group_edges.prev_state_group
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
<details>
|
||||||
|
<summary><b>Expand this section for steps to resolve this problem</b></summary>
|
||||||
|
|
||||||
|
### On Postgres
|
||||||
|
|
||||||
|
Connect to your database with `psql`.
|
||||||
|
|
||||||
|
```sql
|
||||||
|
BEGIN;
|
||||||
|
DELETE FROM state_group_edges WHERE (ctid, state_group, prev_state_group) IN (
|
||||||
|
SELECT row_id, state_group, prev_state_group
|
||||||
|
FROM (
|
||||||
|
SELECT
|
||||||
|
ctid AS row_id,
|
||||||
|
MIN(ctid) OVER (PARTITION BY state_group, prev_state_group) AS min_row_id,
|
||||||
|
state_group,
|
||||||
|
prev_state_group
|
||||||
|
FROM state_group_edges
|
||||||
|
) AS t1
|
||||||
|
WHERE row_id <> min_row_id
|
||||||
|
);
|
||||||
|
COMMIT;
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
### On SQLite
|
||||||
|
|
||||||
|
At the command-line, use `sqlite3 path/to/your-homeserver-database.db`:
|
||||||
|
|
||||||
|
```sql
|
||||||
|
BEGIN;
|
||||||
|
DELETE FROM state_group_edges WHERE (rowid, state_group, prev_state_group) IN (
|
||||||
|
SELECT row_id, state_group, prev_state_group
|
||||||
|
FROM (
|
||||||
|
SELECT
|
||||||
|
rowid AS row_id,
|
||||||
|
MIN(rowid) OVER (PARTITION BY state_group, prev_state_group) AS min_row_id,
|
||||||
|
state_group,
|
||||||
|
prev_state_group
|
||||||
|
FROM state_group_edges
|
||||||
|
)
|
||||||
|
WHERE row_id <> min_row_id
|
||||||
|
);
|
||||||
|
COMMIT;
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
### For more details
|
||||||
|
|
||||||
|
[This comment on issue 11779](https://github.com/matrix-org/synapse/issues/11779#issuecomment-1131545970)
|
||||||
|
has queries that can be used to check a database for this problem in advance.
|
||||||
|
|
||||||
|
</details>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# Upgrading to v1.59.0
|
# Upgrading to v1.59.0
|
||||||
|
|
||||||
## Device name lookup over federation has been disabled by default
|
## Device name lookup over federation has been disabled by default
|
||||||
|
|
|
@ -535,6 +535,7 @@ class BackgroundUpdater:
|
||||||
where_clause: Optional[str] = None,
|
where_clause: Optional[str] = None,
|
||||||
unique: bool = False,
|
unique: bool = False,
|
||||||
psql_only: bool = False,
|
psql_only: bool = False,
|
||||||
|
replaces_index: Optional[str] = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Helper for store classes to do a background index addition
|
"""Helper for store classes to do a background index addition
|
||||||
|
|
||||||
|
@ -554,6 +555,8 @@ class BackgroundUpdater:
|
||||||
unique: true to make a UNIQUE index
|
unique: true to make a UNIQUE index
|
||||||
psql_only: true to only create this index on psql databases (useful
|
psql_only: true to only create this index on psql databases (useful
|
||||||
for virtual sqlite tables)
|
for virtual sqlite tables)
|
||||||
|
replaces_index: The name of an index that this index replaces.
|
||||||
|
The named index will be dropped upon completion of the new index.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def create_index_psql(conn: Connection) -> None:
|
def create_index_psql(conn: Connection) -> None:
|
||||||
|
@ -585,6 +588,12 @@ class BackgroundUpdater:
|
||||||
}
|
}
|
||||||
logger.debug("[SQL] %s", sql)
|
logger.debug("[SQL] %s", sql)
|
||||||
c.execute(sql)
|
c.execute(sql)
|
||||||
|
|
||||||
|
if replaces_index is not None:
|
||||||
|
# We drop the old index as the new index has now been created.
|
||||||
|
sql = f"DROP INDEX IF EXISTS {replaces_index}"
|
||||||
|
logger.debug("[SQL] %s", sql)
|
||||||
|
c.execute(sql)
|
||||||
finally:
|
finally:
|
||||||
conn.set_session(autocommit=False) # type: ignore
|
conn.set_session(autocommit=False) # type: ignore
|
||||||
|
|
||||||
|
@ -613,6 +622,12 @@ class BackgroundUpdater:
|
||||||
logger.debug("[SQL] %s", sql)
|
logger.debug("[SQL] %s", sql)
|
||||||
c.execute(sql)
|
c.execute(sql)
|
||||||
|
|
||||||
|
if replaces_index is not None:
|
||||||
|
# We drop the old index as the new index has now been created.
|
||||||
|
sql = f"DROP INDEX IF EXISTS {replaces_index}"
|
||||||
|
logger.debug("[SQL] %s", sql)
|
||||||
|
c.execute(sql)
|
||||||
|
|
||||||
if isinstance(self.db_pool.engine, engines.PostgresEngine):
|
if isinstance(self.db_pool.engine, engines.PostgresEngine):
|
||||||
runner: Optional[Callable[[Connection], None]] = create_index_psql
|
runner: Optional[Callable[[Connection], None]] = create_index_psql
|
||||||
elif psql_only:
|
elif psql_only:
|
||||||
|
|
|
@ -195,6 +195,7 @@ class StateBackgroundUpdateStore(StateGroupBackgroundUpdateStore):
|
||||||
STATE_GROUP_DEDUPLICATION_UPDATE_NAME = "state_group_state_deduplication"
|
STATE_GROUP_DEDUPLICATION_UPDATE_NAME = "state_group_state_deduplication"
|
||||||
STATE_GROUP_INDEX_UPDATE_NAME = "state_group_state_type_index"
|
STATE_GROUP_INDEX_UPDATE_NAME = "state_group_state_type_index"
|
||||||
STATE_GROUPS_ROOM_INDEX_UPDATE_NAME = "state_groups_room_id_idx"
|
STATE_GROUPS_ROOM_INDEX_UPDATE_NAME = "state_groups_room_id_idx"
|
||||||
|
STATE_GROUP_EDGES_UNIQUE_INDEX_UPDATE_NAME = "state_group_edges_unique_idx"
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
|
@ -217,6 +218,21 @@ class StateBackgroundUpdateStore(StateGroupBackgroundUpdateStore):
|
||||||
columns=["room_id"],
|
columns=["room_id"],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# `state_group_edges` can cause severe performance issues if duplicate
|
||||||
|
# rows are introduced, which can accidentally be done by well-meaning
|
||||||
|
# server admins when trying to restore a database dump, etc.
|
||||||
|
# See https://github.com/matrix-org/synapse/issues/11779.
|
||||||
|
# Introduce a unique index to guard against that.
|
||||||
|
self.db_pool.updates.register_background_index_update(
|
||||||
|
self.STATE_GROUP_EDGES_UNIQUE_INDEX_UPDATE_NAME,
|
||||||
|
index_name="state_group_edges_unique_idx",
|
||||||
|
table="state_group_edges",
|
||||||
|
columns=["state_group", "prev_state_group"],
|
||||||
|
unique=True,
|
||||||
|
# The old index was on (state_group) and was not unique.
|
||||||
|
replaces_index="state_group_edges_idx",
|
||||||
|
)
|
||||||
|
|
||||||
async def _background_deduplicate_state(
|
async def _background_deduplicate_state(
|
||||||
self, progress: dict, batch_size: int
|
self, progress: dict, batch_size: int
|
||||||
) -> int:
|
) -> int:
|
||||||
|
|
|
@ -0,0 +1,17 @@
|
||||||
|
/* Copyright 2022 The Matrix.org Foundation C.I.C
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
INSERT INTO background_updates (ordering, update_name, progress_json) VALUES
|
||||||
|
(7008, 'state_group_edges_unique_idx', '{}');
|
Loading…
Reference in New Issue