From 6d14fdc2710688014a7a66cc48485462c6e86a1e Mon Sep 17 00:00:00 2001 From: Sean Quah <8349537+squahtx@users.noreply.github.com> Date: Tue, 31 Jan 2023 11:03:55 +0000 Subject: [PATCH] Make sqlite database migrations transactional again, part two (#14926) #14910 fixed the regression introduced by #13873 where sqlite database migrations would no longer run inside a transaction. However, it committed the transaction before Synapse updated its bookkeeping of which migrations have been run, which means that migrations may be run again after they have completed successfully. Leave the transaction open at the end of `executescript`, to restore the old, correct behaviour. Also make the PostgreSQL behaviour consistent with SQLite. Fixes #14909. Signed-off-by: Sean Quah --- changelog.d/14926.bugfix | 1 + synapse/storage/engines/_base.py | 5 +- synapse/storage/engines/postgres.py | 6 +- synapse/storage/engines/sqlite.py | 6 +- tests/storage/test_database.py | 96 +++++++++++++++++++++++++++++ 5 files changed, 109 insertions(+), 5 deletions(-) create mode 100644 changelog.d/14926.bugfix diff --git a/changelog.d/14926.bugfix b/changelog.d/14926.bugfix new file mode 100644 index 0000000000..f1f34cd6ba --- /dev/null +++ b/changelog.d/14926.bugfix @@ -0,0 +1 @@ +Fix a regression introduced in Synapse 1.69.0 which can result in database corruption when database migrations are interrupted on sqlite. diff --git a/synapse/storage/engines/_base.py b/synapse/storage/engines/_base.py index bc9ca3a53c..0363cdc038 100644 --- a/synapse/storage/engines/_base.py +++ b/synapse/storage/engines/_base.py @@ -133,8 +133,9 @@ class BaseDatabaseEngine(Generic[ConnectionType, CursorType], metaclass=abc.ABCM This is not provided by DBAPI2, and so needs engine-specific support. - Some database engines may automatically COMMIT the ongoing transaction both - before and after executing the script. + Any ongoing transaction is committed before executing the script in its own + transaction. The script transaction is left open and it is the responsibility of + the caller to commit it. """ ... diff --git a/synapse/storage/engines/postgres.py b/synapse/storage/engines/postgres.py index f9f562ea45..b350f57ccb 100644 --- a/synapse/storage/engines/postgres.py +++ b/synapse/storage/engines/postgres.py @@ -220,5 +220,9 @@ class PostgresEngine( """Execute a chunk of SQL containing multiple semicolon-delimited statements. Psycopg2 seems happy to do this in DBAPI2's `execute()` function. + + For consistency with SQLite, any ongoing transaction is committed before + executing the script in its own transaction. The script transaction is + left open and it is the responsibility of the caller to commit it. """ - cursor.execute(script) + cursor.execute(f"COMMIT; BEGIN TRANSACTION; {script}") diff --git a/synapse/storage/engines/sqlite.py b/synapse/storage/engines/sqlite.py index 2f7df85ce4..28751e89a5 100644 --- a/synapse/storage/engines/sqlite.py +++ b/synapse/storage/engines/sqlite.py @@ -135,14 +135,16 @@ class Sqlite3Engine(BaseDatabaseEngine[sqlite3.Connection, sqlite3.Cursor]): > than one statement with it, it will raise a Warning. Use executescript() if > you want to execute multiple SQL statements with one call. - The script is wrapped in transaction control statemnets, since the docs for + The script is prefixed with a `BEGIN TRANSACTION`, since the docs for `executescript` warn: > If there is a pending transaction, an implicit COMMIT statement is executed > first. No other implicit transaction control is performed; any transaction > control must be added to sql_script. """ - cursor.executescript(f"BEGIN TRANSACTION;\n{script}\nCOMMIT;") + # The implementation of `executescript` can be found at + # https://github.com/python/cpython/blob/3.11/Modules/_sqlite/cursor.c#L1035. + cursor.executescript(f"BEGIN TRANSACTION; {script}") # Following functions taken from: https://github.com/coleifer/peewee diff --git a/tests/storage/test_database.py b/tests/storage/test_database.py index 543cce6b3e..8cd7c89ca2 100644 --- a/tests/storage/test_database.py +++ b/tests/storage/test_database.py @@ -22,6 +22,7 @@ from twisted.test.proto_helpers import MemoryReactor from synapse.server import HomeServer from synapse.storage.database import ( DatabasePool, + LoggingDatabaseConnection, LoggingTransaction, make_tuple_comparison_clause, ) @@ -37,6 +38,101 @@ class TupleComparisonClauseTestCase(unittest.TestCase): self.assertEqual(args, [1, 2]) +class ExecuteScriptTestCase(unittest.HomeserverTestCase): + """Tests for `BaseDatabaseEngine.executescript` implementations.""" + + def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: + self.store = hs.get_datastores().main + self.db_pool: DatabasePool = self.store.db_pool + self.get_success( + self.db_pool.runInteraction( + "create", + lambda txn: txn.execute("CREATE TABLE foo (name TEXT PRIMARY KEY)"), + ) + ) + + def test_transaction(self) -> None: + """Test that all statements are run in a single transaction.""" + + def run(conn: LoggingDatabaseConnection) -> None: + cur = conn.cursor(txn_name="test_transaction") + self.db_pool.engine.executescript( + cur, + ";".join( + [ + "INSERT INTO foo (name) VALUES ('transaction test')", + # This next statement will fail. When `executescript` is not + # transactional, the previous row will be observed later. + "INSERT INTO foo (name) VALUES ('transaction test')", + ] + ), + ) + + self.get_failure( + self.db_pool.runWithConnection(run), + self.db_pool.engine.module.IntegrityError, + ) + + self.assertIsNone( + self.get_success( + self.db_pool.simple_select_one_onecol( + "foo", + keyvalues={"name": "transaction test"}, + retcol="name", + allow_none=True, + ) + ), + "executescript is not running statements inside a transaction", + ) + + def test_commit(self) -> None: + """Test that the script transaction remains open and can be committed.""" + + def run(conn: LoggingDatabaseConnection) -> None: + cur = conn.cursor(txn_name="test_commit") + self.db_pool.engine.executescript( + cur, "INSERT INTO foo (name) VALUES ('commit test')" + ) + cur.execute("COMMIT") + + self.get_success(self.db_pool.runWithConnection(run)) + + self.assertIsNotNone( + self.get_success( + self.db_pool.simple_select_one_onecol( + "foo", + keyvalues={"name": "commit test"}, + retcol="name", + allow_none=True, + ) + ), + ) + + def test_rollback(self) -> None: + """Test that the script transaction remains open and can be rolled back.""" + + def run(conn: LoggingDatabaseConnection) -> None: + cur = conn.cursor(txn_name="test_rollback") + self.db_pool.engine.executescript( + cur, "INSERT INTO foo (name) VALUES ('rollback test')" + ) + cur.execute("ROLLBACK") + + self.get_success(self.db_pool.runWithConnection(run)) + + self.assertIsNone( + self.get_success( + self.db_pool.simple_select_one_onecol( + "foo", + keyvalues={"name": "rollback test"}, + retcol="name", + allow_none=True, + ) + ), + "executescript is not leaving the script transaction open", + ) + + class CallbacksTestCase(unittest.HomeserverTestCase): """Tests for transaction callbacks."""