From fff9b955fa39bda2cca1fa726b561c7886e746a1 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Tue, 20 Sep 2022 14:14:12 +0100 Subject: [PATCH] Generate separate snapshots for logical databases (#13792) * Generate separate snapshots for sqlite, postgres and common * Cleanup postgres dbs in the TRAP * Say which logical DB we're applying updates to * Run background updates on the state DB * Add new option for accepting a SCHEMA_NUMBER --- changelog.d/13792.misc | 1 + scripts-dev/make_full_schema.sh | 166 +++++++++++++++----- synapse/_scripts/update_synapse_database.py | 14 +- synapse/storage/background_updates.py | 5 +- 4 files changed, 140 insertions(+), 46 deletions(-) create mode 100644 changelog.d/13792.misc diff --git a/changelog.d/13792.misc b/changelog.d/13792.misc new file mode 100644 index 0000000000..36ac91400a --- /dev/null +++ b/changelog.d/13792.misc @@ -0,0 +1 @@ +Update the script which makes full schema dumps. diff --git a/scripts-dev/make_full_schema.sh b/scripts-dev/make_full_schema.sh index 61394360ce..d8cd06ee4f 100755 --- a/scripts-dev/make_full_schema.sh +++ b/scripts-dev/make_full_schema.sh @@ -2,23 +2,16 @@ # # This script generates SQL files for creating a brand new Synapse DB with the latest # schema, on both SQLite3 and Postgres. -# -# It does so by having Synapse generate an up-to-date SQLite DB, then running -# synapse_port_db to convert it to Postgres. It then dumps the contents of both. export PGHOST="localhost" -POSTGRES_DB_NAME="synapse_full_schema.$$" - -SQLITE_SCHEMA_FILE="schema.sql.sqlite" -SQLITE_ROWS_FILE="rows.sql.sqlite" -POSTGRES_SCHEMA_FILE="full.sql.postgres" -POSTGRES_ROWS_FILE="rows.sql.postgres" - +POSTGRES_MAIN_DB_NAME="synapse_full_schema_main.$$" +POSTGRES_COMMON_DB_NAME="synapse_full_schema_common.$$" +POSTGRES_STATE_DB_NAME="synapse_full_schema_state.$$" REQUIRED_DEPS=("matrix-synapse" "psycopg2") usage() { echo - echo "Usage: $0 -p -o [-c] [-n] [-h]" + echo "Usage: $0 -p -o [-c] [-n ] [-h]" echo echo "-p " echo " Username to connect to local postgres instance. The password will be requested" @@ -27,11 +20,16 @@ usage() { echo " CI mode. Prints every command that the script runs." echo "-o " echo " Directory to output full schema files to." + echo "-n " + echo " Schema number for the new snapshot. Used to set the location of files within " + echo " the output directory, mimicking that of synapse/storage/schemas." + echo " Defaults to 9999." echo "-h" echo " Display this help text." } -while getopts "p:co:h" opt; do +SCHEMA_NUMBER="9999" +while getopts "p:co:hn:" opt; do case $opt in p) export PGUSER=$OPTARG @@ -48,6 +46,9 @@ while getopts "p:co:h" opt; do usage exit ;; + n) + SCHEMA_NUMBER="$OPTARG" + ;; \?) echo "ERROR: Invalid option: -$OPTARG" >&2 usage @@ -95,12 +96,21 @@ cd "$(dirname "$0")/.." TMPDIR=$(mktemp -d) KEY_FILE=$TMPDIR/test.signing.key # default Synapse signing key path SQLITE_CONFIG=$TMPDIR/sqlite.conf -SQLITE_DB=$TMPDIR/homeserver.db +SQLITE_MAIN_DB=$TMPDIR/main.db +SQLITE_STATE_DB=$TMPDIR/state.db +SQLITE_COMMON_DB=$TMPDIR/common.db POSTGRES_CONFIG=$TMPDIR/postgres.conf # Ensure these files are delete on script exit -# TODO: the trap should also drop the temp postgres DB -trap 'rm -rf $TMPDIR' EXIT +cleanup() { + echo "Cleaning up temporary sqlite database and config files..." + rm -r "$TMPDIR" + echo "Cleaning up temporary Postgres database..." + dropdb --if-exists "$POSTGRES_COMMON_DB_NAME" + dropdb --if-exists "$POSTGRES_MAIN_DB_NAME" + dropdb --if-exists "$POSTGRES_STATE_DB_NAME" +} +trap 'cleanup' EXIT cat > "$SQLITE_CONFIG" < "$OUTPUT_DIR/$SQLITE_SCHEMA_FILE" -sqlite3 "$SQLITE_DB" ".dump --data-only --nosys" > "$OUTPUT_DIR/$SQLITE_ROWS_FILE" +sqlite3 "$SQLITE_COMMON_DB" <<< "$DROP_APP_MANAGED_TABLES" +sqlite3 "$SQLITE_MAIN_DB" <<< "$DROP_COMMON_TABLES" +sqlite3 "$SQLITE_STATE_DB" <<< "$DROP_COMMON_TABLES" +psql "$POSTGRES_COMMON_DB_NAME" -w <<< "$DROP_APP_MANAGED_TABLES" +psql "$POSTGRES_MAIN_DB_NAME" -w <<< "$DROP_COMMON_TABLES" +psql "$POSTGRES_STATE_DB_NAME" -w <<< "$DROP_COMMON_TABLES" -echo "Dumping Postgres schema to '$OUTPUT_DIR/$POSTGRES_SCHEMA_FILE' and '$OUTPUT_DIR/$POSTGRES_ROWS_FILE'..." -pg_dump --format=plain --schema-only --no-tablespaces --no-acl --no-owner "$POSTGRES_DB_NAME" | sed -e '/^$/d' -e '/^--/d' -e 's/public\.//g' -e '/^SET /d' -e '/^SELECT /d' > "$OUTPUT_DIR/$POSTGRES_SCHEMA_FILE" -pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_DB_NAME" | sed -e '/^$/d' -e '/^--/d' -e 's/public\.//g' -e '/^SET /d' -e '/^SELECT /d' > "$OUTPUT_DIR/$POSTGRES_ROWS_FILE" +# For Reasons(TM), SQLite's `.schema` also dumps out "shadow tables", the implementation +# details behind full text search tables. Omit these from the dumps. -echo "Cleaning up temporary Postgres database..." -dropdb $POSTGRES_DB_NAME +sqlite3 "$SQLITE_MAIN_DB" <<< " +DROP TABLE event_search_content; +DROP TABLE event_search_segments; +DROP TABLE event_search_segdir; +DROP TABLE event_search_docsize; +DROP TABLE event_search_stat; +DROP TABLE user_directory_search_content; +DROP TABLE user_directory_search_segments; +DROP TABLE user_directory_search_segdir; +DROP TABLE user_directory_search_docsize; +DROP TABLE user_directory_search_stat; +" + +echo "Dumping SQLite3 schema..." + +mkdir -p "$OUTPUT_DIR/"{common,main,state}"/full_schema/$SCHEMA_NUMBER" +sqlite3 "$SQLITE_COMMON_DB" ".schema --indent" > "$OUTPUT_DIR/common/full_schema/$SCHEMA_NUMBER/full.sql.sqlite" +sqlite3 "$SQLITE_COMMON_DB" ".dump --data-only --nosys" >> "$OUTPUT_DIR/common/full_schema/$SCHEMA_NUMBER/full.sql.sqlite" +sqlite3 "$SQLITE_MAIN_DB" ".schema --indent" > "$OUTPUT_DIR/main/full_schema/$SCHEMA_NUMBER/full.sql.sqlite" +sqlite3 "$SQLITE_MAIN_DB" ".dump --data-only --nosys" >> "$OUTPUT_DIR/main/full_schema/$SCHEMA_NUMBER/full.sql.sqlite" +sqlite3 "$SQLITE_STATE_DB" ".schema --indent" > "$OUTPUT_DIR/state/full_schema/$SCHEMA_NUMBER/full.sql.sqlite" +sqlite3 "$SQLITE_STATE_DB" ".dump --data-only --nosys" >> "$OUTPUT_DIR/state/full_schema/$SCHEMA_NUMBER/full.sql.sqlite" + +cleanup_pg_schema() { + sed -e '/^$/d' -e '/^--/d' -e 's/public\.//g' -e '/^SET /d' -e '/^SELECT /d' +} + +echo "Dumping Postgres schema..." + +pg_dump --format=plain --schema-only --no-tablespaces --no-acl --no-owner "$POSTGRES_COMMON_DB_NAME" | cleanup_pg_schema > "$OUTPUT_DIR/common/full_schema/$SCHEMA_NUMBER/full.sql.postgres" +pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_COMMON_DB_NAME" | cleanup_pg_schema >> "$OUTPUT_DIR/common/full_schema/$SCHEMA_NUMBER/full.sql.postgres" +pg_dump --format=plain --schema-only --no-tablespaces --no-acl --no-owner "$POSTGRES_MAIN_DB_NAME" | cleanup_pg_schema > "$OUTPUT_DIR/main/full_schema/$SCHEMA_NUMBER/full.sql.postgres" +pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_MAIN_DB_NAME" | cleanup_pg_schema >> "$OUTPUT_DIR/main/full_schema/$SCHEMA_NUMBER/full.sql.postgres" +pg_dump --format=plain --schema-only --no-tablespaces --no-acl --no-owner "$POSTGRES_STATE_DB_NAME" | cleanup_pg_schema > "$OUTPUT_DIR/state/full_schema/$SCHEMA_NUMBER/full.sql.postgres" +pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_STATE_DB_NAME" | cleanup_pg_schema >> "$OUTPUT_DIR/state/full_schema/$SCHEMA_NUMBER/full.sql.postgres" echo "Done! Files dumped to: $OUTPUT_DIR" diff --git a/synapse/_scripts/update_synapse_database.py b/synapse/_scripts/update_synapse_database.py index b4aeae6dd5..fb1fb83f50 100755 --- a/synapse/_scripts/update_synapse_database.py +++ b/synapse/_scripts/update_synapse_database.py @@ -48,10 +48,13 @@ class MockHomeserver(HomeServer): def run_background_updates(hs: HomeServer) -> None: - store = hs.get_datastores().main + main = hs.get_datastores().main + state = hs.get_datastores().state async def run_background_updates() -> None: - await store.db_pool.updates.run_background_updates(sleep=False) + await main.db_pool.updates.run_background_updates(sleep=False) + if state: + await state.db_pool.updates.run_background_updates(sleep=False) # Stop the reactor to exit the script once every background update is run. reactor.stop() @@ -97,8 +100,11 @@ def main() -> None: # Load, process and sanity-check the config. hs_config = yaml.safe_load(args.database_config) - if "database" not in hs_config: - sys.stderr.write("The configuration file must have a 'database' section.\n") + if "database" not in hs_config and "databases" not in hs_config: + sys.stderr.write( + "The configuration file must have a 'database' or 'databases' section. " + "See https://matrix-org.github.io/synapse/latest/usage/configuration/config_documentation.html#database" + ) sys.exit(4) config = HomeServerConfig() diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py index bf5e7ee7be..2056ecb2c3 100644 --- a/synapse/storage/background_updates.py +++ b/synapse/storage/background_updates.py @@ -285,7 +285,10 @@ class BackgroundUpdater: back_to_back_failures = 0 try: - logger.info("Starting background schema updates") + logger.info( + "Starting background schema updates for database %s", + self._database_name, + ) while self.enabled: try: result = await self.do_next_background_update(sleep)