forked from MirrorHub/synapse
Generate separate snapshots for logical databases (#13792)
* Generate separate snapshots for sqlite, postgres and common
* Clean up postgres dbs in the TRAP
* Say which logical DB we're applying updates to
* Run background updates on the state DB
* Add new option for accepting a SCHEMA_NUMBER
This commit is contained in:
parent
42d261c32f
commit
fff9b955fa
4 changed files with 140 additions and 46 deletions
1
changelog.d/13792.misc
Normal file
1
changelog.d/13792.misc
Normal file
|
@ -0,0 +1 @@
|
||||||
|
Update the script which makes full schema dumps.
|
|
@ -2,23 +2,16 @@
|
||||||
#
|
#
|
||||||
# This script generates SQL files for creating a brand new Synapse DB with the latest
|
# This script generates SQL files for creating a brand new Synapse DB with the latest
|
||||||
# schema, on both SQLite3 and Postgres.
|
# schema, on both SQLite3 and Postgres.
|
||||||
#
|
|
||||||
# It does so by having Synapse generate an up-to-date SQLite DB, then running
|
|
||||||
# synapse_port_db to convert it to Postgres. It then dumps the contents of both.
|
|
||||||
|
|
||||||
export PGHOST="localhost"
|
export PGHOST="localhost"
|
||||||
POSTGRES_DB_NAME="synapse_full_schema.$$"
|
POSTGRES_MAIN_DB_NAME="synapse_full_schema_main.$$"
|
||||||
|
POSTGRES_COMMON_DB_NAME="synapse_full_schema_common.$$"
|
||||||
SQLITE_SCHEMA_FILE="schema.sql.sqlite"
|
POSTGRES_STATE_DB_NAME="synapse_full_schema_state.$$"
|
||||||
SQLITE_ROWS_FILE="rows.sql.sqlite"
|
|
||||||
POSTGRES_SCHEMA_FILE="full.sql.postgres"
|
|
||||||
POSTGRES_ROWS_FILE="rows.sql.postgres"
|
|
||||||
|
|
||||||
REQUIRED_DEPS=("matrix-synapse" "psycopg2")
|
REQUIRED_DEPS=("matrix-synapse" "psycopg2")
|
||||||
|
|
||||||
usage() {
|
usage() {
|
||||||
echo
|
echo
|
||||||
echo "Usage: $0 -p <postgres_username> -o <path> [-c] [-n] [-h]"
|
echo "Usage: $0 -p <postgres_username> -o <path> [-c] [-n <schema number>] [-h]"
|
||||||
echo
|
echo
|
||||||
echo "-p <postgres_username>"
|
echo "-p <postgres_username>"
|
||||||
echo " Username to connect to local postgres instance. The password will be requested"
|
echo " Username to connect to local postgres instance. The password will be requested"
|
||||||
|
@ -27,11 +20,16 @@ usage() {
|
||||||
echo " CI mode. Prints every command that the script runs."
|
echo " CI mode. Prints every command that the script runs."
|
||||||
echo "-o <path>"
|
echo "-o <path>"
|
||||||
echo " Directory to output full schema files to."
|
echo " Directory to output full schema files to."
|
||||||
|
echo "-n <schema number>"
|
||||||
|
echo " Schema number for the new snapshot. Used to set the location of files within "
|
||||||
|
echo " the output directory, mimicking that of synapse/storage/schemas."
|
||||||
|
echo " Defaults to 9999."
|
||||||
echo "-h"
|
echo "-h"
|
||||||
echo " Display this help text."
|
echo " Display this help text."
|
||||||
}
|
}
|
||||||
|
|
||||||
while getopts "p:co:h" opt; do
|
SCHEMA_NUMBER="9999"
|
||||||
|
while getopts "p:co:hn:" opt; do
|
||||||
case $opt in
|
case $opt in
|
||||||
p)
|
p)
|
||||||
export PGUSER=$OPTARG
|
export PGUSER=$OPTARG
|
||||||
|
@ -48,6 +46,9 @@ while getopts "p:co:h" opt; do
|
||||||
usage
|
usage
|
||||||
exit
|
exit
|
||||||
;;
|
;;
|
||||||
|
n)
|
||||||
|
SCHEMA_NUMBER="$OPTARG"
|
||||||
|
;;
|
||||||
\?)
|
\?)
|
||||||
echo "ERROR: Invalid option: -$OPTARG" >&2
|
echo "ERROR: Invalid option: -$OPTARG" >&2
|
||||||
usage
|
usage
|
||||||
|
@ -95,12 +96,21 @@ cd "$(dirname "$0")/.."
|
||||||
TMPDIR=$(mktemp -d)
|
TMPDIR=$(mktemp -d)
|
||||||
KEY_FILE=$TMPDIR/test.signing.key # default Synapse signing key path
|
KEY_FILE=$TMPDIR/test.signing.key # default Synapse signing key path
|
||||||
SQLITE_CONFIG=$TMPDIR/sqlite.conf
|
SQLITE_CONFIG=$TMPDIR/sqlite.conf
|
||||||
SQLITE_DB=$TMPDIR/homeserver.db
|
SQLITE_MAIN_DB=$TMPDIR/main.db
|
||||||
|
SQLITE_STATE_DB=$TMPDIR/state.db
|
||||||
|
SQLITE_COMMON_DB=$TMPDIR/common.db
|
||||||
POSTGRES_CONFIG=$TMPDIR/postgres.conf
|
POSTGRES_CONFIG=$TMPDIR/postgres.conf
|
||||||
|
|
||||||
# Ensure these files are deleted on script exit
|
# Ensure these files are deleted on script exit
|
||||||
# TODO: the trap should also drop the temp postgres DB
|
cleanup() {
|
||||||
trap 'rm -rf $TMPDIR' EXIT
|
echo "Cleaning up temporary sqlite database and config files..."
|
||||||
|
rm -r "$TMPDIR"
|
||||||
|
echo "Cleaning up temporary Postgres database..."
|
||||||
|
dropdb --if-exists "$POSTGRES_COMMON_DB_NAME"
|
||||||
|
dropdb --if-exists "$POSTGRES_MAIN_DB_NAME"
|
||||||
|
dropdb --if-exists "$POSTGRES_STATE_DB_NAME"
|
||||||
|
}
|
||||||
|
trap 'cleanup' EXIT
|
||||||
|
|
||||||
cat > "$SQLITE_CONFIG" <<EOF
|
cat > "$SQLITE_CONFIG" <<EOF
|
||||||
server_name: "test"
|
server_name: "test"
|
||||||
|
@ -110,10 +120,22 @@ macaroon_secret_key: "abcde"
|
||||||
|
|
||||||
report_stats: false
|
report_stats: false
|
||||||
|
|
||||||
database:
|
databases:
|
||||||
name: "sqlite3"
|
common:
|
||||||
args:
|
name: "sqlite3"
|
||||||
database: "$SQLITE_DB"
|
data_stores: []
|
||||||
|
args:
|
||||||
|
database: "$SQLITE_COMMON_DB"
|
||||||
|
main:
|
||||||
|
name: "sqlite3"
|
||||||
|
data_stores: ["main"]
|
||||||
|
args:
|
||||||
|
database: "$SQLITE_MAIN_DB"
|
||||||
|
state:
|
||||||
|
name: "sqlite3"
|
||||||
|
data_stores: ["state"]
|
||||||
|
args:
|
||||||
|
database: "$SQLITE_STATE_DB"
|
||||||
|
|
||||||
# Suppress the key server warning.
|
# Suppress the key server warning.
|
||||||
trusted_key_servers: []
|
trusted_key_servers: []
|
||||||
|
@ -127,13 +149,32 @@ macaroon_secret_key: "abcde"
|
||||||
|
|
||||||
report_stats: false
|
report_stats: false
|
||||||
|
|
||||||
database:
|
databases:
|
||||||
name: "psycopg2"
|
common:
|
||||||
args:
|
name: "psycopg2"
|
||||||
user: "$PGUSER"
|
data_stores: []
|
||||||
host: "$PGHOST"
|
args:
|
||||||
password: "$PGPASSWORD"
|
user: "$PGUSER"
|
||||||
database: "$POSTGRES_DB_NAME"
|
host: "$PGHOST"
|
||||||
|
password: "$PGPASSWORD"
|
||||||
|
database: "$POSTGRES_COMMON_DB_NAME"
|
||||||
|
main:
|
||||||
|
name: "psycopg2"
|
||||||
|
data_stores: ["main"]
|
||||||
|
args:
|
||||||
|
user: "$PGUSER"
|
||||||
|
host: "$PGHOST"
|
||||||
|
password: "$PGPASSWORD"
|
||||||
|
database: "$POSTGRES_MAIN_DB_NAME"
|
||||||
|
state:
|
||||||
|
name: "psycopg2"
|
||||||
|
data_stores: ["state"]
|
||||||
|
args:
|
||||||
|
user: "$PGUSER"
|
||||||
|
host: "$PGHOST"
|
||||||
|
password: "$PGPASSWORD"
|
||||||
|
database: "$POSTGRES_STATE_DB_NAME"
|
||||||
|
|
||||||
|
|
||||||
# Suppress the key server warning.
|
# Suppress the key server warning.
|
||||||
trusted_key_servers: []
|
trusted_key_servers: []
|
||||||
|
@ -148,33 +189,76 @@ echo "Running db background jobs..."
|
||||||
synapse/_scripts/update_synapse_database.py --database-config "$SQLITE_CONFIG" --run-background-updates
|
synapse/_scripts/update_synapse_database.py --database-config "$SQLITE_CONFIG" --run-background-updates
|
||||||
|
|
||||||
# Create the PostgreSQL database.
|
# Create the PostgreSQL database.
|
||||||
echo "Creating postgres database..."
|
echo "Creating postgres databases..."
|
||||||
createdb --lc-collate=C --lc-ctype=C --template=template0 "$POSTGRES_DB_NAME"
|
createdb --lc-collate=C --lc-ctype=C --template=template0 "$POSTGRES_COMMON_DB_NAME"
|
||||||
|
createdb --lc-collate=C --lc-ctype=C --template=template0 "$POSTGRES_MAIN_DB_NAME"
|
||||||
|
createdb --lc-collate=C --lc-ctype=C --template=template0 "$POSTGRES_STATE_DB_NAME"
|
||||||
|
|
||||||
echo "Running db background jobs..."
|
echo "Running db background jobs..."
|
||||||
synapse/_scripts/update_synapse_database.py --database-config "$POSTGRES_CONFIG" --run-background-updates
|
synapse/_scripts/update_synapse_database.py --database-config "$POSTGRES_CONFIG" --run-background-updates
|
||||||
|
|
||||||
|
|
||||||
# Delete schema_version, applied_schema_deltas and applied_module_schemas tables
|
|
||||||
# Also delete any shadow tables from fts4
|
|
||||||
echo "Dropping unwanted db tables..."
|
echo "Dropping unwanted db tables..."
|
||||||
SQL="
|
|
||||||
|
# Some common tables are created and updated by Synapse itself and do not belong in the
|
||||||
|
# schema.
|
||||||
|
DROP_APP_MANAGED_TABLES="
|
||||||
DROP TABLE schema_version;
|
DROP TABLE schema_version;
|
||||||
|
DROP TABLE schema_compat_version;
|
||||||
DROP TABLE applied_schema_deltas;
|
DROP TABLE applied_schema_deltas;
|
||||||
DROP TABLE applied_module_schemas;
|
DROP TABLE applied_module_schemas;
|
||||||
"
|
"
|
||||||
sqlite3 "$SQLITE_DB" <<< "$SQL"
|
# Other common tables are not created by Synapse and do belong in the schema.
|
||||||
psql "$POSTGRES_DB_NAME" -w <<< "$SQL"
|
# TODO: we could derive DROP_COMMON_TABLES from the dump of the common-only DB. But
|
||||||
|
# since there's only one table there, I haven't bothered to do so.
|
||||||
|
DROP_COMMON_TABLES="$DROP_APP_MANAGED_TABLES
|
||||||
|
DROP TABLE background_updates;
|
||||||
|
"
|
||||||
|
|
||||||
echo "Dumping SQLite3 schema to '$OUTPUT_DIR/$SQLITE_SCHEMA_FILE' and '$OUTPUT_DIR/$SQLITE_ROWS_FILE'..."
|
sqlite3 "$SQLITE_COMMON_DB" <<< "$DROP_APP_MANAGED_TABLES"
|
||||||
sqlite3 "$SQLITE_DB" ".schema --indent" > "$OUTPUT_DIR/$SQLITE_SCHEMA_FILE"
|
sqlite3 "$SQLITE_MAIN_DB" <<< "$DROP_COMMON_TABLES"
|
||||||
sqlite3 "$SQLITE_DB" ".dump --data-only --nosys" > "$OUTPUT_DIR/$SQLITE_ROWS_FILE"
|
sqlite3 "$SQLITE_STATE_DB" <<< "$DROP_COMMON_TABLES"
|
||||||
|
psql "$POSTGRES_COMMON_DB_NAME" -w <<< "$DROP_APP_MANAGED_TABLES"
|
||||||
|
psql "$POSTGRES_MAIN_DB_NAME" -w <<< "$DROP_COMMON_TABLES"
|
||||||
|
psql "$POSTGRES_STATE_DB_NAME" -w <<< "$DROP_COMMON_TABLES"
|
||||||
|
|
||||||
echo "Dumping Postgres schema to '$OUTPUT_DIR/$POSTGRES_SCHEMA_FILE' and '$OUTPUT_DIR/$POSTGRES_ROWS_FILE'..."
|
# For Reasons(TM), SQLite's `.schema` also dumps out "shadow tables", the implementation
|
||||||
pg_dump --format=plain --schema-only --no-tablespaces --no-acl --no-owner "$POSTGRES_DB_NAME" | sed -e '/^$/d' -e '/^--/d' -e 's/public\.//g' -e '/^SET /d' -e '/^SELECT /d' > "$OUTPUT_DIR/$POSTGRES_SCHEMA_FILE"
|
# details behind full text search tables. Omit these from the dumps.
|
||||||
pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_DB_NAME" | sed -e '/^$/d' -e '/^--/d' -e 's/public\.//g' -e '/^SET /d' -e '/^SELECT /d' > "$OUTPUT_DIR/$POSTGRES_ROWS_FILE"
|
|
||||||
|
|
||||||
echo "Cleaning up temporary Postgres database..."
|
sqlite3 "$SQLITE_MAIN_DB" <<< "
|
||||||
dropdb $POSTGRES_DB_NAME
|
DROP TABLE event_search_content;
|
||||||
|
DROP TABLE event_search_segments;
|
||||||
|
DROP TABLE event_search_segdir;
|
||||||
|
DROP TABLE event_search_docsize;
|
||||||
|
DROP TABLE event_search_stat;
|
||||||
|
DROP TABLE user_directory_search_content;
|
||||||
|
DROP TABLE user_directory_search_segments;
|
||||||
|
DROP TABLE user_directory_search_segdir;
|
||||||
|
DROP TABLE user_directory_search_docsize;
|
||||||
|
DROP TABLE user_directory_search_stat;
|
||||||
|
"
|
||||||
|
|
||||||
|
echo "Dumping SQLite3 schema..."
|
||||||
|
|
||||||
|
mkdir -p "$OUTPUT_DIR/"{common,main,state}"/full_schema/$SCHEMA_NUMBER"
|
||||||
|
sqlite3 "$SQLITE_COMMON_DB" ".schema --indent" > "$OUTPUT_DIR/common/full_schema/$SCHEMA_NUMBER/full.sql.sqlite"
|
||||||
|
sqlite3 "$SQLITE_COMMON_DB" ".dump --data-only --nosys" >> "$OUTPUT_DIR/common/full_schema/$SCHEMA_NUMBER/full.sql.sqlite"
|
||||||
|
sqlite3 "$SQLITE_MAIN_DB" ".schema --indent" > "$OUTPUT_DIR/main/full_schema/$SCHEMA_NUMBER/full.sql.sqlite"
|
||||||
|
sqlite3 "$SQLITE_MAIN_DB" ".dump --data-only --nosys" >> "$OUTPUT_DIR/main/full_schema/$SCHEMA_NUMBER/full.sql.sqlite"
|
||||||
|
sqlite3 "$SQLITE_STATE_DB" ".schema --indent" > "$OUTPUT_DIR/state/full_schema/$SCHEMA_NUMBER/full.sql.sqlite"
|
||||||
|
sqlite3 "$SQLITE_STATE_DB" ".dump --data-only --nosys" >> "$OUTPUT_DIR/state/full_schema/$SCHEMA_NUMBER/full.sql.sqlite"
|
||||||
|
|
||||||
|
cleanup_pg_schema() {
|
||||||
|
sed -e '/^$/d' -e '/^--/d' -e 's/public\.//g' -e '/^SET /d' -e '/^SELECT /d'
|
||||||
|
}
|
||||||
|
|
||||||
|
echo "Dumping Postgres schema..."
|
||||||
|
|
||||||
|
pg_dump --format=plain --schema-only --no-tablespaces --no-acl --no-owner "$POSTGRES_COMMON_DB_NAME" | cleanup_pg_schema > "$OUTPUT_DIR/common/full_schema/$SCHEMA_NUMBER/full.sql.postgres"
|
||||||
|
pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_COMMON_DB_NAME" | cleanup_pg_schema >> "$OUTPUT_DIR/common/full_schema/$SCHEMA_NUMBER/full.sql.postgres"
|
||||||
|
pg_dump --format=plain --schema-only --no-tablespaces --no-acl --no-owner "$POSTGRES_MAIN_DB_NAME" | cleanup_pg_schema > "$OUTPUT_DIR/main/full_schema/$SCHEMA_NUMBER/full.sql.postgres"
|
||||||
|
pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_MAIN_DB_NAME" | cleanup_pg_schema >> "$OUTPUT_DIR/main/full_schema/$SCHEMA_NUMBER/full.sql.postgres"
|
||||||
|
pg_dump --format=plain --schema-only --no-tablespaces --no-acl --no-owner "$POSTGRES_STATE_DB_NAME" | cleanup_pg_schema > "$OUTPUT_DIR/state/full_schema/$SCHEMA_NUMBER/full.sql.postgres"
|
||||||
|
pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_STATE_DB_NAME" | cleanup_pg_schema >> "$OUTPUT_DIR/state/full_schema/$SCHEMA_NUMBER/full.sql.postgres"
|
||||||
|
|
||||||
echo "Done! Files dumped to: $OUTPUT_DIR"
|
echo "Done! Files dumped to: $OUTPUT_DIR"
|
||||||
|
|
|
@ -48,10 +48,13 @@ class MockHomeserver(HomeServer):
|
||||||
|
|
||||||
|
|
||||||
def run_background_updates(hs: HomeServer) -> None:
|
def run_background_updates(hs: HomeServer) -> None:
|
||||||
store = hs.get_datastores().main
|
main = hs.get_datastores().main
|
||||||
|
state = hs.get_datastores().state
|
||||||
|
|
||||||
async def run_background_updates() -> None:
|
async def run_background_updates() -> None:
|
||||||
await store.db_pool.updates.run_background_updates(sleep=False)
|
await main.db_pool.updates.run_background_updates(sleep=False)
|
||||||
|
if state:
|
||||||
|
await state.db_pool.updates.run_background_updates(sleep=False)
|
||||||
# Stop the reactor to exit the script once every background update is run.
|
# Stop the reactor to exit the script once every background update is run.
|
||||||
reactor.stop()
|
reactor.stop()
|
||||||
|
|
||||||
|
@ -97,8 +100,11 @@ def main() -> None:
|
||||||
# Load, process and sanity-check the config.
|
# Load, process and sanity-check the config.
|
||||||
hs_config = yaml.safe_load(args.database_config)
|
hs_config = yaml.safe_load(args.database_config)
|
||||||
|
|
||||||
if "database" not in hs_config:
|
if "database" not in hs_config and "databases" not in hs_config:
|
||||||
sys.stderr.write("The configuration file must have a 'database' section.\n")
|
sys.stderr.write(
|
||||||
|
"The configuration file must have a 'database' or 'databases' section. "
|
||||||
|
"See https://matrix-org.github.io/synapse/latest/usage/configuration/config_documentation.html#database"
|
||||||
|
)
|
||||||
sys.exit(4)
|
sys.exit(4)
|
||||||
|
|
||||||
config = HomeServerConfig()
|
config = HomeServerConfig()
|
||||||
|
|
|
@ -285,7 +285,10 @@ class BackgroundUpdater:
|
||||||
back_to_back_failures = 0
|
back_to_back_failures = 0
|
||||||
|
|
||||||
try:
|
try:
|
||||||
logger.info("Starting background schema updates")
|
logger.info(
|
||||||
|
"Starting background schema updates for database %s",
|
||||||
|
self._database_name,
|
||||||
|
)
|
||||||
while self.enabled:
|
while self.enabled:
|
||||||
try:
|
try:
|
||||||
result = await self.do_next_background_update(sleep)
|
result = await self.do_next_background_update(sleep)
|
||||||
|
|
Loading…
Reference in a new issue