Clean up prepare_database.py a bit and add comments

This commit is contained in:
Erik Johnston 2019-10-22 18:43:31 +01:00
parent 6cc497f99b
commit 23d62eded2

View file

@ -97,7 +97,8 @@ def prepare_database(db_conn, database_engine, config):
def _setup_new_database(cur, database_engine, data_stores): def _setup_new_database(cur, database_engine, data_stores):
"""Sets up the database by finding a base set of "full schemas" and then """Sets up the database by finding a base set of "full schemas" and then
applying any necessary deltas. applying any necessary deltas, including schemas from the given data
stores.
The "full_schemas" directory has subdirectories named after versions. This The "full_schemas" directory has subdirectories named after versions. This
function searches for the highest version less than or equal to function searches for the highest version less than or equal to
@ -122,6 +123,15 @@ def _setup_new_database(cur, database_engine, data_stores):
In the example foo.sql and bar.sql would be run, and then any delta files In the example foo.sql and bar.sql would be run, and then any delta files
for versions strictly greater than 11. for versions strictly greater than 11.
Note: we apply the full schemas and deltas from the top level `schema/`
folder as well as those in the data stores specified.
Args:
cur (Cursor): a database cursor
database_engine (DatabaseEngine)
data_stores (list[str]): The names of the data stores to instantiate
on the given database.
""" """
current_dir = os.path.join(dir_path, "schema", "full_schemas") current_dir = os.path.join(dir_path, "schema", "full_schemas")
directory_entries = os.listdir(current_dir) directory_entries = os.listdir(current_dir)
@ -245,6 +255,10 @@ def _upgrade_existing_database(
only if `upgraded` is True. Then `foo.sql` and `bar.py` would be run in only if `upgraded` is True. Then `foo.sql` and `bar.py` would be run in
some arbitrary order. some arbitrary order.
Note: we apply the delta files from the specified data stores as well as
those in the top-level schema. We apply all delta files across data stores
for a version before applying those in the next version.
Args: Args:
cur (Cursor) cur (Cursor)
current_version (int): The current version of the schema. current_version (int): The current version of the schema.
@ -254,6 +268,14 @@ def _upgrade_existing_database(
applied deltas or from full schema file. If `True` the function applied deltas or from full schema file. If `True` the function
will never apply delta files for the given `current_version`, since will never apply delta files for the given `current_version`, since
the current_version wasn't generated by applying those delta files. the current_version wasn't generated by applying those delta files.
database_engine (DatabaseEngine)
config (synapse.config.homeserver.HomeServerConfig|None):
application config, or None if we are connecting to an existing
database which we expect to be configured already
data_stores (list[str]): The names of the data stores to instantiate
on the given database.
is_empty (bool): Is this a blank database? I.e. do we need to run the
upgrade portions of the delta scripts.
""" """
if current_version > SCHEMA_VERSION: if current_version > SCHEMA_VERSION:
@ -305,21 +327,19 @@ def _upgrade_existing_database(
# Data stores can have empty entries for a given version delta. # Data stores can have empty entries for a given version delta.
pass pass
except OSError: except OSError:
logger.exception("Could not open delta dir for version %d", v)
raise UpgradeDatabaseException( raise UpgradeDatabaseException(
"Could not open delta dir for version %d" % (v,) "Could not open delta dir for version %d: %s" % (v, directory)
) )
if not directory_entries: # We sort to ensure that we apply the delta files in a consistent
continue # order (to avoid bugs caused by inconsistent directory listing order)
directory_entries.sort() directory_entries.sort()
for entry in directory_entries: for entry in directory_entries:
file_name = entry.file_name file_name = entry.file_name
relative_path = os.path.join(str(v), file_name) relative_path = os.path.join(str(v), file_name)
absolute_path = entry.absolute_path absolute_path = entry.absolute_path
logger.debug("Found file: %s", relative_path) logger.debug("Found file: %s (%s)", relative_path, absolute_path)
if relative_path in applied_delta_files: if relative_path in applied_delta_files:
continue continue
@ -511,6 +531,9 @@ def _get_or_create_schema_state(txn, database_engine):
class _DirectoryListing(object): class _DirectoryListing(object):
"""Helper class to store schema file name and the """Helper class to store schema file name and the
absolute path to it. absolute path to it.
These entries get sorted, so for consistency we want to ensure that the
`file_name` attr is kept first.
""" """
file_name = attr.ib() file_name = attr.ib()