Improve startup times in Complement test runs against workers, particularly in CPU-constrained environments. (#13127)

Co-authored-by: Richard van der Hoff <1389908+richvdh@users.noreply.github.com>
2025-04-29 19:24:35 +02:00 · 2022-06-30 12:58:12 +01:00 · 2022-06-30 12:58:12 +01:00 · 9667bad55d
commit 9667bad55d
parent 09f6e43025
9 changed files with 243 additions and 51 deletions
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@ -328,51 +328,8 @@ jobs:
          - arrangement: monolith
            database: Postgres

-    steps:
-      # The path is set via a file given by $GITHUB_PATH. We need both Go 1.17 and GOPATH on the path to run Complement.
-      # See https://docs.github.com/en/actions/using-workflows/workflow-commands-for-github-actions#adding-a-system-path
-      - name: "Set Go Version"
-        run: |
-          # Add Go 1.17 to the PATH: see https://github.com/actions/virtual-environments/blob/main/images/linux/Ubuntu2004-Readme.md#environment-variables-2
-          echo "$GOROOT_1_17_X64/bin" >> $GITHUB_PATH
-          # Add the Go path to the PATH: We need this so we can call gotestfmt
-          echo "~/go/bin" >> $GITHUB_PATH
-
-      - name: "Install Complement Dependencies"
-        run: |
-          sudo apt-get update && sudo apt-get install -y libolm3 libolm-dev
-          go get -v github.com/haveyoudebuggedit/gotestfmt/v2/cmd/gotestfmt@latest
-
-      - name: Run actions/checkout@v2 for synapse
-        uses: actions/checkout@v2
-        with:
-          path: synapse
-
-      - name: "Install custom gotestfmt template"
-        run: |
-          mkdir .gotestfmt/github -p
-          cp synapse/.ci/complement_package.gotpl .gotestfmt/github/package.gotpl
-
-      # Attempt to check out the same branch of Complement as the PR. If it
-      # doesn't exist, fallback to HEAD.
-      - name: Checkout complement
-        run: synapse/.ci/scripts/checkout_complement.sh
-
-      - run: |
-          set -o pipefail
-          POSTGRES=${{ (matrix.database == 'Postgres') && 1 || '' }} COMPLEMENT_DIR=`pwd`/complement synapse/scripts-dev/complement.sh -json 2>&1 | gotestfmt
-        shell: bash
-        name: Run Complement Tests
-
-  # We only run the workers tests on `develop` for now, because they're too slow to wait for on PRs.
-  # Sadly, you can't have an `if` condition on the value of a matrix, so this is a temporary, separate job for now.
-  # GitHub Actions doesn't support YAML anchors, so it's full-on duplication for now.
-  complement-developonly:
-    if: "${{ !failure() && !cancelled() && (github.ref == 'refs/heads/develop') }}"
-    needs: linting-done
-    runs-on: ubuntu-latest
-    
-    name: "Complement Workers (develop only)"
+          - arrangement: workers
+            database: Postgres

    steps:
      # The path is set via a file given by $GITHUB_PATH. We need both Go 1.17 and GOPATH on the path to run Complement.
@ -406,7 +363,7 @@ jobs:

      - run: |
          set -o pipefail
-          WORKERS=1 COMPLEMENT_DIR=`pwd`/complement synapse/scripts-dev/complement.sh -json 2>&1 | gotestfmt
+          POSTGRES=${{ (matrix.database == 'Postgres') && 1 || '' }} WORKERS=${{ (matrix.arrangement == 'workers') && 1 || '' }} COMPLEMENT_DIR=`pwd`/complement synapse/scripts-dev/complement.sh -json 2>&1 | gotestfmt
        shell: bash
        name: Run Complement Tests

--- a/changelog.d/13127.misc
+++ b/changelog.d/13127.misc
@ -0,0 +1 @@
+Improve startup times in Complement test runs against workers, particularly in CPU-constrained environments.
--- a/docker/complement/conf/start_for_complement.sh
+++ b/docker/complement/conf/start_for_complement.sh
@ -59,6 +59,9 @@ if [[ -n "$SYNAPSE_COMPLEMENT_USE_WORKERS" ]]; then
      synchrotron, \
      appservice, \
      pusher"
+
+  # Improve startup times by using a launcher based on fork()
+  export SYNAPSE_USE_EXPERIMENTAL_FORKING_LAUNCHER=1
 else
  # Empty string here means 'main process only'
  export SYNAPSE_WORKER_TYPES=""
--- a/docker/conf-workers/synapse.supervisord.conf.j2
+++ b/docker/conf-workers/synapse.supervisord.conf.j2
@ -1,3 +1,24 @@
+{% if use_forking_launcher %}
+[program:synapse_fork]
+command=/usr/local/bin/python -m synapse.app.complement_fork_starter
+  {{ main_config_path }}
+  synapse.app.homeserver
+  --config-path="{{ main_config_path }}"
+  --config-path=/conf/workers/shared.yaml
+  {%- for worker in workers %}
+    -- {{ worker.app }}
+    --config-path="{{ main_config_path }}"
+    --config-path=/conf/workers/shared.yaml
+    --config-path=/conf/workers/{{ worker.name }}.yaml
+  {%- endfor %}
+stdout_logfile=/dev/stdout
+stdout_logfile_maxbytes=0
+stderr_logfile=/dev/stderr
+stderr_logfile_maxbytes=0
+autorestart=unexpected
+exitcodes=0
+
+{% else %}
 [program:synapse_main]
 command=/usr/local/bin/prefix-log /usr/local/bin/python -m synapse.app.homeserver
  --config-path="{{ main_config_path }}"
@ -13,7 +34,7 @@ autorestart=unexpected
 exitcodes=0


-{% for worker in workers %}
+  {% for worker in workers %}
 [program:synapse_{{ worker.name }}]
 command=/usr/local/bin/prefix-log /usr/local/bin/python -m {{ worker.app }}
  --config-path="{{ main_config_path }}"
@ -27,4 +48,5 @@ stdout_logfile_maxbytes=0
 stderr_logfile=/dev/stderr
 stderr_logfile_maxbytes=0

-{% endfor %}
+  {% endfor %}
+{% endif %}
--- a/docker/conf/log.config
+++ b/docker/conf/log.config
@ -2,7 +2,11 @@ version: 1

 formatters:
  precise:
+    {% if include_worker_name_in_log_line %}
+    format: '{{ worker_name }} | %(asctime)s - %(name)s - %(lineno)d - %(levelname)s - %(request)s - %(message)s'
+    {% else %}
    format: '%(asctime)s - %(name)s - %(lineno)d - %(levelname)s - %(request)s - %(message)s'
+    {% endif %}

 handlers:
 {% if LOG_FILE_PATH %}
--- a/docker/configure_workers_and_start.py
+++ b/docker/configure_workers_and_start.py
@ -26,6 +26,9 @@
 #   * SYNAPSE_TLS_CERT: Path to a TLS certificate in PEM format.
 #   * SYNAPSE_TLS_KEY: Path to a TLS key. If this and SYNAPSE_TLS_CERT are specified,
 #         Nginx will be configured to serve TLS on port 8448.
+#   * SYNAPSE_USE_EXPERIMENTAL_FORKING_LAUNCHER: Whether to use the forking launcher,
+#         only intended for usage in Complement at the moment.
+#         No stability guarantees are provided.
 #
 # NOTE: According to Complement's ENTRYPOINT expectations for a homeserver image (as defined
 # in the project's README), this script may be run multiple times, and functionality should
@ -525,6 +528,7 @@ def generate_worker_files(
        "/etc/supervisor/conf.d/synapse.conf",
        workers=worker_descriptors,
        main_config_path=config_path,
+        use_forking_launcher=environ.get("SYNAPSE_USE_EXPERIMENTAL_FORKING_LAUNCHER"),
    )

    # healthcheck config
@ -560,6 +564,9 @@ def generate_worker_log_config(
        log_config_filepath,
        worker_name=worker_name,
        **extra_log_template_args,
+        include_worker_name_in_log_line=environ.get(
+            "SYNAPSE_USE_EXPERIMENTAL_FORKING_LAUNCHER"
+        ),
    )
    return log_config_filepath

--- a/docker/start.py
+++ b/docker/start.py
@ -110,7 +110,11 @@ def generate_config_from_template(

    log_config_file = environ["SYNAPSE_LOG_CONFIG"]
    log("Generating log config file " + log_config_file)
-    convert("/conf/log.config", log_config_file, environ)
+    convert(
+        "/conf/log.config",
+        log_config_file,
+        {**environ, "include_worker_name_in_log_line": False},
+    )

    # Hopefully we already have a signing key, but generate one if not.
    args = [
--- a/synapse/app/_base.py
+++ b/synapse/app/_base.py
@ -106,7 +106,9 @@ def register_sighup(func: Callable[P, None], *args: P.args, **kwargs: P.kwargs)
 def start_worker_reactor(
    appname: str,
    config: HomeServerConfig,
-    run_command: Callable[[], None] = reactor.run,
+    # Use a lambda to avoid binding to a given reactor at import time.
+    # (needed when synapse.app.complement_fork_starter is being used)
+    run_command: Callable[[], None] = lambda: reactor.run(),
 ) -> None:
    """Run the reactor in the main process

@ -141,7 +143,9 @@ def start_reactor(
    daemonize: bool,
    print_pidfile: bool,
    logger: logging.Logger,
-    run_command: Callable[[], None] = reactor.run,
+    # Use a lambda to avoid binding to a given reactor at import time.
+    # (needed when synapse.app.complement_fork_starter is being used)
+    run_command: Callable[[], None] = lambda: reactor.run(),
 ) -> None:
    """Run the reactor in the main process

--- a/synapse/app/complement_fork_starter.py
+++ b/synapse/app/complement_fork_starter.py
@ -0,0 +1,190 @@
+# Copyright 2022 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# ## What this script does
+#
+# This script spawns multiple workers, whilst only going through the code loading
+# process once. The net effect is that start-up time for a swarm of workers is
+# reduced, particularly in CPU-constrained environments.
+#
+# Before the workers are spawned, the database is prepared in order to avoid the
+# workers racing.
+#
+# ## Stability
+#
+# This script is only intended for use within the Synapse images for the
+# Complement test suite.
+# There are currently no stability guarantees whatsoever; especially not about:
+# - whether it will continue to exist in future versions;
+# - the format of its command-line arguments; or
+# - any details about its behaviour or principles of operation.
+#
+# ## Usage
+#
+# The first argument should be the path to the database configuration, used to
+# set up the database. The rest of the arguments are used as follows:
+# Each worker is specified as an argument group (each argument group is
+# separated by '--').
+# The first argument in each argument group is the Python module name of the application
+# to start. Further arguments are then passed to that module as-is.
+#
+# ## Example
+#
+#   python -m synapse.app.complement_fork_starter path_to_db_config.yaml \
+#     synapse.app.homeserver [args..] -- \
+#     synapse.app.generic_worker [args..] -- \
+#   ...
+#     synapse.app.generic_worker [args..]
+#
+import argparse
+import importlib
+import itertools
+import multiprocessing
+import sys
+from typing import Any, Callable, List
+
+from twisted.internet.main import installReactor
+
+
+class ProxiedReactor:
+    """
+    Twisted tracks the 'installed' reactor as a global variable.
+    (Actually, it does some module trickery, but the effect is similar.)
+
+    The default EpollReactor is buggy if it's created before a process is
+    forked, then used in the child.
+    See https://twistedmatrix.com/trac/ticket/4759#comment:17.
+
+    However, importing certain Twisted modules will automatically create and
+    install a reactor if one hasn't already been installed.
+    It's not normally possible to re-install a reactor.
+
+    Given the goal of launching workers with fork() to only import the code once,
+    this presents a conflict.
+    Our work around is to 'install' this ProxiedReactor which prevents Twisted
+    from creating and installing one, but which lets us replace the actual reactor
+    in use later on.
+    """
+
+    def __init__(self) -> None:
+        self.___reactor_target: Any = None
+
+    def _install_real_reactor(self, new_reactor: Any) -> None:
+        """
+        Install a real reactor for this ProxiedReactor to forward lookups onto.
+
+        This method is specific to our ProxiedReactor and should not clash with
+        any names used on an actual Twisted reactor.
+        """
+        self.___reactor_target = new_reactor
+
+    def __getattr__(self, attr_name: str) -> Any:
+        return getattr(self.___reactor_target, attr_name)
+
+
+def _worker_entrypoint(
+    func: Callable[[], None], proxy_reactor: ProxiedReactor, args: List[str]
+) -> None:
+    """
+    Entrypoint for a forked worker process.
+
+    We just need to set up the command-line arguments, create our real reactor
+    and then kick off the worker's main() function.
+    """
+
+    sys.argv = args
+
+    from twisted.internet.epollreactor import EPollReactor
+
+    proxy_reactor._install_real_reactor(EPollReactor())
+    func()
+
+
+def main() -> None:
+    """
+    Entrypoint for the forking launcher.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument("db_config", help="Path to database config file")
+    parser.add_argument(
+        "args",
+        nargs="...",
+        help="Argument groups separated by `--`. "
+        "The first argument of each group is a Synapse app name. "
+        "Subsequent arguments are passed through.",
+    )
+    ns = parser.parse_args()
+
+    # Split up the subsequent arguments into each workers' arguments;
+    # `--` is our delimiter of choice.
+    args_by_worker: List[List[str]] = [
+        list(args)
+        for cond, args in itertools.groupby(ns.args, lambda ele: ele != "--")
+        if cond and args
+    ]
+
+    # Prevent Twisted from installing a shared reactor that all the workers will
+    # inherit when we fork(), by installing our own beforehand.
+    proxy_reactor = ProxiedReactor()
+    installReactor(proxy_reactor)
+
+    # Import the entrypoints for all the workers.
+    worker_functions = []
+    for worker_args in args_by_worker:
+        worker_module = importlib.import_module(worker_args[0])
+        worker_functions.append(worker_module.main)
+
+    # We need to prepare the database first as otherwise all the workers will
+    # try to create a schema version table and some will crash out.
+    from synapse._scripts import update_synapse_database
+
+    update_proc = multiprocessing.Process(
+        target=_worker_entrypoint,
+        args=(
+            update_synapse_database.main,
+            proxy_reactor,
+            [
+                "update_synapse_database",
+                "--database-config",
+                ns.db_config,
+                "--run-background-updates",
+            ],
+        ),
+    )
+    print("===== PREPARING DATABASE =====", file=sys.stderr)
+    update_proc.start()
+    update_proc.join()
+    print("===== PREPARED DATABASE =====", file=sys.stderr)
+
+    # At this point, we've imported all the main entrypoints for all the workers.
+    # Now we basically just fork() out to create the workers we need.
+    # Because we're using fork(), all the workers get a clone of this launcher's
+    # memory space and don't need to repeat the work of loading the code!
+    # Instead of using fork() directly, we use the multiprocessing library,
+    # which uses fork() on Unix platforms.
+    processes = []
+    for (func, worker_args) in zip(worker_functions, args_by_worker):
+        process = multiprocessing.Process(
+            target=_worker_entrypoint, args=(func, proxy_reactor, worker_args)
+        )
+        process.start()
+        processes.append(process)
+
+    # Be a good parent and wait for our children to die before exiting.
+    for process in processes:
+        process.join()
+
+
+if __name__ == "__main__":
+    main()
				`@ -0,0 +1 @@`
				`Improve startup times in Complement test runs against workers, particularly in CPU-constrained environments.`