From ebd137cf6b2fbd767625dc5289b0bef6d1e51971 Mon Sep 17 00:00:00 2001
From: Neil Alexander <neilalexander@users.noreply.github.com>
Date: Wed, 5 Oct 2022 11:07:17 +0100
Subject: [PATCH] Check PostgreSQL connection count (#2760)

This PR queries PostgreSQL for the `max_connections` and
`superuser_reserved_connections` settings and then ensures that
Dendrite's `max_open_conns` doesn't exceed the number of connections
available to non-superusers (the former minus the latter).

This is a really common source of configuration problems and can result
in either blocking queries or deadlocks, so it seems reasonable that we
complain as loudly as possible when it happens.
---
 internal/sqlutil/sqlutil.go | 39 +++++++++++++++++++++++++++++++------
 1 file changed, 33 insertions(+), 6 deletions(-)

diff --git a/internal/sqlutil/sqlutil.go b/internal/sqlutil/sqlutil.go
index 0cdae6d30..789bceeac 100644
--- a/internal/sqlutil/sqlutil.go
+++ b/internal/sqlutil/sqlutil.go
@@ -2,6 +2,7 @@ package sqlutil
 
 import (
 	"database/sql"
+	"flag"
 	"fmt"
 	"regexp"
 
@@ -9,6 +10,8 @@ import (
 	"github.com/sirupsen/logrus"
 )
 
+var skipSanityChecks = flag.Bool("skip-db-sanity", false, "Ignore sanity checks on the database connections (NOT RECOMMENDED!)")
+
 // Open opens a database specified by its database driver name and a driver-specific data source name,
 // usually consisting of at least a database name and connection information. Includes tracing driver
 // if DENDRITE_TRACE_SQL=1
@@ -37,15 +40,39 @@ func Open(dbProperties *config.DatabaseOptions, writer Writer) (*sql.DB, error)
 		return nil, err
 	}
 	if driverName != "sqlite3" {
-		logrus.WithFields(logrus.Fields{
-			"MaxOpenConns":    dbProperties.MaxOpenConns(),
-			"MaxIdleConns":    dbProperties.MaxIdleConns(),
-			"ConnMaxLifetime": dbProperties.ConnMaxLifetime(),
-			"dataSourceName":  regexp.MustCompile(`://[^@]*@`).ReplaceAllLiteralString(dsn, "://"),
-		}).Debug("Setting DB connection limits")
+		logger := logrus.WithFields(logrus.Fields{
+			"max_open_conns":    dbProperties.MaxOpenConns(),
+			"max_idle_conns":    dbProperties.MaxIdleConns(),
+			"conn_max_lifetime": dbProperties.ConnMaxLifetime(),
+			"data_source_name":  regexp.MustCompile(`://[^@]*@`).ReplaceAllLiteralString(dsn, "://"),
+		})
+		logger.Debug("Setting DB connection limits")
 		db.SetMaxOpenConns(dbProperties.MaxOpenConns())
 		db.SetMaxIdleConns(dbProperties.MaxIdleConns())
 		db.SetConnMaxLifetime(dbProperties.ConnMaxLifetime())
+
+		if !*skipSanityChecks {
+			if dbProperties.MaxOpenConns() == 0 {
+				logrus.Warnf("WARNING: Configuring 'max_open_conns' to be unlimited is not recommended. This can result in bad performance or deadlocks.")
+			}
+
+			switch driverName {
+			case "postgres":
+				// Perform a quick sanity check if possible that we aren't trying to use more database
+				// connections than PostgreSQL is willing to give us.
+				var max, reserved int
+				if err := db.QueryRow("SELECT setting::integer FROM pg_settings WHERE name='max_connections';").Scan(&max); err != nil {
+					return nil, fmt.Errorf("failed to find maximum connections: %w", err)
+				}
+				if err := db.QueryRow("SELECT setting::integer FROM pg_settings WHERE name='superuser_reserved_connections';").Scan(&reserved); err != nil {
+					return nil, fmt.Errorf("failed to find reserved connections: %w", err)
+				}
+				if configured, allowed := dbProperties.MaxOpenConns(), max-reserved; configured > allowed {
+					logrus.Errorf("ERROR: The configured 'max_open_conns' is greater than the %d non-superuser connections that PostgreSQL is configured to allow. This can result in bad performance or deadlocks. Please pay close attention to your configured database connection counts. If you REALLY know what you are doing and want to override this error, pass the --skip-db-sanity option to Dendrite.", allowed)
+					return nil, fmt.Errorf("database sanity checks failed")
+				}
+			}
+		}
 	}
 	return db, nil
 }