From f131bf8d3e934bb548615214239cfe131f8e33f5 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Sun, 1 Jul 2018 05:08:51 +0100 Subject: [PATCH 1/4] don't mix unicode strings with utf8-in-byte-strings otherwise we explode with: ``` Traceback (most recent call last): File /usr/lib/python2.7/logging/handlers.py, line 78, in emit logging.FileHandler.emit(self, record) File /usr/lib/python2.7/logging/__init__.py, line 950, in emit StreamHandler.emit(self, record) File /usr/lib/python2.7/logging/__init__.py, line 887, in emit self.handleError(record) File /usr/lib/python2.7/logging/__init__.py, line 810, in handleError None, sys.stderr) File /usr/lib/python2.7/traceback.py, line 124, in print_exception _print(file, 'Traceback (most recent call last):') File /usr/lib/python2.7/traceback.py, line 13, in _print file.write(str+terminator) File /home/matrix/.synapse/local/lib/python2.7/site-packages/twisted/logger/_io.py, line 170, in write self.log.emit(self.level, format=u{log_io}, log_io=line) File /home/matrix/.synapse/local/lib/python2.7/site-packages/twisted/logger/_logger.py, line 144, in emit self.observer(event) File /home/matrix/.synapse/local/lib/python2.7/site-packages/twisted/logger/_observer.py, line 136, in __call__ errorLogger = self._errorLoggerForObserver(brokenObserver) File /home/matrix/.synapse/local/lib/python2.7/site-packages/twisted/logger/_observer.py, line 156, in _errorLoggerForObserver if obs is not observer File /home/matrix/.synapse/local/lib/python2.7/site-packages/twisted/logger/_observer.py, line 81, in __init__ self.log = Logger(observer=self) File /home/matrix/.synapse/local/lib/python2.7/site-packages/twisted/logger/_logger.py, line 64, in __init__ namespace = self._namespaceFromCallingContext() File /home/matrix/.synapse/local/lib/python2.7/site-packages/twisted/logger/_logger.py, line 42, in _namespaceFromCallingContext return currentframe(2).f_globals[__name__] File /home/matrix/.synapse/local/lib/python2.7/site-packages/twisted/python/compat.py, line 93, in currentframe for x in range(n + 1): RuntimeError: maximum recursion depth exceeded while calling a Python object Logged from file site.py, line 129 File /usr/lib/python2.7/logging/__init__.py, line 859, in emit msg = self.format(record) File /usr/lib/python2.7/logging/__init__.py, line 732, in format return fmt.format(record) File /usr/lib/python2.7/logging/__init__.py, line 471, in format record.message = record.getMessage() File /usr/lib/python2.7/logging/__init__.py, line 335, in getMessage msg = msg % self.args UnicodeDecodeError: 'ascii' codec can't decode byte 0xe2 in position 4: ordinal not in range(128) Logged from file site.py, line 129 ``` ...where the logger apparently recurses whilst trying to log the error, hitting the maximum recursion depth and killing everything badly. --- synapse/http/site.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/synapse/http/site.py b/synapse/http/site.py index 74a752d6c..2ab0d8ff7 100644 --- a/synapse/http/site.py +++ b/synapse/http/site.py @@ -113,7 +113,9 @@ class SynapseRequest(Request): " %sB %s \"%s %s %s\" \"%s\" [%d dbevts]", self.getClientIP(), self.site.site_tag, - self.authenticated_entity, + # need to decode as it could be raw utf-8 bytes + # from a IDN servname in an auth header + self.authenticated_entity.decode("utf-8"), end_time - self.start_time, ru_utime, ru_stime, @@ -125,7 +127,12 @@ class SynapseRequest(Request): self.method, self.get_redacted_uri(), self.clientproto, - self.get_user_agent(), + # need to decode as could be raw utf-8 bytes + # from a utf-8 user-agent. + # N.B. if you don't do this, the logger explodes + # with maximum recursion trying to log errors about + # the charset problem. + self.get_user_agent().decode("utf-8"), evt_db_fetch_count, ) From 1c867f5391bff1b060290f834b177f4c339af65d Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Sun, 1 Jul 2018 11:56:33 +0100 Subject: [PATCH 2/4] a fix which doesn't NPE everywhere --- synapse/http/site.py | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/synapse/http/site.py b/synapse/http/site.py index 2ab0d8ff7..14ea9c21c 100644 --- a/synapse/http/site.py +++ b/synapse/http/site.py @@ -107,15 +107,28 @@ class SynapseRequest(Request): end_time = time.time() + # need to decode as it could be raw utf-8 bytes + # from a IDN servname in an auth header + authenticated_entity = self.authenticated_entity + if authenticated_entity is not None: + authenticated_entity = authenticated_entity.decode("utf-8") + + # ...or could be raw utf-8 bytes in the User-Agent header. + # N.B. if you don't do this, the logger explodes cryptically + # with maximum recursion trying to log errors about + # the charset problem. + # c.f. https://github.com/matrix-org/synapse/issues/3471 + user_agent = self.get_user_agent() + if user_agent is not None: + user_agent = user_agent.decode("utf-8") + self.site.access_logger.info( "%s - %s - {%s}" " Processed request: %.3fsec (%.3fsec, %.3fsec) (%.3fsec/%.3fsec/%d)" " %sB %s \"%s %s %s\" \"%s\" [%d dbevts]", self.getClientIP(), self.site.site_tag, - # need to decode as it could be raw utf-8 bytes - # from a IDN servname in an auth header - self.authenticated_entity.decode("utf-8"), + authenticated_entity, end_time - self.start_time, ru_utime, ru_stime, @@ -127,12 +140,7 @@ class SynapseRequest(Request): self.method, self.get_redacted_uri(), self.clientproto, - # need to decode as could be raw utf-8 bytes - # from a utf-8 user-agent. - # N.B. if you don't do this, the logger explodes - # with maximum recursion trying to log errors about - # the charset problem. - self.get_user_agent().decode("utf-8"), + user_agent, evt_db_fetch_count, ) From fc4f8f33be7bcfa8fca5d1aa1f07a1507e8d56e7 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Mon, 2 Jul 2018 11:33:02 +0100 Subject: [PATCH 3/4] replace invalid utf8 with \ufffd --- synapse/http/site.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/http/site.py b/synapse/http/site.py index 14ea9c21c..fe93643b1 100644 --- a/synapse/http/site.py +++ b/synapse/http/site.py @@ -111,7 +111,7 @@ class SynapseRequest(Request): # from a IDN servname in an auth header authenticated_entity = self.authenticated_entity if authenticated_entity is not None: - authenticated_entity = authenticated_entity.decode("utf-8") + authenticated_entity = authenticated_entity.decode("utf-8", "replace") # ...or could be raw utf-8 bytes in the User-Agent header. # N.B. if you don't do this, the logger explodes cryptically @@ -120,7 +120,7 @@ class SynapseRequest(Request): # c.f. https://github.com/matrix-org/synapse/issues/3471 user_agent = self.get_user_agent() if user_agent is not None: - user_agent = user_agent.decode("utf-8") + user_agent = user_agent.decode("utf-8", "replace") self.site.access_logger.info( "%s - %s - {%s}" From 6ec3aa2f72b2a0cc8d58a2448de7d7af5ed5f05e Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Mon, 2 Jul 2018 13:43:34 +0100 Subject: [PATCH 4/4] news snippet --- changelog.d/3470.bugfix | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/3470.bugfix diff --git a/changelog.d/3470.bugfix b/changelog.d/3470.bugfix new file mode 100644 index 000000000..130893119 --- /dev/null +++ b/changelog.d/3470.bugfix @@ -0,0 +1 @@ +Fix bug where synapse would explode when receiving unicode in HTTP User-Agent header