From e85c7873dc885c18705c2a77d8487517379d64fb Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Wed, 26 Aug 2015 16:26:37 +0100 Subject: [PATCH 1/3] Allow non-ascii filenames for attachments --- synapse/rest/media/v1/base_resource.py | 17 +++++++++++++---- synapse/rest/media/v1/upload_resource.py | 6 ++---- synapse/util/stringutils.py | 2 ++ 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/synapse/rest/media/v1/base_resource.py b/synapse/rest/media/v1/base_resource.py index 4e21527c3d..24297b20f1 100644 --- a/synapse/rest/media/v1/base_resource.py +++ b/synapse/rest/media/v1/base_resource.py @@ -33,6 +33,7 @@ import os import cgi import logging +import urllib logger = logging.getLogger(__name__) @@ -181,10 +182,18 @@ class BaseMediaResource(Resource): if os.path.isfile(file_path): request.setHeader(b"Content-Type", media_type.encode("UTF-8")) if upload_name: - request.setHeader( - b"Content-Disposition", - b"inline; filename=%s" % (upload_name.encode("utf-8"),), - ) + if is_ascii(upload_name): + request.setHeader( + b"Content-Disposition", + b"inline; filename=%s" % (upload_name.encode("utf-8"),), + ) + else: + request.setHeader( + b"Content-Disposition", + b"inline; filename*=utf-8''%s" % ( + urllib.quote(upload_name.encode("utf-8")), + ), + ) # cache for at least a day. # XXX: we might want to turn this off for data we don't want to diff --git a/synapse/rest/media/v1/upload_resource.py b/synapse/rest/media/v1/upload_resource.py index cdd1d44e07..21d8fb9ce9 100644 --- a/synapse/rest/media/v1/upload_resource.py +++ b/synapse/rest/media/v1/upload_resource.py @@ -15,7 +15,7 @@ from synapse.http.server import respond_with_json, request_handler -from synapse.util.stringutils import random_string, is_ascii +from synapse.util.stringutils import random_string from synapse.api.errors import SynapseError from twisted.web.server import NOT_DONE_YET @@ -86,9 +86,7 @@ class UploadResource(BaseMediaResource): upload_name = request.args.get("filename", None) if upload_name: - upload_name = upload_name[0] - if upload_name and not is_ascii(upload_name): - raise SynapseError(400, "filename must be ascii") + upload_name = upload_name[0].decode('UTF-8') headers = request.requestHeaders diff --git a/synapse/util/stringutils.py b/synapse/util/stringutils.py index 7a1e96af37..f3a36340e4 100644 --- a/synapse/util/stringutils.py +++ b/synapse/util/stringutils.py @@ -38,6 +38,8 @@ def random_string_with_symbols(length): def is_ascii(s): try: s.encode("ascii") + except UnicodeEncodeError: + return False except UnicodeDecodeError: return False else: From 5a9e0c36824ffc8bb365cdb30a273d427f997bd9 Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Wed, 26 Aug 2015 17:08:47 +0100 Subject: [PATCH 2/3] Handle unicode filenames given when downloading or received over federation --- synapse/rest/media/v1/base_resource.py | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/synapse/rest/media/v1/base_resource.py b/synapse/rest/media/v1/base_resource.py index 24297b20f1..ad2c9d4e74 100644 --- a/synapse/rest/media/v1/base_resource.py +++ b/synapse/rest/media/v1/base_resource.py @@ -34,6 +34,7 @@ import os import cgi import logging import urllib +import urlparse logger = logging.getLogger(__name__) @@ -43,10 +44,13 @@ def parse_media_id(request): # This allows users to append e.g. /test.png to the URL. Useful for # clients that parse the URL to see content type. server_name, media_id = request.postpath[:2] - if len(request.postpath) > 2 and is_ascii(request.postpath[-1]): - return server_name, media_id, request.postpath[-1] - else: - return server_name, media_id, None + file_name = None + if len(request.postpath) > 2: + try: + file_name = urlparse.unquote(request.postpath[-1]).decode("utf-8") + except UnicodeDecodeError: + pass + return server_name, media_id, file_name except: raise SynapseError( 404, @@ -144,6 +148,16 @@ class BaseMediaResource(Resource): upload_name = params.get("filename", None) if upload_name and not is_ascii(upload_name): upload_name = None + else: + upload_name_utf8 = params.get("filename*", None) + if upload_name_utf8.lower().startswith("utf-8''"): + upload_name = upload_name_utf8[7:] + if upload_name: + upload_name = urlparse.unquote(upload_name) + try: + upload_name = upload_name.decode("utf-8"); + except UnicodeDecodeError: + upload_name = None else: upload_name = None @@ -185,7 +199,9 @@ class BaseMediaResource(Resource): if is_ascii(upload_name): request.setHeader( b"Content-Disposition", - b"inline; filename=%s" % (upload_name.encode("utf-8"),), + b"inline; filename=%s" % ( + urllib.quote(upload_name.encode("utf-8")), + ), ) else: request.setHeader( From c9cb354b58972b9e0e91cd6d6398e9bb02f7b967 Mon Sep 17 00:00:00 2001 From: Mark Haines Date: Wed, 26 Aug 2015 17:27:23 +0100 Subject: [PATCH 3/3] Give a sensible error message if the filename is invalid UTF-8 --- synapse/rest/media/v1/base_resource.py | 2 +- synapse/rest/media/v1/upload_resource.py | 8 +++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/synapse/rest/media/v1/base_resource.py b/synapse/rest/media/v1/base_resource.py index ad2c9d4e74..60751da1d1 100644 --- a/synapse/rest/media/v1/base_resource.py +++ b/synapse/rest/media/v1/base_resource.py @@ -155,7 +155,7 @@ class BaseMediaResource(Resource): if upload_name: upload_name = urlparse.unquote(upload_name) try: - upload_name = upload_name.decode("utf-8"); + upload_name = upload_name.decode("utf-8") except UnicodeDecodeError: upload_name = None else: diff --git a/synapse/rest/media/v1/upload_resource.py b/synapse/rest/media/v1/upload_resource.py index 21d8fb9ce9..031bfa80f8 100644 --- a/synapse/rest/media/v1/upload_resource.py +++ b/synapse/rest/media/v1/upload_resource.py @@ -86,7 +86,13 @@ class UploadResource(BaseMediaResource): upload_name = request.args.get("filename", None) if upload_name: - upload_name = upload_name[0].decode('UTF-8') + try: + upload_name = upload_name[0].decode('UTF-8') + except UnicodeDecodeError: + raise SynapseError( + msg="Invalid UTF-8 filename parameter: %r" % (upload_name), + code=400, + ) headers = request.requestHeaders