Fix parsing of Content-Disposition headers (#4763)
* Fix parsing of Content-Disposition headers TIL: filenames in content-dispostion headers can contain semicolons, and aren't %-encoded. * fix python2 incompatibility * Fix docstrings
This commit is contained in:
parent
f2a753ea38
commit
68f47d6744
|
@ -0,0 +1 @@
|
|||
Fix parsing of Content-Disposition headers on remote media requests and URL previews.
|
|
@ -1,5 +1,6 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# Copyright 2014-2016 OpenMarket Ltd
|
||||
# Copyright 2019 New Vector Ltd.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
|
@ -213,8 +214,7 @@ def get_filename_from_headers(headers):
|
|||
Content-Disposition HTTP header.
|
||||
|
||||
Args:
|
||||
headers (twisted.web.http_headers.Headers): The HTTP
|
||||
request headers.
|
||||
headers (dict[bytes, list[bytes]]): The HTTP request headers.
|
||||
|
||||
Returns:
|
||||
A Unicode string of the filename, or None.
|
||||
|
@ -225,23 +225,12 @@ def get_filename_from_headers(headers):
|
|||
if not content_disposition[0]:
|
||||
return
|
||||
|
||||
# dict of unicode: bytes, corresponding to the key value sections of the
|
||||
# Content-Disposition header.
|
||||
params = {}
|
||||
parts = content_disposition[0].split(b";")
|
||||
for i in parts:
|
||||
# Split into key-value pairs, if able
|
||||
# We don't care about things like `inline`, so throw it out
|
||||
if b"=" not in i:
|
||||
continue
|
||||
|
||||
key, value = i.strip().split(b"=")
|
||||
params[key.decode('ascii')] = value
|
||||
_, params = _parse_header(content_disposition[0])
|
||||
|
||||
upload_name = None
|
||||
|
||||
# First check if there is a valid UTF-8 filename
|
||||
upload_name_utf8 = params.get("filename*", None)
|
||||
upload_name_utf8 = params.get(b"filename*", None)
|
||||
if upload_name_utf8:
|
||||
if upload_name_utf8.lower().startswith(b"utf-8''"):
|
||||
upload_name_utf8 = upload_name_utf8[7:]
|
||||
|
@ -267,12 +256,68 @@ def get_filename_from_headers(headers):
|
|||
|
||||
# If there isn't check for an ascii name.
|
||||
if not upload_name:
|
||||
upload_name_ascii = params.get("filename", None)
|
||||
upload_name_ascii = params.get(b"filename", None)
|
||||
if upload_name_ascii and is_ascii(upload_name_ascii):
|
||||
# Make sure there's no %-quoted bytes. If there is, reject it as
|
||||
# non-valid ASCII.
|
||||
if b"%" not in upload_name_ascii:
|
||||
upload_name = upload_name_ascii.decode('ascii')
|
||||
upload_name = upload_name_ascii.decode('ascii')
|
||||
|
||||
# This may be None here, indicating we did not find a matching name.
|
||||
return upload_name
|
||||
|
||||
|
||||
def _parse_header(line):
|
||||
"""Parse a Content-type like header.
|
||||
|
||||
Cargo-culted from `cgi`, but works on bytes rather than strings.
|
||||
|
||||
Args:
|
||||
line (bytes): header to be parsed
|
||||
|
||||
Returns:
|
||||
Tuple[bytes, dict[bytes, bytes]]:
|
||||
the main content-type, followed by the parameter dictionary
|
||||
"""
|
||||
parts = _parseparam(b';' + line)
|
||||
key = next(parts)
|
||||
pdict = {}
|
||||
for p in parts:
|
||||
i = p.find(b'=')
|
||||
if i >= 0:
|
||||
name = p[:i].strip().lower()
|
||||
value = p[i + 1:].strip()
|
||||
|
||||
# strip double-quotes
|
||||
if len(value) >= 2 and value[0:1] == value[-1:] == b'"':
|
||||
value = value[1:-1]
|
||||
value = value.replace(b'\\\\', b'\\').replace(b'\\"', b'"')
|
||||
pdict[name] = value
|
||||
|
||||
return key, pdict
|
||||
|
||||
|
||||
def _parseparam(s):
|
||||
"""Generator which splits the input on ;, respecting double-quoted sequences
|
||||
|
||||
Cargo-culted from `cgi`, but works on bytes rather than strings.
|
||||
|
||||
Args:
|
||||
s (bytes): header to be parsed
|
||||
|
||||
Returns:
|
||||
Iterable[bytes]: the split input
|
||||
"""
|
||||
while s[:1] == b';':
|
||||
s = s[1:]
|
||||
|
||||
# look for the next ;
|
||||
end = s.find(b';')
|
||||
|
||||
# if there is an odd number of " marks between here and the next ;, skip to the
|
||||
# next ; instead
|
||||
while end > 0 and (s.count(b'"', 0, end) - s.count(b'\\"', 0, end)) % 2:
|
||||
end = s.find(b';', end + 1)
|
||||
|
||||
if end < 0:
|
||||
end = len(s)
|
||||
f = s[:end]
|
||||
yield f.strip()
|
||||
s = s[end:]
|
||||
|
|
|
@ -0,0 +1,45 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# Copyright 2019 New Vector Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from synapse.rest.media.v1._base import get_filename_from_headers
|
||||
|
||||
from tests import unittest
|
||||
|
||||
|
||||
class GetFileNameFromHeadersTests(unittest.TestCase):
|
||||
# input -> expected result
|
||||
TEST_CASES = {
|
||||
b"inline; filename=abc.txt": u"abc.txt",
|
||||
b'inline; filename="azerty"': u"azerty",
|
||||
b'inline; filename="aze%20rty"': u"aze%20rty",
|
||||
b'inline; filename="aze\"rty"': u'aze"rty',
|
||||
b'inline; filename="azer;ty"': u"azer;ty",
|
||||
|
||||
b"inline; filename*=utf-8''foo%C2%A3bar": u"foo£bar",
|
||||
}
|
||||
|
||||
def tests(self):
|
||||
for hdr, expected in self.TEST_CASES.items():
|
||||
res = get_filename_from_headers(
|
||||
{
|
||||
b'Content-Disposition': [hdr],
|
||||
},
|
||||
)
|
||||
self.assertEqual(
|
||||
res, expected,
|
||||
"expected output for %s to be %s but was %s" % (
|
||||
hdr, expected, res,
|
||||
)
|
||||
)
|
Loading…
Reference in New Issue