Fix parsing of Content-Disposition headers (#4763)
* Fix parsing of Content-Disposition headers TIL: filenames in content-dispostion headers can contain semicolons, and aren't %-encoded. * fix python2 incompatibility * Fix docstrings
This commit is contained in:
parent
f2a753ea38
commit
68f47d6744
|
@ -0,0 +1 @@
|
||||||
|
Fix parsing of Content-Disposition headers on remote media requests and URL previews.
|
|
@ -1,5 +1,6 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
# Copyright 2014-2016 OpenMarket Ltd
|
# Copyright 2014-2016 OpenMarket Ltd
|
||||||
|
# Copyright 2019 New Vector Ltd.
|
||||||
#
|
#
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
# you may not use this file except in compliance with the License.
|
# you may not use this file except in compliance with the License.
|
||||||
|
@ -213,8 +214,7 @@ def get_filename_from_headers(headers):
|
||||||
Content-Disposition HTTP header.
|
Content-Disposition HTTP header.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
headers (twisted.web.http_headers.Headers): The HTTP
|
headers (dict[bytes, list[bytes]]): The HTTP request headers.
|
||||||
request headers.
|
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
A Unicode string of the filename, or None.
|
A Unicode string of the filename, or None.
|
||||||
|
@ -225,23 +225,12 @@ def get_filename_from_headers(headers):
|
||||||
if not content_disposition[0]:
|
if not content_disposition[0]:
|
||||||
return
|
return
|
||||||
|
|
||||||
# dict of unicode: bytes, corresponding to the key value sections of the
|
_, params = _parse_header(content_disposition[0])
|
||||||
# Content-Disposition header.
|
|
||||||
params = {}
|
|
||||||
parts = content_disposition[0].split(b";")
|
|
||||||
for i in parts:
|
|
||||||
# Split into key-value pairs, if able
|
|
||||||
# We don't care about things like `inline`, so throw it out
|
|
||||||
if b"=" not in i:
|
|
||||||
continue
|
|
||||||
|
|
||||||
key, value = i.strip().split(b"=")
|
|
||||||
params[key.decode('ascii')] = value
|
|
||||||
|
|
||||||
upload_name = None
|
upload_name = None
|
||||||
|
|
||||||
# First check if there is a valid UTF-8 filename
|
# First check if there is a valid UTF-8 filename
|
||||||
upload_name_utf8 = params.get("filename*", None)
|
upload_name_utf8 = params.get(b"filename*", None)
|
||||||
if upload_name_utf8:
|
if upload_name_utf8:
|
||||||
if upload_name_utf8.lower().startswith(b"utf-8''"):
|
if upload_name_utf8.lower().startswith(b"utf-8''"):
|
||||||
upload_name_utf8 = upload_name_utf8[7:]
|
upload_name_utf8 = upload_name_utf8[7:]
|
||||||
|
@ -267,12 +256,68 @@ def get_filename_from_headers(headers):
|
||||||
|
|
||||||
# If there isn't check for an ascii name.
|
# If there isn't check for an ascii name.
|
||||||
if not upload_name:
|
if not upload_name:
|
||||||
upload_name_ascii = params.get("filename", None)
|
upload_name_ascii = params.get(b"filename", None)
|
||||||
if upload_name_ascii and is_ascii(upload_name_ascii):
|
if upload_name_ascii and is_ascii(upload_name_ascii):
|
||||||
# Make sure there's no %-quoted bytes. If there is, reject it as
|
upload_name = upload_name_ascii.decode('ascii')
|
||||||
# non-valid ASCII.
|
|
||||||
if b"%" not in upload_name_ascii:
|
|
||||||
upload_name = upload_name_ascii.decode('ascii')
|
|
||||||
|
|
||||||
# This may be None here, indicating we did not find a matching name.
|
# This may be None here, indicating we did not find a matching name.
|
||||||
return upload_name
|
return upload_name
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_header(line):
|
||||||
|
"""Parse a Content-type like header.
|
||||||
|
|
||||||
|
Cargo-culted from `cgi`, but works on bytes rather than strings.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
line (bytes): header to be parsed
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Tuple[bytes, dict[bytes, bytes]]:
|
||||||
|
the main content-type, followed by the parameter dictionary
|
||||||
|
"""
|
||||||
|
parts = _parseparam(b';' + line)
|
||||||
|
key = next(parts)
|
||||||
|
pdict = {}
|
||||||
|
for p in parts:
|
||||||
|
i = p.find(b'=')
|
||||||
|
if i >= 0:
|
||||||
|
name = p[:i].strip().lower()
|
||||||
|
value = p[i + 1:].strip()
|
||||||
|
|
||||||
|
# strip double-quotes
|
||||||
|
if len(value) >= 2 and value[0:1] == value[-1:] == b'"':
|
||||||
|
value = value[1:-1]
|
||||||
|
value = value.replace(b'\\\\', b'\\').replace(b'\\"', b'"')
|
||||||
|
pdict[name] = value
|
||||||
|
|
||||||
|
return key, pdict
|
||||||
|
|
||||||
|
|
||||||
|
def _parseparam(s):
|
||||||
|
"""Generator which splits the input on ;, respecting double-quoted sequences
|
||||||
|
|
||||||
|
Cargo-culted from `cgi`, but works on bytes rather than strings.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
s (bytes): header to be parsed
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Iterable[bytes]: the split input
|
||||||
|
"""
|
||||||
|
while s[:1] == b';':
|
||||||
|
s = s[1:]
|
||||||
|
|
||||||
|
# look for the next ;
|
||||||
|
end = s.find(b';')
|
||||||
|
|
||||||
|
# if there is an odd number of " marks between here and the next ;, skip to the
|
||||||
|
# next ; instead
|
||||||
|
while end > 0 and (s.count(b'"', 0, end) - s.count(b'\\"', 0, end)) % 2:
|
||||||
|
end = s.find(b';', end + 1)
|
||||||
|
|
||||||
|
if end < 0:
|
||||||
|
end = len(s)
|
||||||
|
f = s[:end]
|
||||||
|
yield f.strip()
|
||||||
|
s = s[end:]
|
||||||
|
|
|
@ -0,0 +1,45 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# Copyright 2019 New Vector Ltd
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
from synapse.rest.media.v1._base import get_filename_from_headers
|
||||||
|
|
||||||
|
from tests import unittest
|
||||||
|
|
||||||
|
|
||||||
|
class GetFileNameFromHeadersTests(unittest.TestCase):
|
||||||
|
# input -> expected result
|
||||||
|
TEST_CASES = {
|
||||||
|
b"inline; filename=abc.txt": u"abc.txt",
|
||||||
|
b'inline; filename="azerty"': u"azerty",
|
||||||
|
b'inline; filename="aze%20rty"': u"aze%20rty",
|
||||||
|
b'inline; filename="aze\"rty"': u'aze"rty',
|
||||||
|
b'inline; filename="azer;ty"': u"azer;ty",
|
||||||
|
|
||||||
|
b"inline; filename*=utf-8''foo%C2%A3bar": u"foo£bar",
|
||||||
|
}
|
||||||
|
|
||||||
|
def tests(self):
|
||||||
|
for hdr, expected in self.TEST_CASES.items():
|
||||||
|
res = get_filename_from_headers(
|
||||||
|
{
|
||||||
|
b'Content-Disposition': [hdr],
|
||||||
|
},
|
||||||
|
)
|
||||||
|
self.assertEqual(
|
||||||
|
res, expected,
|
||||||
|
"expected output for %s to be %s but was %s" % (
|
||||||
|
hdr, expected, res,
|
||||||
|
)
|
||||||
|
)
|
Loading…
Reference in New Issue