fix typos and needless try/except from PR review
parent 83b2f83da0
commit 5ffacc5e84
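The "needless try/except" in the commit message refers to a wrapper of the form "try: ... except Exception as e: raise e" around the whole body of _async_render_GET, removed in the third hunk below. Catching an exception only to re-raise it adds no behaviour, and on Python 2, which Synapse targeted at the time, "raise e" rebuilds the traceback from the re-raise site and hides where the failure actually occurred. A minimal sketch of the difference, not taken from the Synapse source:

def fetch():
    # stand-in for whatever work the request handler does
    raise ValueError("boom")

def wrapped():
    try:
        fetch()
    except Exception as e:
        # pointless: the exception propagates either way, and on Python 2 the
        # traceback now points here rather than at the failing line in fetch()
        raise e

def unwrapped():
    # same behaviour, less code, full traceback preserved
    fetch()

Calling either function raises the same ValueError; only the noise and the quality of the traceback differ.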
@@ -54,7 +54,7 @@ class PreviewUrlResource(BaseMediaResource):
             if html:
                 pass
         except:
-            raise RunTimeError("Disabling PreviewUrlResource as lxml not available")
+            raise RuntimeError("Disabling PreviewUrlResource as lxml not available")

         if not hasattr(hs.config, "url_preview_ip_range_blacklist"):
             logger.warn(
@@ -62,7 +62,7 @@ class PreviewUrlResource(BaseMediaResource):
                 "blacklist in url_preview_ip_range_blacklist for url previewing "
                 "to work"
             )
-            raise RunTimeError(
+            raise RuntimeError(
                 "Disabling PreviewUrlResource as "
                 "url_preview_ip_range_blacklist not specified"
            )
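A note on the typo fixed in both hunks above: RunTimeError is not a Python builtin, so the old raise statements would themselves have failed with a NameError before the intended message could ever reach a log. A quick illustration, not part of the diff:

# Illustrative only: the misspelled name does not exist in builtins.
try:
    raise RunTimeError("Disabling PreviewUrlResource as lxml not available")
except NameError as e:
    print(e)  # name 'RunTimeError' is not defined

# The corrected spelling raises the intended builtin exception.
raise RuntimeError("Disabling PreviewUrlResource as lxml not available")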
@@ -91,157 +91,154 @@ class PreviewUrlResource(BaseMediaResource):
     @defer.inlineCallbacks
     def _async_render_GET(self, request):

-        try:
-            # XXX: if get_user_by_req fails, what should we do in an async render?
-            requester = yield self.auth.get_user_by_req(request)
-            url = request.args.get("url")[0]
-            if "ts" in request.args:
-                ts = int(request.args.get("ts")[0])
-            else:
-                ts = self.clock.time_msec()
-
-            # impose the URL pattern blacklist
-            if hasattr(self, "url_preview_url_blacklist"):
-                url_tuple = urlsplit(url)
-                for entry in self.url_preview_url_blacklist:
-                    match = True
-                    for attrib in entry:
-                        pattern = entry[attrib]
-                        value = getattr(url_tuple, attrib)
-                        logger.debug((
-                            "Matching attrib '%s' with value '%s' against"
-                            " pattern '%s'"
-                        ) % (attrib, value, pattern))
-
-                        if value is None:
-                            match = False
-                            continue
-
-                        if pattern.startswith('^'):
-                            if not re.match(pattern, getattr(url_tuple, attrib)):
-                                match = False
-                                continue
-                        else:
-                            if not fnmatch.fnmatch(getattr(url_tuple, attrib), pattern):
-                                match = False
-                                continue
-                    if match:
-                        logger.warn(
-                            "URL %s blocked by url_blacklist entry %s", url, entry
-                        )
-                        raise SynapseError(
-                            403, "URL blocked by url pattern blacklist entry",
-                            Codes.UNKNOWN
-                        )
-
-            # first check the memory cache - good to handle all the clients on this
-            # HS thundering away to preview the same URL at the same time.
-            og = self.cache.get(url)
-            if og:
-                respond_with_json_bytes(request, 200, json.dumps(og), send_cors=True)
-                return
-
-            # then check the URL cache in the DB (which will also provide us with
-            # historical previews, if we have any)
-            cache_result = yield self.store.get_url_cache(url, ts)
-            if (
-                cache_result and
-                cache_result["download_ts"] + cache_result["expires"] > ts and
-                cache_result["response_code"] / 100 == 2
-            ):
-                respond_with_json_bytes(
-                    request, 200, cache_result["og"].encode('utf-8'),
-                    send_cors=True
-                )
-                return
-
-            # Ensure only one download for a given URL is active at a time
-            download = self.downloads.get(url)
-            if download is None:
-                download = self._download_url(url, requester.user)
-                download = ObservableDeferred(
-                    download,
-                    consumeErrors=True
-                )
-                self.downloads[url] = download
-
-                @download.addBoth
-                def callback(media_info):
-                    del self.downloads[url]
-                    return media_info
-            media_info = yield download.observe()
-
-            # FIXME: we should probably update our cache now anyway, so that
-            # even if the OG calculation raises, we don't keep hammering on the
-            # remote server. For now, leave it uncached to aid debugging OG
-            # calculation problems
-
-            logger.debug("got media_info of '%s'" % media_info)
-
-            if self._is_media(media_info['media_type']):
-                dims = yield self._generate_local_thumbnails(
-                    media_info['filesystem_id'], media_info
-                )
-
-                og = {
-                    "og:description": media_info['download_name'],
-                    "og:image": "mxc://%s/%s" % (
-                        self.server_name, media_info['filesystem_id']
-                    ),
-                    "og:image:type": media_info['media_type'],
-                    "matrix:image:size": media_info['media_length'],
-                }
-
-                if dims:
-                    og["og:image:width"] = dims['width']
-                    og["og:image:height"] = dims['height']
-                else:
-                    logger.warn("Couldn't get dims for %s" % url)
-
-                # define our OG response for this media
-            elif self._is_html(media_info['media_type']):
-                # TODO: somehow stop a big HTML tree from exploding synapse's RAM
-
-                try:
-                    tree = html.parse(media_info['filename'])
-                    og = yield self._calc_og(tree, media_info, requester)
-                except UnicodeDecodeError:
-                    # XXX: evil evil bodge
-                    # Empirically, sites like google.com mix Latin-1 and utf-8
-                    # encodings in the same page. The rogue Latin-1 characters
-                    # cause lxml to choke with a UnicodeDecodeError, so if we
-                    # see this we go and do a manual decode of the HTML before
-                    # handing it to lxml as utf-8 encoding, counter-intuitively,
-                    # which seems to make it happier...
-                    file = open(media_info['filename'])
-                    body = file.read()
-                    file.close()
-                    tree = html.fromstring(body.decode('utf-8', 'ignore'))
-                    og = yield self._calc_og(tree, media_info, requester)
-
-            else:
-                logger.warn("Failed to find any OG data in %s", url)
-                og = {}
-
-            logger.debug("Calculated OG for %s as %s" % (url, og))
-
-            # store OG in ephemeral in-memory cache
-            self.cache[url] = og
-
-            # store OG in history-aware DB cache
-            yield self.store.store_url_cache(
-                url,
-                media_info["response_code"],
-                media_info["etag"],
-                media_info["expires"],
-                json.dumps(og),
-                media_info["filesystem_id"],
-                media_info["created_ts"],
-            )
-
-            respond_with_json_bytes(request, 200, json.dumps(og), send_cors=True)
-        except Exception as e:
-            raise e
+        # XXX: if get_user_by_req fails, what should we do in an async render?
+        requester = yield self.auth.get_user_by_req(request)
+        url = request.args.get("url")[0]
+        if "ts" in request.args:
+            ts = int(request.args.get("ts")[0])
+        else:
+            ts = self.clock.time_msec()
+
+        # impose the URL pattern blacklist
+        if hasattr(self, "url_preview_url_blacklist"):
+            url_tuple = urlsplit(url)
+            for entry in self.url_preview_url_blacklist:
+                match = True
+                for attrib in entry:
+                    pattern = entry[attrib]
+                    value = getattr(url_tuple, attrib)
+                    logger.debug((
+                        "Matching attrib '%s' with value '%s' against"
+                        " pattern '%s'"
+                    ) % (attrib, value, pattern))
+
+                    if value is None:
+                        match = False
+                        continue
+
+                    if pattern.startswith('^'):
+                        if not re.match(pattern, getattr(url_tuple, attrib)):
+                            match = False
+                            continue
+                    else:
+                        if not fnmatch.fnmatch(getattr(url_tuple, attrib), pattern):
+                            match = False
+                            continue
+                if match:
+                    logger.warn(
+                        "URL %s blocked by url_blacklist entry %s", url, entry
+                    )
+                    raise SynapseError(
+                        403, "URL blocked by url pattern blacklist entry",
+                        Codes.UNKNOWN
+                    )
+
+        # first check the memory cache - good to handle all the clients on this
+        # HS thundering away to preview the same URL at the same time.
+        og = self.cache.get(url)
+        if og:
+            respond_with_json_bytes(request, 200, json.dumps(og), send_cors=True)
+            return
+
+        # then check the URL cache in the DB (which will also provide us with
+        # historical previews, if we have any)
+        cache_result = yield self.store.get_url_cache(url, ts)
+        if (
+            cache_result and
+            cache_result["download_ts"] + cache_result["expires"] > ts and
+            cache_result["response_code"] / 100 == 2
+        ):
+            respond_with_json_bytes(
+                request, 200, cache_result["og"].encode('utf-8'),
+                send_cors=True
+            )
+            return
+
+        # Ensure only one download for a given URL is active at a time
+        download = self.downloads.get(url)
+        if download is None:
+            download = self._download_url(url, requester.user)
+            download = ObservableDeferred(
+                download,
+                consumeErrors=True
+            )
+            self.downloads[url] = download
+
+            @download.addBoth
+            def callback(media_info):
+                del self.downloads[url]
+                return media_info
+        media_info = yield download.observe()
+
+        # FIXME: we should probably update our cache now anyway, so that
+        # even if the OG calculation raises, we don't keep hammering on the
+        # remote server. For now, leave it uncached to aid debugging OG
+        # calculation problems
+
+        logger.debug("got media_info of '%s'" % media_info)
+
+        if self._is_media(media_info['media_type']):
+            dims = yield self._generate_local_thumbnails(
+                media_info['filesystem_id'], media_info
+            )
+
+            og = {
+                "og:description": media_info['download_name'],
+                "og:image": "mxc://%s/%s" % (
+                    self.server_name, media_info['filesystem_id']
+                ),
+                "og:image:type": media_info['media_type'],
+                "matrix:image:size": media_info['media_length'],
+            }
+
+            if dims:
+                og["og:image:width"] = dims['width']
+                og["og:image:height"] = dims['height']
+            else:
+                logger.warn("Couldn't get dims for %s" % url)
+
+            # define our OG response for this media
+        elif self._is_html(media_info['media_type']):
+            # TODO: somehow stop a big HTML tree from exploding synapse's RAM
+
+            try:
+                tree = html.parse(media_info['filename'])
+                og = yield self._calc_og(tree, media_info, requester)
+            except UnicodeDecodeError:
+                # XXX: evil evil bodge
+                # Empirically, sites like google.com mix Latin-1 and utf-8
+                # encodings in the same page. The rogue Latin-1 characters
+                # cause lxml to choke with a UnicodeDecodeError, so if we
+                # see this we go and do a manual decode of the HTML before
+                # handing it to lxml as utf-8 encoding, counter-intuitively,
+                # which seems to make it happier...
+                file = open(media_info['filename'])
+                body = file.read()
+                file.close()
+                tree = html.fromstring(body.decode('utf-8', 'ignore'))
+                og = yield self._calc_og(tree, media_info, requester)
+
+        else:
+            logger.warn("Failed to find any OG data in %s", url)
+            og = {}
+
+        logger.debug("Calculated OG for %s as %s" % (url, og))
+
+        # store OG in ephemeral in-memory cache
+        self.cache[url] = og
+
+        # store OG in history-aware DB cache
+        yield self.store.store_url_cache(
+            url,
+            media_info["response_code"],
+            media_info["etag"],
+            media_info["expires"],
+            json.dumps(og),
+            media_info["filesystem_id"],
+            media_info["created_ts"],
+        )
+
+        respond_with_json_bytes(request, 200, json.dumps(og), send_cors=True)


     @defer.inlineCallbacks
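For readers skimming the large hunk above: each url_preview_url_blacklist entry maps urlsplit() attribute names (scheme, netloc, path, and so on) to patterns, where a pattern starting with '^' is treated as a regular expression and anything else as a shell-style glob; if every attribute in an entry matches, the request is rejected with a 403. A self-contained sketch of that matching logic, using a hypothetical entry rather than a real Synapse config:

# Standalone sketch of the blacklist matching shown above; the entry is hypothetical.
import fnmatch
import re
from urlparse import urlsplit  # urllib.parse on Python 3

entry = {"scheme": "http", "netloc": "*.example.com"}  # hypothetical blacklist entry

url_tuple = urlsplit("http://media.example.com/cat.gif")
match = True
for attrib, pattern in entry.items():
    value = getattr(url_tuple, attrib)
    if value is None:
        match = False
    elif pattern.startswith('^'):
        if not re.match(pattern, value):
            match = False
    elif not fnmatch.fnmatch(value, pattern):
        match = False

print(match)  # True here, so the resource would answer this URL with a 403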