Cache failures in url_preview handler
Reshuffle the caching logic in the url_preview handler so that failures are cached (and to generally simplify things and fix the logcontext leaks).
This commit is contained in:
parent
4d0414c714
commit
46790f50cf
|
@ -20,6 +20,7 @@ from twisted.web.resource import Resource
|
||||||
from synapse.api.errors import (
|
from synapse.api.errors import (
|
||||||
SynapseError, Codes,
|
SynapseError, Codes,
|
||||||
)
|
)
|
||||||
|
from synapse.util.logcontext import preserve_fn, make_deferred_yieldable
|
||||||
from synapse.util.stringutils import random_string
|
from synapse.util.stringutils import random_string
|
||||||
from synapse.util.caches.expiringcache import ExpiringCache
|
from synapse.util.caches.expiringcache import ExpiringCache
|
||||||
from synapse.http.client import SpiderHttpClient
|
from synapse.http.client import SpiderHttpClient
|
||||||
|
@ -63,16 +64,15 @@ class PreviewUrlResource(Resource):
|
||||||
|
|
||||||
self.url_preview_url_blacklist = hs.config.url_preview_url_blacklist
|
self.url_preview_url_blacklist = hs.config.url_preview_url_blacklist
|
||||||
|
|
||||||
# simple memory cache mapping urls to OG metadata
|
# memory cache mapping urls to an ObservableDeferred returning
|
||||||
self.cache = ExpiringCache(
|
# JSON-encoded OG metadata
|
||||||
|
self._cache = ExpiringCache(
|
||||||
cache_name="url_previews",
|
cache_name="url_previews",
|
||||||
clock=self.clock,
|
clock=self.clock,
|
||||||
# don't spider URLs more often than once an hour
|
# don't spider URLs more often than once an hour
|
||||||
expiry_ms=60 * 60 * 1000,
|
expiry_ms=60 * 60 * 1000,
|
||||||
)
|
)
|
||||||
self.cache.start()
|
self._cache.start()
|
||||||
|
|
||||||
self.downloads = {}
|
|
||||||
|
|
||||||
self._cleaner_loop = self.clock.looping_call(
|
self._cleaner_loop = self.clock.looping_call(
|
||||||
self._expire_url_cache_data, 10 * 1000
|
self._expire_url_cache_data, 10 * 1000
|
||||||
|
@ -94,6 +94,7 @@ class PreviewUrlResource(Resource):
|
||||||
else:
|
else:
|
||||||
ts = self.clock.time_msec()
|
ts = self.clock.time_msec()
|
||||||
|
|
||||||
|
# XXX: we could move this into _do_preview if we wanted.
|
||||||
url_tuple = urlparse.urlsplit(url)
|
url_tuple = urlparse.urlsplit(url)
|
||||||
for entry in self.url_preview_url_blacklist:
|
for entry in self.url_preview_url_blacklist:
|
||||||
match = True
|
match = True
|
||||||
|
@ -126,14 +127,40 @@ class PreviewUrlResource(Resource):
|
||||||
Codes.UNKNOWN
|
Codes.UNKNOWN
|
||||||
)
|
)
|
||||||
|
|
||||||
# first check the memory cache - good to handle all the clients on this
|
# the in-memory cache:
|
||||||
# HS thundering away to preview the same URL at the same time.
|
# * ensures that only one request is active at a time
|
||||||
og = self.cache.get(url)
|
# * takes load off the DB for the thundering herds
|
||||||
if og:
|
# * also caches any failures (unlike the DB) so we don't keep
|
||||||
respond_with_json_bytes(request, 200, json.dumps(og), send_cors=True)
|
# requesting the same endpoint
|
||||||
return
|
|
||||||
|
|
||||||
# then check the URL cache in the DB (which will also provide us with
|
observable = self._cache.get(url)
|
||||||
|
|
||||||
|
if not observable:
|
||||||
|
download = preserve_fn(self._do_preview)(
|
||||||
|
url, requester.user, ts,
|
||||||
|
)
|
||||||
|
observable = ObservableDeferred(
|
||||||
|
download,
|
||||||
|
consumeErrors=True
|
||||||
|
)
|
||||||
|
self._cache[url] = observable
|
||||||
|
|
||||||
|
og = yield make_deferred_yieldable(observable.observe())
|
||||||
|
respond_with_json_bytes(request, 200, og, send_cors=True)
|
||||||
|
|
||||||
|
@defer.inlineCallbacks
|
||||||
|
def _do_preview(self, url, user, ts):
|
||||||
|
"""Check the db, and download the URL and build a preview
|
||||||
|
|
||||||
|
Args:
|
||||||
|
url (str):
|
||||||
|
user (str):
|
||||||
|
ts (int):
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Deferred[str]: json-encoded og data
|
||||||
|
"""
|
||||||
|
# check the URL cache in the DB (which will also provide us with
|
||||||
# historical previews, if we have any)
|
# historical previews, if we have any)
|
||||||
cache_result = yield self.store.get_url_cache(url, ts)
|
cache_result = yield self.store.get_url_cache(url, ts)
|
||||||
if (
|
if (
|
||||||
|
@ -141,32 +168,10 @@ class PreviewUrlResource(Resource):
|
||||||
cache_result["expires_ts"] > ts and
|
cache_result["expires_ts"] > ts and
|
||||||
cache_result["response_code"] / 100 == 2
|
cache_result["response_code"] / 100 == 2
|
||||||
):
|
):
|
||||||
respond_with_json_bytes(
|
defer.returnValue(cache_result["og"])
|
||||||
request, 200, cache_result["og"].encode('utf-8'),
|
|
||||||
send_cors=True
|
|
||||||
)
|
|
||||||
return
|
return
|
||||||
|
|
||||||
# Ensure only one download for a given URL is active at a time
|
media_info = yield self._download_url(url, user)
|
||||||
download = self.downloads.get(url)
|
|
||||||
if download is None:
|
|
||||||
download = self._download_url(url, requester.user)
|
|
||||||
download = ObservableDeferred(
|
|
||||||
download,
|
|
||||||
consumeErrors=True
|
|
||||||
)
|
|
||||||
self.downloads[url] = download
|
|
||||||
|
|
||||||
@download.addBoth
|
|
||||||
def callback(media_info):
|
|
||||||
del self.downloads[url]
|
|
||||||
return media_info
|
|
||||||
media_info = yield download.observe()
|
|
||||||
|
|
||||||
# FIXME: we should probably update our cache now anyway, so that
|
|
||||||
# even if the OG calculation raises, we don't keep hammering on the
|
|
||||||
# remote server. For now, leave it uncached to aid debugging OG
|
|
||||||
# calculation problems
|
|
||||||
|
|
||||||
logger.debug("got media_info of '%s'" % media_info)
|
logger.debug("got media_info of '%s'" % media_info)
|
||||||
|
|
||||||
|
@ -212,7 +217,7 @@ class PreviewUrlResource(Resource):
|
||||||
# just rely on the caching on the master request to speed things up.
|
# just rely on the caching on the master request to speed things up.
|
||||||
if 'og:image' in og and og['og:image']:
|
if 'og:image' in og and og['og:image']:
|
||||||
image_info = yield self._download_url(
|
image_info = yield self._download_url(
|
||||||
_rebase_url(og['og:image'], media_info['uri']), requester.user
|
_rebase_url(og['og:image'], media_info['uri']), user
|
||||||
)
|
)
|
||||||
|
|
||||||
if _is_media(image_info['media_type']):
|
if _is_media(image_info['media_type']):
|
||||||
|
@ -239,8 +244,7 @@ class PreviewUrlResource(Resource):
|
||||||
|
|
||||||
logger.debug("Calculated OG for %s as %s" % (url, og))
|
logger.debug("Calculated OG for %s as %s" % (url, og))
|
||||||
|
|
||||||
# store OG in ephemeral in-memory cache
|
jsonog = json.dumps(og)
|
||||||
self.cache[url] = og
|
|
||||||
|
|
||||||
# store OG in history-aware DB cache
|
# store OG in history-aware DB cache
|
||||||
yield self.store.store_url_cache(
|
yield self.store.store_url_cache(
|
||||||
|
@ -248,12 +252,12 @@ class PreviewUrlResource(Resource):
|
||||||
media_info["response_code"],
|
media_info["response_code"],
|
||||||
media_info["etag"],
|
media_info["etag"],
|
||||||
media_info["expires"] + media_info["created_ts"],
|
media_info["expires"] + media_info["created_ts"],
|
||||||
json.dumps(og),
|
jsonog,
|
||||||
media_info["filesystem_id"],
|
media_info["filesystem_id"],
|
||||||
media_info["created_ts"],
|
media_info["created_ts"],
|
||||||
)
|
)
|
||||||
|
|
||||||
respond_with_json_bytes(request, 200, json.dumps(og), send_cors=True)
|
defer.returnValue(jsonog)
|
||||||
|
|
||||||
@defer.inlineCallbacks
|
@defer.inlineCallbacks
|
||||||
def _download_url(self, url, user):
|
def _download_url(self, url, user):
|
||||||
|
|
Loading…
Reference in New Issue