diff --git a/synapse/http/client.py b/synapse/http/client.py index 1b6f7cb795..b21bf17378 100644 --- a/synapse/http/client.py +++ b/synapse/http/client.py @@ -23,7 +23,8 @@ from canonicaljson import encode_canonical_json from twisted.internet import defer, reactor, ssl, protocol from twisted.web.client import ( - BrowserLikeRedirectAgent, Agent, readBody, FileBodyProducer, PartialDownloadError, + BrowserLikeRedirectAgent, ContentDecoderAgent, GzipDecoder, Agent, + readBody, FileBodyProducer, PartialDownloadError, ) from twisted.web.http import PotentialDataLoss from twisted.web.http_headers import Headers @@ -269,6 +270,10 @@ class SimpleHttpClient(object): # XXX: do we want to explicitly drop the connection here somehow? if so, how? raise # what should we be raising here? + if response.code > 299: + logger.warn("Got %d when downloading %s" % (response.code, url)) + raise + # TODO: if our Content-Type is HTML or something, just read the first # N bytes into RAM rather than saving it all to disk only to read it # straight back in again @@ -366,11 +371,11 @@ class SpiderHttpClient(SimpleHttpClient): def __init__(self, hs): SimpleHttpClient.__init__(self, hs) # clobber the base class's agent and UA: - self.agent = BrowserLikeRedirectAgent(Agent( + self.agent = ContentDecoderAgent(BrowserLikeRedirectAgent(Agent( reactor, connectTimeout=15, contextFactory=hs.get_http_client_context_factory() - )) + )), [('gzip', GzipDecoder)]) # Look like Chrome for now #self.user_agent = ("Mozilla/5.0 (%s) (KHTML, like Gecko) Chrome Safari" % hs.version_string) diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index 3ffdafce09..162e09ba71 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -200,7 +200,7 @@ class PreviewUrlResource(BaseMediaResource): og["og:image:height"] = dims['height'] else: logger.warn("Couldn't get dims for %s" % og["og:image"]) - + og["og:image"] = "mxc://%s/%s" % (self.server_name, image_info['filesystem_id']) og["og:image:type"] = image_info['media_type'] else: @@ -259,7 +259,8 @@ class PreviewUrlResource(BaseMediaResource): length, headers, uri = yield self.client.get_file( url, output_stream=f, max_size=self.max_spider_size, ) - # FIXME: handle 404s sanely - don't spider an error page + # FIXME: pass through 404s and other error messages nicely + media_type = headers["Content-Type"][0] time_now_ms = self.clock.time_msec()