Support gzip compression, and don't pass through error messages
This commit is contained in:
parent
9377157961
commit
d1b154a10f
|
@ -23,7 +23,8 @@ from canonicaljson import encode_canonical_json
|
|||
|
||||
from twisted.internet import defer, reactor, ssl, protocol
|
||||
from twisted.web.client import (
|
||||
BrowserLikeRedirectAgent, Agent, readBody, FileBodyProducer, PartialDownloadError,
|
||||
BrowserLikeRedirectAgent, ContentDecoderAgent, GzipDecoder, Agent,
|
||||
readBody, FileBodyProducer, PartialDownloadError,
|
||||
)
|
||||
from twisted.web.http import PotentialDataLoss
|
||||
from twisted.web.http_headers import Headers
|
||||
|
@ -269,6 +270,10 @@ class SimpleHttpClient(object):
|
|||
# XXX: do we want to explicitly drop the connection here somehow? if so, how?
|
||||
raise # what should we be raising here?
|
||||
|
||||
if response.code > 299:
|
||||
logger.warn("Got %d when downloading %s" % (response.code, url))
|
||||
raise
|
||||
|
||||
# TODO: if our Content-Type is HTML or something, just read the first
|
||||
# N bytes into RAM rather than saving it all to disk only to read it
|
||||
# straight back in again
|
||||
|
@ -366,11 +371,11 @@ class SpiderHttpClient(SimpleHttpClient):
|
|||
def __init__(self, hs):
|
||||
SimpleHttpClient.__init__(self, hs)
|
||||
# clobber the base class's agent and UA:
|
||||
self.agent = BrowserLikeRedirectAgent(Agent(
|
||||
self.agent = ContentDecoderAgent(BrowserLikeRedirectAgent(Agent(
|
||||
reactor,
|
||||
connectTimeout=15,
|
||||
contextFactory=hs.get_http_client_context_factory()
|
||||
))
|
||||
)), [('gzip', GzipDecoder)])
|
||||
# Look like Chrome for now
|
||||
#self.user_agent = ("Mozilla/5.0 (%s) (KHTML, like Gecko) Chrome Safari" % hs.version_string)
|
||||
|
||||
|
|
|
@ -200,7 +200,7 @@ class PreviewUrlResource(BaseMediaResource):
|
|||
og["og:image:height"] = dims['height']
|
||||
else:
|
||||
logger.warn("Couldn't get dims for %s" % og["og:image"])
|
||||
|
||||
|
||||
og["og:image"] = "mxc://%s/%s" % (self.server_name, image_info['filesystem_id'])
|
||||
og["og:image:type"] = image_info['media_type']
|
||||
else:
|
||||
|
@ -259,7 +259,8 @@ class PreviewUrlResource(BaseMediaResource):
|
|||
length, headers, uri = yield self.client.get_file(
|
||||
url, output_stream=f, max_size=self.max_spider_size,
|
||||
)
|
||||
# FIXME: handle 404s sanely - don't spider an error page
|
||||
# FIXME: pass through 404s and other error messages nicely
|
||||
|
||||
media_type = headers["Content-Type"][0]
|
||||
time_now_ms = self.clock.time_msec()
|
||||
|
||||
|
|
Loading…
Reference in New Issue