Use MediaStorage for remote media

This commit is contained in:
Erik Johnston 2018-01-08 17:52:06 +00:00
parent dd3092c3a3
commit 9e20840e02
2 changed files with 154 additions and 134 deletions

View File

@ -14,7 +14,7 @@
# limitations under the License. # limitations under the License.
import synapse.http.servlet import synapse.http.servlet
from ._base import parse_media_id, respond_with_file, respond_404 from ._base import parse_media_id, respond_404
from twisted.web.resource import Resource from twisted.web.resource import Resource
from synapse.http.server import request_handler, set_cors_headers from synapse.http.server import request_handler, set_cors_headers
@ -59,13 +59,6 @@ class DownloadResource(Resource):
if server_name == self.server_name: if server_name == self.server_name:
yield self.media_repo.get_local_media(request, media_id, name) yield self.media_repo.get_local_media(request, media_id, name)
else: else:
yield self._respond_remote_file(
request, server_name, media_id, name
)
@defer.inlineCallbacks
def _respond_remote_file(self, request, server_name, media_id, name):
# don't forward requests for remote media if allow_remote is false
allow_remote = synapse.http.servlet.parse_boolean( allow_remote = synapse.http.servlet.parse_boolean(
request, "allow_remote", default=True) request, "allow_remote", default=True)
if not allow_remote: if not allow_remote:
@ -76,18 +69,4 @@ class DownloadResource(Resource):
respond_404(request) respond_404(request)
return return
media_info = yield self.media_repo.get_remote_media(server_name, media_id) yield self.media_repo.get_remote_media(request, server_name, media_id, name)
media_type = media_info["media_type"]
media_length = media_info["media_length"]
filesystem_id = media_info["filesystem_id"]
upload_name = name if name else media_info["upload_name"]
file_path = self.filepaths.remote_media_filepath(
server_name, filesystem_id
)
yield respond_with_file(
request, media_type, file_path, media_length,
upload_name=upload_name,
)

View File

@ -19,7 +19,7 @@ import twisted.internet.error
import twisted.web.http import twisted.web.http
from twisted.web.resource import Resource from twisted.web.resource import Resource
from ._base import respond_404, RequestWriter, FileInfo, respond_with_responder from ._base import respond_404, FileInfo, respond_with_responder
from .upload_resource import UploadResource from .upload_resource import UploadResource
from .download_resource import DownloadResource from .download_resource import DownloadResource
from .thumbnail_resource import ThumbnailResource from .thumbnail_resource import ThumbnailResource
@ -161,42 +161,85 @@ class MediaRepository(object):
) )
@defer.inlineCallbacks @defer.inlineCallbacks
def get_remote_media(self, server_name, media_id): def get_remote_media(self, request, server_name, media_id, name):
"""Respond to requests for remote media.
"""
self.recently_accessed_remotes.add((server_name, media_id))
# We linearize here to ensure that we don't try and download remote
# media multiple times concurrently
key = (server_name, media_id) key = (server_name, media_id)
with (yield self.remote_media_linearizer.queue(key)): with (yield self.remote_media_linearizer.queue(key)):
media_info = yield self._get_remote_media_impl(server_name, media_id) responder, media_info = yield self._get_remote_media_impl(
defer.returnValue(media_info) server_name, media_id,
)
# We purposefully stream the file outside the lock
if responder:
media_type = media_info["media_type"]
media_length = media_info["media_length"]
upload_name = name if name else media_info["upload_name"]
yield respond_with_responder(
request, responder, media_type, media_length, upload_name,
)
else:
respond_404(request)
@defer.inlineCallbacks @defer.inlineCallbacks
def _get_remote_media_impl(self, server_name, media_id): def _get_remote_media_impl(self, server_name, media_id):
"""Looks for media in local cache, if not there then attempt to
download from remote server.
Returns:
Deferred((Responder, media_info))
"""
media_info = yield self.store.get_cached_remote_media( media_info = yield self.store.get_cached_remote_media(
server_name, media_id server_name, media_id
) )
if not media_info:
media_info = yield self._download_remote_file(
server_name, media_id
)
elif media_info["quarantined_by"]:
raise NotFoundError()
else:
self.recently_accessed_remotes.add((server_name, media_id))
yield self.store.update_cached_last_access_time(
[(server_name, media_id)], self.clock.time_msec()
)
defer.returnValue(media_info)
@defer.inlineCallbacks # file_id is the ID we use to track the file locally. If we've already
def _download_remote_file(self, server_name, media_id): # seen the file then reuse the existing ID, otherwise generate a new
# one.
if media_info:
file_id = media_info["filesystem_id"]
else:
file_id = random_string(24) file_id = random_string(24)
fpath = self.filepaths.remote_media_filepath_rel( file_info = FileInfo(server_name, file_id)
server_name, file_id
)
fname = os.path.join(self.primary_base_path, fpath)
self._makedirs(fname)
try: # If we have an entry in the DB, try and look for it
with open(fname, "wb") as f: if media_info:
if media_info["quarantined_by"]:
raise NotFoundError()
responder = yield self.media_storage.fetch_media(file_info)
if responder:
defer.returnValue((responder, media_info))
# Failed to find the file anywhere, let's download it.
media_info = yield self._download_remote_file(
server_name, media_id, file_id
)
responder = yield self.media_storage.fetch_media(file_info)
if responder:
defer.returnValue((responder, media_info))
defer.returnValue((None, media_info))
@defer.inlineCallbacks
def _download_remote_file(self, server_name, media_id, file_id):
"""Attempt to download the remote file from the given server name,
using the given file_id as the local id.
"""
file_info = FileInfo(
server_name=server_name,
file_id=file_id,
)
with self.media_storage.store_into_file(file_info) as (f, fname, finish):
request_path = "/".join(( request_path = "/".join((
"/_matrix/media/v1/download", server_name, media_id, "/_matrix/media/v1/download", server_name, media_id,
)) ))
@ -234,9 +277,10 @@ class MediaRepository(object):
server_name, media_id) server_name, media_id)
raise SynapseError(502, "Failed to fetch remote media") raise SynapseError(502, "Failed to fetch remote media")
yield self.copy_to_backup(fpath) yield finish()
media_type = headers["Content-Type"][0] media_type = headers["Content-Type"][0]
time_now_ms = self.clock.time_msec() time_now_ms = self.clock.time_msec()
content_disposition = headers.get("Content-Disposition", None) content_disposition = headers.get("Content-Disposition", None)
@ -276,9 +320,6 @@ class MediaRepository(object):
media_length=length, media_length=length,
filesystem_id=file_id, filesystem_id=file_id,
) )
except Exception:
os.remove(fname)
raise
media_info = { media_info = {
"media_type": media_type, "media_type": media_type,