Merge pull request #2538 from matrix-org/erikj/media_backup
Basic implementation of backup media store
This commit is contained in:
commit
db3d84f46c
|
@ -70,7 +70,19 @@ class ContentRepositoryConfig(Config):
|
||||||
self.max_upload_size = self.parse_size(config["max_upload_size"])
|
self.max_upload_size = self.parse_size(config["max_upload_size"])
|
||||||
self.max_image_pixels = self.parse_size(config["max_image_pixels"])
|
self.max_image_pixels = self.parse_size(config["max_image_pixels"])
|
||||||
self.max_spider_size = self.parse_size(config["max_spider_size"])
|
self.max_spider_size = self.parse_size(config["max_spider_size"])
|
||||||
|
|
||||||
self.media_store_path = self.ensure_directory(config["media_store_path"])
|
self.media_store_path = self.ensure_directory(config["media_store_path"])
|
||||||
|
|
||||||
|
self.backup_media_store_path = config.get("backup_media_store_path")
|
||||||
|
if self.backup_media_store_path:
|
||||||
|
self.backup_media_store_path = self.ensure_directory(
|
||||||
|
self.backup_media_store_path
|
||||||
|
)
|
||||||
|
|
||||||
|
self.synchronous_backup_media_store = config.get(
|
||||||
|
"synchronous_backup_media_store", False
|
||||||
|
)
|
||||||
|
|
||||||
self.uploads_path = self.ensure_directory(config["uploads_path"])
|
self.uploads_path = self.ensure_directory(config["uploads_path"])
|
||||||
self.dynamic_thumbnails = config["dynamic_thumbnails"]
|
self.dynamic_thumbnails = config["dynamic_thumbnails"]
|
||||||
self.thumbnail_requirements = parse_thumbnail_requirements(
|
self.thumbnail_requirements = parse_thumbnail_requirements(
|
||||||
|
@ -115,6 +127,14 @@ class ContentRepositoryConfig(Config):
|
||||||
# Directory where uploaded images and attachments are stored.
|
# Directory where uploaded images and attachments are stored.
|
||||||
media_store_path: "%(media_store)s"
|
media_store_path: "%(media_store)s"
|
||||||
|
|
||||||
|
# A secondary directory where uploaded images and attachments are
|
||||||
|
# stored as a backup.
|
||||||
|
# backup_media_store_path: "%(media_store)s"
|
||||||
|
|
||||||
|
# Whether to wait for a successful write to the backup media store before
|
||||||
|
# returning successfully.
|
||||||
|
# synchronous_backup_media_store: false
|
||||||
|
|
||||||
# Directory where in-progress uploads are stored.
|
# Directory where in-progress uploads are stored.
|
||||||
uploads_path: "%(uploads_path)s"
|
uploads_path: "%(uploads_path)s"
|
||||||
|
|
||||||
|
|
|
@ -15,80 +15,111 @@
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
|
import functools
|
||||||
|
|
||||||
NEW_FORMAT_ID_RE = re.compile(r"^\d\d\d\d-\d\d-\d\d")
|
NEW_FORMAT_ID_RE = re.compile(r"^\d\d\d\d-\d\d-\d\d")
|
||||||
|
|
||||||
|
|
||||||
|
def _wrap_in_base_path(func):
|
||||||
|
"""Takes a function that returns a relative path and turns it into an
|
||||||
|
absolute path based on the location of the primary media store
|
||||||
|
"""
|
||||||
|
@functools.wraps(func)
|
||||||
|
def _wrapped(self, *args, **kwargs):
|
||||||
|
path = func(self, *args, **kwargs)
|
||||||
|
return os.path.join(self.base_path, path)
|
||||||
|
|
||||||
|
return _wrapped
|
||||||
|
|
||||||
|
|
||||||
class MediaFilePaths(object):
|
class MediaFilePaths(object):
|
||||||
|
"""Describes where files are stored on disk.
|
||||||
|
|
||||||
def __init__(self, base_path):
|
Most of the functions have a `*_rel` variant which returns a file path that
|
||||||
self.base_path = base_path
|
is relative to the base media store path. This is mainly used when we want
|
||||||
|
to write to the backup media store (when one is configured)
|
||||||
|
"""
|
||||||
|
|
||||||
def default_thumbnail(self, default_top_level, default_sub_type, width,
|
def __init__(self, primary_base_path):
|
||||||
height, content_type, method):
|
self.base_path = primary_base_path
|
||||||
|
|
||||||
|
def default_thumbnail_rel(self, default_top_level, default_sub_type, width,
|
||||||
|
height, content_type, method):
|
||||||
top_level_type, sub_type = content_type.split("/")
|
top_level_type, sub_type = content_type.split("/")
|
||||||
file_name = "%i-%i-%s-%s-%s" % (
|
file_name = "%i-%i-%s-%s-%s" % (
|
||||||
width, height, top_level_type, sub_type, method
|
width, height, top_level_type, sub_type, method
|
||||||
)
|
)
|
||||||
return os.path.join(
|
return os.path.join(
|
||||||
self.base_path, "default_thumbnails", default_top_level,
|
"default_thumbnails", default_top_level,
|
||||||
default_sub_type, file_name
|
default_sub_type, file_name
|
||||||
)
|
)
|
||||||
|
|
||||||
def local_media_filepath(self, media_id):
|
default_thumbnail = _wrap_in_base_path(default_thumbnail_rel)
|
||||||
|
|
||||||
|
def local_media_filepath_rel(self, media_id):
|
||||||
return os.path.join(
|
return os.path.join(
|
||||||
self.base_path, "local_content",
|
"local_content",
|
||||||
media_id[0:2], media_id[2:4], media_id[4:]
|
media_id[0:2], media_id[2:4], media_id[4:]
|
||||||
)
|
)
|
||||||
|
|
||||||
def local_media_thumbnail(self, media_id, width, height, content_type,
|
local_media_filepath = _wrap_in_base_path(local_media_filepath_rel)
|
||||||
method):
|
|
||||||
|
def local_media_thumbnail_rel(self, media_id, width, height, content_type,
|
||||||
|
method):
|
||||||
top_level_type, sub_type = content_type.split("/")
|
top_level_type, sub_type = content_type.split("/")
|
||||||
file_name = "%i-%i-%s-%s-%s" % (
|
file_name = "%i-%i-%s-%s-%s" % (
|
||||||
width, height, top_level_type, sub_type, method
|
width, height, top_level_type, sub_type, method
|
||||||
)
|
)
|
||||||
return os.path.join(
|
return os.path.join(
|
||||||
self.base_path, "local_thumbnails",
|
"local_thumbnails",
|
||||||
media_id[0:2], media_id[2:4], media_id[4:],
|
media_id[0:2], media_id[2:4], media_id[4:],
|
||||||
file_name
|
file_name
|
||||||
)
|
)
|
||||||
|
|
||||||
def remote_media_filepath(self, server_name, file_id):
|
local_media_thumbnail = _wrap_in_base_path(local_media_thumbnail_rel)
|
||||||
|
|
||||||
|
def remote_media_filepath_rel(self, server_name, file_id):
|
||||||
return os.path.join(
|
return os.path.join(
|
||||||
self.base_path, "remote_content", server_name,
|
"remote_content", server_name,
|
||||||
file_id[0:2], file_id[2:4], file_id[4:]
|
file_id[0:2], file_id[2:4], file_id[4:]
|
||||||
)
|
)
|
||||||
|
|
||||||
def remote_media_thumbnail(self, server_name, file_id, width, height,
|
remote_media_filepath = _wrap_in_base_path(remote_media_filepath_rel)
|
||||||
content_type, method):
|
|
||||||
|
def remote_media_thumbnail_rel(self, server_name, file_id, width, height,
|
||||||
|
content_type, method):
|
||||||
top_level_type, sub_type = content_type.split("/")
|
top_level_type, sub_type = content_type.split("/")
|
||||||
file_name = "%i-%i-%s-%s" % (width, height, top_level_type, sub_type)
|
file_name = "%i-%i-%s-%s" % (width, height, top_level_type, sub_type)
|
||||||
return os.path.join(
|
return os.path.join(
|
||||||
self.base_path, "remote_thumbnail", server_name,
|
"remote_thumbnail", server_name,
|
||||||
file_id[0:2], file_id[2:4], file_id[4:],
|
file_id[0:2], file_id[2:4], file_id[4:],
|
||||||
file_name
|
file_name
|
||||||
)
|
)
|
||||||
|
|
||||||
|
remote_media_thumbnail = _wrap_in_base_path(remote_media_thumbnail_rel)
|
||||||
|
|
||||||
def remote_media_thumbnail_dir(self, server_name, file_id):
|
def remote_media_thumbnail_dir(self, server_name, file_id):
|
||||||
return os.path.join(
|
return os.path.join(
|
||||||
self.base_path, "remote_thumbnail", server_name,
|
self.base_path, "remote_thumbnail", server_name,
|
||||||
file_id[0:2], file_id[2:4], file_id[4:],
|
file_id[0:2], file_id[2:4], file_id[4:],
|
||||||
)
|
)
|
||||||
|
|
||||||
def url_cache_filepath(self, media_id):
|
def url_cache_filepath_rel(self, media_id):
|
||||||
if NEW_FORMAT_ID_RE.match(media_id):
|
if NEW_FORMAT_ID_RE.match(media_id):
|
||||||
# Media id is of the form <DATE><RANDOM_STRING>
|
# Media id is of the form <DATE><RANDOM_STRING>
|
||||||
# E.g.: 2017-09-28-fsdRDt24DS234dsf
|
# E.g.: 2017-09-28-fsdRDt24DS234dsf
|
||||||
return os.path.join(
|
return os.path.join(
|
||||||
self.base_path, "url_cache",
|
"url_cache",
|
||||||
media_id[:10], media_id[11:]
|
media_id[:10], media_id[11:]
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
return os.path.join(
|
return os.path.join(
|
||||||
self.base_path, "url_cache",
|
"url_cache",
|
||||||
media_id[0:2], media_id[2:4], media_id[4:],
|
media_id[0:2], media_id[2:4], media_id[4:],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
url_cache_filepath = _wrap_in_base_path(url_cache_filepath_rel)
|
||||||
|
|
||||||
def url_cache_filepath_dirs_to_delete(self, media_id):
|
def url_cache_filepath_dirs_to_delete(self, media_id):
|
||||||
"The dirs to try and remove if we delete the media_id file"
|
"The dirs to try and remove if we delete the media_id file"
|
||||||
if NEW_FORMAT_ID_RE.match(media_id):
|
if NEW_FORMAT_ID_RE.match(media_id):
|
||||||
|
@ -110,8 +141,8 @@ class MediaFilePaths(object):
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
|
|
||||||
def url_cache_thumbnail(self, media_id, width, height, content_type,
|
def url_cache_thumbnail_rel(self, media_id, width, height, content_type,
|
||||||
method):
|
method):
|
||||||
# Media id is of the form <DATE><RANDOM_STRING>
|
# Media id is of the form <DATE><RANDOM_STRING>
|
||||||
# E.g.: 2017-09-28-fsdRDt24DS234dsf
|
# E.g.: 2017-09-28-fsdRDt24DS234dsf
|
||||||
|
|
||||||
|
@ -122,17 +153,19 @@ class MediaFilePaths(object):
|
||||||
|
|
||||||
if NEW_FORMAT_ID_RE.match(media_id):
|
if NEW_FORMAT_ID_RE.match(media_id):
|
||||||
return os.path.join(
|
return os.path.join(
|
||||||
self.base_path, "url_cache_thumbnails",
|
"url_cache_thumbnails",
|
||||||
media_id[:10], media_id[11:],
|
media_id[:10], media_id[11:],
|
||||||
file_name
|
file_name
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
return os.path.join(
|
return os.path.join(
|
||||||
self.base_path, "url_cache_thumbnails",
|
"url_cache_thumbnails",
|
||||||
media_id[0:2], media_id[2:4], media_id[4:],
|
media_id[0:2], media_id[2:4], media_id[4:],
|
||||||
file_name
|
file_name
|
||||||
)
|
)
|
||||||
|
|
||||||
|
url_cache_thumbnail = _wrap_in_base_path(url_cache_thumbnail_rel)
|
||||||
|
|
||||||
def url_cache_thumbnail_directory(self, media_id):
|
def url_cache_thumbnail_directory(self, media_id):
|
||||||
# Media id is of the form <DATE><RANDOM_STRING>
|
# Media id is of the form <DATE><RANDOM_STRING>
|
||||||
# E.g.: 2017-09-28-fsdRDt24DS234dsf
|
# E.g.: 2017-09-28-fsdRDt24DS234dsf
|
||||||
|
|
|
@ -33,7 +33,7 @@ from synapse.api.errors import SynapseError, HttpResponseException, \
|
||||||
|
|
||||||
from synapse.util.async import Linearizer
|
from synapse.util.async import Linearizer
|
||||||
from synapse.util.stringutils import is_ascii
|
from synapse.util.stringutils import is_ascii
|
||||||
from synapse.util.logcontext import preserve_context_over_fn
|
from synapse.util.logcontext import make_deferred_yieldable, preserve_fn
|
||||||
from synapse.util.retryutils import NotRetryingDestination
|
from synapse.util.retryutils import NotRetryingDestination
|
||||||
|
|
||||||
import os
|
import os
|
||||||
|
@ -59,7 +59,14 @@ class MediaRepository(object):
|
||||||
self.store = hs.get_datastore()
|
self.store = hs.get_datastore()
|
||||||
self.max_upload_size = hs.config.max_upload_size
|
self.max_upload_size = hs.config.max_upload_size
|
||||||
self.max_image_pixels = hs.config.max_image_pixels
|
self.max_image_pixels = hs.config.max_image_pixels
|
||||||
self.filepaths = MediaFilePaths(hs.config.media_store_path)
|
|
||||||
|
self.primary_base_path = hs.config.media_store_path
|
||||||
|
self.filepaths = MediaFilePaths(self.primary_base_path)
|
||||||
|
|
||||||
|
self.backup_base_path = hs.config.backup_media_store_path
|
||||||
|
|
||||||
|
self.synchronous_backup_media_store = hs.config.synchronous_backup_media_store
|
||||||
|
|
||||||
self.dynamic_thumbnails = hs.config.dynamic_thumbnails
|
self.dynamic_thumbnails = hs.config.dynamic_thumbnails
|
||||||
self.thumbnail_requirements = hs.config.thumbnail_requirements
|
self.thumbnail_requirements = hs.config.thumbnail_requirements
|
||||||
|
|
||||||
|
@ -87,18 +94,86 @@ class MediaRepository(object):
|
||||||
if not os.path.exists(dirname):
|
if not os.path.exists(dirname):
|
||||||
os.makedirs(dirname)
|
os.makedirs(dirname)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _write_file_synchronously(source, fname):
|
||||||
|
"""Write `source` to the path `fname` synchronously. Should be called
|
||||||
|
from a thread.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
source: A file like object to be written
|
||||||
|
fname (str): Path to write to
|
||||||
|
"""
|
||||||
|
MediaRepository._makedirs(fname)
|
||||||
|
source.seek(0) # Ensure we read from the start of the file
|
||||||
|
with open(fname, "wb") as f:
|
||||||
|
shutil.copyfileobj(source, f)
|
||||||
|
|
||||||
|
@defer.inlineCallbacks
|
||||||
|
def write_to_file_and_backup(self, source, path):
|
||||||
|
"""Write `source` to the on disk media store, and also the backup store
|
||||||
|
if configured.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
source: A file like object that should be written
|
||||||
|
path (str): Relative path to write file to
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Deferred[str]: the file path written to in the primary media store
|
||||||
|
"""
|
||||||
|
fname = os.path.join(self.primary_base_path, path)
|
||||||
|
|
||||||
|
# Write to the main repository
|
||||||
|
yield make_deferred_yieldable(threads.deferToThread(
|
||||||
|
self._write_file_synchronously, source, fname,
|
||||||
|
))
|
||||||
|
|
||||||
|
# Write to backup repository
|
||||||
|
yield self.copy_to_backup(path)
|
||||||
|
|
||||||
|
defer.returnValue(fname)
|
||||||
|
|
||||||
|
@defer.inlineCallbacks
|
||||||
|
def copy_to_backup(self, path):
|
||||||
|
"""Copy a file from the primary to backup media store, if configured.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
path(str): Path of the file to copy, relative to the media store base path
|
||||||
|
"""
|
||||||
|
if self.backup_base_path:
|
||||||
|
primary_fname = os.path.join(self.primary_base_path, path)
|
||||||
|
backup_fname = os.path.join(self.backup_base_path, path)
|
||||||
|
|
||||||
|
# We can either wait for successful writing to the backup repository
|
||||||
|
# or write in the background and immediately return
|
||||||
|
if self.synchronous_backup_media_store:
|
||||||
|
yield make_deferred_yieldable(threads.deferToThread(
|
||||||
|
shutil.copyfile, primary_fname, backup_fname,
|
||||||
|
))
|
||||||
|
else:
|
||||||
|
preserve_fn(threads.deferToThread)(
|
||||||
|
shutil.copyfile, primary_fname, backup_fname,
|
||||||
|
)
|
||||||
|
|
||||||
@defer.inlineCallbacks
|
@defer.inlineCallbacks
|
||||||
def create_content(self, media_type, upload_name, content, content_length,
|
def create_content(self, media_type, upload_name, content, content_length,
|
||||||
auth_user):
|
auth_user):
|
||||||
|
"""Store uploaded content for a local user and return the mxc URL
|
||||||
|
|
||||||
|
Args:
|
||||||
|
media_type(str): The content type of the file
|
||||||
|
upload_name(str): The name of the file
|
||||||
|
content: A file like object that is the content to store
|
||||||
|
content_length(int): The length of the content
|
||||||
|
auth_user(str): The user_id of the uploader
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Deferred[str]: The mxc url of the stored content
|
||||||
|
"""
|
||||||
media_id = random_string(24)
|
media_id = random_string(24)
|
||||||
|
|
||||||
fname = self.filepaths.local_media_filepath(media_id)
|
fname = yield self.write_to_file_and_backup(
|
||||||
self._makedirs(fname)
|
content, self.filepaths.local_media_filepath_rel(media_id)
|
||||||
|
)
|
||||||
# This shouldn't block for very long because the content will have
|
|
||||||
# already been uploaded at this point.
|
|
||||||
with open(fname, "wb") as f:
|
|
||||||
f.write(content)
|
|
||||||
|
|
||||||
logger.info("Stored local media in file %r", fname)
|
logger.info("Stored local media in file %r", fname)
|
||||||
|
|
||||||
|
@ -115,7 +190,7 @@ class MediaRepository(object):
|
||||||
"media_length": content_length,
|
"media_length": content_length,
|
||||||
}
|
}
|
||||||
|
|
||||||
yield self._generate_local_thumbnails(media_id, media_info)
|
yield self._generate_thumbnails(None, media_id, media_info)
|
||||||
|
|
||||||
defer.returnValue("mxc://%s/%s" % (self.server_name, media_id))
|
defer.returnValue("mxc://%s/%s" % (self.server_name, media_id))
|
||||||
|
|
||||||
|
@ -148,9 +223,10 @@ class MediaRepository(object):
|
||||||
def _download_remote_file(self, server_name, media_id):
|
def _download_remote_file(self, server_name, media_id):
|
||||||
file_id = random_string(24)
|
file_id = random_string(24)
|
||||||
|
|
||||||
fname = self.filepaths.remote_media_filepath(
|
fpath = self.filepaths.remote_media_filepath_rel(
|
||||||
server_name, file_id
|
server_name, file_id
|
||||||
)
|
)
|
||||||
|
fname = os.path.join(self.primary_base_path, fpath)
|
||||||
self._makedirs(fname)
|
self._makedirs(fname)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
@ -192,6 +268,8 @@ class MediaRepository(object):
|
||||||
server_name, media_id)
|
server_name, media_id)
|
||||||
raise SynapseError(502, "Failed to fetch remote media")
|
raise SynapseError(502, "Failed to fetch remote media")
|
||||||
|
|
||||||
|
yield self.copy_to_backup(fpath)
|
||||||
|
|
||||||
media_type = headers["Content-Type"][0]
|
media_type = headers["Content-Type"][0]
|
||||||
time_now_ms = self.clock.time_msec()
|
time_now_ms = self.clock.time_msec()
|
||||||
|
|
||||||
|
@ -244,7 +322,7 @@ class MediaRepository(object):
|
||||||
"filesystem_id": file_id,
|
"filesystem_id": file_id,
|
||||||
}
|
}
|
||||||
|
|
||||||
yield self._generate_remote_thumbnails(
|
yield self._generate_thumbnails(
|
||||||
server_name, media_id, media_info
|
server_name, media_id, media_info
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -253,9 +331,8 @@ class MediaRepository(object):
|
||||||
def _get_thumbnail_requirements(self, media_type):
|
def _get_thumbnail_requirements(self, media_type):
|
||||||
return self.thumbnail_requirements.get(media_type, ())
|
return self.thumbnail_requirements.get(media_type, ())
|
||||||
|
|
||||||
def _generate_thumbnail(self, input_path, t_path, t_width, t_height,
|
def _generate_thumbnail(self, thumbnailer, t_width, t_height,
|
||||||
t_method, t_type):
|
t_method, t_type):
|
||||||
thumbnailer = Thumbnailer(input_path)
|
|
||||||
m_width = thumbnailer.width
|
m_width = thumbnailer.width
|
||||||
m_height = thumbnailer.height
|
m_height = thumbnailer.height
|
||||||
|
|
||||||
|
@ -267,72 +344,105 @@ class MediaRepository(object):
|
||||||
return
|
return
|
||||||
|
|
||||||
if t_method == "crop":
|
if t_method == "crop":
|
||||||
t_len = thumbnailer.crop(t_path, t_width, t_height, t_type)
|
t_byte_source = thumbnailer.crop(t_width, t_height, t_type)
|
||||||
elif t_method == "scale":
|
elif t_method == "scale":
|
||||||
t_width, t_height = thumbnailer.aspect(t_width, t_height)
|
t_width, t_height = thumbnailer.aspect(t_width, t_height)
|
||||||
t_width = min(m_width, t_width)
|
t_width = min(m_width, t_width)
|
||||||
t_height = min(m_height, t_height)
|
t_height = min(m_height, t_height)
|
||||||
t_len = thumbnailer.scale(t_path, t_width, t_height, t_type)
|
t_byte_source = thumbnailer.scale(t_width, t_height, t_type)
|
||||||
else:
|
else:
|
||||||
t_len = None
|
t_byte_source = None
|
||||||
|
|
||||||
return t_len
|
return t_byte_source
|
||||||
|
|
||||||
@defer.inlineCallbacks
|
@defer.inlineCallbacks
|
||||||
def generate_local_exact_thumbnail(self, media_id, t_width, t_height,
|
def generate_local_exact_thumbnail(self, media_id, t_width, t_height,
|
||||||
t_method, t_type):
|
t_method, t_type):
|
||||||
input_path = self.filepaths.local_media_filepath(media_id)
|
input_path = self.filepaths.local_media_filepath(media_id)
|
||||||
|
|
||||||
t_path = self.filepaths.local_media_thumbnail(
|
thumbnailer = Thumbnailer(input_path)
|
||||||
media_id, t_width, t_height, t_type, t_method
|
t_byte_source = yield make_deferred_yieldable(threads.deferToThread(
|
||||||
)
|
|
||||||
self._makedirs(t_path)
|
|
||||||
|
|
||||||
t_len = yield preserve_context_over_fn(
|
|
||||||
threads.deferToThread,
|
|
||||||
self._generate_thumbnail,
|
self._generate_thumbnail,
|
||||||
input_path, t_path, t_width, t_height, t_method, t_type
|
thumbnailer, t_width, t_height, t_method, t_type
|
||||||
)
|
))
|
||||||
|
|
||||||
|
if t_byte_source:
|
||||||
|
try:
|
||||||
|
output_path = yield self.write_to_file_and_backup(
|
||||||
|
t_byte_source,
|
||||||
|
self.filepaths.local_media_thumbnail_rel(
|
||||||
|
media_id, t_width, t_height, t_type, t_method
|
||||||
|
)
|
||||||
|
)
|
||||||
|
finally:
|
||||||
|
t_byte_source.close()
|
||||||
|
|
||||||
|
logger.info("Stored thumbnail in file %r", output_path)
|
||||||
|
|
||||||
|
t_len = os.path.getsize(output_path)
|
||||||
|
|
||||||
if t_len:
|
|
||||||
yield self.store.store_local_thumbnail(
|
yield self.store.store_local_thumbnail(
|
||||||
media_id, t_width, t_height, t_type, t_method, t_len
|
media_id, t_width, t_height, t_type, t_method, t_len
|
||||||
)
|
)
|
||||||
|
|
||||||
defer.returnValue(t_path)
|
defer.returnValue(output_path)
|
||||||
|
|
||||||
@defer.inlineCallbacks
|
@defer.inlineCallbacks
|
||||||
def generate_remote_exact_thumbnail(self, server_name, file_id, media_id,
|
def generate_remote_exact_thumbnail(self, server_name, file_id, media_id,
|
||||||
t_width, t_height, t_method, t_type):
|
t_width, t_height, t_method, t_type):
|
||||||
input_path = self.filepaths.remote_media_filepath(server_name, file_id)
|
input_path = self.filepaths.remote_media_filepath(server_name, file_id)
|
||||||
|
|
||||||
t_path = self.filepaths.remote_media_thumbnail(
|
thumbnailer = Thumbnailer(input_path)
|
||||||
server_name, file_id, t_width, t_height, t_type, t_method
|
t_byte_source = yield make_deferred_yieldable(threads.deferToThread(
|
||||||
)
|
|
||||||
self._makedirs(t_path)
|
|
||||||
|
|
||||||
t_len = yield preserve_context_over_fn(
|
|
||||||
threads.deferToThread,
|
|
||||||
self._generate_thumbnail,
|
self._generate_thumbnail,
|
||||||
input_path, t_path, t_width, t_height, t_method, t_type
|
thumbnailer, t_width, t_height, t_method, t_type
|
||||||
)
|
))
|
||||||
|
|
||||||
|
if t_byte_source:
|
||||||
|
try:
|
||||||
|
output_path = yield self.write_to_file_and_backup(
|
||||||
|
t_byte_source,
|
||||||
|
self.filepaths.remote_media_thumbnail_rel(
|
||||||
|
server_name, file_id, t_width, t_height, t_type, t_method
|
||||||
|
)
|
||||||
|
)
|
||||||
|
finally:
|
||||||
|
t_byte_source.close()
|
||||||
|
|
||||||
|
logger.info("Stored thumbnail in file %r", output_path)
|
||||||
|
|
||||||
|
t_len = os.path.getsize(output_path)
|
||||||
|
|
||||||
if t_len:
|
|
||||||
yield self.store.store_remote_media_thumbnail(
|
yield self.store.store_remote_media_thumbnail(
|
||||||
server_name, media_id, file_id,
|
server_name, media_id, file_id,
|
||||||
t_width, t_height, t_type, t_method, t_len
|
t_width, t_height, t_type, t_method, t_len
|
||||||
)
|
)
|
||||||
|
|
||||||
defer.returnValue(t_path)
|
defer.returnValue(output_path)
|
||||||
|
|
||||||
@defer.inlineCallbacks
|
@defer.inlineCallbacks
|
||||||
def _generate_local_thumbnails(self, media_id, media_info, url_cache=False):
|
def _generate_thumbnails(self, server_name, media_id, media_info, url_cache=False):
|
||||||
|
"""Generate and store thumbnails for an image.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
server_name(str|None): The server name if remote media, else None if local
|
||||||
|
media_id(str)
|
||||||
|
media_info(dict)
|
||||||
|
url_cache(bool): If we are thumbnailing images downloaded for the URL cache,
|
||||||
|
used exclusively by the url previewer
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Deferred[dict]: Dict with "width" and "height" keys of original image
|
||||||
|
"""
|
||||||
media_type = media_info["media_type"]
|
media_type = media_info["media_type"]
|
||||||
|
file_id = media_info.get("filesystem_id")
|
||||||
requirements = self._get_thumbnail_requirements(media_type)
|
requirements = self._get_thumbnail_requirements(media_type)
|
||||||
if not requirements:
|
if not requirements:
|
||||||
return
|
return
|
||||||
|
|
||||||
if url_cache:
|
if server_name:
|
||||||
|
input_path = self.filepaths.remote_media_filepath(server_name, file_id)
|
||||||
|
elif url_cache:
|
||||||
input_path = self.filepaths.url_cache_filepath(media_id)
|
input_path = self.filepaths.url_cache_filepath(media_id)
|
||||||
else:
|
else:
|
||||||
input_path = self.filepaths.local_media_filepath(media_id)
|
input_path = self.filepaths.local_media_filepath(media_id)
|
||||||
|
@ -348,135 +458,69 @@ class MediaRepository(object):
|
||||||
)
|
)
|
||||||
return
|
return
|
||||||
|
|
||||||
local_thumbnails = []
|
# We deduplicate the thumbnail sizes by ignoring the cropped versions if
|
||||||
|
# they have the same dimensions as a scaled one.
|
||||||
|
thumbnails = {}
|
||||||
|
for r_width, r_height, r_method, r_type in requirements:
|
||||||
|
if r_method == "crop":
|
||||||
|
thumbnails.setdefault((r_width, r_height, r_type), r_method)
|
||||||
|
elif r_method == "scale":
|
||||||
|
t_width, t_height = thumbnailer.aspect(r_width, r_height)
|
||||||
|
t_width = min(m_width, t_width)
|
||||||
|
t_height = min(m_height, t_height)
|
||||||
|
thumbnails[(t_width, t_height, r_type)] = r_method
|
||||||
|
|
||||||
def generate_thumbnails():
|
# Now we generate the thumbnail for each dimension and store it
|
||||||
scales = set()
|
for (t_width, t_height, t_type), t_method in thumbnails.iteritems():
|
||||||
crops = set()
|
# Work out the correct file name for thumbnail
|
||||||
for r_width, r_height, r_method, r_type in requirements:
|
if server_name:
|
||||||
if r_method == "scale":
|
file_path = self.filepaths.remote_media_thumbnail_rel(
|
||||||
t_width, t_height = thumbnailer.aspect(r_width, r_height)
|
|
||||||
scales.add((
|
|
||||||
min(m_width, t_width), min(m_height, t_height), r_type,
|
|
||||||
))
|
|
||||||
elif r_method == "crop":
|
|
||||||
crops.add((r_width, r_height, r_type))
|
|
||||||
|
|
||||||
for t_width, t_height, t_type in scales:
|
|
||||||
t_method = "scale"
|
|
||||||
if url_cache:
|
|
||||||
t_path = self.filepaths.url_cache_thumbnail(
|
|
||||||
media_id, t_width, t_height, t_type, t_method
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
t_path = self.filepaths.local_media_thumbnail(
|
|
||||||
media_id, t_width, t_height, t_type, t_method
|
|
||||||
)
|
|
||||||
self._makedirs(t_path)
|
|
||||||
t_len = thumbnailer.scale(t_path, t_width, t_height, t_type)
|
|
||||||
|
|
||||||
local_thumbnails.append((
|
|
||||||
media_id, t_width, t_height, t_type, t_method, t_len
|
|
||||||
))
|
|
||||||
|
|
||||||
for t_width, t_height, t_type in crops:
|
|
||||||
if (t_width, t_height, t_type) in scales:
|
|
||||||
# If the aspect ratio of the cropped thumbnail matches a purely
|
|
||||||
# scaled one then there is no point in calculating a separate
|
|
||||||
# thumbnail.
|
|
||||||
continue
|
|
||||||
t_method = "crop"
|
|
||||||
if url_cache:
|
|
||||||
t_path = self.filepaths.url_cache_thumbnail(
|
|
||||||
media_id, t_width, t_height, t_type, t_method
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
t_path = self.filepaths.local_media_thumbnail(
|
|
||||||
media_id, t_width, t_height, t_type, t_method
|
|
||||||
)
|
|
||||||
self._makedirs(t_path)
|
|
||||||
t_len = thumbnailer.crop(t_path, t_width, t_height, t_type)
|
|
||||||
local_thumbnails.append((
|
|
||||||
media_id, t_width, t_height, t_type, t_method, t_len
|
|
||||||
))
|
|
||||||
|
|
||||||
yield preserve_context_over_fn(threads.deferToThread, generate_thumbnails)
|
|
||||||
|
|
||||||
for l in local_thumbnails:
|
|
||||||
yield self.store.store_local_thumbnail(*l)
|
|
||||||
|
|
||||||
defer.returnValue({
|
|
||||||
"width": m_width,
|
|
||||||
"height": m_height,
|
|
||||||
})
|
|
||||||
|
|
||||||
@defer.inlineCallbacks
|
|
||||||
def _generate_remote_thumbnails(self, server_name, media_id, media_info):
|
|
||||||
media_type = media_info["media_type"]
|
|
||||||
file_id = media_info["filesystem_id"]
|
|
||||||
requirements = self._get_thumbnail_requirements(media_type)
|
|
||||||
if not requirements:
|
|
||||||
return
|
|
||||||
|
|
||||||
remote_thumbnails = []
|
|
||||||
|
|
||||||
input_path = self.filepaths.remote_media_filepath(server_name, file_id)
|
|
||||||
thumbnailer = Thumbnailer(input_path)
|
|
||||||
m_width = thumbnailer.width
|
|
||||||
m_height = thumbnailer.height
|
|
||||||
|
|
||||||
def generate_thumbnails():
|
|
||||||
if m_width * m_height >= self.max_image_pixels:
|
|
||||||
logger.info(
|
|
||||||
"Image too large to thumbnail %r x %r > %r",
|
|
||||||
m_width, m_height, self.max_image_pixels
|
|
||||||
)
|
|
||||||
return
|
|
||||||
|
|
||||||
scales = set()
|
|
||||||
crops = set()
|
|
||||||
for r_width, r_height, r_method, r_type in requirements:
|
|
||||||
if r_method == "scale":
|
|
||||||
t_width, t_height = thumbnailer.aspect(r_width, r_height)
|
|
||||||
scales.add((
|
|
||||||
min(m_width, t_width), min(m_height, t_height), r_type,
|
|
||||||
))
|
|
||||||
elif r_method == "crop":
|
|
||||||
crops.add((r_width, r_height, r_type))
|
|
||||||
|
|
||||||
for t_width, t_height, t_type in scales:
|
|
||||||
t_method = "scale"
|
|
||||||
t_path = self.filepaths.remote_media_thumbnail(
|
|
||||||
server_name, file_id, t_width, t_height, t_type, t_method
|
server_name, file_id, t_width, t_height, t_type, t_method
|
||||||
)
|
)
|
||||||
self._makedirs(t_path)
|
elif url_cache:
|
||||||
t_len = thumbnailer.scale(t_path, t_width, t_height, t_type)
|
file_path = self.filepaths.url_cache_thumbnail_rel(
|
||||||
remote_thumbnails.append([
|
media_id, t_width, t_height, t_type, t_method
|
||||||
server_name, media_id, file_id,
|
|
||||||
t_width, t_height, t_type, t_method, t_len
|
|
||||||
])
|
|
||||||
|
|
||||||
for t_width, t_height, t_type in crops:
|
|
||||||
if (t_width, t_height, t_type) in scales:
|
|
||||||
# If the aspect ratio of the cropped thumbnail matches a purely
|
|
||||||
# scaled one then there is no point in calculating a separate
|
|
||||||
# thumbnail.
|
|
||||||
continue
|
|
||||||
t_method = "crop"
|
|
||||||
t_path = self.filepaths.remote_media_thumbnail(
|
|
||||||
server_name, file_id, t_width, t_height, t_type, t_method
|
|
||||||
)
|
)
|
||||||
self._makedirs(t_path)
|
else:
|
||||||
t_len = thumbnailer.crop(t_path, t_width, t_height, t_type)
|
file_path = self.filepaths.local_media_thumbnail_rel(
|
||||||
remote_thumbnails.append([
|
media_id, t_width, t_height, t_type, t_method
|
||||||
|
)
|
||||||
|
|
||||||
|
# Generate the thumbnail
|
||||||
|
if t_type == "crop":
|
||||||
|
t_byte_source = yield make_deferred_yieldable(threads.deferToThread(
|
||||||
|
thumbnailer.crop,
|
||||||
|
r_width, r_height, t_type,
|
||||||
|
))
|
||||||
|
else:
|
||||||
|
t_byte_source = yield make_deferred_yieldable(threads.deferToThread(
|
||||||
|
thumbnailer.scale,
|
||||||
|
r_width, r_height, t_type,
|
||||||
|
))
|
||||||
|
|
||||||
|
if not t_byte_source:
|
||||||
|
continue
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Write to disk
|
||||||
|
output_path = yield self.write_to_file_and_backup(
|
||||||
|
t_byte_source, file_path,
|
||||||
|
)
|
||||||
|
finally:
|
||||||
|
t_byte_source.close()
|
||||||
|
|
||||||
|
t_len = os.path.getsize(output_path)
|
||||||
|
|
||||||
|
# Write to database
|
||||||
|
if server_name:
|
||||||
|
yield self.store.store_remote_media_thumbnail(
|
||||||
server_name, media_id, file_id,
|
server_name, media_id, file_id,
|
||||||
t_width, t_height, t_type, t_method, t_len
|
t_width, t_height, t_type, t_method, t_len
|
||||||
])
|
)
|
||||||
|
else:
|
||||||
yield preserve_context_over_fn(threads.deferToThread, generate_thumbnails)
|
yield self.store.store_local_thumbnail(
|
||||||
|
media_id, t_width, t_height, t_type, t_method, t_len
|
||||||
for r in remote_thumbnails:
|
)
|
||||||
yield self.store.store_remote_media_thumbnail(*r)
|
|
||||||
|
|
||||||
defer.returnValue({
|
defer.returnValue({
|
||||||
"width": m_width,
|
"width": m_width,
|
||||||
|
@ -497,6 +541,8 @@ class MediaRepository(object):
|
||||||
|
|
||||||
logger.info("Deleting: %r", key)
|
logger.info("Deleting: %r", key)
|
||||||
|
|
||||||
|
# TODO: Should we delete from the backup store
|
||||||
|
|
||||||
with (yield self.remote_media_linearizer.queue(key)):
|
with (yield self.remote_media_linearizer.queue(key)):
|
||||||
full_path = self.filepaths.remote_media_filepath(origin, file_id)
|
full_path = self.filepaths.remote_media_filepath(origin, file_id)
|
||||||
try:
|
try:
|
||||||
|
|
|
@ -59,6 +59,7 @@ class PreviewUrlResource(Resource):
|
||||||
self.store = hs.get_datastore()
|
self.store = hs.get_datastore()
|
||||||
self.client = SpiderHttpClient(hs)
|
self.client = SpiderHttpClient(hs)
|
||||||
self.media_repo = media_repo
|
self.media_repo = media_repo
|
||||||
|
self.primary_base_path = media_repo.primary_base_path
|
||||||
|
|
||||||
self.url_preview_url_blacklist = hs.config.url_preview_url_blacklist
|
self.url_preview_url_blacklist = hs.config.url_preview_url_blacklist
|
||||||
|
|
||||||
|
@ -170,8 +171,8 @@ class PreviewUrlResource(Resource):
|
||||||
logger.debug("got media_info of '%s'" % media_info)
|
logger.debug("got media_info of '%s'" % media_info)
|
||||||
|
|
||||||
if _is_media(media_info['media_type']):
|
if _is_media(media_info['media_type']):
|
||||||
dims = yield self.media_repo._generate_local_thumbnails(
|
dims = yield self.media_repo._generate_thumbnails(
|
||||||
media_info['filesystem_id'], media_info, url_cache=True,
|
None, media_info['filesystem_id'], media_info, url_cache=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
og = {
|
og = {
|
||||||
|
@ -216,8 +217,8 @@ class PreviewUrlResource(Resource):
|
||||||
|
|
||||||
if _is_media(image_info['media_type']):
|
if _is_media(image_info['media_type']):
|
||||||
# TODO: make sure we don't choke on white-on-transparent images
|
# TODO: make sure we don't choke on white-on-transparent images
|
||||||
dims = yield self.media_repo._generate_local_thumbnails(
|
dims = yield self.media_repo._generate_thumbnails(
|
||||||
image_info['filesystem_id'], image_info, url_cache=True,
|
None, image_info['filesystem_id'], image_info, url_cache=True,
|
||||||
)
|
)
|
||||||
if dims:
|
if dims:
|
||||||
og["og:image:width"] = dims['width']
|
og["og:image:width"] = dims['width']
|
||||||
|
@ -262,7 +263,8 @@ class PreviewUrlResource(Resource):
|
||||||
|
|
||||||
file_id = datetime.date.today().isoformat() + '_' + random_string(16)
|
file_id = datetime.date.today().isoformat() + '_' + random_string(16)
|
||||||
|
|
||||||
fname = self.filepaths.url_cache_filepath(file_id)
|
fpath = self.filepaths.url_cache_filepath_rel(file_id)
|
||||||
|
fname = os.path.join(self.primary_base_path, fpath)
|
||||||
self.media_repo._makedirs(fname)
|
self.media_repo._makedirs(fname)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
@ -273,6 +275,8 @@ class PreviewUrlResource(Resource):
|
||||||
)
|
)
|
||||||
# FIXME: pass through 404s and other error messages nicely
|
# FIXME: pass through 404s and other error messages nicely
|
||||||
|
|
||||||
|
yield self.media_repo.copy_to_backup(fpath)
|
||||||
|
|
||||||
media_type = headers["Content-Type"][0]
|
media_type = headers["Content-Type"][0]
|
||||||
time_now_ms = self.clock.time_msec()
|
time_now_ms = self.clock.time_msec()
|
||||||
|
|
||||||
|
@ -338,6 +342,9 @@ class PreviewUrlResource(Resource):
|
||||||
def _expire_url_cache_data(self):
|
def _expire_url_cache_data(self):
|
||||||
"""Clean up expired url cache content, media and thumbnails.
|
"""Clean up expired url cache content, media and thumbnails.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
# TODO: Delete from backup media store
|
||||||
|
|
||||||
now = self.clock.time_msec()
|
now = self.clock.time_msec()
|
||||||
|
|
||||||
# First we delete expired url cache entries
|
# First we delete expired url cache entries
|
||||||
|
|
|
@ -50,12 +50,16 @@ class Thumbnailer(object):
|
||||||
else:
|
else:
|
||||||
return ((max_height * self.width) // self.height, max_height)
|
return ((max_height * self.width) // self.height, max_height)
|
||||||
|
|
||||||
def scale(self, output_path, width, height, output_type):
|
def scale(self, width, height, output_type):
|
||||||
"""Rescales the image to the given dimensions"""
|
"""Rescales the image to the given dimensions.
|
||||||
scaled = self.image.resize((width, height), Image.ANTIALIAS)
|
|
||||||
return self.save_image(scaled, output_type, output_path)
|
|
||||||
|
|
||||||
def crop(self, output_path, width, height, output_type):
|
Returns:
|
||||||
|
BytesIO: the bytes of the encoded image ready to be written to disk
|
||||||
|
"""
|
||||||
|
scaled = self.image.resize((width, height), Image.ANTIALIAS)
|
||||||
|
return self._encode_image(scaled, output_type)
|
||||||
|
|
||||||
|
def crop(self, width, height, output_type):
|
||||||
"""Rescales and crops the image to the given dimensions preserving
|
"""Rescales and crops the image to the given dimensions preserving
|
||||||
aspect::
|
aspect::
|
||||||
(w_in / h_in) = (w_scaled / h_scaled)
|
(w_in / h_in) = (w_scaled / h_scaled)
|
||||||
|
@ -65,6 +69,9 @@ class Thumbnailer(object):
|
||||||
Args:
|
Args:
|
||||||
max_width: The largest possible width.
|
max_width: The largest possible width.
|
||||||
max_height: The largest possible height.
|
max_height: The largest possible height.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
BytesIO: the bytes of the encoded image ready to be written to disk
|
||||||
"""
|
"""
|
||||||
if width * self.height > height * self.width:
|
if width * self.height > height * self.width:
|
||||||
scaled_height = (width * self.height) // self.width
|
scaled_height = (width * self.height) // self.width
|
||||||
|
@ -82,13 +89,9 @@ class Thumbnailer(object):
|
||||||
crop_left = (scaled_width - width) // 2
|
crop_left = (scaled_width - width) // 2
|
||||||
crop_right = width + crop_left
|
crop_right = width + crop_left
|
||||||
cropped = scaled_image.crop((crop_left, 0, crop_right, height))
|
cropped = scaled_image.crop((crop_left, 0, crop_right, height))
|
||||||
return self.save_image(cropped, output_type, output_path)
|
return self._encode_image(cropped, output_type)
|
||||||
|
|
||||||
def save_image(self, output_image, output_type, output_path):
|
def _encode_image(self, output_image, output_type):
|
||||||
output_bytes_io = BytesIO()
|
output_bytes_io = BytesIO()
|
||||||
output_image.save(output_bytes_io, self.FORMATS[output_type], quality=80)
|
output_image.save(output_bytes_io, self.FORMATS[output_type], quality=80)
|
||||||
output_bytes = output_bytes_io.getvalue()
|
return output_bytes_io
|
||||||
with open(output_path, "wb") as output_file:
|
|
||||||
output_file.write(output_bytes)
|
|
||||||
logger.info("Stored thumbnail in file %r", output_path)
|
|
||||||
return len(output_bytes)
|
|
||||||
|
|
|
@ -93,7 +93,7 @@ class UploadResource(Resource):
|
||||||
# TODO(markjh): parse content-disposition
|
# TODO(markjh): parse content-disposition
|
||||||
|
|
||||||
content_uri = yield self.media_repo.create_content(
|
content_uri = yield self.media_repo.create_content(
|
||||||
media_type, upload_name, request.content.read(),
|
media_type, upload_name, request.content,
|
||||||
content_length, requester.user
|
content_length, requester.user
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue