2019-09-23 05:28:01 -06:00
|
|
|
# Copyright 2014, 2015 OpenMarket Ltd
|
2014-09-03 10:04:00 -06:00
|
|
|
#
|
|
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
# you may not use this file except in compliance with the License.
|
|
|
|
# You may obtain a copy of the License at
|
|
|
|
#
|
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
#
|
|
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
# See the License for the specific language governing permissions and
|
|
|
|
# limitations under the License.
|
2019-08-13 05:49:28 -06:00
|
|
|
|
2021-08-03 12:13:34 -06:00
|
|
|
import logging
|
2018-12-21 08:04:57 -07:00
|
|
|
import os
|
2022-04-11 10:07:23 -06:00
|
|
|
from typing import Any, Dict, List, Tuple
|
2021-08-03 12:13:34 -06:00
|
|
|
from urllib.request import getproxies_environment # type: ignore
|
2015-08-12 03:55:27 -06:00
|
|
|
|
2021-12-30 11:47:12 -07:00
|
|
|
import attr
|
|
|
|
|
2022-06-14 08:53:42 -06:00
|
|
|
from synapse.config.server import generate_ip_set
|
2021-12-01 05:28:23 -07:00
|
|
|
from synapse.types import JsonDict
|
2022-06-30 11:48:04 -06:00
|
|
|
from synapse.util.check_dependencies import check_requirements
|
2018-01-16 08:44:08 -07:00
|
|
|
from synapse.util.module_loader import load_module
|
|
|
|
|
2018-07-09 00:09:20 -06:00
|
|
|
from ._base import Config, ConfigError
|
2016-04-13 04:57:46 -06:00
|
|
|
|
2021-08-03 12:13:34 -06:00
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
2019-03-19 04:06:40 -06:00
|
|
|
# Thumbnail sizes used by ContentRepositoryConfig.read_config when the config
# has no "thumbnail_sizes" key. Each entry carries the "width", "height" and
# "method" keys that parse_thumbnail_requirements reads.
DEFAULT_THUMBNAIL_SIZES = [
    {"width": 32, "height": 32, "method": "crop"},
    {"width": 96, "height": 96, "method": "crop"},
    {"width": 320, "height": 240, "method": "scale"},
    {"width": 640, "height": 480, "method": "scale"},
    {"width": 800, "height": 600, "method": "scale"},
]
|
|
|
|
|
|
|
|
THUMBNAIL_SIZE_YAML = """\
|
|
|
|
# - width: %(width)i
|
|
|
|
# height: %(height)i
|
|
|
|
# method: %(method)s
|
|
|
|
"""
|
|
|
|
|
Provide more info why we don't have any thumbnails to serve (#13038)
Fix https://github.com/matrix-org/synapse/issues/13016
## New error code and status
### Before
Previously, we returned a `404` for `/thumbnail` which isn't even in the spec.
```json
{
"errcode": "M_NOT_FOUND",
"error": "Not found [b'hs1', b'tefQeZhmVxoiBfuFQUKRzJxc']"
}
```
### After
What does the spec say?
> 400: The request does not make sense to the server, or the server cannot thumbnail the content. For example, the client requested non-integer dimensions or asked for negatively-sized images.
>
> *-- https://spec.matrix.org/v1.1/client-server-api/#get_matrixmediav3thumbnailservernamemediaid*
Now with this PR, we respond with a `400` when we don't have thumbnails to serve and we explain why we might not have any thumbnails.
```json
{
"errcode": "M_UNKNOWN",
"error": "Cannot find any thumbnails for the requested media ([b'example.com', b'12345']). This might mean the media is not a supported_media_format=(image/jpeg, image/jpg, image/webp, image/gif, image/png) or that thumbnailing failed for some other reason. (Dynamic thumbnails are disabled on this server.)"
}
```
> Cannot find any thumbnails for the requested media ([b'example.com', b'12345']). This might mean the media is not a supported_media_format=(image/jpeg, image/jpg, image/webp, image/gif, image/png) or that thumbnailing failed for some other reason. (Dynamic thumbnails are disabled on this server.)
---
We still respond with a 404 in many other places. But we can iterate on those later and maybe keep some in some specific places after spec updates/clarification: https://github.com/matrix-org/matrix-spec/issues/1122
We can also iterate on the bugs where Synapse doesn't thumbnail when it should in other issues/PRs.
2022-07-15 10:42:21 -06:00
|
|
|
# A map from the media type of the source content to the format ("jpeg" or
# "png") of the thumbnail we should generate for it. Media types that are not
# keys of this map get no thumbnail requirements.
THUMBNAIL_SUPPORTED_MEDIA_FORMAT_MAP = {
    "image/jpeg": "jpeg",
    "image/jpg": "jpeg",
    "image/webp": "jpeg",
    # Thumbnails can only be jpeg or png. We choose png thumbnails for gif
    # because it can have transparency.
    "image/gif": "png",
    "image/png": "png",
}
|
|
|
|
|
2021-08-03 12:13:34 -06:00
|
|
|
HTTP_PROXY_SET_WARNING = """\
|
|
|
|
The Synapse config url_preview_ip_range_blacklist will be ignored as an HTTP(s) proxy is configured."""
|
|
|
|
|
2015-08-12 03:55:27 -06:00
|
|
|
|
2021-12-30 11:47:12 -07:00
|
|
|
@attr.s(frozen=True, slots=True, auto_attribs=True)
class ThumbnailRequirement:
    """One thumbnail that should be generated for a piece of media.

    Instances are immutable (``frozen=True``). parse_thumbnail_requirements
    constructs them positionally, so the field order below is part of the
    interface.
    """

    # Taken from the "width" key of a thumbnail size entry.
    width: int
    # Taken from the "height" key of a thumbnail size entry.
    height: int
    # Thumbnailing method, e.g. "crop" or "scale" in DEFAULT_THUMBNAIL_SIZES.
    method: str
    # Media type of the generated thumbnail; parse_thumbnail_requirements only
    # ever passes "image/jpeg" or "image/png".
    media_type: str
|
|
|
|
|
|
|
|
|
|
|
|
@attr.s(frozen=True, slots=True, auto_attribs=True)
class MediaStorageProviderConfig:
    """Per-provider storage options parsed from ``media_storage_providers``.

    ContentRepositoryConfig.read_config builds one instance per configured
    provider, positionally, in the field order below; each flag defaults to
    False there when the provider config omits it.
    """

    store_local: bool  # Whether to store newly uploaded local files
    store_remote: bool  # Whether to store newly downloaded remote files
    store_synchronous: bool  # Whether to wait for successful storage for local uploads
|
2018-01-16 08:44:08 -07:00
|
|
|
|
2015-08-13 10:34:22 -06:00
|
|
|
|
2021-12-01 05:28:23 -07:00
|
|
|
def parse_thumbnail_requirements(
    thumbnail_sizes: List[JsonDict],
) -> Dict[str, Tuple[ThumbnailRequirement, ...]]:
    """Build the per-media-type thumbnail requirements from configured sizes.

    Takes a list of dictionaries with "width", "height", and "method" keys
    and creates a map from image media types to the thumbnail size,
    thumbnailing method, and thumbnail media type to precalculate.

    Args:
        thumbnail_sizes: List of dicts with "width", "height", and "method"
            keys.

    Returns:
        Dictionary mapping each source media type in
        THUMBNAIL_SUPPORTED_MEDIA_FORMAT_MAP to a tuple of
        ThumbnailRequirement, one per entry of ``thumbnail_sizes`` and in the
        same order. The dictionary is empty when ``thumbnail_sizes`` is empty.

    Raises:
        KeyError: If a size entry lacks "width", "height" or "method".
        Exception: If THUMBNAIL_SUPPORTED_MEDIA_FORMAT_MAP maps a media type
            to anything other than "jpeg" or "png" (a programming error).
    """
    requirements: Dict[str, List[ThumbnailRequirement]] = {}
    for size in thumbnail_sizes:
        width = size["width"]
        height = size["height"]
        method = size["method"]

        # `mime_type` rather than `format`, so the builtin is not shadowed.
        for mime_type, thumbnail_format in THUMBNAIL_SUPPORTED_MEDIA_FORMAT_MAP.items():
            requirement = requirements.setdefault(mime_type, [])
            if thumbnail_format == "jpeg":
                requirement.append(
                    ThumbnailRequirement(width, height, method, "image/jpeg")
                )
            elif thumbnail_format == "png":
                requirement.append(
                    ThumbnailRequirement(width, height, method, "image/png")
                )
            else:
                raise Exception(
                    "Unknown thumbnail mapping from %s to %s. This is a Synapse problem, please report!"
                    % (mime_type, thumbnail_format)
                )

    # Freeze the per-type lists so the result cannot be mutated by callers.
    return {
        media_type: tuple(thumbnails) for media_type, thumbnails in requirements.items()
    }
|
2014-09-03 10:04:00 -06:00
|
|
|
|
2014-10-30 05:10:17 -06:00
|
|
|
|
2014-09-03 10:04:00 -06:00
|
|
|
class ContentRepositoryConfig(Config):
    """Configuration for the media repository (the "media" config section)."""

    section = "media"

    def read_config(self, config: JsonDict, **kwargs: Any) -> None:
        """Populate the media repository settings from the parsed config.

        If the media repo is disabled for this process, only
        ``can_load_media_repo`` is set (to False) and nothing else is read.

        Args:
            config: The parsed configuration dictionary.
            **kwargs: Accepted for interface compatibility; not used here.

        Raises:
            ConfigError: If both ``backup_media_store_path`` and
                ``media_storage_providers`` are set, or if URL previews are
                enabled without ``url_preview_ip_range_blacklist`` unless both
                an "http" and an "https" proxy are present in the environment.
        """
        # Only enable the media repo if either the media repo is enabled or the
        # current worker app is the media repo.
        if (
            self.root.server.enable_media_repo is False
            and config.get("worker_app") != "synapse.app.media_repository"
        ):
            self.can_load_media_repo = False
            return

        self.can_load_media_repo = True

        # Whether this instance should be the one to run the background jobs to
        # e.g clean up old URL previews.
        self.media_instance_running_background_jobs = config.get(
            "media_instance_running_background_jobs",
        )

        self.max_upload_size = self.parse_size(config.get("max_upload_size", "50M"))
        self.max_image_pixels = self.parse_size(config.get("max_image_pixels", "32M"))
        self.max_spider_size = self.parse_size(config.get("max_spider_size", "10M"))

        self.prevent_media_downloads_from = config.get(
            "prevent_media_downloads_from", []
        )

        self.unused_expiration_time = self.parse_duration(
            config.get("unused_expiration_time", "24h")
        )

        self.max_pending_media_uploads = config.get("max_pending_media_uploads", 5)

        self.media_store_path = self.ensure_directory(
            config.get("media_store_path", "media_store")
        )

        backup_media_store_path = config.get("backup_media_store_path")

        synchronous_backup_media_store = config.get(
            "synchronous_backup_media_store", False
        )

        storage_providers = config.get("media_storage_providers", [])

        if backup_media_store_path:
            if storage_providers:
                raise ConfigError(
                    "Cannot use both 'backup_media_store_path' and 'storage_providers'"
                )

            # Express the backup-path options as an equivalent file_system
            # storage provider so only one code path follows.
            storage_providers = [
                {
                    "module": "file_system",
                    "store_local": True,
                    "store_synchronous": synchronous_backup_media_store,
                    "store_remote": True,
                    "config": {"directory": backup_media_store_path},
                }
            ]

        # This is a list of config that can be used to create the storage
        # providers. The entries are tuples of (Class, class_config,
        # MediaStorageProviderConfig), where Class is the class of the provider,
        # the class_config the config to pass to it, and
        # MediaStorageProviderConfig are options for StorageProviderWrapper.
        #
        # We don't create the storage providers here as not all workers need
        # them to be started.
        self.media_storage_providers: List[tuple] = []

        for i, provider_config in enumerate(storage_providers):
            # We special case the module "file_system" so as not to need to
            # expose FileStorageProviderBackend
            if provider_config["module"] in (
                "file_system",
                "synapse.rest.media.v1.storage_provider",
            ):
                # Note: this rewrites the entry in the caller's config in place.
                provider_config[
                    "module"
                ] = "synapse.media.storage_provider.FileStorageProviderBackend"

            provider_class, parsed_config = load_module(
                provider_config, ("media_storage_providers", "<item %i>" % i)
            )

            wrapper_config = MediaStorageProviderConfig(
                provider_config.get("store_local", False),
                provider_config.get("store_remote", False),
                provider_config.get("store_synchronous", False),
            )

            self.media_storage_providers.append(
                (provider_class, parsed_config, wrapper_config)
            )

        self.dynamic_thumbnails = config.get("dynamic_thumbnails", False)
        self.thumbnail_requirements = parse_thumbnail_requirements(
            config.get("thumbnail_sizes", DEFAULT_THUMBNAIL_SIZES)
        )
        self.url_preview_enabled = config.get("url_preview_enabled", False)
        if self.url_preview_enabled:
            check_requirements("url-preview")

            proxy_env = getproxies_environment()
            if "url_preview_ip_range_blacklist" not in config:
                # Without a blacklist, previews are only allowed when both an
                # http and an https proxy are configured.
                if "http" not in proxy_env or "https" not in proxy_env:
                    raise ConfigError(
                        "For security, you must specify an explicit target IP address "
                        "blacklist in url_preview_ip_range_blacklist for url previewing "
                        "to work"
                    )
            else:
                if "http" in proxy_env or "https" in proxy_env:
                    logger.warning(HTTP_PROXY_SET_WARNING)

            # we always block '0.0.0.0' and '::', which are supposed to be
            # unroutable addresses.
            #
            # Use .get(): the key is legitimately absent when both proxies are
            # configured (that case passes the check above), and a bare
            # subscript would then abort config loading with a KeyError.
            self.url_preview_ip_range_blocklist = generate_ip_set(
                config.get("url_preview_ip_range_blacklist", ()),
                ["0.0.0.0", "::"],
                config_path=("url_preview_ip_range_blacklist",),
            )

            self.url_preview_ip_range_allowlist = generate_ip_set(
                config.get("url_preview_ip_range_whitelist", ()),
                config_path=("url_preview_ip_range_whitelist",),
            )

            self.url_preview_url_blocklist = config.get("url_preview_url_blacklist", ())

            # A missing or empty setting falls back to English.
            self.url_preview_accept_language = config.get(
                "url_preview_accept_language"
            ) or ["en"]

        # `media_retention: ~` (None) in the config is treated like an empty
        # section.
        media_retention = config.get("media_retention") or {}

        self.media_retention_local_media_lifetime_ms = None
        local_media_lifetime = media_retention.get("local_media_lifetime")
        if local_media_lifetime is not None:
            self.media_retention_local_media_lifetime_ms = self.parse_duration(
                local_media_lifetime
            )

        self.media_retention_remote_media_lifetime_ms = None
        remote_media_lifetime = media_retention.get("remote_media_lifetime")
        if remote_media_lifetime is not None:
            self.media_retention_remote_media_lifetime_ms = self.parse_duration(
                remote_media_lifetime
            )

    def generate_config_section(self, data_dir_path: str, **kwargs: Any) -> str:
        """Return the ``media_store_path`` line for a generated config.

        Args:
            data_dir_path: Data directory; the media store is placed in its
                ``media_store`` subdirectory.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            A single ``media_store_path: <path>`` line.
        """
        assert data_dir_path is not None
        media_store = os.path.join(data_dir_path, "media_store")
        return f"media_store_path: {media_store}"
|