From b44bdd7f7b261ec26e5d824a8caf97c6f56e1fbd Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 17 Jun 2020 14:13:30 +0100 Subject: [PATCH] Support running multiple media repos. (#7706) This requires a new config option to specify which media repo should be responsible for running background jobs to e.g. clear out expired URL preview caches. --- changelog.d/7706.feature | 1 + docs/workers.md | 7 ++++++- synapse/config/repository.py | 6 ++++++ synapse/rest/media/v1/preview_url_resource.py | 18 +++++++++++++++--- 4 files changed, 28 insertions(+), 4 deletions(-) create mode 100644 changelog.d/7706.feature diff --git a/changelog.d/7706.feature b/changelog.d/7706.feature new file mode 100644 index 0000000000..c6b3b20b55 --- /dev/null +++ b/changelog.d/7706.feature @@ -0,0 +1 @@ +Add support for running multiple media repository workers. See [docs/workers.md](docs/workers.md) for instructions. diff --git a/docs/workers.md b/docs/workers.md index 7512eff43a..f4cbbc0400 100644 --- a/docs/workers.md +++ b/docs/workers.md @@ -307,7 +307,12 @@ expose the `media` resource. For example: - media ``` -Note this worker cannot be load-balanced: only one instance should be active. +Note that if running multiple media repositories they must be on the same server +and you must configure a single instance to run the background tasks, e.g.: + +```yaml + media_instance_running_background_jobs: "media-repository-1" +``` ### `synapse.app.client_reader` diff --git a/synapse/config/repository.py b/synapse/config/repository.py index b751d02d37..01009f3924 100644 --- a/synapse/config/repository.py +++ b/synapse/config/repository.py @@ -94,6 +94,12 @@ class ContentRepositoryConfig(Config): else: self.can_load_media_repo = True + # Whether this instance should be the one to run the background jobs to + # e.g clean up old URL previews. + self.media_instance_running_background_jobs = config.get( + "media_instance_running_background_jobs", + ) + self.max_upload_size = self.parse_size(config.get("max_upload_size", "10M")) self.max_image_pixels = self.parse_size(config.get("max_image_pixels", "32M")) self.max_spider_size = self.parse_size(config.get("max_spider_size", "10M")) diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index f67e0fb3ec..b4645cd608 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -82,6 +82,15 @@ class PreviewUrlResource(DirectServeResource): self.primary_base_path = media_repo.primary_base_path self.media_storage = media_storage + # We run the background jobs if we're the instance specified (or no + # instance is specified, where we assume there is only one instance + # serving media). + instance_running_jobs = hs.config.media.media_instance_running_background_jobs + self._worker_run_media_background_jobs = ( + instance_running_jobs is None + or instance_running_jobs == hs.get_instance_name() + ) + self.url_preview_url_blacklist = hs.config.url_preview_url_blacklist self.url_preview_accept_language = hs.config.url_preview_accept_language @@ -94,9 +103,10 @@ class PreviewUrlResource(DirectServeResource): expiry_ms=60 * 60 * 1000, ) - self._cleaner_loop = self.clock.looping_call( - self._start_expire_url_cache_data, 10 * 1000 - ) + if self._worker_run_media_background_jobs: + self._cleaner_loop = self.clock.looping_call( + self._start_expire_url_cache_data, 10 * 1000 + ) def render_OPTIONS(self, request): request.setHeader(b"Allow", b"OPTIONS, GET") @@ -397,6 +407,8 @@ class PreviewUrlResource(DirectServeResource): """ # TODO: Delete from backup media store + assert self._worker_run_media_background_jobs + now = self.clock.time_msec() logger.debug("Running url preview cache expiry")