Fix bug with new task scheduler using lots of CPU. (#16278)

Using the new `TaskScheduler` meant that we'd create lots of new
metrics (because the task ID was added to the description of each
background process), resulting in requests for metrics taking an
increasing amount of CPU.
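
For context, a minimal sketch (not Synapse code; the metric name, label, and
task IDs below are made up) of why per-task descriptions are expensive: with
prometheus_client, every distinct label value becomes its own time series, and
every scrape of the metrics endpoint has to render all of them, so CPU cost
grows with the number of tasks that have ever been launched.

# Illustrative sketch only -- not Synapse code. Shows how unbounded label
# values (such as per-task IDs) make the metrics output grow without bound.
from prometheus_client import CollectorRegistry, Gauge, generate_latest

registry = CollectorRegistry()
in_flight = Gauge(
    "background_process_in_flight",  # hypothetical metric name
    "Background processes currently running",
    ["desc"],
    registry=registry,
)

# Using "<task id>-<action>" as the label value creates a brand-new time
# series for every task that is ever launched...
for task_id in ("task-1", "task-2", "task-3"):
    in_flight.labels(desc=f"{task_id}-some_action").inc()

# ...and each series has to be rendered on every scrape. With a stable label
# value (the action name alone) this would stay a single series.
print(generate_latest(registry).decode())

The change below keeps the background process name bounded (it uses only
task.action) and moves per-task identification into the logging context
instead.
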
Erik Johnston 2023-09-08 14:43:01 +01:00 committed by GitHub
parent 9084429a6c
commit f43d994624
2 changed files with 23 additions and 21 deletions

changelog.d/16278.misc (new file)

@@ -0,0 +1 @@
+Fix using the new task scheduler causing lots of CPU to be used.

synapse/util/task_scheduler.py

@@ -19,6 +19,7 @@ from prometheus_client import Gauge
 
 from twisted.python.failure import Failure
 
+from synapse.logging.context import nested_logging_context
 from synapse.metrics.background_process_metrics import run_as_background_process
 from synapse.types import JsonMapping, ScheduledTask, TaskStatus
 from synapse.util.stringutils import random_string
@@ -316,26 +317,27 @@ class TaskScheduler:
         function = self._actions[task.action]
 
         async def wrapper() -> None:
-            try:
-                (status, result, error) = await function(task)
-            except Exception:
-                f = Failure()
-                logger.error(
-                    f"scheduled task {task.id} failed",
-                    exc_info=(f.type, f.value, f.getTracebackObject()),
-                )
-                status = TaskStatus.FAILED
-                result = None
-                error = f.getErrorMessage()
+            with nested_logging_context(task.id):
+                try:
+                    (status, result, error) = await function(task)
+                except Exception:
+                    f = Failure()
+                    logger.error(
+                        f"scheduled task {task.id} failed",
+                        exc_info=(f.type, f.value, f.getTracebackObject()),
+                    )
+                    status = TaskStatus.FAILED
+                    result = None
+                    error = f.getErrorMessage()
 
-            await self._store.update_scheduled_task(
-                task.id,
-                self._clock.time_msec(),
-                status=status,
-                result=result,
-                error=error,
-            )
-            self._running_tasks.remove(task.id)
+                await self._store.update_scheduled_task(
+                    task.id,
+                    self._clock.time_msec(),
+                    status=status,
+                    result=result,
+                    error=error,
+                )
+                self._running_tasks.remove(task.id)
 
         if len(self._running_tasks) >= TaskScheduler.MAX_CONCURRENT_RUNNING_TASKS:
             return
@@ -353,5 +355,4 @@ class TaskScheduler:
 
         self._running_tasks.add(task.id)
         await self.update_task(task.id, status=TaskStatus.ACTIVE)
-        description = f"{task.id}-{task.action}"
-        run_as_background_process(description, wrapper)
+        run_as_background_process(task.action, wrapper)