2016-01-06 21:26:29 -07:00
|
|
|
# Copyright 2014-2016 OpenMarket Ltd
|
2018-03-09 09:17:27 -07:00
|
|
|
# Copyright 2018 New Vector Ltd
|
2014-08-12 08:10:52 -06:00
|
|
|
#
|
|
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
# you may not use this file except in compliance with the License.
|
|
|
|
# You may obtain a copy of the License at
|
|
|
|
#
|
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
#
|
|
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
# See the License for the specific language governing permissions and
|
|
|
|
# limitations under the License.
|
2018-08-01 08:54:06 -06:00
|
|
|
|
2020-07-03 12:02:19 -06:00
|
|
|
import abc
|
2020-01-15 08:58:55 -07:00
|
|
|
import html
|
2018-07-09 00:09:20 -06:00
|
|
|
import logging
|
2019-06-29 01:06:55 -06:00
|
|
|
import types
|
|
|
|
import urllib
|
2020-06-16 06:51:47 -06:00
|
|
|
from http import HTTPStatus
|
2021-03-03 13:47:38 -07:00
|
|
|
from inspect import isawaitable
|
2021-01-27 05:41:24 -07:00
|
|
|
from typing import (
|
2021-10-22 11:15:41 -06:00
|
|
|
TYPE_CHECKING,
|
2021-01-27 05:41:24 -07:00
|
|
|
Any,
|
|
|
|
Awaitable,
|
|
|
|
Callable,
|
|
|
|
Dict,
|
|
|
|
Iterable,
|
|
|
|
Iterator,
|
|
|
|
List,
|
2021-12-14 05:00:47 -07:00
|
|
|
NoReturn,
|
2021-03-03 13:47:38 -07:00
|
|
|
Optional,
|
2021-01-27 05:41:24 -07:00
|
|
|
Pattern,
|
|
|
|
Tuple,
|
2022-05-10 07:05:22 -06:00
|
|
|
TypeVar,
|
2021-01-27 05:41:24 -07:00
|
|
|
Union,
|
|
|
|
)
|
2014-08-12 08:10:52 -06:00
|
|
|
|
2021-12-30 11:47:12 -07:00
|
|
|
import attr
|
2020-06-03 03:41:12 -06:00
|
|
|
import jinja2
|
Encode JSON responses on a thread in C, mk2 (#10905)
Currently we use `JsonEncoder.iterencode` to write JSON responses, which ensures that we don't block the main reactor thread when encoding huge objects. The downside to this is that `iterencode` falls back to using a pure Python encoder that is *much* less efficient and can easily burn a lot of CPU for huge responses. To fix this, while still ensuring we don't block the reactor loop, we encode the JSON on a threadpool using the standard `JsonEncoder.encode` functions, which is backed by a C library.
Doing so, however, requires `respond_with_json` to have access to the reactor, which it previously didn't. There are two ways of doing this:
1. threading through the reactor object, which is a bit fiddly as e.g. `DirectServeJsonResource` doesn't currently take a reactor, but is exposed to modules and so is a PITA to change; or
2. expose the reactor in `SynapseRequest`, which requires updating a bunch of servlet types.
I went with the latter as that is just a mechanical change, and I think makes sense as a request already has a reactor associated with it (via its http channel).
2021-09-28 03:37:58 -06:00
|
|
|
from canonicaljson import encode_canonical_json
|
2021-01-27 05:41:24 -07:00
|
|
|
from typing_extensions import Protocol
|
2020-08-18 06:49:59 -06:00
|
|
|
from zope.interface import implementer
|
2014-12-22 03:16:02 -07:00
|
|
|
|
2020-08-18 06:49:59 -06:00
|
|
|
from twisted.internet import defer, interfaces
|
2022-05-04 06:38:55 -06:00
|
|
|
from twisted.internet.defer import CancelledError
|
2017-11-23 08:20:52 -07:00
|
|
|
from twisted.python import failure
|
2018-08-15 01:49:59 -06:00
|
|
|
from twisted.web import resource
|
2020-06-03 03:41:12 -06:00
|
|
|
from twisted.web.server import NOT_DONE_YET, Request
|
Encode JSON responses on a thread in C, mk2 (#10905)
Currently we use `JsonEncoder.iterencode` to write JSON responses, which ensures that we don't block the main reactor thread when encoding huge objects. The downside to this is that `iterencode` falls back to using a pure Python encoder that is *much* less efficient and can easily burn a lot of CPU for huge responses. To fix this, while still ensuring we don't block the reactor loop, we encode the JSON on a threadpool using the standard `JsonEncoder.encode` functions, which is backed by a C library.
Doing so, however, requires `respond_with_json` to have access to the reactor, which it previously didn't. There are two ways of doing this:
1. threading through the reactor object, which is a bit fiddly as e.g. `DirectServeJsonResource` doesn't currently take a reactor, but is exposed to modules and so is a PITA to change; or
2. expose the reactor in `SynapseRequest`, which requires updating a bunch of servlet types.
I went with the latter as that is just a mechanical change, and I think makes sense as a request already has a reactor associated with it (via its http channel).
2021-09-28 03:37:58 -06:00
|
|
|
from twisted.web.static import File
|
2014-08-14 04:37:13 -06:00
|
|
|
from twisted.web.util import redirectTo
|
2014-08-12 08:10:52 -06:00
|
|
|
|
2018-07-09 00:09:20 -06:00
|
|
|
from synapse.api.errors import (
|
|
|
|
CodeMessageException,
|
|
|
|
Codes,
|
2020-01-15 08:58:55 -07:00
|
|
|
RedirectException,
|
2018-07-09 00:09:20 -06:00
|
|
|
SynapseError,
|
|
|
|
UnrecognizedRequestError,
|
|
|
|
)
|
2020-06-03 03:41:12 -06:00
|
|
|
from synapse.http.site import SynapseRequest
|
Encode JSON responses on a thread in C, mk2 (#10905)
Currently we use `JsonEncoder.iterencode` to write JSON responses, which ensures that we don't block the main reactor thread when encoding huge objects. The downside to this is that `iterencode` falls back to using a pure Python encoder that is *much* less efficient and can easily burn a lot of CPU for huge responses. To fix this, while still ensuring we don't block the reactor loop, we encode the JSON on a threadpool using the standard `JsonEncoder.encode` functions, which is backed by a C library.
Doing so, however, requires `respond_with_json` to have access to the reactor, which it previously didn't. There are two ways of doing this:
1. threading through the reactor object, which is a bit fiddly as e.g. `DirectServeJsonResource` doesn't currently take a reactor, but is exposed to modules and so is a PITA to change; or
2. expose the reactor in `SynapseRequest`, which requires updating a bunch of servlet types.
I went with the latter as that is just a mechanical change, and I think makes sense as a request already has a reactor associated with it (via its http channel).
2021-09-28 03:37:58 -06:00
|
|
|
from synapse.logging.context import defer_to_thread, preserve_fn, run_in_background
|
2021-12-21 04:10:36 -07:00
|
|
|
from synapse.logging.opentracing import active_span, start_active_span, trace_servlet
|
2020-08-07 06:02:55 -06:00
|
|
|
from synapse.util import json_encoder
|
2018-07-09 00:09:20 -06:00
|
|
|
from synapse.util.caches import intern_dict
|
Encode JSON responses on a thread in C, mk2 (#10905)
Currently we use `JsonEncoder.iterencode` to write JSON responses, which ensures that we don't block the main reactor thread when encoding huge objects. The downside to this is that `iterencode` falls back to using a pure Python encoder that is *much* less efficient and can easily burn a lot of CPU for huge responses. To fix this, while still ensuring we don't block the reactor loop, we encode the JSON on a threadpool using the standard `JsonEncoder.encode` functions, which is backed by a C library.
Doing so, however, requires `respond_with_json` to have access to the reactor, which it previously didn't. There are two ways of doing this:
1. threading through the reactor object, which is a bit fiddly as e.g. `DirectServeJsonResource` doesn't currently take a reactor, but is exposed to modules and so is a PITA to change; or
2. expose the reactor in `SynapseRequest`, which requires updating a bunch of servlet types.
I went with the latter as that is just a mechanical change, and I think makes sense as a request already has a reactor associated with it (via its http channel).
2021-09-28 03:37:58 -06:00
|
|
|
from synapse.util.iterutils import chunk_seq
|
2014-08-12 08:10:52 -06:00
|
|
|
|
2021-10-22 11:15:41 -06:00
|
|
|
if TYPE_CHECKING:
|
2021-12-21 04:10:36 -07:00
|
|
|
import opentracing
|
|
|
|
|
2021-10-22 11:15:41 -06:00
|
|
|
from synapse.server import HomeServer
|
|
|
|
|
2014-08-12 08:10:52 -06:00
|
|
|
logger = logging.getLogger(__name__)

# Minimal HTML error page. The `{code}` and `{msg}` placeholders are filled in
# with `str.format`, so any literal braces added to this template must be doubled.
HTML_ERROR_TEMPLATE = """<!DOCTYPE html>
<html lang=en>
  <head>
    <meta charset="utf-8">
    <title>Error {code}</title>
  </head>
  <body>
    <p>{msg}</p>
  </body>
</html>
"""

# A fictional HTTP status code for requests where the client has disconnected and we
# successfully cancelled the request. Used only for logging purposes. Clients will never
# observe this code unless cancellations leak across requests or we raise a
# `CancelledError` ourselves.
# Analogous to nginx's 499 status code:
# https://github.com/nginx/nginx/blob/release-1.21.6/src/http/ngx_http_request.h#L128-L134
HTTP_STATUS_REQUEST_CANCELLED = 499
|
|
|
|
|
2018-04-06 06:20:05 -06:00
|
|
|
|
2022-05-10 07:05:22 -06:00
|
|
|
F = TypeVar("F", bound=Callable[..., Any])


_cancellable_method_names = frozenset(
    {
        # `RestServlet`, `BaseFederationServlet` and `BaseFederationServerServlet`
        # methods
        "on_GET",
        "on_PUT",
        "on_POST",
        "on_DELETE",
        # `_AsyncResource`, `DirectServeHtmlResource` and `DirectServeJsonResource`
        # methods
        "_async_render_GET",
        "_async_render_PUT",
        "_async_render_POST",
        "_async_render_DELETE",
        "_async_render_OPTIONS",
        # `ReplicationEndpoint` methods
        "_handle_request",
    }
)

# The same names as a tuple, because `str.startswith` accepts a tuple of prefixes
# but not a set.
_cancellable_method_prefixes = tuple(_cancellable_method_names)


def cancellable(method: F) -> F:
    """Marks a servlet method as cancellable.

    Methods with this decorator will be cancelled if the client disconnects before we
    finish processing the request.

    During cancellation, `Deferred.cancel()` will be invoked on the `Deferred` wrapping
    the method. The `cancel()` call will propagate down to the `Deferred` that is
    currently being waited on. That `Deferred` will raise a `CancelledError`, which will
    propagate up, as per normal exception handling.

    Before applying this decorator to a new endpoint, you MUST recursively check
    that all `await`s in the function are on `async` functions or `Deferred`s that
    handle cancellation cleanly, otherwise a variety of bugs may occur, ranging from
    premature logging context closure, to stuck requests, to database corruption.

    Usage:
        class SomeServlet(RestServlet):
            @cancellable
            async def on_GET(self, request: SynapseRequest) -> ...:
                ...

    Args:
        method: The servlet method to mark. Its `__name__` must be, or start with,
            one of the names in `_cancellable_method_names`.

    Returns:
        The same `method` object, with its `cancellable` attribute set to `True`.

    Raises:
        ValueError: if the method's name does not look like a servlet method.
    """
    # A name equal to one of the servlet method names also starts with it, so a
    # single prefix test covers both exact matches and suffixed variants.
    if not method.__name__.startswith(_cancellable_method_prefixes):
        raise ValueError(
            "@cancellable decorator can only be applied to servlet methods."
        )

    method.cancellable = True  # type: ignore[attr-defined]
    return method
|
|
|
|
|
|
|
|
|
|
|
|
def is_method_cancellable(method: Callable[..., Any]) -> bool:
    """Reports whether `method` carries the flag set by `@cancellable`.

    Args:
        method: The servlet method to inspect.

    Returns:
        The value of the method's `cancellable` attribute, or `False` when the
        attribute is absent.
    """
    try:
        return method.cancellable  # type: ignore[attr-defined]
    except AttributeError:
        # Never decorated with `@cancellable`.
        return False
|
|
|
|
|
|
|
|
|
2020-07-03 12:02:19 -06:00
|
|
|
def return_json_error(f: failure.Failure, request: SynapseRequest) -> None:
    """Logs a failed request and reports the failure to the client as JSON.

    `SynapseError`s are sent with their own code and error dict. Cancellations
    use `HTTP_STATUS_REQUEST_CANCELLED`. Anything else becomes a generic 500.

    Args:
        f: The failure raised while handling the request.
        request: The request that failed.
    """
    if f.check(SynapseError):
        # mypy doesn't understand that f.check asserts the type.
        synapse_error: SynapseError = f.value  # type: ignore
        status = synapse_error.code
        payload = synapse_error.error_dict()

        logger.info("%s SynapseError: %s - %s", request, status, synapse_error.msg)
    elif f.check(CancelledError):
        status = HTTP_STATUS_REQUEST_CANCELLED
        payload = {"error": "Request cancelled", "errcode": Codes.UNKNOWN}

        # A cancellation while the client is still connected is unexpected, so
        # log it loudly with the traceback.
        if not request._disconnected:
            logger.error(
                "Got cancellation before client disconnection from %r: %r",
                request.request_metrics.name,
                request,
                exc_info=(f.type, f.value, f.getTracebackObject()),  # type: ignore[arg-type]
            )
    else:
        status = 500
        payload = {"error": "Internal server error", "errcode": Codes.UNKNOWN}

        logger.error(
            "Failed handle request via %r: %r",
            request.request_metrics.name,
            request,
            exc_info=(f.type, f.value, f.getTracebackObject()),  # type: ignore[arg-type]
        )

    # Only respond with an error response if we haven't already started writing,
    # otherwise lets just kill the connection
    if not request.startedWriting:
        respond_with_json(
            request,
            status,
            payload,
            send_cors=True,
        )
        return

    if request.transport:
        try:
            request.transport.abortConnection()
        except Exception:
            # abortConnection throws if the connection is already closed
            pass
|
2018-05-10 17:17:11 -06:00
|
|
|
|
|
|
|
|
2020-06-03 03:41:12 -06:00
|
|
|
def return_html_error(
    f: failure.Failure,
    request: Request,
    error_template: Union[str, jinja2.Template],
) -> None:
    """Renders the given failure as an HTML error page and sends it.

    Handles RedirectException and other CodeMessageExceptions (such as SynapseError),
    as well as cancellations. Any other failure is reported as an internal server
    error.

    Args:
        f: the error to report
        request: the failing request
        error_template: the HTML template. Can be either a string (with `{code}`,
            `{msg}` placeholders), or a jinja2 template
    """
    if f.check(CodeMessageException):
        # mypy doesn't understand that f.check asserts the type.
        cme: CodeMessageException = f.value  # type: ignore
        status = cme.code
        message = cme.msg

        if isinstance(cme, RedirectException):
            logger.info("%s redirect to %s", request, cme.location)
            request.setHeader(b"location", cme.location)
            request.cookies.extend(cme.cookies)
        elif isinstance(cme, SynapseError):
            logger.info("%s SynapseError: %s - %s", request, status, message)
        else:
            logger.error(
                "Failed handle request %r",
                request,
                exc_info=(f.type, f.value, f.getTracebackObject()),  # type: ignore[arg-type]
            )
    elif f.check(CancelledError):
        status = HTTP_STATUS_REQUEST_CANCELLED
        message = "Request cancelled"

        # A cancellation while the client is still connected is unexpected.
        if not request._disconnected:
            logger.error(
                "Got cancellation before client disconnection when handling request %r",
                request,
                exc_info=(f.type, f.value, f.getTracebackObject()),  # type: ignore[arg-type]
            )
    else:
        status = HTTPStatus.INTERNAL_SERVER_ERROR
        message = "Internal server error"

        logger.error(
            "Failed handle request %r",
            request,
            exc_info=(f.type, f.value, f.getTracebackObject()),  # type: ignore[arg-type]
        )

    # Plain-string templates get the message HTML-escaped here; a jinja2
    # template receives the raw message.
    page = (
        error_template.format(code=status, msg=html.escape(message))
        if isinstance(error_template, str)
        else error_template.render(code=status, msg=message)
    )

    respond_with_html(request, status, page)
|
2018-05-10 17:17:11 -06:00
|
|
|
|
|
|
|
|
2021-12-14 05:00:47 -07:00
|
|
|
def wrap_async_request_handler(
    h: Callable[["_AsyncResource", SynapseRequest], Awaitable[None]]
) -> Callable[["_AsyncResource", SynapseRequest], "defer.Deferred[None]"]:
    """Decorates an async request handler so that it runs inside
    `request.processing()`.

    This helps ensure that work done by the request handler after the request is
    completed is correctly recorded against the request metrics/logs.

    The handler method must have a signature of "handle_foo(self, request)",
    where "request" must be a SynapseRequest.

    The handler may return a deferred, in which case the completion of the request
    isn't logged until the deferred completes.

    Args:
        h: The async handler method to wrap.

    Returns:
        The wrapped handler, passed through `preserve_fn`.
    """

    async def wrapped_async_request_handler(
        self: "_AsyncResource", request: SynapseRequest
    ) -> None:
        # Keep the request marked as "processing" until the handler finishes.
        with request.processing():
            await h(self, request)

    # we need to preserve_fn here, because the synchronous render method won't yield for
    # us (obviously)
    wrapped = preserve_fn(wrapped_async_request_handler)
    return wrapped
|
2015-04-21 09:07:20 -06:00
|
|
|
|
2014-08-12 08:10:52 -06:00
|
|
|
|
2021-01-27 05:41:24 -07:00
|
|
|
# Type of a callback method for processing requests
# it is actually called with a SynapseRequest and a kwargs dict for the params,
# but I can't figure out how to represent that.
ServletCallback = Callable[
    ..., Union[None, Awaitable[None], Tuple[int, Any], Awaitable[Tuple[int, Any]]]
]


class HttpServer(Protocol):
    """Interface for registering callbacks on a HTTP server"""

    def register_paths(
        self,
        method: str,
        path_patterns: Iterable[Pattern],
        callback: ServletCallback,
        servlet_classname: str,
    ) -> None:
        """Register a callback that gets fired if we receive a http request
        with the given method for a path that matches the given regex.

        If the regex contains named groups, the matched values are passed to the
        callback as keyword arguments (this is what `JsonResource` does).

        The callback may be marked with the `@cancellable` decorator, which will
        cause request processing to be cancelled when clients disconnect early.

        Args:
            method: The HTTP method to listen to.
            path_patterns: The regexes used to match requests.
            callback: The function to fire if we receive a matched
                request. The first argument will be the request object and
                the remaining (keyword) arguments will be the matched named
                groups from the regex.
                This should return either a tuple of (code, response), or None.
            servlet_classname: The name of the handler to be used in prometheus
                and opentracing logs.
        """
|
|
|
|
|
|
|
|
|
2020-07-03 12:02:19 -06:00
|
|
|
class _AsyncResource(resource.Resource, metaclass=abc.ABCMeta):
    """Base class for resources whose request handlers are async.

    Sub classes can either implement `_async_render_<METHOD>` to handle
    requests by method, or override `_async_render` to handle all requests.
    They must also implement `_send_response` and `_send_error_response`.

    Args:
        extract_context: Whether to attempt to extract the opentracing
            context from the request the servlet is handling.
    """

    def __init__(self, extract_context: bool = False):
        super().__init__()

        self._extract_context = extract_context

    def render(self, request: SynapseRequest) -> int:
        """This gets called by twisted every time someone sends us a request.

        Starts the async handling, stores the resulting `Deferred` on the request
        and returns `NOT_DONE_YET`.
        """
        render_deferred = defer.ensureDeferred(self._async_render_wrapper(request))
        request.render_deferred = render_deferred
        return NOT_DONE_YET

    @wrap_async_request_handler
    async def _async_render_wrapper(self, request: SynapseRequest) -> None:
        """This is a wrapper that delegates to `_async_render` and handles
        exceptions, return values, metrics, etc.
        """
        try:
            request.request_metrics.name = self.__class__.__name__

            with trace_servlet(request, self._extract_context):
                result = await self._async_render(request)

                # `None` means the handler has dealt with the response itself.
                if result is not None:
                    code, response = result
                    self._send_response(request, code, response)
        except Exception:
            # failure.Failure() fishes the original Failure out
            # of our stack, and thus gives us a sensible stack
            # trace.
            self._send_error_response(failure.Failure(), request)

    async def _async_render(self, request: SynapseRequest) -> Optional[Tuple[int, Any]]:
        """Delegates to `_async_render_<METHOD>` methods, or raises via
        `_unrecognised_request_handler` if no appropriate method exists. Can be
        overridden in sub classes for different routing.
        """
        http_method = request.method.decode("ascii")
        # Treat HEAD requests as GET requests.
        if http_method == "HEAD":
            http_method = "GET"

        method_handler = getattr(self, "_async_render_" + http_method, None)
        if not method_handler:
            # Always raises, so nothing below runs for unknown methods.
            _unrecognised_request_handler(request)

        request.is_render_cancellable = is_method_cancellable(method_handler)

        result = method_handler(request)

        # Is it synchronous? We'll allow this for now.
        if isawaitable(result):
            result = await result

        return result

    @abc.abstractmethod
    def _send_response(
        self,
        request: SynapseRequest,
        code: int,
        response_object: Any,
    ) -> None:
        """Sends a successful response for `request`; implemented by sub classes."""
        raise NotImplementedError()

    @abc.abstractmethod
    def _send_error_response(
        self,
        f: failure.Failure,
        request: SynapseRequest,
    ) -> None:
        """Sends an error response for the failure `f`; implemented by sub classes."""
        raise NotImplementedError()
|
|
|
|
|
|
|
|
|
|
|
|
class DirectServeJsonResource(_AsyncResource):
    """A resource that will call `self._async_render_<METHOD>` on new requests,
    formatting responses and errors as JSON.

    Args:
        canonical_json: Passed through to `respond_with_json` as its
            `canonical_json` argument for successful responses.
        extract_context: Passed to `_AsyncResource.__init__`.
    """

    def __init__(self, canonical_json: bool = False, extract_context: bool = False):
        super().__init__(extract_context)
        self.canonical_json = canonical_json

    def _send_response(
        self,
        request: SynapseRequest,
        code: int,
        response_object: Any,
    ) -> None:
        """Implements _AsyncResource._send_response"""
        # TODO: Only enable CORS for the requests that need it.
        respond_with_json(
            request,
            code,
            response_object,
            send_cors=True,
            canonical_json=self.canonical_json,
        )

    def _send_error_response(
        self,
        f: failure.Failure,
        request: SynapseRequest,
    ) -> None:
        """Implements _AsyncResource._send_error_response"""
        return_json_error(f, request)
|
|
|
|
|
|
|
|
|
2021-12-30 11:47:12 -07:00
|
|
|
@attr.s(auto_attribs=True, frozen=True, slots=True)
class _PathEntry:
    """One registered route: a path regex and the callback that handles it."""

    # Regex that the request path is matched against.
    pattern: Pattern
    # Handler invoked when `pattern` matches.
    callback: ServletCallback
    # Name reported for the handler (used as the request metrics name).
    servlet_classname: str
|
2021-10-22 11:15:41 -06:00
|
|
|
|
|
|
|
|
2020-07-03 12:02:19 -06:00
|
|
|
class JsonResource(DirectServeJsonResource):
    """This implements the HttpServer interface and provides JSON support for
    Resources.

    Register callbacks via register_paths()

    Callbacks can return a tuple of status code and a dict, in which case the
    dict will automatically be sent to the client as a JSON object.

    The JsonResource is primarily intended for returning JSON, but callbacks
    may send something other than JSON; they may do so by using the methods
    on the request object and instead returning None.
    """

    isLeaf = True

    def __init__(
        self,
        hs: "HomeServer",
        canonical_json: bool = True,
        extract_context: bool = False,
    ):
        super().__init__(canonical_json, extract_context)
        self.clock = hs.get_clock()
        # Registered routes, keyed by HTTP method (as bytes), in registration order.
        self.path_regexs: Dict[bytes, List[_PathEntry]] = {}
        self.hs = hs

    def register_paths(
        self,
        method: str,
        path_patterns: Iterable[Pattern],
        callback: ServletCallback,
        servlet_classname: str,
    ) -> None:
        """
        Registers a request handler against a regular expression. Later request URLs are
        checked against these regular expressions in order to identify an appropriate
        handler for that request.

        Args:
            method: GET, POST etc

            path_patterns: A list of regular expressions to which the request
                URLs are compared.

            callback: The handler for the request. Usually a Servlet

            servlet_classname: The name of the handler to be used in prometheus
                and opentracing logs.
        """
        # Request methods arrive as bytes, so store the key in the same form.
        method_bytes = method.encode("utf-8")

        for path_pattern in path_patterns:
            logger.debug("Registering for %s %s", method, path_pattern.pattern)
            self.path_regexs.setdefault(method_bytes, []).append(
                _PathEntry(path_pattern, callback, servlet_classname)
            )

    def _get_handler_for_request(
        self, request: SynapseRequest
    ) -> Tuple[ServletCallback, str, Dict[str, str]]:
        """Finds a callback method to handle the given request.

        Returns:
            A tuple of the callback to use, the name of the servlet, and the
            key word arguments to pass to the callback
        """
        # At this point the path must be bytes.
        request_path_bytes: bytes = request.path  # type: ignore
        request_path = request_path_bytes.decode("ascii")
        # Treat HEAD requests as GET requests.
        request_method = request.method
        if request_method == b"HEAD":
            request_method = b"GET"

        # Loop through all the registered callbacks to check if the method
        # and path regex match
        for path_entry in self.path_regexs.get(request_method, []):
            m = path_entry.pattern.match(request_path)
            if m:
                # We found a match!
                return path_entry.callback, path_entry.servlet_classname, m.groupdict()

        # Huh. No one wanted to handle that? Fiiiiiine. Send 400.
        return _unrecognised_request_handler, "unrecognised_request_handler", {}

    async def _async_render(self, request: SynapseRequest) -> Tuple[int, Any]:
        """Routes the request to the registered callback and returns its result.

        Named groups captured from the path are URL-unquoted and passed to the
        callback as keyword arguments.
        """
        callback, servlet_classname, group_dict = self._get_handler_for_request(request)

        request.is_render_cancellable = is_method_cancellable(callback)

        # Make sure we have an appropriate name for this handler in prometheus
        # (rather than the default of JsonResource).
        request.request_metrics.name = servlet_classname

        # Now trigger the callback. If it returns a response, we send it
        # here. If it throws an exception, that is handled by
        # `_AsyncResource._async_render_wrapper`.
        kwargs = intern_dict(
            {
                name: urllib.parse.unquote(value) if value else value
                for name, value in group_dict.items()
            }
        )

        raw_callback_return = callback(request, **kwargs)

        # Is it synchronous? We'll allow this for now.
        # Uses `isawaitable`, matching `_AsyncResource._async_render`, so every
        # awaitable result is awaited, not only Deferreds and native coroutines.
        if isawaitable(raw_callback_return):
            callback_return = await raw_callback_return
        else:
            callback_return = raw_callback_return

        return callback_return
|
2018-01-09 11:27:35 -07:00
|
|
|
|
|
|
|
|
2020-07-03 12:02:19 -06:00
|
|
|
class DirectServeHtmlResource(_AsyncResource):
    """A resource that will call `self._async_render_<METHOD>` on new requests,
    formatting responses and errors as HTML.
    """

    # The error template to use for this resource
    ERROR_TEMPLATE = HTML_ERROR_TEMPLATE

    def _send_response(
        self,
        request: SynapseRequest,
        code: int,
        response_object: Any,
    ) -> None:
        """Implements _AsyncResource._send_response

        Args:
            request: The request to respond to.
            code: The HTTP status code to send.
            response_object: The HTML body; must be `bytes`.
        """
        # We expect to get bytes for us to write
        assert isinstance(response_object, bytes)
        html_bytes = response_object

        # Send the status code the handler returned. This used to be hard-coded
        # to 200, which silently discarded any other code.
        respond_with_html_bytes(request, code, html_bytes)

    def _send_error_response(
        self,
        f: failure.Failure,
        request: SynapseRequest,
    ) -> None:
        """Implements _AsyncResource._send_error_response"""
        return_html_error(f, request, self.ERROR_TEMPLATE)
|
2019-06-29 01:06:55 -06:00
|
|
|
|
|
|
|
|
2020-07-01 07:10:23 -06:00
|
|
|
class StaticResource(File):
    """
    Serves a plain, non-interpreted file or directory.

    Extends the File resource so that responses to GET requests also carry
    clickjacking protection headers.
    """

    def render_GET(self, request: Request) -> bytes:
        # Add the anti-framing headers, then let File do the actual rendering.
        set_clickjacking_protection_headers(request)
        rendered = super().render_GET(request)
        return rendered
|
|
|
|
|
|
|
|
|
2021-12-14 05:00:47 -07:00
|
|
|
def _unrecognised_request_handler(request: Request) -> NoReturn:
    """Handler used for requests which no servlet recognises.

    It has the shape of a handler that _get_handler_for_request can return,
    but never produces a response: it always raises an
    UnrecognizedRequestError.

    Args:
        request: Ignored; only present so that the signature matches
            ServletCallback.

    Raises:
        UnrecognizedRequestError: always.
    """
    raise UnrecognizedRequestError()
|
|
|
|
|
|
|
|
|
2014-08-14 04:37:13 -06:00
|
|
|
class RootRedirect(resource.Resource):
    """Resource which redirects requests for the root path ('/') elsewhere."""

    def __init__(self, path: str):
        super().__init__()
        # Where to redirect to. Encoded to ASCII bytes each time a request is
        # rendered.
        self.url = path

    def render_GET(self, request: Request) -> bytes:
        target = self.url.encode("ascii")
        return redirectTo(target, request)

    def getChild(self, name: str, request: Request) -> resource.Resource:
        if len(name) != 0:
            # A named child was asked for: defer to the default lookup.
            return super().getChild(name, request)

        # An empty segment selects this resource itself as the one to render.
        return self
|
2014-08-14 04:37:13 -06:00
|
|
|
|
|
|
|
|
2020-05-22 07:30:07 -06:00
|
|
|
class OptionsResource(resource.Resource):
    """Answers OPTIONS requests on behalf of itself and all of its children."""

    def render_OPTIONS(self, request: Request) -> bytes:
        # An empty 204 response, with the CORS headers attached.
        request.setResponseCode(204)
        request.setHeader(b"Content-Length", b"0")
        set_cors_headers(request)

        empty_body = b""
        return empty_body

    def getChildWithDefault(self, path: str, request: Request) -> resource.Resource:
        if request.method != b"OPTIONS":
            # Any other method goes through the normal child lookup.
            return super().getChildWithDefault(path, request)

        # OPTIONS requests are always rendered by this resource, whatever the
        # path.
        return self
|
2020-05-22 07:30:07 -06:00
|
|
|
|
|
|
|
|
|
|
|
class RootOptionsRedirectResource(OptionsResource, RootRedirect):
    """Combines OptionsResource and RootRedirect; adds nothing of its own."""
|
|
|
|
|
|
|
|
|
2020-08-19 06:07:57 -06:00
|
|
|
@implementer(interfaces.IPushProducer)
class _ByteProducer:
    """
    Push producer which writes the contents of an iterator of bytes to a
    request, a chunk at a time.
    """

    # Lower bound on the size of each chunk written. The final chunk will
    # usually be smaller than this.
    min_chunk_size = 1024

    def __init__(
        self,
        request: Request,
        iterator: Iterator[bytes],
    ):
        self._request: Optional[Request] = request
        self._iterator = iterator
        self._paused = False

        try:
            self._request.registerProducer(self, True)
        except AttributeError as exc:
            # registerProducer can raise an AttributeError: the underlying
            # Twisted code calls self._request.channel.registerProducer, and
            # self._request.channel is None once the connection has been lost.
            logger.info("Connection disconnected before response was written: %r", exc)

            # Nothing will be written, so let go of the data we were given.
            self._request = None
            self._iterator = iter(())
            return

        # Registration succeeded: start producing straight away.
        self.resumeProducing()

    def _send_data(self, data: List[bytes]) -> None:
        """
        Write a list of bytes to the request as a single chunk.

        Does nothing if there is no data, or no request to write to.
        """
        if data and self._request:
            self._request.write(b"".join(data))

    def pauseProducing(self) -> None:
        self._paused = True

    def resumeProducing(self) -> None:
        # Production may have been stopped in the meantime (this can be
        # called re-entrantly from within write).
        if not self._request:
            return

        self._paused = False

        # Keep writing until backpressure pauses us.
        while not self._paused:
            # Collect output from the iterator until at least min_chunk_size
            # bytes are pending, then write it all as one chunk.
            #
            # JSON encoders yield very small pieces per iteration, and the
            # Request object turns every write() call into a separate chunk.
            # Writing each piece individually would blow up the number of
            # bytes sent (e.g. b"{" becoming "1\r\n{\r\n").
            #
            # The pieces are kept in a list (rather than appended to a bytes
            # object) to hopefully avoid many allocations.
            pending: List[bytes] = []
            pending_size = 0
            while pending_size < self.min_chunk_size:
                try:
                    piece = next(self._iterator)
                except StopIteration:
                    # Everything has been produced: flush whatever is left,
                    # finalize the producer and the request, and drop our
                    # references.
                    self._send_data(pending)
                    self._request.unregisterProducer()
                    self._request.finish()
                    self.stopProducing()
                    return

                pending.append(piece)
                pending_size += len(piece)

            self._send_data(pending)

    def stopProducing(self) -> None:
        # Break the circular reference between the request and this producer.
        self._request = None
|
|
|
|
|
|
|
|
|
Encode JSON responses on a thread in C, mk2 (#10905)
Currently we use `JsonEncoder.iterencode` to write JSON responses, which ensures that we don't block the main reactor thread when encoding huge objects. The downside to this is that `iterencode` falls back to using a pure Python encoder that is *much* less efficient and can easily burn a lot of CPU for huge responses. To fix this, while still ensuring we don't block the reactor loop, we encode the JSON on a threadpool using the standard `JsonEncoder.encode` functions, which is backed by a C library.
Doing so, however, requires `respond_with_json` to have access to the reactor, which it previously didn't. There are two ways of doing this:
1. threading through the reactor object, which is a bit fiddly as e.g. `DirectServeJsonResource` doesn't currently take a reactor, but is exposed to modules and so is a PITA to change; or
2. expose the reactor in `SynapseRequest`, which requires updating a bunch of servlet types.
I went with the latter as that is just a mechanical change, and I think makes sense as a request already has a reactor associated with it (via its http channel).
2021-09-28 03:37:58 -06:00
|
|
|
def _encode_json_bytes(json_object: Any) -> bytes:
    """
    Serialise an object to JSON using `json_encoder`.

    Returns the encoded JSON as UTF-8 bytes.
    """
    json_text = json_encoder.encode(json_object)
    return json_text.encode("utf-8")
|
2020-08-18 06:49:59 -06:00
|
|
|
|
|
|
|
|
2014-12-02 08:09:51 -07:00
|
|
|
def respond_with_json(
    request: SynapseRequest,
    code: int,
    json_object: Any,
    send_cors: bool = False,
    canonical_json: bool = True,
) -> Optional[int]:
    """Encodes an object as JSON and sends it as the response to a request.

    The encoding and the write to the request are started in the background;
    this function does not wait for them.

    Args:
        request: The http request to respond to.
        code: The HTTP response code.
        json_object: The object to serialize to JSON.
        send_cors: Whether to send Cross-Origin Resource Sharing headers
            https://fetch.spec.whatwg.org/#http-cors-protocol
        canonical_json: Whether to use the canonicaljson algorithm when encoding
            the JSON bytes.

    Returns:
        twisted.web.server.NOT_DONE_YET if the request is still active, or
        None if the request had already disconnected.
    """
    # Logging relies on the response code, so set it unconditionally.
    request.setResponseCode(code)

    # We read the request's own flag directly. The alternative would be to
    # flip a flag of our own when request.notifyFinish() fires, which seems a
    # waste when this one is right there.
    if request._disconnected:
        logger.warning(
            "Not sending response to request %s, already disconnected.", request
        )
        return None

    request.setHeader(b"Content-Type", b"application/json")
    request.setHeader(b"Cache-Control", b"no-cache, no-store, must-revalidate")
    if send_cors:
        set_cors_headers(request)

    encoder = encode_canonical_json if canonical_json else _encode_json_bytes
    run_in_background(
        _async_write_json_to_request_in_thread, request, encoder, json_object
    )

    return NOT_DONE_YET
|
2014-12-02 08:09:51 -07:00
|
|
|
|
|
|
|
|
2014-09-03 02:37:44 -06:00
|
|
|
def respond_with_json_bytes(
    request: Request,
    code: int,
    json_bytes: bytes,
    send_cors: bool = False,
) -> Optional[int]:
    """Sends already-encoded JSON as the response to a request.

    Args:
        request: The http request to respond to.
        code: The HTTP response code.
        json_bytes: The json bytes to use as the response body.
        send_cors: Whether to send Cross-Origin Resource Sharing headers
            https://fetch.spec.whatwg.org/#http-cors-protocol

    Returns:
        twisted.web.server.NOT_DONE_YET if the request is still active, or
        None if the request had already disconnected.
    """
    # Logging relies on the response code, so set it unconditionally.
    request.setResponseCode(code)

    if request._disconnected:
        logger.warning(
            "Not sending response to request %s, already disconnected.", request
        )
        return None

    body_length = len(json_bytes)
    request.setHeader(b"Content-Type", b"application/json")
    request.setHeader(b"Content-Length", b"%d" % (body_length,))
    request.setHeader(b"Cache-Control", b"no-cache, no-store, must-revalidate")
    if send_cors:
        set_cors_headers(request)

    _write_bytes_to_request(request, json_bytes)

    return NOT_DONE_YET
|
2015-04-21 09:07:20 -06:00
|
|
|
|
|
|
|
|
Encode JSON responses on a thread in C, mk2 (#10905)
Currently we use `JsonEncoder.iterencode` to write JSON responses, which ensures that we don't block the main reactor thread when encoding huge objects. The downside to this is that `iterencode` falls back to using a pure Python encoder that is *much* less efficient and can easily burn a lot of CPU for huge responses. To fix this, while still ensuring we don't block the reactor loop, we encode the JSON on a threadpool using the standard `JsonEncoder.encode` functions, which is backed by a C library.
Doing so, however, requires `respond_with_json` to have access to the reactor, which it previously didn't. There are two ways of doing this:
1. threading through the reactor object, which is a bit fiddly as e.g. `DirectServeJsonResource` doesn't currently take a reactor, but is exposed to modules and so is a PITA to change; or
2. expose the reactor in `SynapseRequest`, which requires updating a bunch of servlet types.
I went with the latter as that is just a mechanical change, and I think makes sense as a request already has a reactor associated with it (via its http channel).
2021-09-28 03:37:58 -06:00
|
|
|
async def _async_write_json_to_request_in_thread(
    request: SynapseRequest,
    json_encoder: Callable[[Any], bytes],
    json_object: Any,
) -> None:
    """Encodes a JSON object on a thread, then writes the result to the request.

    The encoding happens off the reactor thread so that large JSON objects do
    not block it.

    Note: JsonEncoder.iterencode is deliberately not used here. It falls back
    to the Python implementation (rather than the C backend), which is *much*
    more expensive.

    Args:
        request: The request to write the encoded JSON to.
        json_encoder: Callable which encodes `json_object` into bytes.
        json_object: The object to encode.
    """

    def encode_with_tracing(tracing_span: "Optional[opentracing.Span]") -> bytes:
        def record(event: str) -> None:
            if tracing_span:
                tracing_span.log_kv({"event": event})

        # The threadpool may take a while to get round to us. Logging once we
        # have actually been scheduled shows how much that wait contributed.
        record("scheduled")
        encoded = json_encoder(json_object)
        record("encoded")
        return encoded

    with start_active_span("encode_json_response"):
        current_span = active_span()
        json_bytes = await defer_to_thread(
            request.reactor, encode_with_tracing, current_span
        )

    _write_bytes_to_request(request, json_bytes)
|
|
|
|
|
|
|
|
|
|
|
|
def _write_bytes_to_request(request: Request, bytes_to_write: bytes) -> None:
    """Writes bytes to a request in chunks, via a producer.

    Note: Use this rather than `Request.write`, so that large response bodies
    are handled correctly.

    Args:
        request: The request to write to.
        bytes_to_write: The complete response body.
    """
    # Passing the whole response to `Request.write` in one go starts the
    # timeout for the next request to be received. If streaming the response
    # back to the client then takes longer than 60s, the client never gets it.
    #
    # With a Producer, the timeout only starts once all of the content has
    # been sent over the TCP connection.
    #
    # The body is split into chunks so that it is not all written at once.
    #
    # A `_ByteProducer` is used rather than `NoRangeStaticProducer` because
    # the unit tests can't cope with being given a pull producer.
    _ByteProducer(request, chunk_seq(bytes_to_write, 4096))
|
|
|
|
|
|
|
|
|
2021-12-14 05:00:47 -07:00
|
|
|
def set_cors_headers(request: Request) -> None:
    """Adds CORS headers to a request's response, so that javascript running
    in a web browser can use this API.

    Args:
        request: The http request to add CORS to.
    """
    cors_headers = (
        (b"Access-Control-Allow-Origin", b"*"),
        (
            b"Access-Control-Allow-Methods",
            b"GET, HEAD, POST, PUT, DELETE, OPTIONS",
        ),
        (
            b"Access-Control-Allow-Headers",
            b"X-Requested-With, Content-Type, Authorization, Date",
        ),
    )
    for header_name, header_value in cors_headers:
        request.setHeader(header_name, header_value)
|
|
|
|
|
|
|
|
|
2021-12-14 05:00:47 -07:00
|
|
|
def respond_with_html(request: Request, code: int, html: str) -> None:
    """
    Encodes HTML from a str to UTF-8 bytes and hands it to
    `respond_with_html_bytes`.

    Args:
        request: The http request to respond to.
        code: The HTTP response code.
        html: The HTML to use as the response body.
    """
    html_bytes = html.encode("utf-8")
    respond_with_html_bytes(request, code, html_bytes)
|
|
|
|
|
|
|
|
|
2021-12-14 05:00:47 -07:00
|
|
|
def respond_with_html_bytes(request: Request, code: int, html_bytes: bytes) -> None:
    """
    Writes HTML (encoded as UTF-8 bytes) as the response to a request, then
    finishes the request.

    Clickjacking protection headers are added to the response. If the request
    is already flagged as disconnected, only the response code is set and a
    warning is logged.

    Args:
        request: The http request to respond to.
        code: The HTTP response code.
        html_bytes: The HTML bytes to use as the response body.
    """
    # Logging relies on the response code, so set it unconditionally.
    request.setResponseCode(code)

    # We read the request's own flag directly. The alternative would be to
    # flip a flag of our own when request.notifyFinish() fires, which seems a
    # waste when this one is right there.
    if request._disconnected:
        logger.warning(
            "Not sending response to request %s, already disconnected.", request
        )
        return

    body_length = len(html_bytes)
    request.setHeader(b"Content-Type", b"text/html; charset=utf-8")
    request.setHeader(b"Content-Length", b"%d" % (body_length,))

    # Make sure this content cannot be embedded in another page.
    set_clickjacking_protection_headers(request)

    request.write(html_bytes)
    finish_request(request)
|
|
|
|
|
|
|
|
|
2021-12-14 05:00:47 -07:00
|
|
|
def set_clickjacking_protection_headers(request: Request) -> None:
    """
    Adds headers which guard against clickjacking of embedded content.

    The X-Frame-Options and Content-Security-Policy headers are set, telling
    browsers not to allow the HTML of the response to be embedded in another
    page.

    Args:
        request: The http request to add the headers to.
    """
    protection_headers = (
        (b"X-Frame-Options", b"DENY"),
        (b"Content-Security-Policy", b"frame-ancestors 'none';"),
    )
    for header_name, header_value in protection_headers:
        request.setHeader(header_name, header_value)
|
|
|
|
|
|
|
|
|
2021-02-01 06:15:51 -07:00
|
|
|
def respond_with_redirect(request: Request, url: bytes) -> None:
    """Redirect the request to the given URL (a 302 response) and finish it.

    Args:
        request: The http request to respond to.
        url: The URL to redirect to. Decoded as UTF-8 for the debug log.
    """
    readable_url = url.decode("utf-8")
    logger.debug("Redirect to %s", readable_url)

    request.redirect(url)
    finish_request(request)
|
|
|
|
|
|
|
|
|
2021-12-14 05:00:47 -07:00
|
|
|
def finish_request(request: Request) -> None:
    """Finishes writing the response to a request, tolerating a lost connection.

    Twisted raises a RuntimeError if the connection was closed before the
    response was written, and offers no convenient or reliable way of finding
    out beforehand whether that has happened. So the RuntimeError is simply
    caught and logged.

    ``request.notifyFinish`` might look like a way to tell whether the request
    has finished. However, the deferred it returns does not fire if the
    connection was already closed, so it would have to be called right at the
    start of the request. By the time the response is being written it is too
    late.

    Args:
        request: The request to finish.
    """
    try:
        request.finish()
    except RuntimeError as exc:
        logger.info("Connection disconnected before response was written: %r", exc)
|