Keep track of `user_ips` and `monthly_active_users` when delegating auth (#16672)

* Describe `insert_client_ip`
* Pull out client_ips and MAU tracking to BaseAuth
* Define HAS_AUTHLIB once in tests

sick of copypasting

* Track ips and token usage when delegating auth
* Test that we track MAU and user_ips
* Don't track `__oidc_admin`
This commit is contained in:
David Robertson 2023-11-23 12:35:37 +00:00 committed by GitHub
parent 1a5f9bb651
commit 32a59a6495
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 201 additions and 82 deletions

View File

@ -0,0 +1 @@
Restore tracking of requests and monthly active users when delegating authentication to an [MSC3861](https://github.com/matrix-org/synapse/pull/16672) OIDC provider.

View File

@ -27,6 +27,8 @@ from synapse.api.errors import (
UnstableSpecAuthError, UnstableSpecAuthError,
) )
from synapse.appservice import ApplicationService from synapse.appservice import ApplicationService
from synapse.http import get_request_user_agent
from synapse.http.site import SynapseRequest
from synapse.logging.opentracing import trace from synapse.logging.opentracing import trace
from synapse.types import Requester, create_requester from synapse.types import Requester, create_requester
from synapse.util.cancellation import cancellable from synapse.util.cancellation import cancellable
@ -45,6 +47,9 @@ class BaseAuth:
self.store = hs.get_datastores().main self.store = hs.get_datastores().main
self._storage_controllers = hs.get_storage_controllers() self._storage_controllers = hs.get_storage_controllers()
self._track_appservice_user_ips = hs.config.appservice.track_appservice_user_ips
self._track_puppeted_user_ips = hs.config.api.track_puppeted_user_ips
async def check_user_in_room( async def check_user_in_room(
self, self,
room_id: str, room_id: str,
@ -349,3 +354,46 @@ class BaseAuth:
return create_requester( return create_requester(
effective_user_id, app_service=app_service, device_id=effective_device_id effective_user_id, app_service=app_service, device_id=effective_device_id
) )
async def _record_request(
self, request: SynapseRequest, requester: Requester
) -> None:
"""Record that this request was made.
This updates the client_ips and monthly_active_user tables.
"""
ip_addr = request.get_client_ip_if_available()
if ip_addr and (not requester.app_service or self._track_appservice_user_ips):
user_agent = get_request_user_agent(request)
access_token = self.get_access_token_from_request(request)
# XXX(quenting): I'm 95% confident that we could skip setting the
# device_id to "dummy-device" for appservices, and that the only impact
# would be some rows which whould not deduplicate in the 'user_ips'
# table during the transition
recorded_device_id = (
"dummy-device"
if requester.device_id is None and requester.app_service is not None
else requester.device_id
)
await self.store.insert_client_ip(
user_id=requester.authenticated_entity,
access_token=access_token,
ip=ip_addr,
user_agent=user_agent,
device_id=recorded_device_id,
)
# Track also the puppeted user client IP if enabled and the user is puppeting
if (
requester.user.to_string() != requester.authenticated_entity
and self._track_puppeted_user_ips
):
await self.store.insert_client_ip(
user_id=requester.user.to_string(),
access_token=access_token,
ip=ip_addr,
user_agent=user_agent,
device_id=requester.device_id,
)

View File

@ -22,7 +22,6 @@ from synapse.api.errors import (
InvalidClientTokenError, InvalidClientTokenError,
MissingClientTokenError, MissingClientTokenError,
) )
from synapse.http import get_request_user_agent
from synapse.http.site import SynapseRequest from synapse.http.site import SynapseRequest
from synapse.logging.opentracing import active_span, force_tracing, start_active_span from synapse.logging.opentracing import active_span, force_tracing, start_active_span
from synapse.types import Requester, create_requester from synapse.types import Requester, create_requester
@ -48,8 +47,6 @@ class InternalAuth(BaseAuth):
self._account_validity_handler = hs.get_account_validity_handler() self._account_validity_handler = hs.get_account_validity_handler()
self._macaroon_generator = hs.get_macaroon_generator() self._macaroon_generator = hs.get_macaroon_generator()
self._track_appservice_user_ips = hs.config.appservice.track_appservice_user_ips
self._track_puppeted_user_ips = hs.config.api.track_puppeted_user_ips
self._force_tracing_for_users = hs.config.tracing.force_tracing_for_users self._force_tracing_for_users = hs.config.tracing.force_tracing_for_users
@cancellable @cancellable
@ -115,9 +112,6 @@ class InternalAuth(BaseAuth):
Once get_user_by_req has set up the opentracing span, this does the actual work. Once get_user_by_req has set up the opentracing span, this does the actual work.
""" """
try: try:
ip_addr = request.get_client_ip_if_available()
user_agent = get_request_user_agent(request)
access_token = self.get_access_token_from_request(request) access_token = self.get_access_token_from_request(request)
# First check if it could be a request from an appservice # First check if it could be a request from an appservice
@ -154,38 +148,7 @@ class InternalAuth(BaseAuth):
errcode=Codes.EXPIRED_ACCOUNT, errcode=Codes.EXPIRED_ACCOUNT,
) )
if ip_addr and ( await self._record_request(request, requester)
not requester.app_service or self._track_appservice_user_ips
):
# XXX(quenting): I'm 95% confident that we could skip setting the
# device_id to "dummy-device" for appservices, and that the only impact
# would be some rows which whould not deduplicate in the 'user_ips'
# table during the transition
recorded_device_id = (
"dummy-device"
if requester.device_id is None and requester.app_service is not None
else requester.device_id
)
await self.store.insert_client_ip(
user_id=requester.authenticated_entity,
access_token=access_token,
ip=ip_addr,
user_agent=user_agent,
device_id=recorded_device_id,
)
# Track also the puppeted user client IP if enabled and the user is puppeting
if (
requester.user.to_string() != requester.authenticated_entity
and self._track_puppeted_user_ips
):
await self.store.insert_client_ip(
user_id=requester.user.to_string(),
access_token=access_token,
ip=ip_addr,
user_agent=user_agent,
device_id=requester.device_id,
)
if requester.is_guest and not allow_guest: if requester.is_guest and not allow_guest:
raise AuthError( raise AuthError(

View File

@ -227,6 +227,10 @@ class MSC3861DelegatedAuth(BaseAuth):
# so that we don't provision the user if they don't have enough permission: # so that we don't provision the user if they don't have enough permission:
requester = await self.get_user_by_access_token(access_token, allow_expired) requester = await self.get_user_by_access_token(access_token, allow_expired)
# Do not record requests from MAS using the virtual `__oidc_admin` user.
if access_token != self._admin_token:
await self._record_request(request, requester)
if not allow_guest and requester.is_guest: if not allow_guest and requester.is_guest:
raise OAuthInsufficientScopeError([SCOPE_MATRIX_API]) raise OAuthInsufficientScopeError([SCOPE_MATRIX_API])

View File

@ -589,6 +589,27 @@ class ClientIpWorkerStore(ClientIpBackgroundUpdateStore, MonthlyActiveUsersWorke
device_id: Optional[str], device_id: Optional[str],
now: Optional[int] = None, now: Optional[int] = None,
) -> None: ) -> None:
"""Record that `user_id` used `access_token` from this `ip` address.
This method does two things.
1. It queues up a row to be upserted into the `client_ips` table. These happen
periodically; see _update_client_ips_batch.
2. It immediately records this user as having taken action for the purposes of
MAU tracking.
Any DB writes take place on the background tasks worker, falling back to the
main process. If we're not that worker, this method emits a replication payload
to run this logic on that worker.
Two caveats to note:
- We only take action once per LAST_SEEN_GRANULARITY, to avoid spamming the
DB with writes.
- Requests using the sliding-sync proxy's user agent are excluded, as its
requests are not directly driven by end-users. This is a hack and we're not
very proud of it.
"""
# The sync proxy continuously triggers /sync even if the user is not # The sync proxy continuously triggers /sync even if the user is not
# present so should be excluded from user_ips entries. # present so should be excluded from user_ips entries.
if user_agent == "sync-v3-proxy-": if user_agent == "sync-v3-proxy-":

View File

@ -22,15 +22,7 @@ from synapse.types import JsonDict
from tests.server import get_clock, setup_test_homeserver from tests.server import get_clock, setup_test_homeserver
from tests.unittest import TestCase, skip_unless from tests.unittest import TestCase, skip_unless
from tests.utils import default_config from tests.utils import HAS_AUTHLIB, default_config
try:
import authlib # noqa: F401
HAS_AUTHLIB = True
except ImportError:
HAS_AUTHLIB = False
# These are a few constants that are used as config parameters in the tests. # These are a few constants that are used as config parameters in the tests.
SERVER_NAME = "test" SERVER_NAME = "test"

View File

@ -13,7 +13,8 @@
# limitations under the License. # limitations under the License.
from http import HTTPStatus from http import HTTPStatus
from typing import Any, Dict, Union from io import BytesIO
from typing import Any, Dict, Optional, Union
from unittest.mock import ANY, AsyncMock, Mock from unittest.mock import ANY, AsyncMock, Mock
from urllib.parse import parse_qs from urllib.parse import parse_qs
@ -25,6 +26,8 @@ from signedjson.key import (
from signedjson.sign import sign_json from signedjson.sign import sign_json
from twisted.test.proto_helpers import MemoryReactor from twisted.test.proto_helpers import MemoryReactor
from twisted.web.http_headers import Headers
from twisted.web.iweb import IResponse
from synapse.api.errors import ( from synapse.api.errors import (
AuthError, AuthError,
@ -33,23 +36,17 @@ from synapse.api.errors import (
OAuthInsufficientScopeError, OAuthInsufficientScopeError,
SynapseError, SynapseError,
) )
from synapse.http.site import SynapseRequest
from synapse.rest import admin from synapse.rest import admin
from synapse.rest.client import account, devices, keys, login, logout, register from synapse.rest.client import account, devices, keys, login, logout, register
from synapse.server import HomeServer from synapse.server import HomeServer
from synapse.types import JsonDict from synapse.types import JsonDict, UserID
from synapse.util import Clock from synapse.util import Clock
from tests.server import FakeChannel
from tests.test_utils import FakeResponse, get_awaitable_result from tests.test_utils import FakeResponse, get_awaitable_result
from tests.unittest import HomeserverTestCase, skip_unless from tests.unittest import HomeserverTestCase, override_config, skip_unless
from tests.utils import mock_getRawHeaders from tests.utils import HAS_AUTHLIB, checked_cast, mock_getRawHeaders
try:
import authlib # noqa: F401
HAS_AUTHLIB = True
except ImportError:
HAS_AUTHLIB = False
# These are a few constants that are used as config parameters in the tests. # These are a few constants that are used as config parameters in the tests.
SERVER_NAME = "test" SERVER_NAME = "test"
@ -75,6 +72,7 @@ MATRIX_DEVICE_SCOPE = MATRIX_DEVICE_SCOPE_PREFIX + DEVICE
SUBJECT = "abc-def-ghi" SUBJECT = "abc-def-ghi"
USERNAME = "test-user" USERNAME = "test-user"
USER_ID = "@" + USERNAME + ":" + SERVER_NAME USER_ID = "@" + USERNAME + ":" + SERVER_NAME
OIDC_ADMIN_USERID = f"@__oidc_admin:{SERVER_NAME}"
async def get_json(url: str) -> JsonDict: async def get_json(url: str) -> JsonDict:
@ -134,7 +132,10 @@ class MSC3861OAuthDelegation(HomeserverTestCase):
hs = self.setup_test_homeserver(proxied_http_client=self.http_client) hs = self.setup_test_homeserver(proxied_http_client=self.http_client)
self.auth = hs.get_auth() # Import this here so that we've checked that authlib is available.
from synapse.api.auth.msc3861_delegated import MSC3861DelegatedAuth
self.auth = checked_cast(MSC3861DelegatedAuth, hs.get_auth())
return hs return hs
@ -675,7 +676,8 @@ class MSC3861OAuthDelegation(HomeserverTestCase):
request.requestHeaders.getRawHeaders = mock_getRawHeaders() request.requestHeaders.getRawHeaders = mock_getRawHeaders()
requester = self.get_success(self.auth.get_user_by_req(request)) requester = self.get_success(self.auth.get_user_by_req(request))
self.assertEqual( self.assertEqual(
requester.user.to_string(), "@%s:%s" % ("__oidc_admin", SERVER_NAME) requester.user.to_string(),
OIDC_ADMIN_USERID,
) )
self.assertEqual(requester.is_guest, False) self.assertEqual(requester.is_guest, False)
self.assertEqual(requester.device_id, None) self.assertEqual(requester.device_id, None)
@ -685,3 +687,102 @@ class MSC3861OAuthDelegation(HomeserverTestCase):
# There should be no call to the introspection endpoint # There should be no call to the introspection endpoint
self.http_client.request.assert_not_called() self.http_client.request.assert_not_called()
@override_config({"mau_stats_only": True})
def test_request_tracking(self) -> None:
"""Using an access token should update the client_ips and MAU tables."""
# To start, there are no MAU users.
store = self.hs.get_datastores().main
mau = self.get_success(store.get_monthly_active_count())
self.assertEqual(mau, 0)
known_token = "token-token-GOOD-:)"
async def mock_http_client_request(
method: str,
uri: str,
data: Optional[bytes] = None,
headers: Optional[Headers] = None,
) -> IResponse:
"""Mocked auth provider response."""
assert method == "POST"
token = parse_qs(data)[b"token"][0].decode("utf-8")
if token == known_token:
return FakeResponse.json(
code=200,
payload={
"active": True,
"scope": MATRIX_USER_SCOPE,
"sub": SUBJECT,
"username": USERNAME,
},
)
return FakeResponse.json(code=200, payload={"active": False})
self.http_client.request = mock_http_client_request
EXAMPLE_IPV4_ADDR = "123.123.123.123"
EXAMPLE_USER_AGENT = "httprettygood"
# First test a known access token
channel = FakeChannel(self.site, self.reactor)
# type-ignore: FakeChannel is a mock of an HTTPChannel, not a proper HTTPChannel
req = SynapseRequest(channel, self.site) # type: ignore[arg-type]
req.client.host = EXAMPLE_IPV4_ADDR
req.requestHeaders.addRawHeader("Authorization", f"Bearer {known_token}")
req.requestHeaders.addRawHeader("User-Agent", EXAMPLE_USER_AGENT)
req.content = BytesIO(b"")
req.requestReceived(
b"GET",
b"/_matrix/client/v3/account/whoami",
b"1.1",
)
channel.await_result()
self.assertEqual(channel.code, HTTPStatus.OK, channel.json_body)
self.assertEqual(channel.json_body["user_id"], USER_ID, channel.json_body)
# Expect to see one MAU entry, from the first request
mau = self.get_success(store.get_monthly_active_count())
self.assertEqual(mau, 1)
conn_infos = self.get_success(
store.get_user_ip_and_agents(UserID.from_string(USER_ID))
)
self.assertEqual(len(conn_infos), 1, conn_infos)
conn_info = conn_infos[0]
self.assertEqual(conn_info["access_token"], known_token)
self.assertEqual(conn_info["ip"], EXAMPLE_IPV4_ADDR)
self.assertEqual(conn_info["user_agent"], EXAMPLE_USER_AGENT)
# Now test MAS making a request using the special __oidc_admin token
MAS_IPV4_ADDR = "127.0.0.1"
MAS_USER_AGENT = "masmasmas"
channel = FakeChannel(self.site, self.reactor)
req = SynapseRequest(channel, self.site) # type: ignore[arg-type]
req.client.host = MAS_IPV4_ADDR
req.requestHeaders.addRawHeader(
"Authorization", f"Bearer {self.auth._admin_token}"
)
req.requestHeaders.addRawHeader("User-Agent", MAS_USER_AGENT)
req.content = BytesIO(b"")
req.requestReceived(
b"GET",
b"/_matrix/client/v3/account/whoami",
b"1.1",
)
channel.await_result()
self.assertEqual(channel.code, HTTPStatus.OK, channel.json_body)
self.assertEqual(
channel.json_body["user_id"], OIDC_ADMIN_USERID, channel.json_body
)
# Still expect to see one MAU entry, from the first request
mau = self.get_success(store.get_monthly_active_count())
self.assertEqual(mau, 1)
conn_infos = self.get_success(
store.get_user_ip_and_agents(UserID.from_string(OIDC_ADMIN_USERID))
)
self.assertEqual(conn_infos, [])

View File

@ -19,13 +19,7 @@ from twisted.web.resource import Resource
from synapse.rest.synapse.client import build_synapse_client_resource_tree from synapse.rest.synapse.client import build_synapse_client_resource_tree
from tests.unittest import HomeserverTestCase, override_config, skip_unless from tests.unittest import HomeserverTestCase, override_config, skip_unless
from tests.utils import HAS_AUTHLIB
try:
import authlib # noqa: F401
HAS_AUTHLIB = True
except ImportError:
HAS_AUTHLIB = False
@skip_unless(HAS_AUTHLIB, "requires authlib") @skip_unless(HAS_AUTHLIB, "requires authlib")

View File

@ -30,13 +30,7 @@ from synapse.types import JsonDict, Requester, create_requester
from tests import unittest from tests import unittest
from tests.http.server._base import make_request_with_cancellation_test from tests.http.server._base import make_request_with_cancellation_test
from tests.unittest import override_config from tests.unittest import override_config
from tests.utils import HAS_AUTHLIB
try:
import authlib # noqa: F401
HAS_AUTHLIB = True
except ImportError:
HAS_AUTHLIB = False
class KeyQueryTestCase(unittest.HomeserverTestCase): class KeyQueryTestCase(unittest.HomeserverTestCase):

View File

@ -16,13 +16,7 @@ from twisted.web.resource import Resource
from synapse.rest.well_known import well_known_resource from synapse.rest.well_known import well_known_resource
from tests import unittest from tests import unittest
from tests.utils import HAS_AUTHLIB
try:
import authlib # noqa: F401
HAS_AUTHLIB = True
except ImportError:
HAS_AUTHLIB = False
class WellKnownTests(unittest.HomeserverTestCase): class WellKnownTests(unittest.HomeserverTestCase):

View File

@ -30,6 +30,13 @@ from synapse.storage.database import LoggingDatabaseConnection
from synapse.storage.engines import create_engine from synapse.storage.engines import create_engine
from synapse.storage.prepare_database import prepare_database from synapse.storage.prepare_database import prepare_database
try:
import authlib # noqa: F401
HAS_AUTHLIB = True
except ImportError:
HAS_AUTHLIB = False
# set this to True to run the tests against postgres instead of sqlite. # set this to True to run the tests against postgres instead of sqlite.
# #
# When running under postgres, we first create a base database with the name # When running under postgres, we first create a base database with the name