# Copyright 2018-2021 The Matrix.org Foundation C.I.C.
# Copyright 2020 Sorunome
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
|
2019-09-04 06:04:27 -06:00
|
|
|
from collections import Counter
|
2021-01-26 08:50:21 -07:00
|
|
|
from typing import TYPE_CHECKING, Any, Dict, Iterable, Optional, Tuple
|
|
|
|
|
|
|
|
from typing_extensions import Counter as CounterType
|
2019-05-21 10:36:50 -06:00
|
|
|
|
2021-09-06 05:17:16 -06:00
|
|
|
from synapse.api.constants import EventContentFields, EventTypes, Membership
|
2019-05-21 10:36:50 -06:00
|
|
|
from synapse.metrics import event_processing_positions
|
|
|
|
from synapse.metrics.background_process_metrics import run_as_background_process
|
2021-01-26 08:50:21 -07:00
|
|
|
from synapse.types import JsonDict
|
|
|
|
|
|
|
|
if TYPE_CHECKING:
|
2021-03-23 05:12:48 -06:00
|
|
|
from synapse.server import HomeServer
|
2019-05-21 10:36:50 -06:00
|
|
|
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
2020-06-05 12:42:55 -06:00
|
|
|
class StatsHandler:
    """Keeps the *_stats tables updated with a simple time-series of
    information about the users, rooms and media on the server, so that admins
    have some idea of who is consuming their resources.

    Heavily derived from UserDirectoryHandler.
    """

    # Ordered (membership, per-room counter) pairs. Kept as a sequence that is
    # scanned with `==` (rather than a dict) so that any value found in event
    # content, hashable or not, is simply treated as "unknown".
    _MEMBERSHIP_COUNTERS = (
        (Membership.JOIN, "joined_members"),
        (Membership.INVITE, "invited_members"),
        (Membership.LEAVE, "left_members"),
        (Membership.BAN, "banned_members"),
        (Membership.KNOCK, "knocked_members"),
    )

    # State event type -> (room state field to set, content key to copy from).
    # Membership and create events need bespoke handling and are not listed.
    _SIMPLE_STATE_FIELDS = {
        EventTypes.JoinRules: ("join_rules", "join_rule"),
        EventTypes.RoomHistoryVisibility: (
            "history_visibility",
            "history_visibility",
        ),
        EventTypes.RoomEncryption: ("encryption", "algorithm"),
        EventTypes.Name: ("name", "name"),
        EventTypes.Topic: ("topic", "topic"),
        EventTypes.RoomAvatar: ("avatar", "url"),
        EventTypes.CanonicalAlias: ("canonical_alias", "alias"),
        EventTypes.GuestAccess: ("guest_access", EventContentFields.GUEST_ACCESS),
    }

    def __init__(self, hs: "HomeServer"):
        """Wire the handler up to the homeserver and, if stats are enabled on
        a process that runs background tasks, schedule an initial catch-up.

        Args:
            hs: the homeserver to pull stores, clock, notifier and config from.
        """
        self.hs = hs
        self.store = hs.get_datastores().main
        self._storage_controllers = hs.get_storage_controllers()
        self.state = hs.get_state_handler()
        self.clock = hs.get_clock()
        self.notifier = hs.get_notifier()
        self.is_mine_id = hs.is_mine_id

        self.stats_enabled = hs.config.stats.stats_enabled

        # Position reached in the current_state_delta stream; None until it
        # has been loaded by `_unsafe_process`.
        self.pos: Optional[int] = None

        # Set while a processing run is in flight, so that deltas are only
        # ever processed one run at a time.
        self._is_processing = False

        if not self.stats_enabled or not hs.config.worker.run_background_tasks:
            return

        self.notifier.add_replication_callback(self.notify_new_event)

        # Kick off a first run straight away, rather than waiting for a change
        # to arrive before we start populating stats.
        self.clock.call_later(0, self.notify_new_event)

    def notify_new_event(self) -> None:
        """Called when there may be more deltas to process.

        Does nothing if stats are disabled or a run is already in progress;
        otherwise starts a background run which clears the in-progress flag
        when it finishes, whether or not it succeeded.
        """
        if not self.stats_enabled:
            return
        if self._is_processing:
            return

        self._is_processing = True

        async def run_then_release() -> None:
            try:
                await self._unsafe_process()
            finally:
                self._is_processing = False

        run_as_background_process("stats.notify_new_event", run_then_release)

    async def _unsafe_process(self) -> None:
        """Process state deltas until `self.pos` has caught up with the room
        max stream ordering.

        "Unsafe" because it does no locking of its own: `notify_new_event`
        guards it with `_is_processing`.
        """
        # A position of None means we have not yet fetched it from the DB.
        if self.pos is None:
            self.pos = await self.store.get_stats_positions()
            stream_head = self.store.get_room_max_stream_ordering()
            if self.pos > stream_head:
                # Apparently we've processed more events than exist in the
                # database! This can happen if events are removed with history
                # purge or similar.
                logger.warning(
                    "Event stream ordering appears to have gone backwards (%i -> %i): "
                    "rewinding stats processor",
                    self.pos,
                    stream_head,
                )
                self.pos = stream_head

        # Keep going round until there is nothing left to handle.
        while True:
            # Read the max stream_ordering *before* asking for outstanding
            # deltas; otherwise updates arriving after the deltas were checked
            # could be missed.
            stream_head = self.store.get_room_max_stream_ordering()
            if self.pos == stream_head:
                return

            logger.debug("Processing room stats %s->%s", self.pos, stream_head)

            state_storage = self._storage_controllers.state
            reached, deltas = await state_storage.get_current_state_deltas(
                self.pos, stream_head
            )

            room_deltas: Dict[str, CounterType[str]] = {}
            user_deltas: Dict[str, CounterType[str]] = {}
            if deltas:
                logger.debug("Handling %d state deltas", len(deltas))
                room_deltas, user_deltas = await self._handle_deltas(deltas)

            logger.debug("room_deltas: %s", room_deltas)
            logger.debug("user_deltas: %s", user_deltas)

            # Called even when there are no deltas, so that the stored stats
            # position is still advanced.
            await self.store.bulk_update_stats_delta(
                self.clock.time_msec(),
                updates={"room": room_deltas, "user": user_deltas},
                stream_id=reached,
            )

            logger.debug("Handled room stats to %s -> %s", self.pos, reached)

            event_processing_positions.labels("stats").set(reached)

            self.pos = reached

    async def _handle_deltas(
        self, deltas: Iterable[JsonDict]
    ) -> Tuple[Dict[str, CounterType[str]], Dict[str, CounterType[str]]]:
        """Called with the state deltas to process.

        Besides computing the counter changes, this writes any changed room
        state fields (name, topic, join rules, ...) to the store via
        `update_room_state` before returning.

        Args:
            deltas: the state deltas; each is read for the keys "type",
                "state_key", "room_id", "event_id", "stream_id" and
                "prev_event_id".

        Returns:
            Two dicts: the room deltas and the user deltas,
            mapping from room/user ID to changes in the various fields.

        Raises:
            ValueError: if a membership event, or the membership event it
                replaces, carries an unrecognised membership value.
        """
        room_to_stats_deltas: Dict[str, CounterType[str]] = {}
        user_to_stats_deltas: Dict[str, CounterType[str]] = {}
        room_to_state_updates: Dict[str, Dict[str, Any]] = {}

        for delta in deltas:
            event_type = delta["type"]
            state_key = delta["state_key"]
            room_id = delta["room_id"]
            event_id = delta["event_id"]
            stream_id = delta["stream_id"]
            prev_event_id = delta["prev_event_id"]

            logger.debug(
                "Handling: %r, %r %r, %s", room_id, event_type, state_key, event_id
            )

            # Skip deltas at or before the earliest token this room's stats
            # may be processed from.
            earliest = await self.store.get_earliest_token_for_stats("room", room_id)
            if earliest is not None and earliest >= stream_id:
                logger.debug(
                    "Ignoring: %s as earlier than this room's initial ingestion event",
                    event_id,
                )
                continue

            if event_id is None and prev_event_id is None:
                logger.error(
                    "event ID is None and so is the previous event ID. stream_id: %s",
                    stream_id,
                )
                continue

            # An absent event, or one we cannot load, is treated as having
            # empty content.
            content: JsonDict = {}
            if event_id is not None:
                event = await self.store.get_event(event_id, allow_none=True)
                if event:
                    content = event.content or {}

            # Every value in this counter is a delta (a RELATIVE change).
            room_counts = room_to_stats_deltas.setdefault(room_id, Counter())
            room_state = room_to_state_updates.setdefault(room_id, {})

            if prev_event_id is None:
                # Nothing is being overwritten, so this is a brand new
                # effective/current state event.
                room_counts["current_state_events"] += 1

            if event_type == EventTypes.Member:
                await self._count_membership_change(
                    state_key,
                    prev_event_id,
                    content,
                    room_counts,
                    user_to_stats_deltas,
                )
            else:
                self._record_room_state(event_type, content, room_state)

        for room_id, state in room_to_state_updates.items():
            logger.debug("Updating room_stats_state for %s: %s", room_id, state)
            await self.store.update_room_state(room_id, state)

        return room_to_stats_deltas, user_to_stats_deltas

    @classmethod
    def _membership_counter(cls, membership: Any) -> Optional[str]:
        """Return the room counter tracking `membership`, or None if the value
        is not one of the known memberships."""
        for known, counter in cls._MEMBERSHIP_COUNTERS:
            if membership == known:
                return counter
        return None

    async def _count_membership_change(
        self,
        user_id: str,
        prev_event_id: Optional[str],
        content: JsonDict,
        room_counts: CounterType[str],
        user_to_stats_deltas: Dict[str, CounterType[str]],
    ) -> None:
        """Fold one membership state change into the room and user counters.

        Args:
            user_id: the state key of the membership event.
            prev_event_id: ID of the membership event being replaced, if any.
            content: content of the new membership event ({} if unavailable).
            room_counts: the room's delta counter, updated in place.
            user_to_stats_deltas: per-user delta counters, updated in place.

        Raises:
            ValueError: if the old or new membership is not a known value.
        """
        # We could use StateDeltasHandler._get_key_change here, but that is a
        # bit inefficient given we're not testing for a specific result; it is
        # simpler to fetch both membership strings and compare them directly.
        #
        # Without a previous event the old membership is None rather than
        # "leave": a user who is new to the room must not reduce the leave
        # count when they join.
        prev_membership = None
        if prev_event_id is not None:
            prev_event = await self.store.get_event(prev_event_id, allow_none=True)
            if prev_event:
                prev_membership = prev_event.content.get(
                    "membership", Membership.LEAVE
                )

        membership = content.get("membership", Membership.LEAVE)
        unchanged = membership == prev_membership

        if prev_membership is None:
            logger.debug("No previous membership for this user.")
        elif not unchanged:
            old_counter = self._membership_counter(prev_membership)
            if old_counter is None:
                raise ValueError(
                    "%r is not a valid prev_membership" % (prev_membership,)
                )
            room_counts[old_counter] -= 1

        if not unchanged:
            new_counter = self._membership_counter(membership)
            if new_counter is None:
                raise ValueError("%r is not a valid membership" % (membership,))
            room_counts[new_counter] += 1

        if not self.is_mine_id(user_id):
            return

        # Only a change in whether the user is joined matters here, so moves
        # between two non-joined memberships (e.g. leave to ban) are ignored.
        was_joined = prev_membership == Membership.JOIN
        now_joined = membership == Membership.JOIN
        if was_joined != now_joined:
            step = 1 if now_joined else -1
            user_to_stats_deltas.setdefault(user_id, Counter())["joined_rooms"] += step
            room_counts["local_users_in_room"] += step

    @classmethod
    def _record_room_state(
        cls, event_type: str, content: JsonDict, room_state: Dict[str, Any]
    ) -> None:
        """Copy the stats-relevant field of a non-membership state event into
        `room_state`; event types we do not track are ignored.

        Args:
            event_type: the type of the state event.
            content: content of the state event ({} if unavailable).
            room_state: the room's pending state update, modified in place.
        """
        if event_type == EventTypes.Create:
            # Only a literal True (or an absent key) counts as federatable.
            federate = content.get(EventContentFields.FEDERATE, True)
            room_state["is_federatable"] = federate is True

            room_type = content.get(EventContentFields.ROOM_TYPE)
            if isinstance(room_type, str):
                room_state["room_type"] = room_type
            return

        field_and_key = cls._SIMPLE_STATE_FIELDS.get(event_type)
        if field_and_key is not None:
            state_field, content_key = field_and_key
            room_state[state_field] = content.get(content_key)
|