Merge pull request #5294 from matrix-org/erikj/speed_up_room_stats
Speed up room stats background update
This commit is contained in:
commit
31d44ec4bd
|
@ -0,0 +1 @@
|
||||||
|
Fix performance problems with the rooms stats background update.
|
|
@ -618,7 +618,12 @@ class EventsWorkerStore(SQLBaseStore):
|
||||||
"""
|
"""
|
||||||
See get_total_state_event_counts.
|
See get_total_state_event_counts.
|
||||||
"""
|
"""
|
||||||
sql = "SELECT COUNT(*) FROM state_events WHERE room_id=?"
|
# We join against the events table as that has an index on room_id
|
||||||
|
sql = """
|
||||||
|
SELECT COUNT(*) FROM state_events
|
||||||
|
INNER JOIN events USING (room_id, event_id)
|
||||||
|
WHERE room_id=?
|
||||||
|
"""
|
||||||
txn.execute(sql, (room_id,))
|
txn.execute(sql, (room_id,))
|
||||||
row = txn.fetchone()
|
row = txn.fetchone()
|
||||||
return row[0] if row else 0
|
return row[0] if row else 0
|
||||||
|
|
|
@ -142,26 +142,9 @@ class RoomMemberWorkerStore(EventsWorkerStore):
|
||||||
|
|
||||||
return self.runInteraction("get_room_summary", _get_room_summary_txn)
|
return self.runInteraction("get_room_summary", _get_room_summary_txn)
|
||||||
|
|
||||||
def _get_user_count_in_room_txn(self, txn, room_id, membership):
|
def _get_user_counts_in_room_txn(self, txn, room_id):
|
||||||
"""
|
"""
|
||||||
See get_user_count_in_room.
|
Get the user count in a room by membership.
|
||||||
"""
|
|
||||||
sql = (
|
|
||||||
"SELECT count(*) FROM room_memberships as m"
|
|
||||||
" INNER JOIN current_state_events as c"
|
|
||||||
" ON m.event_id = c.event_id "
|
|
||||||
" AND m.room_id = c.room_id "
|
|
||||||
" AND m.user_id = c.state_key"
|
|
||||||
" WHERE c.type = 'm.room.member' AND c.room_id = ? AND m.membership = ?"
|
|
||||||
)
|
|
||||||
|
|
||||||
txn.execute(sql, (room_id, membership))
|
|
||||||
row = txn.fetchone()
|
|
||||||
return row[0]
|
|
||||||
|
|
||||||
def get_user_count_in_room(self, room_id, membership):
|
|
||||||
"""
|
|
||||||
Get the user count in a room with a particular membership.
|
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
room_id (str)
|
room_id (str)
|
||||||
|
@ -170,9 +153,15 @@ class RoomMemberWorkerStore(EventsWorkerStore):
|
||||||
Returns:
|
Returns:
|
||||||
Deferred[int]
|
Deferred[int]
|
||||||
"""
|
"""
|
||||||
return self.runInteraction(
|
sql = """
|
||||||
"get_users_in_room", self._get_user_count_in_room_txn, room_id, membership
|
SELECT m.membership, count(*) FROM room_memberships as m
|
||||||
)
|
INNER JOIN current_state_events as c USING(event_id)
|
||||||
|
WHERE c.type = 'm.room.member' AND c.room_id = ?
|
||||||
|
GROUP BY m.membership
|
||||||
|
"""
|
||||||
|
|
||||||
|
txn.execute(sql, (room_id,))
|
||||||
|
return {row[0]: row[1] for row in txn}
|
||||||
|
|
||||||
@cached()
|
@cached()
|
||||||
def get_invited_rooms_for_user(self, user_id):
|
def get_invited_rooms_for_user(self, user_id):
|
||||||
|
|
|
@ -0,0 +1,28 @@
|
||||||
|
/* Copyright 2019 The Matrix.org Foundation C.I.C.
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
-- This delta file gets run after `54/stats.sql` delta.
|
||||||
|
|
||||||
|
-- We want to add some indices to the temporary stats table, so we re-insert
|
||||||
|
-- 'populate_stats_createtables' if we are still processing the rooms update.
|
||||||
|
INSERT INTO background_updates (update_name, progress_json)
|
||||||
|
SELECT 'populate_stats_createtables', '{}'
|
||||||
|
WHERE
|
||||||
|
'populate_stats_process_rooms' IN (
|
||||||
|
SELECT update_name FROM background_updates
|
||||||
|
)
|
||||||
|
AND 'populate_stats_createtables' NOT IN ( -- don't insert if already exists
|
||||||
|
SELECT update_name FROM background_updates
|
||||||
|
);
|
|
@ -18,6 +18,7 @@ import logging
|
||||||
from twisted.internet import defer
|
from twisted.internet import defer
|
||||||
|
|
||||||
from synapse.api.constants import EventTypes, Membership
|
from synapse.api.constants import EventTypes, Membership
|
||||||
|
from synapse.storage.prepare_database import get_statements
|
||||||
from synapse.storage.state_deltas import StateDeltasStore
|
from synapse.storage.state_deltas import StateDeltasStore
|
||||||
from synapse.util.caches.descriptors import cached
|
from synapse.util.caches.descriptors import cached
|
||||||
|
|
||||||
|
@ -69,12 +70,25 @@ class StatsStore(StateDeltasStore):
|
||||||
|
|
||||||
# Get all the rooms that we want to process.
|
# Get all the rooms that we want to process.
|
||||||
def _make_staging_area(txn):
|
def _make_staging_area(txn):
|
||||||
sql = (
|
# Create the temporary tables
|
||||||
"CREATE TABLE IF NOT EXISTS "
|
stmts = get_statements("""
|
||||||
+ TEMP_TABLE
|
-- We just recreate the table, we'll be reinserting the
|
||||||
+ "_rooms(room_id TEXT NOT NULL, events BIGINT NOT NULL)"
|
-- correct entries again later anyway.
|
||||||
)
|
DROP TABLE IF EXISTS {temp}_rooms;
|
||||||
txn.execute(sql)
|
|
||||||
|
CREATE TABLE IF NOT EXISTS {temp}_rooms(
|
||||||
|
room_id TEXT NOT NULL,
|
||||||
|
events BIGINT NOT NULL
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE INDEX {temp}_rooms_events
|
||||||
|
ON {temp}_rooms(events);
|
||||||
|
CREATE INDEX {temp}_rooms_id
|
||||||
|
ON {temp}_rooms(room_id);
|
||||||
|
""".format(temp=TEMP_TABLE).splitlines())
|
||||||
|
|
||||||
|
for statement in stmts:
|
||||||
|
txn.execute(statement)
|
||||||
|
|
||||||
sql = (
|
sql = (
|
||||||
"CREATE TABLE IF NOT EXISTS "
|
"CREATE TABLE IF NOT EXISTS "
|
||||||
|
@ -83,15 +97,16 @@ class StatsStore(StateDeltasStore):
|
||||||
)
|
)
|
||||||
txn.execute(sql)
|
txn.execute(sql)
|
||||||
|
|
||||||
# Get rooms we want to process from the database
|
# Get rooms we want to process from the database, only adding
|
||||||
|
# those that we haven't (i.e. those not in room_stats_earliest_token)
|
||||||
sql = """
|
sql = """
|
||||||
SELECT room_id, count(*) FROM current_state_events
|
INSERT INTO %s_rooms (room_id, events)
|
||||||
GROUP BY room_id
|
SELECT c.room_id, count(*) FROM current_state_events AS c
|
||||||
"""
|
LEFT JOIN room_stats_earliest_token AS t USING (room_id)
|
||||||
|
WHERE t.room_id IS NULL
|
||||||
|
GROUP BY c.room_id
|
||||||
|
""" % (TEMP_TABLE,)
|
||||||
txn.execute(sql)
|
txn.execute(sql)
|
||||||
rooms = [{"room_id": x[0], "events": x[1]} for x in txn.fetchall()]
|
|
||||||
self._simple_insert_many_txn(txn, TEMP_TABLE + "_rooms", rooms)
|
|
||||||
del rooms
|
|
||||||
|
|
||||||
new_pos = yield self.get_max_stream_id_in_current_state_deltas()
|
new_pos = yield self.get_max_stream_id_in_current_state_deltas()
|
||||||
yield self.runInteraction("populate_stats_temp_build", _make_staging_area)
|
yield self.runInteraction("populate_stats_temp_build", _make_staging_area)
|
||||||
|
@ -179,46 +194,39 @@ class StatsStore(StateDeltasStore):
|
||||||
|
|
||||||
current_state_ids = yield self.get_current_state_ids(room_id)
|
current_state_ids = yield self.get_current_state_ids(room_id)
|
||||||
|
|
||||||
join_rules = yield self.get_event(
|
join_rules_id = current_state_ids.get((EventTypes.JoinRules, ""))
|
||||||
current_state_ids.get((EventTypes.JoinRules, "")), allow_none=True
|
history_visibility_id = current_state_ids.get(
|
||||||
)
|
(EventTypes.RoomHistoryVisibility, "")
|
||||||
history_visibility = yield self.get_event(
|
|
||||||
current_state_ids.get((EventTypes.RoomHistoryVisibility, "")),
|
|
||||||
allow_none=True,
|
|
||||||
)
|
|
||||||
encryption = yield self.get_event(
|
|
||||||
current_state_ids.get((EventTypes.RoomEncryption, "")), allow_none=True
|
|
||||||
)
|
|
||||||
name = yield self.get_event(
|
|
||||||
current_state_ids.get((EventTypes.Name, "")), allow_none=True
|
|
||||||
)
|
|
||||||
topic = yield self.get_event(
|
|
||||||
current_state_ids.get((EventTypes.Topic, "")), allow_none=True
|
|
||||||
)
|
|
||||||
avatar = yield self.get_event(
|
|
||||||
current_state_ids.get((EventTypes.RoomAvatar, "")), allow_none=True
|
|
||||||
)
|
|
||||||
canonical_alias = yield self.get_event(
|
|
||||||
current_state_ids.get((EventTypes.CanonicalAlias, "")), allow_none=True
|
|
||||||
)
|
)
|
||||||
|
encryption_id = current_state_ids.get((EventTypes.RoomEncryption, ""))
|
||||||
|
name_id = current_state_ids.get((EventTypes.Name, ""))
|
||||||
|
topic_id = current_state_ids.get((EventTypes.Topic, ""))
|
||||||
|
avatar_id = current_state_ids.get((EventTypes.RoomAvatar, ""))
|
||||||
|
canonical_alias_id = current_state_ids.get((EventTypes.CanonicalAlias, ""))
|
||||||
|
|
||||||
def _or_none(x, arg):
|
state_events = yield self.get_events([
|
||||||
if x:
|
join_rules_id, history_visibility_id, encryption_id, name_id,
|
||||||
return x.content.get(arg)
|
topic_id, avatar_id, canonical_alias_id,
|
||||||
|
])
|
||||||
|
|
||||||
|
def _get_or_none(event_id, arg):
|
||||||
|
event = state_events.get(event_id)
|
||||||
|
if event:
|
||||||
|
return event.content.get(arg)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
yield self.update_room_state(
|
yield self.update_room_state(
|
||||||
room_id,
|
room_id,
|
||||||
{
|
{
|
||||||
"join_rules": _or_none(join_rules, "join_rule"),
|
"join_rules": _get_or_none(join_rules_id, "join_rule"),
|
||||||
"history_visibility": _or_none(
|
"history_visibility": _get_or_none(
|
||||||
history_visibility, "history_visibility"
|
history_visibility_id, "history_visibility"
|
||||||
),
|
),
|
||||||
"encryption": _or_none(encryption, "algorithm"),
|
"encryption": _get_or_none(encryption_id, "algorithm"),
|
||||||
"name": _or_none(name, "name"),
|
"name": _get_or_none(name_id, "name"),
|
||||||
"topic": _or_none(topic, "topic"),
|
"topic": _get_or_none(topic_id, "topic"),
|
||||||
"avatar": _or_none(avatar, "url"),
|
"avatar": _get_or_none(avatar_id, "url"),
|
||||||
"canonical_alias": _or_none(canonical_alias, "alias"),
|
"canonical_alias": _get_or_none(canonical_alias_id, "alias"),
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -233,18 +241,9 @@ class StatsStore(StateDeltasStore):
|
||||||
current_token = self._get_max_stream_id_in_current_state_deltas_txn(txn)
|
current_token = self._get_max_stream_id_in_current_state_deltas_txn(txn)
|
||||||
|
|
||||||
current_state_events = len(current_state_ids)
|
current_state_events = len(current_state_ids)
|
||||||
joined_members = self._get_user_count_in_room_txn(
|
|
||||||
txn, room_id, Membership.JOIN
|
membership_counts = self._get_user_counts_in_room_txn(txn, room_id)
|
||||||
)
|
|
||||||
invited_members = self._get_user_count_in_room_txn(
|
|
||||||
txn, room_id, Membership.INVITE
|
|
||||||
)
|
|
||||||
left_members = self._get_user_count_in_room_txn(
|
|
||||||
txn, room_id, Membership.LEAVE
|
|
||||||
)
|
|
||||||
banned_members = self._get_user_count_in_room_txn(
|
|
||||||
txn, room_id, Membership.BAN
|
|
||||||
)
|
|
||||||
total_state_events = self._get_total_state_event_counts_txn(
|
total_state_events = self._get_total_state_event_counts_txn(
|
||||||
txn, room_id
|
txn, room_id
|
||||||
)
|
)
|
||||||
|
@ -257,10 +256,10 @@ class StatsStore(StateDeltasStore):
|
||||||
{
|
{
|
||||||
"bucket_size": self.stats_bucket_size,
|
"bucket_size": self.stats_bucket_size,
|
||||||
"current_state_events": current_state_events,
|
"current_state_events": current_state_events,
|
||||||
"joined_members": joined_members,
|
"joined_members": membership_counts.get(Membership.JOIN, 0),
|
||||||
"invited_members": invited_members,
|
"invited_members": membership_counts.get(Membership.INVITE, 0),
|
||||||
"left_members": left_members,
|
"left_members": membership_counts.get(Membership.LEAVE, 0),
|
||||||
"banned_members": banned_members,
|
"banned_members": membership_counts.get(Membership.BAN, 0),
|
||||||
"state_events": total_state_events,
|
"state_events": total_state_events,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
@ -270,10 +269,13 @@ class StatsStore(StateDeltasStore):
|
||||||
{"room_id": room_id, "token": current_token},
|
{"room_id": room_id, "token": current_token},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# We've finished a room. Delete it from the table.
|
||||||
|
self._simple_delete_one_txn(
|
||||||
|
txn, TEMP_TABLE + "_rooms", {"room_id": room_id},
|
||||||
|
)
|
||||||
|
|
||||||
yield self.runInteraction("update_room_stats", _fetch_data)
|
yield self.runInteraction("update_room_stats", _fetch_data)
|
||||||
|
|
||||||
# We've finished a room. Delete it from the table.
|
|
||||||
yield self._simple_delete_one(TEMP_TABLE + "_rooms", {"room_id": room_id})
|
|
||||||
# Update the remaining counter.
|
# Update the remaining counter.
|
||||||
progress["remaining"] -= 1
|
progress["remaining"] -= 1
|
||||||
yield self.runInteraction(
|
yield self.runInteraction(
|
||||||
|
|
Loading…
Reference in New Issue