From 03cf4385e098ae73730b9c5ef695fa3f16c1806f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 2 Oct 2019 15:09:10 +0100 Subject: [PATCH] Fix public room list pagination. We incorrectly used `room_id` as to bound the result set, even though we order by `joined_members, room_id`, leading to incorrect results after pagination. --- synapse/handlers/room_list.py | 33 +++++++++++++++------- synapse/storage/room.py | 53 ++++++++++++++++++++++++----------- 2 files changed, 59 insertions(+), 27 deletions(-) diff --git a/synapse/handlers/room_list.py b/synapse/handlers/room_list.py index 4e1cc5460f..cfed344d4d 100644 --- a/synapse/handlers/room_list.py +++ b/synapse/handlers/room_list.py @@ -142,12 +142,12 @@ class RoomListHandler(BaseHandler): if since_token: batch_token = RoomListNextBatch.from_token(since_token) - last_room_id = batch_token.last_room_id + bounds = (batch_token.last_joined_members, batch_token.last_room_id) forwards = batch_token.direction_is_forward else: batch_token = None + bounds = None - last_room_id = None forwards = True # we request one more than wanted to see if there are more pages to come @@ -157,7 +157,7 @@ class RoomListHandler(BaseHandler): network_tuple, search_filter, probing_limit, - last_room_id=last_room_id, + bounds=bounds, forwards=forwards, ignore_non_federatable=from_federation, ) @@ -193,30 +193,38 @@ class RoomListHandler(BaseHandler): more_to_come = False if num_results > 0: - final_room_id = results[-1]["room_id"] - initial_room_id = results[0]["room_id"] + final_entry = results[-1] + initial_entry = results[0] if forwards: if batch_token: # If there was a token given then we assume that there # must be previous results. response["prev_batch"] = RoomListNextBatch( - last_room_id=initial_room_id, direction_is_forward=False + last_joined_members=initial_entry["num_joined_members"], + last_room_id=initial_entry["room_id"], + direction_is_forward=False, ).to_token() if more_to_come: response["next_batch"] = RoomListNextBatch( - last_room_id=final_room_id, direction_is_forward=True + last_joined_members=final_entry["num_joined_members"], + last_room_id=final_entry["room_id"], + direction_is_forward=True, ).to_token() else: if batch_token: response["next_batch"] = RoomListNextBatch( - last_room_id=final_room_id, direction_is_forward=True + last_joined_members=final_entry["num_joined_members"], + last_room_id=final_entry["room_id"], + direction_is_forward=True, ).to_token() if more_to_come: response["prev_batch"] = RoomListNextBatch( - last_room_id=initial_room_id, direction_is_forward=False + last_joined_members=initial_entry["num_joined_members"], + last_room_id=initial_entry["room_id"], + direction_is_forward=False, ).to_token() for room in results: @@ -449,12 +457,17 @@ class RoomListNextBatch( namedtuple( "RoomListNextBatch", ( + "last_joined_members", # The count to get rooms after/before "last_room_id", # The room_id to get rooms after/before "direction_is_forward", # Bool if this is a next_batch, false if prev_batch ), ) ): - KEY_DICT = {"last_room_id": "r", "direction_is_forward": "d"} + KEY_DICT = { + "last_joined_members": "m", + "last_room_id": "r", + "direction_is_forward": "d", + } REVERSE_KEY_DICT = {v: k for k, v in KEY_DICT.items()} diff --git a/synapse/storage/room.py b/synapse/storage/room.py index c02787a73d..9b7e31583c 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -17,6 +17,7 @@ import collections import logging import re +from typing import Optional, Tuple from canonicaljson import json @@ -25,6 +26,7 @@ from twisted.internet import defer from synapse.api.errors import StoreError from synapse.storage._base import SQLBaseStore from synapse.storage.search import SearchStore +from synapse.types import ThirdPartyInstanceID from synapse.util.caches.descriptors import cached, cachedInlineCallbacks logger = logging.getLogger(__name__) @@ -119,24 +121,25 @@ class RoomWorkerStore(SQLBaseStore): @defer.inlineCallbacks def get_largest_public_rooms( self, - network_tuple, - search_filter, - limit, - last_room_id, - forwards, - ignore_non_federatable=False, + network_tuple: Optional[ThirdPartyInstanceID], + search_filter: Optional[dict], + limit: Optional[int], + bounds: Optional[Tuple[int, str]], + forwards: bool, + ignore_non_federatable: bool = False, ): """Gets the largest public rooms (where largest is in terms of joined members, as tracked in the statistics table). Args: - network_tuple (ThirdPartyInstanceID|None): - search_filter (dict|None): - limit (int|None): Maxmimum number of rows to return, unlimited otherwise. - last_room_id (str|None): if present, a room ID which bounds the - result set, and is always *excluded* from the result set. - forwards (bool): true iff going forwards, going backwards otherwise - ignore_non_federatable (bool): If true filters out non-federatable rooms. + network_tuple + search_filter + limit: Maxmimum number of rows to return, unlimited otherwise. + bounds: An uppoer or lower bound to apply to result set if given, + consists of a joined member count and room_id (these are + excluded from result set). + forwards: true iff going forwards, going backwards otherwise + ignore_non_federatable: If true filters out non-federatable rooms. Returns: Rooms in order: biggest number of joined users first. @@ -147,13 +150,29 @@ class RoomWorkerStore(SQLBaseStore): where_clauses = [] query_args = [] - if last_room_id: + # Work out the bounds if we're given them, these bounds look slightly + # odd, but are designed to help query planner use indices by pulling + # out a common bound. + if bounds: + last_joined_members, last_room_id = bounds if forwards: - where_clauses.append("room_id < ?") + where_clauses.append( + """ + joined_members <= ? AND ( + joined_members < ? OR room_id < ? + ) + """ + ) else: - where_clauses.append("? < room_id") + where_clauses.append( + """ + joined_members >= ? AND ( + joined_members > ? OR room_id > ? + ) + """ + ) - query_args += [last_room_id] + query_args += [last_joined_members, last_joined_members, last_room_id] if search_filter and search_filter.get("generic_search_term", None): search_term = "%" + search_filter["generic_search_term"] + "%"