Keep FTS indexes up to date. Only search through rooms the user is currently joined to.

This commit is contained in:
Erik Johnston 2015-10-12 10:49:53 +01:00
parent c85c912562
commit 61561b9df7
6 changed files with 55 additions and 12 deletions

View File

@@ -65,7 +65,7 @@ class SearchHandler(BaseHandler):
super(SearchHandler, self).__init__(hs) super(SearchHandler, self).__init__(hs)
@defer.inlineCallbacks @defer.inlineCallbacks
def search(self, content): def search(self, user, content):
constraint_dicts = content["search_categories"]["room_events"]["constraints"] constraint_dicts = content["search_categories"]["room_events"]["constraints"]
constraints = [RoomConstraint.from_dict(c)for c in constraint_dicts] constraints = [RoomConstraint.from_dict(c)for c in constraint_dicts]
@@ -76,20 +76,33 @@ class SearchHandler(BaseHandler):
raise SynapseError(400, "Only one constraint can be FTS") raise SynapseError(400, "Only one constraint can be FTS")
fts = True fts = True
res = yield self.hs.get_datastore().search_msgs(constraints) rooms = yield self.store.get_rooms_for_user(
user.to_string(),
)
time_now = self.hs.get_clock().time_msec() # For some reason the list of events contains duplicates
# TODO(paul): work out why because I really don't think it should
room_ids = set(r.room_id for r in rooms)
results = [ res = yield self.store.search_msgs(room_ids, constraints)
{
time_now = self.clock.time_msec()
results = {
r["result"].event_id: {
"rank": r["rank"], "rank": r["rank"],
"result": serialize_event(r["result"], time_now) "result": serialize_event(r["result"], time_now)
} }
for r in res for r in res
] }
logger.info("returning: %r", results) logger.info("returning: %r", results)
results.sort(key=lambda r: -r["rank"]) defer.returnValue({
"search_categories": {
defer.returnValue(results) "room_events": {
"results": results,
"count": len(results)
}
}
})

View File

@@ -540,7 +540,7 @@ class SearchRestServlet(ClientV1RestServlet):
content = _parse_json(request) content = _parse_json(request)
results = yield self.handlers.search_handler.search(content) results = yield self.handlers.search_handler.search(auth_user, content)
defer.returnValue((200, results)) defer.returnValue((200, results))

View File

@@ -307,6 +307,8 @@ class EventsStore(SQLBaseStore):
self._store_room_name_txn(txn, event) self._store_room_name_txn(txn, event)
elif event.type == EventTypes.Topic: elif event.type == EventTypes.Topic:
self._store_room_topic_txn(txn, event) self._store_room_topic_txn(txn, event)
elif event.type == EventTypes.Message:
self._store_room_message_txn(txn, event)
elif event.type == EventTypes.Redaction: elif event.type == EventTypes.Redaction:
self._store_redaction(txn, event) self._store_redaction(txn, event)

View File

@@ -175,6 +175,10 @@ class RoomStore(SQLBaseStore):
}, },
) )
self._store_event_search_txn(
txn, event, "content.topic", event.content["topic"]
)
def _store_room_name_txn(self, txn, event): def _store_room_name_txn(self, txn, event):
if hasattr(event, "content") and "name" in event.content: if hasattr(event, "content") and "name" in event.content:
self._simple_insert_txn( self._simple_insert_txn(
@@ -187,6 +191,24 @@ class RoomStore(SQLBaseStore):
} }
) )
self._store_event_search_txn(
txn, event, "content.name", event.content["name"]
)
def _store_room_message_txn(self, txn, event):
if hasattr(event, "content") and "body" in event.content:
self._store_event_search_txn(
txn, event, "content.body", event.content["body"]
)
def _store_event_search_txn(self, txn, event, key, value):
sql = (
"INSERT INTO event_search (event_id, room_id, key, vector)"
" VALUES (?,?,?,to_tsvector('english', ?))"
)
txn.execute(sql, (event.event_id, event.room_id, key, value,))
@cachedInlineCallbacks() @cachedInlineCallbacks()
def get_room_name_and_aliases(self, room_id): def get_room_name_and_aliases(self, room_id):
def f(txn): def f(txn):

View File

@@ -44,7 +44,8 @@ INSERT INTO event_search SELECT
FROM events NATURAL JOIN event_json WHERE type = 'm.room.topic'; FROM events NATURAL JOIN event_json WHERE type = 'm.room.topic';
CREATE INDEX event_search_idx ON event_search USING gin(vector); CREATE INDEX event_search_fts_idx ON event_search USING gin(vector);
CREATE INDEX event_search_ev_idx ON event_search(event_id);
""" """

View File

@@ -21,11 +21,16 @@ from synapse.api.constants import KnownRoomEventKeys, SearchConstraintTypes
class SearchStore(SQLBaseStore): class SearchStore(SQLBaseStore):
@defer.inlineCallbacks @defer.inlineCallbacks
def search_msgs(self, constraints): def search_msgs(self, room_ids, constraints):
clauses = [] clauses = []
args = [] args = []
fts = None fts = None
clauses.append(
"room_id IN (%s)" % (",".join(["?"] * len(room_ids)),)
)
args.extend(room_ids)
for c in constraints: for c in constraints:
local_clauses = [] local_clauses = []
if c.search_type == SearchConstraintTypes.FTS: if c.search_type == SearchConstraintTypes.FTS: