Replace postgres GIN with GIST

This is because GIN indexes can be slow to write to, especially when the
table gets large.
Erik Johnston 2016-11-03 14:59:59 +00:00
parent 9164bfa1c3
commit 8fd4d9129f
3 changed files with 45 additions and 1 deletion
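To illustrate the trade-off described in the commit message, here is a sketch of the before and after index definitions. The GIST form matches the statement executed in the diff below; the GIN form is an assumption about the index being dropped, inferred from the commit title:

    -- Old full-text index (assumed GIN, per the commit title):
    -- CREATE INDEX event_search_fts_idx ON event_search USING GIN (vector);

    -- Replacement built by this commit's background update:
    CREATE INDEX CONCURRENTLY event_search_fts_idx_gist
        ON event_search USING GIST (vector);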


@@ -25,7 +25,7 @@ logger = logging.getLogger(__name__)
 # Remember to update this number every time a change is made to database
 # schema files, so the users will be informed on server restarts.
-SCHEMA_VERSION = 37
+SCHEMA_VERSION = 38
 
 dir_path = os.path.abspath(os.path.dirname(__file__))


@@ -0,0 +1,17 @@
+/* Copyright 2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+INSERT into background_updates (update_name, progress_json)
+VALUES ('event_search_postgres_gist', '{}');
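This delta only queues the work: the row tells the background-update machinery to invoke the handler registered on SearchStore in the diff below. Whether the update is still pending can be checked directly against the database (a sketch; background_updates is the same table this delta inserts into):

    -- Pending background updates; the row disappears once the reindex completes.
    SELECT update_name, progress_json
    FROM background_updates
    WHERE update_name = 'event_search_postgres_gist';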


@@ -31,6 +31,7 @@ class SearchStore(BackgroundUpdateStore):
     EVENT_SEARCH_UPDATE_NAME = "event_search"
     EVENT_SEARCH_ORDER_UPDATE_NAME = "event_search_order"
+    EVENT_SEARCH_USE_GIST_POSTGRES_NAME = "event_search_postgres_gist"
 
     def __init__(self, hs):
         super(SearchStore, self).__init__(hs)
@@ -41,6 +42,10 @@ class SearchStore(BackgroundUpdateStore):
             self.EVENT_SEARCH_ORDER_UPDATE_NAME,
             self._background_reindex_search_order
         )
+        self.register_background_update_handler(
+            self.EVENT_SEARCH_USE_GIST_POSTGRES_NAME,
+            self._background_reindex_gist_search
+        )
 
     @defer.inlineCallbacks
     def _background_reindex_search(self, progress, batch_size):
@@ -139,6 +144,28 @@ class SearchStore(BackgroundUpdateStore):
         defer.returnValue(result)
 
+    @defer.inlineCallbacks
+    def _background_reindex_gist_search(self, progress, batch_size):
+        def create_index(conn):
+            conn.rollback()
+
+            # CREATE INDEX CONCURRENTLY cannot run inside a transaction,
+            # so the connection has to be switched to autocommit first.
+            conn.set_session(autocommit=True)
+
+            c = conn.cursor()
+            c.execute(
+                "CREATE INDEX CONCURRENTLY event_search_fts_idx_gist"
+                " ON event_search USING GIST (vector)"
+            )
+
+            # Only drop the old index once the GIST replacement exists.
+            c.execute("DROP INDEX event_search_fts_idx")
+
+            conn.set_session(autocommit=False)
+
+        if isinstance(self.database_engine, PostgresEngine):
+            yield self.runWithConnection(create_index)
+
+        yield self._end_background_update(self.EVENT_SEARCH_USE_GIST_POSTGRES_NAME)
+        defer.returnValue(1)
+
     @defer.inlineCallbacks
     def _background_reindex_search_order(self, progress, batch_size):
         target_min_stream_id = progress["target_min_stream_id_inclusive"]
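Once the background update has completed, the index swap can be verified from psql using the standard pg_indexes catalog view (a verification sketch, not part of this commit):

    -- Expect event_search_fts_idx_gist and no event_search_fts_idx.
    SELECT indexname, indexdef
    FROM pg_indexes
    WHERE tablename = 'event_search';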