From 67583281e3f8ea923eedbc56a4c85c7ba75d1582 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Thu, 27 Oct 2022 09:58:12 -0400 Subject: [PATCH] Fix tests for change in PostgreSQL 14 behavior change. (#14310) PostgreSQL 14 changed the behavior of `websearch_to_tsquery` to improve some behaviour. The tests were hitting those edge-cases about handling of hanging double quotes. This fixes the tests to take into account the PostgreSQL version. --- changelog.d/14310.feature | 1 + synapse/storage/databases/main/search.py | 5 ++--- tests/storage/test_room_search.py | 16 ++++++++++++---- 3 files changed, 15 insertions(+), 7 deletions(-) create mode 100644 changelog.d/14310.feature diff --git a/changelog.d/14310.feature b/changelog.d/14310.feature new file mode 100644 index 0000000000..94c8a83212 --- /dev/null +++ b/changelog.d/14310.feature @@ -0,0 +1 @@ +Allow use of postgres and sqllite full-text search operators in search queries. \ No newline at end of file diff --git a/synapse/storage/databases/main/search.py b/synapse/storage/databases/main/search.py index a89fc54c2c..594b935614 100644 --- a/synapse/storage/databases/main/search.py +++ b/synapse/storage/databases/main/search.py @@ -824,9 +824,8 @@ def _tokenize_query(query: str) -> TokenList: in_phrase = False parts = deque(query.split('"')) for i, part in enumerate(parts): - # The contents inside double quotes is treated as a phrase, a trailing - # double quote is not implied. - in_phrase = bool(i % 2) and i != (len(parts) - 1) + # The contents inside double quotes is treated as a phrase. + in_phrase = bool(i % 2) # Pull out the individual words, discarding any non-word characters. words = deque(re.findall(r"([\w\-]+)", part, re.UNICODE)) diff --git a/tests/storage/test_room_search.py b/tests/storage/test_room_search.py index 9ddc19900a..868b5bee84 100644 --- a/tests/storage/test_room_search.py +++ b/tests/storage/test_room_search.py @@ -239,7 +239,6 @@ class MessageSearchTest(HomeserverTestCase): ("fox -nope", (True, False)), ("fox -brown", (False, True)), ('"fox" quick', True), - ('"fox quick', True), ('"quick brown', True), ('" quick "', True), ('" nope"', False), @@ -269,6 +268,15 @@ class MessageSearchTest(HomeserverTestCase): response = self.helper.send(self.room_id, self.PHRASE, tok=self.access_token) self.assertIn("event_id", response) + # The behaviour of a missing trailing double quote changed in PostgreSQL 14 + # from ignoring the initial double quote to treating it as a phrase. + main_store = homeserver.get_datastores().main + found = False + if isinstance(main_store.database_engine, PostgresEngine): + assert main_store.database_engine._version is not None + found = main_store.database_engine._version < 140000 + self.COMMON_CASES.append(('"fox quick', (found, True))) + def test_tokenize_query(self) -> None: """Test the custom logic to tokenize a user's query.""" cases = ( @@ -280,9 +288,9 @@ class MessageSearchTest(HomeserverTestCase): ("fox -brown", ["fox", SearchToken.Not, "brown"]), ("- fox", [SearchToken.Not, "fox"]), ('"fox" quick', [Phrase(["fox"]), SearchToken.And, "quick"]), - # No trailing double quoe. - ('"fox quick', ["fox", SearchToken.And, "quick"]), - ('"-fox quick', [SearchToken.Not, "fox", SearchToken.And, "quick"]), + # No trailing double quote. + ('"fox quick', [Phrase(["fox", "quick"])]), + ('"-fox quick', [Phrase(["-fox", "quick"])]), ('" quick "', [Phrase(["quick"])]), ( 'q"uick brow"n',