Simplify reap_monthly_active_users (#7558)
We can use `make_in_list_sql_clause` rather than doing our own half-baked equivalent, which has the benefit of working just fine with empty lists. (This has quite a lot of tests, so I think it's pretty safe.)
This commit is contained in:
parent
f4269694ce
commit
d14c4d6b6d
|
@ -0,0 +1 @@
|
||||||
|
Simplify `reap_monthly_active_users`.
|
|
@ -17,7 +17,7 @@ import logging
|
||||||
from twisted.internet import defer
|
from twisted.internet import defer
|
||||||
|
|
||||||
from synapse.storage._base import SQLBaseStore
|
from synapse.storage._base import SQLBaseStore
|
||||||
from synapse.storage.database import Database
|
from synapse.storage.database import Database, make_in_list_sql_clause
|
||||||
from synapse.util.caches.descriptors import cached
|
from synapse.util.caches.descriptors import cached
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
@ -187,75 +187,57 @@ class MonthlyActiveUsersStore(MonthlyActiveUsersWorkerStore):
|
||||||
"""
|
"""
|
||||||
|
|
||||||
thirty_days_ago = int(self._clock.time_msec()) - (1000 * 60 * 60 * 24 * 30)
|
thirty_days_ago = int(self._clock.time_msec()) - (1000 * 60 * 60 * 24 * 30)
|
||||||
query_args = [thirty_days_ago]
|
|
||||||
base_sql = "DELETE FROM monthly_active_users WHERE timestamp < ?"
|
|
||||||
|
|
||||||
# Need if/else since 'AND user_id NOT IN ({})' fails on Postgres
|
in_clause, in_clause_args = make_in_list_sql_clause(
|
||||||
# when len(reserved_users) == 0. Works fine on sqlite.
|
self.database_engine, "user_id", reserved_users
|
||||||
if len(reserved_users) > 0:
|
)
|
||||||
# questionmarks is a hack to overcome sqlite not supporting
|
|
||||||
# tuples in 'WHERE IN %s'
|
|
||||||
question_marks = ",".join("?" * len(reserved_users))
|
|
||||||
|
|
||||||
query_args.extend(reserved_users)
|
txn.execute(
|
||||||
sql = base_sql + " AND user_id NOT IN ({})".format(question_marks)
|
"DELETE FROM monthly_active_users WHERE timestamp < ? AND NOT %s"
|
||||||
else:
|
% (in_clause,),
|
||||||
sql = base_sql
|
[thirty_days_ago] + in_clause_args,
|
||||||
|
)
|
||||||
txn.execute(sql, query_args)
|
|
||||||
|
|
||||||
if self._limit_usage_by_mau:
|
if self._limit_usage_by_mau:
|
||||||
# If MAU user count still exceeds the MAU threshold, then delete on
|
# If MAU user count still exceeds the MAU threshold, then delete on
|
||||||
# a least recently active basis.
|
# a least recently active basis.
|
||||||
# Note it is not possible to write this query using OFFSET due to
|
# Note it is not possible to write this query using OFFSET due to
|
||||||
# incompatibilities in how sqlite and postgres support the feature.
|
# incompatibilities in how sqlite and postgres support the feature.
|
||||||
# sqlite requires 'LIMIT -1 OFFSET ?', the LIMIT must be present
|
# Sqlite requires 'LIMIT -1 OFFSET ?', the LIMIT must be present,
|
||||||
# While Postgres does not require 'LIMIT', but also does not support
|
# while Postgres does not require 'LIMIT', but also does not support
|
||||||
# negative LIMIT values. So there is no way to write it that both can
|
# negative LIMIT values. So there is no way to write it that both can
|
||||||
# support
|
# support
|
||||||
if len(reserved_users) == 0:
|
|
||||||
sql = """
|
# Limit must be >= 0 for postgres
|
||||||
DELETE FROM monthly_active_users
|
|
||||||
WHERE user_id NOT IN (
|
|
||||||
SELECT user_id FROM monthly_active_users
|
|
||||||
ORDER BY timestamp DESC
|
|
||||||
LIMIT ?
|
|
||||||
)
|
|
||||||
"""
|
|
||||||
txn.execute(sql, ((self._max_mau_value),))
|
|
||||||
# Need if/else since 'AND user_id NOT IN ({})' fails on Postgres
|
|
||||||
# when len(reserved_users) == 0. Works fine on sqlite.
|
|
||||||
else:
|
|
||||||
# Must be >= 0 for postgres
|
|
||||||
num_of_non_reserved_users_to_remove = max(
|
num_of_non_reserved_users_to_remove = max(
|
||||||
self._max_mau_value - len(reserved_users), 0
|
self._max_mau_value - len(reserved_users), 0
|
||||||
)
|
)
|
||||||
|
|
||||||
# It is important to filter reserved users twice to guard
|
# It is important to filter reserved users twice to guard
|
||||||
# against the case where the reserved user is present in the
|
# against the case where the reserved user is present in the
|
||||||
# SELECT, meaning that a legitmate mau is deleted.
|
# SELECT, meaning that a legitimate mau is deleted.
|
||||||
sql = """
|
sql = """
|
||||||
DELETE FROM monthly_active_users
|
DELETE FROM monthly_active_users
|
||||||
WHERE user_id NOT IN (
|
WHERE user_id NOT IN (
|
||||||
SELECT user_id FROM monthly_active_users
|
SELECT user_id FROM monthly_active_users
|
||||||
WHERE user_id NOT IN ({})
|
WHERE NOT %s
|
||||||
ORDER BY timestamp DESC
|
ORDER BY timestamp DESC
|
||||||
LIMIT ?
|
LIMIT ?
|
||||||
)
|
)
|
||||||
AND user_id NOT IN ({})
|
AND NOT %s
|
||||||
""".format(
|
""" % (
|
||||||
question_marks, question_marks
|
in_clause,
|
||||||
|
in_clause,
|
||||||
)
|
)
|
||||||
|
|
||||||
query_args = [
|
query_args = (
|
||||||
*reserved_users,
|
in_clause_args
|
||||||
num_of_non_reserved_users_to_remove,
|
+ [num_of_non_reserved_users_to_remove]
|
||||||
*reserved_users,
|
+ in_clause_args
|
||||||
]
|
)
|
||||||
|
|
||||||
txn.execute(sql, query_args)
|
txn.execute(sql, query_args)
|
||||||
|
|
||||||
# It seems poor to invalidate the whole cache, Postgres supports
|
# It seems poor to invalidate the whole cache. Postgres supports
|
||||||
# 'Returning' which would allow me to invalidate only the
|
# 'Returning' which would allow me to invalidate only the
|
||||||
# specific users, but sqlite has no way to do this and instead
|
# specific users, but sqlite has no way to do this and instead
|
||||||
# I would need to SELECT and the DELETE which without locking
|
# I would need to SELECT and the DELETE which without locking
|
||||||
|
|
Loading…
Reference in New Issue