Merge pull request #3163 from matrix-org/cohort_analytics
user visit data
This commit is contained in:
commit
dc8930ea9e
|
@ -49,3 +49,4 @@ env/
|
||||||
*.config
|
*.config
|
||||||
|
|
||||||
.vscode/
|
.vscode/
|
||||||
|
.ropeproject/
|
||||||
|
|
|
@ -484,6 +484,14 @@ def run(hs):
|
||||||
" changes across releases."
|
" changes across releases."
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def generate_user_daily_visit_stats():
|
||||||
|
hs.get_datastore().generate_user_daily_visits()
|
||||||
|
|
||||||
|
# Rather than update on per session basis, batch up the requests.
|
||||||
|
# If you increase the loop period, the accuracy of user_daily_visits
|
||||||
|
# table will decrease
|
||||||
|
clock.looping_call(generate_user_daily_visit_stats, 5 * 60 * 1000)
|
||||||
|
|
||||||
if hs.config.report_stats:
|
if hs.config.report_stats:
|
||||||
logger.info("Scheduling stats reporting for 3 hour intervals")
|
logger.info("Scheduling stats reporting for 3 hour intervals")
|
||||||
clock.looping_call(phone_stats_home, 3 * 60 * 60 * 1000)
|
clock.looping_call(phone_stats_home, 3 * 60 * 60 * 1000)
|
||||||
|
|
|
@ -14,6 +14,11 @@
|
||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
|
import datetime
|
||||||
|
from dateutil import tz
|
||||||
|
import time
|
||||||
|
import logging
|
||||||
|
|
||||||
from synapse.storage.devices import DeviceStore
|
from synapse.storage.devices import DeviceStore
|
||||||
from .appservice import (
|
from .appservice import (
|
||||||
ApplicationServiceStore, ApplicationServiceTransactionStore
|
ApplicationServiceStore, ApplicationServiceTransactionStore
|
||||||
|
@ -55,10 +60,6 @@ from .engines import PostgresEngine
|
||||||
from synapse.api.constants import PresenceState
|
from synapse.api.constants import PresenceState
|
||||||
from synapse.util.caches.stream_change_cache import StreamChangeCache
|
from synapse.util.caches.stream_change_cache import StreamChangeCache
|
||||||
|
|
||||||
|
|
||||||
import logging
|
|
||||||
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
@ -213,6 +214,9 @@ class DataStore(RoomMemberStore, RoomStore,
|
||||||
self._stream_order_on_start = self.get_room_max_stream_ordering()
|
self._stream_order_on_start = self.get_room_max_stream_ordering()
|
||||||
self._min_stream_order_on_start = self.get_room_min_stream_ordering()
|
self._min_stream_order_on_start = self.get_room_min_stream_ordering()
|
||||||
|
|
||||||
|
# Used in _generate_user_daily_visits to keep track of progress
|
||||||
|
self._last_user_visit_update = self._get_start_of_day()
|
||||||
|
|
||||||
super(DataStore, self).__init__(db_conn, hs)
|
super(DataStore, self).__init__(db_conn, hs)
|
||||||
|
|
||||||
def take_presence_startup_info(self):
|
def take_presence_startup_info(self):
|
||||||
|
@ -347,6 +351,65 @@ class DataStore(RoomMemberStore, RoomStore,
|
||||||
|
|
||||||
return self.runInteraction("count_r30_users", _count_r30_users)
|
return self.runInteraction("count_r30_users", _count_r30_users)
|
||||||
|
|
||||||
|
def _get_start_of_day(self):
|
||||||
|
"""
|
||||||
|
Returns millisecond unixtime for start of UTC day.
|
||||||
|
"""
|
||||||
|
now = datetime.datetime.utcnow()
|
||||||
|
today_start = datetime.datetime(now.year, now.month,
|
||||||
|
now.day, tzinfo=tz.tzutc())
|
||||||
|
return int(time.mktime(today_start.timetuple())) * 1000
|
||||||
|
|
||||||
|
def generate_user_daily_visits(self):
|
||||||
|
"""
|
||||||
|
Generates daily visit data for use in cohort/ retention analysis
|
||||||
|
"""
|
||||||
|
def _generate_user_daily_visits(txn):
|
||||||
|
logger.info("Calling _generate_user_daily_visits")
|
||||||
|
today_start = self._get_start_of_day()
|
||||||
|
a_day_in_milliseconds = 24 * 60 * 60 * 1000
|
||||||
|
now = self.clock.time_msec()
|
||||||
|
|
||||||
|
sql = """
|
||||||
|
INSERT INTO user_daily_visits (user_id, device_id, timestamp)
|
||||||
|
SELECT u.user_id, u.device_id, ?
|
||||||
|
FROM user_ips AS u
|
||||||
|
LEFT JOIN (
|
||||||
|
SELECT user_id, device_id, timestamp FROM user_daily_visits
|
||||||
|
WHERE timestamp IS ?
|
||||||
|
) udv
|
||||||
|
ON u.user_id = udv.user_id AND u.device_id=udv.device_id
|
||||||
|
WHERE last_seen > ? AND last_seen <= ? AND udv.timestamp IS NULL
|
||||||
|
"""
|
||||||
|
|
||||||
|
# This means that the day has rolled over but there could still
|
||||||
|
# be entries from the previous day. There is an edge case
|
||||||
|
# where if the user logs in at 23:59 and overwrites their
|
||||||
|
# last_seen at 00:01 then they will not be counted in the
|
||||||
|
# previous day's stats - it is important that the query is run
|
||||||
|
# often to minimise this case.
|
||||||
|
if today_start > self._last_user_visit_update:
|
||||||
|
yesterday_start = today_start - a_day_in_milliseconds
|
||||||
|
txn.execute(sql, (
|
||||||
|
yesterday_start, yesterday_start,
|
||||||
|
self._last_user_visit_update, today_start
|
||||||
|
))
|
||||||
|
self._last_user_visit_update = today_start
|
||||||
|
|
||||||
|
txn.execute(sql, (
|
||||||
|
today_start, today_start,
|
||||||
|
self._last_user_visit_update,
|
||||||
|
now
|
||||||
|
))
|
||||||
|
# Update _last_user_visit_update to now. The reason to do this
|
||||||
|
# rather just clamping to the beginning of the day is to limit
|
||||||
|
# the size of the join - meaning that the query can be run more
|
||||||
|
# frequently
|
||||||
|
self._last_user_visit_update = now
|
||||||
|
|
||||||
|
return self.runInteraction("generate_user_daily_visits",
|
||||||
|
_generate_user_daily_visits)
|
||||||
|
|
||||||
def get_users(self):
|
def get_users(self):
|
||||||
"""Function to reterive a list of users in users table.
|
"""Function to reterive a list of users in users table.
|
||||||
|
|
||||||
|
|
|
@ -55,6 +55,13 @@ class ClientIpStore(background_updates.BackgroundUpdateStore):
|
||||||
columns=["user_id", "last_seen"],
|
columns=["user_id", "last_seen"],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
self.register_background_index_update(
|
||||||
|
"user_ips_last_seen_only_index",
|
||||||
|
index_name="user_ips_last_seen_only",
|
||||||
|
table="user_ips",
|
||||||
|
columns=["last_seen"],
|
||||||
|
)
|
||||||
|
|
||||||
# (user_id, access_token, ip) -> (user_agent, device_id, last_seen)
|
# (user_id, access_token, ip) -> (user_agent, device_id, last_seen)
|
||||||
self._batch_row_update = {}
|
self._batch_row_update = {}
|
||||||
|
|
||||||
|
|
|
@ -26,7 +26,7 @@ logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
# Remember to update this number every time a change is made to database
|
# Remember to update this number every time a change is made to database
|
||||||
# schema files, so the users will be informed on server restarts.
|
# schema files, so the users will be informed on server restarts.
|
||||||
SCHEMA_VERSION = 48
|
SCHEMA_VERSION = 49
|
||||||
|
|
||||||
dir_path = os.path.abspath(os.path.dirname(__file__))
|
dir_path = os.path.abspath(os.path.dirname(__file__))
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,21 @@
|
||||||
|
/* Copyright 2018 New Vector Ltd
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
CREATE TABLE user_daily_visits ( user_id TEXT NOT NULL,
|
||||||
|
device_id TEXT,
|
||||||
|
timestamp BIGINT NOT NULL );
|
||||||
|
CREATE INDEX user_daily_visits_uts_idx ON user_daily_visits(user_id, timestamp);
|
||||||
|
CREATE INDEX user_daily_visits_ts_idx ON user_daily_visits(timestamp);
|
|
@ -0,0 +1,17 @@
|
||||||
|
/* Copyright 2018 New Vector Ltd
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
INSERT into background_updates (update_name, progress_json)
|
||||||
|
VALUES ('user_ips_last_seen_only_index', '{}');
|
Loading…
Reference in New Issue