Add account data to export command (#14969)

* Add account data to export command

* newsfile

* remove not needed function

* update newsfile

* adopt #14973
This commit is contained in:
Dirk Klimpel 2023-02-17 14:54:55 +01:00 committed by GitHub
parent 4f4f27e57f
commit 61bfcd669a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 79 additions and 16 deletions

View File

@ -0,0 +1 @@
Add account data to the command line [user data export tool](https://matrix-org.github.io/synapse/v1.78/usage/administration/admin_faq.html#how-can-i-export-user-data).

View File

@ -71,6 +71,9 @@ output-directory
│ ├───invite_state │ ├───invite_state
│ └───knock_state │ └───knock_state
└───user_data └───user_data
├───account_data
│ ├───global
│ └───<room_id>
├───connections ├───connections
├───devices ├───devices
└───profile └───profile

View File

@ -17,7 +17,7 @@ import logging
import os import os
import sys import sys
import tempfile import tempfile
from typing import List, Optional from typing import List, Mapping, Optional
from twisted.internet import defer, task from twisted.internet import defer, task
@ -222,6 +222,19 @@ class FileExfiltrationWriter(ExfiltrationWriter):
with open(connection_file, "a") as f: with open(connection_file, "a") as f:
print(json.dumps(connection), file=f) print(json.dumps(connection), file=f)
def write_account_data(
    self, file_name: str, account_data: Mapping[str, JsonDict]
) -> None:
    """Append a JSON dump of account data to a file in the export tree.

    The target is `<base_directory>/user_data/account_data/<file_name>`;
    the directory is created on demand.

    Args:
        file_name: name of the file inside the account data directory.
        account_data: mapping that is serialised with `json.dumps` and
            written as a single line.
    """
    target_dir = os.path.join(self.base_directory, "user_data", "account_data")
    os.makedirs(target_dir, exist_ok=True)

    target_path = os.path.join(target_dir, file_name)
    # Append mode: an existing file of the same name is extended, not replaced.
    with open(target_path, "a") as out:
        out.write(json.dumps(account_data) + "\n")
def finished(self) -> str: def finished(self) -> str:
return self.base_directory return self.base_directory

View File

@ -14,7 +14,7 @@
import abc import abc
import logging import logging
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Set from typing import TYPE_CHECKING, Any, Dict, List, Mapping, Optional, Set
from synapse.api.constants import Direction, Membership from synapse.api.constants import Direction, Membership
from synapse.events import EventBase from synapse.events import EventBase
@ -29,7 +29,7 @@ logger = logging.getLogger(__name__)
class AdminHandler: class AdminHandler:
def __init__(self, hs: "HomeServer"): def __init__(self, hs: "HomeServer"):
self.store = hs.get_datastores().main self._store = hs.get_datastores().main
self._device_handler = hs.get_device_handler() self._device_handler = hs.get_device_handler()
self._storage_controllers = hs.get_storage_controllers() self._storage_controllers = hs.get_storage_controllers()
self._state_storage_controller = self._storage_controllers.state self._state_storage_controller = self._storage_controllers.state
@ -38,7 +38,7 @@ class AdminHandler:
async def get_whois(self, user: UserID) -> JsonDict: async def get_whois(self, user: UserID) -> JsonDict:
connections = [] connections = []
sessions = await self.store.get_user_ip_and_agents(user) sessions = await self._store.get_user_ip_and_agents(user)
for session in sessions: for session in sessions:
connections.append( connections.append(
{ {
@ -57,7 +57,7 @@ class AdminHandler:
async def get_user(self, user: UserID) -> Optional[JsonDict]: async def get_user(self, user: UserID) -> Optional[JsonDict]:
"""Function to get user details""" """Function to get user details"""
user_info_dict = await self.store.get_user_by_id(user.to_string()) user_info_dict = await self._store.get_user_by_id(user.to_string())
if user_info_dict is None: if user_info_dict is None:
return None return None
@ -89,11 +89,11 @@ class AdminHandler:
} }
# Add additional user metadata # Add additional user metadata
profile = await self.store.get_profileinfo(user.localpart) profile = await self._store.get_profileinfo(user.localpart)
threepids = await self.store.user_get_threepids(user.to_string()) threepids = await self._store.user_get_threepids(user.to_string())
external_ids = [ external_ids = [
({"auth_provider": auth_provider, "external_id": external_id}) ({"auth_provider": auth_provider, "external_id": external_id})
for auth_provider, external_id in await self.store.get_external_ids_by_user( for auth_provider, external_id in await self._store.get_external_ids_by_user(
user.to_string() user.to_string()
) )
] ]
@ -101,7 +101,7 @@ class AdminHandler:
user_info_dict["avatar_url"] = profile.avatar_url user_info_dict["avatar_url"] = profile.avatar_url
user_info_dict["threepids"] = threepids user_info_dict["threepids"] = threepids
user_info_dict["external_ids"] = external_ids user_info_dict["external_ids"] = external_ids
user_info_dict["erased"] = await self.store.is_user_erased(user.to_string()) user_info_dict["erased"] = await self._store.is_user_erased(user.to_string())
return user_info_dict return user_info_dict
@ -117,7 +117,7 @@ class AdminHandler:
The returned value is that returned by `writer.finished()`. The returned value is that returned by `writer.finished()`.
""" """
# Get all rooms the user is in or has been in # Get all rooms the user is in or has been in
rooms = await self.store.get_rooms_for_local_user_where_membership_is( rooms = await self._store.get_rooms_for_local_user_where_membership_is(
user_id, user_id,
membership_list=( membership_list=(
Membership.JOIN, Membership.JOIN,
@ -131,7 +131,7 @@ class AdminHandler:
# We only try and fetch events for rooms the user has been in. If # We only try and fetch events for rooms the user has been in. If
# they've been e.g. invited to a room without joining then we handle # they've been e.g. invited to a room without joining then we handle
# those separately. # those separately.
rooms_user_has_been_in = await self.store.get_rooms_user_has_been_in(user_id) rooms_user_has_been_in = await self._store.get_rooms_user_has_been_in(user_id)
for index, room in enumerate(rooms): for index, room in enumerate(rooms):
room_id = room.room_id room_id = room.room_id
@ -140,7 +140,7 @@ class AdminHandler:
"[%s] Handling room %s, %d/%d", user_id, room_id, index + 1, len(rooms) "[%s] Handling room %s, %d/%d", user_id, room_id, index + 1, len(rooms)
) )
forgotten = await self.store.did_forget(user_id, room_id) forgotten = await self._store.did_forget(user_id, room_id)
if forgotten: if forgotten:
logger.info("[%s] User forgot room %d, ignoring", user_id, room_id) logger.info("[%s] User forgot room %d, ignoring", user_id, room_id)
continue continue
@ -152,14 +152,14 @@ class AdminHandler:
if room.membership == Membership.INVITE: if room.membership == Membership.INVITE:
event_id = room.event_id event_id = room.event_id
invite = await self.store.get_event(event_id, allow_none=True) invite = await self._store.get_event(event_id, allow_none=True)
if invite: if invite:
invited_state = invite.unsigned["invite_room_state"] invited_state = invite.unsigned["invite_room_state"]
writer.write_invite(room_id, invite, invited_state) writer.write_invite(room_id, invite, invited_state)
if room.membership == Membership.KNOCK: if room.membership == Membership.KNOCK:
event_id = room.event_id event_id = room.event_id
knock = await self.store.get_event(event_id, allow_none=True) knock = await self._store.get_event(event_id, allow_none=True)
if knock: if knock:
knock_state = knock.unsigned["knock_room_state"] knock_state = knock.unsigned["knock_room_state"]
writer.write_knock(room_id, knock, knock_state) writer.write_knock(room_id, knock, knock_state)
@ -170,7 +170,7 @@ class AdminHandler:
# were joined. We estimate that point by looking at the # were joined. We estimate that point by looking at the
# stream_ordering of the last membership if it wasn't a join. # stream_ordering of the last membership if it wasn't a join.
if room.membership == Membership.JOIN: if room.membership == Membership.JOIN:
stream_ordering = self.store.get_room_max_stream_ordering() stream_ordering = self._store.get_room_max_stream_ordering()
else: else:
stream_ordering = room.stream_ordering stream_ordering = room.stream_ordering
@ -197,7 +197,7 @@ class AdminHandler:
# events that we have and then filtering, this isn't the most # events that we have and then filtering, this isn't the most
# efficient method perhaps but it does guarantee we get everything. # efficient method perhaps but it does guarantee we get everything.
while True: while True:
events, _ = await self.store.paginate_room_events( events, _ = await self._store.paginate_room_events(
room_id, from_key, to_key, limit=100, direction=Direction.FORWARDS room_id, from_key, to_key, limit=100, direction=Direction.FORWARDS
) )
if not events: if not events:
@ -263,6 +263,13 @@ class AdminHandler:
connections["devices"][""]["sessions"][0]["connections"] connections["devices"][""]["sessions"][0]["connections"]
) )
# Get all account data the user has global and in rooms
global_data = await self._store.get_global_account_data_for_user(user_id)
by_room_data = await self._store.get_room_account_data_for_user(user_id)
writer.write_account_data("global", global_data)
for room_id in by_room_data:
writer.write_account_data(room_id, by_room_data[room_id])
return writer.finished() return writer.finished()
@ -340,6 +347,18 @@ class ExfiltrationWriter(metaclass=abc.ABCMeta):
""" """
raise NotImplementedError() raise NotImplementedError()
@abc.abstractmethod
def write_account_data(
    self, file_name: str, account_data: Mapping[str, JsonDict]
) -> None:
    """Persist one set of a user's account data.

    Concrete writers must override this method; the base implementation
    only raises.

    Args:
        file_name: name under which this set of account data is written.
        account_data: mapping holding either the global or one room's
            account data.

    Raises:
        NotImplementedError: always, when not overridden.
    """
    raise NotImplementedError()
@abc.abstractmethod @abc.abstractmethod
def finished(self) -> Any: def finished(self) -> Any:
"""Called when all data has successfully been exported and written. """Called when all data has successfully been exported and written.

View File

@ -296,3 +296,30 @@ class ExfiltrateData(unittest.HomeserverTestCase):
self.assertEqual(args[0][0]["user_agent"], "user_agent") self.assertEqual(args[0][0]["user_agent"], "user_agent")
self.assertGreater(args[0][0]["last_seen"], 0) self.assertGreater(args[0][0]["last_seen"], 0)
self.assertNotIn("access_token", args[0][0]) self.assertNotIn("access_token", args[0][0])
def test_account_data(self) -> None:
    """Check that global and per-room account data reach the writer."""
    # Seed one global entry and one room-scoped entry for the user.
    self.get_success(
        self._store.add_account_data_for_user(self.user2, "m.global", {"a": 1})
    )
    self.get_success(
        self._store.add_account_data_to_room(
            self.user2, "test_room", "m.per_room", {"b": 2}
        )
    )

    exporter = Mock()
    self.get_success(self.admin_handler.export_user_data(self.user2, exporter))

    # Expect the global data first, then the room data.
    exporter.write_account_data.assert_called()
    recorded_calls = exporter.write_account_data.call_args_list

    global_args = recorded_calls[0][0]
    self.assertEqual(global_args[0], "global")
    self.assertEqual(global_args[1]["m.global"]["a"], 1)

    room_args = recorded_calls[1][0]
    self.assertEqual(room_args[0], "test_room")
    self.assertEqual(room_args[1]["m.per_room"]["b"], 2)