Merge pull request #3441 from matrix-org/erikj/redo_erasure

Fix user erasure and re-enable
This commit is contained in:
Erik Johnston 2018-06-25 14:37:01 +01:00 committed by GitHub
commit 33fdcfa957
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 215 additions and 25 deletions

View File

@ -42,7 +42,7 @@ class DeactivateAccountHandler(BaseHandler):
hs.get_reactor().callWhenRunning(self._start_user_parting) hs.get_reactor().callWhenRunning(self._start_user_parting)
@defer.inlineCallbacks @defer.inlineCallbacks
def deactivate_account(self, user_id): def deactivate_account(self, user_id, erase_data):
"""Deactivate a user's account """Deactivate a user's account
Args: Args:
@ -92,6 +92,11 @@ class DeactivateAccountHandler(BaseHandler):
# delete from user directory # delete from user directory
yield self.user_directory_handler.handle_user_deactivated(user_id) yield self.user_directory_handler.handle_user_deactivated(user_id)
# Mark the user as erased, if they asked for that
if erase_data:
logger.info("Marking %s as erased", user_id)
yield self.store.mark_user_erased(user_id)
# Now start the process that goes through that list and # Now start the process that goes through that list and
# parts users from rooms (if it isn't already running) # parts users from rooms (if it isn't already running)
self._start_user_parting() self._start_user_parting()

View File

@ -495,7 +495,20 @@ class FederationHandler(BaseHandler):
for e_id, key_to_eid in event_to_state_ids.iteritems() for e_id, key_to_eid in event_to_state_ids.iteritems()
} }
erased_senders = yield self.store.are_users_erased(
e.sender for e in events,
)
def redact_disallowed(event, state): def redact_disallowed(event, state):
# if the sender has been gdpr17ed, always return a redacted
# copy of the event.
if erased_senders[event.sender]:
logger.info(
"Sender of %s has been erased, redacting",
event.event_id,
)
return prune_event(event)
if not state: if not state:
return event return event

View File

@ -23,6 +23,7 @@ from synapse.storage.roommember import RoomMemberWorkerStore
from synapse.storage.state import StateGroupWorkerStore from synapse.storage.state import StateGroupWorkerStore
from synapse.storage.stream import StreamWorkerStore from synapse.storage.stream import StreamWorkerStore
from synapse.storage.signatures import SignatureWorkerStore from synapse.storage.signatures import SignatureWorkerStore
from synapse.storage.user_erasure_store import UserErasureWorkerStore
from ._base import BaseSlavedStore from ._base import BaseSlavedStore
from ._slaved_id_tracker import SlavedIdTracker from ._slaved_id_tracker import SlavedIdTracker
@ -45,6 +46,7 @@ class SlavedEventStore(EventFederationWorkerStore,
EventsWorkerStore, EventsWorkerStore,
StateGroupWorkerStore, StateGroupWorkerStore,
SignatureWorkerStore, SignatureWorkerStore,
UserErasureWorkerStore,
BaseSlavedStore): BaseSlavedStore):
def __init__(self, db_conn, hs): def __init__(self, db_conn, hs):

View File

@ -254,7 +254,9 @@ class DeactivateAccountRestServlet(ClientV1RestServlet):
if not is_admin: if not is_admin:
raise AuthError(403, "You are not a server admin") raise AuthError(403, "You are not a server admin")
yield self._deactivate_account_handler.deactivate_account(target_user_id) yield self._deactivate_account_handler.deactivate_account(
target_user_id, False,
)
defer.returnValue((200, {})) defer.returnValue((200, {}))

View File

@ -1,6 +1,7 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright 2015, 2016 OpenMarket Ltd # Copyright 2015, 2016 OpenMarket Ltd
# Copyright 2017 Vector Creations Ltd # Copyright 2017 Vector Creations Ltd
# Copyright 2018 New Vector Ltd
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
@ -15,6 +16,7 @@
# limitations under the License. # limitations under the License.
import logging import logging
from six.moves import http_client
from twisted.internet import defer from twisted.internet import defer
from synapse.api.auth import has_access_token from synapse.api.auth import has_access_token
@ -186,13 +188,20 @@ class DeactivateAccountRestServlet(RestServlet):
@defer.inlineCallbacks @defer.inlineCallbacks
def on_POST(self, request): def on_POST(self, request):
body = parse_json_object_from_request(request) body = parse_json_object_from_request(request)
erase = body.get("erase", False)
if not isinstance(erase, bool):
raise SynapseError(
http_client.BAD_REQUEST,
"Param 'erase' must be a boolean, if given",
Codes.BAD_JSON,
)
requester = yield self.auth.get_user_by_req(request) requester = yield self.auth.get_user_by_req(request)
# allow ASes to dectivate their own users # allow ASes to dectivate their own users
if requester.app_service: if requester.app_service:
yield self._deactivate_account_handler.deactivate_account( yield self._deactivate_account_handler.deactivate_account(
requester.user.to_string() requester.user.to_string(), erase,
) )
defer.returnValue((200, {})) defer.returnValue((200, {}))
@ -200,7 +209,7 @@ class DeactivateAccountRestServlet(RestServlet):
requester, body, self.hs.get_ip_from_request(request), requester, body, self.hs.get_ip_from_request(request),
) )
yield self._deactivate_account_handler.deactivate_account( yield self._deactivate_account_handler.deactivate_account(
requester.user.to_string(), requester.user.to_string(), erase,
) )
defer.returnValue((200, {})) defer.returnValue((200, {}))

View File

@ -20,6 +20,7 @@ import time
import logging import logging
from synapse.storage.devices import DeviceStore from synapse.storage.devices import DeviceStore
from synapse.storage.user_erasure_store import UserErasureStore
from .appservice import ( from .appservice import (
ApplicationServiceStore, ApplicationServiceTransactionStore ApplicationServiceStore, ApplicationServiceTransactionStore
) )
@ -88,6 +89,7 @@ class DataStore(RoomMemberStore, RoomStore,
DeviceInboxStore, DeviceInboxStore,
UserDirectoryStore, UserDirectoryStore,
GroupServerStore, GroupServerStore,
UserErasureStore,
): ):
def __init__(self, db_conn, hs): def __init__(self, db_conn, hs):

View File

@ -0,0 +1,21 @@
/* Copyright 2018 New Vector Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-- a table of users who have requested that their details be erased
CREATE TABLE erased_users (
user_id TEXT NOT NULL
);
CREATE UNIQUE INDEX erased_users_user ON erased_users(user_id);

View File

@ -0,0 +1,103 @@
# -*- coding: utf-8 -*-
# Copyright 2018 New Vector Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import operator
from twisted.internet import defer
from synapse.storage._base import SQLBaseStore
from synapse.util.caches.descriptors import cachedList, cached
class UserErasureWorkerStore(SQLBaseStore):
@cached()
def is_user_erased(self, user_id):
"""
Check if the given user id has requested erasure
Args:
user_id (str): full user id to check
Returns:
Deferred[bool]: True if the user has requested erasure
"""
return self._simple_select_onecol(
table="erased_users",
keyvalues={"user_id": user_id},
retcol="1",
desc="is_user_erased",
).addCallback(operator.truth)
@cachedList(
cached_method_name="is_user_erased",
list_name="user_ids",
inlineCallbacks=True,
)
def are_users_erased(self, user_ids):
"""
Checks which users in a list have requested erasure
Args:
user_ids (iterable[str]): full user id to check
Returns:
Deferred[dict[str, bool]]:
for each user, whether the user has requested erasure.
"""
# this serves the dual purpose of (a) making sure we can do len and
# iterate it multiple times, and (b) avoiding duplicates.
user_ids = tuple(set(user_ids))
def _get_erased_users(txn):
txn.execute(
"SELECT user_id FROM erased_users WHERE user_id IN (%s)" % (
",".join("?" * len(user_ids))
),
user_ids,
)
return set(r[0] for r in txn)
erased_users = yield self.runInteraction(
"are_users_erased", _get_erased_users,
)
res = dict((u, u in erased_users) for u in user_ids)
defer.returnValue(res)
class UserErasureStore(UserErasureWorkerStore):
def mark_user_erased(self, user_id):
"""Indicate that user_id wishes their message history to be erased.
Args:
user_id (str): full user_id to be erased
"""
def f(txn):
# first check if they are already in the list
txn.execute(
"SELECT 1 FROM erased_users WHERE user_id = ?",
(user_id, )
)
if txn.fetchone():
return
# they are not already there: do the insert.
txn.execute(
"INSERT INTO erased_users (user_id) VALUES (?)",
(user_id, )
)
self._invalidate_cache_and_stream(
txn, self.is_user_erased, (user_id,)
)
return self.runInteraction("mark_user_erased", f)

View File

@ -12,15 +12,17 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import itertools
import logging import logging
import operator
from twisted.internet import defer from twisted.internet import defer
from synapse.api.constants import Membership, EventTypes from synapse.api.constants import EventTypes, Membership
from synapse.events.utils import prune_event
from synapse.util.logcontext import make_deferred_yieldable, preserve_fn from synapse.util.logcontext import (
make_deferred_yieldable, preserve_fn,
)
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -95,16 +97,27 @@ def filter_events_for_client(store, user_id, events, is_peeking=False,
if ignore_dict_content else [] if ignore_dict_content else []
) )
erased_senders = yield store.are_users_erased((e.sender for e in events))
def allowed(event): def allowed(event):
""" """
Args: Args:
event (synapse.events.EventBase): event to check event (synapse.events.EventBase): event to check
Returns:
None|EventBase:
None if the user cannot see this event at all
a redacted copy of the event if they can only see a redacted
version
the original event if they can see it as normal.
""" """
if not event.is_state() and event.sender in ignore_list: if not event.is_state() and event.sender in ignore_list:
return False return None
if event.event_id in always_include_ids: if event.event_id in always_include_ids:
return True return event
state = event_id_to_state[event.event_id] state = event_id_to_state[event.event_id]
@ -118,10 +131,6 @@ def filter_events_for_client(store, user_id, events, is_peeking=False,
if visibility not in VISIBILITY_PRIORITY: if visibility not in VISIBILITY_PRIORITY:
visibility = "shared" visibility = "shared"
# if it was world_readable, it's easy: everyone can read it
if visibility == "world_readable":
return True
# Always allow history visibility events on boundaries. This is done # Always allow history visibility events on boundaries. This is done
# by setting the effective visibility to the least restrictive # by setting the effective visibility to the least restrictive
# of the old vs new. # of the old vs new.
@ -155,7 +164,7 @@ def filter_events_for_client(store, user_id, events, is_peeking=False,
if membership == "leave" and ( if membership == "leave" and (
prev_membership == "join" or prev_membership == "invite" prev_membership == "join" or prev_membership == "invite"
): ):
return True return event
new_priority = MEMBERSHIP_PRIORITY.index(membership) new_priority = MEMBERSHIP_PRIORITY.index(membership)
old_priority = MEMBERSHIP_PRIORITY.index(prev_membership) old_priority = MEMBERSHIP_PRIORITY.index(prev_membership)
@ -166,31 +175,55 @@ def filter_events_for_client(store, user_id, events, is_peeking=False,
if membership is None: if membership is None:
membership_event = state.get((EventTypes.Member, user_id), None) membership_event = state.get((EventTypes.Member, user_id), None)
if membership_event: if membership_event:
# XXX why do we do this?
# https://github.com/matrix-org/synapse/issues/3350
if membership_event.event_id not in event_id_forgotten: if membership_event.event_id not in event_id_forgotten:
membership = membership_event.membership membership = membership_event.membership
# if the user was a member of the room at the time of the event, # if the user was a member of the room at the time of the event,
# they can see it. # they can see it.
if membership == Membership.JOIN: if membership == Membership.JOIN:
return True return event
# otherwise, it depends on the room visibility.
if visibility == "joined": if visibility == "joined":
# we weren't a member at the time of the event, so we can't # we weren't a member at the time of the event, so we can't
# see this event. # see this event.
return False return None
elif visibility == "invited": elif visibility == "invited":
# user can also see the event if they were *invited* at the time # user can also see the event if they were *invited* at the time
# of the event. # of the event.
return membership == Membership.INVITE return (
event if membership == Membership.INVITE else None
)
else: elif visibility == "shared" and is_peeking:
# visibility is shared: user can also see the event if they have # if the visibility is shared, users cannot see the event unless
# become a member since the event # they have *subequently* joined the room (or were members at the
# time, of course)
# #
# XXX: if the user has subsequently joined and then left again, # XXX: if the user has subsequently joined and then left again,
# ideally we would share history up to the point they left. But # ideally we would share history up to the point they left. But
# we don't know when they left. # we don't know when they left. We just treat it as though they
return not is_peeking # never joined, and restrict access.
return None
defer.returnValue(list(filter(allowed, events))) # the visibility is either shared or world_readable, and the user was
# not a member at the time. We allow it, provided the original sender
# has not requested their data to be erased, in which case, we return
# a redacted version.
if erased_senders[event.sender]:
return prune_event(event)
return event
# check each event: gives an iterable[None|EventBase]
filtered_events = itertools.imap(allowed, events)
# remove the None entries
filtered_events = filter(operator.truth, filtered_events)
# we turn it into a list before returning it.
defer.returnValue(list(filtered_events))