diff --git a/synapse/storage/chunk_ordered_table.py b/synapse/storage/chunk_ordered_table.py index 33089c2c60..442d82cbb2 100644 --- a/synapse/storage/chunk_ordered_table.py +++ b/synapse/storage/chunk_ordered_table.py @@ -33,13 +33,18 @@ class ChunkDBOrderedListStore(OrderedListStore): """Used as the list store for room chunks, efficiently maintaining them in topological order on updates. + A room chunk is a connected portion of the room events DAG. As such it + inherits a DAG, i.e. if an event in one chunk references an event in a + second chunk, then we say that the first chunk references the second, and + thus forming a DAG. + The class is designed for use inside transactions and so takes a transaction object in the constructor. This means that it needs to be re-instantiated in each transaction, so all state needs to be stored in the database. Internally the ordering is implemented using floats, and the average is - taken when a node is inserted inbetween other nodes. To avoid presicion + taken when a node is inserted between other nodes. To avoid precision errors a minimum difference between sucessive orderings is attempted to be kept; whenever the difference is too small we attempt to rebalance. See the `_rebalance` function for implementation details. @@ -51,6 +56,10 @@ class ChunkDBOrderedListStore(OrderedListStore): edge is from B to A. This ensures that newer chunks get inserted at the end (rather than the start). + Note: Calls to `add_node` and `add_edge` cannot overlap for the same room, + and so callers should perform some form of per-room locking when using + this class. + Args: txn room_id (str) @@ -59,7 +68,7 @@ class ChunkDBOrderedListStore(OrderedListStore): in a range around the node, where the bounds are rounded to this number of digits. min_difference (int): A rebalance is triggered when the difference - between two successive orderings are less than the reverse of + between two successive orderings is less than the reciprocal of this. """ def __init__(self, diff --git a/synapse/util/katriel_bodlaender.py b/synapse/util/katriel_bodlaender.py index b924a4cfdf..887a6b4681 100644 --- a/synapse/util/katriel_bodlaender.py +++ b/synapse/util/katriel_bodlaender.py @@ -16,8 +16,9 @@ """This module contains an implementation of the Katriel-Bodlaender algorithm, which is used to do online topological ordering of graphs. -Note that the ordering derived from the graph has the first node one with no -incoming edges at the start, and the last node one with no outgoing edges. +Note that the ordering derived from the graph is such that the source node of +an edge comes before the target node of the edge, i.e. a graph of A -> B -> C +would produce the ordering [A, B, C]. This ordering is therefore opposite to what one might expect when considering the room DAG, as newer messages would be added to the start rather than the @@ -25,15 +26,46 @@ end. ***We therefore invert the direction of edges when using the algorithm*** -See https://www.sciencedirect.com/science/article/pii/S0304397507006573 +See: + A tight analysis of the Katriel–Bodlaender algorithm for online topological + ordering + Hsiao-Fei Liua and Kun-Mao Chao + https://www.sciencedirect.com/science/article/pii/S0304397507006573 +and: + Online Topological Ordering + Irit Katriel and Hans L. Bodlaender + http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.78.7933 ) """ from abc import ABCMeta, abstractmethod class OrderedListStore(object): - """An abstract base class that is used to store a topological ordering of - a graph. Suitable for use with the Katriel-Bodlaender algorithm. + """An abstract base class that is used to store a graph and maintain a + topological consistent, total ordering. + + Internally this uses the Katriel-Bodlaender algorithm, which requires the + store expose an interface for the total ordering that supports: + + - Insertion of the node into the ordering either immediately before or + after another node. + - Deletion of the node from the ordering + - Comparing the relative ordering of two arbitary nodes + - Get the node immediately before or after a given node in the ordering + + It also needs to be able to interact with the graph in the following ways: + + - Query the number of edges from a node in the graph + - Query the number of edges into a node in the graph + - Add an edge to the graph + + + Users of subclasses should call `add_node` and `add_edge` whenever editing + the graph. The total ordering exposed will remain constant until the next + call to one of these methods. + + Note: Calls to `add_node` and `add_edge` cannot overlap, and so callers + should perform some form of locking. """ __metaclass__ = ABCMeta @@ -53,7 +85,8 @@ class OrderedListStore(object): @abstractmethod def get_prev(self, node_id): - """Gets the node immediately before the given node + """Gets the node immediately before the given node in the topological + ordering. Args: node_id (str) @@ -65,7 +98,8 @@ class OrderedListStore(object): @abstractmethod def get_next(self, node_id): - """Gets the node immediately after the given node + """Gets the node immediately after the given node in the topological + ordering. Args: node_id (str) @@ -125,7 +159,8 @@ class OrderedListStore(object): list[tuple[float, str]]: Returns a list of tuple of an ordering term and the node ID. The ordering term can be used to sort the returned list. - The ordering is valid until subsequent calls to insert_* functions + The ordering is valid until subsequent calls to `add_edge` + functions """ pass diff --git a/tests/storage/test_chunk_linearizer_table.py b/tests/storage/test_chunk_linearizer_table.py index 5ca436c555..9890dffd60 100644 --- a/tests/storage/test_chunk_linearizer_table.py +++ b/tests/storage/test_chunk_linearizer_table.py @@ -23,6 +23,10 @@ from synapse.storage.chunk_ordered_table import ChunkDBOrderedListStore class ChunkLinearizerStoreTestCase(tests.unittest.TestCase): + """Tests to ensure that the ordering and rebalancing functions of + ChunkDBOrderedListStore work as expected. + """ + def __init__(self, *args, **kwargs): super(ChunkLinearizerStoreTestCase, self).__init__(*args, **kwargs)