synapse-old/synapse/util/katriel_bodlaender.py

338 lines
9.9 KiB
Python
Raw Normal View History

# -*- coding: utf-8 -*-
# Copyright 2018 New Vector Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This module contains an implementation of the Katriel-Bodlaender algorithm,
which is used to do online topological ordering of graphs.

Note that the ordering derived from the graph is such that the source node of
an edge comes before the target node of the edge, i.e. a graph of A -> B -> C
would produce the ordering [A, B, C].

This ordering is therefore opposite to what one might expect when considering
the room DAG, as newer messages would be added to the start rather than the
end.

***The ChunkDBOrderedListStore therefore inverts the direction of edges***

See:
    A tight analysis of the Katriel-Bodlaender algorithm for online
    topological ordering
    Hsiao-Fei Liu and Kun-Mao Chao
    https://www.sciencedirect.com/science/article/pii/S0304397507006573
and:
    Online Topological Ordering
    Irit Katriel and Hans L. Bodlaender
    http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.78.7933
"""
from abc import ABCMeta, abstractmethod
# A ``__metaclass__ = ABCMeta`` class attribute is only honoured by Python 2;
# Python 3 silently ignores it, which leaves the ``@abstractmethod`` markers
# unenforced. Deriving from a base that was *created by* ABCMeta applies the
# metaclass on both major versions without needing version-specific syntax.
_AbstractBase = ABCMeta("_AbstractBase", (object,), {})


class OrderedListStore(_AbstractBase):
    """An abstract base class that is used to store a graph and maintain a
    topological consistent, total ordering.

    Internally this uses the Katriel-Bodlaender algorithm, which requires the
    store expose an interface for the total ordering that supports:

        - Insertion of the node into the ordering either immediately before or
          after another node.
        - Deletion of the node from the ordering
        - Comparing the relative ordering of two arbitrary nodes
        - Get the node immediately before or after a given node in the ordering

    It also needs to be able to interact with the graph in the following ways:

        - Query the number of edges from a node in the graph
        - Query the number of edges into a node in the graph
        - Add an edge to the graph

    Users of subclasses should call `add_node` and `add_edge` whenever editing
    the graph. The total ordering exposed will remain constant until the next
    call to one of these methods.

    Note: Calls to `add_node` and `add_edge` cannot overlap, and so callers
    should perform some form of locking.
    """

    def add_node(self, node_id):
        """Adds a node to the graph.

        The node is placed at the end of the ordering (this is what
        `_insert_before` does when given a target of None).

        Args:
            node_id (str)
        """
        self._insert_before(node_id, None)

    def add_edge(self, source, target):
        """Adds a new edge to the graph and updates the ordering.

        See module level docs.

        Note that both the source and target nodes must have been inserted into
        the store (at an arbitrary position) already.

        Args:
            source (str): The source node of the new edge
            target (str): The target node of the new edge
        """
        # The following is the Katriel-Bodlaender algorithm.

        # Nodes visited while walking backwards from `source` / forwards from
        # `target`; these are the nodes that get repositioned below.
        to_s = []
        from_t = []

        # Candidate nodes still to visit on each side, as (ordering term, node)
        # tuples returned by the store.
        to_s_neighbours = []
        from_t_neighbours = []

        # Running totals of the edge counts seen so far on each side.
        to_s_indegree = 0
        from_t_outdegree = 0

        s = source
        t = target

        # None is the "ran out of nodes on this side" sentinel, so compare
        # against it explicitly: a plain truthiness test would also stop on a
        # legitimate but falsy node ID (e.g. 0 or "") and leave the ordering
        # inconsistent with the edge being added.
        while s is not None and t is not None and not self.is_before(s, t):
            m_s = to_s_indegree
            m_t = from_t_outdegree

            # These functions return a tuple where the first term is a float
            # that can be used to order the list of neighbours.
            # These are valid until the next write
            pe_s = self.get_nodes_with_edges_to(s)
            fe_t = self.get_nodes_with_edges_from(t)

            l_s = len(pe_s)
            l_t = len(fe_t)

            # Advance whichever side has the smaller accumulated edge count;
            # on a tie both conditions hold and both sides advance.
            if m_s + l_s <= m_t + l_t:
                to_s.append(s)
                to_s_neighbours.extend(pe_s)
                to_s_indegree += l_s

                if to_s_neighbours:
                    # Ascending sort + pop() takes the candidate with the
                    # largest ordering term.
                    to_s_neighbours.sort()
                    _, s = to_s_neighbours.pop()
                else:
                    s = None

            if m_s + l_s >= m_t + l_t:
                from_t.append(t)
                from_t_neighbours.extend(fe_t)
                from_t_outdegree += l_t

                if from_t_neighbours:
                    # Descending sort + pop() takes the candidate with the
                    # smallest ordering term.
                    from_t_neighbours.sort(reverse=True)
                    _, t = from_t_neighbours.pop()
                else:
                    t = None

        # If one side was exhausted, anchor the re-insertion on the node next
        # to the opposite end of the new edge (None if there is no such node).
        if s is None:
            s = self.get_prev(target)

        if t is None:
            t = self.get_next(source)

        # Re-insert the collected nodes, most recently collected first, each
        # one immediately after (resp. before) the previously placed node.
        while to_s:
            s1 = to_s.pop()
            self._delete_ordering(s1)
            self._insert_after(s1, s)
            s = s1

        while from_t:
            t1 = from_t.pop()
            self._delete_ordering(t1)
            self._insert_before(t1, t)
            t = t1

        self._add_edge_to_graph(source, target)

    @abstractmethod
    def is_before(self, first_node, second_node):
        """Returns whether the first node is before the second node.

        Args:
            first_node (str)
            second_node (str)

        Returns:
            bool: True if first_node is before second_node
        """

    @abstractmethod
    def get_prev(self, node_id):
        """Gets the node immediately before the given node in the topological
        ordering.

        Args:
            node_id (str)

        Returns:
            str|None: A node ID or None if no preceding node exists
        """

    @abstractmethod
    def get_next(self, node_id):
        """Gets the node immediately after the given node in the topological
        ordering.

        Args:
            node_id (str)

        Returns:
            str|None: A node ID or None if no succeeding node exists
        """

    @abstractmethod
    def get_nodes_with_edges_to(self, node_id):
        """Get all nodes with edges to the given node

        Args:
            node_id (str)

        Returns:
            list[tuple[float, str]]: Returns a list of tuple of an ordering
            term and the node ID. The ordering term can be used to sort the
            returned list.
            The ordering is valid until subsequent calls to `add_edge`
            functions
        """

    @abstractmethod
    def get_nodes_with_edges_from(self, node_id):
        """Get all nodes with edges from the given node

        Args:
            node_id (str)

        Returns:
            list[tuple[float, str]]: Returns a list of tuple of an ordering
            term and the node ID. The ordering term can be used to sort the
            returned list.
            The ordering is valid until subsequent calls to `add_edge`
            functions
        """

    @abstractmethod
    def _insert_before(self, node_id, target_id):
        """Inserts node immediately before target node.

        If target_id is None then the node is inserted at the end of the list

        Args:
            node_id (str)
            target_id (str|None)
        """

    @abstractmethod
    def _insert_after(self, node_id, target_id):
        """Inserts node immediately after target node.

        If target_id is None then the node is inserted at the start of the list

        Args:
            node_id (str)
            target_id (str|None)
        """

    @abstractmethod
    def _delete_ordering(self, node_id):
        """Deletes the given node from the ordered list (but not the graph).

        Used when we want to reinsert it into a different position

        Args:
            node_id (str)
        """

    @abstractmethod
    def _add_edge_to_graph(self, source_id, target_id):
        """Adds an edge to the graph from source to target.

        Does not update ordering.

        Args:
            source_id (str)
            target_id (str)
        """
class InMemoryOrderedListStore(OrderedListStore):
    """An OrderedListStore that keeps the ordering and the graph in plain
    Python containers held in memory.
    """

    def __init__(self):
        # The total ordering: node IDs, earliest first
        self.list = []

        # node -> set of nodes it has an edge to
        self.edges_from = {}

        # node -> set of nodes that have an edge to it
        self.edges_to = {}

    def is_before(self, first_node, second_node):
        first_pos = self.list.index(first_node)
        second_pos = self.list.index(second_node)
        return first_pos < second_pos

    def get_prev(self, node_id):
        position = self.list.index(node_id)
        if position == 0:
            # Already the first entry, so nothing precedes it
            return None
        return self.list[position - 1]

    def get_next(self, node_id):
        following = self.list.index(node_id) + 1
        if following >= len(self.list):
            # Already the last entry, so nothing follows it
            return None
        return self.list[following]

    def _insert_before(self, node_id, target_id):
        if target_id is None:
            # No target means "put it at the end"
            self.list.append(node_id)
            return

        self.list.insert(self.list.index(target_id), node_id)

    def _insert_after(self, node_id, target_id):
        if target_id is None:
            # No target means "put it at the start"
            self.list.insert(0, node_id)
            return

        self.list.insert(self.list.index(target_id) + 1, node_id)

    def _delete_ordering(self, node_id):
        self.list.remove(node_id)

    def get_nodes_with_edges_to(self, node_id):
        # The position in the list doubles as the ordering term
        ranked = []
        for neighbour in self.edges_to.get(node_id, []):
            ranked.append((self.list.index(neighbour), neighbour))
        return ranked

    def get_nodes_with_edges_from(self, node_id):
        # The position in the list doubles as the ordering term
        ranked = []
        for neighbour in self.edges_from.get(node_id, []):
            ranked.append((self.list.index(neighbour), neighbour))
        return ranked

    def _add_edge_to_graph(self, source_id, target_id):
        outgoing = self.edges_from.setdefault(source_id, set())
        outgoing.add(target_id)

        incoming = self.edges_to.setdefault(target_id, set())
        incoming.add(source_id)