# -*- coding: utf-8 -*-
# Copyright 2018 New Vector Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""This module contains an implementation of the Katriel-Bodlaender algorithm,
which is used to do online topological ordering of graphs.

Note that the ordering derived from the graph is such that the source node of
an edge comes before the target node of the edge, i.e. a graph of A -> B -> C
would produce the ordering [A, B, C].

This ordering is therefore opposite to what one might expect when considering
the room DAG, as newer messages would be added to the start rather than the
end.

***The ChunkDBOrderedListStore therefore inverts the direction of edges***

See:
    A tight analysis of the Katriel–Bodlaender algorithm for online topological
    ordering
    Hsiao-Fei Liu and Kun-Mao Chao
    https://www.sciencedirect.com/science/article/pii/S0304397507006573
and:
    Online Topological Ordering
    Irit Katriel and Hans L. Bodlaender
    http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.78.7933
"""

from abc import ABCMeta, abstractmethod


class OrderedListStore(object):
    """An abstract base class that is used to store a graph and maintain a
    topologically consistent, total ordering.

    Internally this uses the Katriel-Bodlaender algorithm, which requires the
    store expose an interface for the total ordering that supports:

        - Insertion of the node into the ordering either immediately before or
          after another node.
        - Deletion of the node from the ordering
        - Comparing the relative ordering of two arbitrary nodes
        - Get the node immediately before or after a given node in the ordering

    It also needs to be able to interact with the graph in the following ways:

        - Query the number of edges from a node in the graph
        - Query the number of edges into a node in the graph
        - Add an edge to the graph

    Users of subclasses should call `add_node` and `add_edge` whenever editing
    the graph. The total ordering exposed will remain constant until the next
    call to one of these methods.

    Note: Calls to `add_node` and `add_edge` cannot overlap, and so callers
    should perform some form of locking.
    """

    # NOTE: this is the Python 2 spelling of a metaclass declaration. Python 3
    # ignores a `__metaclass__` class attribute, so there the abstract methods
    # below are documentation only and are not enforced at instantiation.
    __metaclass__ = ABCMeta

    def add_node(self, node_id):
        """Adds a node to the graph.

        The node is placed at the end of the ordering (`_insert_before` with
        a target of None).

        Args:
            node_id (str)
        """
        self._insert_before(node_id, None)

    def add_edge(self, source, target):
        """Adds a new edge to the graph and updates the ordering.

        See module level docs.

        Note that both the source and target nodes must have been inserted into
        the store (at an arbitrary position) already.

        Args:
            source (str): The source node of the new edge
            target (str): The target node of the new edge
        """

        # The following is the Katriel-Bodlaender algorithm.

        # Nodes visited while walking backwards from `source` / forwards from
        # `target`; these are the nodes that get repositioned at the end.
        to_s = []
        from_t = []
        # Candidate nodes still to visit in each direction, as
        # (ordering term, node ID) tuples.
        to_s_neighbours = []
        from_t_neighbours = []
        # Running totals of the edges seen so far in each direction.
        to_s_indegree = 0
        from_t_outdegree = 0
        s = source
        t = target

        # Compare against None explicitly rather than relying on truthiness:
        # a falsy node ID (e.g. an empty string) is still a real node and must
        # not terminate the search early.
        while s is not None and t is not None and not self.is_before(s, t):
            m_s = to_s_indegree
            m_t = from_t_outdegree

            # These functions return a tuple where the first term is a float
            # that can be used to order the list of neighbours.
            # These are valid until the next write
            pe_s = self.get_nodes_with_edges_to(s)
            fe_t = self.get_nodes_with_edges_from(t)

            l_s = len(pe_s)
            l_t = len(fe_t)

            # Advance whichever side has seen fewer edges (both on a tie).
            if m_s + l_s <= m_t + l_t:
                to_s.append(s)
                to_s_neighbours.extend(pe_s)
                to_s_indegree += l_s

                if to_s_neighbours:
                    # Ascending sort + pop() picks the candidate with the
                    # largest ordering term.
                    to_s_neighbours.sort()
                    _, s = to_s_neighbours.pop()
                else:
                    s = None

            if m_s + l_s >= m_t + l_t:
                from_t.append(t)
                from_t_neighbours.extend(fe_t)
                from_t_outdegree += l_t

                if from_t_neighbours:
                    # Descending sort + pop() picks the candidate with the
                    # smallest ordering term.
                    from_t_neighbours.sort(reverse=True)
                    _, t = from_t_neighbours.pop()
                else:
                    t = None

        # If one side ran out of candidates, anchor it relative to the other
        # end of the new edge.
        if s is None:
            s = self.get_prev(target)

        if t is None:
            t = self.get_next(source)

        # Re-insert the visited nodes, each one immediately after (resp.
        # before) the previously placed one.
        while to_s:
            s1 = to_s.pop()
            self._delete_ordering(s1)
            self._insert_after(s1, s)
            s = s1

        while from_t:
            t1 = from_t.pop()
            self._delete_ordering(t1)
            self._insert_before(t1, t)
            t = t1

        self._add_edge_to_graph(source, target)

    @abstractmethod
    def is_before(self, first_node, second_node):
        """Returns whether the first node is before the second node.

        Args:
            first_node (str)
            second_node (str)

        Returns:
            bool: True if first_node is before second_node
        """
        pass

    @abstractmethod
    def get_prev(self, node_id):
        """Gets the node immediately before the given node in the topological
        ordering.

        Args:
            node_id (str)

        Returns:
            str|None: A node ID or None if no preceding node exists
        """
        pass

    @abstractmethod
    def get_next(self, node_id):
        """Gets the node immediately after the given node in the topological
        ordering.

        Args:
            node_id (str)

        Returns:
            str|None: A node ID or None if no following node exists
        """
        pass

    @abstractmethod
    def get_nodes_with_edges_to(self, node_id):
        """Get all nodes with edges to the given node

        Args:
            node_id (str)

        Returns:
            list[tuple[float, str]]: Returns a list of tuple of an ordering
            term and the node ID. The ordering term can be used to sort the
            returned list.
            The ordering is valid until subsequent calls to `add_edge`
            functions
        """
        pass

    @abstractmethod
    def get_nodes_with_edges_from(self, node_id):
        """Get all nodes with edges from the given node

        Args:
            node_id (str)

        Returns:
            list[tuple[float, str]]: Returns a list of tuple of an ordering
            term and the node ID. The ordering term can be used to sort the
            returned list.
            The ordering is valid until subsequent calls to `add_edge`
            functions
        """
        pass

    @abstractmethod
    def _insert_before(self, node_id, target_id):
        """Inserts node immediately before target node.

        If target_id is None then the node is inserted at the end of the list

        Args:
            node_id (str)
            target_id (str|None)
        """
        pass

    @abstractmethod
    def _insert_after(self, node_id, target_id):
        """Inserts node immediately after target node.

        If target_id is None then the node is inserted at the start of the list

        Args:
            node_id (str)
            target_id (str|None)
        """
        pass

    @abstractmethod
    def _delete_ordering(self, node_id):
        """Deletes the given node from the ordered list (but not the graph).

        Used when we want to reinsert it into a different position

        Args:
            node_id (str)
        """
        pass

    @abstractmethod
    def _add_edge_to_graph(self, source_id, target_id):
        """Adds an edge to the graph from source to target.

        Does not update ordering.

        Args:
            source_id (str)
            target_id (str)
        """
        pass


class InMemoryOrderedListStore(OrderedListStore):
    """An in memory OrderedListStore

    The ordering is held in a plain Python list, so a node's position in that
    list doubles as its ordering term.
    """

    def __init__(self):
        # The ordered list of nodes
        self.list = []

        # Map from node to set of nodes that it references
        self.edges_from = {}

        # Map from node to set of nodes that it is referenced by
        self.edges_to = {}

    def is_before(self, first_node, second_node):
        """Compares the list positions of the two nodes."""
        return self.list.index(first_node) < self.list.index(second_node)

    def get_prev(self, node_id):
        """Returns the node one position earlier in the list, or None if
        node_id is the first entry.
        """
        idx = self.list.index(node_id) - 1
        if idx >= 0:
            return self.list[idx]
        else:
            return None

    def get_next(self, node_id):
        """Returns the node one position later in the list, or None if
        node_id is the last entry.
        """
        idx = self.list.index(node_id) + 1
        if idx < len(self.list):
            return self.list[idx]
        else:
            return None

    def _insert_before(self, node_id, target_id):
        """Inserts node_id at target_id's position, or appends it to the end
        of the list when target_id is None.
        """
        if target_id is not None:
            idx = self.list.index(target_id)
            self.list.insert(idx, node_id)
        else:
            self.list.append(node_id)

    def _insert_after(self, node_id, target_id):
        """Inserts node_id just after target_id, or at the start of the list
        when target_id is None.
        """
        if target_id is not None:
            idx = self.list.index(target_id) + 1
            self.list.insert(idx, node_id)
        else:
            self.list.insert(0, node_id)

    def _delete_ordering(self, node_id):
        """Removes node_id from the list; the edge maps are left untouched."""
        self.list.remove(node_id)

    def get_nodes_with_edges_to(self, node_id):
        """Returns (list position, node) pairs for every node that has an
        edge to node_id.
        """
        to_nodes = self.edges_to.get(node_id, [])
        return [(self.list.index(nid), nid) for nid in to_nodes]

    def get_nodes_with_edges_from(self, node_id):
        """Returns (list position, node) pairs for every node that node_id
        has an edge to.
        """
        from_nodes = self.edges_from.get(node_id, [])
        return [(self.list.index(nid), nid) for nid in from_nodes]

    def _add_edge_to_graph(self, source_id, target_id):
        """Records the edge in both the forward and the reverse edge map."""
        self.edges_from.setdefault(source_id, set()).add(target_id)
        self.edges_to.setdefault(target_id, set()).add(source_id)