Source code for graph

"""
Module graph
============
A module containing Graph class.
We assume that every graph is simple, i.e.,
there is no loops or multi edges.
"""
from collections import deque
import math


[docs]class Graph(object): def __init__(self, graph_dict={}): """ initializes a graph object """ self.__graph_dict = graph_dict
[docs] def vertices(self): """ :returns: the vertices of a graph """ return list(self.__graph_dict.keys())
[docs] def number_of_vertices(self): """ :return: returns the number of vertices """ return len(self.__graph_dict.keys())
[docs] def number_of_edges(self): """ :return: returns the number of edges """ return len(self.edges())
[docs] def edges(self): """ :return: a list of tuples representing the edges of the graph """ return self.__generate_edges()
[docs] def add_vertex(self, vertex): """ If vertex is not in self.__graph_dict, a key "vertex" with an empty list as a value is added to the dictionary. Otherwise nothing has to be done. :return: True if the vertex is not present in the graph, otherwise false. """ if vertex not in self.__graph_dict.keys(): self.__graph_dict[vertex] = [] return True else: return False
[docs] def is_vertex_present(self, vertex): """ Checks if the vertex is already created. Returns True if it is otherwise False :param vertex: name of the vertex :return: true if vertex is in the graph, false otherwise """ if vertex in self.__graph_dict.keys(): return True else: return False
[docs] def in_neighbourhood(self, u, v): """ :param u: a vertex of the graph :param v: a vertex of the graph :return: Returns True if v is in the neighborhood of u. It raises KeyError if u is not present in the graph. """ if u not in self.vertices(): raise KeyError("Vertex is not present in the graph") u_neighbors = self.get_neighbors(u) return v in u_neighbors
[docs] def update_neighbors(self, u, new_neighbors): """ Updates the list of vertices of a vertex. :param u: Vertex whose neighborhood is updated :param new_neighbors: (list) New neighbors of u :return: None """ self.__graph_dict[u] = new_neighbors
[docs] def add_edge(self, edge): """ Add an edge to graph. Assumes that edge is of type set, tuple or list. No loops or multiple edges. :return: Returns true if edge is not present, otherwise false.""" u, v = edge return_type = False if not self.is_vertex_present(u): self.add_vertex(u) if not self.is_vertex_present(v): self.add_vertex(v) if v not in self.__graph_dict[u]: return_type = True self.__graph_dict[u].append(v) if u not in self.__graph_dict[v]: self.__graph_dict[v].append(u) return return_type
[docs] def add_edges(self, edges): """ Adds a list of edges to graph. :param edges: a list of edges to be added :return: void """ for edge in edges: self.add_edge(edge)
def __generate_edges(self): """ A static method generating the edges of the graph "graph". Edges \ are represented as sets two vertices, with no loops. :return: a list of tuples representing the edges """ edges = set() for u in self.vertices(): new_edges = [(u, v) for v in self.get_neighbors(u) if (u, v) not in edges and (v,u) not in edges] edges.update(new_edges) return list(edges)
[docs] def vertex_degree(self, vertex): """ Returns the degree of the vertex. :param vertex: vertex for which degree is computed :return: return the degree if the vertex is present, otherwise raises keyerror """ if self.is_vertex_present(vertex): return len(self.__graph_dict[vertex]) else: raise KeyError("Vertex is not in the graph")
[docs] def vertices_degree_list(self): """ :return: returns a list of degrees of vertices """ degrees = [self.vertex_degree(u) for u in self.vertices()] return degrees
[docs] def find_isolated_vertices(self): """ :return: a list of vertices of degree 0. """ isolated_vertices = [u for u in self.vertices() if self.vertex_degree(u) == 0] return isolated_vertices
[docs] def density(self): """ Density is ratio of number of edges and number of edges that complete graph has with the same number of vertices. :return: returns the density of the graph """ num_of_edges = self.number_of_edges() num_of_vertices = self.number_of_vertices() poss_num_of_edges = num_of_vertices * (num_of_vertices - 1) / 2 if poss_num_of_edges == 0: return 0 return float(num_of_edges) / float(poss_num_of_edges)
[docs] def degree_sequence(self): """ Degree sequence is a list of all degrees in non-increasing order. :return: a degree sequence of the graph """ degree_seq = sorted(self.vertices_degree_list(), reverse=True) return degree_seq
[docs] def erdos_gallai(self, degree_seq=None): """ Erdos Gallai theorem states that a non-increasing sequence of n numbers d_i is a degree sequence of a simple graph if and only if the sum of sequence is even and the condition $\sum_{i=1}^{k}{d_i} \leq k(k-1) + \sum_{i=k+1}^n \min(d_i, k) \text{ for all } k$ This can be implemented in time $O(n)$. However, the current implementation requires O(n^2) time. :param degree_seq: If it is None, then the degree sequence of the graph is used. :return: True if the degree sequence satisfies both conditions and False otherwise """ if degree_seq is None: degree_seq = self.degree_sequence() num_vertices = self.number_of_vertices() else: num_vertices = len(degree_seq) degree_seq = sorted(degree_seq, reverse=True) # first condition is satisfied by default if sum(degree_seq) % 2 != 0: return False # second condition def compute_right_sum(k): elements = [min(d, k + 1) for d in degree_seq[k + 1:]] return sum(elements) left_sum = 0 for k in range(num_vertices): left_sum = left_sum + degree_seq[k] right_sum = compute_right_sum(k) if left_sum > k * (k + 1) + right_sum: return False return True
[docs] def global_clustering_coefficient(self): """ Global clustering coefficient is the ratio of number of triangles and number of connected triplets of vertices :return: float, clustering coefficient of the graph """ num_closed_triplets = 0 num_conn_triplets = 0 for u in self.vertices(): # center it at vertex u tmp_conn_trplets = 0 tmp_closed_triplets = 0 # check if there is a closed or conn # triplet centered at u u_neighbors = self.get_neighbors(u) for i, v in enumerate(u_neighbors): for w in u_neighbors[i + 1:]: if self.in_neighbourhood(v, w): # this is a closed triplet tmp_closed_triplets += 1 num_conn_triplets += 1 num_closed_triplets += tmp_closed_triplets num_conn_triplets += tmp_conn_trplets if num_conn_triplets == 0: return 0.0 return float(num_closed_triplets) / float(num_conn_triplets)
[docs] def get_neighbors(self, u): """ :param u: vertex in the graph :return: a list of neighbors of u """ if not self.is_vertex_present(u): raise KeyError("Vertex is not in the graph") else: return self.__graph_dict[u]
[docs] def connected_components(self): """ Computes connected components. :return: a list of lists, each containing vertices from the same conn component. """ n = self.number_of_vertices() all_comps = [] # # keep all unvistied nodes in a set all_nodes = set(self.vertices()) while len(all_nodes) > 0: # while there is an unvisited node u = all_nodes.pop() curr_component = {u} # all nodes belonging to this component bfs_queue = {u} # use set (our favourite data struct) as a bfs queue while len(bfs_queue) > 0: # while there is a node in the queue u = bfs_queue.pop() # get all my neighbors # keep it in a set in order to make operations faster u_neighbors = set(self.get_neighbors(u)) # remove those vertices I already saw u_neighbors.difference_update(curr_component) # update current component curr_component.update(u_neighbors) # update set of visited nodes all_nodes.difference_update(u_neighbors) # update queue bfs_queue.update(u_neighbors) all_comps.append(list(curr_component)) return all_comps
[docs] def connected_component(self): """ Computes the number of connected components and the size of each componenent :return: a pair: thi first entry is the number of components, the second is a list constaining sizes of components """ components = self.connected_components() number_of_comps = len(components) sizes = [] for i, comp in enumerate(components): sizes.append(len(comp)) return number_of_comps, sizes
[docs] def shortest_path(self, u=None, v=None): """ Computes the shortest path between u and v. If both of them are None, then computes the shorthest path between all pair of vertices. If u is not None and v is None, then it computes the shortest path between u and all other vertices. Finally, if both of them are not None, it computes the shorthest path between u and v. :param u: the source vertex :param v: the end vertex :return: the shortest path """ # now we need a real queue :( if u is None and v is None: return {u: self.shortest_path(u=u) for u in self.vertices()} dist, arg_u, arg_v = {}, u, v u = u if u is not None else v # love python queue = deque([u]) # distance to me is 0 dist[u] = 0 while len(queue) > 0: # take the best so far u = queue.popleft() u_neighbors = self.get_neighbors(u) updated = {v: dist[u] + 1 for v in u_neighbors if v not in dist.keys()} # append the queue queue.extend(updated.keys()) # update distances dist.update(updated) # nodes which are not in the list are not in the # same component as u non_reachable = {v: float("inf") for v in self.vertices() if v not in dist.keys()} # update for non_reachable dist.update(non_reachable) if arg_u is not None and arg_v is not None: return dist[arg_v] else: return dist
[docs] def diameter(self): """ Computes the diameter of the graph. :return: The maximum of distances between any pair of nodes or infinity if the graph is not connected """ dist = self.shortest_path() # a temporary function to compute the largest distance # starting at u def u_diameter(u): values = dist[u].values() return 0 if len(values) == 0 else max(values) # compute the largest distances from every vertex comp_diams = [u_diameter(u) for u in self.vertices()] # finally compute the diameter diam = 0 if len(comp_diams) == 0 else max(comp_diams) return diam
[docs] def biggest_component_diameter(self): """ :return: the diameter of the biggest component """ all_components = self.connected_components() # get components and their sizes component_sizes = [(len(comp), i) for i, comp in enumerate(all_components)] if len(component_sizes) == 0: return 0 size_of_largest, idx_largest = max(component_sizes) # now it is guaranteed that there is at least one vertex dist = self.shortest_path() # map every vertex from the largest component to # largest distance starting at that vertex def u_diameter(u): values = [val for val in dist[u].values() if not math.isinf(val)] return 0 if len(values) == 0 else max(values) diam_candidates = [u_diameter(u) for u in all_components[idx_largest]] return max(diam_candidates)
[docs] def spanning_tree(self): """ Computes the spanning tree of the graph using Kruskal algorithm and union - find. Complexity: $O(m\log*(n))$ :return: a list of tuples representing edges of the tree """ parent_and_rank = {node: [node, 0] for node in self.vertices()} s_tree = [] # edges of resulting tree def find_parent(x): if parent_and_rank[x][0] != x: parent_and_rank[x] = find_parent(parent_and_rank[x][0]) return parent_and_rank[x] for u, v in self.edges(): uroot = find_parent(u)[0] vroot = find_parent(v)[0] if uroot == vroot: # this edge create a cycle continue # otherwise, add this edge to the tree and join components s_tree.append((u, v)) uroot_rank = parent_and_rank[uroot][1] vroot_rank = parent_and_rank[vroot][1] if uroot_rank > vroot_rank: parent_and_rank[vroot] = [uroot, parent_and_rank[uroot][1]] elif uroot_rank < vroot_rank: parent_and_rank[uroot] = [vroot, parent_and_rank[vroot][1]] else: parent_and_rank[vroot] = [uroot, parent_and_rank[uroot][1]] parent_and_rank[uroot][1] += 1 return s_tree
[docs] def read_from_file(self, filename=None): """ Reads graph from a file. :param filename: path to the file :return: True if the graph is read successfully. If filename is not specified or it does not exist, ValueError and IOError are raised (respectively). """ if filename is None: raise ValueError("Filename is not specified") input_file = open(filename, "r") for i, str_edge in enumerate(input_file.readlines()): [u, v] = str_edge.rstrip().split('\t') self.add_edge((u, v)) input_file.close() return True
if __name__ == "__main__": G = { "a": ["c", "d", "g"], "b": ["c", "f"], "c": ["a", "b", "d", "f"], "d": ["a", "c", "e", "g"], "e": ["d"], "f": ["b", "c"], "g": ["a", "d"] } g = Graph() g.read_from_file("../test_data/graph_100n_1000m.txt") g.shortest_path(u='2') print(g.diameter())