"""
Module graph
============
A module containing Graph class.
We assume that every graph is simple, i.e.,
there is no loops or multi edges.
"""
from collections import deque
import math
[docs]class Graph(object):
def __init__(self, graph_dict={}):
"""
initializes a graph object
"""
self.__graph_dict = graph_dict
[docs] def vertices(self):
"""
:returns: the vertices of a graph
"""
return list(self.__graph_dict.keys())
[docs] def number_of_vertices(self):
"""
:return: returns the number of vertices
"""
return len(self.__graph_dict.keys())
[docs] def number_of_edges(self):
"""
:return: returns the number of edges
"""
return len(self.edges())
[docs] def edges(self):
"""
:return: a list of tuples representing the edges of the graph
"""
return self.__generate_edges()
[docs] def add_vertex(self, vertex):
"""
If vertex is not in self.__graph_dict, a key "vertex" with an empty list
as a value is added to the dictionary. Otherwise nothing has to be done.
:return: True if the vertex is not present in the graph, otherwise false.
"""
if vertex not in self.__graph_dict.keys():
self.__graph_dict[vertex] = []
return True
else:
return False
[docs] def is_vertex_present(self, vertex):
"""
Checks if the vertex is already created. Returns True if it is otherwise False
:param vertex: name of the vertex
:return: true if vertex is in the graph, false otherwise
"""
if vertex in self.__graph_dict.keys():
return True
else:
return False
[docs] def in_neighbourhood(self, u, v):
"""
:param u: a vertex of the graph
:param v: a vertex of the graph
:return: Returns True if v is in the neighborhood of u.
It raises KeyError if u is not present in the graph.
"""
if u not in self.vertices():
raise KeyError("Vertex is not present in the graph")
u_neighbors = self.get_neighbors(u)
return v in u_neighbors
[docs] def update_neighbors(self, u, new_neighbors):
"""
Updates the list of vertices of a vertex.
:param u: Vertex whose neighborhood is updated
:param new_neighbors: (list) New neighbors of u
:return: None
"""
self.__graph_dict[u] = new_neighbors
[docs] def add_edge(self, edge):
"""
Add an edge to graph. Assumes that edge is of type set, tuple or list.
No loops or multiple edges.
:return: Returns true if edge is not present, otherwise false."""
u, v = edge
return_type = False
if not self.is_vertex_present(u):
self.add_vertex(u)
if not self.is_vertex_present(v):
self.add_vertex(v)
if v not in self.__graph_dict[u]:
return_type = True
self.__graph_dict[u].append(v)
if u not in self.__graph_dict[v]:
self.__graph_dict[v].append(u)
return return_type
[docs] def add_edges(self, edges):
"""
Adds a list of edges to graph.
:param edges: a list of edges to be added
:return: void
"""
for edge in edges:
self.add_edge(edge)
def __generate_edges(self):
"""
A static method generating the edges of the graph "graph". Edges \
are represented as sets two vertices, with no loops.
:return: a list of tuples representing the edges
"""
edges = set()
for u in self.vertices():
new_edges = [(u, v) for v in self.get_neighbors(u) if (u, v) not in edges and (v,u) not in edges]
edges.update(new_edges)
return list(edges)
[docs] def vertex_degree(self, vertex):
"""
Returns the degree of the vertex.
:param vertex: vertex for which degree is computed
:return: return the degree if the vertex is present, otherwise raises keyerror
"""
if self.is_vertex_present(vertex):
return len(self.__graph_dict[vertex])
else:
raise KeyError("Vertex is not in the graph")
[docs] def vertices_degree_list(self):
"""
:return: returns a list of degrees of vertices
"""
degrees = [self.vertex_degree(u) for u in self.vertices()]
return degrees
[docs] def find_isolated_vertices(self):
"""
:return: a list of vertices of degree 0.
"""
isolated_vertices = [u for u in self.vertices() if self.vertex_degree(u) == 0]
return isolated_vertices
[docs] def density(self):
"""
Density is ratio of number of edges and number of edges that
complete graph has with the same number of vertices.
:return: returns the density of the graph
"""
num_of_edges = self.number_of_edges()
num_of_vertices = self.number_of_vertices()
poss_num_of_edges = num_of_vertices * (num_of_vertices - 1) / 2
if poss_num_of_edges == 0:
return 0
return float(num_of_edges) / float(poss_num_of_edges)
[docs] def degree_sequence(self):
"""
Degree sequence is a list of all degrees in non-increasing order.
:return: a degree sequence of the graph
"""
degree_seq = sorted(self.vertices_degree_list(), reverse=True)
return degree_seq
[docs] def erdos_gallai(self, degree_seq=None):
"""
Erdos Gallai theorem states that a non-increasing
sequence of n numbers d_i is a degree sequence of a simple graph
if and only if the sum of sequence is even and the condition
$\sum_{i=1}^{k}{d_i} \leq k(k-1) + \sum_{i=k+1}^n \min(d_i, k) \text{ for all } k$
This can be implemented in time $O(n)$. However, the current implementation
requires O(n^2) time.
:param degree_seq: If it is None, then the degree sequence of the graph is used.
:return: True if the degree sequence satisfies both conditions and False otherwise
"""
if degree_seq is None:
degree_seq = self.degree_sequence()
num_vertices = self.number_of_vertices()
else:
num_vertices = len(degree_seq)
degree_seq = sorted(degree_seq, reverse=True)
# first condition is satisfied by default
if sum(degree_seq) % 2 != 0:
return False
# second condition
def compute_right_sum(k):
elements = [min(d, k + 1) for d in degree_seq[k + 1:]]
return sum(elements)
left_sum = 0
for k in range(num_vertices):
left_sum = left_sum + degree_seq[k]
right_sum = compute_right_sum(k)
if left_sum > k * (k + 1) + right_sum:
return False
return True
[docs] def global_clustering_coefficient(self):
"""
Global clustering coefficient is the ratio of number of triangles and
number of connected triplets of vertices
:return: float, clustering coefficient of the graph
"""
num_closed_triplets = 0
num_conn_triplets = 0
for u in self.vertices():
# center it at vertex u
tmp_conn_trplets = 0
tmp_closed_triplets = 0
# check if there is a closed or conn
# triplet centered at u
u_neighbors = self.get_neighbors(u)
for i, v in enumerate(u_neighbors):
for w in u_neighbors[i + 1:]:
if self.in_neighbourhood(v, w):
# this is a closed triplet
tmp_closed_triplets += 1
num_conn_triplets += 1
num_closed_triplets += tmp_closed_triplets
num_conn_triplets += tmp_conn_trplets
if num_conn_triplets == 0:
return 0.0
return float(num_closed_triplets) / float(num_conn_triplets)
[docs] def get_neighbors(self, u):
"""
:param u: vertex in the graph
:return: a list of neighbors of u
"""
if not self.is_vertex_present(u):
raise KeyError("Vertex is not in the graph")
else:
return self.__graph_dict[u]
[docs] def connected_components(self):
"""
Computes connected components.
:return: a list of lists, each containing vertices from the same conn component.
"""
n = self.number_of_vertices()
all_comps = [] #
# keep all unvistied nodes in a set
all_nodes = set(self.vertices())
while len(all_nodes) > 0: # while there is an unvisited node
u = all_nodes.pop()
curr_component = {u} # all nodes belonging to this component
bfs_queue = {u} # use set (our favourite data struct) as a bfs queue
while len(bfs_queue) > 0: # while there is a node in the queue
u = bfs_queue.pop()
# get all my neighbors
# keep it in a set in order to make operations faster
u_neighbors = set(self.get_neighbors(u))
# remove those vertices I already saw
u_neighbors.difference_update(curr_component)
# update current component
curr_component.update(u_neighbors)
# update set of visited nodes
all_nodes.difference_update(u_neighbors)
# update queue
bfs_queue.update(u_neighbors)
all_comps.append(list(curr_component))
return all_comps
[docs] def connected_component(self):
"""
Computes the number of connected components and
the size of each componenent
:return: a pair: thi first entry is the number of components, the second
is a list constaining sizes of components
"""
components = self.connected_components()
number_of_comps = len(components)
sizes = []
for i, comp in enumerate(components):
sizes.append(len(comp))
return number_of_comps, sizes
[docs] def shortest_path(self, u=None, v=None):
"""
Computes the shortest path between u and v.
If both of them are None, then computes the shorthest path
between all pair of vertices.
If u is not None and v is None, then it computes the shortest path
between u and all other vertices.
Finally, if both of them are not None, it computes the shorthest path
between u and v.
:param u: the source vertex
:param v: the end vertex
:return: the shortest path
"""
# now we need a real queue :(
if u is None and v is None:
return {u: self.shortest_path(u=u) for u in self.vertices()}
dist, arg_u, arg_v = {}, u, v
u = u if u is not None else v # love python
queue = deque([u])
# distance to me is 0
dist[u] = 0
while len(queue) > 0:
# take the best so far
u = queue.popleft()
u_neighbors = self.get_neighbors(u)
updated = {v: dist[u] + 1 for v in u_neighbors if v not in dist.keys()}
# append the queue
queue.extend(updated.keys())
# update distances
dist.update(updated)
# nodes which are not in the list are not in the
# same component as u
non_reachable = {v: float("inf") for v in self.vertices() if v not in dist.keys()}
# update for non_reachable
dist.update(non_reachable)
if arg_u is not None and arg_v is not None:
return dist[arg_v]
else:
return dist
[docs] def diameter(self):
"""
Computes the diameter of the graph.
:return: The maximum of distances between any pair
of nodes or infinity if the graph is not connected
"""
dist = self.shortest_path()
# a temporary function to compute the largest distance
# starting at u
def u_diameter(u):
values = dist[u].values()
return 0 if len(values) == 0 else max(values)
# compute the largest distances from every vertex
comp_diams = [u_diameter(u) for u in self.vertices()]
# finally compute the diameter
diam = 0 if len(comp_diams) == 0 else max(comp_diams)
return diam
[docs] def biggest_component_diameter(self):
"""
:return: the diameter of the biggest component
"""
all_components = self.connected_components()
# get components and their sizes
component_sizes = [(len(comp), i) for i, comp in enumerate(all_components)]
if len(component_sizes) == 0:
return 0
size_of_largest, idx_largest = max(component_sizes)
# now it is guaranteed that there is at least one vertex
dist = self.shortest_path()
# map every vertex from the largest component to
# largest distance starting at that vertex
def u_diameter(u):
values = [val for val in dist[u].values() if not math.isinf(val)]
return 0 if len(values) == 0 else max(values)
diam_candidates = [u_diameter(u) for u in all_components[idx_largest]]
return max(diam_candidates)
[docs] def spanning_tree(self):
"""
Computes the spanning tree of the graph using Kruskal algorithm
and union - find. Complexity: $O(m\log*(n))$
:return: a list of tuples representing edges of the tree
"""
parent_and_rank = {node: [node, 0] for node in self.vertices()}
s_tree = [] # edges of resulting tree
def find_parent(x):
if parent_and_rank[x][0] != x:
parent_and_rank[x] = find_parent(parent_and_rank[x][0])
return parent_and_rank[x]
for u, v in self.edges():
uroot = find_parent(u)[0]
vroot = find_parent(v)[0]
if uroot == vroot: # this edge create a cycle
continue
# otherwise, add this edge to the tree and join components
s_tree.append((u, v))
uroot_rank = parent_and_rank[uroot][1]
vroot_rank = parent_and_rank[vroot][1]
if uroot_rank > vroot_rank:
parent_and_rank[vroot] = [uroot, parent_and_rank[uroot][1]]
elif uroot_rank < vroot_rank:
parent_and_rank[uroot] = [vroot, parent_and_rank[vroot][1]]
else:
parent_and_rank[vroot] = [uroot, parent_and_rank[uroot][1]]
parent_and_rank[uroot][1] += 1
return s_tree
[docs] def read_from_file(self, filename=None):
"""
Reads graph from a file.
:param filename: path to the file
:return: True if the graph is read successfully. If
filename is not specified or it does not exist, ValueError
and IOError are raised (respectively).
"""
if filename is None:
raise ValueError("Filename is not specified")
input_file = open(filename, "r")
for i, str_edge in enumerate(input_file.readlines()):
[u, v] = str_edge.rstrip().split('\t')
self.add_edge((u, v))
input_file.close()
return True
if __name__ == "__main__":
G = {
"a": ["c", "d", "g"],
"b": ["c", "f"],
"c": ["a", "b", "d", "f"],
"d": ["a", "c", "e", "g"],
"e": ["d"],
"f": ["b", "c"],
"g": ["a", "d"]
}
g = Graph()
g.read_from_file("../test_data/graph_100n_1000m.txt")
g.shortest_path(u='2')
print(g.diameter())