### * ADD Weisfeiler-Lehman kernel based on shortest path kernel and its results.

parent 0be419b5
 # py-graph a python package for graph kernels. # requirements ## requirements * numpy * scipy ... ...
This diff is collapsed.
This diff is collapsed.
 import sys import pathlib sys.path.insert(0, "../") import networkx as nx import numpy as np import time from pygraph.kernels.spkernel import spkernel def weisfeilerlehmankernel(*args): """Calculate Weisfeiler-Lehman kernels between graphs. Parameters ---------- Gn : List of NetworkX graph List of graphs between which the kernels are calculated. / G1, G2 : NetworkX graphs 2 graphs between which the kernel is calculated. Return ------ Kmatrix/Kernel : Numpy matrix/int Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 praphs. / Weisfeiler-Lehman Kernel between 2 graphs. References ----------  Shervashidze N, Schweitzer P, Leeuwen EJ, Mehlhorn K, Borgwardt KM. Weisfeiler-lehman graph kernels. Journal of Machine Learning Research. 2011;12(Sep):2539-61. """ if len(args) == 1: # for a list of graphs Gn = args Kmatrix = np.zeros((len(Gn), len(Gn))) start_time = time.time() for i in range(0, len(Gn)): for j in range(i, len(Gn)): Kmatrix[i][j] = _weisfeilerlehmankernel_do(Gn[i], Gn[j]) Kmatrix[j][i] = Kmatrix[i][j] print("\n --- Weisfeiler-Lehman kernel matrix of size %d built in %s seconds ---" % (len(Gn), (time.time() - start_time))) return Kmatrix else: # for only 2 graphs start_time = time.time() kernel = _pathkernel_do(args, args) print("\n --- Weisfeiler-Lehman kernel built in %s seconds ---" % (time.time() - start_time)) return kernel def _weisfeilerlehmankernel_do(G1, G2): """Calculate Weisfeiler-Lehman kernels between 2 graphs. This kernel use shortest path kernel to calculate kernel between two graphs in each iteration. Parameters ---------- G1, G2 : NetworkX graphs 2 graphs between which the kernel is calculated. Return ------ Kernel : int Weisfeiler-Lehman Kernel between 2 graphs. """ # init. kernel = 0 # init kernel num_nodes1 = G1.number_of_nodes() num_nodes2 = G2.number_of_nodes() # the first iteration. labelset1 = { G1.nodes(data = True)[i]['label'] for i in range(num_nodes1) } labelset2 = { G2.nodes(data = True)[i]['label'] for i in range(num_nodes2) } kernel += spkernel(G1, G2) for height in range(0, min(num_nodes1, num_nodes2)): #Q how to determine the upper bound of the height? if labelset1 != labelset2: break # Weisfeiler-Lehman test of graph isomorphism. relabel(G1) relabel(G2) # calculate kernel kernel += spkernel(G1, G2) # get label sets of both graphs labelset1 = { G1.nodes(data = True)[i]['label'] for i in range(num_nodes1) } labelset2 = { G2.nodes(data = True)[i]['label'] for i in range(num_nodes2) } return kernel def relabel(G): ''' Relabel nodes in graph G in one iteration of the 1-dim. WL test of graph isomorphism. Parameters ---------- G : NetworkX graph The graphs whose nodes are relabeled. ''' set_multisets = [] for node in G.nodes(data = True): # Multiset-label determination. multiset = [ G.node[neighbors]['label'] for neighbors in G[node] ] # sorting each multiset multiset.sort() multiset = node['label'] + ''.join(multiset) # concatenate to a string and add the prefix set_multisets.append(multiset) # label compression # set_multisets.sort() # this is unnecessary set_unique = list(set(set_multisets)) # set of unique multiset labels set_compressed = { value : str(set_unique.index(value)) for value in set_unique } # assign indices as the new labels # relabel nodes for node in G.nodes(data = True): node['label'] = set_compressed[set_multisets[node]] \ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!