Commit f9a60b7d authored by ljia

* ADD Weisfeiler-Lehman kernel based on shortest path kernel and its results.

parent 0be419b5
# py-graph
A Python package for graph kernels.

## Requirements

* numpy
* scipy
......
import sys
sys.path.insert(0, "../") # make the local pygraph package importable
import networkx as nx
import numpy as np
import time

from pygraph.kernels.spkernel import spkernel

def weisfeilerlehmankernel(*args):
    """Calculate Weisfeiler-Lehman kernels between graphs.

    Parameters
    ----------
    Gn : List of NetworkX graph
        List of graphs between which the kernels are calculated.
    /
    G1, G2 : NetworkX graphs
        2 graphs between which the kernel is calculated.

    Return
    ------
    Kmatrix/kernel : Numpy matrix/int
        Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 graphs. / Weisfeiler-Lehman kernel between 2 graphs.

    References
    ----------
    [1] Shervashidze N, Schweitzer P, Van Leeuwen EJ, Mehlhorn K, Borgwardt KM. Weisfeiler-Lehman graph kernels. Journal of Machine Learning Research. 2011;12(Sep):2539-61.
    """
    if len(args) == 1: # for a list of graphs
        Gn = args[0]
        Kmatrix = np.zeros((len(Gn), len(Gn)))

        start_time = time.time()

        # the kernel matrix is symmetric, so only the upper triangle is computed
        for i in range(0, len(Gn)):
            for j in range(i, len(Gn)):
                Kmatrix[i][j] = _weisfeilerlehmankernel_do(Gn[i], Gn[j])
                Kmatrix[j][i] = Kmatrix[i][j]

        print("\n --- Weisfeiler-Lehman kernel matrix of size %d built in %s seconds ---" % (len(Gn), (time.time() - start_time)))

        return Kmatrix

    else: # for only 2 graphs
        start_time = time.time()

        kernel = _weisfeilerlehmankernel_do(args[0], args[1])

        print("\n --- Weisfeiler-Lehman kernel built in %s seconds ---" % (time.time() - start_time))

        return kernel
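
# Usage sketch (illustrative, not part of the original file): this WL kernel
# assumes every node carries a string 'label' attribute (relabel relies on it);
# the toy graphs and degree-based labels below are hypothetical.
#
#     Gn = [nx.path_graph(4), nx.cycle_graph(4)]
#     for G in Gn:
#         for n in G.nodes():
#             G.node[n]['label'] = str(G.degree(n))
#     Kmatrix = weisfeilerlehmankernel(Gn)           # kernel matrix for a list of graphs
#     kernel = weisfeilerlehmankernel(Gn[0], Gn[1])  # kernel between 2 graphs
#
# Note that relabel() rewrites node labels in place, so work on copies
# (e.g. G.copy()) if the original labels are needed afterwards.
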
def _weisfeilerlehmankernel_do(G1, G2):
    """Calculate the Weisfeiler-Lehman kernel between 2 graphs. In each iteration, this kernel uses the shortest path kernel to compare the two relabeled graphs.

    Parameters
    ----------
    G1, G2 : NetworkX graphs
        2 graphs between which the kernel is calculated.

    Return
    ------
    kernel : int
        Weisfeiler-Lehman kernel between 2 graphs.
    """
    # init.
    kernel = 0 # init kernel
    num_nodes1 = G1.number_of_nodes()
    num_nodes2 = G2.number_of_nodes()

    # the first iteration: compare the graphs with their original node labels.
    labelset1 = { attrs['label'] for _, attrs in G1.nodes(data = True) }
    labelset2 = { attrs['label'] for _, attrs in G2.nodes(data = True) }
    kernel += spkernel(G1, G2)

    for height in range(0, min(num_nodes1, num_nodes2)): #Q how to determine the upper bound of the height?
        if labelset1 != labelset2:
            break

        # one iteration of the Weisfeiler-Lehman test of graph isomorphism.
        relabel(G1)
        relabel(G2)

        # calculate the kernel on the relabeled graphs
        kernel += spkernel(G1, G2)

        # get the label sets of both graphs
        labelset1 = { attrs['label'] for _, attrs in G1.nodes(data = True) }
        labelset2 = { attrs['label'] for _, attrs in G2.nodes(data = True) }

    return kernel
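
# Worked illustration of one relabeling pass (assumed toy labels): a node
# labeled '2' with neighbors labeled '3' and '1' gets the multiset string
# '213' ('2' followed by the sorted neighbor labels '13'), which relabel()
# then compresses to a fresh short label such as '0'. The loop above stops
# as soon as the two graphs' current label sets differ.
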
def relabel(G):
    '''
    Relabel the nodes of graph G in one iteration of the 1-dim. WL test of graph isomorphism.

    Parameters
    ----------
    G : NetworkX graph
        The graph whose nodes are relabeled.
    '''
    # multiset-label determination: for each node, collect the labels of its neighbors.
    multisets = {}
    for node, attrs in G.nodes(data = True):
        multiset = [ G.node[neighbor]['label'] for neighbor in G[node] ]
        # sort each multiset and concatenate it to a string, prefixed by the node's own label
        multiset.sort()
        multisets[node] = attrs['label'] + ''.join(multiset)

    # label compression: assign a short new label to each unique multiset label.
    set_unique = list(set(multisets.values())) # set of unique multiset labels
    set_compressed = { value : str(set_unique.index(value)) for value in set_unique } # assign indices as the new labels

    # relabel nodes with the compressed labels
    for node, attrs in G.nodes(data = True):
        attrs['label'] = set_compressed[multisets[node]]
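
if __name__ == '__main__':
    # Minimal self-check sketch (not part of the original file): the graph
    # and its degree-based node labels are illustrative assumptions.
    G = nx.path_graph(3) # 0 - 1 - 2
    for n in G.nodes():
        G.node[n]['label'] = str(G.degree(n)) # labels: '1', '2', '1'
    print('before relabel:', [ attrs['label'] for _, attrs in G.nodes(data = True) ])
    relabel(G) # one 1-dim. WL relabeling pass
    print('after relabel: ', [ attrs['label'] for _, attrs in G.nodes(data = True) ])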