Commit bf414baa authored by ljia's avatar ljia

ADD comments to code of treelet kernel.

parent 14296494
......@@ -32,7 +32,20 @@ For predition we randomly divide the data in train and test subset, where 90% of
* The targets of training data are normalized before calculating *path kernel* and *treelet kernel*.
* See detail results in [results.md](pygraph/kernels/results.md).
## References
[1] K. M. Borgwardt and H.-P. Kriegel. Shortest-path kernels on graphs. In Proceedings of the International Conference on Data Mining, pages 74-81, 2005.
[2] H. Kashima, K. Tsuda, and A. Inokuchi. Marginalized kernels between labeled graphs. In Proceedings of the 20th International Conference on Machine Learning, Washington, DC, United States, 2003.
[3] Suard F, Rakotomamonjy A, Bensrhair A. Kernel on Bag of Paths For Measuring Similarity of Shapes. In ESANN 2007 Apr 25 (pp. 355-360).
[4] N. Shervashidze, P. Schweitzer, E. J. van Leeuwen, K. Mehlhorn, and K. M. Borgwardt. Weisfeiler-lehman graph kernels. Journal of Machine Learning Research, 12:2539-2561, 2011.
[5] Gaüzère B, Brun L, Villemin D. Two new graphs kernels in chemoinformatics. Pattern Recognition Letters. 2012 Nov 1;33(15):2038-47.
## Updates
### 2018.01.17
* ADD comments to code of treelet kernel. - linlin
### 2018.01.16
* ADD *treelet kernel* and its result on dataset Acyclic. - linlin
* MOD the way to calculate WL subtree kernel, correct its results. - linlin
......
......@@ -8,8 +8,8 @@ def deltakernel(condition):
Return
------
Kernel : integer
Delta Kernel.
kernel : integer
Delta kernel.
References
----------
......
......@@ -29,8 +29,8 @@ def marginalizedkernel(*args, node_label = 'atom', edge_label = 'bond_type', p_q
Return
------
Kmatrix/Kernel : Numpy matrix/int
Kernel matrix, each element of which is the marginalized kernel between 2 praphs. / Marginalized Kernel between 2 graphs.
Kmatrix/kernel : Numpy matrix/float
Kernel matrix, each element of which is the marginalized kernel between 2 praphs. / Marginalized kernel between 2 graphs.
References
----------
......@@ -65,24 +65,24 @@ def marginalizedkernel(*args, node_label = 'atom', edge_label = 'bond_type', p_q
def _marginalizedkernel_do(G1, G2, node_label, edge_label, p_quit, itr):
"""Calculate marginalized graph kernels between 2 graphs.
"""Calculate marginalized graph kernel between 2 graphs.
Parameters
----------
G1, G2 : NetworkX graphs
2 graphs between which the kernel is calculated.
node_label : string
node attribute used as label. The default node label is atom.
node attribute used as label.
edge_label : string
edge attribute used as label. The default edge label is bond_type.
edge attribute used as label.
p_quit : integer
the termination probability in the random walks generating step
the termination probability in the random walks generating step.
itr : integer
time of iterations to calculate R_inf
time of iterations to calculate R_inf.
Return
------
Kernel : int
kernel : float
Marginalized Kernel between 2 graphs.
"""
# init parameters
......
......@@ -25,8 +25,8 @@ def pathkernel(*args, node_label = 'atom', edge_label = 'bond_type'):
Return
------
Kmatrix/Kernel : Numpy matrix/int
Kernel matrix, each element of which is the path kernel between 2 praphs. / Path Kernel between 2 graphs.
Kmatrix/kernel : Numpy matrix/float
Kernel matrix, each element of which is the path kernel between 2 praphs. / Path kernel between 2 graphs.
References
----------
......@@ -64,7 +64,7 @@ def pathkernel(*args, node_label = 'atom', edge_label = 'bond_type'):
def _pathkernel_do(G1, G2, node_label = 'atom', edge_label = 'bond_type', weight = None):
"""Calculate mean average path kernels between 2 graphs.
"""Calculate mean average path kernel between 2 graphs.
Parameters
----------
......@@ -79,7 +79,7 @@ def _pathkernel_do(G1, G2, node_label = 'atom', edge_label = 'bond_type', weight
Return
------
Kernel : int
kernel : float
Path Kernel between 2 graphs.
"""
# calculate shortest paths for both graphs
......
......@@ -25,8 +25,8 @@ def spkernel(*args, edge_weight = 'bond_type'):
Return
------
Kmatrix/Kernel : Numpy matrix/int
Kernel matrix, each element of which is the sp kernel between 2 praphs. / SP Kernel between 2 graphs.
Kmatrix/kernel : Numpy matrix/float
Kernel matrix, each element of which is the sp kernel between 2 praphs. / SP kernel between 2 graphs.
References
----------
......
import sys
import pathlib
sys.path.insert(0, "../")
import networkx as nx
import numpy as np
import time
from pygraph.utils.utils import getSPGraph
def spkernel(*args, edge_weight = 'bond_type'):
    """Calculate shortest-path kernels between graphs.

    Parameters
    ----------
    Gn : List of NetworkX graph
        List of graphs between which the kernels are calculated.
    /
    G1, G2 : NetworkX graphs
        2 graphs between which the kernel is calculated.
    edge_weight : string
        edge attribute corresponding to the edge weight. The default edge weight is bond_type.

    Return
    ------
    Kmatrix, run_time : Numpy array, float
        Returned when a single list of graphs is given. Kmatrix is the
        symmetric kernel matrix, each element of which is the sp kernel
        between 2 graphs; run_time is the time in seconds spent filling the
        matrix (building the shortest-path graphs is not included).
    /
    kernel : int
        Returned when 2 graphs are given. SP kernel between the 2 graphs.

    References
    ----------
    [1] Borgwardt KM, Kriegel HP. Shortest-path kernels on graphs. In Data Mining, Fifth IEEE International Conference on 2005 Nov 27 (pp. 8-pp). IEEE.
    """
    if len(args) == 1: # for a list of graphs
        Gn = args[0]
        Kmatrix = np.zeros((len(Gn), len(Gn)))

        # get shortest path graphs of Gn
        Sn = [getSPGraph(G, edge_weight = edge_weight) for G in Gn]

        start_time = time.time()
        for i in range(len(Gn)):
            for j in range(i, len(Gn)):
                # the kernel is symmetric, so each pair is computed once and
                # mirrored into the lower triangle
                kernel = _spkernel_count_matches(Sn[i], Sn[j])
                Kmatrix[i][j] = kernel
                Kmatrix[j][i] = kernel

        run_time = time.time() - start_time
        print("--- shortest path kernel matrix of size %d built in %s seconds ---" % (len(Gn), run_time))

        return Kmatrix, run_time

    else: # for only 2 graphs
        G1 = getSPGraph(args[0], edge_weight = edge_weight)
        G2 = getSPGraph(args[1], edge_weight = edge_weight)

        return _spkernel_count_matches(G1, G2)


def _spkernel_count_matches(S1, S2):
    """Count pairs of matching edges between 2 shortest-path graphs.

    An edge of S1 matches an edge of S2 when both have the same non-zero
    'cost' attribute and the same pair of end nodes (in either order).

    Parameters
    ----------
    S1, S2 : NetworkX graphs
        Shortest-path graphs whose edges carry a 'cost' attribute.

    Return
    ------
    count : int
        Number of matching edge pairs.
    """
    # NOTE(review): end nodes are compared by identifier across the 2 graphs;
    # confirm this is intended for graphs with unrelated node numbering.
    edges2 = list(S2.edges(data = True)) # materialise once, reused for every edge of S1
    count = 0
    for u1, v1, attrs1 in S1.edges(data = True):
        cost = attrs1['cost']
        if cost == 0: # zero-cost edges never contribute
            continue
        for u2, v2, attrs2 in edges2:
            if cost == attrs2['cost'] and ((u1 == u2 and v1 == v2) or (u1 == v2 and v1 == u2)):
                count += 1
    return count
\ No newline at end of file
This diff is collapsed.
......@@ -9,8 +9,6 @@ import time
from pygraph.kernels.spkernel import spkernel
from pygraph.kernels.pathKernel import pathkernel
# test of WL subtree kernel on many graphs
import sys
import pathlib
from collections import Counter
......@@ -44,8 +42,8 @@ def weisfeilerlehmankernel(*args, node_label = 'atom', edge_label = 'bond_type',
Return
------
Kmatrix/Kernel : Numpy matrix/int
Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 praphs. / Weisfeiler-Lehman Kernel between 2 graphs.
Kmatrix/kernel : Numpy matrix/float
Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 praphs. / Weisfeiler-Lehman kernel between 2 graphs.
Notes
-----
......@@ -125,7 +123,7 @@ def _wl_subtreekernel_do(*args, node_label = 'atom', edge_label = 'bond_type', h
Return
------
Kmatrix/Kernel : Numpy matrix/int
Kmatrix/kernel : Numpy matrix/float
Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 praphs.
"""
......@@ -229,8 +227,8 @@ def _weisfeilerlehmankernel_do(G1, G2, height = 0):
Return
------
Kernel : int
Weisfeiler-Lehman Kernel between 2 graphs.
kernel : float
Weisfeiler-Lehman kernel between 2 graphs.
"""
# init.
......@@ -298,4 +296,4 @@ def relabel(G):
# get the set of compressed labels
labels_comp = list(nx.get_node_attributes(G, 'label').values())
num_of_each_label.update(dict(Counter(labels_comp)))
\ No newline at end of file
num_of_each_label.update(dict(Counter(labels_comp)))
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment