Commit 0be419b5 authored by ljia's avatar ljia

* ADD mean average path kernel and its results on the Acyclic dataset. - linlin

* ADD delta kernel. - linlin
* MOD refactor the code of the marginalized kernel. - linlin
parent 4d58912e
# py-graph
A Python package for graph kernels.
# requirements
* numpy
......@@ -5,4 +8,4 @@
* matplotlib
* networkx
* sklearn
* tabulate
\ No newline at end of file
* tabulate
This folder contains the marginalized kernel matrix results for the acyclic dataset. Each file records the kernel matrix obtained for one termination probability (p_quit).
1.851851851845114916e-01 1.715846994535283054e-01 1.111111111111111327e-01 1.658860265417390345e-01 1.190476190476190410e-01 1.645021645021644552e-01 1.728142076502438007e-01 1.428571428571428492e-01 1.250000000000000000e-01 1.647727272727272374e-01 1.688015355386982830e-01 1.458333333333333148e-01 1.660692951015528829e-01 1.690644515140747317e-01 1.333333333333333315e-01 1.661290322580643020e-01 1.642096642096627368e-01 1.644100580270792533e-01 1.515151515151514361e-01
1.715846994535283054e-01 1.998811757348291918e-01 1.517333333333333034e-01 1.843559610953250938e-01 1.646526315789462180e-01 2.118472338216294271e-01 1.898596369052136801e-01 1.996019142722996187e-01 1.681972332015810312e-01 2.154011537890504757e-01 1.957526354741883912e-01 2.041482002828035602e-01 2.184241889249090574e-01 2.000166386402571217e-01 1.875496943119546456e-01 2.205599010801453097e-01 2.050616538136656175e-01 2.225644534523652518e-01 2.141792004113506342e-01
1.111111111111111327e-01 1.517333333333333034e-01 1.630315604810822772e-01 1.341647770219198677e-01 1.690349428808844701e-01 1.696057260838522707e-01 1.386293564985520976e-01 1.851112948783439904e-01 1.698927555337589324e-01 1.739541731393236534e-01 1.476235079516818594e-01 1.870922130863246491e-01 1.770647653000592125e-01 1.529350590946779720e-01 1.797093910193169886e-01 1.797577474954638876e-01 1.608278529980657345e-01 1.829525198509154293e-01 1.918657293026174715e-01
1.658860265417390345e-01 1.843559610953250938e-01 1.341647770219198677e-01 1.741392296911389026e-01 1.452951085195982872e-01 1.923044887110414158e-01 1.777582800045086620e-01 1.759885815412612076e-01 1.489222329351688745e-01 1.946266323857845093e-01 1.816655496533643444e-01 1.798602947158312870e-01 1.964604023427546364e-01 1.845069500668397311e-01 1.651037635666586234e-01 1.978885324792245270e-01 1.876399011221044355e-01 1.992154138959279508e-01 1.884341866656164544e-01
1.190476190476190410e-01 1.646526315789462180e-01 1.690349428808844701e-01 1.452951085195982872e-01 1.770322527788827094e-01 1.846487193324772291e-01 1.500289479993194564e-01 1.978545544560053882e-01 1.777966310335341582e-01 1.895091732042779031e-01 1.601008134395562332e-01 2.005742954298423975e-01 1.930613092736430947e-01 1.659997651853381828e-01 1.911352866506819126e-01 1.960531049535002290e-01 1.750640011772102433e-01 1.996379415839388272e-01 2.069646394958871982e-01
1.645021645021644552e-01 2.118472338216294271e-01 1.696057260838522707e-01 1.923044887110414158e-01 1.846487193324772291e-01 2.326931412246718844e-01 1.968155205597167945e-01 2.245027638024112704e-01 1.871931998682148934e-01 2.376584439103849311e-01 2.073324800459713235e-01 2.296792461171615896e-01 2.411989984746620574e-01 2.133731381420688189e-01 2.112534062801647572e-01 2.442696349159975688e-01 2.228533264773001343e-01 2.480255514376055459e-01 2.415666892155883816e-01
1.728142076502438007e-01 1.898596369052136801e-01 1.386293564985520976e-01 1.777582800045086620e-01 1.500289479993194564e-01 1.968155205597167945e-01 1.830926894080937284e-01 1.815227343868854037e-01 1.541158458257969321e-01 1.993530861698264711e-01 1.864121849264289144e-01 1.855603756513764524e-01 2.016952728409279749e-01 1.894602942655504163e-01 1.703003240859909950e-01 2.032069376985556619e-01 1.919238161697774669e-01 2.042595977646093908e-01 1.942899883118828219e-01
1.428571428571428492e-01 1.996019142722996187e-01 1.851112948783439904e-01 1.759885815412612076e-01 1.978545544560053882e-01 2.245027638024112704e-01 1.815227343868854037e-01 2.326931412246718844e-01 2.016873508659712544e-01 2.304958436661139753e-01 1.940792598956584403e-01 2.369417565719455498e-01 2.348608369985791255e-01 2.013440395303174313e-01 2.204298444986837746e-01 2.385490557179342497e-01 2.127571129839336272e-01 2.430295894893172437e-01 2.467819734111023067e-01
1.250000000000000000e-01 1.681972332015810312e-01 1.698927555337589324e-01 1.489222329351688745e-01 1.777966310335341582e-01 1.871931998682148934e-01 1.541158458257969321e-01 2.016873508659712544e-01 1.839135570555245902e-01 1.918858750573161631e-01 1.636560646532739527e-01 2.042816143805762374e-01 1.952436047578060552e-01 1.694048912272962670e-01 1.919248984251117895e-01 1.981551126965962195e-01 1.776088050992688239e-01 2.015283740570520399e-01 2.098880539920287003e-01
1.647727272727272374e-01 2.154011537890504757e-01 1.739541731393236534e-01 1.946266323857845093e-01 1.895091732042779031e-01 2.376584439103849311e-01 1.993530861698264711e-01 2.304958436661139753e-01 1.918858750573161631e-01 2.429685886783167770e-01 2.105827752776856798e-01 2.358608629272750390e-01 2.467903637908182013e-01 2.170263489303834170e-01 2.169948288742958031e-01 2.500670122922188376e-01 2.272464560830733671e-01 2.540783678321864580e-01 2.481862516364052584e-01
1.688015355386982830e-01 1.957526354741883912e-01 1.476235079516818594e-01 1.816655496533643444e-01 1.601008134395562332e-01 2.073324800459713235e-01 1.864121849264289144e-01 1.940792598956584403e-01 1.636560646532739527e-01 2.105827752776856798e-01 1.921462927865995041e-01 1.984298929684912283e-01 2.131729755867476928e-01 1.960921333398846500e-01 1.822517516856926034e-01 2.151566975643480761e-01 2.008813931359722249e-01 2.171272957405011306e-01 2.081033854350700374e-01
1.458333333333333148e-01 2.041482002828035602e-01 1.870922130863246491e-01 1.798602947158312870e-01 2.005742954298423975e-01 2.296792461171615896e-01 1.855603756513764524e-01 2.369417565719455498e-01 2.042816143805762374e-01 2.358608629272750390e-01 1.984298929684912283e-01 2.415488463393239416e-01 2.404205381767979821e-01 2.059026410425359999e-01 2.243921922156122684e-01 2.442145177034795722e-01 2.176914889472909564e-01 2.488030419410623417e-01 2.520024597593376048e-01
1.660692951015528829e-01 2.184241889249090574e-01 1.770647653000592125e-01 1.964604023427546364e-01 1.930613092736430947e-01 2.411989984746620574e-01 2.016952728409279749e-01 2.348608369985791255e-01 1.952436047578060552e-01 2.467903637908182013e-01 2.131729755867476928e-01 2.404205381767979821e-01 2.510706871400874007e-01 2.198819530389543586e-01 2.212654775697119602e-01 2.544692104629461826e-01 2.305889589277120844e-01 2.585594897631460221e-01 2.531218199116737066e-01
1.690644515140747317e-01 2.000166386402571217e-01 1.529350590946779720e-01 1.845069500668397311e-01 1.659997651853381828e-01 2.133731381420688189e-01 1.894602942655504163e-01 2.013440395303174313e-01 1.694048912272962670e-01 2.170263489303834170e-01 1.960921333398846500e-01 2.059026410425359999e-01 2.198819530389543586e-01 2.005295888043065466e-01 1.891755102481660900e-01 2.221202677126309366e-01 2.061695981364425334e-01 2.244123870467931436e-01 2.160756274647735453e-01
1.333333333333333315e-01 1.875496943119546456e-01 1.797093910193169886e-01 1.651037635666586234e-01 1.911352866506819126e-01 2.112534062801647572e-01 1.703003240859909950e-01 2.204298444986837746e-01 1.919248984251117895e-01 2.169948288742958031e-01 1.822517516856926034e-01 2.243921922156122684e-01 2.212654775697119602e-01 1.891755102481660900e-01 2.112185007492166089e-01 2.247810022348857850e-01 2.002196060213428108e-01 2.290446698038669793e-01 2.335601198264010137e-01
1.661290322580643020e-01 2.205599010801453097e-01 1.797577474954638876e-01 1.978885324792245270e-01 1.960531049535002290e-01 2.442696349159975688e-01 2.032069376985556619e-01 2.385490557179342497e-01 1.981551126965962195e-01 2.500670122922188376e-01 2.151566975643480761e-01 2.442145177034795722e-01 2.544692104629461826e-01 2.221202677126309366e-01 2.247810022348857850e-01 2.580011483279751605e-01 2.332655897075114060e-01 2.622606697746047710e-01 2.571714362075354643e-01
1.642096642096627368e-01 2.050616538136656175e-01 1.608278529980657345e-01 1.876399011221044355e-01 1.750640011772102433e-01 2.228533264773001343e-01 1.919238161697774669e-01 2.127571129839336272e-01 1.776088050992688239e-01 2.272464560830733671e-01 2.008813931359722249e-01 2.176914889472909564e-01 2.305889589277120844e-01 2.061695981364425334e-01 2.002196060213428108e-01 2.332655897075114060e-01 2.144219207547575778e-01 2.364527959734974982e-01 2.289178822880236497e-01
1.644100580270792533e-01 2.225644534523652518e-01 1.829525198509154293e-01 1.992154138959279508e-01 1.996379415839388272e-01 2.480255514376055459e-01 2.042595977646093908e-01 2.430295894893172437e-01 2.015283740570520399e-01 2.540783678321864580e-01 2.171272957405011306e-01 2.488030419410623417e-01 2.585594897631460221e-01 2.244123870467931436e-01 2.290446698038669793e-01 2.622606697746047710e-01 2.364527959734974982e-01 2.668738448004014385e-01 2.621030501774097021e-01
1.515151515151514361e-01 2.141792004113506342e-01 1.918657293026174715e-01 1.884341866656164544e-01 2.069646394958871982e-01 2.415666892155883816e-01 1.942899883118828219e-01 2.467819734111023067e-01 2.098880539920287003e-01 2.481862516364052584e-01 2.081033854350700374e-01 2.520024597593376048e-01 2.531218199116737066e-01 2.160756274647735453e-01 2.335601198264010137e-01 2.571714362075354643e-01 2.289178822880236497e-01 2.621030501774097021e-01 2.638699932005915683e-01
This diff is collapsed.
This diff is collapsed.
......@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 26,
"execution_count": 5,
"metadata": {
"scrolled": false
},
......@@ -155,7 +155,7 @@
"\n",
"import sys\n",
"import pathlib\n",
"sys.path.insert(0, \"../py-graph/\")\n",
"sys.path.insert(0, \"../\")\n",
"from tabulate import tabulate\n",
"\n",
"import random\n",
......@@ -166,8 +166,8 @@
"from sklearn.metrics import accuracy_score, mean_squared_error\n",
"from sklearn import svm\n",
"\n",
"from kernels.spkernel import spkernel\n",
"from utils.graphfiles import loadDataset\n",
"from pygraph.kernels.spkernel import spkernel\n",
"from pygraph.utils.graphfiles import loadDataset\n",
"\n",
"print('\\n Loading dataset from file...')\n",
"dataset, y = loadDataset(\"../../../../datasets/acyclic/Acyclic/dataset_bps.ds\")\n",
......@@ -336,9 +336,7 @@
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
" "
]
"source": []
}
],
"metadata": {
......@@ -357,7 +355,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.3"
"version": "3.5.2"
}
},
"nbformat": 4,
......
def deltakernel(condition):
    """Evaluate the delta (indicator) kernel of a condition.

    Parameters
    ----------
    condition : Boolean
        The condition deciding whether the kernel evaluates to 1 or 0.

    Return
    ------
    Kernel : integer
        1 when `condition` is truthy, 0 otherwise.

    References
    ----------
    [1] H. Kashima, K. Tsuda, and A. Inokuchi. Marginalized kernels between labeled graphs. In Proceedings of the 20th International Conference on Machine Learning, Washington, DC, United States, 2003.
    """
    if condition:
        return 1
    return 0
\ No newline at end of file
......@@ -6,6 +6,8 @@ import networkx as nx
import numpy as np
import time
from pygraph.kernels.deltaKernel import deltakernel
def marginalizedkernel(*args):
"""Calculate marginalized graph kernels between graphs.
......@@ -36,9 +38,10 @@ def marginalizedkernel(*args):
Kmatrix = np.zeros((len(Gn), len(Gn)))
start_time = time.time()
for i in range(0, len(Gn)):
for j in range(i, len(Gn)):
Kmatrix[i][j] = marginalizedkernel(Gn[i], Gn[j], args[1], args[2])
Kmatrix[i][j] = _marginalizedkernel_do(Gn[i], Gn[j], args[1], args[2])
Kmatrix[j][i] = Kmatrix[i][j]
print("\n --- marginalized kernel matrix of size %d built in %s seconds ---" % (len(Gn), (time.time() - start_time)))
......@@ -47,70 +50,72 @@ def marginalizedkernel(*args):
else: # for only 2 graphs
# init parameters
G1 = args[0]
G2 = args[1]
p_quit = args[2] # the termination probability in the random walks generating step
itr = args[3] # time of iterations to calculate R_inf
kernel = 0
num_nodes_G1 = nx.number_of_nodes(G1)
num_nodes_G2 = nx.number_of_nodes(G2)
p_init_G1 = 1 / num_nodes_G1 # the initial probability distribution in the random walks generating step (uniform distribution over |G|)
p_init_G2 = 1 / num_nodes_G2
q = p_quit * p_quit
r1 = q
# initial R_inf
R_inf = np.zeros([num_nodes_G1, num_nodes_G2]) # matrix to save all the R_inf for all pairs of nodes
# calculate R_inf with a simple interative method
for i in range(1, itr):
R_inf_new = np.zeros([num_nodes_G1, num_nodes_G2])
R_inf_new.fill(r1)
# calculate R_inf for each pair of nodes
for node1 in G1.nodes(data = True):
neighbor_n1 = G1[node1[0]]
p_trans_n1 = (1 - p_quit) / len(neighbor_n1) # the transition probability distribution in the random walks generating step (uniform distribution over the vertices adjacent to the current vertex)
for node2 in G2.nodes(data = True):
neighbor_n2 = G2[node2[0]]
p_trans_n2 = (1 - p_quit) / len(neighbor_n2)
for neighbor1 in neighbor_n1:
for neighbor2 in neighbor_n2:
t = p_trans_n1 * p_trans_n2 * \
deltaKernel(G1.node[neighbor1]['label'] == G2.node[neighbor2]['label']) * \
deltaKernel(neighbor_n1[neighbor1]['label'] == neighbor_n2[neighbor2]['label'])
R_inf_new[node1[0]][node2[0]] += t * R_inf[neighbor1][neighbor2] # ref [1] equation (8)
R_inf[:] = R_inf_new
start_time = time.time()
# add elements of R_inf up and calculate kernel
for node1 in G1.nodes(data = True):
for node2 in G2.nodes(data = True):
s = p_init_G1 * p_init_G2 * deltaKernel(node1[1]['label'] == node2[1]['label'])
kernel += s * R_inf[node1[0]][node2[0]] # ref [1] equation (6)
kernel = _marginalizedkernel_do(args[0], args[1], args[2], args[3])
print("\n --- marginalized kernel built in %s seconds ---" % (time.time() - start_time))
return kernel
def deltaKernel(condition):
"""Return 1 if condition holds, 0 otherwise.
def _marginalizedkernel_do(G1, G2, p_quit, itr):
    """Calculate the marginalized graph kernel between 2 graphs.

    Parameters
    ----------
    G1, G2 : NetworkX graphs
        2 graphs between which the kernel is calculated. Every node and
        every edge is expected to carry a 'label' attribute (a missing
        label raises KeyError).
    p_quit : float
        The termination probability in the random walks generating step.
    itr : integer
        Upper bound of the fixed-point loop used to calculate R_inf; the
        loop runs ``range(1, itr)``, i.e. ``itr - 1`` update sweeps.

    Return
    ------
    Kernel : float
        Marginalized Kernel between 2 graphs.

    References
    ----------
    [1] H. Kashima, K. Tsuda, and A. Inokuchi. Marginalized kernels between labeled graphs. In Proceedings of the 20th International Conference on Machine Learning, Washington, DC, United States, 2003.
    """
    # Map every node to a row/column position so that R_inf can be indexed
    # for arbitrary node identifiers, not only for nodes named 0..n-1.
    nodes_G1 = list(G1.nodes())
    nodes_G2 = list(G2.nodes())
    index_G1 = {node: pos for pos, node in enumerate(nodes_G1)}
    index_G2 = {node: pos for pos, node in enumerate(nodes_G2)}

    # Node labels looked up through the attribute helper, which works on
    # NetworkX 1.x and 2.x alike (``G.node[...]`` was removed in 2.4).
    labels_G1 = nx.get_node_attributes(G1, 'label')
    labels_G2 = nx.get_node_attributes(G2, 'label')

    num_nodes_G1 = nx.number_of_nodes(G1)
    num_nodes_G2 = nx.number_of_nodes(G2)
    # the initial probability distribution in the random walks generating
    # step (uniform distribution over |G|)
    p_init_G1 = 1 / num_nodes_G1
    p_init_G2 = 1 / num_nodes_G2

    r1 = p_quit * p_quit

    # matrix to save all the R_inf for all pairs of nodes
    R_inf = np.zeros([num_nodes_G1, num_nodes_G2])

    # calculate R_inf with a simple iterative method
    for _ in range(1, itr):
        R_inf_new = np.full([num_nodes_G1, num_nodes_G2], r1, dtype=float)

        # calculate R_inf for each pair of nodes
        for node1 in nodes_G1:
            neighbor_n1 = G1[node1]
            if not neighbor_n1:
                # An isolated node has no transition to follow, so its
                # entries keep the base value r1 (and we avoid dividing
                # by a zero neighbour count).
                continue
            # the transition probability distribution in the random walks
            # generating step (uniform distribution over the vertices
            # adjacent to the current vertex)
            p_trans_n1 = (1 - p_quit) / len(neighbor_n1)
            row = index_G1[node1]

            for node2 in nodes_G2:
                neighbor_n2 = G2[node2]
                if not neighbor_n2:
                    continue
                p_trans_n2 = (1 - p_quit) / len(neighbor_n2)
                col = index_G2[node2]
                t = p_trans_n1 * p_trans_n2

                for neighbor1, edge1 in neighbor_n1.items():
                    for neighbor2, edge2 in neighbor_n2.items():
                        # Both delta kernels (node labels and edge labels)
                        # must be 1 for the pair to contribute.
                        if (labels_G1[neighbor1] == labels_G2[neighbor2]
                                and edge1['label'] == edge2['label']):
                            # ref [1] equation (8)
                            R_inf_new[row, col] += t * R_inf[index_G1[neighbor1], index_G2[neighbor2]]

        R_inf = R_inf_new

    # add elements of R_inf up and calculate kernel
    kernel = 0.0
    s = p_init_G1 * p_init_G2
    for node1 in nodes_G1:
        for node2 in nodes_G2:
            if labels_G1[node1] == labels_G2[node2]:
                # ref [1] equation (6)
                kernel += s * R_inf[index_G1[node1], index_G2[node2]]

    return kernel
\ No newline at end of file
import sys
import pathlib
sys.path.insert(0, "../")
import networkx as nx
import numpy as np
import time
from pygraph.kernels.deltaKernel import deltakernel
def pathkernel(*args):
    """Calculate mean average path kernels between graphs.

    Accepts either one positional argument (a list of graphs) or two
    positional arguments (a pair of graphs).

    Parameters
    ----------
    Gn : List of NetworkX graph
        List of graphs between which the kernels are calculated.
    /
    G1, G2 : NetworkX graphs
        2 graphs between which the kernel is calculated.

    Return
    ------
    Kmatrix/Kernel : Numpy matrix/float
        Kernel matrix, each element of which is the path kernel between
        2 graphs. / Path Kernel between 2 graphs.

    References
    ----------
    [1] Suard F, Rakotomamonjy A, Bensrhair A. Kernel on Bag of Paths For Measuring Similarity of Shapes. In ESANN 2007 Apr 25 (pp. 355-360).
    """
    if len(args) != 1:
        # for only 2 graphs
        start_time = time.time()
        kernel = _pathkernel_do(args[0], args[1])
        elapsed = time.time() - start_time
        print("\n --- mean average path kernel built in %s seconds ---" % elapsed)
        return kernel

    # for a list of graphs
    graphs = args[0]
    size = len(graphs)
    Kmatrix = np.zeros((size, size))

    start_time = time.time()
    # The kernel is symmetric, so only the upper triangle is computed and
    # each value is mirrored into the lower triangle.
    for row in range(size):
        for col in range(row, size):
            Kmatrix[row][col] = _pathkernel_do(graphs[row], graphs[col])
            Kmatrix[col][row] = Kmatrix[row][col]
    elapsed = time.time() - start_time

    print("\n --- mean average path kernel matrix of size %d built in %s seconds ---" % (size, elapsed))

    return Kmatrix
def _pathkernel_do(G1, G2):
    """Calculate the mean average path kernel between 2 graphs.

    One shortest path is computed for every unordered pair of nodes in
    each graph. Two paths match (kernel 1) when they have the same length
    and all corresponding nodes and edges carry equal 'label' attributes;
    the result is the number of matching path pairs divided by the total
    number of path pairs.

    Parameters
    ----------
    G1, G2 : NetworkX graphs
        2 graphs between which the kernel is calculated.

    Return
    ------
    Kernel : float
        Path Kernel between 2 graphs. 0 is returned when either graph has
        fewer than 2 nodes, because such a graph contains no path.
    """
    # calculate shortest paths for both graphs
    sp1 = _pairwise_shortest_paths(G1)
    sp2 = _pairwise_shortest_paths(G2)

    num_path_pairs = len(sp1) * len(sp2)
    if num_path_pairs == 0:
        # No paths to compare: avoid dividing by zero in the mean below.
        return 0

    # Node labels looked up through the attribute helper, which works on
    # NetworkX 1.x and 2.x alike (``G.node[...]`` was removed in 2.4).
    labels1 = nx.get_node_attributes(G1, 'label')
    labels2 = nx.get_node_attributes(G2, 'label')

    # calculate kernel
    kernel = 0
    for path1 in sp1:
        for path2 in sp2:
            if len(path1) != len(path2):
                continue
            if not labels1[path1[0]] == labels2[path2[0]]:
                continue
            for i in range(1, len(path1)):
                edge1 = G1[path1[i - 1]][path1[i]]
                edge2 = G2[path2[i - 1]][path2[i]]
                if not (edge1['label'] == edge2['label']
                        and labels1[path1[i]] == labels2[path2[i]]):
                    # A single mismatch makes the path kernel 0.
                    break
            else:
                # all corresponding nodes and edges have the same labels
                kernel += 1

    # calculate mean average
    return kernel / num_path_pairs


def _pairwise_shortest_paths(G):
    """Return one 'cost'-weighted shortest path (node list) per unordered node pair of G."""
    # Sorting reproduces the (0, 1), (0, 2), ... pair order for graphs whose
    # nodes are named 0..n-1 while also supporting other sortable node ids.
    nodes = sorted(G.nodes())
    return [
        nx.shortest_path(G, source, target, weight='cost')
        for pos, source in enumerate(nodes)
        for target in nodes[pos + 1:]
    ]
\ No newline at end of file
......@@ -7,7 +7,7 @@ import networkx as nx
import numpy as np
import time
from utils.utils import getSPGraph
from pygraph.utils.utils import getSPGraph
def spkernel(*args):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment