Commit bfc6e766 authored by ljia

plus de choses sont changées...

parent b5c6761d
# Jupyter Notebook
.ipynb_checkpoints
datasets
notebooks/results
__pycache__
##*#
This diff is collapsed.
This diff is collapsed.
This folder contains marginalized kernel matrix results for the acyclic dataset. Each file records a kernel matrix corresponding to a termination probability (p_quit).
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This source diff could not be displayed because it is too large. You can view the blob instead.
This diff is collapsed.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -46,10 +46,10 @@ def cyclicpatternkernel(*args, node_label = 'atom', edge_label = 'bond_type', la
start_time = time.time()
# get all cyclic and tree patterns of all graphs before calculating kernels to save time, but this may consume a lot of memory for large dataset.
all_patterns = [ get_patterns(Gn[i], node_label = node_label, edge_label = edge_label, labeled = labeled, cycle_bound = cycle_bound)
for i in tqdm(range(0, len(Gn)), desc = 'retrieve patterns', file=sys.stdout) ]
all_patterns = [ get_patterns(Gn[i], node_label=node_label, edge_label = edge_label, labeled = labeled, cycle_bound = cycle_bound)
for i in tqdm(range(0, len(Gn)), desc='retrieve patterns', file=sys.stdout) ]
for i in tqdm(range(0, len(Gn)), desc = 'calculate kernels', file=sys.stdout):
for i in tqdm(range(0, len(Gn)), desc='calculate kernels', file=sys.stdout):
for j in range(i, len(Gn)):
Kmatrix[i][j] = _cyclicpatternkernel_do(all_patterns[i], all_patterns[j])
Kmatrix[j][i] = Kmatrix[i][j]
......
"""
@author: linlin
@references:
[1] H. Kashima, K. Tsuda, and A. Inokuchi. Marginalized kernels between labeled graphs. In Proceedings of the 20th International Conference on Machine Learning, Washington, DC, United States, 2003.
[2] Pierre Mahé, Nobuhisa Ueda, Tatsuya Akutsu, Jean-Luc Perret, and Jean-Philippe Vert. Extensions of marginalized graph kernels. In Proceedings of the twenty-first international conference on Machine learning, page 70. ACM, 2004.
"""
import sys
import pathlib
sys.path.insert(0, "../")
import time
from tqdm import tqdm
tqdm.monitor_interval = 0
import networkx as nx
import numpy as np
import time
from matplotlib import pyplot as plt
from pygraph.kernels.deltaKernel import deltakernel
from pygraph.utils.utils import untotterTransformation
from pygraph.utils.graphdataset import get_dataset_attributes
def marginalizedkernel(*args, node_label = 'atom', edge_label = 'bond_type', p_quit = 0.5, itr = 20):
def marginalizedkernel(*args,
node_label='atom',
edge_label='bond_type',
p_quit=0.5,
itr=20,
remove_totters=True):
"""Calculate marginalized graph kernels between graphs.
Parameters
----------
Gn : List of NetworkX graph
......@@ -19,54 +37,66 @@ def marginalizedkernel(*args, node_label = 'atom', edge_label = 'bond_type', p_q
G1, G2 : NetworkX graphs
2 graphs between which the kernel is calculated.
node_label : string
node attribute used as label. The default node label is atom.
node attribute used as label. The default node label is atom.
edge_label : string
edge attribute used as label. The default edge label is bond_type.
p_quit : integer
the termination probability in the random walks generating step
itr : integer
time of iterations to calculate R_inf
remove_totters : boolean
whether to remove totters. The default value is True.
Return
------
Kmatrix/kernel : Numpy matrix/float
Kernel matrix, each element of which is the marginalized kernel between 2 praphs. / Marginalized kernel between 2 graphs.
References
----------
[1] H. Kashima, K. Tsuda, and A. Inokuchi. Marginalized kernels between labeled graphs. In Proceedings of the 20th International Conference on Machine Learning, Washington, DC, United States, 2003.
Kmatrix : Numpy matrix
Kernel matrix, each element of which is the marginalized kernel between 2 praphs.
"""
if len(args) == 1: # for a list of graphs
Gn = args[0]
Kmatrix = np.zeros((len(Gn), len(Gn)))
start_time = time.time()
for i in range(0, len(Gn)):
for j in range(i, len(Gn)):
Kmatrix[i][j] = _marginalizedkernel_do(Gn[i], Gn[j], node_label, edge_label, p_quit, itr)
Kmatrix[j][i] = Kmatrix[i][j]
run_time = time.time() - start_time
print("\n --- marginalized kernel matrix of size %d built in %s seconds ---" % (len(Gn), run_time))
return Kmatrix, run_time
else: # for only 2 graphs
start_time = time.time()
kernel = _marginalizedkernel_do(args[0], args[1], node_label, edge_label, p_quit, itr)
run_time = time.time() - start_time
print("\n --- marginalized kernel built in %s seconds ---" % (run_time))
return kernel, run_time
# arrange all graphs in a list
Gn = args[0] if len(args) == 1 else [args[0], args[1]]
Kmatrix = np.zeros((len(Gn), len(Gn)))
ds_attrs = get_dataset_attributes(
Gn,
attr_names=['node_labeled', 'edge_labeled', 'is_directed'],
node_label=node_label,
edge_label=edge_label)
if not ds_attrs['node_labeled']:
for G in Gn:
nx.set_node_attributes(G, '0', 'atom')
if not ds_attrs['edge_labeled']:
for G in Gn:
nx.set_edge_attributes(G, '0', 'bond_type')
start_time = time.time()
if remove_totters:
Gn = [
untotterTransformation(G, node_label, edge_label)
for G in tqdm(Gn, desc='removing tottering', file=sys.stdout)
]
pbar = tqdm(
total=(1 + len(Gn)) * len(Gn) / 2,
desc='calculating kernels',
file=sys.stdout)
for i in range(0, len(Gn)):
for j in range(i, len(Gn)):
Kmatrix[i][j] = _marginalizedkernel_do(Gn[i], Gn[j], node_label,
edge_label, p_quit, itr)
Kmatrix[j][i] = Kmatrix[i][j]
pbar.update(1)
run_time = time.time() - start_time
print(
"\n --- marginalized kernel matrix of size %d built in %s seconds ---"
% (len(Gn), run_time))
return Kmatrix, run_time
def _marginalizedkernel_do(G1, G2, node_label, edge_label, p_quit, itr):
"""Calculate marginalized graph kernel between 2 graphs.
Parameters
----------
G1, G2 : NetworkX graphs
......@@ -79,7 +109,7 @@ def _marginalizedkernel_do(G1, G2, node_label, edge_label, p_quit, itr):
the termination probability in the random walks generating step.
itr : integer
time of iterations to calculate R_inf.
Return
------
kernel : float
......@@ -89,14 +119,15 @@ def _marginalizedkernel_do(G1, G2, node_label, edge_label, p_quit, itr):
kernel = 0
num_nodes_G1 = nx.number_of_nodes(G1)
num_nodes_G2 = nx.number_of_nodes(G2)
p_init_G1 = 1 / num_nodes_G1 # the initial probability distribution in the random walks generating step (uniform distribution over |G|)
p_init_G1 = 1 / num_nodes_G1 # the initial probability distribution in the random walks generating step (uniform distribution over |G|)
p_init_G2 = 1 / num_nodes_G2
q = p_quit * p_quit
r1 = q
# initial R_inf
R_inf = np.zeros([num_nodes_G1, num_nodes_G2]) # matrix to save all the R_inf for all pairs of nodes
# matrix to save all the R_inf for all pairs of nodes
R_inf = np.zeros([num_nodes_G1, num_nodes_G2])
# calculate R_inf with a simple interative method
for i in range(1, itr):
......@@ -104,27 +135,29 @@ def _marginalizedkernel_do(G1, G2, node_label, edge_label, p_quit, itr):
R_inf_new.fill(r1)
# calculate R_inf for each pair of nodes
for node1 in G1.nodes(data = True):
for node1 in G1.nodes(data=True):
neighbor_n1 = G1[node1[0]]
p_trans_n1 = (1 - p_quit) / len(neighbor_n1) # the transition probability distribution in the random walks generating step (uniform distribution over the vertices adjacent to the current vertex)
for node2 in G2.nodes(data = True):
# the transition probability distribution in the random walks generating step (uniform distribution over the vertices adjacent to the current vertex)
p_trans_n1 = (1 - p_quit) / len(neighbor_n1)
for node2 in G2.nodes(data=True):
neighbor_n2 = G2[node2[0]]
p_trans_n2 = (1 - p_quit) / len(neighbor_n2)
p_trans_n2 = (1 - p_quit) / len(neighbor_n2)
for neighbor1 in neighbor_n1:
for neighbor2 in neighbor_n2:
t = p_trans_n1 * p_trans_n2 * \
deltakernel(G1.node[neighbor1][node_label] == G2.node[neighbor2][node_label]) * \
deltakernel(neighbor_n1[neighbor1][edge_label] == neighbor_n2[neighbor2][edge_label])
R_inf_new[node1[0]][node2[0]] += t * R_inf[neighbor1][neighbor2] # ref [1] equation (8)
R_inf_new[node1[0]][node2[0]] += t * R_inf[neighbor1][
neighbor2] # ref [1] equation (8)
R_inf[:] = R_inf_new
# add elements of R_inf up and calculate kernel
for node1 in G1.nodes(data = True):
for node2 in G2.nodes(data = True):
s = p_init_G1 * p_init_G2 * deltakernel(node1[1][node_label] == node2[1][node_label])
kernel += s * R_inf[node1[0]][node2[0]] # ref [1] equation (6)
for node1 in G1.nodes(data=True):
for node2 in G2.nodes(data=True):
s = p_init_G1 * p_init_G2 * deltakernel(
node1[1][node_label] == node2[1][node_label])
kernel += s * R_inf[node1[0]][node2[0]] # ref [1] equation (6)
return kernel
\ No newline at end of file
return kernel
This diff is collapsed.
"""
@author: linlin
@references: S Vichy N Vishwanathan, Nicol N Schraudolph, Risi Kondor, and Karsten M Borgwardt. Graph kernels. Journal of Machine Learning Research, 11(Apr):1201–1242, 2010.
"""
import sys
import pathlib
sys.path.insert(0, "../")
import time
from tqdm import tqdm
# from collections import Counter
import networkx as nx
import numpy as np
from pygraph.utils.graphdataset import get_dataset_attributes
def randomwalkkernel(*args,
                     node_label='atom',
                     edge_label='bond_type',
                     h=10,
                     compute_method=''):
    """Calculate random walk graph kernels between graphs.

    Parameters
    ----------
    Gn : List of NetworkX graph
        List of graphs between which the kernels are calculated.
    /
    G1, G2 : NetworkX graphs
        2 graphs between which the kernel is calculated.
    node_label : string
        Node attribute used as label. The default node label is 'atom'.
    edge_label : string
        Edge attribute used as label. The default edge label is 'bond_type'.
    h : integer
        Longest length of walks.
    compute_method : string
        Method used to compute the random walk kernel. Available methods are
        'sylvester', 'conjugate', 'fp', 'spectral' and 'kron'; only
        'sylvester' is (partially) implemented so far.

    Return
    ------
    Kmatrix : Numpy matrix
        Kernel matrix, each element of which is the random walk kernel
        between 2 graphs.
    run_time : float
        Time in seconds spent building the kernel matrix.
    """
    compute_method = compute_method.lower()
    h = int(h)

    # arrange all graphs in a list
    Gn = args[0] if len(args) == 1 else [args[0], args[1]]
    Kmatrix = np.zeros((len(Gn), len(Gn)))
    ds_attrs = get_dataset_attributes(
        Gn,
        attr_names=['node_labeled', 'edge_labeled', 'is_directed'],
        node_label=node_label,
        edge_label=edge_label)

    # assign a default label to every node/edge of unlabeled datasets so the
    # kernel computation can treat all datasets uniformly
    if not ds_attrs['node_labeled']:
        for G in Gn:
            nx.set_node_attributes(G, '0', 'atom')
    if not ds_attrs['edge_labeled']:
        for G in Gn:
            nx.set_edge_attributes(G, '0', 'bond_type')

    start_time = time.time()

    pbar = tqdm(
        total=(1 + len(Gn)) * len(Gn) / 2,
        desc='calculating kernels',
        file=sys.stdout)
    if compute_method == 'sylvester':
        import warnings
        warnings.warn(
            'The Sylvester equation (rather than generalized Sylvester equation) is used; only walks of length 1 is considered.'
        )
        from control import dlyap
        for i in range(0, len(Gn)):
            for j in range(i, len(Gn)):
                # NOTE(review): the original passed `all_walks[i]`/`all_walks[j]`,
                # but `all_walks` was never defined (its computation is commented
                # out above), so every call raised NameError. The Sylvester
                # helper builds the tensor product of two graphs, so pass the
                # graphs themselves — confirm against the intended design.
                Kmatrix[i][j] = _randomwalkkernel_sylvester(
                    Gn[i],
                    Gn[j],
                    node_label=node_label,
                    edge_label=edge_label)
                Kmatrix[j][i] = Kmatrix[i][j]
                pbar.update(1)
    elif compute_method == 'conjugate':
        pass  # TODO: not implemented yet
    elif compute_method == 'fp':
        pass  # TODO: not implemented yet
    elif compute_method == 'spectral':
        pass  # TODO: not implemented yet
    elif compute_method == 'kron':
        pass  # TODO: not implemented yet
    else:
        raise Exception(
            'compute method name incorrect. Available methods: "sylvester", "conjugate", "fp", "spectral" and "kron".'
        )
    # NOTE(review): the original had an unconditional trailing loop calling
    # `_randomwalkkernel_do(all_walks[i], all_walks[j], ..., labeled=labeled)`
    # after the branches above; it referenced the undefined names `all_walks`
    # and `labeled` and would have raised NameError on every call, and it would
    # also have overwritten the Sylvester results. It has been removed.

    run_time = time.time() - start_time
    print(
        "\n --- kernel matrix of walk kernel up to %d of size %d built in %s seconds ---"
        % (h, len(Gn), run_time))  # fixed: `n` was undefined; the parameter is `h`

    return Kmatrix, run_time
def _randomwalkkernel_sylvester(walks1,
                                walks2,
                                node_label='atom',
                                edge_label='bond_type'):
    """Calculate walk graph kernels up to n between 2 graphs using Sylvester method.

    NOTE(review): this function is an unfinished stub. `G1`, `G2`, `A`, `Q`,
    `C`, `dlyap` and `kernel` are all undefined in this scope, so any call
    raises NameError. The apparent intent — build the direct (tensor) product
    graph of the two inputs and solve a discrete Lyapunov equation on it to
    obtain the kernel value — must be confirmed and completed before use.

    Parameters
    ----------
    walks1, walks2 : list
        List of walks in 2 graphs, where for unlabeled graphs, each walk is represented by a list of nodes; while for labeled graphs, each walk is represented by a string consists of labels of nodes and edges on that walk.
    node_label : string
        node attribute used as label. The default node label is atom.
    edge_label : string
        edge attribute used as label. The default edge label is bond_type.

    Return
    ------
    kernel : float
        Treelet Kernel between 2 graphs.
    """
    # NOTE(review): `G1`/`G2` are undefined here — presumably the two input
    # graphs; the parameters are named `walks1`/`walks2` instead. Confirm.
    dpg = nx.tensor_product(G1, G2)  # direct product graph
    # NOTE(review): `A`, `Q`, `C` are never constructed; `dlyap` is imported
    # only inside the caller's 'sylvester' branch.
    X = dlyap(A, Q, C)

    # NOTE(review): `kernel` is never assigned in this function.
    return kernel
......@@ -6,16 +6,17 @@
import sys
import pathlib
sys.path.insert(0, "../")
from tqdm import tqdm
import time
import networkx as nx
import numpy as np
import time
from pygraph.utils.utils import getSPGraph
from pygraph.utils.graphdataset import get_dataset_attributes
def spkernel(*args, edge_weight = 'bond_type'):
def spkernel(*args, node_label='atom', edge_weight=None):
"""Calculate shortest-path kernels between graphs.
Parameters
......@@ -26,34 +27,71 @@ def spkernel(*args, edge_weight = 'bond_type'):
G1, G2 : NetworkX graphs
2 graphs between which the kernel is calculated.
edge_weight : string
edge attribute corresponding to the edge weight. The default edge weight is bond_type.
Edge attribute corresponding to the edge weight.
Return
------
Kmatrix/kernel : Numpy matrix/float
Kernel matrix, each element of which is the sp kernel between 2 praphs. / SP kernel between 2 graphs.
Kmatrix : Numpy matrix
Kernel matrix, each element of which is the sp kernel between 2 praphs.
"""
Gn = args[0] if len(args) == 1 else [args[0], args[1]] # arrange all graphs in a list
Gn = args[0] if len(args) == 1 else [args[0], args[1]]
Kmatrix = np.zeros((len(Gn), len(Gn)))
try:
some_weight = list(
nx.get_edge_attributes(Gn[0], edge_weight).values())[0]
weight = edge_label if isinstance(some_weight, float) or isinstance(
some_weight, int) else None
except:
weight = None
ds_attrs = get_dataset_attributes(
Gn, attr_names=['node_labeled'], node_label=node_label)
start_time = time.time()
Gn = [ getSPGraph(G, edge_weight = edge_weight) for G in args[0] ] # get shortest path graphs of Gn
# get shortest path graphs of Gn
Gn = [
getSPGraph(G, edge_weight=edge_weight)
for G in tqdm(Gn, desc='getting sp graphs', file=sys.stdout)
]
for i in range(0, len(Gn)):
for j in range(i, len(Gn)):
pbar = tqdm(
total=((len(Gn) + 1) * len(Gn) / 2),
desc='calculating kernels',
file=sys.stdout)
if ds_attrs['node_labeled']:
for i in range(0, len(Gn)):
for j in range(i, len(Gn)):
for e1 in Gn[i].edges(data=True):
for e2 in Gn[j].edges(data=True):
# cost of a node to itself equals to 0, cost between two disconnected nodes is Inf.
if e1[2]['cost'] != 0 and e1[2] != np.Inf and e1[2]['cost'] == e2[2]['cost'] and {
Gn[i].nodes[e1[0]][node_label],
Gn[i].nodes[e1[1]][node_label]
} == {
Gn[j].nodes[e2[0]][node_label],
Gn[j].nodes[e2[1]][node_label]
}:
Kmatrix[i][j] += 1
Kmatrix[j][i] = Kmatrix[i][j]
pbar.update(1)
else:
for i in range(0, len(Gn)):
for j in range(i, len(Gn)):
# kernel_t = [ e1[2]['cost'] != 0 and e1[2]['cost'] == e2[2]['cost'] and ((e1[0] == e2[0] and e1[1] == e2[1]) or (e1[0] == e2[1] and e1[1] == e2[0])) \
# for e1 in Sn[i].edges(data = True) for e2 in Sn[j].edges(data = True) ]
# Kmatrix[i][j] = np.sum(kernel_t)
# Kmatrix[j][i] = Kmatrix[i][j]
for e1 in Gn[i].edges(data = True):
for e2 in Gn[j].edges(data = True):
if e1[2]['cost'] != 0 and e1[2]['cost'] == e2[2]['cost'] and ((e1[0] == e2[0] and e1[1] == e2[1]) or (e1[0] == e2[1] and e1[1] == e2[0])):
Kmatrix[i][j] += 1
Kmatrix[j][i] = Kmatrix[i][j]
for e1 in Gn[i].edges(data=True):
for e2 in Gn[j].edges(data=True):
if e1[2]['cost'] != 0 and e1[2] != np.Inf and e1[2]['cost'] == e2[2]['cost']:
Kmatrix[i][j] += 1
Kmatrix[j][i] = Kmatrix[i][j]
pbar.update(1)
run_time = time.time() - start_time
print("--- shortest path kernel matrix of size %d built in %s seconds ---" % (len(Gn), run_time))
print(
"--- shortest path kernel matrix of size %d built in %s seconds ---" %
(len(Gn), run_time))
return Kmatrix, run_time
\ No newline at end of file
return Kmatrix, run_time
......@@ -8,13 +8,24 @@ import pathlib
sys.path.insert(0, "../")
import time
from collections import Counter
import networkx as nx
import numpy as np
from collections import Counter
from tqdm import tqdm
tqdm.monitor_interval = 0
from pygraph.utils.utils import untotterTransformation
def treepatternkernel(*args, node_label = 'atom', edge_label = 'bond_type', labeled = True, kernel_type = 'untiln', lmda = 1, h = 1):
def treepatternkernel(*args,
node_label='atom',
edge_label='bond_type',
labeled=True,
kernel_type='untiln',
lmda=1,
h=1,
remove_totters=True):
"""Calculate tree pattern graph kernels between graphs.
Parameters
----------
......@@ -35,6 +46,8 @@ def treepatternkernel(*args, node_label = 'atom', edge_label = 'bond_type', labe
Weight to decide whether linear patterns or trees pattern of increasing complexity are favored.
h : integer
The upper bound of the height of tree patterns.
remove_totters : boolean
whether to remove totters. The default value is True.
Return
------
......@@ -44,24 +57,38 @@ def treepatternkernel(*args, node_label = 'atom', edge_label = 'bond_type', labe
if h < 1:
raise Exception('h > 0 is requested.')
kernel_type = kernel_type.lower()
Gn = args[0] if len(args) == 1 else [args[0], args[1]] # arrange all graphs in a list
# arrange all graphs in a list
Gn = args[0] if len(args) == 1 else [args[0], args[1]]
Kmatrix = np.zeros((len(Gn), len(Gn)))
h = int(h)
start_time = time.time()
if remove_totters:
Gn = [untotterTransformation(G, node_label, edge_label) for G in Gn]
pbar = tqdm(
total=(1 + len(Gn)) * len(Gn) / 2,
desc='calculate kernels',
file=sys.stdout)
for i in range(0, len(Gn)):
for j in range(i, len(Gn)):
Kmatrix[i][j] = _treepatternkernel_do(Gn[i], Gn[j], node_label, edge_label, labeled, kernel_type, lmda, h)
Kmatrix[i][j] = _treepatternkernel_do(Gn[i], Gn[j], node_label,
edge_label, labeled,
kernel_type, lmda, h)
Kmatrix[j][i] = Kmatrix[i][j]
pbar.update(1)
run_time = time.time() - start_time
print("\n --- kernel matrix of tree pattern kernel of size %d built in %s seconds ---" % (len(Gn), run_time))
print(
"\n --- kernel matrix of tree pattern kernel of size %d built in %s seconds ---"
% (len(Gn), run_time))
return Kmatrix, run_time
def _treepatternkernel_do(G1, G2, node_label, edge_label, labeled, kernel_type, lmda, h):
def _treepatternkernel_do(G1, G2, node_label, edge_label, labeled, kernel_type,
lmda, h):
"""Calculate tree pattern graph kernels between 2 graphs.
Parameters
......@@ -97,17 +124,22 @@ def _treepatternkernel_do(G1, G2, node_label, edge_label, labeled, kernel_type,
"""Find all sets R of pairs by combination.
"""
if length == 1:
mset = [ [pair] for pair in allpairs ]
mset = [[pair] for pair in allpairs]
return mset, mset
else:
mset, mset_l = mset_com(allpairs, length - 1)
mset_tmp = []
for pairset in mset_l: # for each pair set of length l-1
nodeset1 = [ pair[0] for pair in pairset ] # nodes already in the set
nodeset2 = [ pair[1] for pair in pairset ]
for pairset in mset_l: # for each pair set of length l-1
nodeset1 = [pair[0] for pair in pairset
] # nodes already in the set
nodeset2 = [pair[1] for pair in pairset]
for pair in allpairs:
if (pair[0] not in nodeset1) and (pair[1] not in nodeset2): # nodes in R should be unique
mset_tmp.append(pairset + [pair]) # add this pair to the pair set of length l-1, constructing a new set of length l
if (pair[0] not in nodeset1) and (
pair[1] not in nodeset2
): # nodes in R should be unique
mset_tmp.append(
pairset + [pair]
) # add this pair to the pair set of length l-1, constructing a new set of length l
nodeset1.append(pair[0])
nodeset2.append(pair[1])
......@@ -115,8 +147,8 @@ def _treepatternkernel_do(G1, G2, node_label, edge_label, labeled, kernel_type,
return mset, mset_tmp
allpairs = [] # all pairs those have the same node labels and edge labels
allpairs = [
] # all pairs those have the same node labels and edge labels
for neighbor1 in G1[n1]:
for neighbor2 in G2[n2]:
if G1.node[neighbor1][node_label] == G2.node[neighbor2][node_label] \
......@@ -130,7 +162,6 @@ def _treepatternkernel_do(G1, G2, node_label, edge_label, labeled, kernel_type,
return mset
def kernel_h(h):
"""Calculate kernel of h-th iteration.
"""
......@@ -147,9 +178,11 @@ def _treepatternkernel_do(G1, G2, node_label, edge_label, labeled, kernel_type,
for R in mset:
kh_tmp = 1
for pair in R:
kh_tmp *= lmda * all_kh[str(pair[0]) + '.' + str(pair[1])]
kh_tmp *= lmda * all_kh[str(pair[0])
+ '.' + str(pair[1])]
kh += 1 / lmda * kh_tmp
kh = (G1.node[n1][node_label] == G2.node[n2][node_label]) * (1 + kh)
kh = (G1.node[n1][node_label] == G2.node[n2][
node_label]) * (1 + kh)
all_kh_tmp[str(n1) + '.' + str(n2)] = kh
all_kh = all_kh_tmp.copy()
......@@ -165,9 +198,11 @@ def _treepatternkernel_do(G1, G2, node_label, edge_label, labeled, kernel_type,
for R in mset:
kh_tmp = 1
for pair in R:
kh_tmp *= lmda * all_kh[str(pair[0]) + '.' + str(pair[1])]
kh_tmp *= lmda * all_kh[str(pair[0])
+ '.' + str(pair[1])]
kh += kh_tmp
kh *= lmda * (G1.node[n1][node_label] == G2.node[n2][node_label])
kh *= lmda * (
G1.node[n1][node_label] == G2.node[n2][node_label])
all_kh_tmp[str(n1) + '.' + str(n2)] = kh
all_kh = all_kh_tmp.copy()
......@@ -183,16 +218,16 @@ def _treepatternkernel_do(G1, G2, node_label, edge_label, labeled, kernel_type,
for R in mset:
kh_tmp = 1
for pair in R:
kh_tmp *= lmda * all_kh[str(pair[0]) + '.' + str(pair[1])]
kh_tmp *= lmda * all_kh[str(pair[0])
+ '.' + str(pair[1])]
kh += 1 / lmda * kh_tmp
kh *= (G1.node[n1][node_label] == G2.node[n2][node_label])
kh *= (
G1.node[n1][node_label] == G2.node[n2][node_label])
all_kh_tmp[str(n1) + '.' + str(n2)] = kh