Commit 66e58d69 authored by ljia's avatar ljia

ADD path kernel up to depth d and its result on dataset Acyclic.

    MOD treelet kernel: retrieve canonkeys of all graphs before calculating kernels, which greatly speeds it up.
parent 6e95146f
......@@ -17,13 +17,14 @@ The criteria used for prediction are SVM for classification and kernel Ridge reg
For prediction we randomly divide the data into train and test subsets, where 90% of the entire dataset is used for training and the rest for testing. 10 splits are performed. For each split, we first train on the train data, then evaluate the performance on the test set. We choose the optimal parameters for the test set and finally provide the corresponding performance. The final results correspond to the average of the performances on the test sets.
| Kernels | RMSE(℃) | STD(℃) | Parameter | k_time |
|---------------|:---------:|:--------:|-------------:|-------:|
| Shortest path | 35.19 | 4.50 | - | 14.58" |
| Marginalized | 18.02 | 6.29 | p_quit = 0.1 | 4'19" |
| Path | 14.00 | 6.93 | - | 36.21" |
| WL subtree | 7.55 | 2.33 | height = 1 | 0.84" |
| Treelet | 8.31 | 3.38 | - | 49.58" |
| Kernels | RMSE(℃) | STD(℃) | Parameter | k_time |
|---------------|:-------:|:------:|-------------:|-------:|
| Shortest path | 35.19 | 4.50 | - | 14.58" |
| Marginalized | 18.02 | 6.29 | p_quit = 0.1 | 4'19" |
| Path | 14.00 | 6.93 | - | 36.21" |
| WL subtree | 7.55 | 2.33 | height = 1 | 0.84" |
| Treelet | 8.31 | 3.38 | - | 0.50" |
| Path up to d | 7.43 | 2.69 | depth = 2 | 0.59" |
* RMSE stands for arithmetic mean of the root mean squared errors on all splits.
* STD stands for standard deviation of the root mean squared errors on all splits.
......
......@@ -2,23 +2,24 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The line_profiler extension is already loaded. To reload it, use:\n",
" %reload_ext line_profiler\n",
"\n",
" --- This is a regression problem ---\n",
"\n",
"\n",
"\n",
" Loading dataset from file...\n",
"\n",
" Calculating kernel matrix, this could take a while...\n",
"\n",
" --- treelet kernel matrix of size 185 built in 50.925347328186035 seconds ---\n",
" --- treelet kernel matrix of size 185 built in 0.48417091369628906 seconds ---\n",
"[[ 4.00000000e+00 2.60653066e+00 1.00000000e+00 ..., 1.26641655e-14\n",
" 1.26641655e-14 1.26641655e-14]\n",
" [ 2.60653066e+00 6.00000000e+00 1.00000000e+00 ..., 1.26641655e-14\n",
......@@ -42,19 +43,18 @@
"With standard deviation: 5.035844\n",
"\n",
"\n",
" RMSE_test std_test RMSE_train std_train k_time\n",
" rmse_test std_test rmse_train std_train k_time\n",
"----------- ---------- ------------ ----------- --------\n",
" 10.0997 5.03584 2.68803 1.54162 50.9253\n",
" 10.0997 5.03584 2.68803 1.54162 0.484171\n",
"\n",
" --- This is a regression problem ---\n",
"\n",
"\n",
"\n",
" Loading dataset from file...\n",
"\n",
" Calculating kernel matrix, this could take a while...\n",
"\n",
" --- treelet kernel matrix of size 185 built in 49.581383228302 seconds ---\n",
" --- treelet kernel matrix of size 185 built in 0.5003015995025635 seconds ---\n",
"[[ 4.00000000e+00 2.60653066e+00 1.00000000e+00 ..., 1.26641655e-14\n",
" 1.26641655e-14 1.26641655e-14]\n",
" [ 2.60653066e+00 6.00000000e+00 1.00000000e+00 ..., 1.26641655e-14\n",
......@@ -78,9 +78,9 @@
"With standard deviation: 3.378376\n",
"\n",
"\n",
" RMSE_test std_test RMSE_train std_train k_time\n",
" rmse_test std_test rmse_train std_train k_time\n",
"----------- ---------- ------------ ----------- --------\n",
" 8.3079 3.37838 2.90887 1.2679 49.5814\n"
" 8.3079 3.37838 2.90887 1.2679 0.500302\n"
]
}
],
......@@ -97,12 +97,12 @@
"\n",
"kernel_para = dict(node_label = 'atom', edge_label = 'bond_type', labeled = True)\n",
"\n",
"# kernel_train_test(datafile, kernel_file_path, treeletkernel, kernel_para, normalize = False)\n",
"kernel_train_test(datafile, kernel_file_path, treeletkernel, kernel_para, normalize = False)\n",
"\n",
"kernel_train_test(datafile, kernel_file_path, treeletkernel, kernel_para, normalize = True)\n",
"\n",
"# %lprun -f spkernel \\\n",
"# kernel_train_test(datafile, kernel_file_path, spkernel, kernel_para, normalize = False)"
"# %lprun -f treeletkernel \\\n",
"# kernel_train_test(datafile, kernel_file_path, treeletkernel, kernel_para, normalize = False)"
]
},
{
......@@ -116,12 +116,12 @@
"# with y normalization\n",
" RMSE_test std_test RMSE_train std_train k_time\n",
"----------- ---------- ------------ ----------- --------\n",
" 8.3079 3.37838 2.90887 1.2679 49.5814\n",
" 8.3079 3.37838 2.90887 1.2679 0.500302\n",
"\n",
"# without y normalization\n",
" RMSE_test std_test RMSE_train std_train k_time\n",
"----------- ---------- ------------ ----------- --------\n",
" 10.0997 5.03584 2.68803 1.54162 50.9253"
" 10.0997 5.03584 2.68803 1.54162 0.484171"
]
},
{
......
This diff is collapsed.
......@@ -17,13 +17,14 @@ The criteria used for prediction are SVM for classification and kernel Ridge reg
For prediction we randomly divide the data into train and test subsets, where 90% of the entire dataset is used for training and the rest for testing. 10 splits are performed. For each split, we first train on the train data, then evaluate the performance on the test set. We choose the optimal parameters for the test set and finally provide the corresponding performance. The final results correspond to the average of the performances on the test sets.
| Kernels | RMSE(℃) | STD(℃) | Parameter | k_time |
|---------------|:---------:|:--------:|-------------:|-------:|
| Shortest path | 35.19 | 4.50 | - | 14.58" |
| Marginalized | 18.02 | 6.29 | p_quit = 0.1 | 4'19" |
| Path | 14.00 | 6.93 | - | 36.21" |
| WL subtree | 7.55 | 2.33 | height = 1 | 0.84" |
| Treelet | 8.31 | 3.38 | - | 49.58" |
| Kernels | RMSE(℃) | STD(℃) | Parameter | k_time |
|---------------|:-------:|:------:|-------------:|-------:|
| Shortest path | 35.19 | 4.50 | - | 14.58" |
| Marginalized | 18.02 | 6.29 | p_quit = 0.1 | 4'19" |
| Path | 14.00 | 6.93 | - | 36.21" |
| WL subtree | 7.55 | 2.33 | height = 1 | 0.84" |
| Treelet | 8.31 | 3.38 | - | 0.50" |
| Path up to d | 7.43 | 2.69 | depth = 2 | 0.59" |
* RMSE stands for arithmetic mean of the root mean squared errors on all splits.
* STD stands for standard deviation of the root mean squared errors on all splits.
......@@ -43,7 +44,12 @@ For predition we randomly divide the data in train and test subset, where 90% of
[5] Gaüzère B, Brun L, Villemin D. Two new graphs kernels in chemoinformatics. Pattern Recognition Letters. 2012 Nov 1;33(15):2038-47.
[6] Liva Ralaivola, Sanjay J Swamidass, Hiroto Saigo, and Pierre Baldi. Graph kernels for chemical informatics. Neural networks, 18(8):1093–1110, 2005.
## Updates
### 2018.01.24
* ADD *path kernel up to depth d* and its result on dataset Acyclic.
* MOD treelet kernel: retrieve canonkeys of all graphs before calculating kernels, which greatly speeds it up.
### 2018.01.17
* ADD comments to code of treelet kernel. - linlin
### 2018.01.16
......
......@@ -10,7 +10,7 @@ from pygraph.kernels.deltaKernel import deltakernel
def pathkernel(*args, node_label = 'atom', edge_label = 'bond_type'):
"""Calculate mean average path kernels between graphs.
Parameters
----------
Gn : List of NetworkX graph
......@@ -19,15 +19,15 @@ def pathkernel(*args, node_label = 'atom', edge_label = 'bond_type'):
G1, G2 : NetworkX graphs
2 graphs between which the kernel is calculated.
node_label : string
node attribute used as label. The default node label is atom.
node attribute used as label. The default node label is atom.
edge_label : string
edge attribute used as label. The default edge label is bond_type.
Return
------
Kmatrix/kernel : Numpy matrix/float
Kernel matrix, each element of which is the path kernel between 2 praphs. / Path kernel between 2 graphs.
References
----------
[1] Suard F, Rakotomamonjy A, Bensrhair A. Kernel on Bag of Paths For Measuring Similarity of Shapes. InESANN 2007 Apr 25 (pp. 355-360).
......@@ -35,13 +35,13 @@ def pathkernel(*args, node_label = 'atom', edge_label = 'bond_type'):
some_graph = args[0][0] if len(args) == 1 else args[0] # only edge attributes of type int or float can be used as edge weight to calculate the shortest paths.
some_weight = list(nx.get_edge_attributes(some_graph, edge_label).values())[0]
weight = edge_label if isinstance(some_weight, float) or isinstance(some_weight, int) else None
if len(args) == 1: # for a list of graphs
Gn = args[0]
Gn = args[0]
Kmatrix = np.zeros((len(Gn), len(Gn)))
start_time = time.time()
for i in range(0, len(Gn)):
for j in range(i, len(Gn)):
Kmatrix[i][j] = _pathkernel_do(Gn[i], Gn[j], node_label, edge_label, weight = weight)
......@@ -49,34 +49,34 @@ def pathkernel(*args, node_label = 'atom', edge_label = 'bond_type'):
run_time = time.time() - start_time
print("\n --- mean average path kernel matrix of size %d built in %s seconds ---" % (len(Gn), run_time))
return Kmatrix, run_time
else: # for only 2 graphs
start_time = time.time()
kernel = _pathkernel_do(args[0], args[1], node_label, edge_label, weight = weight)
run_time = time.time() - start_time
print("\n --- mean average path kernel built in %s seconds ---" % (run_time))
return kernel, run_time
def _pathkernel_do(G1, G2, node_label = 'atom', edge_label = 'bond_type', weight = None):
"""Calculate mean average path kernel between 2 graphs.
Parameters
----------
G1, G2 : NetworkX graphs
2 graphs between which the kernel is calculated.
node_label : string
node attribute used as label. The default node label is atom.
node attribute used as label. The default node label is atom.
edge_label : string
edge attribute used as label. The default edge label is bond_type.
weight : string/None
edge attribute used as weight to calculate the shortest path. The default edge label is None.
Return
------
kernel : float
......@@ -88,7 +88,7 @@ def _pathkernel_do(G1, G2, node_label = 'atom', edge_label = 'bond_type', weight
for node1 in range(num_nodes):
for node2 in range(node1 + 1, num_nodes):
sp1.append(nx.shortest_path(G1, node1, node2, weight = weight))
sp2 = []
num_nodes = G2.number_of_nodes()
for node1 in range(num_nodes):
......@@ -108,5 +108,5 @@ def _pathkernel_do(G1, G2, node_label = 'atom', edge_label = 'bond_type', weight
kernel += kernel_path # add up kernels of all paths
kernel = kernel / (len(sp1) * len(sp2)) # calculate mean average
return kernel
\ No newline at end of file
return kernel
......@@ -7,13 +7,14 @@ For predition we randomly divide the data in train and test subset, where 90% of
## Summary
| Kernels | RMSE(℃) | STD(℃) | Parameter | k_time |
|---------------|:---------:|:--------:|-------------:|-------:|
| Shortest path | 35.19 | 4.50 | - | 14.58" |
| Marginalized | 18.02 | 6.29 | p_quit = 0.1 | 4'19" |
| Path | 14.00 | 6.94 | - | 37.58" |
| WL subtree | 7.55 | 2.33 | height = 1 | 0.84" |
| Treelet | 8.31 | 3.38 | - | 49.58" |
| Kernels | RMSE(℃) | STD(℃) | Parameter | k_time |
|---------------|:-------:|:------:|-------------:|-------:|
| Shortest path | 35.19 | 4.50 | - | 14.58" |
| Marginalized | 18.02 | 6.29 | p_quit = 0.1 | 4'19" |
| Path | 14.00 | 6.94 | - | 37.58" |
| WL subtree | 7.55 | 2.33 | height = 1 | 0.84" |
| Treelet | 8.31 | 3.38 | - | 0.50" |
| Path up to d | 7.43 | 2.69 | depth = 2 | 0.52" |
* RMSE stands for arithmetic mean of the root mean squared errors on all splits.
* STD stands for standard deviation of the root mean squared errors on all splits.
......@@ -76,9 +77,46 @@ The table below shows the results of the WL subtree under different subtree heig
```
### Treelet kernel
**The targets of training data are normalized before calculating the kernel.**
**The targets of training data are normalized before calculating the kernel.**
```
RMSE_test std_test RMSE_train std_train k_time
----------- ---------- ------------ ----------- --------
8.3079 3.37838 2.90887 1.2679 49.5814
8.3079 3.37838 2.90887 1.2679 0.500302
```
### Path kernel up to depth *d*
The table below shows the results of the path kernel up to different depth *d*.
The first table shows the results using the Tanimoto kernel. **The targets of training data are normalized before calculating the kernel.**
```
depth rmse_test std_test rmse_train std_train k_time
------- ----------- ---------- ------------ ----------- ---------
0 41.6202 6.453 43.6169 2.13212 0.0904737
1 38.8446 6.44648 40.8329 3.44147 0.175414
2 35.2915 4.7813 35.7461 1.61134 0.344896
3 29.4845 3.90351 28.4646 3.00137 0.553939
4 22.6693 6.28053 19.2517 3.42893 0.770649
5 21.7956 5.5225 16.886 2.60519 1.01558
6 20.6049 5.49983 13.1097 2.58431 1.33302
7 20.3479 5.17631 12.0152 2.5928 1.60266
8 19.8228 5.13769 10.7981 2.13082 1.81218
9 19.8734 5.10369 10.7997 2.09549 2.21726
10 19.8708 5.09217 10.7787 2.10002 2.41006
```
The second table shows the results using the MinMax kernel.
```
depth rmse_test std_test rmse_train std_train k_time
------- ----------- ---------- ------------ ----------- --------
0 12.58 2.73235 12.1209 0.500467 0.377576
1 12.6215 2.18866 10.2243 0.734261 0.456332
2 7.42903 2.69395 2.71885 0.732922 0.585278
3 9.02468 2.50808 1.54 1.13813 0.706556
4 10.0811 3.6477 1.36029 1.42399 0.847957
5 11.3005 4.44163 1.08518 1.06206 1.00086
6 12.186 4.88816 1.06443 1.00191 1.19792
7 12.7534 5.14529 1.19912 1.34031 1.4372
8 13.0471 5.27184 1.35822 1.84315 1.68449
9 13.1789 5.27707 1.36002 1.84834 1.96545
10 13.2538 5.26425 1.36208 1.85426 2.24943
```
"""
@author: linlin
@references: Gaüzère B, Brun L, Villemin D. Two new graphs kernels in chemoinformatics. Pattern Recognition Letters. 2012 Nov 1;33(15):2038-47.
"""
import sys
import pathlib
sys.path.insert(0, "../")
......@@ -38,9 +43,13 @@ def treeletkernel(*args, node_label = 'atom', edge_label = 'bond_type', labeled
start_time = time.time()
# get all canonical keys of all graphs before calculating kernels to save time, but this may cost a lot of memory for large dataset.
canonkeys = [ get_canonkeys(Gn[i], node_label = node_label, edge_label = edge_label, labeled = labeled) \
for i in range(0, len(Gn)) ]
for i in range(0, len(Gn)):
for j in range(i, len(Gn)):
Kmatrix[i][j] = _treeletkernel_do(Gn[i], Gn[j], node_label = node_label, edge_label = edge_label, labeled = labeled)
Kmatrix[i][j] = _treeletkernel_do(canonkeys[i], canonkeys[j], node_label = node_label, edge_label = edge_label, labeled = labeled)
Kmatrix[j][i] = Kmatrix[i][j]
run_time = time.time() - start_time
......@@ -51,8 +60,11 @@ def treeletkernel(*args, node_label = 'atom', edge_label = 'bond_type', labeled
else: # for only 2 graphs
start_time = time.time()
canonkey1 = get_canonkeys(args[0], node_label = node_label, edge_label = edge_label, labeled = labeled)
canonkey2 = get_canonkeys(args[1], node_label = node_label, edge_label = edge_label, labeled = labeled)
kernel = _treeletkernel_do(args[0], args[1], node_label = node_label, edge_label = edge_label, labeled = labeled)
kernel = _treeletkernel_do(canonkey1, canonkey2, node_label = node_label, edge_label = edge_label, labeled = labeled)
run_time = time.time() - start_time
print("\n --- treelet kernel built in %s seconds ---" % (run_time))
......@@ -60,17 +72,17 @@ def treeletkernel(*args, node_label = 'atom', edge_label = 'bond_type', labeled
return kernel, run_time
def _treeletkernel_do(G1, G2, node_label = 'atom', edge_label = 'bond_type', labeled = True):
def _treeletkernel_do(canonkey1, canonkey2, node_label = 'atom', edge_label = 'bond_type', labeled = True):
"""Calculate treelet graph kernel between 2 graphs.
Parameters
----------
G1, G2 : NetworkX graphs
2 graphs between which the kernel is calculated.
canonkey1, canonkey2 : list
List of canonical keys in 2 graphs, where each key is represented by a string.
node_label : string
node attribute used as label. The default node label is atom.
Node attribute used as label. The default node label is atom.
edge_label : string
edge attribute used as label. The default edge label is bond_type.
Edge attribute used as label. The default edge label is bond_type.
labeled : boolean
Whether the graphs are labeled. The default is True.
......@@ -79,12 +91,9 @@ def _treeletkernel_do(G1, G2, node_label = 'atom', edge_label = 'bond_type', lab
kernel : float
Treelet Kernel between 2 graphs.
"""
canonkey1 = get_canonkeys(G1, node_label = node_label, edge_label = edge_label, labeled = labeled)
canonkey2 = get_canonkeys(G2, node_label = node_label, edge_label = edge_label, labeled = labeled)
keys = set(canonkey1.keys()) & set(canonkey2.keys()) # find same canonical keys in both graphs
vector1 = np.matrix([ (canonkey1[key] if (key in canonkey1.keys()) else 0) for key in keys ])
vector2 = np.matrix([ (canonkey2[key] if (key in canonkey2.keys()) else 0) for key in keys ])
vector1 = np.array([ (canonkey1[key] if (key in canonkey1.keys()) else 0) for key in keys ])
vector2 = np.array([ (canonkey2[key] if (key in canonkey2.keys()) else 0) for key in keys ])
kernel = np.sum(np.exp(- np.square(vector1 - vector2) / 2))
return kernel
......@@ -108,10 +117,6 @@ def get_canonkeys(G, node_label = 'atom', edge_label = 'bond_type', labeled = Tr
------
canonkey/canonkey_l : dict
For unlabeled graphs, canonkey is a dictionary which records amount of every tree pattern. For labeled graphs, canonkey_l is one which keeps track of amount of every treelet.
References
----------
[1] Gaüzère B, Brun L, Villemin D. Two new graphs kernels in chemoinformatics. Pattern Recognition Letters. 2012 Nov 1;33(15):2038-47.
"""
patterns = {} # a dictionary which consists of lists of patterns for all graphlet.
canonkey = {} # canonical key, a dictionary which records amount of every tree pattern.
......
"""
@author: linlin
@references: Liva Ralaivola, Sanjay J Swamidass, Hiroto Saigo, and Pierre Baldi. Graph kernels for chemical informatics. Neural networks, 18(8):1093–1110, 2005.
"""
import sys
import pathlib
sys.path.insert(0, "../")
import time
from collections import Counter
import networkx as nx
import numpy as np
def untildpathkernel(*args, node_label = 'atom', edge_label = 'bond_type', labeled = True, depth = 10, k_func = 'tanimoto'):
    """Calculate path graph kernels up to depth d between graphs.

    Parameters
    ----------
    Gn : List of NetworkX graph
        List of graphs between which the kernels are calculated.
    /
    G1, G2 : NetworkX graphs
        2 graphs between which the kernel is calculated.
    node_label : string
        Node attribute used as label. The default node label is atom.
    edge_label : string
        Edge attribute used as label. The default edge label is bond_type.
    labeled : boolean
        Whether the graphs are labeled. The default is True.
    depth : integer
        Depth of search, i.e. the longest length of paths. It is converted with int() before use.
    k_func : string
        Name of the fingerprint similarity handed to _untildpathkernel_do: 'tanimoto' selects the Tanimoto kernel, any other value selects the MinMax kernel.

    Return
    ------
    Kmatrix/kernel : Numpy matrix/float
        Kernel matrix, each element of which is the path kernel up to d between 2 graphs. / Path kernel up to d between 2 graphs.
    run_time : float
        Wall-clock seconds spent retrieving the paths and computing the kernel(s).
    """
    max_depth = int(depth)
    # Options forwarded unchanged to both the path search and the pairwise kernel.
    shared_opts = dict(node_label = node_label, edge_label = edge_label, labeled = labeled)

    if len(args) != 1: # for only 2 graphs
        tic = time.time()
        first_paths = find_all_paths_until_length(args[0], max_depth, **shared_opts)
        second_paths = find_all_paths_until_length(args[1], max_depth, **shared_opts)
        kernel = _untildpathkernel_do(first_paths, second_paths, k_func, **shared_opts)
        elapsed = time.time() - tic
        print("\n --- path kernel up to %d built in %s seconds ---" % (max_depth, elapsed))
        return kernel, elapsed

    # for a list of graphs
    graphs = args[0]
    n_graphs = len(graphs)
    Kmatrix = np.zeros((n_graphs, n_graphs))

    tic = time.time()
    # Retrieve the paths of every graph once, up front, instead of once per pair.
    # This saves time but may cost a lot of memory for a large dataset.
    paths_per_graph = [ find_all_paths_until_length(graph, max_depth, **shared_opts) for graph in graphs ]

    # The kernel is symmetric: compute the upper triangle and mirror it.
    for row, row_paths in enumerate(paths_per_graph):
        for col in range(row, n_graphs):
            value = _untildpathkernel_do(row_paths, paths_per_graph[col], k_func, **shared_opts)
            Kmatrix[row][col] = value
            Kmatrix[col][row] = value

    elapsed = time.time() - tic
    print("\n --- kernel matrix of path kernel up to %d of size %d built in %s seconds ---" % (max_depth, n_graphs, elapsed))
    return Kmatrix, elapsed
def _untildpathkernel_do(paths1, paths2, k_func, node_label = 'atom', edge_label = 'bond_type', labeled = True):
"""Calculate path graph kernels up to depth d between 2 graphs.
Parameters
----------
paths1, paths2 : list
List of paths in 2 graphs, where for unlabeled graphs, each path is represented by a list of nodes; while for labeled graphs, each path is represented by a string consists of labels of nodes and edges on that path.
k_func : function
A kernel function used using different notions of fingerprint similarity.
node_label : string
node attribute used as label. The default node label is atom.
edge_label : string
edge attribute used as label. The default edge label is bond_type.
labeled : boolean
Whether the graphs are labeled. The default is True.
Return
------
kernel : float
Treelet Kernel between 2 graphs.
"""
all_paths = list(set(paths1 + paths2))
if k_func == 'tanimoto':
vector1 = [ (1 if path in paths1 else 0) for path in all_paths ]
vector2 = [ (1 if path in paths2 else 0) for path in all_paths ]
kernel_uv = np.dot(vector1, vector2)
kernel = kernel_uv / (len(set(paths1)) + len(set(paths2)) - kernel_uv)
else: # MinMax kernel
path_count1 = Counter(paths1)
path_count2 = Counter(paths2)
vector1 = [ (path_count1[key] if (key in path_count1.keys()) else 0) for key in all_paths ]
vector2 = [ (path_count2[key] if (key in path_count2.keys()) else 0) for key in all_paths ]
kernel = np.sum(np.minimum(vector1, vector2)) / np.sum(np.maximum(vector1, vector2))
return kernel
# NOTE: paths of every length are searched from scratch, so shorter paths are
# explored repeatedly; this could be made faster.
def find_all_paths_until_length(G, length, node_label = 'atom', edge_label = 'bond_type', labeled = True):
    """Find all paths with a certain maximum length in a graph.

    Paths of length 0, 1, ..., length are collected in that order with find_all_paths.

    Parameters
    ----------
    G : NetworkX graphs
        The graph in which paths are searched.
    length : integer
        The maximum length of paths.
    node_label : string
        Node attribute used as label. The default node label is atom.
    edge_label : string
        Edge attribute used as label. The default edge label is bond_type.
    labeled : boolean
        Whether the graphs are labeled. The default is True.

    Return
    ------
    path : list
        List of paths retrieved, where for unlabeled graphs, each path is represented by a list of nodes; while for labeled graphs, each path is represented by a string consisting of labels of nodes and edges on that path.
    """
    all_paths = []
    for path_length in range(length + 1):
        new_paths = find_all_paths(G, path_length)
        if not new_paths:
            # No path of this length exists, so no longer one can exist either.
            break
        all_paths.extend(new_paths)

    if not labeled:
        return all_paths

    # G.node was removed in NetworkX 2.4. Use it where it still exists (as the
    # original code did) and fall back to G.nodes on newer releases.
    node_attrs = getattr(G, 'node', None)
    if node_attrs is None:
        node_attrs = G.nodes

    # Convert paths to strings: node label, edge label, node label, ...
    # str() leaves string labels unchanged and lets non-string labels be joined.
    path_strs = []
    for path in all_paths:
        pieces = []
        for node, next_node in zip(path, path[1:]):
            pieces.append(str(node_attrs[node][node_label]))
            pieces.append(str(G[node][next_node][edge_label]))
        pieces.append(str(node_attrs[path[-1]][node_label]))
        path_strs.append(''.join(pieces))

    return path_strs
def find_paths(G, source_node, length):
    """Find all paths with a certain length that start from a source node. A recursive depth first search is applied.

    Only simple paths (no node visited twice) are returned, in the order in which neighbors are iterated.

    Parameters
    ----------
    G : NetworkX graphs
        The graph in which paths are searched. Only G[node], iterated to obtain the neighbors of a node, is used.
    source_node : integer
        The number of the node from where all paths start.
    length : integer
        The length of paths, counted in edges. A negative length yields no path.

    Return
    ------
    path : list of list
        List of paths retrieved, where each path is represented by a list of nodes.
    """
    if length < 0:
        # Without this guard the search would never reach its stopping condition.
        return []

    paths = []

    def _extend(prefix, visited, remaining):
        # Grow prefix by one edge at a time, never stepping onto a visited node,
        # so branches that would revisit a node are pruned immediately.
        if remaining == 0:
            paths.append(list(prefix))
            return
        for neighbor in G[prefix[-1]]:
            if neighbor in visited:
                continue
            prefix.append(neighbor)
            visited.add(neighbor)
            _extend(prefix, visited, remaining - 1)
            visited.remove(neighbor)
            prefix.pop()

    _extend([source_node], {source_node}, length)
    return paths
def find_all_paths(G, length):
    """Find all paths with a certain length in a graph.

    The paths starting at each node of G are gathered with find_paths and concatenated in node iteration order.

    Parameters
    ----------
    G : NetworkX graphs
        The graph in which paths are searched.
    length : integer
        The length of paths.

    Return
    ------
    path : list of list
        List of paths retrieved, where each path is represented by a list of nodes.
    """
    # NOTE: every path is retrieved twice, once from each of its two extremities.
    # Removing one of the two representations is not carried out according to
    # the original article, so both are deliberately kept.
    return [ path for start_node in G for path in find_paths(G, start_node, length) ]
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment