Commit d8a96ce4 authored by jajupmochi's avatar jajupmochi

Merge branch 'master' of https://git.litislab.fr/bgauzere/py-graph

add pygraph/kernels/spkernel.py
modify pygraph/utils/util.py and pygraph/utils/graphfiles.py
parents 4ade070e e1c2c3ac
This diff is collapsed.
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"autoscroll": false,
"ein.tags": "worksheet-0",
"slideshow": {
"slide_type": "-"
}
},
"outputs": [],
"source": [
"import numpy as np\n",
"import paths\n",
"\n",
"import pygraph\n",
"\n",
"from pygraph.utils.graphfiles import loadDataset\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"autoscroll": false,
"ein.tags": "worksheet-0",
"slideshow": {
"slide_type": "-"
}
},
"outputs": [],
"source": [
"import networkx as nx\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"\n",
"# We load a ds dataset\n",
"# load it from https://brunl01.users.greyc.fr/CHEMISTRY/Acyclic.tar.gz\n",
"dataset, y = loadDataset(\"/home/bgauzere/work/Datasets/Acyclic/dataset_bps.ds\")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"autoscroll": false,
"ein.tags": "worksheet-0",
"slideshow": {
"slide_type": "-"
}
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 183/183 [07:41<00:00, 2.52s/it]\n",
"100%|██████████| 183/183 [08:39<00:00, 2.84s/it]\n",
"100%|██████████| 183/183 [05:19<00:00, 1.75s/it]\n",
"100%|██████████| 183/183 [05:50<00:00, 1.91s/it]\n"
]
}
],
"source": [
"#Compute graph edit distances\n",
"\n",
"from tqdm import tqdm\n",
"from pygraph.c_ext.lsape_binders import lsap_solverHG\n",
"from pygraph.ged.costfunctions import ConstantCostFunction\n",
"from pygraph.ged.GED import ged\n",
"import time\n",
"\n",
"cf = ConstantCostFunction(1,3,1,3)\n",
"N=len(dataset)\n",
"\n",
"methods=['Riesen + LSAP', 'Neigh + LSAP', 'Riesen + LSAPE', 'Neigh + LSAPE']\n",
"ged_distances = [ np.zeros((N,N)), np.zeros((N,N)), np.zeros((N,N)), np.zeros((N,N))]\n",
"\n",
"times = list()\n",
"start = time.clock()\n",
"for i in tqdm(range(0,N)):\n",
" for j in range(0,N):\n",
" ged_distances[0][i,j] = ged(dataset[i],dataset[j],cf=cf, method='Riesen')[0]\n",
"times.append(time.clock() - start)\n",
"\n",
"\n",
"start = time.clock()\n",
"for i in tqdm(range(0,N)):\n",
" for j in range(0,N):\n",
" ged_distances[1][i,j] = ged(dataset[i],dataset[j],cf=cf, method='Neighboorhood')[0]\n",
"\n",
"times.append(time.clock() - start)\n",
"\n",
"start = time.clock()\n",
"for i in tqdm(range(0,N)):\n",
" for j in range(0,N):\n",
" ged_distances[2][i,j] = ged(dataset[i],dataset[j],cf=cf, method='Riesen',solver=lsap_solverHG)[0]\n",
"times.append(time.clock() - start)\n",
"\n",
"start = time.clock()\n",
"for i in tqdm(range(0,N)):\n",
" for j in range(0,N):\n",
" ged_distances[3][i,j] = ged(dataset[i],dataset[j],cf=cf, method='Neighboorhood',solver=lsap_solverHG)[0]\n",
"times.append(time.clock() - start)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"autoscroll": false,
"ein.tags": "worksheet-0",
"slideshow": {
"slide_type": "-"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" method \t mean \t mean \t time\n",
" Riesen + LSAP \t 37.79903849025053 \t 35.31207262086058 \t 463.300405 \n",
" Neigh + LSAP \t 36.2281047508137 \t 33.85869987159963 \t 521.7821730000001 \n",
" Riesen + LSAPE \t 35.95508973095643 \t 34.10092866314312 \t 319.83455500000014 \n",
" Neigh + LSAPE \t 34.5005822807489 \t 32.5735614679447 \t 350.48029599999995 \n"
]
}
],
"source": [
"print(\" method \\t mean \\t mean \\t time\")\n",
"data = list()\n",
"for i in range(0,len(ged_distances)):\n",
" ged_ = np.minimum(ged_distances[i],ged_distances[i].transpose())\n",
" print(\" {} \\t {} \\t {} \\t {} \".format(methods[i], np.mean(ged_distances[i]),np.mean(ged_), times[i]))\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.2"
},
"name": "py-graph_test.ipynb"
},
"nbformat": 4,
"nbformat_minor": 2
}
# -*-coding:utf-8 -*-
"""
Pygraph
This package contains 5 sub packages:
* c_ext : binders to C++ code
* ged : allows to compute graph edit distance between networkX graphs
* kernels : computation of graph kernels, ie graph similarity measure compatible with SVM
* notebooks : examples of code using this library
* utils : Diverse computation on graphs
"""
# info
__version__ = "0.1"
__author__ = "Benoit Gaüzère"
__date__ = "November 2017"
# import sub modules
from pygraph import c_ext
from pygraph import ged
from pygraph import utils
# You must specify your env variable LSAPE_DIR
#LSAPE_DIR=/home/bgauzere/Téléchargements/lsape/include/
# Build the shared library loaded by lsape_binders.py.
# The LSAPE headers are located only through LSAPE_DIR, so no
# machine-specific include path is baked into the rule.
liblsap.so: lsap.cpp
	g++ -fPIC -shared lsap.cpp -o liblsap.so -O3 -I$(LSAPE_DIR)
Python wrapper for lsape method
Set the LSAPE_DIR environment variable to the location of the LSAPE source
code before compiling.
source code : https://bougleux.users.greyc.fr/lsape/
# -*-coding:utf-8 -*-
"""Pygraph - c_ext module
This package binds some C++ code to python
lsape_binders.py : binders to C++ code of LSAPE methods implemented in
https://bougleux.users.greyc.fr/lsape/
"""
# info
__version__ = "0.1"
__author__ = "Benoit Gaüzère"
__date__ = "November 2017"
# import sub modules
from pygraph.c_ext import lsape_binders
/*
Python wrapper
*/
#include "hungarian-lsape.hh"
#include "hungarian-lsap.hh"
#include <cstdio>
// C-linkage entry point called from Python through ctypes.
//
// Forwards the nm x nm cost buffer C to hungarianLSAP (presumably declared
// in hungarian-lsap.hh -- confirm) and copies the two int assignment
// arrays it fills into the caller-provided long arrays.
//
//   C      : cost matrix buffer, passed to the solver unchanged
//   nm     : used as both the row and the column count
//   rho    : output, nm entries written
//   varrho : output, nm entries written
//
// Always returns 0.
extern "C" int lsap(double * C, const int nm, long * rho, long * varrho){
  double * u = new double[nm];
  double * v = new double[nm];
  int * rho_int = new int[nm];
  int * varrho_int = new int[nm];

  hungarianLSAP(C,nm,nm,rho_int,u,v,varrho_int);

  // The solver works on int arrays while the caller hands us long arrays,
  // so the results are widened element by element.
  for (int i = 0; i < nm; i++){
    rho[i] = (long)(rho_int[i]);
    varrho[i] = (long)(varrho_int[i]);
  }

  // Release the scratch buffers: this function is invoked once per cost
  // matrix, so leaking them would grow memory with every call.
  delete[] u;
  delete[] v;
  delete[] rho_int;
  delete[] varrho_int;
  return 0;
}
// C-linkage entry point for the rectangular (n x m) variant.
//
// Forwards the cost buffer C to hungarianLSAPE (presumably declared in
// hungarian-lsape.hh -- confirm) and copies the int assignment arrays it
// fills into the caller-provided long arrays.
//
//   C      : cost matrix buffer, passed to the solver unchanged
//   n, m   : dimensions handed to the solver
//   rho    : output, n entries written
//   varrho : output, m entries written
//
// Always returns a null pointer; the int* return type is kept as-is so
// that the exported signature does not change.
extern "C" int * lsape(double * C, const int n, const int m, long * rho, long * varrho){
  double * u = new double[n];
  double * v = new double[m];
  int * rho_int = new int[n];
  int * varrho_int = new int[m];

  hungarianLSAPE(C,n,m,rho_int,varrho_int,u,v);

  // Widen the int results into the caller's long arrays.
  for (int i = 0; i < n; i++)
    rho[i] = (long)(rho_int[i]);
  for (int i = 0; i < m; i++)
    varrho[i] = (long)(varrho_int[i]);

  // Release the scratch buffers so repeated calls do not leak.
  delete[] u;
  delete[] v;
  delete[] rho_int;
  delete[] varrho_int;
  return 0;
}
import numpy as np
import ctypes as c
from ctypes import cdll
import os.path
def lsap_solverHG(C):
    """Solve a square assignment problem with the ``lsap`` C++ binding.

    The shared library ``liblsap.so`` is loaded from the directory that
    contains this module and its exported ``lsap`` function is called on
    the cost matrix.

    Parameters
    ----------
    C : numpy.ndarray
        Square cost matrix; only ``C.shape[0]`` is used as its size.
        The matrix is left untouched: infinite entries are replaced by a
        large finite cost (10000) in a private copy handed to the solver.

    Returns
    -------
    row_ind : numpy.ndarray
        The indices ``0 .. nm-1``.
    col_ind : numpy.ndarray
        The ``varrho`` array filled by the library, one entry per row index.
    """
    large_cost = 10000  # finite stand-in for forbidden (infinite) entries
    nm = C.shape[0]

    lib_path = os.path.abspath(
        os.path.join(os.path.dirname(__file__), 'liblsap.so'))
    lib = cdll.LoadLibrary(lib_path)
    lib.lsap.restype = c.c_int

    # Work on a private float64, C-contiguous copy: the C function receives
    # a raw ``double *``, and the caller may still need the original matrix
    # (including its ``inf`` entries) after the call.
    # NOTE(review): the pointer is to row-major data, exactly as before
    # (``C.transpose()`` was only a view on the same buffer); presumably the
    # library reads it column-major and thus sees the transpose -- confirm.
    cost = np.array(C, dtype=np.float64, order='C', copy=True)
    cost[cost == np.inf] = large_cost

    # The C signature takes ``long *`` outputs, so size the buffers with the
    # platform's C long rather than NumPy's default integer.
    rho = np.zeros(nm, dtype=c.c_long)
    varrho = np.zeros(nm, dtype=c.c_long)

    lib.lsap(c.c_void_p(cost.ctypes.data),
             c.c_int(nm),
             c.c_void_p(rho.ctypes.data),
             c.c_void_p(varrho.ctypes.data))

    return np.arange(nm), varrho.astype(int)
from ged.costfunctions import BasicCostFunction, RiesenCostFunction
from ged.costfunctions import NeighboorhoodCostFunction
from ged.bipartiteGED import computeBipartiteCostMatrix, getOptimalMapping
from pygraph.ged.costfunctions import ConstantCostFunction, RiesenCostFunction
from pygraph.ged.costfunctions import NeighboorhoodCostFunction
from pygraph.ged.bipartiteGED import computeBipartiteCostMatrix, getOptimalMapping
from scipy.optimize import linear_sum_assignment
def ged(G1, G2, method='Riesen', rho=None, varrho=None,
cf=BasicCostFunction(1, 3, 1, 3)):
cf=ConstantCostFunction(1, 3, 1, 3),
solver=linear_sum_assignment):
"""Compute Graph Edit Distance between G1 and G2 according to mapping
encoded within rho and varrho. Graph's node must be indexed by a
index which is used is rho and varrho
......@@ -14,31 +15,32 @@ def ged(G1, G2, method='Riesen', rho=None, varrho=None,
"""
if ((rho is None) or (varrho is None)):
if(method == 'Riesen'):
cf_bp = RiesenCostFunction(cf)
cf_bp = RiesenCostFunction(cf,lsap_solver=solver)
elif(method == 'Neighboorhood'):
cf_bp = NeighboorhoodCostFunction(cf)
cf_bp = NeighboorhoodCostFunction(cf,lsap_solver=solver)
elif(method == 'Basic'):
cf_bp = cf
else:
raise NameError('Non existent method ')
rho, varrho = getOptimalMapping(computeBipartiteCostMatrix(G1, G2, cf_bp))
rho, varrho = getOptimalMapping(
computeBipartiteCostMatrix(G1, G2, cf_bp), lsap_solver=solver)
n = G1.number_of_nodes()
m = G2.number_of_nodes()
ged = 0
for i in G1.nodes_iter():
for i in G1.nodes():
phi_i = rho[i]
if(phi_i >= m):
ged += cf.cnd(i, G1)
else:
ged += cf.cns(i, phi_i, G1, G2)
for j in G2.nodes_iter():
for j in G2.nodes():
phi_j = varrho[j]
if(phi_j >= n):
ged += cf.cni(j, G2)
for e in G1.edges_iter(data=True):
for e in G1.edges(data=True):
i = e[0]
j = e[1]
phi_i = rho[i]
......@@ -49,13 +51,13 @@ def ged(G1, G2, method='Riesen', rho=None, varrho=None,
if(mappedEdge):
e2 = [phi_i, phi_j, G2[phi_i][phi_j]]
min_cost = min(cf.ces(e, e2, G1, G2),
cf.ced(e, G1), cf.cei(e2, G2))
cf.ced(e, G1) + cf.cei(e2, G2))
ged += min_cost
else:
ged += cf.ced(e, G1)
else:
ged += cf.ced(e, G1)
for e in G2.edges_iter(data=True):
for e in G2.edges(data=True):
i = e[0]
j = e[1]
phi_i = varrho[i]
......@@ -68,7 +70,3 @@ def ged(G1, G2, method='Riesen', rho=None, varrho=None,
else:
ged += cf.ced(e, G2)
return ged, rho, varrho
def computeDistanceMatrix(dataset):
pass
# -*-coding:utf-8 -*-
"""Pygraph - ged module
Implement some methods to compute ged between graphs
"""
# info
__version__ = "0.1"
__author__ = "Benoit Gaüzère"
__date__ = "November 2017"
from pygraph.ged import costfunctions
from pygraph.ged import bipartiteGED
from pygraph.ged import GED
import numpy as np
from scipy.optimize import linear_sum_assignment
from ged.costfunctions import BasicCostFunction
from pygraph.ged.costfunctions import ConstantCostFunction
def computeBipartiteCostMatrix(G1, G2, cf=BasicCostFunction(1, 3, 1, 3)):
def computeBipartiteCostMatrix(G1, G2, cf=ConstantCostFunction(1, 3, 1, 3)):
"""Compute a Cost Matrix according to cost function cf"""
n = G1.number_of_nodes()
m = G2.number_of_nodes()
......@@ -11,23 +11,23 @@ def computeBipartiteCostMatrix(G1, G2, cf=BasicCostFunction(1, 3, 1, 3)):
C = np.ones([nm, nm])*np.inf
C[n:, m:] = 0
for u in G1.nodes_iter():
for v in G2.nodes_iter():
for u in G1.nodes():
for v in G2.nodes():
cost = cf.cns(u, v, G1, G2)
C[u, v] = cost
for v in G1.nodes_iter():
for v in G1.nodes():
C[v, m + v] = cf.cnd(v, G1)
for v in G2.nodes_iter():
for v in G2.nodes():
C[n + v, v] = cf.cni(v, G2)
return C
def getOptimalMapping(C):
def getOptimalMapping(C, lsap_solver=linear_sum_assignment):
"""Compute an optimal linear mapping according to cost Matrix C
TODO: include Seb's C programs
"""
row_ind, col_ind = linear_sum_assignment(C)
row_ind, col_ind = lsap_solver(C)
return col_ind, row_ind[np.argsort(col_ind)]
......@@ -2,15 +2,17 @@ import numpy as np
from scipy.optimize import linear_sum_assignment
class BasicCostFunction:
class ConstantCostFunction:
""" Define a symmetric constant cost fonction for edit operations """
def __init__(self, cns, cni, ces, cei):
self.cns_ = cns
self.cni_ = self.cnd_ = cni
self.ces_ = ces
self.cei_ = self.ced_ = cei
def cns(self, u, v, G1, G2):
return (G1.node[u]['label'] != G2.node[v]['label'])*self.cns_
def cns(self, node_u, node_v, g1, g2):
""" return substitution edit operation cost between node_u of G1 and node_v of G2"""
return (g1.node[node_u]['label'] != g2.node[node_v]['label'])*self.cns_
def cnd(self, u, G1):
return self.cnd_
......@@ -30,9 +32,11 @@ class BasicCostFunction:
return self.cei_
class RiesenCostFunction(BasicCostFunction):
def __init__(self, cf):
BasicCostFunction.__init__(self, cf.cns_, cf.cni_, cf.ces_, cf.cei_)
class RiesenCostFunction():
""" Cost function associated to the computation of a cost matrix between nodes for LSAP"""
def __init__(self, cf, lsap_solver=linear_sum_assignment):
self.cf_ = cf
self.lsap_solver_ = lsap_solver
def cns(self, u, v, G1, G2):
""" u et v sont des id de noeuds """
......@@ -48,41 +52,43 @@ class RiesenCostFunction(BasicCostFunction):
e1 = [u, nbr_u, G1[u][nbr_u]]
for nbr_v in G2[v]:
e2 = [v, nbr_v, G2[v][nbr_v]]
sub_C[i, j] = self.ces(e1, e2, G1, G2)
sub_C[i, j] = self.cf_.ces(e1, e2, G1, G2)
j += 1
i += 1
i = 0
for nbr_u in l_nbr_u:
sub_C[i, m+i] = self.ced([u, nbr_u, G1[u][nbr_u]], G1)
sub_C[i, m+i] = self.cf_.ced([u, nbr_u, G1[u][nbr_u]], G1)
i += 1
j = 0
for nbr_v in l_nbr_v:
sub_C[n+j, j] = self.cei([v, nbr_v, G2[v][nbr_v]], G2)
sub_C[n+j, j] = self.cf_.cei([v, nbr_v, G2[v][nbr_v]], G2)
j += 1
row_ind, col_ind = linear_sum_assignment(sub_C)
row_ind, col_ind = self.lsap_solver_(sub_C)
cost = np.sum(sub_C[row_ind, col_ind])
return BasicCostFunction.cns(self, u, v, G1, G2) + cost
return self.cf_.cns(u, v, G1, G2) + cost
def cnd(self, u, G1):
cost = 0
for nbr in G1[u]:
cost += BasicCostFunction.ced(self,[u,nbr,G1[u][nbr]],G1)
cost += self.cf_.ced([u,nbr,G1[u][nbr]],G1)
return BasicCostFunction.cnd(self,u,G1) + cost
return self.cf_.cnd(u,G1) + cost
def cni(self, v, G2):
cost = 0
for nbr in G2[v]:
cost += BasicCostFunction.cei(self, [v,nbr,G2[v][nbr]], G2)
cost += self.cf_.cei([v,nbr,G2[v][nbr]], G2)
return BasicCostFunction.cni(self, v, G2) + cost
return self.cf_.cni(v, G2) + cost
class NeighboorhoodCostFunction(BasicCostFunction):
def __init__(self, cf):
BasicCostFunction.__init__(self, cf.cns_, cf.cni_, cf.ces_, cf.cei_)
class NeighboorhoodCostFunction():
""" Cost function associated to the computation of a cost matrix between nodes for LSAP"""
def __init__(self, cf, lsap_solver=linear_sum_assignment):
self.cf_ = cf
self.lsap_solver_ = lsap_solver
def cns(self, u, v, G1, G2):
""" u et v sont des id de noeuds """
......@@ -98,36 +104,35 @@ class NeighboorhoodCostFunction(BasicCostFunction):
e1 = [u, nbr_u, G1[u][nbr_u]]
for nbr_v in G2[v]:
e2 = [v, nbr_v, G2[v][nbr_v]]
sub_C[i, j] = self.ces(e1, e2, G1, G2)
sub_C[i, j] += BasicCostFunction.cns(self,
nbr_u, nbr_v, G1, G2)
sub_C[i, j] = self.cf_.ces(e1, e2, G1, G2)
sub_C[i, j] += self.cf_.cns(nbr_u, nbr_v, G1, G2)
j += 1
i += 1
i = 0
for nbr_u in l_nbr_u:
sub_C[i, m+i] = self.ced([u, nbr_u, G1[u][nbr_u]], G1)
sub_C[i, m+i] += BasicCostFunction.cnd(self, nbr_u, G1)
sub_C[i, m+i] = self.cf_.ced([u, nbr_u, G1[u][nbr_u]], G1)
sub_C[i, m+i] += self.cf_.cnd(nbr_u, G1)
i += 1
j = 0
for nbr_v in l_nbr_v:
sub_C[n+j, j] = self.cei([v, nbr_v, G2[v][nbr_v]], G2)
sub_C[n+j, j] += BasicCostFunction.cni(self, nbr_v, G2)
sub_C[n+j, j] = self.cf_.cei([v, nbr_v, G2[v][nbr_v]], G2)
sub_C[n+j, j] += self.cf_.cni(nbr_v, G2)
j += 1
row_ind, col_ind = linear_sum_assignment(sub_C)
row_ind, col_ind = self.lsap_solver_(sub_C)
cost = np.sum(sub_C[row_ind, col_ind])
return BasicCostFunction.cns(self, u, v, G1, G2) + cost
return self.cf_.cns(u, v, G1, G2) + cost
def cnd(self, u, G1):
cost = 0
for nbr in G1[u]:
cost += BasicCostFunction.ced(self, [u, nbr, G1[u][nbr]], G1)
return BasicCostFunction.cnd(self, u, G1) + cost
cost += self.cf_.ced([u, nbr, G1[u][nbr]], G1)
return self.cf_.cnd(u, G1) + cost
def cni(self, v, G2):
cost = 0
for nbr in G2[v]:
cost += BasicCostFunction.cei(self, [v, nbr, G2[v][nbr]], G2)
return BasicCostFunction.cni(self, v, G2) + cost
cost += self.cf_.cei([v, nbr, G2[v][nbr]], G2)
return self.cf_.cni(v, G2) + cost
import sys
import pathlib
sys.path.insert(0, "../")
import networkx as nx
import numpy as np
import time
from utils.utils import getSPGraph
def _count_matching_sp_edges(S1, S2):
    """Count the pairs of matching edges between two shortest-path graphs.

    An edge of S1 matches an edge of S2 when its 'cost' attribute is
    non-zero and equal to the other edge's 'cost', and both edges join the
    same two node ids (in either orientation).
    """
    edges2 = list(S2.edges(data=True))
    if not edges2:
        return 0

    count = 0
    for u1, v1, attrs1 in S1.edges(data=True):
        cost1 = attrs1['cost']
        if cost1 == 0:
            continue
        for u2, v2, attrs2 in edges2:
            same_ends = (u1 == u2 and v1 == v2) or (u1 == v2 and v1 == u2)
            if cost1 == attrs2['cost'] and same_ends:
                count += 1
    return count


def spkernel(*args):
    """Calculate shortest-path kernels between graphs.

    Parameters
    ----------
    Gn : List of NetworkX graph
        List of graphs between which the kernels are calculated.
    /
    G1, G2 : NetworkX graphs
        2 graphs between which the kernel is calculated.

    Return
    ------
    Kmatrix/Kernel : Numpy matrix/int
        Kernel matrix, each element of which is the sp kernel between
        2 graphs. / SP kernel between 2 graphs.

    Notes
    -----
    In both call forms every input graph is first converted with
    ``getSPGraph``; the kernel then compares the 'cost' attribute of the
    edges of the converted graphs. The elapsed time is printed.

    References
    ----------
    [1] Borgwardt KM, Kriegel HP. Shortest-path kernels on graphs. In Data
    Mining, Fifth IEEE International Conference on 2005 Nov 27 (pp. 8-pp).
    IEEE.
    """
    if len(args) == 1:  # for a list of graphs
        Gn = args[0]
        n_graphs = len(Gn)
        Kmatrix = np.zeros((n_graphs, n_graphs))

        # shortest path graphs of Gn
        Sn = [getSPGraph(G) for G in Gn]

        start_time = time.time()
        for i in range(n_graphs):
            for j in range(i, n_graphs):
                # the kernel matrix is symmetric: fill both halves at once
                value = _count_matching_sp_edges(Sn[i], Sn[j])
                Kmatrix[i][j] = value
                Kmatrix[j][i] = value

        print("--- shortest path kernel matrix of size %d built in %s seconds ---" % (len(Gn), (time.time() - start_time)))

        return Kmatrix

    else:  # for only 2 graphs
        # Convert to shortest-path graphs exactly as the list form does, so
        # that the edges carry the 'cost' attribute the kernel compares.
        G1 = getSPGraph(args[0])
        G2 = getSPGraph(args[1])

        # start_time was previously never set on this path, which made the
        # print below raise NameError.
        start_time = time.time()
        kernel = _count_matching_sp_edges(G1, G2)

        print("--- shortest path kernel built in %s seconds ---" % (time.time() - start_time))

        return kernel
\ No newline at end of file
# -*-coding:utf-8 -*-
"""Pygraph - utils module
Implement some methods to manage graphs
graphfiles.py : load .gxl and .ct files
utils.py : compute some properties on networkX graphs
"""
# info
__version__ = "0.1"
__author__ = "Benoit Gaüzère"
__date__ = "November 2017"
from pygraph.utils import graphfiles
from pygraph.utils import utils
import networkx as nx
def loadCT(filename):
"""load data from .ct file.
Notes
------
a typical example of data in .ct is like this:
3 2 <- number of nodes and edges
0.0000 0.0000 0.0000 C <- each line describes a node, the last parameter in which is the label of the node, representing a chemical element @Q what are the first 3 numbers?
0.0000 0.0000 0.0000 C
0.0000 0.0000 0.0000 O
1 3 1 1 <- each line describes an edge, the first two numbers represent two nodes of the edge, the last number represents the label. @Q what is the 3rd number?
2 3 1 1
"""
content = open(filename).read().splitlines()
G = nx.Graph(name=str(content[0]))
G = nx.Graph(name=str(content[0])) # set name of the graph
tmp = content[1].split(" ")
if tmp[0] == '':
nb_nodes = int(tmp[1])
nb_edges = int(tmp[2])
nb_nodes = int(tmp[1]) # number of the nodes
nb_edges = int(tmp[2]) # number of the edges
else:
nb_nodes = int(tmp[0])
nb_edges = int(tmp[1])
......@@ -18,7 +30,7 @@ def loadCT(filename):
G.add_node(i, label=tmp[3])
for i in range(0, nb_edges):
tmp = content[i+G.number_of_nodes()+2].split(" ")
tmp = content[i + G.number_of_nodes() + 2].split(" ")
tmp = [x for x in tmp if x != '']
G.add_edge(int(tmp[0]) - 1, int(tmp[1]) - 1, label=int(tmp[3]))
return G
......@@ -43,9 +55,10 @@ def loadGXL(filename):
label = edge.find('attr')[0].text
G.add_edge(dic[edge.attrib['from']], dic[edge.attrib['to']], label=label)
return G
def loadDataset(filename):
"""load file list of the dataset.
"""
from os.path import dirname, splitext
dirname_dataset = dirname(filename)
......@@ -56,7 +69,7 @@ def loadDataset(filename):
content = open(filename).read().splitlines()
for i in range(0, len(content)):
tmp = content[i].split(' ')
data.append(loadCT(dirname_dataset + '/' + tmp[0]))
data.append(loadCT(dirname_dataset + '/' + tmp[0].replace('#', '', 1))) # remove the '#'s in file names
y.append(float(tmp[1]))