From 94a49fa8898d524d4845eea41c00b439bd50601f Mon Sep 17 00:00:00 2001 From: Yann SOULLARD <yann.soullard@univ-rouen.fr> Date: Mon, 8 Apr 2019 10:45:46 +0200 Subject: [PATCH] Update summary and improvement in load_model --- CTCModel.py | 190 +++++++++++++++++++++++++++++++--------------------- 1 file changed, 114 insertions(+), 76 deletions(-) diff --git a/CTCModel.py b/CTCModel.py index 4e3025a..1c38972 100644 --- a/CTCModel.py +++ b/CTCModel.py @@ -3,8 +3,6 @@ import tensorflow as tf import numpy as np import os - -#import warpctc_tensorflow from keras import Input from keras.engine import Model from keras.layers import Lambda @@ -21,7 +19,6 @@ from keras.layers import TimeDistributed, Activation, Dense #from ocr_ctc.utils.utils_keras import Kreshape_To1D from keras.preprocessing import sequence - """ authors: Yann Soullard, Cyprien Ruffino (2017) LITIS lab, university of Rouen (France) @@ -69,7 +66,6 @@ class CTCModel: self.charset = charset - def compile(self, optimizer): """ Configures the CTC Model for training. @@ -534,7 +530,6 @@ class CTCModel: return outmetrics - def predict_on_batch(self, x): """Returns predictions for a single batch of samples. @@ -544,10 +539,11 @@ class CTCModel: # Returns Numpy array(s) of predictions. """ - batch_size = x[0].shape[0] - - return self.predict(x, batch_size=batch_size) + #batch_size = x[0].shape[0] + out = self.model_pred.predict_on_batch(x) + output = [[pr for pr in pred if pr != -1] for pred in out] + return output # self.predict(x, batch_size=batch_size) def predict_generator(self, generator, steps, max_queue_size=10, @@ -559,7 +555,7 @@ class CTCModel: The generator should return the same kind of data as accepted by `predict_on_batch`. - + generator = DataGenerator class that returns: x = Input data as a 3D Tensor (batch_size, max_input_len, dim_features) y = Input data as a 2D Tensor (batch_size, max_label_len) @@ -654,7 +650,7 @@ class CTCModel: all_lab.append([]) for i, out in enumerate(outs): - all_outs[i].append([val_out for val_out in out if val_out!=-1]) + all_outs[i].append([val_out for val_out in out if val_out != -1]) if isinstance(y_length[i], list): all_lab[i].append(y[i][:y_length[i][0]]) elif isinstance(y_length[i], int): @@ -678,7 +674,7 @@ class CTCModel: lab_out = [] for i in range(nb_data): pred_out += [all_outs[b][i] for b in range(batch_size)] - lab_out += [all_lab[b][i] for b in range(batch_size)] + lab_out += [all_lab[b][i] for b in range(batch_size)] if decode_func is not None: # convert model prediction (a label between 0 to nb_labels to an original label sequence) pred_out = decode_func(pred_out, self.charset) @@ -687,12 +683,16 @@ class CTCModel: return pred_out, lab_out + def summary(self): + return self.model_train.summary() + + def predict(self, x, batch_size=None, verbose=0, steps=None, max_len=None, max_value=999): """ The same function as in the Keras Model but with a different function predict_loop for dealing with variable length predictions Except that x = [x_features, x_len] - + Generates output predictions for the input samples. Computation is done in batches. 
@@ -747,16 +747,17 @@ class CTCModel:
         self.model_pred._make_predict_function()
         f = self.model_pred.predict_function
         out = self._predict_loop(f, ins, batch_size=batch_size, max_value=max_value,
-                             verbose=verbose, steps=steps, max_len=max_len)
+                                 verbose=verbose, steps=steps, max_len=max_len)
 
-        out_decode = [dec_data[:list(dec_data).index(max_value)] if max_value in dec_data else dec_data for i,dec_data in enumerate(out)]
+        out_decode = [dec_data[:list(dec_data).index(max_value)] if max_value in dec_data else dec_data for i, dec_data
+                      in enumerate(out)]
 
         return out_decode
 
     def _predict_loop(self, f, ins, max_len=100, max_value=999, batch_size=32, verbose=0, steps=None):
         """Abstract method to loop over some data in batches.
         Keras function that has been modified.
-        
+
         # Arguments
             f: Keras function returning a list of tensors.
             ins: list of tensors to be fed to `f`.
@@ -819,17 +820,17 @@ class CTCModel:
             if batch_index == 0:
                 # Pre-allocate the results arrays.
                 for batch_out in batch_outs:
-                    shape = (num_samples,max_len)
+                    shape = (num_samples, max_len)
                     outs.append(np.zeros(shape, dtype=batch_out.dtype))
             for i, batch_out in enumerate(batch_outs):
-                outs[i][batch_start:batch_end] = sequence.pad_sequences(batch_out, value=float(max_value), maxlen=max_len,
-                                                  dtype=batch_out.dtype, padding="post")
+                outs[i][batch_start:batch_end] = sequence.pad_sequences(batch_out, value=float(max_value),
+                                                                        maxlen=max_len,
+                                                                        dtype=batch_out.dtype, padding="post")
         if len(outs) == 1:
             return outs[0]
         return outs
 
-
     @staticmethod
     def ctc_loss_lambda_func(args):
         """
@@ -841,7 +842,7 @@ class CTCModel:
 
         y_pred, labels, input_length, label_length = args
         return K.ctc_batch_cost(labels, y_pred, input_length, label_length)#, ignore_longer_outputs_than_inputs=True)
-        # return warpctc_tensorflow.ctc(y_pred, labels, label_length, input_length)
+
 
     @staticmethod
     def ctc_complete_decoding_lambda_func(args, **arguments):
@@ -944,7 +945,8 @@ class CTCModel:
 
         output.close()
 
-    def load_model(self, path_dir, optimizer, file_weights=None, change_parameters=False, init_last_layer=False):
+    def load_model(self, path_dir, optimizer, init_archi=True, file_weights=None, change_parameters=False,
+                   init_last_layer=False, add_layers=None, trainable=False, removed_layers=2):
         """ Load a model in path_dir
         load model_train, model_pred and model_eval from json
         load inputs and outputs from json
@@ -952,44 +954,52 @@ class CTCModel:
 
         :param path_dir: directory where the model is saved
         :param optimizer: The optimizer used during training
+        :param init_archi: load the architecture from json. Otherwise, the network architecture must already be initialized.
+        :param file_weights: weights to load (None = the default parameters are kept).
+        :param init_last_layer: reinitialize the last layer, using self.charset to get the number of labels.
+        :param add_layers: layers to add. None leaves the network architecture unchanged. Otherwise, add_layers is
+        a list of layers appended after the last kept layer of the current architecture.
+        :param trainable: when add_layers is used, whether the transferred lower layers remain trainable (False freezes them).
+        :param removed_layers: number of final layers removed from the current architecture, applied before adding the layers in add_layers.
""" + if init_archi: - json_file = open(path_dir + '/model_train.json', 'r') - loaded_model_json = json_file.read() - json_file.close() - self.model_train = model_from_json(loaded_model_json) + json_file = open(path_dir + '/model_train.json', 'r') + loaded_model_json = json_file.read() + json_file.close() + self.model_train = model_from_json(loaded_model_json) - json_file = open(path_dir + '/model_pred.json', 'r') - loaded_model_json = json_file.read() - json_file.close() - self.model_pred = model_from_json(loaded_model_json, custom_objects={"tf": tf}) + json_file = open(path_dir + '/model_pred.json', 'r') + loaded_model_json = json_file.read() + json_file.close() + self.model_pred = model_from_json(loaded_model_json, custom_objects={"tf": tf}) - json_file = open(path_dir + '/model_eval.json', 'r') - loaded_model_json = json_file.read() - json_file.close() - self.model_eval = model_from_json(loaded_model_json, custom_objects={"tf": tf, "ctc": ctc, - "tf_edit_distance": tf_edit_distance, - "Kreshape_To1D": Kreshape_To1D}) + json_file = open(path_dir + '/model_eval.json', 'r') + loaded_model_json = json_file.read() + json_file.close() + self.model_eval = model_from_json(loaded_model_json, custom_objects={"tf": tf, "ctc": ctc, + "tf_edit_distance": tf_edit_distance, + "Kreshape_To1D": Kreshape_To1D}) - json_file = open(path_dir + '/model_init.json', 'r') - loaded_model_json = json_file.read() - json_file.close() - self.model_init = model_from_json(loaded_model_json, custom_objects={"tf": tf}) + json_file = open(path_dir + '/model_init.json', 'r') + loaded_model_json = json_file.read() + json_file.close() + self.model_init = model_from_json(loaded_model_json, custom_objects={"tf": tf}) - self.inputs = self.model_init.inputs - self.outputs = self.model_init.outputs + self.inputs = self.model_init.inputs + self.outputs = self.model_init.outputs - input = open(path_dir + "/model_param.pkl", 'rb') - p = pickle.Unpickler(input) - param = p.load() - input.close() + input = open(path_dir + "/model_param.pkl", 'rb') + p = pickle.Unpickler(input) + param = p.load() + input.close() - if not change_parameters: - self.greedy = param['greedy'] if 'greedy' in param.keys() else self.greedy - self.beam_width = param['beam_width'] if 'beam_width' in param.keys() else self.beam_width - self.top_paths = param['top_paths'] if 'top_paths' in param.keys() else self.top_paths - self.charset = param['charset'] if 'charset' in param.keys() and self.charset is None else self.charset + if not change_parameters: + self.greedy = param['greedy'] if 'greedy' in param.keys() else self.greedy + self.beam_width = param['beam_width'] if 'beam_width' in param.keys() else self.beam_width + self.top_paths = param['top_paths'] if 'top_paths' in param.keys() else self.top_paths + self.charset = param['charset'] if 'charset' in param.keys() and self.charset is None else self.charset self.compile(optimizer) @@ -1003,17 +1013,56 @@ class CTCModel: self.model_pred.set_weights(self.model_train.get_weights()) self.model_eval.set_weights(self.model_train.get_weights()) - if init_last_layer: + if add_layers != None: #Â add layers after transfer + labels = Input(name='labels', shape=[None]) + input_length = Input(name='input_length', shape=[1]) + label_length = Input(name='label_length', shape=[1]) + + new_layer = Input(name='input', shape=self.model_init.layers[0].output_shape[1:]) + self.inputs = [new_layer] + for layer in self.model_init.layers[1:-removed_layers]: + print(layer) + new_layer = layer(new_layer) + layer.trainable = trainable 
+ for layer in add_layers: + new_layer = layer(new_layer) + layer.trainable = True + + + self.outputs = [new_layer] + loss_out = Lambda(self.ctc_loss_lambda_func, output_shape=(1,), name='CTCloss')( + self.outputs + [labels, input_length, label_length]) + # Lambda layer for the decoding function + out_decoded_dense = Lambda(self.ctc_complete_decoding_lambda_func, output_shape=(None, None), + name='CTCdecode', arguments={'greedy': self.greedy, + 'beam_width': self.beam_width, + 'top_paths': self.top_paths}, dtype="float32")( + self.outputs + [input_length]) + + # Lambda layer to perform an analysis (CER and SER) + out_analysis = Lambda(self.ctc_complete_analysis_lambda_func, output_shape=(None,), name='CTCanalysis', + arguments={'greedy': self.greedy, + 'beam_width': self.beam_width, 'top_paths': self.top_paths}, + dtype="float32")( + self.outputs + [labels, input_length, label_length]) + + # create Keras models + self.model_init = Model(inputs=self.inputs, outputs=self.outputs) + self.model_train = Model(inputs=self.inputs + [labels, input_length, label_length], outputs=loss_out) + self.model_pred = Model(inputs=self.inputs + [input_length], outputs=out_decoded_dense) + self.model_eval = Model(inputs=self.inputs + [labels, input_length, label_length], outputs=out_analysis) + + # Compile models + self.model_train.compile(loss={'CTCloss': lambda yt, yp: yp}, optimizer=optimizer) + self.model_pred.compile(loss={'CTCdecode': lambda yt, yp: yp}, optimizer=optimizer) + self.model_eval.compile(loss={'CTCanalysis': lambda yt, yp: yp}, optimizer=optimizer) + + + elif init_last_layer: labels = Input(name='labels', shape=[None]) input_length = Input(name='input_length', shape=[1]) label_length = Input(name='label_length', shape=[1]) - # new_model_init = Sequential() # new model - # for layer in self.model_init.layers[:-2]: - # new_model_init.add(layer) - # new_model_init.add(TimeDistributed(Dense(len(self.charset) + 1), name="DenseSoftmax")) - # new_model_init.add(Activation('softmax', name='Softmax')) - # self.model_init = new_model_init new_layer = Input(name='input', shape=self.model_init.layers[0].output_shape[1:]) self.inputs = [new_layer] for layer in self.model_init.layers[1:-2]: @@ -1023,14 +1072,6 @@ class CTCModel: self.outputs = [new_layer] - # new_model_train = Sequential() # new model - # nb_layers = len(self.model_train.layers) - # new_layer = Input(name='input', - # shape=self.model_train.layers[0].output_shape[1:]) - # for layer in self.model_train.layers[1:-6]: - # new_layer = layer(new_layer) - # new_layer = TimeDistributed(Dense(len(self.charset) + 1), name="DenseSoftmax")(new_layer) - # new_layer = Activation('softmax', name='Softmax')(new_layer) # Lambda layer for computing the loss function loss_out = Lambda(self.ctc_loss_lambda_func, output_shape=(1,), name='CTCloss')( @@ -1061,9 +1102,6 @@ class CTCModel: self.model_eval.compile(loss={'CTCanalysis': lambda yt, yp: yp}, optimizer=optimizer) - - - def _standardize_input_data(data, names, shapes=None, check_batch_axis=True, exception_prefix=''): @@ -1084,7 +1122,7 @@ def _standardize_input_data(data, names, shapes=None, exception_prefix: String prefix used for exception formatting. Keras function that has been modified. - + # Returns List of standardized input arrays (one array per model input). 
@@ -1095,7 +1133,7 @@ def _standardize_input_data(data, names, shapes=None, if data is not None and hasattr(data, '__len__') and len(data): raise ValueError('Error when checking model ' + exception_prefix + ': ' - 'expected no data, but got:', data) + 'expected no data, but got:', data) return [] if data is None: return [None for _ in range(len(names))] @@ -1196,7 +1234,7 @@ def _slice_arrays(arrays, start=None, stop=None): Can also work on list/array of indices: `_slice_arrays(x, indices)` Keras function that has been modified. - + # Arguments arrays: Single array or list of arrays. start: can be an integer index (start index) @@ -1232,7 +1270,7 @@ def _make_batches(size, batch_size): """Returns a list of batch indices (tuples of indices). Keras function that has been modified. - + # Arguments size: Integer, total size of the data to slice into batches. batch_size: Integer, batch size. @@ -1296,11 +1334,11 @@ def check_num_samples(ins, raise ValueError( 'If your data is in the form of symbolic tensors, ' 'you should specify the `' + steps_name + '` argument ' - '(instead of the `batch_size` argument, ' - 'because symbolic tensors are expected to produce ' - 'batches of input data).') + '(instead of the `batch_size` argument, ' + 'because symbolic tensors are expected to produce ' + 'batches of input data).') return None if hasattr(ins[0], 'shape'): return int(ins[0].shape[0]) - return None # Edge case where ins == [static_learning_phase] \ No newline at end of file + return None # Edge case where ins == [static_learning_phase] -- GitLab
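Reviewer note (not part of the patch): the sketch below illustrates how the new load_model options introduced in this commit could be combined for transfer to a new label set. It is a minimal, untested example; the directory, weight-file path, charset, optimizer and layer names are hypothetical placeholders, and it assumes the existing CTCModel constructor signature CTCModel(inputs, outputs, ..., charset=None).

    # Hypothetical usage sketch of the new load_model options (paths and charset are placeholders).
    from keras.layers import TimeDistributed, Dense, Activation
    from keras.optimizers import Adam

    from CTCModel import CTCModel

    new_charset = list("abcdefghijklmnopqrstuvwxyz ")  # assumed target alphabet

    # The architecture is read from json by load_model (init_archi=True),
    # so the constructor only needs the charset here.
    model = CTCModel([], [], charset=new_charset)

    # Reload a saved network, drop its old Dense+Softmax head (removed_layers=2),
    # freeze the transferred layers (trainable=False) and append a new head
    # sized for the new charset via add_layers.
    model.load_model('./saved_model', Adam(lr=1e-4),
                     init_archi=True,
                     file_weights='./saved_model/model_weights.hdf5',
                     add_layers=[TimeDistributed(Dense(len(new_charset) + 1), name='DenseSoftmax'),
                                 Activation('softmax', name='Softmax')],
                     trainable=False,
                     removed_layers=2)

    model.summary()  # forwards to model_train.summary(), added in this commit

In this reading, add_layers generalizes init_last_layer: the caller chooses how many trailing layers to drop and which new head to append, instead of always rebuilding a Dense+Softmax pair from self.charset.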