Classification with Deep Neural Networks and Batch Normalization
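
The DNNBNClassifier below is a scikit-learn compatible estimator built on the TensorFlow 1.x graph API (tf.layers, tf.contrib). Every hidden layer combines a fully connected layer with He initialization, batch normalization, and an ELU activation; a batch-normalized output layer feeds a softmax cross-entropy loss trained with Nesterov momentum, with early stopping on the training accuracy. A short usage sketch follows the listing.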

import numpy as np
import tensorflow as tf
from sklearn.base import BaseEstimator

class DNNBNClassifier(BaseEstimator):
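    """Scikit-learn compatible DNN classifier with batch normalization.

    Hidden layers use He initialization, batch normalization, and ELU
    activations; training uses Nesterov momentum and stops early once the
    training accuracy reaches a target (TensorFlow 1.x graph API).
    """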
    
    def __init__(self,
                 hidden_units = [],        # units per hidden layer, e.g. [300, 100]
                 n_classes = None,         # number of output classes
                 learning_rate = 0.03,
                 momentum = 0.9,           # momentum of the Nesterov optimizer
                 n_epochs = 30,
                 accuracy = 0.995,         # early-stopping target on training accuracy
                 batch_size = 50,
                 model_file = "./dnn.ckpt"):
        self.hidden_units = hidden_units
        self.n_classes = n_classes
        self.learning_rate = learning_rate
        self.momentum = momentum
        self.n_epochs = n_epochs
        self.accuracy = accuracy
        self.batch_size = batch_size
        self.model_file = model_file
        self.tf_graph = None
        self.tf_inst = None
        self.tf_logits = None
        self.tf_saver = None
        
    def fit(self, insts, labels):
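        # drop handles from any previous fit; the graph is rebuilt from scratch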
        self.tf_graph = None
        self.tf_inst = None
        self.tf_logits = None
        self.tf_saver = None
        tf_graph = tf.Graph()
        with tf_graph.as_default():
            # inputs, integer class labels, and a flag that switches batch
            # normalization between batch statistics (training) and its
            # moving averages (inference)
            tf_inst = tf.placeholder(tf.float64, shape = (None, insts.shape[1]), name = "inst")
            tf_label = tf.placeholder(tf.int64, shape = (None,), name = "label")
            tf_training = tf.placeholder_with_default(False, shape = (), name = "training")
            tf_he_init = tf.contrib.layers.variance_scaling_initializer()  # He initialization
            tf_last = tf_inst
            index = 1
            with tf.name_scope("dnn"):
                for n_hidden in self.hidden_units:
                    tf_layer = tf.layers.dense(tf_last, n_hidden, name = "hidden%s" % index, kernel_initializer = tf_he_init)
                    tf_norm = tf.layers.batch_normalization(tf_layer, training = tf_training, momentum = 0.9)
                    tf_act = tf.nn.elu(tf_norm)
                    tf_last = tf_act
                    index = index + 1
                tf_out = tf.layers.dense(tf_last, self.n_classes, name = "ouputs")
                tf_logits = tf.layers.batch_normalization(tf_out, name = "logits", training = tf_training, momentum = 0.9)
            with tf.name_scope("loss"):
                tf_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels = tf_label, logits = tf_logits)
                tf_loss = tf.reduce_mean(tf_entropy, name = "loss")
            with tf.name_scope("train"):
                tf_optimizer = tf.train.MomentumOptimizer(learning_rate = self.learning_rate, momentum = self.momentum, use_nesterov = True)
                tf_training_op = tf_optimizer.minimize(tf_loss)
            with tf.name_scope("eval"):
                tf_correct = tf.nn.in_top_k(tf.cast(tf_logits, tf.float32), tf_label, 1)
                tf_accuracy = tf.reduce_mean(tf.cast(tf_correct, tf.float32))
            tf_init_op = tf.global_variables_initializer()
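            # batch norm maintains its moving averages via ops in the UPDATE_OPS
            # collection; these are run together with every training step below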
            tf_update_op = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            tf_saver = tf.train.Saver()
            with tf.Session() as sess:
                tf_init_op.run()
                for epoch in range(self.n_epochs):
                    start = 0
                    for iteration in range(insts.shape[0] // self.batch_size):
                        end = start + self.batch_size
                        if end > insts.shape[0]:  # note: ">=" would skip the last full batch
                            break
                        insts_batch = insts[start:end]
                        labels_batch = labels[start:end]
                        sess.run([tf_training_op, tf_update_op], feed_dict = {tf_training: True, tf_inst: insts_batch, tf_label: labels_batch})
                        start = start + self.batch_size
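                    # accuracy over the full training set; tf_training stays at
                    # its default False, so batch norm uses its moving averages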
                    acc_train = tf_accuracy.eval(feed_dict = {tf_inst: insts, tf_label: labels})
                    print(epoch, acc_train)
                    if acc_train > self.accuracy:  # early stopping at the target accuracy
                        break
                tf_saver.save(sess, self.model_file)
        self.tf_graph = tf_graph
        self.tf_inst = tf_inst
        self.tf_logits = tf_logits
        self.tf_saver = tf_saver
    
    def predict(self, insts):
        if self.tf_graph is None or self.tf_saver is None or self.tf_logits is None or self.tf_inst is None:
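            # no live graph (e.g. a freshly constructed instance): rebuild it
            # from the meta graph saved alongside the checkpoint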
            tf_graph = tf.Graph()
            with tf_graph.as_default():
                with tf.Session() as sess:
                    tf_saver = tf.train.import_meta_graph(self.model_file + ".meta")
            self.tf_graph = tf_graph
            self.tf_saver = tf_saver
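            # output tensor of the final batch norm layer; the op name below
            # is generated internally by tf.layers.batch_normalization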
            self.tf_logits = tf_graph.get_tensor_by_name("dnn/logits/batchnorm/add_1:0")
            self.tf_inst = tf_graph.get_tensor_by_name("inst:0")
        with self.tf_graph.as_default():
            with tf.Session() as sess:
                self.tf_saver.restore(sess, self.model_file)
                z = self.tf_logits.eval(feed_dict = {self.tf_inst: insts})
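                # softmax is monotonic, so the argmax of the raw logits is the
                # predicted class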
                return np.argmax(z, axis = 1)
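
A minimal usage sketch (not part of the original listing): it assumes TensorFlow 1.x with tf.keras.datasets available for loading MNIST; the hidden layer sizes and epoch count are illustrative choices.

(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()
X_train = X_train.reshape(-1, 28 * 28).astype(np.float64) / 255.0
X_test = X_test.reshape(-1, 28 * 28).astype(np.float64) / 255.0

clf = DNNBNClassifier(hidden_units = [300, 100], n_classes = 10, n_epochs = 10)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
print("test accuracy:", np.mean(y_pred == y_test))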

