"""
Utilities for training the parameters of tensorflow computational graphs.
"""
import tensorflow as tf
import sys
import math
OPTIMIZERS = {'grad': tf.train.GradientDescentOptimizer, 'adam': tf.train.AdamOptimizer}


class EarlyStop:
    """
    A class for determining when to stop a training while loop by a bad count criterion.
    If the data is exhausted or the model's performance hasn't improved for *badlimit* training
    steps, the __call__ function returns False. Otherwise it returns True.
    """

    def __init__(self, badlimit=20):
        """
        :param badlimit: Limit on the number of training steps without improvement for early stopping.
        """
        self.badlimit = badlimit
        self.badcount = 0
        self.current_loss = sys.float_info.max

    def __call__(self, mat, loss):
        """
        Returns a boolean for a customizable stopping criterion.
        On the first loop iteration, pass loss as sys.float_info.max.
        :param mat: Current batch of features for training.
        :param loss: Current loss during training.
        :return: True when mat is not None, self.badcount < self.badlimit, and loss is neither inf nor nan;
                 False otherwise.
        """
        if mat is None:
            sys.stderr.write('Done training. End of data stream.\n')
            cond = False
        elif math.isnan(loss) or math.isinf(loss):
            sys.stderr.write('Exiting due to divergence: %s\n' % loss)
            cond = False
        elif loss > self.current_loss:
            self.badcount += 1
            if self.badcount >= self.badlimit:
                sys.stderr.write('Exiting. Exceeded max bad count.\n')
                cond = False
            else:
                cond = True
        else:
            self.badcount = 0
            cond = True
        self.current_loss = loss
        return cond
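

# A minimal usage sketch for EarlyStop (illustrative, not part of this module's
# API): `batcher` is a hypothetical stand-in for a data stream that yields
# feature matrices and then None when exhausted, matching the mat-is-None check
# in __call__ above.
def _example_early_stop():
    import numpy as np

    def batcher(n_batches=50):
        for _ in range(n_batches):
            yield np.random.rand(8, 3)
        yield None  # signals the end of the data stream

    check_early_stop = EarlyStop(badlimit=20)
    stream = batcher()
    mat, loss, step = next(stream), sys.float_info.max, 0
    while check_early_stop(mat, loss):
        loss = 1.0 / (step + 1)  # stand-in for the loss of a real train step on mat
        mat = next(stream)
        step += 1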


class ModelRunner:
    """
    A class for training the parameters of tensorflow models by gradient descent.
    """

    def __init__(self, loss, ph_dict, learnrate=0.01, opt='adam', debug=False, decay=True,
                 decay_rate=0.99, decay_steps=20):
        """
        :param loss: The objective function for the optimization strategy.
        :param ph_dict: A dictionary of names (str) to tensorflow placeholders.
        :param learnrate: The step size for gradient descent.
        :param opt: Optimization algorithm; can be 'adam' or 'grad'.
        :param debug: Whether or not to print debugging info.
        :param decay: (boolean) Whether or not to use a learn rate with exponential decay.
        :param decay_rate: The rate parameter for exponential decay of the learn rate.
        :param decay_steps: The number of training steps between decays of the learn rate.
        """
        self.loss = loss
        self.ph_dict = ph_dict
        self.debug = debug
        if decay:
            self.global_step = tf.Variable(0, trainable=False)
            learnrate = tf.train.exponential_decay(learnrate, self.global_step,
                                                   decay_steps, decay_rate, staircase=True)
        else:
            self.global_step = None
        self.train_op = OPTIMIZERS[opt](learnrate).minimize(loss, global_step=self.global_step)
        self.init = tf.global_variables_initializer()
        self.sess = tf.Session()
        self.sess.run(self.init)

    def train_step(self, datadict, eval_tensors=[], update=True):
        """
        Performs a training step of gradient descent with the given optimization strategy.
        :param datadict: A dictionary of names (str) matching names in ph_dict to numpy matrices for this mini-batch.
        :param eval_tensors: (list of Tensors) Tensors to evaluate along with train_op.
        :param update: (boolean) Whether to perform a gradient update this train step.
        :return: A list of numpy arrays for eval_tensors. When update is True the first element is None
                 (the result of running train_op).
        """
        if update:
            train_op = [self.train_op]
        else:
            train_op = eval_tensors[0:1]
        return self.sess.run(train_op + eval_tensors,
                             feed_dict=get_feed_dict(datadict, self.ph_dict, debug=self.debug))

    def eval(self, datadict, eval_tensors):
        """
        Evaluates tensors without affecting the parameters of the model.
        :param datadict: A dictionary of names (str) matching names in ph_dict to numpy matrices for this mini-batch.
        :param eval_tensors: Tensors from the computational graph to evaluate as numpy matrices.
        :return: A list of evaluated tensors as numpy matrices.
        """
        return self.sess.run(eval_tensors,
                             feed_dict=get_feed_dict(datadict, self.ph_dict, train=0, debug=self.debug))
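

# A minimal usage sketch for ModelRunner (illustrative, not part of this
# module's API): fits a single weight to the toy mapping y = 3x by least
# squares. The placeholder names 'x' and 'y' and all data below are
# assumptions made for the example.
def _example_model_runner():
    import numpy as np
    x = tf.placeholder(tf.float32, [None, 1], name='x')
    y = tf.placeholder(tf.float32, [None, 1], name='y')
    w = tf.Variable(tf.zeros([1, 1]))
    loss = tf.reduce_mean(tf.square(tf.matmul(x, w) - y))
    model = ModelRunner(loss, {'x': x, 'y': y}, learnrate=0.1, opt='adam', decay=False)
    xs = np.random.rand(32, 1).astype(np.float32)
    data = {'x': xs, 'y': 3.0 * xs}
    for _ in range(200):
        _, batch_loss = model.train_step(data, eval_tensors=[loss])
    print('final loss: %s' % model.eval(data, [loss])[0])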


def get_feed_dict(datadict, ph_dict, train=1, debug=False):
    """
    Function for pairing placeholders of a tensorflow computational graph with numpy arrays.
    :param datadict: A dictionary with keys matching keys in ph_dict, whose values are numpy arrays.
    :param ph_dict: A dictionary where the keys match keys in datadict and whose values are placeholder tensors.
    :param train: {1, 0}. Different values get fed to placeholders for dropout probability and batch norm
                  statistics depending on whether the model is training or evaluating.
    :param debug: (boolean) Whether or not to print dimensions of the contents of ph_dict and datadict.
    :return: A feed dictionary with keys of placeholder tensors and values of numpy matrices.
    """
    fd = {}
    for k, v in ph_dict.items():
        if type(v) is not list:
            fd[v] = datadict[k]
        else:
            for tensor, matrix in zip(v, datadict[k]):
                fd[tensor] = matrix
    dropouts = tf.get_collection('dropout_prob')
    bn_deciders = tf.get_collection('bn_deciders')
    if dropouts:
        # Each entry in the 'dropout_prob' collection is a (placeholder, keep_prob) pair.
        for prob in dropouts:
            if train == 1:
                fd[prob[0]] = prob[1]
            else:
                fd[prob[0]] = 1.0
    if bn_deciders:
        fd.update({decider: [train] for decider in bn_deciders})
    if debug:
        for desc in ph_dict:
            if type(ph_dict[desc]) is not list:
                print('%s\n\tph: %s\t%s\tdt: %s\t%s' % (desc,
                                                        ph_dict[desc].get_shape().as_list(),
                                                        ph_dict[desc].dtype,
                                                        datadict[desc].shape,
                                                        datadict[desc].dtype))
        print(fd.keys())
    return fd
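

# A minimal usage sketch for get_feed_dict (illustrative, not part of this
# module's API), showing the convention implied by the loop above: each entry
# of the 'dropout_prob' graph collection is a (placeholder, keep_prob) pair.
# The names 'features' and keep_prob are assumptions made for the example.
def _example_get_feed_dict():
    import numpy as np
    x = tf.placeholder(tf.float32, [None, 4], name='features')
    keep_prob = tf.placeholder_with_default(1.0, [], name='keep_prob')
    tf.add_to_collection('dropout_prob', (keep_prob, 0.75))
    data = {'features': np.ones((2, 4), dtype=np.float32)}
    fd_train = get_feed_dict(data, {'features': x}, train=1)  # feeds keep_prob = 0.75
    fd_eval = get_feed_dict(data, {'features': x}, train=0)   # feeds keep_prob = 1.0
    return fd_train, fd_eval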