Machine Learning Project

DVRK91
Network.py

import os
# NOTE(review): wildcard import kept only for backward compatibility; nothing
# in this module uses a name from `random`. Confirm no other module relies on
# these names being re-exported before removing it.
from random import *

import numpy as np
import tensorflow.compat.v1 as tf
from tensorflow.python.ops import control_flow_ops


class Agent(object):
    """Small feed-forward network that scores the cells of a 9-cell board.

    The graph is built once in ``__init__`` (TF1 graph mode): a placeholder
    for the board state, a placeholder for the per-cell targets, the network
    output, a cross-entropy loss and an Adam training op.  A state is a flat
    sequence of ``num_input_units`` numbers in which ``0`` marks a free cell
    (that is how ``explore`` and ``max_reward`` interpret it).
    """

    def __init__(self):
        """Build the TensorFlow graph and start a session with fresh weights."""
        tf.disable_v2_behavior()

        # Hyper-parameters.
        self.num_hidden_units = 50
        self.num_output_units = 9
        self.num_input_units = 9
        self.learning_param = 0.01   # Adam learning rate
        self.epsilon = 1e-3          # keeps tf.log away from log(0) in the loss
        self.decay = 0.9             # not used inside this class
        self.error = 0               # last training loss reported by learn()
        self.TrainX = []
        self.TrainY = []

        # Not used inside this class (presumably batch-norm offsets/scales).
        self.beta_init = tf.constant_initializer(value=0.0, dtype=tf.float32)
        self.gamma_init = tf.constant_initializer(value=1.0, dtype=tf.float32)

        # Placeholders for the network input and the training targets.
        self.tf_inputX = tf.placeholder(
            tf.float32, [None, self.num_input_units], name='inputX')
        # Fed by learn()/predict_action(); the current graph does not consume it.
        self.tf_keep_prob = tf.placeholder(tf.float32, name='keep_prob')
        self.tf_Y = tf.placeholder(
            tf.float32, [None, self.num_output_units], name='Y')
        self.tf_phase_train = tf.placeholder(tf.bool)
        self.tf_global_step = tf.Variable(
            0, name='global_step', trainable=False)

        # Outputs, loss function and training optimizer.
        self.outputA = self.training_network(
            self.tf_inputX, self.tf_phase_train)
        self.loss = self.cross_entropy_loss_function()
        self.optimizer = self.training_initializer(
            self.loss, self.tf_global_step)

        # Session, checkpoint saver and variable initialisation.
        self.sess = tf.Session()
        self.saver = tf.train.Saver()
        self.sess.run(tf.global_variables_initializer())

    def NN_layer(self, tf_input, num_hidden_units, phase_train, variable_name,
                 activation=tf.nn.tanh):
        """Return ``activation(tf_input @ W + b)`` for a fully connected layer.

        Args:
            tf_input: 2-D tensor ``[batch, features]``; the feature count is
                read from its static shape.
            num_hidden_units: number of units (output columns) of the layer.
            phase_train: boolean tensor; accepted for interface compatibility
                but not used by this layer.
            variable_name: prefix for the ``_W`` / ``_b`` variables, which are
                created (or reused) inside the ``'Test'`` variable scope.
            activation: callable applied to the affine output, or ``None``
                for a purely linear layer.  Defaults to ``tf.nn.tanh``.

        Returns:
            Tensor of shape ``[batch, num_hidden_units]``.
        """
        tf_weight_initializer = tf.random_normal_initializer(
            mean=0, stddev=0.01)
        num_features = tf_input.get_shape()[1]

        with tf.variable_scope('Test', reuse=tf.AUTO_REUSE):
            W = tf.get_variable(
                name=variable_name + '_W',
                dtype=tf.float32,
                shape=[num_features, num_hidden_units],
                initializer=tf_weight_initializer)
            b = tf.get_variable(
                name=variable_name + '_b',
                dtype=tf.float32,
                shape=[num_hidden_units],
                initializer=tf.zeros_initializer())

        pre_activation = tf.add(tf.matmul(tf_input, W), b)
        if activation is None:
            return pre_activation
        return activation(pre_activation)

    def training_network(self, tf_input, phase_train):
        """Build the forward pass and return one score per board cell.

        NOTE(review): the architecture was left unspecified; this is a minimal
        completion (one tanh hidden layer, softmax output) - confirm it
        matches the intended design.  The softmax keeps every output inside
        (0, 1), which ``cross_entropy_loss_function`` relies on when it takes
        ``log(output)`` and ``log(1 - output)``.

        Args:
            tf_input: tensor reshaped here to ``[-1, num_input_units]``.
            phase_train: boolean tensor forwarded to ``NN_layer``.

        Returns:
            Tensor of shape ``[batch, num_output_units]``.
        """
        x = tf.reshape(tf_input, shape=[-1, self.num_input_units])
        hidden = self.NN_layer(
            x, self.num_hidden_units, phase_train, 'hidden1')
        return self.NN_layer(
            hidden, self.num_output_units, phase_train, 'output',
            activation=tf.nn.softmax)

    def cross_entropy_loss_function(self):
        """Return the mean over the batch of the summed per-cell cross-entropy.

        For every output ``p`` with target ``y`` the term is
        ``y * log(eps + p) + (1 - y) * log(eps + 1 - p)``; the terms are
        summed across cells, averaged over the batch and negated.
        """
        labels = self.tf_Y
        predictions = self.outputA

        positive_term = tf.multiply(
            labels, tf.log(tf.add(self.epsilon, predictions)))
        negative_term = tf.multiply(
            tf.subtract(1.0, labels),
            tf.log(tf.add(self.epsilon, tf.subtract(1.0, predictions))))

        per_sample = tf.reduce_sum(tf.add(positive_term, negative_term), 1)
        return -tf.reduce_mean(per_sample)

    def training_initializer(self, cost, global_step):
        """Return an Adam op minimising ``cost`` and incrementing ``global_step``."""
        optimizer = tf.train.AdamOptimizer(
            learning_rate=self.learning_param,
            beta1=0.9,
            beta2=0.999,
            epsilon=1e-08,
            use_locking=False,
            name='Adam')
        return optimizer.minimize(cost, global_step=global_step)

    def explore(self, state):
        """Return the index of a uniformly random free (``== 0``) cell.

        Raises:
            ValueError: if ``state`` has no free cell.  (Rejection sampling
                would otherwise spin forever on a full board.)
        """
        free_cells = np.flatnonzero(np.asarray(state) == 0)
        if free_cells.size == 0:
            raise ValueError('explore() called on a state with no free cell')
        return int(np.random.choice(free_cells))

    def max_reward(self, state):
        """Return the network's preferred cell, or a random free cell if taken."""
        action = self.predict_action(state)
        if state[action] == 0:
            return action
        return self.explore(state)

    def epsilon_greedy_action_annealed(self, state, percentage,
                                       epsilon_start=1.0, epsilon_end=1e-2):
        """Choose an action epsilon-greedily with a linearly annealed epsilon.

        Args:
            state: current board state.
            percentage: training progress.  NOTE(review): assumed to be a
                fraction in [0, 1] (values outside are clamped) - confirm the
                caller does not pass 0-100.
            epsilon_start: exploration probability at ``percentage == 0``.
            epsilon_end: exploration probability at ``percentage == 1``.

        Returns:
            ``self.explore(state)`` with probability epsilon, otherwise
            ``self.max_reward(state)``.
        """
        progress = min(max(float(percentage), 0.0), 1.0)
        exploration_rate = (
            epsilon_start + progress * (epsilon_end - epsilon_start))

        if np.random.rand() < exploration_rate:
            return self.explore(state)
        return self.max_reward(state)

    def learn(self, states, rewards, batchSize=1):
        """Run one optimisation step and record the loss in ``self.error``.

        Args:
            states: batch fed to the input placeholder.
            rewards: batch of per-cell targets fed to the label placeholder.
            batchSize: accepted for interface compatibility; not used.
        """
        feed = {
            self.tf_inputX: states,
            self.tf_Y: rewards,
            self.tf_phase_train: True,
            self.tf_keep_prob: 0.6,
        }
        _, trainingLoss = self.sess.run(
            [self.optimizer, self.loss], feed_dict=feed)
        print('train loss %s ' % (trainingLoss))
        self.error = trainingLoss

    def predict_action(self, state):
        """Return the index of the highest-scoring output for one state."""
        x = np.array(state).reshape(1, self.num_input_units)
        feed = {
            self.tf_inputX: x,
            self.tf_phase_train: False,
            self.tf_keep_prob: 1.0,
        }
        scores = self.sess.run(self.outputA, feed_dict=feed)
        return int(self.MaxIndex(scores))

    def MaxIndex(self, output_param):
        """Return the flat index of the largest entry (first one on ties).

        Uses a true arg-max, so the result is also correct when every entry
        is zero or negative.  Returns 0 for empty input.
        """
        flat = np.asarray(output_param).ravel()
        if flat.size == 0:
            return 0
        return int(np.argmax(flat))

    def saveModel(self, dir):
        """Save the session's variables to ``dir + "/trainedmodel/tictactoe"``."""
        model = dir+"/trainedmodel/tictactoe"
        # Create the checkpoint's own parent directory (dir/trainedmodel),
        # not just `dir`, so the saver has somewhere to write.
        os.makedirs(os.path.dirname(model), exist_ok=True)
        print('Saving '+model)
        self.saver.save(self.sess, model)

    def loadModel(self, dir):
        """Restore variables from ``dir + "/trainedmodel/tictactoe"``."""
        model = dir+"/trainedmodel/tictactoe"
        print('Londing '+model)
        self.saver.restore(self.sess, model)