Machine Learning Project

profileDVRK91
EpisodeHistory.py

import numpy as np


class EpisodeHistory(object):
    """Record every move made during one episode, with its per-cell reward.

    Three parallel lists are kept, one entry per recorded move:

    * ``states``  -- the board state *before* the move was applied,
    * ``actions`` -- the ``(player, move_index)`` tuple of the move,
    * ``rewards`` -- a list of ``BOARD_SIZE`` numbers that is zero everywhere
      except at ``move_index``, where it holds the reward for that move.
    """

    # Number of cells in one reward record (one slot per possible move index).
    BOARD_SIZE = 9

    def __init__(self):
        self.reset()

    def reset(self):
        """Discard all recorded states, actions and rewards."""
        self.states = []
        self.actions = []
        self.rewards = []

    def get_states(self):
        """Return the list of recorded pre-move states (not a copy)."""
        return self.states

    def end_of_game_reward(self, winer_reward, loser_reward):
        """Overwrite the rewards of the final two moves of the episode.

        The last recorded move receives ``winer_reward`` and the move before
        it receives ``loser_reward``.

        With a single recorded move only the winner reward is written: the
        index ``length - 2`` would otherwise wrap around to the same move and
        clobber it with ``loser_reward``. With no recorded moves this is a
        no-op.
        """
        length = len(self.actions)
        if length == 0:
            return

        _, last_index = self.actions[length - 1]
        self.rewards[length - 1][last_index] = winer_reward

        if length >= 2:
            _, previous_index = self.actions[length - 2]
            self.rewards[length - 2][previous_index] = loser_reward

    def add_to_history(self, state, action, reward):
        """Record one move and return the state that results from it.

        Args:
            state: board before the move; must support ``.copy()`` and item
                assignment by ``move_index``.
            action: ``(player, move_index)`` tuple.
            reward: immediate reward stored at ``move_index`` of the new
                reward record.

        Returns:
            A copy of ``state`` with ``state[move_index]`` set to ``player``.
            The ``state`` argument itself is not modified.
        """
        player, move_index = action

        reward_record = [0] * self.BOARD_SIZE
        reward_record[move_index] = reward

        self.states.append(state)
        self.actions.append(action)
        self.rewards.append(reward_record)

        new_state = state.copy()
        new_state[move_index] = player
        return new_state

    def discounted_returns(self, gamma=0.5):
        """Return the discounted Monte Carlo return for every recorded move.

        Moves at even history indices and moves at odd history indices are
        treated as two separate reward sequences (the two players are assumed
        to alternate, one move per history entry). For each sequence the
        return is accumulated backwards from the final move:

            G_i = r_i + gamma * G_next

        where ``G_next`` is the return of the same player's next move, or 0
        after that player's last move.

        Args:
            gamma: discount factor applied once per own-move step.

        Returns:
            A float ``numpy`` array with the same shape as ``self.rewards``;
            row ``i`` is zero except at the move index of action ``i``, which
            holds the discounted return of that move.
        """
        length = len(self.actions)
        output_rewards = np.zeros(np.shape(self.rewards))

        for first_move in range(2):
            running_return = 0.0
            # Walk this player's moves from last to first so each step can
            # reuse the already-discounted return of the following move.
            for i in reversed(range(first_move, length, 2)):
                _, target_index = self.actions[i]
                running_return = (
                    self.rewards[i][target_index] + gamma * running_return
                )
                output_rewards[i][target_index] = running_return

        return output_rewards