/**
 * This applet demonstrates a simple game. It isn't designed to be general or reusable.
<p>
 * Copyright (C) 2006  <A HREF="http://www.cs.ubc.ca/spider/poole/">David Poole</A>.
<p>
 * This program gives the core of the simulation. The GUI is in <A HREF="SGameGUI.java">SGameGUI.java</A>.  The environment code is at <A HREF="SGameEnv.java">SGameEnv.java</A>. This controller is at <A HREF="SGameQController.java">SGameQController.java</A>.
<p>
 This program is free software; you can redistribute it and/or
 modify it under the terms of the GNU General Public License
 as published by the Free Software Foundation; either version 2
 of the License, or (at your option) any later version.
<p>
 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.
<p>
 You should have received a copy of the GNU General Public License
 along with this program; if not, write to the Free Software
 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.


 * @author David Poole  poole@cs.ubc.ca
 * @version 0.42 2011-04-19 */

public class SGameQController extends SGameController
{
    // Sizes of the state-variable domains (grid position, prize, damage).
    public static final int X_SIZE = 5;
    public static final int Y_SIZE = 5;
    public static final int PRIZE_SIZE = 5;
    public static final int DAMAGE_SIZE = 2;
    /** Total number of states = X_SIZE * Y_SIZE * PRIZE_SIZE * DAMAGE_SIZE. */
    public static final int STATE_SIZE = 250;
    /** Number of actions (0=up, 1=right, 2=down, 3=left). */
    public static final int ACTION_SIZE = 4;

    /**
     * Constructs a new Q-learning controller; the environment is set up by
     * the superclass constructor.
     */
    SGameQController() {
        super();
        title = "Q-learning controller";  // fixed typo: was "contoller"
    }

    /**
     * qvalues[state][action] gives the current Q-value estimate.
     */
    double[][] qvalues = new double[STATE_SIZE][ACTION_SIZE];

    /**
     * visits[state][action] gives the number of times the action has been
     * carried out in the state.
     */
    int[][] visits = new int[STATE_SIZE][ACTION_SIZE];

    /**
     * Returns the Q-value for a grid position and action, using the
     * environment's current prize and damage state.  The GUI uses this to
     * display values and arrows.
     *
     * @param xval   x grid coordinate
     * @param yval   y grid coordinate
     * @param action the action index (0..ACTION_SIZE-1)
     * @return the Q-value of the corresponding (state, action) pair
     */
    public double qvalue(int xval, int yval, int action)
    {
        return qvalues[state(xval, yval, environment.prize, environment.damaged)][action];
    }

    /**
     * Returns the visit count for a grid position and action, using the
     * environment's current prize and damage state.  The GUI uses this to
     * display counts.
     *
     * @param xval   x grid coordinate
     * @param yval   y grid coordinate
     * @param action the action index (0..ACTION_SIZE-1)
     * @return the number of times the action was done in that state
     */
    public int getCounts(int xval, int yval, int action)
    {
        return visits[state(xval, yval, environment.prize, environment.damaged)][action];
    }

    /**
     * Encodes the state variables into a single state index in
     * [0, STATE_SIZE).
     *
     * @param xval   x grid coordinate, in [0, X_SIZE)
     * @param yval   y grid coordinate, in [0, Y_SIZE)
     * @param prize  prize variable, in [0, PRIZE_SIZE)
     * @param damage whether the agent is damaged
     * @return the mixed-radix encoding of the four state variables
     */
    public int state(int xval, int yval, int prize, boolean damage)
    {
        return ((xval * Y_SIZE + yval) * PRIZE_SIZE + prize) * DAMAGE_SIZE + (damage ? 1 : 0);
    }

    /**
     * Resets all Q-values to the given initial value and all visit counts
     * to zero.
     *
     * @param initVal the initial value, given by a box in the GUI
     */
    public void doreset(double initVal)
    {
        for (int st = 0; st < STATE_SIZE; st++) {
            for (int a = 0; a < ACTION_SIZE; a++) {
                qvalues[st][a] = initVal;
                visits[st][a] = 0;
            }
        }
    }

    /**
     * Does one step: carries out the action in the environment and updates
     * the Q-value of the (previous state, action) pair from the observed
     * reward and the value of the resulting state.
     *
     * <p>The actions are
     * <ul>
     * <li> 0 is up
     * <li> 1 is right
     * <li> 2 is down
     * <li> 3 is left
     * </ul>
     *
     * @param action the action that the agent does
     */
    public void dostep(int action)
    {
        int oldState = state(environment.currX, environment.currY,
                             environment.prize, environment.damaged);

        double reward = environment.dostep(action);

        int newState = state(environment.currX, environment.currY,
                             environment.prize, environment.damaged);

        // Temporal-difference target: immediate reward plus discounted
        // value of the successor state.
        double newDatum = reward + discount * value(newState);
        visits[oldState][action]++;
        // When alpha is not fixed, use the 1/k schedule so each
        // (state, action) pair's estimate is the running average.
        if (!alphaFixed) {
            alpha = 1.0 / visits[oldState][action];
        }

        qvalues[oldState][action] =
            (1 - alpha) * qvalues[oldState][action] + alpha * newDatum;
    }

    /**
     * Determines the value of a state: the maximum, over all actions, of
     * the Q-value.
     *
     * @param state the state index
     * @return the value of the state
     */
    public double value(int state)
    {
        double val = qvalues[state][0];
        for (int a = 1; a < ACTION_SIZE; a++) {
            if (qvalues[state][a] > val) {
                val = qvalues[state][a];
            }
        }
        return val;
    }

    /**
     * Does count number of steps, each chosen greedily with probability
     * greedyProb and uniformly at random otherwise.
     *
     * @param count      the number of steps to do
     * @param greedyProb the probability that a step is chosen greedily
     */
    public void doSteps(int count, double greedyProb)
    {
        for (int i = 0; i < count; i++) {
            if (Math.random() < greedyProb) {
                // act greedily
                int currState = state(environment.currX, environment.currY,
                                      environment.prize, environment.damaged);
                dostep(greedyAction(currState));
            } else {
                // act randomly
                dostep((int) (Math.random() * ACTION_SIZE));
            }
        }
    }

    /**
     * Returns an action with maximal Q-value in the given state.  To avoid
     * always choosing the same action when several actions tie for the
     * maximum, the scan starts at a random action and returns the first
     * maximal action found.
     *
     * @param currState the state index to select an action for
     * @return an action index with maximal Q-value in currState
     */
    private int greedyAction(int currState)
    {
        int dir = (int) (Math.random() * ACTION_SIZE);
        double bestVal = qvalues[currState][dir];
        int bestDir = dir;
        for (int step = 1; step < ACTION_SIZE; step++) {
            dir = (dir + 1) % ACTION_SIZE;
            if (qvalues[currState][dir] > bestVal) {
                bestVal = qvalues[currState][dir];
                bestDir = dir;
            }
        }
        return bestDir;
    }
}
