/**
 * This applet demonstrates a simple game. It isn't designed to be general or reusable.
<p>
 * Copyright (C) 2006  <A HREF="http://www.cs.ubc.ca/spider/poole/">David Poole</A>.
<p>
 * This program gives the core of the simulation. The GUI is in <A HREF="SGameGUI.java">SGameGUI.java</A>. The environment code is in <A HREF="SGameEnv.java">SGameEnv.java</A>. A Q-learning controller is in <A HREF="SGameQController.java">SGameQController.java</A>.
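<p>
 * This controller is model-based: it records transition counts and average rewards from experience, and computes the Q-values from this learned model by repeated value-iteration backups (see dostep).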
<p>
 This program is free software; you can redistribute it and/or
 modify it under the terms of the GNU General Public License
 as published by the Free Software Foundation; either version 2
 of the License, or (at your option) any later version.
<p>
 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.
<p>
 You should have received a copy of the GNU General Public License
 along with this program; if not, write to the Free Software
 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.


 * @author David Poole  poole@cs.ubc.ca
 * @version 0.42 2011-04-19 */

public class SGameModelController extends SGameController
{
    // SIZES of the domains
    public final int X_SIZE=5;
    public final int Y_SIZE=5;
    public final int PRIZE_SIZE=5;
    public final int DAMAGE_SIZE=2;
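    // the number of states: X_SIZE * Y_SIZE * PRIZE_SIZE * DAMAGE_SIZE = 5*5*5*2 = 250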
    public final int STATE_SIZE=250;
    public final int ACTION_SIZE=4;


    /**
     * Construct a new controller attached to the given GUI.
     *
     * @param gui  the GUI that displays this controller
     */
    SGameModelController(SGameGUI gui) {
	super();
	gui.alphaText = "updates/step";
	alpha = 10;
	title = "model-based controller";
    }
    
    /**
       qvalues[state][action] gives the Q-value for doing action in state
     */
    double[][] qvalues = new double[STATE_SIZE][ACTION_SIZE];
    /**
       transitions[s][a][s2] gives the number of times action a has been carried out in state s resulting in state s2
     */
    int[][][] transitions = new int[STATE_SIZE][ACTION_SIZE][STATE_SIZE];
    /**
       count_actions_in_state[s][a] gives the number of times action a has been carried out in state s.

       Note that count_actions_in_state could be computed from transitions, but is cached for efficiency.
     */
    int[][] count_actions_in_state = new int[STATE_SIZE][ACTION_SIZE];
    /**
       rewards[s][a] gives the average observed reward for doing action a in state s (an empirical estimate of the expected reward)
     */
    double[][] rewards = new double[STATE_SIZE][ACTION_SIZE];

    public double num_updates() {
	return alpha;  // the alpha field is reused as the number of updates per step
    }

    /**
       The GUI uses qvalue(x,y,a) to display values and arrows.  
     */
    public double qvalue(int xval, int yval, int action) 
    {
	return qvalues[state(xval,yval,environment.prize,environment.damaged)][action];
    }

    /**
       The GUI uses getCounts(x,y,a) to display counts.  
     */
    public int getCounts(int xval, int yval, int action) 
    {
	return count_actions_in_state[state(xval,yval,environment.prize,environment.damaged)][action];
    }

    /**
     * computes the state index from the x position, y position, prize value, and damage flag
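     *
     * This is a mixed-radix encoding; for example, with X_SIZE=Y_SIZE=PRIZE_SIZE=5
     * and DAMAGE_SIZE=2, state(2,3,1,false) = ((2*5+3)*5+1)*2+0 = 132.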
     */
    public int state(int xval, int yval, int prize, boolean damage)
    {
	return ((xval*Y_SIZE+yval)*PRIZE_SIZE+prize)*DAMAGE_SIZE+(damage?1:0);
    }

    /**
     * resets the Q-values, the reward estimates, and all of the transition and action counts.
     *
     * @param initVal   the initial value given by a box in the GUI
     */
    public void doreset(double initVal)
    {     
	for (int st=0; st<STATE_SIZE; st++)
	    for (int a=0; a<ACTION_SIZE; a++)
		{
		    qvalues[st][a]=initVal;
		    count_actions_in_state[st][a]=0;
		    rewards[st][a]=0.0;
		    for (int rst=0; rst<STATE_SIZE; rst++)
			transitions[st][a][rst]=0;
		}
    }


    /**
     * does one step.
     *
     * carries out the action in the environment, records the resulting
     * transition and reward in the learned model, and then performs
     * num_updates() value-iteration backups on the Q-values.
     *
     * @param action  the action that the agent does
     */
    public void dostep(int action)  { 
	int oldState = state(environment.currX,environment.currY,environment.prize,environment.damaged);

	double reward = environment.dostep(action);

	int newState = state(environment.currX,environment.currY,environment.prize,environment.damaged);

	count_actions_in_state[oldState][action]++;
	transitions[oldState][action][newState]++;
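	// incremental update of the running average reward:
	//   new average = old average + (reward - old average) / n
	// where n is the updated count for (oldState, action)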
	rewards[oldState][action] += (reward-rewards[oldState][action])/count_actions_in_state[oldState][action];

	int s1=oldState;     // the first Q-update is for the state-action pair just carried out
	int a1=action; 
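	// Each update is a value-iteration backup using the learned model:
	//   Q[s1][a1] = R(s1,a1) + discount * sum_{s2} P(s2|s1,a1) * V(s2)
	// where P(s2|s1,a1) is estimated as
	//   transitions[s1][a1][s2] / count_actions_in_state[s1][a1]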
	for (int i=0; i< num_updates(); i++) {
	    if (count_actions_in_state[s1][a1]>0) {
		double qs1a1=0;
		for (int s2=0; s2<STATE_SIZE;s2++)
		    qs1a1+= transitions[s1][a1][s2]*(rewards[s1][a1]+discount*value(s2))/((double)count_actions_in_state[s1][a1]);
		qvalues[s1][a1]=qs1a1;
	    }
	    s1 = ((int) (Math.random() * STATE_SIZE));     // future Q-updates are for random states and actions
	    a1 = ((int) (Math.random() * ACTION_SIZE));
	}
    }

    /**
     * determines the value of a state
     *
     * the value is the maximum, over all actions, of the Q-value
     *
     * @param state  the state index
     * @return the value of the state
     */
    public double value(int state) {
	double val=qvalues[state][0];
	for (int a=1; a<ACTION_SIZE; a++) {
	    if(qvalues[state][a]>val) {
		val=qvalues[state][a];
	    }
	}
	return val;
    }


    /**
     * does numsteps steps
     *
     * This is where you would put your controller
     * @param numsteps  the number of steps to do
     * @param greedyProb  the probability that a step is chosen greedily
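     *
     * For example, doSteps(100, 0.9) does 100 steps, choosing the greedy
     * action on roughly 90% of them and a random action on the rest.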
     */
    public void doSteps(int numsteps, double greedyProb){
	for(int i=0; i<numsteps; i++)
	    {
		double rand = Math.random();
		if (rand<greedyProb)
		    {// act greedily
			int currState = state(environment.currX,environment.currY,environment.prize,environment.damaged);
			// To avoid always choosing the same action when multiple actions
			// have the same Q-value, start at a random direction and return
			// the first action found with the maximum Q-value.
			int startDir = (int) (Math.random() * ACTION_SIZE);
			double bestVal = qvalues[currState][startDir];
			int bestDir = startDir;
			for (int dir=1; dir<ACTION_SIZE; dir++)
			    {
				startDir = (startDir+1) % ACTION_SIZE;
				if (qvalues[currState][startDir] > bestVal)
				    {
					bestVal = qvalues[currState][startDir];
					bestDir = startDir;
				    }
			    }
			dostep(bestDir);
		    }
		else
		    { // act randomly
			dostep((int) (Math.random() * ACTION_SIZE));
		    }
	    }
    }


}
