/**
 * This applet demonstrates a simple game. It isn't designed to be general or reusable.
<p>
 * Copyright (C) 2006  <A HREF="http://www.cs.ubc.ca/spider/poole/">David Poole</A>.
<p>
 * This program gives the core of the simulation. The GUI is in <A
 * HREF="SGameGUI.java">SGameGUI.java</A>.  The environment code is at
 * <A HREF="SGameEnv.java">SGameEnv.java</A>. This
 * function-approximation controller is at <A
 * HREF="SGameFAController.java">SGameFAController.java</A>; the
 * features are defined in <A
 * HREF="SGameFeatureSet.java">SGameFeatureSet.java</A>. 
<p>
 This program is free software; you can redistribute it and/or
 modify it under the terms of the GNU General Public License
 as published by the Free Software Foundation; either version 2
 of the License, or (at your option) any later version.
<p>
 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.
<p>
 You should have received a copy of the GNU General Public License
 along with this program; if not, write to the Free Software
 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.


 * @author David Poole  poole@cs.ubc.ca
 * @version 0.11 2007-09-09 */

public class SGameFAController extends SGameController
{
    /**
     * Constructs a new linear-function-approximation controller and
     * configures the GUI labels/options that apply to it.
     *
     * @param gui the GUI this controller is attached to
     */
    SGameFAController(SGameGUI gui) {
	super();
	title = "linear fun controller";  // fixed typo: was "contoller"
	alphaFixed = true;
	alpha = 0.01;  // this is interpreted as the gradient-descent step size
	gui.alphaText = "gradient descent step size";
	gui.showCountsOption = false;
	discount = 0.99;
    }

    /** The feature definitions used by this controller. */
    SGameFeatureSet featureSet = new SGameFeatureSet();

    /** One learned weight per feature; a q-value is the dot product of these with the feature values. */
    double[] featureWeights = new double[featureSet.NumFeatures];
    // {0.0,-2.0,-0.5,5.0,2.0,-5.0,-1.0,2.0,2.0,5.0};
    //  - this can be used when there are just 10 features

    /**
     * Gets the q-value from the feature values: the dot product of the
     * feature values and the current feature weights.
     *
     * @param featureVals the feature values; length must be featureSet.NumFeatures
     * @return the estimated q-value
     */
    double qvalue(double[] featureVals)
    {
	double tot = 0;
	for (int i = 0; i < featureSet.NumFeatures; i++)
	    tot += featureWeights[i] * featureVals[i];
	return tot;
    }

    /**
     * Resets all of the weights to the given value.
     *
     * @param initVal the initial value given by a box in the GUI
     */
    public void doreset(double initVal)
    {
	for (int i = 0; i < featureSet.NumFeatures; i++)
	    featureWeights[i] = initVal;
    }

    /** Feature values of the previous state-action pair (all zeros before the first step). */
    double[] prevFeatureVals = new double[featureSet.NumFeatures];
    /** Reward received on the previous step. */
    double prevReward = 0;

    /**
     * Does one step.
     *
     * First updates the feature weights from the previous experience
     * (a TD update done by gradient descent with step size alpha),
     * then carries out the action in the environment and remembers the
     * reward for the next update.
     *
     <p>
     The actions are
     <ul>
     <li> 0 is up
     <li> 1 is right
     <li> 2 is down
     <li> 3 is left
     </ul>
     * @param action  the action that the agent does
     */
    public void dostep(int action) {
	// get current state
	int newX = environment.currX;
	int newY = environment.currY;
	int newPrize = environment.prize;
	boolean newDamaged = environment.damaged;

	// TD error: prevReward + discount*Q(s',a') - Q(s,a)
	double[] featureVals = featureSet.featureValues(newX, newY, newPrize, newDamaged, action);
	double delta = prevReward + discount * qvalue(featureVals)
	              - qvalue(prevFeatureVals);
	// gradient-descent update of the weights along the previous features
	for (int i = 0; i < featureSet.NumFeatures; i++)
	    featureWeights[i] = featureWeights[i] + alpha * delta * prevFeatureVals[i];
	// Save feature values
	prevFeatureVals = featureVals;

	// Act & remember reward
	prevReward = environment.dostep(action);

	// keep setting this to be true, as false doesn't make sense for a step size
	alphaFixed = true;
    }

    /**
     * Determines the value of a state.
     *
     * The value is the maximum, over all four actions, of the q-value.
     *
     * @param xval   the x-coordinate
     * @param yval   the y-coordinate
     * @param prize  the prize component of the state
     * @param damage whether the agent is damaged in this state
     * @return the value of the (xval,yval) position
     */
    public double value(int xval, int yval, int prize, boolean damage) {
	double val = qvalue(featureSet.featureValues(xval, yval, prize, damage, 3));
	for (int i = 2; i >= 0; i--) {
	    double qvi = qvalue(featureSet.featureValues(xval, yval, prize, damage, i));
	    if (qvi > val) {
		val = qvi;
	    }
	}
	return val;
    }

    /**
     * Does count number of steps. Each step is chosen greedily with
     * probability greedyProb and uniformly at random otherwise. The
     * greedy scan starts at a random direction so that ties are broken
     * randomly.
     *
     * @param count       the number of steps to do
     * @param greedyProb  the probability that each step is chosen greedily
     */
    public void doSteps(int count, double greedyProb) {
	for (int i = 0; i < count; i++) {
	    if (Math.random() < greedyProb) {
		// act greedily: scan all four directions from a random start
		int startDir = (int) (Math.random() * 4);
		double bestVal = qvalue(featureSet.featureValues(environment.currX, environment.currY, environment.prize, environment.damaged, startDir));
		int bestDir = startDir;
		for (int dir = 1; dir < 4; dir++) {
		    startDir = (startDir + 1) % 4;
		    // compute the q-value once per direction (was computed twice)
		    double qv = qvalue(featureSet.featureValues(environment.currX, environment.currY, environment.prize, environment.damaged, startDir));
		    if (qv > bestVal) {
			bestVal = qv;
			bestDir = startDir;
		    }
		}
		dostep(bestDir);
	    } else {
		// act randomly
		dostep((int) (Math.random() * 4));
	    }
	}
    }

    /**
     * Gives the q-values to be drawn by the GUI. The GUI uses it to
     * display values and for the arrows. It shows the q-values for the
     * current values of prize and damaged.
     *
     * @param xval   the x-coordinate
     * @param yval   the y-coordinate
     * @param action the action whose q-value is wanted
     * @return the q-value of doing action at (xval,yval)
     */
    public double qvalue(int xval, int yval, int action)
    {
	return qvalue(featureSet.featureValues(xval, yval, environment.prize, environment.damaged, action));
    }

    /**
     * @return the feature weights, for display by the GUI
     */
    public double[] toDisplay()
    {
	return featureWeights;
    }
}
