/**
 * This applet demonstrates a simple game. It isn't designed to be general or reusable.
<p>
 * Copyright (C) 2006  <A HREF="http://www.cs.ubc.ca/spider/poole/">David Poole</A>.
<p>
 * This program gives the core of the simulation. The GUI is in <A
 * HREF="SGameGUI.java">SGameGUI.java</A>.  The environment code is at
 * <A HREF="SGameEnv.java">SGameEnv.java</A>. This
 * function-approximation controller is at <A
 * HREF="SGameFAController.java">SGameFAController.java</A>; the
 * features are defined in <A
 * HREF="SGameFeatureSet.java">SGameFeatureSet.java</A>. 
<p>
 This program is free software; you can redistribute it and/or
 modify it under the terms of the GNU General Public License
 as published by the Free Software Foundation; either version 2
 of the License, or (at your option) any later version.
<p>
 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.
<p>
 You should have received a copy of the GNU General Public License
 along with this program; if not, write to the Free Software
 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.


 * @author David Poole  poole@cs.ubc.ca
 * @version 0.11 2007-09-09 */

public class SGameFAController extends SGameController
{
    /**
     * Constructs a new linear-function-approximation controller and
     * configures the GUI labels/options that apply to it.
     *
     * @param gui the GUI this controller is attached to
     */
    SGameFAController(SGameGUI gui) {
	super();
	title = "linear fun controller";  // fixed typo: was "contoller"
	alphaFixed = true;
	alpha = 0.01;  // this is interpreted as the gradient-descent step size
	gui.alphaText = "gradient descent step size";
	gui.showCountsOption = false;
	discount = 0.99;
    }

    /** The feature definitions used by this controller. */
    SGameFeatureSet featureSet = new SGameFeatureSet();

    /** One learned weight per feature; a q-value is the dot product of these with the feature values. */
    double[] featureWeights = new double[featureSet.NumFeatures];
    // {0.0,-2.0,-0.5,5.0,2.0,-5.0,-1.0,2.0,2.0,5.0};
    //  - this can be used when there are just 10 features

    /**
     * Gets the q-value from the feature values: the dot product of the
     * feature values and the current feature weights.
     *
     * @param featureVals the feature values; length must be featureSet.NumFeatures
     * @return the estimated q-value
     */
    double qvalue(double[] featureVals)
    {
	double tot = 0;
	for (int i = 0; i < featureSet.NumFeatures; i++)
	    tot += featureWeights[i] * featureVals[i];
	return tot;
    }

    /**
     * Resets all of the weights to the given value.
     *
     * @param initVal the initial value given by a box in the GUI
     */
    public void doreset(double initVal)
    {
	for (int i = 0; i < featureSet.NumFeatures; i++)
	    featureWeights[i] = initVal;
    }

    /** Feature values of the previous state-action pair (all zeros before the first step). */
    double[] prevFeatureVals = new double[featureSet.NumFeatures];
    /** Reward received on the previous step. */
    double prevReward = 0;

    /**
     * Does one step.
     *
     * First updates the feature weights from the previous experience
     * (a TD update done by gradient descent with step size alpha),
     * then carries out the action in the environment and remembers the
     * reward for the next update.
     *
     <p>
     The actions are
     <ul>
     <li> 0 is up
     <li> 1 is right
     <li> 2 is down
     <li> 3 is left
     </ul>
     * @param action  the action that the agent does
     */
    public void dostep(int action) {
	// get current state
	int newX = environment.currX;
	int newY = environment.currY;
	int newPrize = environment.prize;
	boolean newDamaged = environment.damaged;

	// TD error: prevReward + discount*Q(s',a') - Q(s,a)
	double[] featureVals = featureSet.featureValues(newX, newY, newPrize, newDamaged, action);
	double delta = prevReward + discount * qvalue(featureVals)
	              - qvalue(prevFeatureVals);
	// gradient-descent update of the weights along the previous features
	for (int i = 0; i < featureSet.NumFeatures; i++)
	    featureWeights[i] = featureWeights[i] + alpha * delta * prevFeatureVals[i];
	// Save feature values
	prevFeatureVals = featureVals;

	// Act & remember reward
	prevReward = environment.dostep(action);

	// keep setting this to be true, as false doesn't make sense for a step size
	alphaFixed = true;
    }

    /**
     * Determines the value of a state.
     *
     * The value is the maximum, over all four actions, of the q-value.
     *
     * @param xval   the x-coordinate
     * @param yval   the y-coordinate
     * @param prize  the prize component of the state
     * @param damage whether the agent is damaged in this state
     * @return the value of the (xval,yval) position
     */
    public double value(int xval, int yval, int prize, boolean damage) {
	double val = qvalue(featureSet.featureValues(xval, yval, prize, damage, 3));
	for (int i = 2; i >= 0; i--) {
	    double qvi = qvalue(featureSet.featureValues(xval, yval, prize, damage, i));
	    if (qvi > val) {
		val = qvi;
	    }
	}
	return val;
    }

    /**
     * Does count number of steps. Each step is chosen greedily with
     * probability greedyProb and uniformly at random otherwise. The
     * greedy scan starts at a random direction so that ties are broken
     * randomly.
     *
     * @param count       the number of steps to do
     * @param greedyProb  the probability that each step is chosen greedily
     */
    public void doSteps(int count, double greedyProb) {
	for (int i = 0; i < count; i++) {
	    if (Math.random() < greedyProb) {
		// act greedily: scan all four directions from a random start
		int startDir = (int) (Math.random() * 4);
		double bestVal = qvalue(featureSet.featureValues(environment.currX, environment.currY, environment.prize, environment.damaged, startDir));
		int bestDir = startDir;
		for (int dir = 1; dir < 4; dir++) {
		    startDir = (startDir + 1) % 4;
		    // compute the q-value once per direction (was computed twice)
		    double qv = qvalue(featureSet.featureValues(environment.currX, environment.currY, environment.prize, environment.damaged, startDir));
		    if (qv > bestVal) {
			bestVal = qv;
			bestDir = startDir;
		    }
		}
		dostep(bestDir);
	    } else {
		// act randomly
		dostep((int) (Math.random() * 4));
	    }
	}
    }

    /**
     * Gives the q-values to be drawn by the GUI. The GUI uses it to
     * display values and for the arrows. It shows the q-values for the
     * current values of prize and damaged.
     *
     * @param xval   the x-coordinate
     * @param yval   the y-coordinate
     * @param action the action whose q-value is wanted
     * @return the q-value of doing action at (xval,yval)
     */
    public double qvalue(int xval, int yval, int action)
    {
	return qvalue(featureSet.featureValues(xval, yval, environment.prize, environment.damaged, action));
    }

    /**
     * @return the feature weights, for display by the GUI
     */
    public double[] toDisplay()
    {
	return featureWeights;
    }
}
