/**
 * This applet demonstrates learning in a simple game. It isn't designed to be general or reusable.
<p>
 * Copyright (C) 2006  <A HREF="http://www.cs.ubc.ca/spider/poole/">David Poole</A>.
<p>
 * This program is the controller for a learning agent. The GUI is in <A
   HREF="SGameGUI.java">SGameGUI.java</A>.  The environment code is at
   <A HREF="SGameEnv.java">SGameEnv.java</A>. The controller is at <A
   HREF="SGameController.java">SGameController.java</A>.
<p>
 This program is free software; you can redistribute it and/or
 modify it under the terms of the GNU General Public License
 as published by the Free Software Foundation; either version 2
 of the License, or (at your option) any later version.
<p>
 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.
<p>
 You should have received a copy of the GNU General Public License
 along with this program; if not, write to the Free Software
 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.


 * @author David Poole  poole@cs.ubc.ca
 * @version 0.01 2008-04-17 */

public class LearningAgent
{
    int numActions;     // number of actions available to the agent
    double[] prob;      // prob[i]: probability of choosing action i; sums to 1
    double[] value;     // value[i]: current estimate of the expected payoff of action i
    double actRandomlyWithProbability=0.05; // exploration rate (epsilon-greedy style)
    int action=0; // the last action chosen; invariant: always in [0, numActions)

    // Tunable parameters, indexed by the static constants below.
    Parameter[] parameter = {
	    new Parameter("alpha",0.1),   // learning rate for the value update
	    new Parameter("delta",0.0001) // step size for shifting probability mass
	};
    static int alpha=0; // index of the learning-rate parameter
    static int delta=1; // index of the probability-step parameter

    /**
     * Creates an agent with a uniform random strategy and all value
     * estimates initialized to zero.
     *
     * @param numActions the number of actions available to the agent;
     *                   assumed to be at least 1
     */
    LearningAgent(int numActions) {
	this.numActions = numActions;
	prob = new double[numActions];
	value = new double[numActions];
	for (int i=0; i<numActions; i++) {
	    prob[i]=1.0/numActions;   // uniform initial policy
	    value[i]=0.0;
	}
    }

    /**
     * Resets the strategy to a random distribution (each prob[i] drawn
     * uniformly, then normalized to sum to 1) and clears all value
     * estimates.
     */
    void randomizeStrategy() {
	double probMass = 0.0;
	for (int i=0; i<numActions; i++) {
	    prob[i] = Math.random();
	    probMass += prob[i];
	}
	// Normalize so the probabilities sum to 1, and forget learned values.
	for (int i=0; i<numActions; i++) {
	    prob[i] = prob[i]/probMass;
	    value[i]=0.0;
	}
    }

    /**
     * Chooses the next action. With probability actRandomlyWithProbability
     * an action is picked uniformly at random (exploration); otherwise an
     * action is sampled from the current stochastic policy prob[].
     * The chosen action is remembered in the field {@code action} so that a
     * subsequent {@link #tellReward} call can credit it.
     *
     * @return the index of the chosen action, in [0, numActions)
     */
    int getAction() {
	if (Math.random()<actRandomlyWithProbability) {
	    action= (int)(Math.random()*numActions);  //act randomly
	    return action;
	}
	else { // follow the policy: sample from the distribution prob[]
	    double rand = Math.random();
	    for (action=0; action<numActions; action++) {
		if(rand < prob[action])
		    return action;
		else
		    rand = rand- prob[action];
	    }
	}
	// Fall-through can only happen when floating-point rounding makes
	// prob[] sum to slightly less than 1. Report the full distribution
	// (the old message hard-coded prob[0] and prob[1], which itself threw
	// for numActions == 1). Crucially, reset the action field: the loop
	// above left it at numActions, which would cause an out-of-bounds
	// access in tellReward().
	System.out.println("getAction() error. Prob dist is "
			   + java.util.Arrays.toString(prob) + ".");
	action = 0;
	return action;
    }

    /**
     * Informs the agent of the payoff received for the last action returned
     * by {@link #getAction}. Updates the value estimate of that action with
     * a step of size alpha toward the payoff, then shifts delta probability
     * mass from every action to the currently best-valued action (ties are
     * broken randomly via a random starting offset), keeping prob[] a valid
     * distribution by clipping negatives to zero and giving the deficit back
     * to the best action.
     *
     * @param payoff the reward received for the last action
     */
    void tellReward(double payoff) {
	value[action] = value[action]+parameter[alpha].value*(payoff-value[action]);
	// find best action - make sure a random action is chosen if more than one is best
	int start = (int)(Math.random()*numActions);
	int abest= start;
	for (int i=1; i<numActions; i++)
	    if (value[(i+start)%numActions] > value[abest])
		abest=(i+start)%numActions;
	// Give the best action numActions*delta, then take delta back from
	// everyone (net: best gains (numActions-1)*delta, others lose delta).
	prob[abest] += numActions*parameter[delta].value;
	for (int i=0; i<numActions; i++) {
	    prob[i] -= parameter[delta].value;
	    if(prob[i]<0) {
		// Clip at zero; return the overshoot to the best action so
		// the probabilities still sum to 1.
		prob[abest]+=prob[i];
		prob[i]=0.0;
	    }
	}
    }
}