/**
 * This applet demonstrates learning in a simple game. It isn't designed to be general or reusable.
<p>
 * Copyright (C) 2008  <A HREF="http://www.cs.ubc.ca/spider/poole/">David Poole</A>.
<p>
 * This program implements the learning agent. The GUI is in <A
   HREF="MASLearningGUI.java">MASLearningGUI.java</A>.  The controller that runs the game is in <A
   HREF="SGameController.java">SGameController.java</A>.
<p>
 This program is free software; you can redistribute it and/or
 modify it under the terms of the GNU General Public License
 as published by the Free Software Foundation; either version 2
 of the License, or (at your option) any later version.
<p>
 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.
<p>
 You should have received a copy of the GNU General Public License
 along with this program; if not, write to the Free Software
 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.


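<p>
 A minimal usage sketch (hypothetical driver code, not part of the applet; in the
 applet the agent is driven from the controller):
<pre>
    WolfLearningAgent agent = new WolfLearningAgent(2); // an agent with two actions
    int act = agent.getAction();      // agent samples an action from its mixed strategy
    double payoff = (act == 0) ? 1.0 : 0.0;  // stand-in payoff, for illustration only
    agent.tellReward(payoff);         // agent updates its value estimates and strategy
</pre>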
 * @author David Poole  poole@cs.ubc.ca
 * @version 0.01 2008-04-17 */

public class WolfLearningAgent
{
    int numActions;        // number of available actions
    double[] prob;         // prob[i] is the probability of choosing action i (the mixed strategy)
    double[] value;        // value[i] is the estimated value of action i
    int visits = 0;        // number of payoffs received (reset by randomizeStrategy)
    double aveValue = 0.0; // running average of the payoffs received

    double actRandomlyWithProbability=0.05;  // probability of exploring with a random action
    int action=0; // the last action
    // Parameters: the learning rate alpha and the WoLF step sizes
    // (deltaWin < deltaLose, so the agent adjusts its strategy faster when losing)
    Parameter[] parameter = {
	    new Parameter("alpha",0.1),
	    new Parameter("deltaWin",0.0001),
	    new Parameter("deltaLose",0.001)
	};
    // indices into the parameter array
    static final int alpha=0;
    static final int deltaWin=1;
    static final int deltaLose=2;

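    // Constructs an agent with numActions actions, a uniform initial mixed
    // strategy, and zero initial value estimates.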
    WolfLearningAgent(int numActions) {
	this.numActions = numActions;
	prob = new double[numActions];
	value = new double[numActions];
// 	visits = new int[numActions];
// 	aveValue = new double[numActions];

	for (int i=0; i<numActions; i++) {
	    prob[i]=1.0/numActions;
	    value[i]=0.0;
// 	    visits[i]=0;
// 	    aveValue[i]=0.0;
	}
    }

    // resets to a random mixed strategy and clears the value estimates
    void randomizeStrategy() {
	double probMass = 0.0;
	for (int i=0; i<numActions; i++) {
	    prob[i] = Math.random();
	    probMass += prob[i];
	}
	for (int i=0; i<numActions; i++) {
	    prob[i] = prob[i]/probMass;
	    value[i]=0.0;
	}

	visits=0;
    }

	

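    // Returns an action: with probability actRandomlyWithProbability it explores
    // uniformly at random; otherwise it samples from the current mixed strategy prob[].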
    int getAction() {
	if (Math.random()<actRandomlyWithProbability) {
	    action =  (int) (Math.random()*numActions);  //act randomly
	    return action;
	}
	else { // follow the policy
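	    // roulette-wheel sampling: subtract each action's probability from
	    // rand until rand falls within some action's share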
	    double rand = Math.random();
	    for (action=0; action<numActions; action++) {
		if(rand < prob[action])
		    return action;
		else
		    rand = rand- prob[action];
	    }
	}
	System.out.println("getAction() error. Prob dist is ["+prob[0]+", "+prob[1]+"].");
	return 0; //should never occur
    }


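    // WoLF ("Win or Learn Fast") style policy hill-climbing update: the mixed
    // strategy is nudged towards the action with the highest estimated value,
    // using the small step deltaWin when the agent is "winning" (the best value
    // exceeds the average payoff) and the larger step deltaLose otherwise.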
    void tellReward(double payoff) {
	// update value
	value[action] = value[action]+parameter[alpha].value*(payoff-value[action]);
	// find the average payoff
	visits++;
	aveValue = aveValue+(1.0/visits)*(payoff-aveValue);
	// find the best action, scanning from a random start so ties are broken randomly
	int start = (int)(Math.random()*numActions);
	int abest= start;
	for (int i=1; i<numActions; i++)
	    if (value[(i+start)%numActions] > value[abest])
		abest=(i+start)%numActions;
	double delta;
	if (value[abest]>aveValue)
	    delta = parameter[deltaWin].value;
	else
	    delta = parameter[deltaLose].value;
		
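	// shift delta of probability mass from every other action to the best one;
	// an action whose probability would go negative only gives up the mass it has
	// (the shortfall is taken back out of the best action's share)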
	prob[abest] += numActions*delta;
	for (int i=0; i<numActions; i++) {
	    prob[i] -= delta;
	    if(prob[i]<0) {
		prob[abest]+=prob[i];
		prob[i]=0.0;
	    }
	}
    }
}