
/**
 * This applet demonstrates a simple game. It isn't designed to be general or reusable.
<p>
 * Copyright (C) 2006  <A HREF="http://www.cs.ubc.ca/spider/poole/">David Poole</A>.
<p>
 * This program gives the environment of the simulation. The GUI is in <A HREF="TGameGUI.java">TGameGUI.java</A>. The environment code is at <A HREF="TGameEnv.java">TGameEnv.java</A>. The controller is at <A HREF="TGameController.java">TGameController.java</A>.
<p>
 This program is free software; you can redistribute it and/or
 modify it under the terms of the GNU General Public License
 as published by the Free Software Foundation; either version 2
 of the License, or (at your option) any later version.
<p>
 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.
<p>
 You should have received a copy of the GNU General Public License
 along with this program; if not, write to the Free Software
 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.


 * @author David Poole  poole@cs.ubc.ca
 * @version 0.4 2006-12-04 */

public class TGameEnv
{
    // Action codes accepted by dostep(int).
    private static final int ACTION_UP = 0;
    private static final int ACTION_RIGHT = 1;
    private static final int ACTION_CAREFUL_UP = 2;
    private static final int ACTION_LEFT = 3;

    /** Grid width; the right wall is at x == xDim-1. */
    public final int xDim = 2;
    /** Grid height (not referenced by the logic in this class). */
    public final int yDim = 3;
    /** Reward added when an up or right move is blocked by a wall. */
    final double crashReward = -1.0;

    /** Number of steps taken since the last doreset(). */
    public int numberOfSteps = 0;
    /** Sum of all step rewards since the last doreset(). */
    public double totalReward = 0.0;

    /** Lowest value totalReward has reached since the last doreset(). */
    public double minReward = 0.0;
    /** Step number at which minReward was reached. */
    public int minStep = 0;
    /** Last step at which totalReward went from negative to positive. */
    public int zeroCrossing = 0;

    public int currX = 0;  // current X position
    public int currY = 2;  // current Y position
    /** When true, every step prints the step count and total reward. */
    public boolean tracing = false;

    /** Reward produced by the most recent step. */
    public double reward;


    /**
     * Resets the step counter and the reward statistics.
     * The agent's position and the last step reward are left as they are.
     */
    public void doreset()
    {
        numberOfSteps = 0;
        totalReward = 0.0;
        minReward = 0.0;
        minStep = 0;
        zeroCrossing = 0;
    }

    /**
     * Does one step.
     *
     <p>
     The actions are
     <ul>
     <li> 0 is up (moves right instead 10% of the time, left instead 10% of the time)
     <li> 1 is right
     <li> 2 is careful up (always moves up, but costs an extra -1)
     <li> 3 is left
     </ul>
     * @param action  the action that the agent does
     * @return reward received for this step
     * @throws IllegalArgumentException if action is not one of 0, 1, 2, 3;
     *         no state is modified in that case
     */
    public double dostep(int action)
    {
        // Reject unknown actions before touching any state. Previously they
        // fell into the switch default and moved the agent to (0,0).
        if (action < ACTION_UP || action > ACTION_LEFT) {
            throw new IllegalArgumentException("unknown action: " + action);
        }

        // penalty for careful
        reward = (action == ACTION_CAREFUL_UP) ? -1.0 : 0.0;

        // Unless a branch below says otherwise, the agent stays where it is.
        int newX = currX;
        int newY = currY;

        // Determine where the agent ends up (plus crash reward)
        switch (resolveDirection(action)) {
        case ACTION_UP:
            if (currY == 0) {
                reward += crashReward;      // blocked by the top wall
            } else {
                newY = currY - 1;
            }
            break;
        case ACTION_RIGHT:
            if (currX == xDim - 1) {
                reward += crashReward;      // blocked by the right wall
            } else {
                newX = currX + 1;
            }
            break;
        case ACTION_LEFT:
            if (currX == 0 && currY == 0) {
                // Top-left cell: collect +10 and jump back to (0,2).
                newX = 0;
                newY = 2;
                reward += 10;
            } else if (currX == 0 && currY == 1) {
                reward -= 100;              // stays in place
            } else if (currX == 0 && currY == 2) {
                reward -= 1;                // stays in place
            } else {
                newX = currX - 1;
            }
            break;
        default:
            // resolveDirection only returns up, right or left.
            throw new IllegalStateException("unexpected direction");
        }

        numberOfSteps++;
        totalReward += reward;
        if (totalReward < minReward) {
            minReward = totalReward;
            minStep = numberOfSteps;
        }
        // reward > totalReward means the total before this step was negative,
        // so this records a crossing from below zero to above zero.
        // NOTE(review): a crossing that lands exactly on zero first is not
        // recorded -- confirm whether that is intended.
        if (totalReward > 0 && reward > totalReward) {
            zeroCrossing = numberOfSteps;
        }

        if (tracing) {
            System.out.println(numberOfSteps+"  "+totalReward);
        }

        currX = newX;
        currY = newY;
        return reward;
    }

    /**
     * Maps a valid action to the direction actually moved in.
     * Plain "up" slips right for one in ten random draws and left for
     * another one in ten; "careful up" is always up; right and left are
     * returned unchanged.
     */
    private int resolveDirection(int action)
    {
        if (action == ACTION_CAREFUL_UP) {
            return ACTION_UP;
        }
        if (action != ACTION_UP) {
            return action;
        }
        int roll = (int) (Math.random() * 10);   // uniform over 0..9
        if (roll < 1) {
            return ACTION_RIGHT;
        }
        if (roll < 2) {
            return ACTION_LEFT;
        }
        return ACTION_UP;
    }

}
