% dtlearn(Goal, Examples, Attributes, DT)
%    Given Examples and Attributes induces a decision tree DT for Goal.
%    Splits on the attribute that resuts in the smallest error
%    Works for noise-free features, with Boolean attributes.

dtlearn(Goal, Examples, Atts , Val) <-
   all_examples_agree(Goal, Examples, Val).

dtlearn(Goal, Examples, Atts, if(Cond,YT,NT)) <-
   examples_disagree(Goal, Examples) &
   select_split(Goal, Examples, Atts, Cond, Rem_Atts) &
   split(Examples, Cond, Yes, No) &
   dtlearn(Goal, Yes, Rem_Atts, YT) &
   dtlearn(Goal, No, Rem_Atts, NT).

% all_examples_agree(Goal, Examples, Val) is true if all examples agree that
%      Val is the value of attribute Goal.
all_examples_agree(_,[],_).
all_examples_agree(Att,[Obj|Rest],Val) <-
   val(Obj,Att,Val) &
   all_examples_agree(Att,Rest,Val).


% examples_disagree(Goal, Examples) is true if the Examples disagree
% on a value of the Goal attribute.
examples_disagree(Goal, [Ex|RExs]) <-
   val(Ex,Goal,Val) &
   examples_disagree_on_Val(Goal, RExs ,Val).

% examples_disagree_on_Val(Goal, Examples, Val) is true if there is an
% example in Examples where the Goal attribute has a value different
% than Val.
examples_disagree_on_Val(Goal, [Ex|RExs],Val) <-
   val(Ex,Goal,Val1) &
   Val \= Val1.
examples_disagree_on_Val(Goal, [Ex|RExs],Val) <-
   val(Ex,Goal,Val) &
   examples_disagree_on_Val(Goal, RExs, Val).

% split(Examples,Att,T,F) is true if T is
% the examples in Examples with attribute Att
% having value YesVal and F is the examples with
% attribute Att having the other value.
split([],_,[],[]).
split([Obj|Rest],Cond,[Obj|Yes],No) <-
   val(Obj,Cond,true) &
   split(Rest,Cond,Yes,No).
split([Obj|Rest],Cond,Yes,[Obj|No]) <-
   val(Obj,Cond,false) &
   split(Rest,Cond,Yes,No).

% select_split(Goal, Examples, Atts, Cond,  Rem_Atts)
% is true if Cond is the best attribute in Atts to determine Goal from
% Examples. 

select_split(Goal, Examples, [Att|Ratts], Cond, Rem_Atts) <-
   error(Att,Goal,Examples,Err) &
   find_best_split(Ratts,Att,Err,Goal,Examples,Cond,Rem_Atts).

%%% find_best_split(Ratts,Att,Err,Goal,Examples,Cond,Rem_Atts)
%%%  Ratts is the set of attributes to check
%%%  Att is the current best attribute, with error Err.
%%%  Goal is the goal attribute
%%%  Examples is the set of all examples
%%%  Cond is the best attribute to split on
%%%  Rem_Atts is the list of remaining attributes
%%%  Note: this is notdeterministic. Retrying will find equally
%%%  good attributes to split on.
find_best_split([],BAtt,_,_,_,BAtt,[]).
find_best_split([Att|Ratts],BAtt,BErr,Goal,Examples,Cond,[Att|Rem_Atts]) <-
   error(Att,Goal,Examples,Err) &
   Err >= BErr &
   find_best_split(Ratts,BAtt,BErr,Goal,Examples,Cond,Rem_Atts).
find_best_split([Att|Ratts],BAtt,BErr,Goal,Examples,Cond,[BAtt|Rem_Atts]) <-
   error(Att,Goal,Examples,Err) &
   Err =< BErr &
   find_best_split(Ratts,Att,Err,Goal,Examples,Cond,Rem_Atts).

%%% error(Att,Goal,Examples,Err) is true if Err is the error that will
%%% arise when slpitting the Examples on attribute Att when predicting Goal
error(Att,Goal,Examples,Err) <-
   count(Examples,Att,Goal,TT,TF,FT,FF) &
   Err is min(TT,TF) + min(FT,FF).

%%%   count(Examples,Att,Goal,TT,TF,FT,FF) is true if
%%%  TT is the number of Examples where Att=true, Goal=true
%%%  TF is the number of Examples where Att=true, Goal=false
%%%  FT is the number of Examples where Att=false, Goal=true
%%%  FF is the number of Examples where Att=false, Goal=false
count([],Att,Goal,0,0,0,0).
count([Ex|Examples],Att,Goal,TT,TF,FT,FF) <-
   val(Ex,Att,true) &
   val(Ex,Goal,true) &
   count(Examples,Att,Goal,TT0,TF,FT,FF) &
   TT is TT0+1.
count([Ex|Examples],Att,Goal,TT,TF,FT,FF) <-
   val(Ex,Att,true) &
   val(Ex,Goal,false) &
   count(Examples,Att,Goal,TT,TF0,FT,FF) &
   TF is TF0+1.
count([Ex|Examples],Att,Goal,TT,TF,FT,FF) <-
   val(Ex,Att,false) &
   val(Ex,Goal,true) &
   count(Examples,Att,Goal,TT,TF,FT0,FF) &
   FT is FT0+1.
count([Ex|Examples],Att,Goal,TT,TF,FT,FF) <-
   val(Ex,Att,false) &
   val(Ex,Goal,false) &
   count(Examples,Att,Goal,TT,TF,FT,FF0) &
   FF is FF0+1.

%%% load 'c:/david/book/code-br/code/cilog/ch11/dtlearn_bool.pl'.
%%% load 'c:/david/book/code/cilog/ch11/dtlearn_bool.pl'.
