/*                            APPENDIX  C
                        Section C.1  An Interpreter
*/
/********************************************************************
   An Offline Decision Theoretic Golog Interpreter (non-temporal version)    
                        October, 1999.              

            Do not distribute without permission.
            Include this notice in any copy made.

Permission to use, copy, and modify this software and its documentation 
for non-commercial research purpose is hereby granted without fee,
provided that this permission notice appears in all copies. This
software cannot be used for commercial purposes without written permission.
This software is provided "as is" without express or implied warranty
(including all implied warranties of merchantability and fitness).
No liability is implied for any damages resulting from 
or in connection with the use or performance of this software.

The following paper provides the technical background:
@inproceedings{AAAI,
   author = {Boutilier, C. and  Reiter, R. and
             Soutchanski, M. and Thrun, S.},
  title={Decision-Theoretic, High-level Robot Programming in
           the Situation Calculus},
  booktitle = {Proc. of the 17th National Conference on
    Artificial Intelligence (AAAI'00)},
  address={Austin, Texas},
  publisher = {Available at:  http://www.cs.toronto.edu/~cogrobo },
  year= 2000
}

E-mail questions about the interpreter to Mikhail Soutchanski:
                     mes [at] cs [dot] toronto [dot] edu   

NOTICE: this software works on Unix/Linux machines with Eclipse Prolog that is
        available from IC-PARK (Imperial College, London, UK). It is easy to
        modify this interpreter to work with any other version of Prolog.
        
**********************************************************************/
  
:- dynamic(proc/2).            /* Compiler directives. Be sure   */
:- set_flag(all_dynamic, on).     /* that you load this file first! */
:- set_flag(print_depth,500).
:- pragma(debug).  
 
:- op(800, xfy, [&]).   /* Conjunction */
:- op(850, xfy, [v]).   /* Disjunction */
:- op(870, xfy, [=>]).  /* Implication */
:- op(880,xfy, [<=>]).  /* Equivalence */
:- op(950, xfy, [:]).   /* Action sequence */
:- op(960, xfy, [#]).   /* Nondeterministic action choice */

/* The predicate bp() is the top level call. Add an end-of-program 
   marker "nil" to the tail of program expression E , then compute 
   the best policy that succeeds with probability Prob. The horizon
   H must be a non-negative integer number.
*/

 
bp(E,H,Pol,Util,Prob,File) :- integer(H), H >= 0, 
      cputime(StartT), 
      bestDo(E : nil,s0,H,Pol,Val,Prob),
      cputime(EndT), Elapsed is EndT - StartT,
      Util is float(Val),
      open( File, append, Stream),
      date(Date),
      printf(Stream, "\n\n    This report is started at time %w\n", [Date]),
      ( proc(E,Body) -> 
        printf(Stream, "The Golog program is\n proc(%w,\n  %w)\n",[E,Body]) ;
        printf(Stream, "The Golog program is\n %w\n",[E])
       ),
      printf("\nThe computation took %w seconds",[Elapsed]),
      printf(Stream, "\nTime elapsed is %w seconds\n", [Elapsed]),
       printf(Stream, "The optimal policy is \n %w \n", [Pol]),
       printf(Stream, "The value of the optimal policy is %w\n",[Util]),
    printf(Stream, "The probability of successful termination is %w\n",[Prob]),
       close(Stream).
  

/*  bestDo(E,S,H,Pol,V,Prob) 
  Given a Golog program E and situation S find a policy Pol of the highest
  expected utility Val. The optimal policy covers a set of alternative histories 
  with the total probability Prob. H is a given finite horizon.
*/

bestDo((E1 : E2) : E,S,H,Pol,V,Prob) :-   H >= 0, 
           bestDo(E1 : (E2 : E),S,H,Pol,V,Prob).

bestDo(?(C) : E,S,H,Pol,V,Prob) :-   H >= 0, 
          holds(C,S) ->  bestDo(E,S,H,Pol,V,Prob) ;
                 ( (Prob is 0.0) , Pol = stop, reward(V,S) ).

bestDo((E1 # E2) : E,S,H,Pol,V,Prob) :-  H >= 0, 
     bestDo(E1 : E,S,H,Pol1,V1,Prob1), 
     bestDo(E2 : E,S,H,Pol2,V2,Prob2), 
     ( lesseq(V1,Prob1,V2,Prob2), Pol=Pol2, Prob=Prob2, V=V2 ; 
       greatereq(V1,Prob1,V2,Prob2), Pol=Pol1, Prob=Prob1, V=V1).

bestDo(if(C,E1,E2) : E,S,H,Pol,V,Prob) :-   H >= 0, 
               holds(C,S) -> bestDo(E1 : E,S,H,Pol,V,Prob) ;
                             bestDo(E2 : E,S,H,Pol,V,Prob).

bestDo(while(C,E1) : E,S,H,Pol,V,Prob) :-   H >= 0, 
          holds(-C,S) -> bestDo(E,S,H,Pol,V,Prob) ;
                         bestDo(E1 : while(C,E1) : E,S,H,Pol,V,Prob).

bestDo(ProcName : E,S,H,Pol,V,Prob) :-   H >= 0, 
              proc(ProcName,Body), 
              bestDo(Body : E,S,H,Pol,V,Prob).

/* Non-decision theoretic version of pi: pick a fresh value of X
   and for this value do the complex action E1 followed by E.
*/
bestDo(pi(X,E1) : E,S,H,Pol,V,Prob) :-   H >= 0, 
            sub(X,_,E1,E1_X), bestDo(E1_X : E,S,H,Pol,V,Prob).

/*                   
  Discrete version of pi. pickBest(x,f,e) means: choose the best value 
   of x from the finite non-empty range of values f, and for this x, 
   do the complex action expression e.
*/
bestDo(pickBest(X,F,E) : EF,S,H,Pol,V,Prob) :-  H >= 0,
        range(F,R), 
        ( R=[D],    sub(X,D,E,E_D),
                      bestDo(E_D : EF,S,H,Pol,V,Prob)           ;
          R=[D1,D2], sub(X,D1,E,E_D1), sub(X,D2,E,E_D2), 
                      bestDo((E_D1 # E_D2) : EF,S,H,Pol,V,Prob)  ;
          R=[D1,D2 | Tail], Tail = [D3 | Rest], 
                     sub(X,D1,E,E_D1), sub(X,D2,E,E_D2), 
          bestDo((E_D1 # E_D2 # pickBest(X,Tail,E)) : EF,S,H,Pol,V,Prob) 
         ).

bestDo(A : E,S,H,Pol,V,Prob) :-   H > 0, 
      agentAction(A), deterministic(A,S),
      ( not poss(A,S), Pol = stop, (Prob is 0.0) , reward(V,S) ;
        poss(A,S), Hor is H - 1, 
        bestDo(E,do(A,S),Hor,RestPol,VF,Prob),
        reward(R,S), 
        V is R + VF, 
        ( RestPol = nil,     Pol = A ; 
          not RestPol=nil,   Pol = (A : RestPol) 
         )
       ).

bestDo(A : E,S,H,Pol,V,Prob) :-   H > 0, 
       agentAction(A), nondetActions(A,S,NatOutcomesList),
       Hor is H -1,
       bestDoAux(NatOutcomesList,E,S,Hor,RestPol,VF,Prob),
       reward(R,S),  
       V is R + VF, 
       Pol=(A : senseEffect(A) : (RestPol)).

bestDoAux([N1],E,S,H,Pol,V,Prob) :-   H >= 0, senseCondition(N1,Phi1),
     ( not poss(N1,S), ( Pol = (?(Phi1) : stop ), (Prob is 0.0) , V is 0 ) ;
       poss(N1,S), 
       prob(N1,Pr1,S),
       bestDo(E,do(N1,S),H,Pol1,V1,Prob1), !,
       Pol = ( ?(Phi1) : Pol1 ),
       V is Pr1*V1, 
       Prob is Pr1*Prob1 ).

bestDoAux([N1 | OtherOutcomes],E,S,H,Pol,V,Prob) :-  H >= 0, 
     OtherOutcomes = [Head | Tail],  % there is at least one other outcome
     ( not poss(N1,S) -> bestDoAux(OtherOutcomes,E,S,H,Pol,V,Prob) ;
       poss(N1,S), 
       bestDoAux(OtherOutcomes,E,S,H,PolT,VT,ProbT),
       senseCondition(N1,Phi1),
       prob(N1,Pr1,S),
       bestDo(E,do(N1,S),H,Pol1,V1,Prob1), !,
       Pol = if(Phi1,  % then
                             Pol1,  % else
                                           PolT),
       V is VT + Pr1*V1, 
       Prob is ProbT + Pr1*Prob1 ).


bestDo(nil,S,H,Pol,V,Prob) :- 
                Pol=nil, reward(V,S), (Prob is 1.0) .

bestDo(nil : E,S,H,Pol,V,Prob) :- H > 0, bestDo(E,S,H,Pol,V,Prob).

bestDo(stop : E,S,H,Pol,V,Prob) :-  
                Pol=stop, reward(V,S), (Prob is 0.0) .   

bestDo(E,S,H,Pol,V,Prob) :- H =:= 0, 
	/*  E=(A : Tail), agentAction(A),  */ 
              Pol=nil, reward(V,S), (Prob is 1.0) .


  /* ---- Some useful predicates mentioned in the interpreter ----- */

lesseq(V1,Prob1,V2,Prob2) :-  Pr1 is float(Prob1), (Pr1 = 0.0) ,
         Pr2 is float(Prob2), 
         ( (Pr2 \= 0.0) ; 
           (Pr2 = 0.0) ,  V1 =< V2
          ).

lesseq(V1,Prob1,V2,Prob2) :-  
            (Prob1 \= 0.0) , (Prob2 \= 0.0) , V1 =< V2.

greatereq(V1,Prob1,V2,Prob2) :-   (Prob1 \= 0.0) , (Prob2 = 0.0) .

greatereq(V1,Prob1,V2,Prob2) :-
            (Prob1 \= 0.0) , (Prob2 \= 0.0) , V2 =< V1.


deterministic(A,S) :- not nondetActions(A,S,OutcomesList).


range([D | Tail],[D | Tail]).     % Tail can be []

/* sub(Name,New,Term1,Term2): Term2 is Term1 with Name
   replaced by New. */
 
sub(X1,X2,T1,T2) :- var(T1), T2 = T1.
sub(X1,X2,T1,T2) :- not var(T1), T1 = X1, T2 = X2.
sub(X1,X2,T1,T2) :- not T1 = X1, T1 =..[F|L1], sub_list(X1,X2,L1,L2),
                    T2 =..[F|L2].
sub_list(X1,X2,[],[]).
sub_list(X1,X2,[T1|L1],[T2|L2]) :- sub(X1,X2,T1,T2),
                                   sub_list(X1,X2,L1,L2).

/* The holds predicate implements the revised Lloyd-Topor
   transformations on test conditions.  */
 
holds(P & Q,S) :- holds(P,S), holds(Q,S).
holds(P v Q,S) :- holds(P,S); holds(Q,S).
holds(P => Q,S) :- holds(-P v Q,S).
holds(P <=> Q,S) :- holds((P => Q) & (Q => P),S).
holds(-(-P),S) :- holds(P,S).
holds(-(P & Q),S) :- holds(-P v -Q,S).
holds(-(P v Q),S) :- holds(-P & -Q,S).
holds(-(P => Q),S) :- holds(-(-P v Q),S).
holds(-(P <=> Q),S) :- holds(-((P => Q) & (Q => P)),S).
holds(-all(V,P),S) :- holds(some(V,-P),S).
holds(-some(V,P),S) :- not holds(some(V,P),S).  /* Negation */
holds(-P,S) :- isAtom(P), not holds(P,S).     /* by failure */
holds(all(V,P),S) :- holds(-some(V,-P),S).
holds(some(V,P),S) :- sub(V,_,P,P1), holds(P1,S).
 
/* The following clause treats the holds predicate for all atoms,
   including Prolog system predicates. For this to work properly,
   the GOLOG programmer must provide, for all atoms taking a
   situation argument, a clause giving the result of restoring
   its suppressed situation argument, for example:
          restoreSitArg(ontable(X),S,ontable(X,S)).             */
 
holds(A,S) :- restoreSitArg(A,S,F), F ;
           not restoreSitArg(A,S,F), isAtom(A), A.
 
isAtom(A) :- not (A = -W ; A = (W1 & W2) ; A = (W1 => W2) ;
    A = (W1 <=> W2) ; A = (W1 v W2) ; A = some(X,W) ; A = all(X,W)).