/*            APPENDIX  D     Section D.1                             */
/********************************************************************
      An On-line Decision Theoretic Golog Interpreter based on
        An Incremental Decision Theoretic Golog Interpreter   
                          May, 2000.                 

            Do not distribute without permission.
            Include this notice in any copy made.

Permission to use, copy, and modify this software and its documentation 
for non-commercial research purpose is hereby granted without fee,
provided that this permission notice appears in all copies. This
software cannot be used for commercial purposes without written permission.
This software is provided "as is" without express or implied warranty
(including all implied warranties of merchantability and fitness).
No liability is implied for any damages resulting from 
or in connection with the use or performance of this software.

E-mail questions/comments about the interpreter to Mikhail Soutchanski:
            mes [at] cs [dot] toronto [dot] edu   


NOTICE: this software works only with Eclipse Prolog 3.5.2 available from
        IC-PARK (Imperial College, London, UK) because this interpreter
        relies on a linear programming solver built in Eclipse Prolog 3.5.2. 
        The solver finds solutions for a system of linear inequalities
        and for linear programming problems only if all coefficients
        are *rational* numbers. Contact http://www.icparc.ic.ac.uk/eclipse/
        for a licence (free to academic and research institutions).
      More recent versions of Eclipse Prolog use different set of predicates.

**********************************************************************/


/* ECLiPSe Prolog set-up.  lib(r) loads the rational linear-constraint
   solver that supplies $=, $<=, rmax/1 and rmin/1 used throughout. */
:- lib(r).
:- dynamic(proc/2).            /* Compiler directives. Be sure   */
:- set_flag(all_dynamic, on).  /* that you load this file first! */
:- set_flag(print_depth,500).
/* a toolbox of debugging tricks: put stophere(yes) where necessary. */
:- pragma(debug). 
:- dynamic(stophere/1). 
:- set_flag(stophere/1, spy, on).      %  type debug and hit "l" to leap 
:- set_flag(stophere/1, leash, stop).  %  from one break-point to another
 
 
/* Operator declarations defining the surface syntax of Golog programs
   and of the pseudo-fluent test expressions. */
:- op(800, xfy, [&]).   /* Conjunction */
:- op(850, xfy, [v]).   /* Disjunction */
:- op(870, xfy, [=>]).  /* Implication */
:- op(880,xfy, [<=>]).  /* Equivalence */
:- op(950, xfy, [:]).   /* Action sequence */
:- op(970, xfy, [#]).   /* Nondeterministic action choice */

/* The predicate best() is the top level call.
  Add an end-of-program marker "nil" to the tail of program expression E ,
   compute the best policy starting from s0. Call this predicate to start
  online execution of a Golog program.
*/

/* best(E,H,Pol,V,File): top-level entry point for on-line execution.
   Runs the Golog program E from s0 with horizon H; Pol is the executed
   policy and V its accumulated value.  A timing/value report is
   appended to File.  Wall-clock time is measured with unix_time. */
best(E,H,Pol,V,File) :- get_flag(unix_time, StartT), 
          online(E : nil,s0,H,Pol,V),
          Val is float(V),
          open( File, append, Stream),
          date(Date),
          printf(Stream, "\n\n    This report is started at time %w\n", [Date]),
          /* if E names a declared procedure, report its body too */
         ( proc(E,Body) -> 
           printf(Stream, "The Golog program is\n proc(%w,\n  %w)\n",[E,Body]) ;
           printf(Stream, "The Golog program is\n %w\n",[E])
          ),
          get_flag(unix_time, EndT), Elapsed is EndT - StartT,
          printf("\nThe computation took %w seconds of unix time",[Elapsed]),
          printf(Stream, "\nTime elapsed is %w seconds\n", [Elapsed]),
  %           printf(Stream, "The optimal policy is \n %w \n", [Pol]),
          printf(Stream, "The value of the optimal policy is %w\n",[Val]), 
          close(Stream).

/* 
This predicate is useful only for testing the offline incrBestDo interpreter.
*/
/* bp(E,H,Pol,Val,Prob,File): off-line variant of best/5, for testing
   the incrBestDo interpreter only.  Computes one optimal policy Pol
   with value Val and success probability Prob (no on-line execution),
   timed with cputime, and appends a report to File. */
bp(E,H,Pol,Val,Prob,File) :- 
          cputime(StartT), %%%get_flag(unix_time, StartT), 
          incrBestDo(E : nil,s0,H,ER,Pol,V,Prob),
          Val is float(V),
          open( File, append, Stream),
          date(Date),
          printf(Stream, "\n\n    This report is started at time %w\n", [Date]),
          /* if E names a declared procedure, report its body too */
         ( proc(E,Body) -> 
           printf(Stream, "The Golog program is\n proc(%w,\n  %w)\n",[E,Body]) ;
           printf(Stream, "The Golog program is\n %w\n",[E])
          ),
          cputime(EndT),  %%%get_flag(unix_time, EndT), 
          Elapsed is 1.0*(EndT - StartT),
    printf("\nThe computation took %w seconds of unix time",[Elapsed]),
        printf(Stream, "\nTime elapsed is %8.4g seconds\n", [Elapsed]),
          printf(Stream, "The optimal policy is \n %w \n", [Pol]),
          printf(Stream, "The value of the optimal policy is %w\n",[Val]), 
printf(Stream,"The probability of successful termination of this policy is %w\n", [Prob]),
          close(Stream).

 

/*  online(Expr,S,H,Pol,V) means the following. 
  Given a Golog program Expr, a situation S and horizon H find an
  optimal sequence of actions Pol with an accumulated expected value V.
  In logical terms, the problem is

   DomainAxioms \=?=  (\exists p,v) online(Expr : Nil, S0,H,p,v) 
 
  and any binding for existentially quantified variables "p","v" 
  obtained as a side-effect of solving this entailment task constitutes
  a solution of a decision theoretical problem. 

  incrBestDo(E,S,H,ER,Pol,V,Prob) means the following.
  Given a Golog program expression E, situation S and horizon H find 
  a policy Pol of the highest expected value V and find the sub-program ER
  of E that remains after executing the first action from the optimal
  policy Pol. The probability Prob is the total probability of all those
  alternative branches in Pol which have leaves different from Stop, where
  Stop is a zero-cost action marking a prematurely terminated program E.  
  The program ER is the result of a one-step transition from E along the
  optimal policy Pol. The horizon H must be a non-negative integer number.
*/


/* online(E,S,H,Pol,U): interleave planning and execution.  Compute an
   optimal policy for E in S by incrBestDo/7, then either stop (the
   remaining program/policy is final) or execute the first action A of
   the policy in reality, sense the outcome and any exogenous changes,
   and recurse on the remaining program ER with a decremented horizon.
   The three inner branches distinguish deterministic agent actions,
   sensing actions, and stochastic agent actions; each commits with a
   cut once the action has actually been performed (no backtracking
   over real-world execution). */
online(E,S,H,Pol,U) :-   incrBestDo(E,S,H,ER,Pol1,U1,Prob1),
     %  stophere(yes),
     ( final(ER,S,H,Pol1,U1), Pol=Pol1, U=U1 ;
       reward(V,S),
       /* A is the first action of the computed policy */
       ( ER \== nil, Pol1 = (A : Rest) ;  ER == nil, Pol1 = A ), 
       ( 
         (agentAction(A), deterministic(A,S),
            doReally(A),     /* do A in reality or doSimul(A) - ask user*/
            senseExo(do(A,S),Sg),
            decrement(H , Hor),
            !,
            online(ER,Sg,Hor,PolRem,URem), 
            Pol=(A : PolRem), U $= V + URem
          ) ;
         (
          (senseAction(A),
             doReally(A),     /* do sensing in reality or in simulation */
             senseExo(do(A,S),Sg),
             decrement(H , Hor),
             !,
             online(ER,Sg,Hor,PolRem,URem), 
             Pol=(A : PolRem), U $= V + URem
           ) ;
          (agentAction(A), nondetActions(A,S,NatOutcomes),
             doReally(A),     /* do A in reality or doSimul(A) - ask user*/
             senseEffect(A,S,NatOutcomes,SEff),  
                              /* SEff results from real sensing */
             senseExo(SEff,Sg),
             decrement(H , Hor),
             !,
             online(ER,Sg,Hor,PolRem,URem), 
             Pol=(A : PolRem), U $= V + URem
           )
          ) 
        ) 
      ).

doReally(A) :- doSimul(A).

/* Later : connect to the robot */

/* Simulated execution of a deterministic, non-sensing agent action:
   just announce it.  NOTE(review): S is unbound here, so
   deterministic(A,S) is tested for an uninstantiated situation --
   presumably adequate for this domain; confirm. */
doSimul(A) :- agentAction(A), not senseAction(A), deterministic(A,S),
            printf("I'm doing the deterministic action %w\n",[A]).

/* Simulated execution of a stochastic agent action: announce it.  The
   actual outcome is later determined by sensing (see senseEffect/4).
   NOTE(review): S is unbound here, as in the clause above -- confirm. */
doSimul(A) :- agentAction(A), nondetActions(A,S,NatOutcomesList),
            printf("I'm doing the stochastic action %w\n",[A]).

/* Simulated execution of a sensing action: announce it and ask the
   user to type the value the sensor would return.  The sense action is
   assumed to have the form ActionName(SensorName,Value,Time); read/1
   binds the (unbound) Value argument to the term the user types.
   FIX: message said "Im" -- made it "I'm", consistent with the other
   doSimul clauses above. */
doSimul(A) :- senseAction(A), printf("I'm doing the sensing action %w\n",[A]),
            printf("Type the value returned by the sensor\n",[]),
       %    stophere(yes),
            A =.. [ActionName,SensorName,Value,Time],
            read(Value).
/* Alternative implementation: introduce the predicate 
value(SenseAction,Value) and for each sense action  senseAct
include in the domain axiomatization an axiom similar to
     value( senseAct(X,Val,Time), Val ).
Then we can simply call this predicate "value" in the clause above. 
*/ 


/* In the simplest case, there are no exogenous actions. */

senseExo(S1,S2) :- S2=S1.


/* Domain model includes a pre-determined sequence Seq of sensing actions
   that have to be executed after stochastic action A. The syntax is
   the following (in the case when 3 sensing actions must be executed):
      differentiatingSeq(A, SenseAction1 : (SenseAction2 : SenseAction3)). 
   Each sensing action has 3 arguments: the 1st argument is the constant
   representing what has to be sensed, the 2nd argument represents
   a term (variable) that gets bound to a certain value returned
   from a sensor at the run-time, the 3rd argument represents the moment
   of time when the sensing action is performed.
*/

/* senseEffect(A,S,NatOutcomes,SE): after a stochastic action A was done
   in S, execute the domain-specified differentiating sequence of sense
   actions and diagnose which of nature's outcomes X in NatOutcomes
   actually occurred.  SE is the situation do(X,S) extended with the
   executed sense actions. */
senseEffect(A,S,NatOutcomes,SE) :- 
          differentiatingSeq(A,Seq), 
          getSensorInput(do(X,S),Seq, SE), 
             /* find X: an unknown outcome of the stochastic action A */
          diagnose(S,SE,NatOutcomes,X).

/* getSensorInput(S,A,SE): base case -- the sequence consists of a
   single sense action A; perform it and extend the situation. */
getSensorInput(S,A, SE) :-  senseAction(A),
       doReally(A),  /* connect to sensors and get data */
       % doSimul(A)  /* ask user for a sensor value when A is simulated*/
       SE=do(A,S).

/* getSensorInput(S,Seq,SE): recursive case -- Seq is (A : Tail);
   perform sense action A, then process the rest of the sequence in
   the extended situation do(A,S). */
getSensorInput(S,Seq, SE) :- Seq = (A : Tail), senseAction(A),
       doReally(A),   /* connect to sensors and get data */
       % doSimul(A)   /* ask user */
       getSensorInput(do(A,S),Tail, SE).

/*
  diagnose(S,SE,OutcomesList,N) is true iff N is Nature's action that 
  actually happened in S and this action belongs to the list of outcomes 
  which might occur in S. It is assumed that the situation SE contains 
  all sensing information required to disambiguate between different 
  possible outcomes.
  Domain model includes a set of statements that characterize mutually 
  exclusive conditions that need to be evaluated to determine which
  outcome occurred in reality. These statements have the following syntax:
      senseCondition(N, phi).
  where phi is an arbitrary pseudo-fluent expression.
*/  
   
/* Base case: a single candidate outcome N remains.  If its sense
   condition holds in SEff, nature did N (X=N).
   NOTE(review): if the condition fails, the clause still succeeds via
   the second disjunct with X left unbound after merely printing a
   message -- confirm this fallback is intended. */
diagnose(S,SEff,[N],X) :- senseCondition(N,C),
             ( X=N, holds(C,SEff)  /* nature did the action N */;
               write("Failed to identify an outcome")
              ).

/* Recursive case: try the first candidate outcome N; if its sense
   condition does not hold in SEff, recurse on the remaining ones. */
diagnose(S,SEff,[N, NN | Outcomes],X) :- 
  senseCondition(N,C), /* assume that N happened */
  ( X=N, holds(C,SEff) /* verified that nature did action N */ ;
    diagnose(S,SEff,[NN | Outcomes],X) /*if not, consider other outcomes*/
   ).





/* Re-associate sequential composition to the right so that the head of
   the program is always a single construct. */
incrBestDo((E1 : E2) : E,S,H,ER,Pol,V,Prob) :-   positive(H),
           incrBestDo(E1 : (E2 : E),S,H,ER,Pol,V,Prob).

/* Test action ?(C): if C holds in S, continue with E; otherwise the
   program is blocked and terminates with the zero-cost marker "stop"
   (Prob = 0, reward of the current situation).
   BUGFIX: by operator precedence the body used to parse as
   ( (positive(H), holds(C,S)) -> Then ; Else ), so at horizon 0 this
   clause wrongly produced Pol=stop/Prob=0 instead of failing and
   letting the H=0 clause below yield Pol=nil/Prob=1.  positive(H) is
   now a guard, as in every other incrBestDo clause. */
incrBestDo(?(C) : E,S,H,ER,Pol,V,Prob) :-    positive(H),
          ( holds(C,S) -> incrBestDo(E,S,H,ER,Pol,V,Prob) ;
            (ER=nil, (Prob is 0.0) , Pol = stop, reward(V,S))
          ).

/* Nondeterministic choice of argument pi(X,E1): replace X in E1 by a
   fresh variable (to be bound later, e.g. by poss or constraints). */
incrBestDo(pi(X,E1) : E,S,H,ER,Pol,V,Prob) :-   positive(H),
            sub(X,_,E1,E1_X), 
            incrBestDo(E1_X : E,S,H,ER,Pol,V,Prob).

/* Nondeterministic choice E1 # E2: evaluate both alternatives and keep
   the one preferred by lesseq/4, which compares first by success
   probability being (non)zero and then by expected value. */
incrBestDo((E1 # E2) : E,S,H,ER,Pol,V,Prob) :-    positive(H),
       incrBestDo(E1 : E,S,H,ER1,Pol1,V1,Prob1), 
       incrBestDo(E2 : E,S,H,ER2,Pol2,V2,Prob2), 
       ( lesseq(V1,Prob1,V2,Prob2) -> 
                  (ER=ER2, Pol=Pol2, Prob=Prob2, V $= V2) ; 
                  (ER=ER1, Pol=Pol1, Prob=Prob1, V $= V1)
        ).

/* Conditional if(C,E1,E2): evaluate the test in S once and commit to
   the corresponding branch.
   BUGFIX: by operator precedence positive(H) used to be part of the
   if-then-else condition, so at horizon 0 the E2 branch was explored
   instead of this clause failing; positive(H) is now a guard,
   consistent with every other incrBestDo clause. */
incrBestDo(if(C,E1,E2) : E,S,H,ER,Pol,V,Prob) :-   positive(H),
             ( holds(C,S) -> incrBestDo(E1 : E,S,H,ER,Pol,V,Prob) ;
                             incrBestDo(E2 : E,S,H,ER,Pol,V,Prob)
             ).

/* Loop while(C,E1): if C is false the loop is finished; otherwise
   unroll one iteration of the body E1.
   BUGFIX: by operator precedence positive(H) used to be part of the
   if-then-else condition, so at horizon 0 the loop body branch was
   taken instead of this clause failing; positive(H) is now a guard,
   consistent with every other incrBestDo clause. */
incrBestDo(while(C,E1) : E,S,H,ER,Pol,V,Prob) :-   positive(H),
            ( holds(-C,S) -> incrBestDo(E,S,H,ER,Pol,V,Prob) ;
                 incrBestDo(E1 : while(C,E1) : E,S,H,ER,Pol,V,Prob)
            ).

/* Procedure call: macro-expand a declared proc/2 into its body. */
incrBestDo(ProcName : E,S,H,ER,Pol,V,Prob) :-   positive(H),
              proc(ProcName,Body), 
              incrBestDo(Body : E,S,H,ER,Pol,V,Prob).

/* 
  Discrete version of pi. pickBest(x,f,e) means: choose the best value 
   of x from the finite range of values f, and for this x, 
   do the complex action e. 
*/
/* 
  Discrete version of pi. pickBest(x,f,e) means: choose the best value 
   of x from the finite range of values f, and for this x, 
   do the complex action e. 
   Implemented by unfolding the finite range into a chain of
   nondeterministic choices (#), handled by the choice clause above:
   one element, two elements, or two elements plus a recursive
   pickBest over the remainder of the list.
*/
incrBestDo(pickBest(X,F,E) : Tail,S,H,ER,Pol,V,Prob) :-  positive(H),
     range(F,R), 
     ( R=[D],     sub(X,D,E,E_D),
                    incrBestDo(E_D : Tail,S,H,ER,Pol,V,Prob)            ;
       R=[D1,D2], sub(X,D1,E,E_D1), sub(X,D2,E,E_D2), 
                    incrBestDo((E_D1 # E_D2) : Tail,S,H,ER,Pol,V,Prob)  ;
       R=[D1,D2 | List],  List=[D3 | Rest], 
                  sub(X,D1,E,E_D1), sub(X,D2,E,E_D2), 
     incrBestDo((E_D1 # E_D2 # pickBest(X,List,E)) : Tail,S,H,ER,Pol,V,Prob) 
      ).


/* Deterministic agent action A at the head of the program.  If A is
   not possible in S, the program terminates prematurely ("stop",
   Prob = 0).  Otherwise advance to do(A,S) with a decremented horizon,
   constrain A's time to be no earlier than the start of S, and build
   the policy/value from the rest of the program. */
incrBestDo(A : E,S,H,ER,Pol,V,Prob) :-   positive(H),
      agentAction(A), deterministic(A,S),
      ( not poss(A,S), ER=nil, Pol=stop, (Prob is 0.0) , 
                       reward(V,S);   
        poss(A,S),  
        decrement(H , Hor),
        start(S,T1),  time(A,T2),  T1 $<= T2,
        ER = E,  
        incrBestDo(E,do(A,S),Hor,_,RestPol,VF,Prob),
        reward(R,S), 
        V $= R + VF,   %%% if discount is 0.9: V $= R + (9/10)*VF
        ( RestPol == nil, Pol = A ;
          RestPol \== nil,  Pol = (A : RestPol)
         )
       ).


/* Stochastic agent action A: nature chooses among NatOutcomesList.
   bestDoAux/7 builds a conditional sub-policy over the possible
   outcomes.  If that sub-policy immediately stops ( ?(_) : stop ),
   doing A is pointless: terminate with (A : stop) and just the current
   reward; otherwise prepend A to the conditional policy. */
incrBestDo(A : E,S,H,ER,Pol,V,Prob) :-   positive(H),
       agentAction(A), nondetActions(A,S,NatOutcomesList),
       decrement(H , Hor),
       bestDoAux(NatOutcomesList,E,S,Hor,RestPol,VF,Prob),
       /* RestPol can be stop or a conditional policy */
       reward(R,S),  
   ( RestPol = ( ?(_) : stop ),  ER=nil,  Pol=(A : stop),  V $= R ; 
    RestPol \= ( ?(_) : stop ),  ER=E,    Pol=(A : RestPol), 
           V $= R + VF   %%% if discount is 0.9: V $= R + (9/10)*VF 
        ).

/* Because the decision tree built by this interpreter grows quickly
   with increase in horizon, it is computationally advantageous
   to compute optimal policies from sub-parts of a large Golog program
   and then piece computed sub-policies together into a policy
   for a whole program. To accomplish this, the programmer can
   use constructs which allow to indicate which sub-parts of
   the program can be used individually for computing sub-policies.
   The following construct loc(Prog1) : Prog2 means that a policy Pol1 
   can be computed offline given Prog1 and used later to compute offline
   an optimal policy Pol from the program (Pol1 : Prog2). Note that
   Pol1 is a conditional Golog program without nondeterministic choices.
   Once the policy Pol1 is computed, the construct loc() will not be
   applied later to compute policies from the program that remains
   after executing on-line the first action in the policy Pol.
   Note that Pol1 may have several occurrences of "nil" and "stop".
*/
 /* loc(E1) : E -- locality construct (see comment above): first solve
    E1 in isolation to get a nondeterminism-free policy Pol1, then
    optimize (Pol1 : E) as a whole. */
 incrBestDo(loc(E1) : E,S,H,ER,Pol,V,Prob) :-   positive(H),
             incrBestDo(E1 : nil,S,H,_,Pol1,V1,Prob1),
             incrBestDo(Pol1 : E,S,H,ER,Pol,V,Prob).

/* In the next version the locality operator persists
   until the locally optimized program finishes its on-line execution.
   The second difference from the previous operator is that
   a policy for on-line execution is computed without taking
   into account the program expression E that follows after
   a locally optimized expression E1.
*/

/* Insert once the end-of-program marker "nil" after 
   the program expression E1 and use "optimize" instead of "limit"
   to indicate that "nil" has already been added after E1. 
*/

/* limit(E1) : E -- wrap E1 (with its end-of-program marker) into the
   persistent locality construct optimize/1 below. */
incrBestDo(limit(E1) : E,S,H,ER,Pol,V,Prob) :-   positive(H),
        incrBestDo(optimize(E1 : nil) : E,S,H,ER,Pol,V,Prob).

   
/* optimize(Expr1) : E -- persistent locality: optimize Expr1 ignoring
   the continuation E.  If Expr1 has not finished, keep the optimize
   wrapper around its remainder; once it has finished, either continue
   with (Pol1 : E) or, if Expr1 stopped, terminate with Prob = 0. */
incrBestDo(optimize(Expr1) : E,S,H,ER,Pol,V,Prob) :-   positive(H),
        incrBestDo(Expr1,S,H,ERem1,Pol1,V1,Prob1),
        ( ERem1 \== nil,            
          ER = (optimize(ERem1) : E),
          Pol = Pol1,             % a policy for on-line execution
          V $= V1, Prob=Prob1 ;     
          ERem1 == nil, 
          ( Pol1 \== stop -> incrBestDo(Pol1 : E,S,H,ER,Pol,V,Prob) ;
             ( ER = nil, (Prob is 0.0) , Pol = stop, reward(V,S) )
           )
         ).

/* Skip an interior end-of-program marker: (nil : E) behaves as E. */
incrBestDo(nil : E,S,H,ER,Pol,V,Prob) :-  positive(H),
               incrBestDo(E,S,H,ER,Pol,V,Prob).


/* If the program starts with the "stop" action that is not
   possible to execute, then given the reward V in the current 
   situation S we solve a linear programming problem VL-> max
   with respect to a system of linear inequalities between temporal 
   variables that occur in V. Solving this linear programming problem
   grounds some temporal variables to values of time that provide
   the highest reward with respect to the temporal constraints.
   We seek solutions of similar linear programming problems
   in other branches of the situation tree whenever the directed
   forward search performed by this interpreter gets to a leaf node 
   (a final situation).    
*/
/* Premature termination marker at the head of the program (see comment
   above): take the current reward, maximize it over the temporal
   constraints (rmax), and report success probability 0. */
incrBestDo(stop : E,S,H,ER,Pol,V,Prob) :- positive(H), 
                ER=nil, Pol=stop, 
                reward(V,S), VL $<= V, rmax(VL), 
                (Prob is 0.0) .

/* The program is done, but the horizon is still greater than 0. */
/* The program is done, but the horizon is still greater than 0:
   a successful leaf (Prob = 1); maximize the reward over the temporal
   constraints. */
incrBestDo(nil,S,H,ER,Pol,V,Prob) :-   positive(H), 
                ER=nil, Pol=nil,
                reward(V,S), VL $<= V, rmax(VL), 
                (Prob is 1.0 ) .  
       
/* There is still something to do, but  horizon is already 0. */
/* There is still something to do, but the horizon is already 0:
   cut the search off here and treat the node as a successful leaf.
   NOTE(review): H = 0 is a unification, so an unbound H would be bound
   to 0 here -- callers always pass a concrete horizon; confirm. */
incrBestDo(E,S,H,ER,Pol,V,Prob) :-   H = 0, 
                ER=nil, Pol=nil,
                reward(V,S), VL $<= V, rmax(VL), 
                (Prob is 1.0 ) .



/* bestDoAux(Outcomes,E,S,H,Pol,V,Prob): build the conditional policy
   over nature's possible outcomes of a stochastic action done in S.
   Base case, last outcome N1 possible: guard the continuation policy
   with N1's sense condition and weight value/probability by prob/3. */
bestDoAux([N1],E,S,H,Pol,V,Prob) :-   poss(N1,S), 
       start(S,T1),  time(N1,T2),  T1 $<= T2,
       prob(N1,Pr1,S),
       incrBestDo(E,do(N1,S),H,_,Pol1,V1,Prob1),
       senseCondition(N1,Phi1),
       Pol = ( ?(Phi1) : Pol1 ),
       rational(Pr1, P_rat),    /* converts Pr1 into the rational number */
       V $= P_rat * V1, /*V1 is linear function with rational coefficients*/
       Prob is Pr1*Prob1.

/* Base case, last outcome N1 impossible in S: that branch of the
   policy stops with zero value and zero success probability. */
bestDoAux([N1],E,S,H,Pol,V,Prob) :-   not poss(N1,S), 
       senseCondition(N1,Phi1),
       Pol = ( ?(Phi1) : stop ), (Prob is 0.0) , V $= 0 .

/* Recursive case, first outcome N1 possible: build the policy tree for
   the remaining outcomes, then branch on N1's sense condition.  Value
   and success probability are the probability-weighted sums over the
   branches. */
bestDoAux([N1 | OtherOutcomes],E,S,H,Pol,V,Prob) :- not OtherOutcomes= [],
       poss(N1,S), 
       start(S,T1),  time(N1,T2),  T1 $<= T2,
       bestDoAux(OtherOutcomes,E,S,H,PolTree,VTree,ProbTree),
       incrBestDo(E,do(N1,S),H,_,Pol1,V1,Prob1),
       senseCondition(N1,Phi1),
       Pol = if(Phi1,  % then
                             Pol1,  % else
                                           PolTree),
       prob(N1,Pr1,S),
       rational(Pr1, P_rat),   /* converts Pr1 into the rational number */
       V $= VTree + P_rat*V1, 
       Prob is ProbTree + Pr1*Prob1.

/* Recursive case, first outcome N1 impossible in S: drop it and
   continue with the remaining outcomes. */
bestDoAux([N1 | OtherOutcomes],E,S,H,Pol,V,Prob) :- not OtherOutcomes= [],
       not poss(N1,S), bestDoAux(OtherOutcomes,E,S,H,Pol,V,Prob).



%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% final(Program,Sit,Hor,Pol,Val) is true iff 
% Program cannot be continued or if the computed policy Pol
% cannot be executed.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

/* Horizon exhausted: execution is final regardless of the remaining
   program; maximize the current reward over temporal constraints. */
final(E,S,H,Pol,V) :-  H==0, Pol=nil, 
                reward(V,S), VL $<= V, rmax(VL) .

/* The remaining program is nil: execution finished successfully. */
final(nil,S,H,Pol,V) :-  Pol=nil,
                reward(V,S), VL $<= V, rmax(VL) .

/* The remaining program starts with "stop": the computed policy cannot
   be executed; terminate with the reward of the current situation.
   BUGFIX: the head read final(stop : E,H,S,Pol,V), transposing the
   situation and horizon arguments relative to every caller
   (final(Program,Sit,Hor,...)) and to the sibling clauses, so
   reward/2 was applied to the horizon instead of the situation. */
final(stop : E,S,H,Pol,V) :-  Pol=stop, 
                reward(V,S), VL $<= V, rmax(VL).

/* The computed policy itself is "stop" or "nil": nothing executable
   remains; ground temporal variables by maximizing the policy value U.
   NOTE(review): V bound by reward/2 is otherwise unused here while the
   constraint is placed on U -- presumably reward/2 is called only to
   ground temporal variables of S; confirm this is intended. */
final(E,S,H,Pol,U) :- (Pol == stop ; Pol == nil),
                reward(V,S), VL $<= U, rmax(VL).



 
    /* ---- Some useful predicates mentioned in the interpreter ----- */

/* Note that evaluation of the predicates "lesseq" and "greatereq"
   can require solving a system of linear inequalities about 
   temporal variables.
*/ 

/* lesseq(V1,Prob1,V2,Prob2): the alternative with value V1 and success
   probability Prob1 is no better than the one with V2/Prob2.  Any
   alternative with zero success probability loses to one with nonzero
   probability; when both probabilities are nonzero (or both zero) the
   values are compared via the constraint solver. */
lesseq(V1,Prob1,V2,Prob2) :-  Pr1 is float(Prob1), Pr2 is float(Prob2),
            (Pr1 \= 0.0 ) , (Pr2 \= 0.0 ) , V1 $<= V2.

lesseq(V1,Prob1,V2,Prob2) :-  Pr1 is float(Prob1), (Pr1 = 0.0 ) ,
         Pr2 is float(Prob2), (Pr2 \= 0.0 ) .
         
lesseq(V1,Prob1,V2,Prob2) :-  Pr1 is float(Prob1), (Pr1 = 0.0 ) ,
         Pr2 is float(Prob2),  (Pr2 = 0.0 ) ,  V1 $<= V2 .

/* greatereq(V1,Prob1,V2,Prob2): mirror image of lesseq/4 -- the
   alternative V1/Prob1 is at least as good as V2/Prob2.
   This predicate is provided for symmetry with lesseq/4. */
greatereq(V1,Prob1,V2,Prob2) :- Pr1 is float(Prob1), Pr2 is float(Prob2),
            (Pr1 \= 0.0 ) , (Pr2 \= 0.0 ) , V2 $<= V1.

greatereq(V1,Prob1,V2,Prob2) :-  Pr1 is float(Prob1), Pr2 is float(Prob2),
            (Pr1 \= 0.0 ) , (Pr2 = 0.0 ) .

greatereq(V1,Prob1,V2,Prob2) :-  Pr1 is float(Prob1), Pr2 is float(Prob2),
            (Pr1 = 0.0 ) , (Pr2 = 0.0 ) , V2 $<= V1.


/* In the case of episodic decision task, whenever horizon
   is not known in advance and is determined by transition into a
   terminal (absorbing) state, the value of horizon can be defined by
   the constant "episodic". Note that in this case the predicate
   positive(Horizon) is true.
*/
/* decrement(Input,Output): step the horizon down by one.  A positive
   integer horizon is decremented; a non-integer horizon (e.g. the
   constant "episodic" of episodic decision tasks) passes through
   unchanged.
   BUGFIX: the guard of the second clause tested the singleton variable
   Arg instead of Input.  Since integer/1 fails on an unbound variable,
   "not integer(Arg)" was always true, so every integer Input had a
   spurious second solution Output = Input on backtracking. */
decrement(Input,Output) :- 
      integer(Input), Input > 0, Output is Input - 1.

decrement(Input,Output) :- 
      not integer(Input), Output = Input.

/* positive(H): the horizon still allows at least one more step.
   True for every integer H > 0 and for any non-integer horizon
   (e.g. the constant "episodic" of episodic decision tasks). */
positive(H) :- ( integer(H) -> H > 0 ; true ).
                                      

/* A handy primitive action noOp(T) does not change anything. */
agentAction(noOp(T)).          % noOp is an agent action
time(noOp(T), T).              % occurring at time T,
poss(noOp(T), S).              % always possible,
deterministic(noOp(T),S).      % and always deterministic.
/* Default: an action is deterministic in S unless nature chooses among
   several outcomes for it (negation as failure on nondetActions/3). */
deterministic(A,S) :- not nondetActions(A,S,OutcomesList).

/* range(F,R): R is the finite list of values denoted by F.  Here a
   range is simply given extensionally, so R is F itself (for lists). */
range([],[]).
range([H | T], [H | T]).


                   /* Time specific clauses.  */

/* start(S,T): T is the start time of situation S -- the time of the
   last action done, or 0 in the initial situation s0. */
start(do(A,S),T) :- time(A,T).
start( s0, 0 ).

/* Fix on an earliest solution to the temporal constraints. 
   This predicate is not used in the current implementation.
*/
/* chooseTimes(S): minimize (rmin) the time of every action in S,
   fixing the earliest solution of the temporal constraints. */
chooseTimes(s0).
chooseTimes(do(A,S)) :- chooseTimes(S), time(A,T), T $<= Time, rmin(Time).

/* "now" is a synonym for "start". */
now(T,S) :- start(S,T).


/* The time of an instantaneous action is its last argument.
time(X,T) :- X =.. [ActName|Args], 
             reverse(Args,[Last | OtherArgs]), Last=T.
*/


/* sub(Name,New,Term1,Term2): Term2 is Term1 with Name
   replaced by New. */
 
/* sub(X1,X2,T1,T2): substitute X2 for X1 throughout term T1, giving
   T2.  A variable T1 is left unchanged; an exact match is replaced;
   otherwise the substitution is applied to each argument via
   sub_list/4.  NOTE(review): the clauses are not mutually exclusive
   (no cuts), so backtracking can produce further solutions --
   presumably only the first solution is ever used; confirm. */
sub(X1,X2,T1,T2) :- var(T1), T2 = T1.
sub(X1,X2,T1,T2) :- not var(T1), T1 = X1, T2 = X2.
sub(X1,X2,T1,T2) :- not T1 = X1, T1 =..[F|L1], sub_list(X1,X2,L1,L2),
                    T2 =..[F|L2].
/* sub_list/4: apply sub/4 elementwise to a list of subterms. */
sub_list(X1,X2,[],[]).
sub_list(X1,X2,[T1|L1],[T2|L2]) :- sub(X1,X2,T1,T2),
                                   sub_list(X1,X2,L1,L2).

/* The holds predicate implements the revised Lloyd-Topor
   transformations on test conditions.  */
 
/* holds(W,S): evaluate pseudo-fluent formula W in situation S, pushing
   the connectives inward by the revised Lloyd-Topor transformations.
   Negation is by failure (see the -some and atomic -P clauses), and a
   universal quantifier is rewritten via its existential dual. */
holds(P & Q,S) :- !,   holds(P,S), holds(Q,S).
holds(P v Q,S) :- !,   ( holds(P,S) ; holds(Q,S) ).
holds(P => Q,S) :- holds(-P v Q,S).
holds(P <=> Q,S) :- holds((P => Q) & (Q => P),S).
holds(-(-P),S) :- holds(P,S).
holds(-(P & Q),S) :- holds(-P v -Q,S).
holds(-(P v Q),S) :- holds(-P & -Q,S).
holds(-(P => Q),S) :- holds(-(-P v Q),S).
holds(-(P <=> Q),S) :- holds(-((P => Q) & (Q => P)),S).
holds(-all(V,P),S) :- holds(some(V,-P),S).
holds(-some(V,P),S) :- not holds(some(V,P),S).  /* Negation */
holds(-P,S) :- isAtom(P), not holds(P,S).     /* by failure */
holds(all(V,P),S) :- holds(-some(V,-P),S).
holds(some(V,P),S) :- sub(V,_,P,P1), holds(P1,S).
 
/* The following clause treats the holds predicate for some
   domain independent atoms, including Prolog system predicates. 
   For this to work properly, the GOLOG programmer must provide, 
   for those generic atoms taking a situation argument, 
   a clause giving the result of restoring its suppressed situation 
   argument, for example:
          restoreSitArg(now(Time),S,now(Time,S)). 
*/         
 
/* Atomic case of holds/2: if A has a suppressed situation argument
   (declared via restoreSitArg/3), restore it and call the result;
   otherwise call A directly as an ordinary Prolog goal. */
holds(A,S) :-  restoreSitArg(A,S,F),  F ;
      isAtom(A), not restoreSitArg(A,S,F),   %  calls standard    
      A.                                     %  Prolog predicates

/* isAtom(A): A is not headed by any of the logical connectives or
   quantifiers handled above (i.e. it is an atomic formula). */
isAtom(A) :- not (A = tt ;A = -W ; A = (W1 & W2) ; A = (W1 => W2) ;
    A = (W1 <=> W2) ; A = (W1 v W2) ; A = some(X,W) ; A = all(X,W)).
 

/*  It might be convenient to keep the following domain-independent 
    clauses in this file.
*/
	
restoreSitArg(poss(A),S,poss(A,S)).   % restore situation argument of poss
restoreSitArg(now(T),S,now(T,S)).     % restore situation argument of now

/* The representation considered here can be elaborated as follows.
   Modified successor state axioms can be formulated using the 
   predicate "ssa". Specifically, ssa-predicates must have 4 arguments:
     - guard condition Alpha ;   % if it is true, then the axiom applies
     - action term  A ;    
     - fluent name F ;
     - the right hand side Gamma of the successor state axiom.
   The domain axiomatization must include the set of unary axioms
   fluent(X)  specifying fluents.
   The domain axiomatization may include several different ssa 
   for the same fluent, but their guards must be mutually incompatible.
   If A is a sensing action, then its second before the last argument 
   contains data returned from doing sensing in reality. 

holds(F,do(A,S)) :- fluent(F),        
    ssa(Alpha,A,F,Gamma), holds(Alpha,S), holds(Gamma,S).

holds(tt,S).              % the truth value tt  holds in any situation
holds(F,s0) :- ini(F).    % initially, fluent F is true
holds(-F,s0) :- ini(-F).  % initially, fluent F is false

  Note that in such implementation there would be no need to provide
  restoreSitArg  axioms for fluents:
holds(A,S) :-  restoreSitArg(A,S,F),  F ;
      isAtom(A), not restoreSitArg(A,S,F),   %  calls standard    
      not fluent(A), A.                      %  Prolog predicates
*/

/* -----------  cut here  ------------------------ */

stophere(yes).