/*				APPENDIX  C
			Section C.2  An Example: Coins
*/

/* The predicate range(F,List) defines the finite range of values
   named F. It is used in the definition of the finite version of "pi".
*/
/* Named finite ranges of coin numbers: range(Name, Values). */
range(coins2,    [1, 3]).
range(coins5,    [1, 2, 3, 4, 5]).
range(oddCoins,  [1, 3, 5]).
range(evenCoins, [2, 4]).


/* Call
                   bp(prog1, 1, Pol, U, Prob, outputFile).
                   bp(prog2, 1, Pol, U, Prob, outputFile).
   where prog is a Golog program, 1 is the horizon, Pol is an optimal 
   policy, U is its expected utility, Prob is its probability of 
   success. The computed policy will be printed into outputFile.

   Programs (prog1) and (prog2) illustrate some tricky interactions
   between nondeterminism in the program and stochastic actions.
   Note that prog1 and prog2 lead to different policies.
*/
/* prog1: one flip(1), followed by a `#` choice between the two tests
   ?(head(1)) and ?(-head(1)). The choice is written after the flip.
   NOTE(review): the operators `:`, `#` and `?` are declared outside
   this file; presumably sequence, nondeterministic choice and test as
   in Golog - confirm against the interpreter.
*/
proc(prog1,   flip(1) : (?(head(1)) # ?(-head(1))) ).

/* prog2: a `#` choice between two complete branches, each of which
   performs its own flip(1) and then one fixed test (head(1) in the
   first branch, -head(1) in the second). Here the choice is written
   before the flip.
*/
proc(prog2, (flip(1) : ?(head(1)) ) # (flip(1) : ?(-head(1)) ) ).


/* Call
                   bp(search,5,Pol,U,Prob,outputFile).
                   bp(constr,5,Pol,U,Prob,outputFile).    
                   bp(best,5,Pol,U,Prob,outputFile).
  
 Choose the best policy of flipping given the horizon 5. According to
 the reward function, the optimal policy follows this sequence: 
               coin 1, coin 3, coin 5, coin 2, coin 4. 
 The programs `search' and `best' do not provide any constraints, but 
 the program `constr' indicates that coins 2 and 4 must be attempted 
 only after coins 1,3,5. Given the program `constr', the computation of 
 an optimal policy takes 1 sec, and given the program `search' 
 (the program `best', respectively), the computation takes 27sec (39sec), 
 on a computer with two 300 MHz processors and 128 MB of RAM.

Calls 
                   bp(search,6,Pol,U,Prob,outputFile).
                   bp(search,7,Pol,U,Prob,outputFile).

compute policies for longer horizons.
*/
 
/* search: a `#` choice among flip(1) ... flip(5), followed by a call
   to search itself. The program text contains no terminating branch.
   NOTE(review): presumably the horizon argument of bp/6 is what bounds
   the recursion - confirm against the interpreter.
*/
proc(search,
        (flip(1) # flip(2) # flip(3) # flip(4) # flip(5)) : search
).

/* best: pickBest over the range named coins5 (the coins 1..5 listed
   by range/2), applied to flip(acoin), followed by a call to best
   itself. The atom acoin is the placeholder that appears inside
   flip(acoin).
   NOTE(review): pickBest is defined outside this file; presumably it
   substitutes each value of the range for acoin and keeps the best
   alternative - confirm against the interpreter.
*/
proc(best,
           pickBest(acoin,coins5,flip(acoin)) : best
).

/* constr: a constrained variant of the coin-flipping search.
   The if/3 condition is head(1) & head(3) & head(5).
   Second argument (listed first): choose between flip(2) and flip(4).
   Third argument (listed second): choose among flip(1), flip(3) and
   flip(5).
   Both arguments end with a call to constr itself.
   NOTE(review): presumably if/3 runs its second argument when the
   condition holds and its third otherwise, so the even coins are
   flipped only once coins 1, 3 and 5 are all heads up - confirm
   against the interpreter, which is outside this file.
*/
proc(constr,
         if( head(1) & head(3) & head(5),
                  (flip(2) # flip(4)) : constr,
                      (flip(1) # flip(3) # flip(5)) : constr)
).    
           
/* odd: same shape as the program `best', but pickBest ranges over
   oddCoins (the coins 1, 3, 5 listed by range/2) and the program
   ends with a call to odd itself.
*/
proc(odd,
           pickBest(acoin,oddCoins,flip(acoin)) : odd
).

/* Stochastic actions have a finite number of outcomes:
   we list all of them
*/

/* flip(N) has exactly two outcomes, whatever the situation:
   flipHead(N) and flipTail(N).
*/
nondetActions(flip(N), _, [flipHead(N), flipTail(N)]).

/* Using the predicate prob(Outcome,Probability,Situation),
   we specify the numerical probability of each outcome.
*/

/* Each outcome of a flip has probability 0.5 for any coin, in any situation. */
prob(flipHead(_), 0.5, _).
prob(flipTail(_), 0.5, _).

/* We formulate precondition axioms using the predicate 
	poss(Outcome, Situation)
The right-hand side of precondition axioms provides conditions
under which Outcome is possible in Situation
*/

/* Both outcomes are possible for any coin, in any situation. */
poss(flipHead(_), _).
poss(flipTail(_), _).

/* head(C,S) is true if the coin C is heads up in S  */

/* Successor-state axiom for head/2.
   Coin is heads up after an action if that action was flipHead(Coin).
*/
head(Coin, do(flipHead(Coin), _)).
/* Otherwise Coin keeps showing heads as long as the action was not
   flipTail(Coin). The persistence test comes after head/2 so that
   Coin is already bound when the action is compared.
*/
head(Coin, do(Action, Sit)) :-
    head(Coin, Sit),
    Action \= flipTail(Coin).


/* reward(R, S): R is the reward received in situation S.

   Every clause yields exactly one reward for a given situation. The
   choice between the bonus and the penalty is committed with
   if-then-else, so backtracking into reward/2 cannot produce the
   penalty after the bonus has already been returned.

   The bonuses follow the flipping order 1, 3, 5, 2, 4: a head on a coin
   earns its bonus only when exactly the coins before it in that order
   are heads up; any other head earns a penalty.
*/

/* No reward in the initial situation. */
reward(0, s0).

/* A tail earns nothing, whichever coin was flipped. */
reward(0, do(flipTail(_), _)).

/* Coin 1: bonus only when no coin at all was heads up before the flip.
   head(_, S) succeeds as soon as some coin is heads up in S.
*/
reward(R, do(flipHead(1), S)) :-
    (   head(_, S)
    ->  R is -10
    ;   R is 100
    ).

/* Coin 3: bonus only when coin 1 alone was heads up. */
reward(R, do(flipHead(3), S)) :-
    (   head(1, S),
        \+ head(2, S), \+ head(3, S), \+ head(4, S), \+ head(5, S)
    ->  R is 300
    ;   R is -30
    ).

/* Coin 5: bonus only when coins 1 and 3, and no others, were heads up. */
reward(R, do(flipHead(5), S)) :-
    (   head(1, S), head(3, S),
        \+ head(2, S), \+ head(4, S), \+ head(5, S)
    ->  R is 500
    ;   R is -50
    ).

/* Coin 2: bonus only when coins 1, 3 and 5, and no others, were heads up. */
reward(R, do(flipHead(2), S)) :-
    (   head(1, S), head(3, S), head(5, S),
        \+ head(2, S), \+ head(4, S)
    ->  R is 200
    ;   R is -20
    ).

/* Coin 4: bonus only when coins 1, 3, 5 and 2 were heads up and coin 4
   was not.
*/
reward(R, do(flipHead(4), S)) :-
    (   head(1, S), head(3, S), head(5, S), head(2, S),
        \+ head(4, S)
    ->  R is 400
    ;   R is -40
    ).


/*  The predicate  senseCondition(Outcome,Psi) describes what logical
    formula Psi should be evaluated to determine Outcome uniquely
*/

/* The outcome of flipping Coin is identified by testing head(Coin):
   it holds after flipHead(Coin), and its negation holds after
   flipTail(Coin).
*/
senseCondition(flipHead(Coin), head(Coin)).
senseCondition(flipTail(Coin), (-head(Coin))).

/* Agent actions vs nature's actions: the former are those which can be 
  executed by agents, the latter (outcomes) can be executed only by nature
*/
 
/* flip(_) is the only agent action; flipHead/flipTail are nature's outcomes. */
agentAction(flip(_)).

/* Maps the situation-suppressed fluent head(C) to head(C, Sit) by
   restoring the situation argument.
*/
restoreSitArg(head(C), Sit, head(C, Sit)).