Wildlife Preserve, instance #1 of the IPPC-18 benchmark suite
/////////////////////////////////////////////////////////////////////////////////
// //
// //
// RDDL MDP version without interm-fluents of the IPC 2018 Wildlife Preserve //
// domain for instance 01. //
// //
// //
// Created for the probabilistic tracks of IPC 2018 by //
// //
// Fei Fang (feifang [at] cmu.edu), //
// Thanh Hong Nguyen (thanhhng [at] umich.edu) and //
// Thomas Keller (tho.keller [at] unibas.ch) //
// //
// based on the papers //
// //
// [1] Fei Fang, Peter Stone and Milind Tambe: "When Security Games Go //
// Green: Designing Defender Strategies to Prevent Poaching and Illegal //
// Fishing". IJCAI 2015. //
// //
// [2] Thanh H. Nguyen, Rong Yang, Amos Azaria, Sarit Kraus and Milind //
// Tambe: "Analyzing the Effectiveness of Adversary Modeling in //
// Security Games". AAAI 2013. //
// //
// //
// The aim of the Wildlife Preserve domain is to protect a wildlife preserve   //
// from poachers by sending the available rangers to areas. Poachers attack    //
// parts of the preserve depending on their preferences and an expectation of  //
// where rangers will likely show up. This expectation is computed by          //
// exploiting the assumption, typically made in Stackelberg Security Games,    //
// that the defender's (i.e., the rangers') mixed strategy is fully observed   //
// by the attacker, and memorized for a predefined number of steps.            //
// //
// In each step, the planner obtains a reward for each area that has not been  //
// attacked while undefended, and a penalty for each area that has. The        //
// challenge is to predict where poachers are most likely to attack and to     //
// lure the poachers, who observe each step of the rangers, into attacking     //
// an area where they are caught. A poacher that has been caught does not      //
// attack in the next step.                                                     //
// //
// //
/////////////////////////////////////////////////////////////////////////////////
domain wildlife-preserve_01_mdp {
requirements {
reward-deterministic,
preconditions
};
types {
ranger : object;
poacher : object;
area : { @a1, @a2, @a3, @a4 };
number : { @1 };
interm_level : { @level0, @level1 };
};
pvariables {
//////////////////// non-fluents ////////////////////
// reward for a successfully defended (or unattacked) area
DEFENDER-REWARD(area) : { non-fluent, real, default = 10.0 };
// penalty for a successfully attacked area
DEFENDER-PENALTY(area) : { non-fluent, real, default = -10.0 };
// ATTACK-WEIGHT_x(poacher, area) corresponds to the value of
// e^(w_1 * eta_i + w_2 * R^a_i + w_3 * P^a_i) as described in [1], where x is
// the number of times the area has been defended in the last y steps, where y
// is the number of rounds the poacher remembers. As the RDDL Exponential
// keyword was not explicitly mentioned as part of IPC 2018,
// ATTACK-WEIGHT_x(poacher, area) is precomputed.
ATTACK-WEIGHT_0(poacher, area) : { non-fluent, real, default = 0.0 };
ATTACK-WEIGHT_1(poacher, area) : { non-fluent, real, default = 0.0 };
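// For illustration, assuming eta_i is estimated as the fraction of remembered
// steps in which area i was defended (a single remembered step here, y = 1),
// the two precomputed weights correspond to
//   ATTACK-WEIGHT_0(?p, ?a) = e^(w_2 * R^a + w_3 * P^a)         (eta_i = 0)
//   ATTACK-WEIGHT_1(?p, ?a) = e^(w_1 + w_2 * R^a + w_3 * P^a)   (eta_i = 1)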
// true if the poacher remembers what the rangers did a number
// of steps ago equal to the given number
POACHER-REMEMBERS(poacher, number) : { non-fluent, bool, default = false };
//////////////////// state-fluents ////////////////////
// the current level (for interm-fluents compilation)
current-level : { state-fluent, interm_level, default = @level0 };
// state-fluents that track which action was executed
// (for interm-fluents compilation)
defend-executed(area, ranger) : { state-fluent, bool, default = false };
// the poacher attacks this area
poacher-attacks(poacher) : { state-fluent, area, default = @a1 };
// a ranger defended this area a number of steps ago equal to the given number
was-defended(area, number) : { state-fluent, bool, default = false };
// if a poacher attacks an area that is defended by a ranger, it is
// caught in the next step
poacher-caught(poacher) : { state-fluent, bool, default = false };
//////////////////// action-fluents ////////////////////
// the only available action is to have a ranger defend an area
defend(area, ranger) : { action-fluent, bool, default = false };
// artificial action for the interm-fluents compilation: it must be (and may
// only be) applied at @level1
proceed-interm-level : { action-fluent, bool, default = false };
};
cpfs {
// encodes the levels of interm-fluents
current-level' =
if (current-level == @level0) then @level1
else @level0;
// remembers if defend(?a, ?r) was executed at @level0
defend-executed'(?a, ?r) =
if (current-level == @level0) then defend(?a, ?r)
else defend-executed(?a, ?r);
// true if ?n is @1 and ?a was defended by a ranger, or if ?a
// was defended at step ?n-1
was-defended'(?a, ?n) =
if (current-level == @level0) then (exists_{?r : ranger} [ defend(?a, ?r) ])
else was-defended(?a, ?n);
// a poacher is caught this step if it attacked a defended area last step
poacher-caught'(?p) =
if (current-level == @level1) then
exists_{ ?r : ranger, ?a : area } [ (poacher-attacks(?p) == ?a) & defend-executed(?a, ?r) ]
else poacher-caught(?p);
// We use the SUQR model of [1] and [2] to determine which area is attacked
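// The probability that poacher ?p attacks area ?a is the attack weight of ?a
// (ATTACK-WEIGHT_0 if ?a was not defended within ?p's memory, ATTACK-WEIGHT_1
// if it was defended once), normalized by the sum of the corresponding
// weights over all areas.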
poacher-attacks'(?p) =
if (current-level == @level0) then
Discrete(area,
@a1 : (((( sum_{?n : number} [ was-defended(@a1, ?n) & POACHER-REMEMBERS(?p, ?n) ] ) == 0) * ATTACK-WEIGHT_0(?p, @a1)) +
((( sum_{?n : number} [ was-defended(@a1, ?n) & POACHER-REMEMBERS(?p, ?n) ] ) == 1) * ATTACK-WEIGHT_1(?p, @a1))) /
sum_{?a : area} [
(((( sum_{?n : number} [ was-defended(?a, ?n) & POACHER-REMEMBERS(?p, ?n) ] ) == 0) * ATTACK-WEIGHT_0(?p, ?a)) +
((( sum_{?n : number} [ was-defended(?a, ?n) & POACHER-REMEMBERS(?p, ?n) ] ) == 1) * ATTACK-WEIGHT_1(?p, ?a))) ],
@a2 : (((( sum_{?n : number} [ was-defended(@a2, ?n) & POACHER-REMEMBERS(?p, ?n) ] ) == 0) * ATTACK-WEIGHT_0(?p, @a2)) +
((( sum_{?n : number} [ was-defended(@a2, ?n) & POACHER-REMEMBERS(?p, ?n) ] ) == 1) * ATTACK-WEIGHT_1(?p, @a2))) /
sum_{?a : area} [
(((( sum_{?n : number} [ was-defended(?a, ?n) & POACHER-REMEMBERS(?p, ?n) ] ) == 0) * ATTACK-WEIGHT_0(?p, ?a)) +
((( sum_{?n : number} [ was-defended(?a, ?n) & POACHER-REMEMBERS(?p, ?n) ] ) == 1) * ATTACK-WEIGHT_1(?p, ?a))) ],
@a3 : (((( sum_{?n : number} [ was-defended(@a3, ?n) & POACHER-REMEMBERS(?p, ?n) ] ) == 0) * ATTACK-WEIGHT_0(?p, @a3)) +
((( sum_{?n : number} [ was-defended(@a3, ?n) & POACHER-REMEMBERS(?p, ?n) ] ) == 1) * ATTACK-WEIGHT_1(?p, @a3))) /
sum_{?a : area} [
(((( sum_{?n : number} [ was-defended(?a, ?n) & POACHER-REMEMBERS(?p, ?n) ] ) == 0) * ATTACK-WEIGHT_0(?p, ?a)) +
((( sum_{?n : number} [ was-defended(?a, ?n) & POACHER-REMEMBERS(?p, ?n) ] ) == 1) * ATTACK-WEIGHT_1(?p, ?a))) ],
@a4 : (((( sum_{?n : number} [ was-defended(@a4, ?n) & POACHER-REMEMBERS(?p, ?n) ] ) == 0) * ATTACK-WEIGHT_0(?p, @a4)) +
((( sum_{?n : number} [ was-defended(@a4, ?n) & POACHER-REMEMBERS(?p, ?n) ] ) == 1) * ATTACK-WEIGHT_1(?p, @a4))) /
sum_{?a : area} [
(((( sum_{?n : number} [ was-defended(?a, ?n) & POACHER-REMEMBERS(?p, ?n) ] ) == 0) * ATTACK-WEIGHT_0(?p, ?a)) +
((( sum_{?n : number} [ was-defended(?a, ?n) & POACHER-REMEMBERS(?p, ?n) ] ) == 1) * ATTACK-WEIGHT_1(?p, ?a))) ])
else poacher-attacks(?p);
};
// each area that is successfully attacked yields a penalty,
// and all other areas yield a reward
reward = if (current-level == @level1) then
( sum_{?a : area} [ if (~(exists_{?p : poacher} [ (poacher-attacks(?p) == ?a) & ~poacher-caught(?p) ]) |
( exists_{?r : ranger} [ defend-executed(?a, ?r) ] ) )
then DEFENDER-REWARD(?a) else DEFENDER-PENALTY(?a) ] )
else 0;
action-preconditions {
// each ranger defends one area per step
(current-level == @level0) => ( forall_{?r : ranger} [ ( sum_{?a : area} [ defend(?a, ?r) ] ) == 1 ] );
// enforce proceed-interm-level at all levels but @level0
(current-level == @level1) => proceed-interm-level;
// allow proceed-interm-level only at @level1 (i.e., forbid it at @level0)
proceed-interm-level => (current-level == @level1);
// allow original actions only at @level0
( forall_{ ?r : ranger, ?a : area } [ defend(?a, ?r) => (current-level == @level0) ] );
// simplifies grounding of action states (follows logically from other preconditions)
forall_{?r : ranger} [ ( sum_{?a : area} [ defend(?a, ?r) ] ) <= 1 ];
};
}
/////////////////////////////////////////////////////////////////////////////////
// //
// //
// RDDL MDP version of Wildlife Preserve instance #01 for IPC 2018 by Fei Fang //
// (feifang [at] cmu.edu), Thanh Hong Nguyen (thanhhng [at] umich.edu) and //
// Thomas Keller (tho.keller [at] unibas.ch), based on the papers "When //
// Security Games Go Green: Designing Defender Strategies to Prevent Poaching //
// and Illegal Fishing" by Fei Fang, Peter Stone and Milind Tambe (IJCAI 2015) //
// and "Analyzing the Effectiveness of Adversary Modeling in Security Games" //
// by Thanh H. Nguyen, Rong Yang, Amos Azaria, Sarit Kraus and Milind Tambe //
// (AAAI 2013). //
// //
// //
/////////////////////////////////////////////////////////////////////////////////
instance wildlife-preserve_inst_mdp__01 {
domain = wildlife-preserve_01_mdp;
objects {
ranger : { r1 };
poacher : { p1 };
};
non-fluents {
DEFENDER-REWARD(@a1) = 12.50;
DEFENDER-PENALTY(@a1) = -14.65;
DEFENDER-REWARD(@a2) = 6.91;
DEFENDER-PENALTY(@a2) = -5.32;
DEFENDER-REWARD(@a3) = 9.30;
DEFENDER-PENALTY(@a3) = -2.20;
DEFENDER-REWARD(@a4) = 14.46;
DEFENDER-PENALTY(@a4) = -5.42;
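// For example, if @a1 is attacked by an uncaught poacher while undefended and
// no other area is successfully attacked, the reward collected at @level1 is
// DEFENDER-PENALTY(@a1) + DEFENDER-REWARD(@a2) + DEFENDER-REWARD(@a3) +
// DEFENDER-REWARD(@a4) = -14.65 + 6.91 + 9.30 + 14.46 = 16.02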
// correlation between attacker reward and defender penalty as well as
// attacker penalty and defender reward is 1.00 for all poachers and all areas
// weights for poacher p1 are: w1 = -25.58, w2 = 0.78, w3 = 0.32
// reward for poacher p1 in area @a1 is: 14.65
// penalty for poacher p1 in area @a1 is: -12.50
// reward for poacher p1 in area @a2 is: 5.32
// penalty for poacher p1 in area @a2 is: -6.91
// reward for poacher p1 in area @a3 is: 2.20
// penalty for poacher p1 in area @a3 is: -9.30
// reward for poacher p1 in area @a4 is: 5.42
// penalty for poacher p1 in area @a4 is: -14.46
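// Up to rounding of the weights printed above, the precomputed values below
// follow from e^(w1 * eta + w2 * reward + w3 * penalty), e.g.
// ATTACK-WEIGHT_0(p1, @a1) ≈ e^(0.78 * 14.65 + 0.32 * (-12.50)) ≈ e^7.43 ≈ 1.7e3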
ATTACK-WEIGHT_0(p1, @a1) = 1719.61047;
ATTACK-WEIGHT_1(p1, @a1) = 0.00000;
ATTACK-WEIGHT_0(p1, @a2) = 7.06053;
ATTACK-WEIGHT_1(p1, @a2) = 0.00000;
ATTACK-WEIGHT_0(p1, @a3) = 0.29169;
ATTACK-WEIGHT_1(p1, @a3) = 0.00000;
ATTACK-WEIGHT_0(p1, @a4) = 0.69859;
ATTACK-WEIGHT_1(p1, @a4) = 0.00000;
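// With these values, as long as p1 has not recently observed a defended area,
// the attack almost certainly targets @a1:
//   1719.61 / (1719.61 + 7.06 + 0.29 + 0.70) ≈ 0.995
// Once @a1 is remembered as defended, its weight drops to 0 and @a2 becomes
// the most likely target: 7.06 / (7.06 + 0.29 + 0.70) ≈ 0.877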
POACHER-REMEMBERS(p1, @1);
};
init-state {
~was-defended(@a1, @1);
};
horizon = 60;
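// two interm-levels per round, so horizon 60 corresponds to 30 defend/resolve rounds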
discount = 1.0;
}