Wildlife Preserve, instance #1 of the IPPC-18 benchmark suite
/////////////////////////////////////////////////////////////////////////////////
// //
// //
// RDDL MDP version without interm-fluents of the IPC 2018 Wildlife Preserve //
// domain for instance 01. //
// //
// //
// Created for the probabilistic tracks of IPC 2018 by //
// //
// Fei Fang (feifang [at] cmu.edu), //
// Thanh Hong Nguyen (thanhhng [at] umich.edu) and //
// Thomas Keller (tho.keller [at] unibas.ch) //
// //
// based on the papers //
// //
// [1] Fei Fang, Peter Stone and Milind Tambe: "When Security Games Go //
// Green: Designing Defender Strategies to Prevent Poaching and Illegal //
// Fishing". IJCAI 2015. //
// //
// [2] Thanh H. Nguyen, Rong Yang, Amos Azaria, Sarit Kraus and Milind //
// Tambe: "Analyzing the Effectiveness of Adversary Modeling in //
// Security Games". AAAI 2013. //
// //
// //
// The aim of the Wildlife Preserve domain is to protect a wildlife preserve   //
// from poachers by sending the available rangers to areas. Poachers attack    //
// parts of the preserve depending on their preferences and an expectation of  //
// where rangers will likely show up. This expectation is computed by          //
// exploiting the assumption, typically made in Stackelberg Security Games,    //
// that the defender's (i.e., the rangers') mixed strategy is fully observed   //
// by the attacker, and memorized for a predefined number of steps.            //
// //
// In each step, the planner obtains a reward for each area that has not been  //
// attacked while undefended, and a penalty for each area that has. The        //
// challenge is to predict where poachers are most likely to attack and to     //
// lure the poachers, who observe each step of the rangers, into attacking     //
// an area where they are caught. A poacher that has been caught does not      //
// attack in the next step.                                                     //
// //
// //
/////////////////////////////////////////////////////////////////////////////////
domain wildlife-preserve_01_mdp {
requirements {
reward-deterministic,
preconditions
};
types {
ranger : object;
poacher : object;
area : { @a1, @a2, @a3, @a4 };
number : { @1 };
interm_level : { @level0, @level1 };
};
pvariables {
//////////////////// non-fluents ////////////////////
// reward for a successfully defended (or unattacked) area
DEFENDER-REWARD(area) : { non-fluent, real, default = 10.0 };
// penalty for a successfully attacked area
DEFENDER-PENALTY(area) : { non-fluent, real, default = -10.0 };
// ATTACK-WEIGHT_x(poacher, area) corresponds to the value of
// e^(w_1 * eta_i + w_2 * R^a_i + w_3 * P^a_i) as described in [1], where x is
// the number of times the area has been defended in the last y steps, where y
// is the number of rounds the poacher remembers. As the RDDL Exponential
// keyword was not explicitly mentioned as part of IPC 2018,
// ATTACK-WEIGHT_x(poacher, area) is precomputed.
ATTACK-WEIGHT_0(poacher, area) : { non-fluent, real, default = 0.0 };
ATTACK-WEIGHT_1(poacher, area) : { non-fluent, real, default = 0.0 };
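// For illustration, assuming eta_i is estimated as the fraction of remembered
// steps in which area i was defended (a single remembered step here, y = 1),
// the two precomputed weights correspond to
//   ATTACK-WEIGHT_0(?p, ?a) = e^(w_2 * R^a + w_3 * P^a)         (eta_i = 0)
//   ATTACK-WEIGHT_1(?p, ?a) = e^(w_1 + w_2 * R^a + w_3 * P^a)   (eta_i = 1)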
// true if the poacher remembers what the rangers did a number
// of steps ago equal to the given number
POACHER-REMEMBERS(poacher, number) : { non-fluent, bool, default = false };
//////////////////// state-fluents ////////////////////
// the current level (for interm-fluents compilation)
current-level : { state-fluent, interm_level, default = @level0 };
// state-fluents that track which action was executed
// (for interm-fluents compilation)
defend-executed(area, ranger) : { state-fluent, bool, default = false };
// the poacher attacks this area
poacher-attacks(poacher) : { state-fluent, area, default = @a1 };
// a ranger defended this area a number of steps ago equal to the given number
was-defended(area, number) : { state-fluent, bool, default = false };
// if a poacher attacks an area that is defended by a ranger, it is
// caught in the next step
poacher-caught(poacher) : { state-fluent, bool, default = false };
//////////////////// action-fluents ////////////////////
// the only available action is to have a ranger defend an area
defend(area, ranger) : { action-fluent, bool, default = false };
// artificial action for the interm-fluents compilation: it must be (and may
// only be) applied at @level1
proceed-interm-level : { action-fluent, bool, default = false };
};
cpfs {
// encodes the levels of interm-fluents
current-level' =
if (current-level == @level0) then @level1
else @level0;
// remembers if defend(?a, ?r) was executed at @level0
defend-executed'(?a, ?r) =
if (current-level == @level0) then defend(?a, ?r)
else defend-executed(?a, ?r);
// true if ?n is @1 and ?a was defended by a ranger, or if ?a
// was defended at step ?n-1
was-defended'(?a, ?n) =
if (current-level == @level0) then (exists_{?r : ranger} [ defend(?a, ?r) ])
else was-defended(?a, ?n);
// a poacher is caught this step if it attacked a defended area last step
poacher-caught'(?p) =
if (current-level == @level1) then
exists_{ ?r : ranger, ?a : area } [ (poacher-attacks(?p) == ?a) & defend-executed(?a, ?r) ]
else poacher-caught(?p);
// We use the SUQR model of [1] and [2] to determine which area is attacked
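// The probability that poacher ?p attacks area ?a is the attack weight of ?a
// (ATTACK-WEIGHT_0 if ?a was not defended within ?p's memory, ATTACK-WEIGHT_1
// if it was defended once), normalized by the sum of the corresponding
// weights over all areas.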
poacher-attacks'(?p) =
if (current-level == @level0) then
Discrete(area,
@a1 : (((( sum_{?n : number} [ was-defended(@a1, ?n) & POACHER-REMEMBERS(?p, ?n) ] ) == 0) * ATTACK-WEIGHT_0(?p, @a1)) +
((( sum_{?n : number} [ was-defended(@a1, ?n) & POACHER-REMEMBERS(?p, ?n) ] ) == 1) * ATTACK-WEIGHT_1(?p, @a1))) /
sum_{?a : area} [
(((( sum_{?n : number} [ was-defended(?a, ?n) & POACHER-REMEMBERS(?p, ?n) ] ) == 0) * ATTACK-WEIGHT_0(?p, ?a)) +
((( sum_{?n : number} [ was-defended(?a, ?n) & POACHER-REMEMBERS(?p, ?n) ] ) == 1) * ATTACK-WEIGHT_1(?p, ?a))) ],
@a2 : (((( sum_{?n : number} [ was-defended(@a2, ?n) & POACHER-REMEMBERS(?p, ?n) ] ) == 0) * ATTACK-WEIGHT_0(?p, @a2)) +
((( sum_{?n : number} [ was-defended(@a2, ?n) & POACHER-REMEMBERS(?p, ?n) ] ) == 1) * ATTACK-WEIGHT_1(?p, @a2))) /
sum_{?a : area} [
(((( sum_{?n : number} [ was-defended(?a, ?n) & POACHER-REMEMBERS(?p, ?n) ] ) == 0) * ATTACK-WEIGHT_0(?p, ?a)) +
((( sum_{?n : number} [ was-defended(?a, ?n) & POACHER-REMEMBERS(?p, ?n) ] ) == 1) * ATTACK-WEIGHT_1(?p, ?a))) ],
@a3 : (((( sum_{?n : number} [ was-defended(@a3, ?n) & POACHER-REMEMBERS(?p, ?n) ] ) == 0) * ATTACK-WEIGHT_0(?p, @a3)) +
((( sum_{?n : number} [ was-defended(@a3, ?n) & POACHER-REMEMBERS(?p, ?n) ] ) == 1) * ATTACK-WEIGHT_1(?p, @a3))) /
sum_{?a : area} [
(((( sum_{?n : number} [ was-defended(?a, ?n) & POACHER-REMEMBERS(?p, ?n) ] ) == 0) * ATTACK-WEIGHT_0(?p, ?a)) +
((( sum_{?n : number} [ was-defended(?a, ?n) & POACHER-REMEMBERS(?p, ?n) ] ) == 1) * ATTACK-WEIGHT_1(?p, ?a))) ],
@a4 : (((( sum_{?n : number} [ was-defended(@a4, ?n) & POACHER-REMEMBERS(?p, ?n) ] ) == 0) * ATTACK-WEIGHT_0(?p, @a4)) +
((( sum_{?n : number} [ was-defended(@a4, ?n) & POACHER-REMEMBERS(?p, ?n) ] ) == 1) * ATTACK-WEIGHT_1(?p, @a4))) /
sum_{?a : area} [
(((( sum_{?n : number} [ was-defended(?a, ?n) & POACHER-REMEMBERS(?p, ?n) ] ) == 0) * ATTACK-WEIGHT_0(?p, ?a)) +
((( sum_{?n : number} [ was-defended(?a, ?n) & POACHER-REMEMBERS(?p, ?n) ] ) == 1) * ATTACK-WEIGHT_1(?p, ?a))) ])
else poacher-attacks(?p);
};
// each area that is successfully attacked yields a penalty,
// and all other areas yield a reward
reward = if (current-level == @level1) then
( sum_{?a : area} [ if (~(exists_{?p : poacher} [ (poacher-attacks(?p) == ?a) & ~poacher-caught(?p) ]) |
( exists_{?r : ranger} [ defend-executed(?a, ?r) ] ) )
then DEFENDER-REWARD(?a) else DEFENDER-PENALTY(?a) ] )
else 0;
action-preconditions {
// each ranger defends one area per step
(current-level == @level0) => ( forall_{?r : ranger} [ ( sum_{?a : area} [ defend(?a, ?r) ] ) == 1 ] );
// enforce proceed-interm-level at all levels but @level0
(current-level == @level1) => proceed-interm-level;
// allow proceed-interm-level only at @level1 (i.e., forbid it at @level0)
proceed-interm-level => (current-level == @level1);
// allow original actions only at @level0
( forall_{ ?r : ranger, ?a : area } [ defend(?a, ?r) => (current-level == @level0) ] );
// simplifies grounding of action states (follows logically from other preconditions)
forall_{?r : ranger} [ ( sum_{?a : area} [ defend(?a, ?r) ] ) <= 1 ];
};
}
/////////////////////////////////////////////////////////////////////////////////
// //
// //
// RDDL MDP version of Wildlife Preserve instance #01 for IPC 2018 by Fei Fang //
// (feifang [at] cmu.edu), Thanh Hong Nguyen (thanhhng [at] umich.edu) and //
// Thomas Keller (tho.keller [at] unibas.ch), based on the papers "When //
// Security Games Go Green: Designing Defender Strategies to Prevent Poaching //
// and Illegal Fishing" by Fei Fang, Peter Stone and Milind Tambe (IJCAI 2015) //
// and "Analyzing the Effectiveness of Adversary Modeling in Security Games" //
// by Thanh H. Nguyen, Rong Yang, Amos Azaria, Sarit Kraus and Milind Tambe //
// (AAAI 2013). //
// //
// //
/////////////////////////////////////////////////////////////////////////////////
instance wildlife-preserve_inst_mdp__01 {
domain = wildlife-preserve_01_mdp;
objects {
ranger : { r1 };
poacher : { p1 };
};
non-fluents {
DEFENDER-REWARD(@a1) = 12.50;
DEFENDER-PENALTY(@a1) = -14.65;
DEFENDER-REWARD(@a2) = 6.91;
DEFENDER-PENALTY(@a2) = -5.32;
DEFENDER-REWARD(@a3) = 9.30;
DEFENDER-PENALTY(@a3) = -2.20;
DEFENDER-REWARD(@a4) = 14.46;
DEFENDER-PENALTY(@a4) = -5.42;
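// For example, if @a1 is attacked by an uncaught poacher while undefended and
// no other area is successfully attacked, the reward collected at @level1 is
// DEFENDER-PENALTY(@a1) + DEFENDER-REWARD(@a2) + DEFENDER-REWARD(@a3) +
// DEFENDER-REWARD(@a4) = -14.65 + 6.91 + 9.30 + 14.46 = 16.02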
// correlation between attacker reward and defender penalty as well as
// attacker penalty and defender reward is 1.00 for all poachers and all areas
// weights for poacher p1 are: w1 = -25.58, w2 = 0.78, w3 = 0.32
// reward for poacher p1 in area @a1 is: 14.65
// penalty for poacher p1 in area @a1 is: -12.50
// reward for poacher p1 in area @a2 is: 5.32
// penalty for poacher p1 in area @a2 is: -6.91
// reward for poacher p1 in area @a3 is: 2.20
// penalty for poacher p1 in area @a3 is: -9.30
// reward for poacher p1 in area @a4 is: 5.42
// penalty for poacher p1 in area @a4 is: -14.46
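// Up to rounding of the weights printed above, the precomputed values below
// follow from e^(w1 * eta + w2 * reward + w3 * penalty), e.g.
// ATTACK-WEIGHT_0(p1, @a1) ≈ e^(0.78 * 14.65 + 0.32 * (-12.50)) ≈ e^7.43 ≈ 1.7e3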
ATTACK-WEIGHT_0(p1, @a1) = 1719.61047;
ATTACK-WEIGHT_1(p1, @a1) = 0.00000;
ATTACK-WEIGHT_0(p1, @a2) = 7.06053;
ATTACK-WEIGHT_1(p1, @a2) = 0.00000;
ATTACK-WEIGHT_0(p1, @a3) = 0.29169;
ATTACK-WEIGHT_1(p1, @a3) = 0.00000;
ATTACK-WEIGHT_0(p1, @a4) = 0.69859;
ATTACK-WEIGHT_1(p1, @a4) = 0.00000;
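// With these values, as long as p1 has not recently observed a defended area,
// the attack almost certainly targets @a1:
//   1719.61 / (1719.61 + 7.06 + 0.29 + 0.70) ≈ 0.995
// Once @a1 is remembered as defended, its weight drops to 0 and @a2 becomes
// the most likely target: 7.06 / (7.06 + 0.29 + 0.70) ≈ 0.877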
POACHER-REMEMBERS(p1, @1);
};
init-state {
~was-defended(@a1, @1);
};
horizon = 60;
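// two interm-levels per round, so horizon 60 corresponds to 30 defend/resolve rounds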
discount = 1.0;
}