Last active
February 21, 2020 06:10
-
-
Save miquelramirez/67d35b824c01e2919241c774b1dcf737 to your computer and use it in GitHub Desktop.
Wildlife Preserve, instance #1 of benchmark suite in the IPPC-18
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/////////////////////////////////////////////////////////////////////////////////
//                                                                             //
//                                                                             //
//  RDDL MDP version without interm-fluents of the IPC 2018 Wildlife Preserve  //
//  domain for instance 01.                                                    //
//                                                                             //
//                                                                             //
//  Created for the probabilistic tracks of IPC 2018 by                        //
//                                                                             //
//      Fei Fang (feifang [at] cmu.edu),                                       //
//      Thanh Hong Nguyen (thanhhng [at] umich.edu) and                        //
//      Thomas Keller (tho.keller [at] unibas.ch)                              //
//                                                                             //
//  based on the papers                                                        //
//                                                                             //
//  [1] Fei Fang, Peter Stone and Milind Tambe: "When Security Games Go        //
//      Green: Designing Defender Strategies to Prevent Poaching and Illegal   //
//      Fishing". IJCAI 2015.                                                  //
//                                                                             //
//  [2] Thanh H. Nguyen, Rong Yang, Amos Azaria, Sarit Kraus and Milind        //
//      Tambe: "Analyzing the Effectiveness of Adversary Modeling in           //
//      Security Games". AAAI 2013.                                            //
//                                                                             //
//                                                                             //
//  The aim of the Wildlife Preserve domain is to protect a wildlife preserve  //
//  from poachers by sending available rangers to areas. Poachers attack parts //
//  of the preserve depending on their preferences and an expectation where    //
//  rangers will likely show up. This expectation is computed by exploiting    //
//  the assumption typically taken in Stackelberg Security Games that the      //
//  defender's (i.e., rangers) mixed strategy is fully observed by the         //
//  attacker, and memorized for a predefined number of steps.                  //
//                                                                             //
//  In each step, the planner obtains a reward for each area that has not been //
//  attacked undefended, and a penalty for each area that has. The challenge   //
//  is to predict where poachers will attack with high probability and to lure //
//  the poachers that observe each step of the rangers into attacking an area  //
//  where they are caught. A poacher that has been caught does not attack in   //
//  the next step.                                                             //
//                                                                             //
//                                                                             //
/////////////////////////////////////////////////////////////////////////////////
domain wildlife-preserve_01_mdp {
    requirements {
        reward-deterministic,
        preconditions
    };

    types {
        ranger       : object;
        poacher      : object;
        area         : { @a1, @a2, @a3, @a4 };
        number       : { @1 };
        interm_level : { @level0, @level1 };
    };

    pvariables {
        //////////////////// non-fluents ////////////////////

        // reward for a successfully defended (or unattacked) area
        DEFENDER-REWARD(area) : { non-fluent, real, default = 10.0 };

        // penalty for a successfully attacked area
        DEFENDER-PENALTY(area) : { non-fluent, real, default = -10.0 };

        // ATTACK-WEIGHT_x(poacher, area) corresponds to the value of
        // e^(w_1 * eta_i + w_2 * R^a_i + w_3 * P^a_i) as described in [1], where x is
        // the number of times the area has been attacked in the last y steps, where y
        // is the number of rounds the poacher remembers. As the RDDL Exponential
        // keyword was not explicitly mentioned as part of IPC 2018,
        // ATTACK-WEIGHT_x(poacher, area) is precomputed.
        ATTACK-WEIGHT_0(poacher, area) : { non-fluent, real, default = 0.0 };
        ATTACK-WEIGHT_1(poacher, area) : { non-fluent, real, default = 0.0 };

        // true if the poacher remembers what the rangers did a number
        // of steps ago equal to the given number
        POACHER-REMEMBERS(poacher, number) : { non-fluent, bool, default = false };

        //////////////////// state-fluents ////////////////////

        // the current level (for interm-fluents compilation)
        current-level : { state-fluent, interm_level, default = @level0 };

        // state-fluents that track which action was executed
        // (for interm-fluents compilation)
        defend-executed(area, ranger) : { state-fluent, bool, default = false };

        // the poacher attacks this area
        poacher-attacks(poacher) : { state-fluent, area, default = @a1 };

        // a ranger defended this area a number of steps ago equal to the given number
        was-defended(area, number) : { state-fluent, bool, default = false };

        // if a poacher attacks an area that is defended by a ranger, it is
        // caught in the next step
        poacher-caught(poacher) : { state-fluent, bool, default = false };

        //////////////////// action-fluents ////////////////////

        // the only available action is to have a ranger defend an area
        defend(area, ranger) : { action-fluent, bool, default = false };

        // no-op style action used to advance between interm-compilation levels
        proceed-interm-level : { action-fluent, bool, default = false };
    };

    cpfs {
        // encodes the levels of interm-fluents: alternate @level0 / @level1 each step
        current-level' =
            if (current-level == @level0) then @level1
            else @level0;

        // remembers if defend(?a, ?r) was executed at @level0
        defend-executed'(?a, ?r) =
            if (current-level == @level0) then defend(?a, ?r)
            else defend-executed(?a, ?r);

        // true if ?n is @1 and ?a was defended by a ranger, or if ?a
        // was defended at step ?n-1
        was-defended'(?a, ?n) =
            if (current-level == @level0) then (exists_{?r : ranger} [ defend(?a, ?r) ])
            else was-defended(?a, ?n);

        // a poacher is caught this step if it attacked a defended area last step
        poacher-caught'(?p) =
            if (current-level == @level1) then
                exists_{ ?r : ranger, ?a : area } [ (poacher-attacks(?p) == ?a) & defend-executed(?a, ?r) ]
            else poacher-caught(?p);

        // We use the SUQR model of [1] and [2] to determine which area is attacked:
        // each area's attack probability is its (precomputed) attack weight divided
        // by the sum of the attack weights of all areas.
        poacher-attacks'(?p) =
            if (current-level == @level0) then
                Discrete(area,
                         @a1 : (((( sum_{?n : number} [ was-defended(@a1, ?n) & POACHER-REMEMBERS(?p, ?n) ] ) == 0) * ATTACK-WEIGHT_0(?p, @a1)) +
                                ((( sum_{?n : number} [ was-defended(@a1, ?n) & POACHER-REMEMBERS(?p, ?n) ] ) == 1) * ATTACK-WEIGHT_1(?p, @a1))) /
                               sum_{?a : area} [
                                   (((( sum_{?n : number} [ was-defended(?a, ?n) & POACHER-REMEMBERS(?p, ?n) ] ) == 0) * ATTACK-WEIGHT_0(?p, ?a)) +
                                    ((( sum_{?n : number} [ was-defended(?a, ?n) & POACHER-REMEMBERS(?p, ?n) ] ) == 1) * ATTACK-WEIGHT_1(?p, ?a))) ],
                         @a2 : (((( sum_{?n : number} [ was-defended(@a2, ?n) & POACHER-REMEMBERS(?p, ?n) ] ) == 0) * ATTACK-WEIGHT_0(?p, @a2)) +
                                ((( sum_{?n : number} [ was-defended(@a2, ?n) & POACHER-REMEMBERS(?p, ?n) ] ) == 1) * ATTACK-WEIGHT_1(?p, @a2))) /
                               sum_{?a : area} [
                                   (((( sum_{?n : number} [ was-defended(?a, ?n) & POACHER-REMEMBERS(?p, ?n) ] ) == 0) * ATTACK-WEIGHT_0(?p, ?a)) +
                                    ((( sum_{?n : number} [ was-defended(?a, ?n) & POACHER-REMEMBERS(?p, ?n) ] ) == 1) * ATTACK-WEIGHT_1(?p, ?a))) ],
                         @a3 : (((( sum_{?n : number} [ was-defended(@a3, ?n) & POACHER-REMEMBERS(?p, ?n) ] ) == 0) * ATTACK-WEIGHT_0(?p, @a3)) +
                                ((( sum_{?n : number} [ was-defended(@a3, ?n) & POACHER-REMEMBERS(?p, ?n) ] ) == 1) * ATTACK-WEIGHT_1(?p, @a3))) /
                               sum_{?a : area} [
                                   (((( sum_{?n : number} [ was-defended(?a, ?n) & POACHER-REMEMBERS(?p, ?n) ] ) == 0) * ATTACK-WEIGHT_0(?p, ?a)) +
                                    ((( sum_{?n : number} [ was-defended(?a, ?n) & POACHER-REMEMBERS(?p, ?n) ] ) == 1) * ATTACK-WEIGHT_1(?p, ?a))) ],
                         @a4 : (((( sum_{?n : number} [ was-defended(@a4, ?n) & POACHER-REMEMBERS(?p, ?n) ] ) == 0) * ATTACK-WEIGHT_0(?p, @a4)) +
                                ((( sum_{?n : number} [ was-defended(@a4, ?n) & POACHER-REMEMBERS(?p, ?n) ] ) == 1) * ATTACK-WEIGHT_1(?p, @a4))) /
                               sum_{?a : area} [
                                   (((( sum_{?n : number} [ was-defended(?a, ?n) & POACHER-REMEMBERS(?p, ?n) ] ) == 0) * ATTACK-WEIGHT_0(?p, ?a)) +
                                    ((( sum_{?n : number} [ was-defended(?a, ?n) & POACHER-REMEMBERS(?p, ?n) ] ) == 1) * ATTACK-WEIGHT_1(?p, ?a))) ])
            else poacher-attacks(?p);
    };

    // each area that is successfully attacked yields a penalty,
    // and all other areas yield a reward; rewards are only collected
    // at @level1 (the "evaluation" half of each compiled step)
    reward = if (current-level == @level1) then
                 ( sum_{?a : area} [ if (~(exists_{?p : poacher} [ (poacher-attacks(?p) == ?a) & ~poacher-caught(?p) ]) |
                                         ( exists_{?r : ranger} [ defend-executed(?a, ?r) ] ) )
                                     then DEFENDER-REWARD(?a) else DEFENDER-PENALTY(?a) ] )
             else 0;

    action-preconditions {
        // each ranger defends one area per step
        (current-level == @level0) => ( forall_{?r : ranger} [ ( sum_{?a : area} [ defend(?a, ?r) ] ) == 1 ] );

        // enforce proceed-interm-level at all levels but @level0
        (current-level == @level1) => proceed-interm-level;

        // forbid proceed-interm-level at all other levels but @level0
        proceed-interm-level => (current-level == @level1);

        // allow original actions only at @level0
        ( forall_{ ?r : ranger, ?a : area } [ defend(?a, ?r) => (current-level == @level0) ] );

        // simplifies grounding of action states (follows logically from other preconditions)
        forall_{?r : ranger} [ ( sum_{?a : area} [ defend(?a, ?r) ] ) <= 1 ];
    };
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/////////////////////////////////////////////////////////////////////////////////
//                                                                             //
//                                                                             //
//  RDDL MDP version of Wildlife Preserve instance #01 for IPC 2018 by Fei     //
//  Fang (feifang [at] cmu.edu), Thanh Hong Nguyen (thanhhng [at] umich.edu)   //
//  and Thomas Keller (tho.keller [at] unibas.ch), based on the papers "When   //
//  Security Games Go Green: Designing Defender Strategies to Prevent          //
//  Poaching and Illegal Fishing" by Fei Fang, Peter Stone and Milind Tambe    //
//  (IJCAI 2015) and "Analyzing the Effectiveness of Adversary Modeling in     //
//  Security Games" by Thanh H. Nguyen, Rong Yang, Amos Azaria, Sarit Kraus    //
//  and Milind Tambe (AAAI 2013).                                              //
//                                                                             //
//                                                                             //
/////////////////////////////////////////////////////////////////////////////////
instance wildlife-preserve_inst_mdp__01 {
    domain = wildlife-preserve_01_mdp;

    objects {
        ranger  : { r1 };
        poacher : { p1 };
    };

    non-fluents {
        // defender utilities per area
        DEFENDER-REWARD(@a1)  = 12.50;
        DEFENDER-PENALTY(@a1) = -14.65;
        DEFENDER-REWARD(@a2)  = 6.91;
        DEFENDER-PENALTY(@a2) = -5.32;
        DEFENDER-REWARD(@a3)  = 9.30;
        DEFENDER-PENALTY(@a3) = -2.20;
        DEFENDER-REWARD(@a4)  = 14.46;
        DEFENDER-PENALTY(@a4) = -5.42;

        // correlation between attacker reward and defender penalty as well as
        // attacker penalty and defender reward is 1.00 for all poachers and all areas

        // weights for poacher p1 are: w1 = -25.58, w2 = 0.78, w3 = 0.32

        // reward for poacher p1 in area @a1 is: 14.65
        // penalty for poacher p1 in area @a1 is: -12.50
        // reward for poacher p1 in area @a2 is: 5.32
        // penalty for poacher p1 in area @a2 is: -6.91
        // reward for poacher p1 in area @a3 is: 2.20
        // penalty for poacher p1 in area @a3 is: -9.30
        // reward for poacher p1 in area @a4 is: 5.42
        // penalty for poacher p1 in area @a4 is: -14.46

        // precomputed SUQR attack weights (see domain file for the formula)
        ATTACK-WEIGHT_0(p1, @a1) = 1719.61047;
        ATTACK-WEIGHT_1(p1, @a1) = 0.00000;
        ATTACK-WEIGHT_0(p1, @a2) = 7.06053;
        ATTACK-WEIGHT_1(p1, @a2) = 0.00000;
        ATTACK-WEIGHT_0(p1, @a3) = 0.29169;
        ATTACK-WEIGHT_1(p1, @a3) = 0.00000;
        ATTACK-WEIGHT_0(p1, @a4) = 0.69859;
        ATTACK-WEIGHT_1(p1, @a4) = 0.00000;

        // p1 remembers ranger behavior from one step ago
        POACHER-REMEMBERS(p1, @1);
    };

    init-state {
        // no area starts out as having been defended
        ~was-defended(@a1, @1);
    };

    horizon  = 60;
    discount = 1.0;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment