use Monte Carlo trees to evaluate the best moves for all snakes

This commit is contained in:
Max Känner 2024-10-03 01:43:14 +02:00
parent 30c20b3f54
commit 8fa8282177
5 changed files with 341 additions and 175 deletions

45
Cargo.lock generated
View File

@ -150,7 +150,9 @@ version = "1.0.0"
dependencies = [
"enum-iterator",
"env_logger",
"iter_tools",
"log",
"ordered-float",
"rand",
"rocket",
"serde",
@ -202,6 +204,12 @@ version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "clone_dyn_types"
version = "0.23.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f4f0e34968641cc21b39c159b7d07e8c0f573cbc0ef9cd59e452fe6774c0579"
[[package]]
name = "colorchoice"
version = "1.0.2"
@ -633,6 +641,25 @@ version = "1.70.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf"
[[package]]
name = "iter_tools"
version = "0.21.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "27812bb0a056539d62930a899759af39dfab17ac73a17d5caf58365762657891"
dependencies = [
"clone_dyn_types",
"itertools",
]
[[package]]
name = "itertools"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57"
dependencies = [
"either",
]
[[package]]
name = "itoa"
version = "1.0.11"
@ -765,6 +792,15 @@ version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9"
[[package]]
name = "num-traits"
version = "0.2.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
dependencies = [
"autocfg",
]
[[package]]
name = "num_cpus"
version = "1.16.0"
@ -790,6 +826,15 @@ version = "1.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92"
[[package]]
name = "ordered-float"
version = "4.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "44d501f1a72f71d3c063a6bbc8f7271fa73aa09fe5d6283b6571e2ed176a2537"
dependencies = [
"num-traits",
]
[[package]]
name = "overload"
version = "0.1.1"

View File

@ -23,3 +23,5 @@ log = "0.4.0"
env_logger = "0.11.5"
rand = "0.8.4"
enum-iterator = "2.1"
iter_tools = "0.21"
ordered-float = "4.3.0"

View File

@ -10,12 +10,21 @@
// To get you started we've included code to prevent your Battlesnake from moving backwards.
// For more info see docs.battlesnake.com
use std::{cmp::Ordering, time::Instant};
use core::f64;
use std::{
cmp::Ordering,
collections::{BTreeMap, BTreeSet},
time::Instant,
};
use log::info;
use ordered_float::OrderedFloat;
use serde_json::{json, Value};
use crate::{simulation, Action, Battlesnake, Board, Direction, Game, MAX_HEALTH};
use crate::{
simulation::{self, SnakeToken},
Action, Battlesnake, Board, Direction, Game, MAX_HEALTH,
};
impl Battlesnake {
fn possible_actions_without_heads<'a>(
@ -110,68 +119,32 @@ pub fn end(_game: &Game, _turn: i32, _board: &Board, _you: &Battlesnake) {
// Valid moves are "up", "down", "left", or "right"
// See https://docs.battlesnake.com/api/example-move for available data
pub fn get_move(game: &Game, turn: i32, board: &Board, you: &Battlesnake) -> Option<Action> {
let id_map = board
.snakes
.iter()
.enumerate()
.map(|(i, snake)| (snake.id.clone(), u8::try_from(i).unwrap()))
.collect();
let board = simulation::Board::from_game_board(board, &id_map, turn);
let my_id = id_map[&you.id];
let my_index = board.snake_index(my_id)?;
let possible_actions = board.possible_actions();
let my_actions = &possible_actions[my_index];
let actions = my_actions
.iter()
.map(|direction| {
let mut actions = vec![None; possible_actions.len()];
actions[my_index] = Some(*direction);
let mut wins = 0;
let mut total_turns = 0;
let start = Instant::now();
for _ in 0..100 {
let mut board = board.clone();
board.simulate_with_initial_until(&actions[..], |board| {
!board.is_alive(my_id)
|| (game.ruleset.name != "solo" && board.alive_snakes() <= 1)
});
if board.is_alive(my_id) {
// we survived
wins += 2;
} else if board.alive_snakes() == 0 {
// no snake is alive. This is a draw
wins += 1;
} else {
// we lost
wins += 0;
}
total_turns += board.turn();
}
let end = Instant::now();
info!(
"Simulation for {direction:?} took {}s",
(end - start).as_secs_f32()
let token_map = SnakeToken::from_board(board);
let board = simulation::Board::from_game_board(
board,
&token_map,
turn,
game.ruleset.settings.food_spawn_chance,
game.ruleset.settings.minimum_food,
);
(direction, wins, total_turns)
})
.collect::<Vec<_>>();
let my_token = token_map[&you.id];
let mut tree = Node::default();
for _ in 0..300 {
let mut board = board.clone();
tree.monte_carlo_step(&mut board);
}
let actions = tree.child_statistics.entry(my_token).or_default();
info!("actions: {actions:?}");
let (&chosen, _, _) =
actions
.into_iter()
.max_by(
|(_, score1, turns1), (_, score2, turns2)| match score1.cmp(score2) {
Ordering::Equal => turns1.cmp(turns2),
order => order,
},
)?;
let chosen = actions
.iter()
.max_by_key(|(_, stat)| OrderedFloat(stat.won as f64 / stat.played as f64))
.map(|(direction, _)| *direction)?;
info!("DIRECTION {}: {:?}", turn, chosen);
Some(Action {
@ -179,3 +152,97 @@ pub fn get_move(game: &Game, turn: i32, board: &Board, you: &Battlesnake) -> Opt
shout: None,
})
}
/// Aggregated results of all simulations that passed through one search-tree node.
#[derive(Debug, PartialEq, Eq, Clone, Default)]
struct Statistics {
    /// Number of times this node was simulated
    played: usize,
    /// For each snake, the number of simulations through this node that the snake has won.
    /// Snakes that never won have no entry.
    won: BTreeMap<SnakeToken, usize>,
}
/// Results for a single direction chosen by a single snake at one search-tree node.
#[derive(Debug, PartialEq, Eq, Clone, Default)]
struct ActionStatistic {
    /// Number of simulations in which the snake took this direction.
    played: usize,
    /// Number of those simulations that the snake went on to win.
    won: usize,
}
/// One node of the Monte Carlo search tree. The board state it stands for is not stored here;
/// it is passed to the search step alongside the node.
#[derive(Debug, PartialEq, Eq, Clone, Default)]
struct Node {
    /// Totals for every simulation that passed through this node.
    statistic: Statistics,
    /// Per snake and per direction: how often that direction was taken from this node and how
    /// often the snake won afterwards.
    child_statistics: BTreeMap<SnakeToken, BTreeMap<Direction, ActionStatistic>>,
    /// Child nodes, keyed by the joint action (one direction per snake) that leads to them.
    childs: BTreeMap<BTreeMap<SnakeToken, Direction>, Node>,
}
impl Node {
/// Performs one monte carlo simulation step
///
/// Returns the snake that has won the simulation
fn monte_carlo_step(&mut self, board: &mut simulation::Board) -> Option<SnakeToken> {
let winner = if self.statistic.played == 0 {
// We didn't simulate a game for this node yet. Do that
board.simulate_until(|board| board.alive_snakes() <= 1);
board.snakes().next()
} else {
// select a node to simulate
let possible_actions = board.possible_actions();
let actions = possible_actions
.iter()
.map(|(token, actions)| {
let statistics = self.child_statistics.entry(*token).or_default();
let selected = actions
.iter()
.copied()
.max_by_key(|direction| {
let statistics = statistics.entry(*direction).or_default();
if statistics.played == 0 {
return OrderedFloat(f64::INFINITY);
}
let exploitation = statistics.won as f64 / statistics.played as f64;
let exploration = f64::consts::SQRT_2
* f64::sqrt(
f64::ln(self.statistic.played as f64)
/ statistics.played as f64,
);
OrderedFloat(exploitation + exploration)
})
.unwrap_or_default();
(*token, selected)
})
.collect();
board.simulate_actions(&actions);
let winner = self
.childs
.entry(actions.clone())
.or_default()
.monte_carlo_step(board);
// update child statistics
for (token, action) in &actions {
let entry = self
.child_statistics
.entry(*token)
.or_default()
.entry(*action)
.or_default();
entry.played += 1;
if Some(*token) == winner {
entry.won += 1;
}
}
winner
};
self.statistic.played += 1;
if let Some(token) = winner {
self.statistic
.won
.entry(token)
.and_modify(|won| *won += 1)
.or_insert(1);
}
winner
}
}

View File

@ -18,10 +18,24 @@ const MAX_HEALTH: i32 = 100;
// API and Response Objects
// See https://docs.battlesnake.com/api
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy, Deserialize, Serialize, Sequence)]
#[derive(
Debug,
PartialEq,
Eq,
PartialOrd,
Ord,
Hash,
Clone,
Copy,
Deserialize,
Serialize,
Sequence,
Default,
)]
#[serde(rename_all = "lowercase")]
pub enum Direction {
/// Move left (-x)
#[default]
Left,
/// Move up (+y)
Up,
@ -82,14 +96,14 @@ pub struct Ruleset {
pub struct RulesetSettings {
/// Percentage chance of spawning a new food every round.
#[serde(rename = "foodSpawnChance")]
food_spawn_chance: i32,
food_spawn_chance: u8,
/// Minimum food to keep on the board every turn.
#[serde(rename = "minimumFood")]
minimum_food: i32,
minimum_food: u8,
/// Health damage a snake will take when ending its turn in a hazard. This stacks on top of the
/// regular 1 damage a snake takes per turn.
#[serde(rename = "hazardDamagePerTurn")]
hazard_damage_per_turn: i32,
hazard_damage_per_turn: u8,
/// rules for the royale mode
royale: RulesetRoyale,
/// rules for the squad mode

View File

@ -1,11 +1,39 @@
use std::collections::{BTreeSet, HashMap, VecDeque};
use std::collections::{BTreeMap, BTreeSet, VecDeque};
use rand::seq::SliceRandom;
use iter_tools::Itertools;
use rand::{
seq::{IteratorRandom, SliceRandom},
Rng,
};
use crate::{Coord, Direction};
/// Maximum snake health as a `u8`, matching the health type used by the simulation.
// `crate::MAX_HEALTH` is 100, which fits into a `u8`, so the cast cannot truncate.
#[allow(clippy::cast_possible_truncation)]
const MAX_HEALTH: u8 = crate::MAX_HEALTH as u8;
/// Small, copyable handle that identifies a snake inside the simulation.
///
/// Tokens are created by [`SnakeToken::from_board`], which maps the string ids of the game
/// board's snakes to tokens.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy)]
pub struct SnakeToken {
    /// Position of the snake in the game board's snake list at the time the token was created.
    id: u8,
}
impl SnakeToken {
    /// Builds the lookup table from the game's snake ids to simulation tokens.
    ///
    /// Each snake on `board` receives a token whose id is its position in `board.snakes`.
    ///
    /// # Panics
    ///
    /// Panics if a snake's position does not fit into a `u8`.
    pub fn from_board(board: &crate::Board) -> BTreeMap<String, SnakeToken> {
        let mut tokens = BTreeMap::new();
        for (position, snake) in board.snakes.iter().enumerate() {
            let id = u8::try_from(position).expect("Way to many snakes for a single game");
            tokens.insert(snake.id.clone(), Self { id });
        }
        tokens
    }
}
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct Board {
turn: i32,
@ -15,12 +43,22 @@ pub struct Board {
width: i32,
/// Food on the board
food: BTreeSet<Coord>,
/// Chance of new food spawning each round
food_chance: u8,
/// minimum quantity of food that must be on the board
min_food: u8,
/// Alive snakes
snakes: Vec<Battlesnake>,
snakes: BTreeMap<SnakeToken, Battlesnake>,
}
impl Board {
pub fn from_game_board(board: &crate::Board, id_map: &HashMap<String, u8>, turn: i32) -> Self {
pub fn from_game_board(
board: &crate::Board,
token_map: &BTreeMap<String, SnakeToken>,
turn: i32,
food_chance: u8,
min_food: u8,
) -> Self {
let width = board.width;
debug_assert!(width > 0);
let height = board.height;
@ -30,8 +68,8 @@ impl Board {
.snakes
.iter()
.map(|snake| {
let id = id_map[&snake.id];
Battlesnake::from_game_snake(snake, id)
let token = token_map[&snake.id];
(token, Battlesnake::from_game_snake(snake))
})
.collect();
@ -40,6 +78,8 @@ impl Board {
height,
width,
food,
food_chance,
min_food,
snakes,
}
}
@ -48,32 +88,26 @@ impl Board {
self.turn
}
pub fn snake_index(&self, id: u8) -> Option<usize> {
self.snakes
.iter()
.enumerate()
.find(|(_, snake)| snake.id == id)
.map(|(i, _)| i)
}
pub fn is_alive(&self, id: u8) -> bool {
self.snakes.iter().any(|snake| snake.id == id)
/// Returns `true` if the snake identified by `token` is still among the alive snakes.
pub fn is_alive(&self, token: SnakeToken) -> bool {
    self.snakes.contains_key(&token)
}
/// Returns the number of snakes that are currently alive.
pub fn alive_snakes(&self) -> usize {
    self.snakes.len()
}
pub fn simulate_actions(&mut self, actions: &[Direction]) {
debug_assert_eq!(self.snakes.len(), actions.len());
/// Iterates over the tokens of all alive snakes in ascending token order.
pub fn snakes(&self) -> impl Iterator<Item = SnakeToken> + '_ {
    self.snakes.keys().copied()
}
pub fn simulate_actions(&mut self, actions: &BTreeMap<SnakeToken, Direction>) {
// move snakes
for (snake, direction) in self.snakes.iter_mut().zip(actions.iter()) {
snake.perform_action(*direction);
for (token, snake) in &mut self.snakes {
snake.perform_action(actions.get(token).copied().unwrap_or_default());
}
// feed snakes
for snake in &mut self.snakes {
for snake in &mut self.snakes.values_mut() {
let head = snake.head();
if self.food.remove(head) {
snake.health = MAX_HEALTH;
@ -84,11 +118,11 @@ impl Board {
let alive_ids = self
.snakes
.iter()
.filter(|snake| {
.filter(|(_, snake)| {
// snake must have enough health
snake.health != 0
})
.map(|snake| (snake.id, snake.body.len(), *snake.head()))
.map(|(token, snake)| (*token, snake.body.len(), *snake.head()))
.filter(|(_, _, head)| {
// head in bounds
(0..self.width).contains(&head.x) && (0..self.height).contains(&head.y)
@ -97,77 +131,87 @@ impl Board {
// body collision
!self
.snakes
.iter()
.values()
.flat_map(|snake2| snake2.body.iter().skip(1))
.any(|body| body == head)
})
.filter(|(id, len, head)| {
.filter(|(token, len, head)| {
// head to head collision
!self
.snakes
.iter()
.filter(|snake2| snake2.id != *id && snake2.body.len() >= *len)
.any(|snake2| snake2.head() == head)
.filter(|(token2, snake2)| *token2 != token && snake2.body.len() >= *len)
.any(|(_, snake2)| snake2.head() == head)
})
.map(|(id, _, _)| id)
.map(|(token, _, _)| token)
.collect::<Vec<_>>();
self.snakes.retain(|snake| alive_ids.contains(&snake.id));
self.snakes.retain(|token, _| alive_ids.contains(token));
// spawn new food
if self.food.len() < usize::from(self.min_food)
|| rand::thread_rng().gen_ratio(u32::from(self.food_chance), 100)
{
let free_fields = (0..self.width)
.flat_map(|x| (0..self.height).map(move |y| Coord { x, y }))
.filter(|coord| {
!self
.snakes
.values()
.flat_map(|snake| snake.body.iter())
.any(|body| body == coord)
})
.filter(|coord| self.food.contains(coord));
if let Some(field) = free_fields.choose(&mut rand::thread_rng()) {
self.food.insert(field);
}
}
self.turn += 1;
}
pub fn simulate_with_initial_until(
&mut self,
actions: &[Option<Direction>],
exit: impl Fn(&Self) -> bool,
) {
debug_assert_eq!(actions.len(), self.snakes.len());
let possible_actions = self.possible_actions();
let actions = actions
.iter()
.enumerate()
.map(|(i, direction)| {
direction.unwrap_or_else(|| {
possible_actions[i]
.choose(&mut rand::thread_rng())
.copied()
.unwrap_or(Direction::Up)
})
})
.collect::<Vec<_>>();
self.simulate_actions(&actions);
pub fn simulate_until(&mut self, exit: impl Fn(&Self) -> bool) {
while !exit(self) {
let actions = self
.possible_actions()
.iter()
.map(|actions| {
.map(|(token, actions)| {
(
*token,
actions
.iter()
.choose(&mut rand::thread_rng())
.copied()
.unwrap_or(Direction::Up)
.unwrap_or_default(),
)
})
.collect::<Vec<_>>();
.collect();
self.simulate_actions(&actions);
}
}
pub fn possible_actions(&self) -> Vec<Vec<Direction>> {
let possible_actions = self
pub fn possible_actions(&self) -> BTreeMap<SnakeToken, BTreeSet<Direction>> {
let mut actions: BTreeMap<_, BTreeSet<_>> = self
.snakes
.iter()
.map(|snake| {
enum_iterator::all::<Direction>()
.map(|direction| (direction, snake.head().move_to(direction)))
.filter(|(_, target)| {
.keys()
.map(|&token| (token, enum_iterator::all::<Direction>().collect()))
.collect();
for (token, actions) in &mut actions {
let snake = &self.snakes[token];
let head = snake.head();
actions.retain(|direction| {
let target = head.move_to(*direction);
// don't move out of bounds
(0..self.width).contains(&target.x) && (0..self.height).contains(&target.y)
})
.filter(|(_, target)| {
if !((0..self.width).contains(&target.x) && (0..self.height).contains(&target.y)) {
return false;
}
// don't collide with other snakes
!self
.snakes
.iter()
.values()
.flat_map(|snake| {
let has_eaten = snake.health == MAX_HEALTH;
snake
@ -175,51 +219,45 @@ impl Board {
.iter()
.take(snake.body.len() - usize::from(!has_eaten))
})
.any(|coord| coord == target)
})
.map(|(direction, _)| direction)
.collect::<Vec<_>>()
})
.collect::<Vec<_>>();
.any(|coord| *coord == target)
});
}
// don't move into bigger snakes heads with only one movement option
possible_actions
.iter()
.enumerate()
.map(|(i, actions)| {
let snake = &self.snakes[i];
let length = snake.body.len();
let head = snake.head();
actions
.iter()
.copied()
.filter(|direction| {
let target = head.move_to(*direction);
!self
let bigger_snakes = self
.snakes
.iter()
.enumerate()
.filter(|(_, snake)| {
// only snakes that are longer
snake.body.len() > length
.sorted_unstable_by(|(_, snake1), (_, snake2)| snake2.health.cmp(&snake1.health))
.map(|(token, snake)| {
if actions[token].len() == 1 {
(
snake.body.len(),
Some(snake.head().move_to(*actions[token].first().unwrap())),
)
} else {
(snake.body.len(), None)
}
})
.filter_map(|(i, snake)| match &possible_actions[i][..] {
// only snakes that have a single action option
[direction] => Some(snake.head().move_to(*direction)),
_ => None,
})
.any(|coord| coord == target)
})
.collect()
})
.collect()
.collect::<Vec<_>>();
for (token, actions) in &mut actions {
let snake = &self.snakes[token];
let head = snake.head();
actions.retain(|direction| {
let target = head.move_to(*direction);
!bigger_snakes
.iter()
.take_while(|(length, _)| *length > snake.body.len())
.any(|(_, coord)| coord.map_or(false, |coord| coord == target))
});
}
actions
}
}
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct Battlesnake {
/// Id of the snake. Unique inside a game
id: u8,
/// health points
health: u8,
/// Body of the snake. The head is the first element in the queue
@ -227,12 +265,12 @@ pub struct Battlesnake {
}
impl Battlesnake {
pub fn from_game_snake(snake: &crate::Battlesnake, id: u8) -> Self {
pub fn from_game_snake(snake: &crate::Battlesnake) -> Self {
let body: VecDeque<_> = snake.body.iter().copied().collect();
debug_assert_eq!(body.len(), usize::try_from(snake.length).unwrap());
debug_assert!(snake.health <= crate::MAX_HEALTH);
let health = u8::try_from(snake.health).expect("max health is 100");
Self { id, health, body }
Self { health, body }
}
pub fn perform_action(&mut self, direction: Direction) {