diff --git a/Cargo.lock b/Cargo.lock index b880e3a..67e06f1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -150,7 +150,9 @@ version = "1.0.0" dependencies = [ "enum-iterator", "env_logger", + "iter_tools", "log", + "ordered-float", "rand", "rocket", "serde", @@ -202,6 +204,12 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "clone_dyn_types" +version = "0.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f4f0e34968641cc21b39c159b7d07e8c0f573cbc0ef9cd59e452fe6774c0579" + [[package]] name = "colorchoice" version = "1.0.2" @@ -633,6 +641,25 @@ version = "1.70.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" +[[package]] +name = "iter_tools" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "27812bb0a056539d62930a899759af39dfab17ac73a17d5caf58365762657891" +dependencies = [ + "clone_dyn_types", + "itertools", +] + +[[package]] +name = "itertools" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.11" @@ -765,6 +792,15 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + [[package]] name = "num_cpus" version = "1.16.0" @@ -790,6 +826,15 @@ version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" +[[package]] +name = "ordered-float" +version = "4.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44d501f1a72f71d3c063a6bbc8f7271fa73aa09fe5d6283b6571e2ed176a2537" +dependencies = [ + "num-traits", +] + [[package]] name = "overload" version = "0.1.1" diff --git a/battlesnake/Cargo.toml b/battlesnake/Cargo.toml index 91c69ba..aba55f9 100644 --- a/battlesnake/Cargo.toml +++ b/battlesnake/Cargo.toml @@ -23,3 +23,5 @@ log = "0.4.0" env_logger = "0.11.5" rand = "0.8.4" enum-iterator = "2.1" +iter_tools = "0.21" +ordered-float = "4.3.0" diff --git a/battlesnake/src/logic.rs b/battlesnake/src/logic.rs index 1400341..93ce660 100644 --- a/battlesnake/src/logic.rs +++ b/battlesnake/src/logic.rs @@ -10,12 +10,21 @@ // To get you started we've included code to prevent your Battlesnake from moving backwards. // For more info see docs.battlesnake.com -use std::{cmp::Ordering, time::Instant}; +use core::f64; +use std::{ + cmp::Ordering, + collections::{BTreeMap, BTreeSet}, + time::Instant, +}; use log::info; +use ordered_float::OrderedFloat; use serde_json::{json, Value}; -use crate::{simulation, Action, Battlesnake, Board, Direction, Game, MAX_HEALTH}; +use crate::{ + simulation::{self, SnakeToken}, + Action, Battlesnake, Board, Direction, Game, MAX_HEALTH, +}; impl Battlesnake { fn possible_actions_without_heads<'a>( @@ -110,68 +119,32 @@ pub fn end(_game: &Game, _turn: i32, _board: &Board, _you: &Battlesnake) { // Valid moves are "up", "down", "left", or "right" // See https://docs.battlesnake.com/api/example-move for available data pub fn get_move(game: &Game, turn: i32, board: &Board, you: &Battlesnake) -> Option { - let id_map = board - .snakes - .iter() - .enumerate() - .map(|(i, snake)| (snake.id.clone(), u8::try_from(i).unwrap())) - .collect(); - let board = simulation::Board::from_game_board(board, &id_map, turn); + let token_map = SnakeToken::from_board(board); + let 
board = simulation::Board::from_game_board( + board, + &token_map, + turn, + game.ruleset.settings.food_spawn_chance, + game.ruleset.settings.minimum_food, + ); - let my_id = id_map[&you.id]; - let my_index = board.snake_index(my_id)?; + let my_token = token_map[&you.id]; - let possible_actions = board.possible_actions(); + let mut tree = Node::default(); - let my_actions = &possible_actions[my_index]; + for _ in 0..300 { + let mut board = board.clone(); + tree.monte_carlo_step(&mut board); + } - let actions = my_actions - .iter() - .map(|direction| { - let mut actions = vec![None; possible_actions.len()]; - actions[my_index] = Some(*direction); - let mut wins = 0; - let mut total_turns = 0; - let start = Instant::now(); - for _ in 0..100 { - let mut board = board.clone(); - board.simulate_with_initial_until(&actions[..], |board| { - !board.is_alive(my_id) - || (game.ruleset.name != "solo" && board.alive_snakes() <= 1) - }); - if board.is_alive(my_id) { - // we survived - wins += 2; - } else if board.alive_snakes() == 0 { - // no snake is alive. 
This is a draw - wins += 1; - } else { - // we lost - wins += 0; - } - total_turns += board.turn(); - } - let end = Instant::now(); - info!( - "Simulation for {direction:?} took {}s", - (end - start).as_secs_f32() - ); - - (direction, wins, total_turns) - }) - .collect::>(); + let actions = tree.child_statistics.entry(my_token).or_default(); info!("actions: {actions:?}"); - let (&chosen, _, _) = - actions - .into_iter() - .max_by( - |(_, score1, turns1), (_, score2, turns2)| match score1.cmp(score2) { - Ordering::Equal => turns1.cmp(turns2), - order => order, - }, - )?; + let chosen = actions + .iter() + .max_by_key(|(_, stat)| OrderedFloat(stat.won as f64 / stat.played as f64)) + .map(|(direction, _)| *direction)?; info!("DIRECTION {}: {:?}", turn, chosen); Some(Action { @@ -179,3 +152,97 @@ pub fn get_move(game: &Game, turn: i32, board: &Board, you: &Battlesnake) -> Opt shout: None, }) } + +#[derive(Debug, PartialEq, Eq, Clone, Default)] +struct Statistics { + /// Number of times this node was simulated + played: usize, + /// Number of times this node was simulated and the agent has won. + won: BTreeMap, +} + +#[derive(Debug, PartialEq, Eq, Clone, Default)] +struct ActionStatistic { + played: usize, + won: usize, +} + +#[derive(Debug, PartialEq, Eq, Clone, Default)] +struct Node { + statistic: Statistics, + child_statistics: BTreeMap>, + childs: BTreeMap, Node>, +} + +impl Node { + /// Performs one monte carlo simulation step + /// + /// Returns the snake that has won the simulation + fn monte_carlo_step(&mut self, board: &mut simulation::Board) -> Option { + let winner = if self.statistic.played == 0 { + // We didn't simulate a game for this node yet. 
Do that + board.simulate_until(|board| board.alive_snakes() <= 1); + board.snakes().next() + } else { + // select a node to simulate + let possible_actions = board.possible_actions(); + + let actions = possible_actions + .iter() + .map(|(token, actions)| { + let statistics = self.child_statistics.entry(*token).or_default(); + let selected = actions + .iter() + .copied() + .max_by_key(|direction| { + let statistics = statistics.entry(*direction).or_default(); + if statistics.played == 0 { + return OrderedFloat(f64::INFINITY); + } + let exploitation = statistics.won as f64 / statistics.played as f64; + let exploration = f64::consts::SQRT_2 + * f64::sqrt( + f64::ln(self.statistic.played as f64) + / statistics.played as f64, + ); + OrderedFloat(exploitation + exploration) + }) + .unwrap_or_default(); + (*token, selected) + }) + .collect(); + + board.simulate_actions(&actions); + let winner = self + .childs + .entry(actions.clone()) + .or_default() + .monte_carlo_step(board); + + // update child statistics + for (token, action) in &actions { + let entry = self + .child_statistics + .entry(*token) + .or_default() + .entry(*action) + .or_default(); + entry.played += 1; + if Some(*token) == winner { + entry.won += 1; + } + } + + winner + }; + self.statistic.played += 1; + if let Some(token) = winner { + self.statistic + .won + .entry(token) + .and_modify(|won| *won += 1) + .or_insert(1); + } + winner + } +} diff --git a/battlesnake/src/main.rs b/battlesnake/src/main.rs index feaa545..cafd627 100644 --- a/battlesnake/src/main.rs +++ b/battlesnake/src/main.rs @@ -18,10 +18,24 @@ const MAX_HEALTH: i32 = 100; // API and Response Objects // See https://docs.battlesnake.com/api -#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy, Deserialize, Serialize, Sequence)] +#[derive( + Debug, + PartialEq, + Eq, + PartialOrd, + Ord, + Hash, + Clone, + Copy, + Deserialize, + Serialize, + Sequence, + Default, +)] #[serde(rename_all = "lowercase")] pub enum Direction { /// Move left (-x) + 
#[default] Left, /// Move up (+y) Up, @@ -82,14 +96,14 @@ pub struct Ruleset { pub struct RulesetSettings { /// Percentage chance of spawning a new food every round. #[serde(rename = "foodSpawnChance")] - food_spawn_chance: i32, + food_spawn_chance: u8, /// Minimum food to keep on the board every turn. #[serde(rename = "minimumFood")] - minimum_food: i32, + minimum_food: u8, /// Health damage a snake will take when ending its turn in a hazard. This stacks on top of the /// regular 1 damage a snake takes per turn. #[serde(rename = "hazardDamagePerTurn")] - hazard_damage_per_turn: i32, + hazard_damage_per_turn: u8, /// rules for the royale mode royale: RulesetRoyale, /// rules for the squad mode diff --git a/battlesnake/src/simulation.rs b/battlesnake/src/simulation.rs index 99cdee6..92cab80 100644 --- a/battlesnake/src/simulation.rs +++ b/battlesnake/src/simulation.rs @@ -1,11 +1,39 @@ -use std::collections::{BTreeSet, HashMap, VecDeque}; +use std::collections::{BTreeMap, BTreeSet, VecDeque}; -use rand::seq::SliceRandom; +use iter_tools::Itertools; +use rand::{ + seq::{IteratorRandom, SliceRandom}, + Rng, +}; use crate::{Coord, Direction}; +#[allow(clippy::cast_possible_truncation)] const MAX_HEALTH: u8 = crate::MAX_HEALTH as u8; +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy)] +pub struct SnakeToken { + id: u8, +} + +impl SnakeToken { + pub fn from_board(board: &crate::Board) -> BTreeMap { + board + .snakes + .iter() + .enumerate() + .map(|(i, snake)| { + ( + snake.id.clone(), + Self { + id: u8::try_from(i).expect("Way to many snakes for a single game"), + }, + ) + }) + .collect() + } +} + #[derive(Debug, PartialEq, Eq, Clone)] pub struct Board { turn: i32, @@ -15,12 +43,22 @@ pub struct Board { width: i32, /// Food on the board food: BTreeSet, + /// Chance of new food spawning each round + food_chance: u8, + /// minimum quantity of food that must be on the board + min_food: u8, /// Alive snakes - snakes: Vec, + snakes: BTreeMap, } impl Board { - pub 
fn from_game_board(board: &crate::Board, id_map: &HashMap, turn: i32) -> Self { + pub fn from_game_board( + board: &crate::Board, + token_map: &BTreeMap, + turn: i32, + food_chance: u8, + min_food: u8, + ) -> Self { let width = board.width; debug_assert!(width > 0); let height = board.height; @@ -30,8 +68,8 @@ impl Board { .snakes .iter() .map(|snake| { - let id = id_map[&snake.id]; - Battlesnake::from_game_snake(snake, id) + let token = token_map[&snake.id]; + (token, Battlesnake::from_game_snake(snake)) }) .collect(); @@ -40,6 +78,8 @@ impl Board { height, width, food, + food_chance, + min_food, snakes, } } @@ -48,32 +88,26 @@ impl Board { self.turn } - pub fn snake_index(&self, id: u8) -> Option { - self.snakes - .iter() - .enumerate() - .find(|(_, snake)| snake.id == id) - .map(|(i, _)| i) - } - - pub fn is_alive(&self, id: u8) -> bool { - self.snakes.iter().any(|snake| snake.id == id) + pub fn is_alive(&self, token: SnakeToken) -> bool { + self.snakes.contains_key(&token) } pub fn alive_snakes(&self) -> usize { self.snakes.len() } - pub fn simulate_actions(&mut self, actions: &[Direction]) { - debug_assert_eq!(self.snakes.len(), actions.len()); + pub fn snakes(&self) -> impl Iterator + '_ { + self.snakes.keys().copied() + } + pub fn simulate_actions(&mut self, actions: &BTreeMap) { // move snakes - for (snake, direction) in self.snakes.iter_mut().zip(actions.iter()) { - snake.perform_action(*direction); + for (token, snake) in &mut self.snakes { + snake.perform_action(actions.get(token).copied().unwrap_or_default()); } // feed snakes - for snake in &mut self.snakes { + for snake in &mut self.snakes.values_mut() { let head = snake.head(); if self.food.remove(head) { snake.health = MAX_HEALTH; @@ -84,11 +118,11 @@ impl Board { let alive_ids = self .snakes .iter() - .filter(|snake| { + .filter(|(_, snake)| { // snake must have enough health snake.health != 0 }) - .map(|snake| (snake.id, snake.body.len(), *snake.head())) + .map(|(token, snake)| (*token, 
snake.body.len(), *snake.head())) .filter(|(_, _, head)| { // head in bounds (0..self.width).contains(&head.x) && (0..self.height).contains(&head.y) @@ -97,129 +131,133 @@ impl Board { // body collision !self .snakes - .iter() + .values() .flat_map(|snake2| snake2.body.iter().skip(1)) .any(|body| body == head) }) - .filter(|(id, len, head)| { + .filter(|(token, len, head)| { // head to head collision !self .snakes .iter() - .filter(|snake2| snake2.id != *id && snake2.body.len() >= *len) - .any(|snake2| snake2.head() == head) + .filter(|(token2, snake2)| *token2 != token && snake2.body.len() >= *len) + .any(|(_, snake2)| snake2.head() == head) }) - .map(|(id, _, _)| id) + .map(|(token, _, _)| token) .collect::>(); - self.snakes.retain(|snake| alive_ids.contains(&snake.id)); + self.snakes.retain(|token, _| alive_ids.contains(token)); + + // spawn new food on a random field that holds neither a snake nor food + if self.food.len() < usize::from(self.min_food) + || rand::thread_rng().gen_ratio(u32::from(self.food_chance), 100) + { + let free_fields = (0..self.width) + .flat_map(|x| (0..self.height).map(move |y| Coord { x, y })) + .filter(|coord| { + !self + .snakes + .values() + .flat_map(|snake| snake.body.iter()) + .any(|body| body == coord) + }) + .filter(|coord| !self.food.contains(coord)); + if let Some(field) = free_fields.choose(&mut rand::thread_rng()) { + self.food.insert(field); + } + } self.turn += 1; } - pub fn simulate_with_initial_until( - &mut self, - actions: &[Option], - exit: impl Fn(&Self) -> bool, - ) { - debug_assert_eq!(actions.len(), self.snakes.len()); - let possible_actions = self.possible_actions(); - let actions = actions - .iter() - .enumerate() - .map(|(i, direction)| { - direction.unwrap_or_else(|| { - possible_actions[i] - .choose(&mut rand::thread_rng()) - .copied() - .unwrap_or(Direction::Up) - }) - }) - .collect::>(); - - self.simulate_actions(&actions); + pub fn simulate_until(&mut self, exit: impl Fn(&Self) -> bool) { while !exit(self) { let actions = self .possible_actions() .iter() - 
.map(|actions| { - actions - .choose(&mut rand::thread_rng()) - .copied() - .unwrap_or(Direction::Up) + .map(|(token, actions)| { + ( + *token, + actions + .iter() + .choose(&mut rand::thread_rng()) + .copied() + .unwrap_or_default(), + ) }) - .collect::>(); + .collect(); self.simulate_actions(&actions); } } - pub fn possible_actions(&self) -> Vec> { - let possible_actions = self + pub fn possible_actions(&self) -> BTreeMap> { + let mut actions: BTreeMap<_, BTreeSet<_>> = self .snakes - .iter() - .map(|snake| { - enum_iterator::all::() - .map(|direction| (direction, snake.head().move_to(direction))) - .filter(|(_, target)| { - // don't move out of bounds - (0..self.width).contains(&target.x) && (0..self.height).contains(&target.y) - }) - .filter(|(_, target)| { - // don't collide with other snakes - !self - .snakes + .keys() + .map(|&token| (token, enum_iterator::all::().collect())) + .collect(); + + for (token, actions) in &mut actions { + let snake = &self.snakes[token]; + let head = snake.head(); + + actions.retain(|direction| { + let target = head.move_to(*direction); + + // don't move out of bounds + if !((0..self.width).contains(&target.x) && (0..self.height).contains(&target.y)) { + return false; + } + + // don't collide with other snakes + !self + .snakes + .values() + .flat_map(|snake| { + let has_eaten = snake.health == MAX_HEALTH; + snake + .body .iter() - .flat_map(|snake| { - let has_eaten = snake.health == MAX_HEALTH; - snake - .body - .iter() - .take(snake.body.len() - usize::from(!has_eaten)) - }) - .any(|coord| coord == target) + .take(snake.body.len() - usize::from(!has_eaten)) }) - .map(|(direction, _)| direction) - .collect::>() - }) - .collect::>(); + .any(|coord| *coord == target) + }); + } // don't move into bigger snakes heads with only one movement option - possible_actions + let bigger_snakes = self + .snakes .iter() - .enumerate() - .map(|(i, actions)| { - let snake = &self.snakes[i]; - let length = snake.body.len(); - let head = 
snake.head(); - actions - .iter() - .copied() - .filter(|direction| { - let target = head.move_to(*direction); - !self - .snakes - .iter() - .enumerate() - .filter(|(_, snake)| { - // only snakes that are longer - snake.body.len() > length - }) - .filter_map(|(i, snake)| match &possible_actions[i][..] { - // only snakes that have a single action option - [direction] => Some(snake.head().move_to(*direction)), - _ => None, - }) - .any(|coord| coord == target) - }) - .collect() + .sorted_unstable_by(|(_, snake1), (_, snake2)| snake2.body.len().cmp(&snake1.body.len())) + .map(|(token, snake)| { + if actions[token].len() == 1 { + ( + snake.body.len(), + Some(snake.head().move_to(*actions[token].first().unwrap())), + ) + } else { + (snake.body.len(), None) + } }) - .collect() + .collect::>(); + for (token, actions) in &mut actions { + let snake = &self.snakes[token]; + let head = snake.head(); + + actions.retain(|direction| { + let target = head.move_to(*direction); + !bigger_snakes + .iter() + .take_while(|(length, _)| *length > snake.body.len()) + .any(|(_, coord)| coord.map_or(false, |coord| coord == target)) + }); + } + + actions } } #[derive(Debug, PartialEq, Eq, Clone)] pub struct Battlesnake { - /// Id of the snake. Unique inside a game - id: u8, /// health points health: u8, /// Body of the snake. The head is the first element in the queue @@ -227,12 +265,12 @@ pub struct Battlesnake { } impl Battlesnake { - pub fn from_game_snake(snake: &crate::Battlesnake, id: u8) -> Self { + pub fn from_game_snake(snake: &crate::Battlesnake) -> Self { let body: VecDeque<_> = snake.body.iter().copied().collect(); debug_assert_eq!(body.len(), usize::try_from(snake.length).unwrap()); debug_assert!(snake.health <= crate::MAX_HEALTH); let health = u8::try_from(snake.health).expect("max health is 100"); - Self { id, health, body } + Self { health, body } } pub fn perform_action(&mut self, direction: Direction) {