"""
Shared helpers and MCTS from suragnair/alpha-zero-general.
"""
import logging
import math

import numpy as np

EPS = 1e-8

log = logging.getLogger(__name__)


class AverageMeter(object):
    """Keeps a running (weighted) average of a scalar, e.g. a training loss."""

    def __init__(self):
        self.val = 0    # most recent value
        self.avg = 0    # running mean
        self.sum = 0    # weighted sum of all values seen
        self.count = 0  # total weight seen

    def __repr__(self):
        return format(self.avg, '.2e')

    def update(self, val, n=1):
        """Fold in `val` with weight `n` (e.g. a batch size)."""
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count


class dotdict(dict):
    """A dict whose keys can also be read as attributes (d.x == d['x'])."""

    def __getattr__(self, name):
        return self[name]


class MCTS():
    """
    Monte Carlo Tree Search guided by a policy/value network.

    All statistics are keyed by the string representation of a canonical
    board (plus an action index for the per-edge tables).
    """

    def __init__(self, game, nnet, args):
        self.game = game
        self.nnet = nnet
        self.args = args          # needs .numMCTSSims and .cpuct
        self.Qsa = {}             # mean value of edge (s, a)
        self.Nsa = {}             # visit count of edge (s, a)
        self.Ns = {}              # visit count of state s
        self.Ps = {}              # prior policy for state s (from the net)
        self.Es = {}              # cached game.getGameEnded(s, 1)
        self.Vs = {}              # cached valid-move mask for s

    def getActionProb(self, canonicalBoard, temp=1):
        """
        Run numMCTSSims simulations from canonicalBoard and return a policy
        vector where the probability of action a is proportional to
        Nsa[(s, a)] ** (1 / temp).  temp == 0 yields a one-hot vector on
        (one of) the most-visited action(s).
        """
        for _ in range(self.args.numMCTSSims):
            self.search(canonicalBoard)

        s = self.game.stringRepresentation(canonicalBoard)
        counts = [self.Nsa.get((s, a), 0) for a in range(self.game.getActionSize())]

        if temp == 0:
            best_actions = np.flatnonzero(np.asarray(counts) == max(counts))
            probs = [0] * len(counts)
            probs[np.random.choice(best_actions)] = 1
            return probs

        weighted = [c ** (1. / temp) for c in counts]
        total = float(sum(weighted))
        return [w / total for w in weighted]

    def search(self, canonicalBoard):
        """
        One MCTS iteration: descend along maximum-UCB edges until a leaf,
        expand the leaf with the network's policy/value (or use the game
        outcome at a terminal state), then back the value up the path.

        NOTE: the return value is the NEGATIVE of the state's value, since
        a state worth v to the player to move is worth -v to the opponent.

        Returns:
            v: the negative of the value of canonicalBoard
        """
        s = self.game.stringRepresentation(canonicalBoard)

        if s not in self.Es:
            self.Es[s] = self.game.getGameEnded(canonicalBoard, 1)
        if self.Es[s] != 0:
            # terminal node: propagate the actual game outcome
            return -self.Es[s]

        if s not in self.Ps:
            # leaf node: expand using the network's prediction
            self.Ps[s], v = self.nnet.predict(canonicalBoard)
            valids = self.game.getValidMoves(canonicalBoard, 1)
            self.Ps[s] = self.Ps[s] * valids    # mask invalid moves
            total = np.sum(self.Ps[s])
            if total > 0:
                self.Ps[s] /= total             # renormalize
            else:
                # The net put all its mass on invalid moves (weak or overfit
                # net).  Fall back to a uniform policy over the valid moves;
                # frequent occurrences indicate a training problem.
                log.error("All valid moves were masked, doing a workaround.")
                self.Ps[s] = self.Ps[s] + valids
                self.Ps[s] /= np.sum(self.Ps[s])

            self.Vs[s] = valids
            self.Ns[s] = 0
            return -v

        valids = self.Vs[s]
        cur_best = -float('inf')
        best_act = -1

        # choose the valid action with the highest upper confidence bound
        for a in range(self.game.getActionSize()):
            if not valids[a]:
                continue
            if (s, a) in self.Qsa:
                u = self.Qsa[(s, a)] + self.args.cpuct * self.Ps[s][a] * math.sqrt(self.Ns[s]) / (
                        1 + self.Nsa[(s, a)])
            else:
                u = self.args.cpuct * self.Ps[s][a] * math.sqrt(self.Ns[s] + EPS)  # Q = 0 ?
            if u > cur_best:
                cur_best = u
                best_act = a

        a = best_act
        next_s, next_player = self.game.getNextState(canonicalBoard, 1, a)
        next_s = self.game.getCanonicalForm(next_s, next_player)

        v = self.search(next_s)

        # back up: maintain the running mean Q and the visit counts
        if (s, a) in self.Qsa:
            self.Qsa[(s, a)] = (self.Nsa[(s, a)] * self.Qsa[(s, a)] + v) / (self.Nsa[(s, a)] + 1)
            self.Nsa[(s, a)] += 1
        else:
            self.Qsa[(s, a)] = v
            self.Nsa[(s, a)] = 1

        self.Ns[s] += 1
        return -v
\"\"\"\n", + " A Ninemensmorris Board is represented as a array of (25)\n", + " The item on board[24] represents the placing phase. \"0\" if\n", + " the phase is not over yet, \"1\" if it is.\n", + "\n", + " Board logic:\n", + "\n", + " The pieces are represented as\n", + " - 1 for player one (black), 1 for player 2 (white) and 0 if there is no\n", + " piece on the position (for the canonical Board the\n", + " current players pieces are always shown as 1 and the\n", + " opponents as -1). The initial board:\n", + "\n", + " board shape:\n", + " [0,0,0,0,0,0,0,0, -> outer ring\n", + " 0,0,0,0,0,0,0,0, -> middle ring\n", + " 0,0,0,0,0,0,0,0] -> inner ring\n", + "\n", + "\n", + "\n", + " Locations:\n", + "\n", + " Locations are given as the index in the board array.\n", + "\n", + " Actions:\n", + "\n", + " Actions are stored in a list of tuples of the form:\n", + " action = [piece_location, move_location, remove_piece]\n", + " \"\"\"\n", + "\n", + " \"\"\"\n", + " 6x6 configuration\n", + " 24 spots for pieces\n", + " 1 spot to count the placed pieces\n", + " 1 spot to count the current moves without mills\n", + "\n", + " -> need to be in the board itself, since only the board is\n", + " \"\"\"\n", + " def __init__(self):\n", + " \"Set up initial board configuration.\"\n", + " self.n = 6\n", + " self.pieces = np.zeros((6,6), dtype=int)\n", + "\n", + " \"\"\"\n", + " currently not used\n", + " \"\"\"\n", + " def __getitem__(self, index):\n", + " return self.pieces[index]\n", + "\n", + "\n", + " \"\"\"\n", + " returns a vector of ones and zeros, marking all the legal moves for the\n", + " current board state\n", + " \"\"\"\n", + " def get_legal_move_vector(self, player, all_moves):\n", + " \"\"\"\n", + " Input:\n", + " player: current player (1 or -1)\n", + " all_moves: list with all possible moves\n", + "\n", + " Returns:\n", + " legal_move_vector: vector of length = all_moves with ones and zeros\n", + " \"\"\"\n", + " legal_moves = self.get_legal_moves(player)\n", + " 
legal_move_vector = [0] * len(all_moves)\n", + "\n", + " for move in legal_moves:\n", + " index = all_moves.index(move)\n", + " legal_move_vector[index] = 1\n", + " return legal_move_vector\n", + "\n", + " \"\"\"\n", + " Transforms the array form of the NineMensMorris board into a Image, that\n", + " can be used as Input for the Neural Network\n", + " \"\"\"\n", + " def arrayToImage(self, array, placements_and_moves):\n", + " \"\"\"\n", + " Input:\n", + " array: list with all 24 board positions\n", + " placements_and_moves: Tuple containing the placed pieces in phase\n", + " zero and the current number of moves without a mill\n", + "\n", + " Returns:\n", + " legal_move_vector: vector of length = all_moves with ones and zeros\n", + " \"\"\"\n", + " board_image = np.zeros((6,6), dtype=int)\n", + " boardx = 0\n", + " boardy = 0\n", + " count_placements, current_moves = placements_and_moves\n", + " assert(len(array) == 24)\n", + " assert(0 <= count_placements <= 18)\n", + " index = 0\n", + " while index < 24:\n", + "\n", + " board_image[boardx][boardy] = np.copy(array[index])\n", + " if boardy == 5:\n", + " boardx += 1\n", + " boardy = 0\n", + " else:\n", + " boardy += 1\n", + " index += 1\n", + "\n", + "\n", + " board_image[4][0] = count_placements\n", + " board_image[4][1] = current_moves\n", + " assert(0 <= board_image[4][0] <= 18)\n", + "\n", + " return board_image\n", + "\n", + " \"\"\"\n", + " Transforms the Image form used in the training of the Neural Network into an\n", + " Array of the board and a Tuple containing the placed pieces in phase zero\n", + " and the current number of moves without a mill.\n", + " \"\"\"\n", + " def piecesToArray(self):\n", + " \"\"\"\n", + " Returns:\n", + " re_board: list with all 24 board positions\n", + " placements_and_moves: Tuple containing the placed pieces in phase\n", + " zero and the current number of moves without a mill\n", + " \"\"\"\n", + " re_board = []\n", + " re_board.extend(self.pieces[0])\n", + " 
re_board.extend(self.pieces[1])\n", + " re_board.extend(self.pieces[2])\n", + " re_board.extend(self.pieces[3])\n", + "\n", + "\n", + " assert(0 <= self.pieces[4][0] <= 18)\n", + " assert(len(re_board) == 24)\n", + " placements_and_moves = (self.pieces[4][0], self.pieces[4][1])\n", + "\n", + " return (re_board, placements_and_moves)\n", + "\n", + " \"\"\"\n", + " Gets the current game phase for the current player, then calls the\n", + " right method to retrieve the legal moves for the specific game phase, board\n", + " and player. Returns a list\n", + " \"\"\"\n", + " def get_legal_moves(self, player):\n", + " \"\"\"\n", + " Input:\n", + " player: current player (1 or -1)\n", + "\n", + " Returns:\n", + " legal_move_vector: list with all the move Tuples that are legal for\n", + " the current board state\n", + " \"\"\"\n", + " game_phase = self.get_game_phase(player)\n", + " assert(0 <= game_phase <= 2)\n", + " if game_phase == 0:\n", + " return list(self.get_legal_moves_0(player))\n", + "\n", + " elif game_phase == 1:\n", + " return list(self.get_legal_moves_1(player))\n", + " elif game_phase == 2:\n", + " return list(self.get_legal_moves_2(player))\n", + "\n", + " \"\"\"\n", + " Gets the current game phase for the current player and board\n", + " \"\"\"\n", + " def get_game_phase(self, player):\n", + " \"\"\"\n", + " Input:\n", + " player: current player (1 or -1)\n", + "\n", + " Returns:\n", + " number: number representing the game phase\n", + " \"\"\"\n", + "\n", + " array, placements_and_moves = self.piecesToArray()\n", + " assert(0 <= placements_and_moves[0] <= 18)\n", + "\n", + " if placements_and_moves[0] < 18:\n", + " return 0\n", + " elif len(self.get_player_pieces(player)) <= 3:\n", + " return 2\n", + " else:\n", + " return 1\n", + "\n", + " \"\"\"\n", + " Gets all positions for the given players pieces in the array form of\n", + " the board\n", + " \"\"\"\n", + " def get_player_pieces(self, player):\n", + " \"\"\"\n", + " Input:\n", + " player: current 
player (1 or -1)\n", + "\n", + " Returns:\n", + " locations: list of the locations for all the pieces of the given player\n", + " \"\"\"\n", + " board, placements = self.piecesToArray()\n", + " locations = []\n", + "\n", + " index = 0\n", + " while index < len(board):\n", + " if board[index] == player:\n", + " locations.append(index)\n", + " index += 1\n", + " if locations == []:\n", + " return []\n", + " else:\n", + " return list(locations)\n", + "\n", + " \"\"\"\n", + " Gets all the positions on the board that are empty\n", + " \"\"\"\n", + " def get_empty_positions(self):\n", + " \"\"\"\n", + " Returns:\n", + " locations: list of all empty positions\n", + " \"\"\"\n", + " board, placements = self.piecesToArray()\n", + " assert(0 <= placements[0] <= 18)\n", + " assert(len(board) == 24)\n", + "\n", + " locations = []\n", + "\n", + " index = 0\n", + " while index < len(board):\n", + " if board[index] == 0:\n", + " locations.append(index)\n", + " index += 1\n", + "\n", + " return list(locations)\n", + "\n", + " \"\"\"\n", + " Checks for each possible move, if a new mill is formed.\n", + " Each check makes sure, that the origin of the move, isnt one of the pieces in the\n", + " potentially new mill.\n", + " Returns a list of all move Tuples that form a new mill.\n", + " \"\"\"\n", + " def get_possible_mills(self, move_locations, player):\n", + " \"\"\"\n", + " Input:\n", + " move_locations: list of Tuples with (origin, destination)\n", + " player: current player (1 or -1)\n", + "\n", + " Returns:\n", + " number: list of all moves that form a mill on the board\n", + " \"\"\"\n", + " board, placements = self.piecesToArray()\n", + " assert(0 <= placements[0] <= 18)\n", + " assert(len(board) == 24)\n", + " move_forms_mill = []\n", + "\n", + " for move in move_locations:\n", + " if (move != None) and (move[1] < 24) and (move[1] >= 0) :\n", + " if (move[1] % 2) == 0: #move is in a corner\n", + " if (move[1] % 8) == 0: # move is in the top left corner of a ring\n", + " if 
(([move[1] + 7] == player) and (board[move[1] + 6] == player) and\n", + " (move[1] + 7 != move[0]) and (move[1] + 6 != move[0])): #check down\n", + " move_forms_mill.append(move)\n", + " if ((board[move[1] + 1] == player) and (board[move[1] + 2] == player) and\n", + " (move[1] + 1 != move[0]) and (move[1] + 2 != move[0])): #check right\n", + " move_forms_mill.append(move)\n", + " elif move in [6,14,22]: #move is in the bottom left corner of a ring\n", + " if ((board[move[1] + 1] == player) and (board[move[1] - 6] == player) and\n", + " (move[1] + 1 != move[0])and (move[1] - 6 != move[0])): #check up\n", + " move_forms_mill.append(move)\n", + " if ((board[move[1] - 1] == player) and (board[move[1] - 2] == player) and\n", + " (move[1] - 1 != move[0]) and (move[1] - 2 != move[0])): #check right\n", + " move_forms_mill.append(move)\n", + " elif move in [2,10,18,4,12,20]: #move is in the bottom or top right corner of a ring\n", + " if ((board[move[1] + 1] == player) and (board[move[1] + 2] == player) and\n", + " (move[1] + 1 != move[0]) and (move[1] + 2 != move[0])): #check down/ left\n", + " move_forms_mill.append(move)\n", + " if ((board[move[1] - 1] == player) and (board[move[1] - 2] == player) and\n", + " (move[1] - 1 != move[0]) and (move[1] - 2 != move[0])): #check left/ up\n", + " move_forms_mill.append(move)\n", + "\n", + " else: #move is in the middle of a row\n", + " if move[1] in [1,3,5,7]: #outer ring\n", + " if move[1] == 7:\n", + " if ((board[move[1] - 7] == player) and (board[move[1] - 1] == player) and\n", + " (move[1] - 7 != move[0]) and (move[1] - 1 != move[0])): #check ring\n", + " move_forms_mill.append(move)\n", + " else:\n", + " if ((board[move[1] - 1] == player) and (board[move[1] + 1] == player) and\n", + " (move[1] - 1 != move[0]) and (move[1] + 1 != move[0])): #check ring\n", + " move_forms_mill.append(move)\n", + " if ((board[move[1] + 8] == player) and (board[move[1] + 16] == player) and\n", + " (move[1] + 8 != move[0]) and (move[1] + 16 != 
move[0])): #check intersections\n", + " move_forms_mill.append(move)\n", + "\n", + " elif move[1] in [9,11,13,15]: #middle ring\n", + " if move[1] == 15:\n", + " if ((board[move[1] - 7] == player) and (board[move[1] - 1] == player) and\n", + " (move[1] - 7 != move[0]) and (move[1] - 1 != move[0])): #check ring\n", + " move_forms_mill.append(move)\n", + " else:\n", + " if ((board[move[1] - 1] == player) and (board[move[1] + 1] == player) and\n", + " (move[1] - 1 != move[0]) and (move[1] + 1 != move[0])): #check ring\n", + " move_forms_mill.append(move)\n", + " if ((board[move[1] + 8] == player) and (board[move[1] - 8] == player) and\n", + " (move[1] + 8 != move[0]) and (move[1] - 8 != move[0])): #check intersections\n", + " move_forms_mill.append(move)\n", + "\n", + " elif move[1] in [17,19,21,23]: #inner ring\n", + " if move[1] == 23:\n", + " if ((board[move[1] - 7] == player) and (board[move[1] - 1] == player) and\n", + " (move[1] - 7 != move[0]) and (move[1] - 1 != move[0])): #check ring\n", + " move_forms_mill.append(move)\n", + " else:\n", + " if ((board[move[1] - 1] == player) and (board[move[1] + 1] == player) and\n", + " (move[1] - 1 != move[0]) and (move[1] + 1 != move[0])): #check ring\n", + " move_forms_mill.append(move)\n", + " if ((board[move[1] - 8] == player) and (board[move[1] - 16] == player) and\n", + " (move[1] - 8 != move[0]) and (move[1] - 16 != move[0])): #check intersections\n", + " move_forms_mill.append(move)\n", + "\n", + " return list(move_forms_mill)\n", + "\n", + " \"\"\"\n", + " Looks at the board and returns all current mills for a given player,\n", + " in tuples of their coordinates\n", + " \"\"\"\n", + " def check_for_mills(self, player):\n", + " \"\"\"\n", + " Input:\n", + " player: current player (1 or -1)\n", + "\n", + " Returns:\n", + " current_mills: all mills for the current player\n", + " \"\"\"\n", + "\n", + " current_mills = []\n", + " board, placements = self.piecesToArray()\n", + " assert(0 <= placements[0] <= 18)\n", + " 
assert(len(board) == 24)\n", + "\n", + " index = 0\n", + "\n", + " while index < 23: #check rings\n", + " if (index in [6,14,22]):\n", + " if (board[index] == board[index + 1] == board[index - 6] == player):\n", + " current_mills.append((index, index + 1, index - 6))\n", + " elif (board[index] == board[index + 1] == board[index + 2] == player):\n", + " current_mills.append((index, index + 1, index + 2))\n", + "\n", + " index += 2\n", + "\n", + " index = 1\n", + "\n", + " while index < 8: #check intersections\n", + " if (board[index] == board[index + 8] == board[index + 16] == player):\n", + " current_mills.append((index, index + 8, index + 16))\n", + "\n", + " index += 2\n", + "\n", + " return list(current_mills)\n", + "\n", + " \"\"\"\n", + " Gets all neighbour postions for a position on the board\n", + " \"\"\"\n", + " def get_neighbours(self, position):\n", + " \"\"\"\n", + " Input:\n", + " position: postion index on the board\n", + "\n", + " Returns:\n", + " neighbours: Tuple of all neighbours\n", + " \"\"\"\n", + " assert(0 <= position <= 23)\n", + " if position != None:\n", + " if (position % 2) == 0: #position is in a corner\n", + "\n", + " if (position % 8) == 0: # position is in the top left corner of a ring\n", + " return (position + 1, position + 7)\n", + "\n", + " else: #position is in top right, or bottom corners\n", + " return (position - 1, position + 1)\n", + "\n", + " else: #position is in a intersection\n", + " if position in [1,3,5,7]: #outer ring\n", + " if position == 7:\n", + " return (0, 6, 15)\n", + " else:\n", + " return (position - 1, position + 1, position + 8)\n", + "\n", + "\n", + " elif position in [9,11,13,15]: #middle ring\n", + " if position == 15:\n", + " return (7, 8, 14, 23)\n", + " else:\n", + " return (position - 8, position - 1, position + 1, position + 8)\n", + "\n", + " elif position in [17,19,21,23]: #outer ring\n", + " if position == 23:\n", + " return (15, 16, 22)\n", + " else:\n", + " return (position - 8, position - 1, 
position + 1)\n", + "\n", + "\n", + " return\n", + "\n", + " \"\"\"\n", + " Gets all pieces that are outside of mills for the given player and the\n", + " current board\n", + " \"\"\"\n", + " def get_pieces_outside_mills(self, player):\n", + " \"\"\"\n", + " Input:\n", + " player: current player (1 or -1)\n", + "\n", + " Returns:\n", + " pieces: all pieces for the given player outside of mills\n", + " \"\"\"\n", + " all_pieces = self.get_player_pieces(player)\n", + "\n", + " mills = self.check_for_mills(player)\n", + "\n", + " remaining_pieces = self.get_player_pieces(player)\n", + "\n", + " for piece in all_pieces:\n", + " if len(mills) != 0:\n", + " for mill in mills:\n", + " if piece in mill and piece in remaining_pieces:\n", + " remaining_pieces.remove(piece)\n", + "\n", + "\n", + " return list(remaining_pieces)\n", + "\n", + " \"\"\"\n", + " Looks at the board, given the current player and identifies all\n", + " legal moves for the current gamestate, given that the player is\n", + " in Phase 0\n", + " \"\"\"\n", + " def get_legal_moves_0(self, player):\n", + " \"\"\"\n", + " Input:\n", + " player: current player (1 or -1)\n", + "\n", + " Returns:\n", + " moves: list of move tuples that are legal for the given player,\n", + " the players game phase and the current board\n", + " \"\"\"\n", + " #get enemy pieces that can be taken if a mill is formed\n", + " enemies_outside_mills = self.get_pieces_outside_mills(-player)\n", + " if len(enemies_outside_mills) > 0:\n", + " enemies_to_take = enemies_outside_mills\n", + " else:\n", + " enemies_to_take = self.get_player_pieces(-player)\n", + "\n", + "\n", + " #get empty positions, they represent all possible move locations for phase zero\n", + " empty_locations = []\n", + " for position in self.get_empty_positions():\n", + " empty_locations.append(('none',position))\n", + "\n", + " #get moves -> for each move_location, check if a mill is formed (check row(s))\n", + " mill_moves = self.get_possible_mills(empty_locations, 
player)\n", + "\n", + "\n", + " #generate action tuples\n", + " moves = []\n", + "\n", + " for move in empty_locations:\n", + " if move in mill_moves:\n", + " for enemy in enemies_to_take:\n", + " moves.append(('none',move[1],enemy))\n", + " else:\n", + " moves.append(('none',move[1],'none'))\n", + "\n", + "\n", + " return list(moves)\n", + "\n", + "\n", + " \"\"\"\n", + " Looks at the board, given the current player and identifies all\n", + " legal moves for the current gamestate, given that the player is\n", + " in Phase 1\n", + " \"\"\"\n", + " def get_legal_moves_1(self, player):\n", + " \"\"\"\n", + " Input:\n", + " player: current player (1 or -1)\n", + "\n", + " Returns:\n", + " moves: list of move tuples that are legal for the given player,\n", + " the players game phase and the current board\n", + " \"\"\"\n", + " moves = []\n", + " board, placements = self.piecesToArray()\n", + " assert(placements[0] == 18)\n", + " assert(len(board) == 24)\n", + "\n", + " #get enemy pieces that can be taken if a mill is formed\n", + " enemies_outside_mills = self.get_pieces_outside_mills(-player)\n", + " if len(enemies_outside_mills) > 0:\n", + " enemies_to_take = enemies_outside_mills\n", + " else:\n", + " enemies_to_take = self.get_player_pieces(-player)\n", + "\n", + " #get the current players pieces that will be moved\n", + " current_positions = self.get_player_pieces(player)\n", + "\n", + " #creating the first part of the moves\n", + " part_moves = []\n", + "\n", + " for position in current_positions:\n", + " neighbours = self.get_neighbours(position)\n", + " index = 0\n", + " while index < len(neighbours):\n", + " if board[neighbours[index]] == 0:\n", + " part_moves.append((position, neighbours[index]))\n", + " index += 1\n", + "\n", + " #finding the part moves that create mills, then pairing them accordingly with enemy pieces to beat\n", + " #get moves -> for each move_location, check if a mill is formed (check row(s))\n", + " mill_moves = 
self.get_possible_mills(part_moves, player)\n", + "\n", + " for move in part_moves:\n", + " if move in mill_moves:\n", + " for enemy in enemies_to_take:\n", + " moves.append((move[0],move[1],enemy))\n", + " else:\n", + " moves.append((move[0],move[1],'none'))\n", + "\n", + "\n", + "\n", + " return list(moves)\n", + "\n", + "\n", + " \"\"\"\n", + " Looks at the board, given the current player and identifies all\n", + " legal moves for the current gamestate, given that the player is\n", + " in Phase 2\n", + " \"\"\"\n", + " def get_legal_moves_2(self, player):\n", + " \"\"\"\n", + " Input:\n", + " player: current player (1 or -1)\n", + "\n", + " Returns:\n", + " moves: list of move tuples that are legal for the given player,\n", + " the players game phase and the current board\n", + " \"\"\"\n", + " moves = []\n", + "\n", + " #get enemy pieces that can be taken if a mill is formed\n", + " enemies_outside_mills = self.get_pieces_outside_mills(-player)\n", + " if len(enemies_outside_mills) > 0:\n", + " enemies_to_take = enemies_outside_mills\n", + " else:\n", + " enemies_to_take = self.get_player_pieces(-player)\n", + "\n", + " #get the current players pieces that will be moved\n", + " current_positions = self.get_player_pieces(player)\n", + "\n", + " #creating the first part of the moves\n", + " part_moves = []\n", + "\n", + " empty_locations = self.get_empty_positions()\n", + "\n", + " #pair the locations of current positions with all empty locations on the board\n", + " for position in current_positions:\n", + " for location in empty_locations:\n", + " part_moves.append((position, location))\n", + "\n", + " #finding the part moves that create mills, then pairing them accordingly with enemy pieces to beat\n", + " #get moves -> for each move_location, check if a mill is formed (check row(s))\n", + " mill_moves = self.get_possible_mills(part_moves, player)\n", + "\n", + " for move in part_moves:\n", + " if move in mill_moves:\n", + " for enemy in enemies_to_take:\n", + 
" moves.append((move[0],move[1],enemy))\n", + " else:\n", + " moves.append((move[0],move[1],'none'))\n", + "\n", + " return list(moves)\n", + "\n", + " \"\"\"\n", + " checks if the given player has any legal moves on the current board\n", + " \"\"\"\n", + " def has_legal_moves(self, player):\n", + " \"\"\"\n", + " Returns:\n", + " Boolean: has legal moves\n", + " \"\"\"\n", + " if (len(self.get_legal_moves(player)) > 0):\n", + " return True\n", + " return False\n", + "\n", + " '''\n", + " Rotates the board three times, each time creating a pair of the rotated\n", + " board and the rotated vector of legal moves.\n", + " Uses a shift vector for the board to calculate the new position for each\n", + " index in the array and a lookup list for the vector of legal moves.\n", + " '''\n", + " def get_board_rotations(self, pi, all_moves, policy_rotation_vector):\n", + " \"\"\"\n", + " Input:\n", + " pi: the legal move vector\n", + " all_moves: list with all legal moves\n", + " policy_rotation_vector: lookup list for the vector of legal moves\n", + "\n", + " Returns:\n", + " rotated_results: list of Tuples (image, legal_moves)\n", + " \"\"\"\n", + " #vector to rotate the board 90 degrees -> move each ring by two positions\n", + " rot90_vector = [2,2,2,2,2,2,-6,-6,2,2,2,2,2,2,-6,-6,2,2,2,2,2,2,-6,-6]\n", + "\n", + " old_board, placements = self.piecesToArray()\n", + " new_board = np.zeros((24), dtype = int)\n", + " new_pi = np.zeros((len(all_moves)), dtype = int)\n", + "\n", + " rotated_results = []\n", + "\n", + " #rotates the board 3 times\n", + " for i in range(3):\n", + " index = 0\n", + " while index < 24:\n", + " new_board[index+rot90_vector[index]]= np.copy(old_board[index])\n", + " index+=1\n", + "\n", + " index = 0\n", + " while index < len(all_moves):\n", + " new_pi[policy_rotation_vector[index]] = np.copy(pi[index])\n", + " index += 1\n", + "\n", + " rotated_results += [(self.arrayToImage(new_board, placements),new_pi)]\n", + " #print(\"rotating\")\n", + " 
#print(old_board)\n", + " old_board = np.copy(new_board)\n", + " #print(new_board)\n", + " pi = np.copy(new_pi)\n", + "\n", + " i+=1\n", + "\n", + " return rotated_results\n", + "\n", + "\n", + " \"\"\"\n", + " Exectues a move on the current board for the given player\n", + " \"\"\"\n", + " def execute_move(self, player, move_index, all_moves):\n", + " \"\"\"\n", + " Input:\n", + " player: the legal move vector\n", + " move_index: index for the move in the all_moves list\n", + " all_moves: list with all legal moves\n", + " \"\"\"\n", + " move = all_moves[move_index]\n", + " assert(len(move)==3) #move is a tuple of length 3\n", + " board, placements = self.piecesToArray()\n", + " assert(0 <= placements[0] <= 18)\n", + " assert(len(board) == 24)\n", + "\n", + " count_placements, current_moves = placements\n", + " if self.get_game_phase(player) == 0:\n", + " count_placements += 1\n", + " if move[0] != 'none':\n", + " board[move[0]] = 0\n", + " if move[2] != 'none':\n", + " board[move[2]] = 0\n", + " current_moves = 0\n", + " elif move[2] == 'none':\n", + " current_moves += 1\n", + " board[move[1]] = player\n", + " if current_moves > 50:\n", + " print(current_moves)\n", + "\n", + " placements = (count_placements, current_moves)\n", + "\n", + " image = self.arrayToImage(board, placements)\n", + " self.pieces = np.copy(image)\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3NLjJT-FwtFO" + }, + "source": [ + "#game abstract" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "nyza2si6d_2m" + }, + "outputs": [], + "source": [ + "class Game():\n", + " \"\"\"\n", + " This class specifies the base Game class. To define your own game, subclass\n", + " this class and implement the functions below. 
This works when the game is\n", + " two-player, adversarial and turn-based.\n", + "\n", + " Use 1 for player1 and -1 for player2.\n", + "\n", + " See othello/OthelloGame.py for an example implementation.\n", + " \"\"\"\n", + " def __init__(self):\n", + " pass\n", + "\n", + " def getInitBoard(self):\n", + " \"\"\"\n", + " Returns:\n", + " startBoard: a representation of the board (ideally this is the form\n", + " that will be the input to your neural network)\n", + " \"\"\"\n", + " pass\n", + "\n", + " def getBoardSize(self):\n", + " \"\"\"\n", + " Returns:\n", + " (x,y): a tuple of board dimensions\n", + " \"\"\"\n", + " pass\n", + "\n", + " def getActionSize(self):\n", + " \"\"\"\n", + " Returns:\n", + " actionSize: number of all possible actions\n", + " \"\"\"\n", + " pass\n", + "\n", + " def getNextState(self, board, player, action):\n", + " \"\"\"\n", + " Input:\n", + " board: current board\n", + " player: current player (1 or -1)\n", + " action: action taken by current player\n", + "\n", + " Returns:\n", + " nextBoard: board after applying action\n", + " nextPlayer: player who plays in the next turn (should be -player)\n", + " \"\"\"\n", + " pass\n", + "\n", + " def getValidMoves(self, board, player):\n", + " \"\"\"\n", + " Input:\n", + " board: current board\n", + " player: current player\n", + "\n", + " Returns:\n", + " validMoves: a binary vector of length self.getActionSize(), 1 for\n", + " moves that are valid from the current board and player,\n", + " 0 for invalid moves\n", + " \"\"\"\n", + " pass\n", + "\n", + " def getGameEnded(self, board, player):\n", + " \"\"\"\n", + " Input:\n", + " board: current board\n", + " player: current player (1 or -1)\n", + "\n", + " Returns:\n", + " r: 0 if game has not ended. 
1 if player won, -1 if player lost,\n", + " small non-zero value for draw.\n", + "\n", + " \"\"\"\n", + " pass\n", + "\n", + " def getCanonicalForm(self, board, player):\n", + " \"\"\"\n", + " Input:\n", + " board: current board\n", + " player: current player (1 or -1)\n", + "\n", + " Returns:\n", + " canonicalBoard: returns canonical form of board. The canonical form\n", + " should be independent of player. For e.g. in chess,\n", + " the canonical form can be chosen to be from the pov\n", + " of white. When the player is white, we can return\n", + " board as is. When the player is black, we can invert\n", + " the colors and return the board.\n", + " \"\"\"\n", + " pass\n", + "\n", + " def getSymmetries(self, board, pi):\n", + " \"\"\"\n", + " Input:\n", + " board: current board\n", + " pi: policy vector of size self.getActionSize()\n", + "\n", + " Returns:\n", + " symmForms: a list of [(board,pi)] where each tuple is a symmetrical\n", + " form of the board and the corresponding pi vector. 
This\n", + " is used when training the neural network from examples.\n", + " \"\"\"\n", + " pass\n", + "\n", + " def stringRepresentation(self, board):\n", + " \"\"\"\n", + " Input:\n", + " board: current board\n", + "\n", + " Returns:\n", + " boardString: a quick conversion of board to a string format.\n", + " Required by MCTS for hashing.\n", + " \"\"\"\n", + " pass\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ThoL-2gCwwmV" + }, + "source": [ + "#ninemensmorris game" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ff2tHe0-abqC" + }, + "outputs": [], + "source": [ + "from __future__ import print_function\n", + "import sys\n", + "sys.path.append('..')\n", + "import numpy as np\n", + "import copy\n", + "\n", + "'''\n", + "Author: Jonas Jakob\n", + "Created: May 31, 2023\n", + "\n", + "Implementation of the Game Class for NineMensMorris\n", + "Many of these functions are based on those from OthelloGame.py:\n", + "https://github.com/suragnair/alpha-zero-general/blob/master/othello/OthelloGame.py\n", + "\n", + "'''\n", + "class NineMensMorrisGame(Game):\n", + "\n", + " \"\"\"\n", + " inititalizes the list of all possible moves, the policy rotation vector and\n", + " the number of moves without a mill to determine a draw\n", + " \"\"\"\n", + " def __init__(self):\n", + " self.n = 5\n", + " self.all_moves = self.get_all_moves()\n", + " self.policy_rotation_vector = self.get_policy_roation90()\n", + " self.MAX_MOVES_WITHOUT_MILL = 200\n", + "\n", + " \"\"\"\n", + " Gets the list of all possible moves\n", + " \"\"\"\n", + " def get_all_moves(self):\n", + " \"\"\"\n", + " Returns:\n", + " moves: A list with all possible moves for the game\n", + " \"\"\"\n", + " moves = self.get_all_moves_phase_zero() + self.get_all_moves_phase_one_and_two()\n", + " return list(moves)\n", + "\n", + " \"\"\"\n", + " Gets the lookup list for the rotation of the vector of legal moves\n", + " \"\"\"\n", + " def 
get_policy_roation90(self):\n", + " \"\"\"\n", + " Returns:\n", + " rotation90: lookup list for the rotation of the legal moves vector\n", + " \"\"\"\n", + "\n", + " rotation90 = [-1] * len(self.all_moves)\n", + "\n", + " i = 0\n", + " while i < len(self.all_moves):\n", + "\n", + " move = self.all_moves[i]\n", + " rotatedmove = self.rotate(move)\n", + " newindex = self.all_moves.index(rotatedmove)\n", + " rotation90[i] = newindex\n", + "\n", + " i+=1\n", + "\n", + " return rotation90\n", + "\n", + " \"\"\"\n", + " Rotates a move by 90 degrees\n", + " \"\"\"\n", + " def rotate(self, move):\n", + " \"\"\"\n", + " Input:\n", + " move: Tuple (origin, destination, piece to take)\n", + " Returns:\n", + " rot_move: Tuple (neworigin, newdestination, newpiece to take)\n", + " \"\"\"\n", + " if move[0] == 'none':\n", + " neworigin = 'none'\n", + "\n", + " elif move[0] in [6,7,14,15,22,23]:\n", + " neworigin = move[0] - 6\n", + "\n", + " else:\n", + " neworigin = move[0] + 2\n", + "\n", + " if move[1] in [6,7,14,15,22,23]:\n", + " newdestination = move[1] - 6\n", + "\n", + " else:\n", + " newdestination = move[1] + 2\n", + "\n", + " if move[2] == 'none':\n", + " newenemy = 'none'\n", + "\n", + " elif move[2] in [6,7,14,15,22,23]:\n", + " newenemy = move[2] - 6\n", + "\n", + " else:\n", + " newenemy = move[2] + 2\n", + "\n", + " return (neworigin, newdestination, newenemy)\n", + "\n", + " \"\"\"\n", + " Generates all possible moves for game phase zero\n", + " \"\"\"\n", + " def get_all_moves_phase_zero(self):\n", + " \"\"\"\n", + " Returns:\n", + " moves: list of all possible move Tuples\n", + " \"\"\"\n", + "\n", + " moves = []\n", + " index = 0\n", + "\n", + " while index < 24:\n", + "\n", + " moves.append((\"none\",index,\"none\"))\n", + " count = 0\n", + "\n", + " while count < 24:\n", + "\n", + " if count != index:\n", + "\n", + " moves.append((\"none\",index,count))\n", + "\n", + " count += 1\n", + "\n", + " index += 1\n", + "\n", + " return list(moves)\n", + "\n", + " 
\"\"\"\n", + " Generates all possible moves for game phase one and two\n", + " \"\"\"\n", + " def get_all_moves_phase_one_and_two(self):\n", + " \"\"\"\n", + " Returns:\n", + " moves: list of all possible move Tuples\n", + " \"\"\"\n", + "\n", + " moves = []\n", + " index_origin = 0\n", + "\n", + " while index_origin < 24:\n", + "\n", + " index_move = 0\n", + "\n", + " while index_move < 24:\n", + "\n", + " if index_move != index_origin:\n", + "\n", + " moves.append((index_origin,index_move,\"none\"))\n", + "\n", + " count = 0\n", + "\n", + " while count <24:\n", + "\n", + " if (count != index_move)and(count != index_origin):\n", + "\n", + " moves.append((index_origin,index_move,count))\n", + "\n", + " count += 1\n", + "\n", + " index_move += 1\n", + "\n", + " index_origin += 1\n", + "\n", + " return list(moves)\n", + " \"\"\"\n", + " based on Othellogame.py\n", + " Gets the initial form of the board in game phase zero\n", + " \"\"\"\n", + " def getInitBoard(self):\n", + " \"\"\"\n", + " Returns:\n", + " board: the initial board configuration\n", + " \"\"\"\n", + " b = Board()\n", + "\n", + " return np.array(b.pieces)\n", + "\n", + " \"\"\"\n", + " based on Othellogame.py\n", + " Gets the size of the board image in a Tuple (x, y)\n", + " \"\"\"\n", + " def getBoardSize(self):\n", + " \"\"\"\n", + " Returns:\n", + " dimensions: a Tuple with the board dimensions\n", + " \"\"\"\n", + " return (6, 6)\n", + "\n", + " \"\"\"\n", + " based on Othellogame.py\n", + " Gets the number of all possible actions\n", + " \"\"\"\n", + " def getActionSize(self):\n", + " \"\"\"\n", + " Returns:\n", + " actionssize: number of all moves\n", + " \"\"\"\n", + " return len(self.all_moves)\n", + "\n", + " \"\"\"\n", + " based on Othellogame.py\n", + " Returns the next state to given a board, player and move\n", + " \"\"\"\n", + " def getNextState(self, board, player, move):\n", + " \"\"\"\n", + " Input:\n", + " board: current board image\n", + " player: current player (1 or -1)\n", + " 
move: move Tuple\n", + "\n", + " Returns:\n", + " new_state: Tuple (new board, next player)\n", + " \"\"\"\n", + " b = Board()\n", + " b.pieces = np.copy(board)\n", + "\n", + " b.execute_move(player, move, self.all_moves)\n", + "\n", + " return (b.pieces, -player)\n", + "\n", + " \"\"\"\n", + " based on Othellogame.py\n", + " Gets a vector of size == ActionSize that marks legal moves for the current\n", + " board and player with 1\n", + " \"\"\"\n", + " def getValidMoves(self, board, player):\n", + " \"\"\"\n", + " Input:\n", + " board: current board image\n", + " player current player (1 or -1)\n", + " Returns:\n", + " valid_moves: np array of ones and zeros marking the legal moves\n", + " \"\"\"\n", + " b = Board()\n", + " b.pieces = np.copy(board)\n", + "\n", + " valid_moves = b.get_legal_move_vector(player, self.all_moves)\n", + "\n", + " return np.array(valid_moves)\n", + "\n", + " \"\"\"\n", + " based on Othellogame.py\n", + " Determines if the game has ended for the given board and player.\n", + " \"\"\"\n", + " def getGameEnded(self, board, player):\n", + " \"\"\"\n", + " Input:\n", + " board: current board\n", + " player: current player (1 or -1)\n", + " Returns:\n", + " game_ended: 0 if game has not ended. 
1 if player won, -1 if player\n", + " lost, small non-zero value for draw.\n", + " \"\"\"\n", + " assert(not isinstance(board, str))\n", + "\n", + " b = Board()\n", + " b.pieces = np.copy(board)\n", + "\n", + " if b.pieces[4][1] >= 50:\n", + " return 0.0001\n", + " elif not b.has_legal_moves(player):\n", + " return -1\n", + " elif not b.has_legal_moves(-player):\n", + " return 1\n", + " elif len(b.get_player_pieces(player)) < 3 and b.pieces[4][0] == 18:\n", + " return -1\n", + " elif len(b.get_player_pieces(-player)) < 3 and b.pieces[4][0] == 18:\n", + " return 1\n", + " elif b.has_legal_moves(-player) and b.has_legal_moves(player):\n", + " return 0\n", + "\n", + " \"\"\"\n", + " Based on Othellogame.py\n", + " Multiplies each element with the given player, resulting in a canonical\n", + " board from the perspective of the given player. The given players pieces\n", + " are always represented as 1 in the Canonical Form.\n", + " Note: no true canonical form\n", + " \"\"\"\n", + " def getCanonicalForm(self, board, player):\n", + " \"\"\"\n", + " Input:\n", + " board: current board\n", + " player: current player (1 or -1)\n", + " Returns:\n", + " b: canonical board\n", + " \"\"\"\n", + " b = np.zeros((6,6), dtype=int)\n", + " count_placements = copy.deepcopy(board[4][0])\n", + " current_moves = copy.deepcopy(board[4][1])\n", + " index = 0\n", + " while index < 4:\n", + " item = 0\n", + " while item < 6:\n", + " b[index][item] = board[index][item] * player\n", + " item += 1\n", + " index += 1\n", + "\n", + " b[4][0] = count_placements\n", + " b[4][1] = current_moves\n", + " return b\n", + " \"\"\"\n", + " Based on Othellogame.py\n", + " Gets some Symmetries by rotating the board three times, each time also\n", + " adapting the legal moves vector to the new board\n", + " \"\"\"\n", + " def getSymmetries(self, board, pi):\n", + " \"\"\"\n", + " Input:\n", + " board: the current board\n", + " pi: the legal moves vector for the current board\n", + " Returns:\n", + " 
results: three board rotations\n", + " \"\"\"\n", + "\n", + " assert(len(pi) == len(self.all_moves))\n", + " b = Board()\n", + " b.pieces = np.copy(board)\n", + "\n", + " results = b.get_board_rotations(pi, self.all_moves, self.policy_rotation_vector)\n", + "\n", + " return results\n", + "\n", + " \"\"\"\n", + " Gets a String representation for the board, used for hashing in mcts\n", + " \"\"\"\n", + " def stringRepresentation(self, board):\n", + " \"\"\"\n", + " Input:\n", + " board: the current board\n", + " Returns:\n", + " board_s: String representation of the board\n", + " \"\"\"\n", + " board_s = \"\"\n", + " index = 0\n", + " i = 0\n", + " while i < 4:\n", + " while index < 6:\n", + " board_s = board_s + str(board[i][index]) + \",\"\n", + " index += 1\n", + " index = 0\n", + " i += 1\n", + " board_s = board_s + str(board[4][0]) + \",\"\n", + " board_s = board_s + str(board[4][1])\n", + "\n", + " return board_s\n", + "\n", + " \"\"\"\n", + " Gets a readable String representation for the board\n", + " \"\"\"\n", + " def stringRepresentationReadable(self, board):\n", + " \"\"\"\n", + " Input:\n", + " board: the current board\n", + " Returns:\n", + " board_s: String representation of the board\n", + " \"\"\"\n", + " board_s = \"\"\n", + " index = 0\n", + " i = 0\n", + " while i < 4:\n", + " while index < 6:\n", + " board_s = board_s + str(board[i][index]) + \",\"\n", + " index += 1\n", + " index = 0\n", + " i += 1\n", + " board_s = board_s + str(board[4][0]) + \",\"\n", + " board_s = board_s + str(board[4][1])\n", + "\n", + " return board_s\n", + "\n", + " @staticmethod\n", + " def display(boardd):\n", + " board = Board()\n", + " board.pieces = np.copy(boardd)\n", + " board, stuff = board.piecesToArray()\n", + " assert(0 <= stuff[0] <= 18)\n", + " assert(len(board) == 24)\n", + "\n", + " print('{}________ {} ________{}').format(board[0], board[1], board[2]))\n", + " print('| | | ')\n", + " print(' {} {} {} '.format(board[8], board[9], board[10]))\n", + " 
print('| | | | | ')\n", + " print('| | {}__ {} __{} '.format(board[16], board[17], board[18]))\n", + " print('| | | | | | ')\n", + " print('{}-{}-{} {}-{}-{}'.format(board[7], board[15], board[23], board[19], board[11], board[3]))\n", + " print('| | | | | | ')\n", + " print('| | {}__ {} __{} '.format(board[22], board[21], board[20]))\n", + " print('| | | | | ')\n", + " print('| {}_____ {} _____{} '.format(board[14], board[13], board[12]))\n", + " print('| | | ', )\n", + " print('{} _______ {} ______ {} '.format(board[6], board[5], board[4]))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7O2ZjnfNw6vG" + }, + "source": [ + "#neuralnet abstract" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ZdA-JX5tjw_a" + }, + "outputs": [], + "source": [ + "class NeuralNet():\n", + " \"\"\"\n", + " This class specifies the base NeuralNet class. To define your own neural\n", + " network, subclass this class and implement the functions below. The neural\n", + " network does not consider the current player, and instead only deals with\n", + " the canonical form of the board.\n", + "\n", + " See othello/NNet.py for an example implementation.\n", + " \"\"\"\n", + "\n", + " def __init__(self, game):\n", + " pass\n", + "\n", + " def train(self, examples):\n", + " \"\"\"\n", + " This function trains the neural network with examples obtained from\n", + " self-play.\n", + "\n", + " Input:\n", + " examples: a list of training examples, where each example is of form\n", + " (board, pi, v). pi is the MCTS informed policy vector for\n", + " the given board, and v is its value. 
The examples has\n", + " board in its canonical form.\n", + " \"\"\"\n", + " pass\n", + "\n", + " def predict(self, board):\n", + " \"\"\"\n", + " Input:\n", + " board: current board in its canonical form.\n", + "\n", + " Returns:\n", + " pi: a policy vector for the current board- a numpy array of length\n", + " game.getActionSize\n", + " v: a float in [-1,1] that gives the value of the current board\n", + " \"\"\"\n", + " pass\n", + "\n", + " def save_checkpoint(self, folder, filename):\n", + " \"\"\"\n", + " Saves the current neural network (with its parameters) in\n", + " folder/filename\n", + " \"\"\"\n", + " pass\n", + "\n", + " def load_checkpoint(self, folder, filename):\n", + " \"\"\"\n", + " Loads parameters of the neural network from folder/filename\n", + " \"\"\"\n", + " pass\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zq62-4gQw2VG" + }, + "source": [ + "#ninemensmorris nnet" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "IEJTGFxjYqdb", + "outputId": "6711281c-74b4-4a7c-d8ee-6d688a8cc4e0" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: tensorflow in /usr/local/lib/python3.10/dist-packages (2.12.0)\n", + "Requirement already satisfied: absl-py>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (1.4.0)\n", + "Requirement already satisfied: astunparse>=1.6.0 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (1.6.3)\n", + "Requirement already satisfied: flatbuffers>=2.0 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (23.5.26)\n", + "Requirement already satisfied: gast<=0.4.0,>=0.2.1 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (0.4.0)\n", + "Requirement already satisfied: google-pasta>=0.1.1 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (0.2.0)\n", + "Requirement already satisfied: 
grpcio<2.0,>=1.24.3 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (1.56.0)\n", + "Requirement already satisfied: h5py>=2.9.0 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (3.8.0)\n", + "Requirement already satisfied: jax>=0.3.15 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (0.4.13)\n", + "Requirement already satisfied: keras<2.13,>=2.12.0 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (2.12.0)\n", + "Requirement already satisfied: libclang>=13.0.0 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (16.0.6)\n", + "Requirement already satisfied: numpy<1.24,>=1.22 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (1.22.4)\n", + "Requirement already satisfied: opt-einsum>=2.3.2 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (3.3.0)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from tensorflow) (23.1)\n", + "Requirement already satisfied: protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.20.3 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (3.20.3)\n", + "Requirement already satisfied: setuptools in /usr/local/lib/python3.10/dist-packages (from tensorflow) (67.7.2)\n", + "Requirement already satisfied: six>=1.12.0 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (1.16.0)\n", + "Requirement already satisfied: tensorboard<2.13,>=2.12 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (2.12.3)\n", + "Requirement already satisfied: tensorflow-estimator<2.13,>=2.12.0 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (2.12.0)\n", + "Requirement already satisfied: termcolor>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (2.3.0)\n", + "Requirement already satisfied: typing-extensions>=3.6.6 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (4.7.1)\n", + "Requirement already satisfied: wrapt<1.15,>=1.11.0 in 
/usr/local/lib/python3.10/dist-packages (from tensorflow) (1.14.1)\n", + "Requirement already satisfied: tensorflow-io-gcs-filesystem>=0.23.1 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (0.32.0)\n", + "Requirement already satisfied: wheel<1.0,>=0.23.0 in /usr/local/lib/python3.10/dist-packages (from astunparse>=1.6.0->tensorflow) (0.40.0)\n", + "Requirement already satisfied: ml-dtypes>=0.1.0 in /usr/local/lib/python3.10/dist-packages (from jax>=0.3.15->tensorflow) (0.2.0)\n", + "Requirement already satisfied: scipy>=1.7 in /usr/local/lib/python3.10/dist-packages (from jax>=0.3.15->tensorflow) (1.10.1)\n", + "Requirement already satisfied: google-auth<3,>=1.6.3 in /usr/local/lib/python3.10/dist-packages (from tensorboard<2.13,>=2.12->tensorflow) (2.17.3)\n", + "Requirement already satisfied: google-auth-oauthlib<1.1,>=0.5 in /usr/local/lib/python3.10/dist-packages (from tensorboard<2.13,>=2.12->tensorflow) (1.0.0)\n", + "Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.10/dist-packages (from tensorboard<2.13,>=2.12->tensorflow) (3.4.3)\n", + "Requirement already satisfied: requests<3,>=2.21.0 in /usr/local/lib/python3.10/dist-packages (from tensorboard<2.13,>=2.12->tensorflow) (2.27.1)\n", + "Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from tensorboard<2.13,>=2.12->tensorflow) (0.7.1)\n", + "Requirement already satisfied: werkzeug>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from tensorboard<2.13,>=2.12->tensorflow) (2.3.6)\n", + "Requirement already satisfied: cachetools<6.0,>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard<2.13,>=2.12->tensorflow) (5.3.1)\n", + "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard<2.13,>=2.12->tensorflow) (0.3.0)\n", + "Requirement already satisfied: rsa<5,>=3.1.4 in 
/usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard<2.13,>=2.12->tensorflow) (4.9)\n", + "Requirement already satisfied: requests-oauthlib>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from google-auth-oauthlib<1.1,>=0.5->tensorboard<2.13,>=2.12->tensorflow) (1.3.1)\n", + "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.21.0->tensorboard<2.13,>=2.12->tensorflow) (1.26.16)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.21.0->tensorboard<2.13,>=2.12->tensorflow) (2023.5.7)\n", + "Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.21.0->tensorboard<2.13,>=2.12->tensorflow) (2.0.12)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.21.0->tensorboard<2.13,>=2.12->tensorflow) (3.4)\n", + "Requirement already satisfied: MarkupSafe>=2.1.1 in /usr/local/lib/python3.10/dist-packages (from werkzeug>=1.0.1->tensorboard<2.13,>=2.12->tensorflow) (2.1.3)\n", + "Requirement already satisfied: pyasn1<0.6.0,>=0.4.6 in /usr/local/lib/python3.10/dist-packages (from pyasn1-modules>=0.2.1->google-auth<3,>=1.6.3->tensorboard<2.13,>=2.12->tensorflow) (0.5.0)\n", + "Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.10/dist-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<1.1,>=0.5->tensorboard<2.13,>=2.12->tensorflow) (3.2.2)\n" + ] + } + ], + "source": [ + "pip install tensorflow" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wWPJXi4RXf1a" + }, + "source": [ + "keras" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "XQpQagYH-o0f" + }, + "outputs": [], + "source": [ + "import sys\n", + "sys.path.append('..')\n", + "\n", + "import argparse\n", + "import tensorflow as tf\n", + "\n", + 
"\"\"\"\n", + "Copied from:\n", + "https://github.com/suragnair/alpha-zero-general/blob/master/othello/keras/OthelloNNet.py\n", + "\"\"\"\n", + "class NOTACTIVENineMensMorrisNNet():\n", + " def __init__(self, game, args):\n", + " # game params\n", + " self.board_x, self.board_y = game.getBoardSize()\n", + " self.action_size = game.getActionSize()\n", + " self.args = args\n", + "\n", + " # Neural Net\n", + " self.input_boards = tf.keras.Input(shape=(self.board_x, self.board_y)) # s: batch_size x board_x x board_y\n", + "\n", + " x_image = tf.keras.layers.Reshape((self.board_x, self.board_y, 1))(self.input_boards) # batch_size x board_x x board_y x 1\n", + " h_conv1 = tf.keras.layers.Activation('relu')(tf.keras.layers.BatchNormalization(axis=3)(tf.keras.layers.Conv2D(args.num_channels, 3, padding='same', use_bias=False)(x_image))) # batch_size x board_x x board_y x num_channels\n", + " h_conv2 = tf.keras.layers.Activation('relu')(tf.keras.layers.BatchNormalization(axis=3)(tf.keras.layers.Conv2D(args.num_channels, 3, padding='same', use_bias=False)(h_conv1))) # batch_size x board_x x board_y x num_channels\n", + " h_conv3 = tf.keras.layers.Activation('relu')(tf.keras.layers.BatchNormalization(axis=3)(tf.keras.layers.Conv2D(args.num_channels, 3, padding='valid', use_bias=False)(h_conv2))) # batch_size x (board_x-2) x (board_y-2) x num_channels\n", + " h_conv4 = tf.keras.layers.Activation('relu')(tf.keras.layers.BatchNormalization(axis=3)(tf.keras.layers.Conv2D(args.num_channels, 3, padding='valid', use_bias=False)(h_conv3))) # batch_size x (board_x-4) x (board_y-4) x num_channels\n", + " h_conv4_flat = tf.keras.layers.Flatten()(h_conv4)\n", + " s_fc1 = tf.keras.layers.Dropout(args.dropout)(tf.keras.layers.Activation('relu')(tf.keras.layers.BatchNormalization(axis=1)(tf.keras.layers.Dense(1024, use_bias=False)(h_conv4_flat)))) # batch_size x 1024\n", + " s_fc2 = 
tf.keras.layers.Dropout(args.dropout)(tf.keras.layers.Activation('relu')(tf.keras.layers.BatchNormalization(axis=1)(tf.keras.layers.Dense(512, use_bias=False)(s_fc1)))) # batch_size x 1024\n", + " self.pi = tf.keras.layers.Dense(self.action_size, activation='softmax', name='pi')(s_fc2) # batch_size x self.action_size\n", + " self.v = tf.keras.layers.Dense(1, activation='tanh', name='v')(s_fc2) # batch_size x 1\n", + "\n", + " self.model = tf.keras.Model(inputs=self.input_boards, outputs=[self.pi, self.v])\n", + " self.model.compile(loss=['categorical_crossentropy','mean_squared_error'], optimizer=tf.keras.optimizers.Adam(args.lr))\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "T-Yaw5VTXhuu" + }, + "source": [ + "pytorch" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Fz3UF6nZhPHX" + }, + "outputs": [], + "source": [ + "import sys\n", + "sys.path.append('..')\n", + "\n", + "import argparse\n", + "import torch\n", + "import torch.nn as nn\n", + "import torch.nn.functional as F\n", + "import torch.optim as optim\n", + "\n", + "\"\"\"\n", + "Copied from:\n", + "https://github.com/suragnair/alpha-zero-general/blob/master/othello/pytorch/OthelloNNet.py\n", + "\"\"\"\n", + "class NineMensMorrisNNet(nn.Module):\n", + " def __init__(self, game, args):\n", + " # game params\n", + " self.board_x, self.board_y = game.getBoardSize()\n", + " self.action_size = game.getActionSize()\n", + " self.args = args\n", + "\n", + " super(NineMensMorrisNNet, self).__init__()\n", + " self.conv1 = nn.Conv2d(1, args.num_channels, 3, stride=1, padding=1)\n", + " self.conv2 = nn.Conv2d(args.num_channels, args.num_channels, 3, stride=1, padding=1)\n", + " self.conv3 = nn.Conv2d(args.num_channels, args.num_channels, 3, stride=1)\n", + " self.conv4 = nn.Conv2d(args.num_channels, args.num_channels, 3, stride=1)\n", + "\n", + " self.bn1 = nn.BatchNorm2d(args.num_channels)\n", + " self.bn2 = 
nn.BatchNorm2d(args.num_channels)\n", + " self.bn3 = nn.BatchNorm2d(args.num_channels)\n", + " self.bn4 = nn.BatchNorm2d(args.num_channels)\n", + "\n", + " self.fc1 = nn.Linear(args.num_channels*(self.board_x-4)*(self.board_y-4), 1024)\n", + " self.fc_bn1 = nn.BatchNorm1d(1024)\n", + "\n", + " self.fc2 = nn.Linear(1024, 512)\n", + " self.fc_bn2 = nn.BatchNorm1d(512)\n", + "\n", + " self.fc3 = nn.Linear(512, self.action_size)\n", + "\n", + " self.fc4 = nn.Linear(512, 1)\n", + "\n", + " def forward(self, s):\n", + " # s: batch_size x board_x x board_y\n", + " s = s.view(-1, 1, self.board_x, self.board_y) # batch_size x 1 x board_x x board_y\n", + " s = F.relu(self.bn1(self.conv1(s))) # batch_size x num_channels x board_x x board_y\n", + " s = F.relu(self.bn2(self.conv2(s))) # batch_size x num_channels x board_x x board_y\n", + " s = F.relu(self.bn3(self.conv3(s))) # batch_size x num_channels x (board_x-2) x (board_y-2)\n", + " s = F.relu(self.bn4(self.conv4(s))) # batch_size x num_channels x (board_x-4) x (board_y-4)\n", + " s = s.view(-1, self.args.num_channels*(self.board_x-4)*(self.board_y-4))\n", + "\n", + " s = F.dropout(F.relu(self.fc_bn1(self.fc1(s))), p=self.args.dropout, training=self.training) # batch_size x 1024\n", + " s = F.dropout(F.relu(self.fc_bn2(self.fc2(s))), p=self.args.dropout, training=self.training) # batch_size x 512\n", + "\n", + " pi = self.fc3(s) # batch_size x action_size\n", + " v = self.fc4(s) # batch_size x 1\n", + "\n", + " return F.log_softmax(pi, dim=1), torch.tanh(v)\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vnsjORg2w-9v" + }, + "source": [ + "#nnetwrapper" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "68YfEWCyX_En" + }, + "source": [ + "keras" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "i8rIGM28YBSh" + }, + "outputs": [], + "source": [ + "import argparse\n", + "import os\n", + "import shutil\n", + "import time\n", + "import random\n", + 
"import numpy as np\n", + "import math\n", + "import sys\n", + "sys.path.append('../..')\n", + "\n", + "import argparse\n", + "\n", + "\n", + "\n", + "# args = dotdict({\n", + "# 'lr': 0.001,\n", + "# 'dropout': 0.3,\n", + "# 'epochs': 10,\n", + "# 'batch_size': 64,\n", + "# 'cuda': False,\n", + "# 'num_channels': 512,\n", + "# })\n", + "\n", + "\"\"\"\n", + "Copied from:\n", + "https://github.com/suragnair/alpha-zero-general/blob/master/othello/keras/NNet.py\n", + "\"\"\"\n", + "class NOTACTIVENNetWrapper(NeuralNet):\n", + " def __init__(self, game):\n", + " self.nnet = NineMensMorrisNNet(game, args)\n", + " self.board_x, self.board_y = game.getBoardSize()\n", + " self.action_size = game.getActionSize()\n", + "\n", + " def train(self, examples):\n", + " \"\"\"\n", + " examples: list of examples, each example is of form (board, pi, v)\n", + " \"\"\"\n", + "\n", + " input_boards, target_pis, target_vs = list(zip(*examples))\n", + " input_boards = np.asarray(input_boards)\n", + " target_pis = np.asarray(target_pis)\n", + " target_vs = np.asarray(target_vs)\n", + " self.nnet.model.fit(x = input_boards, y = [target_pis, target_vs], batch_size = args.batch_size, epochs = args.epochs)\n", + "\n", + " def predict(self, board):\n", + " \"\"\"\n", + " board: np array with board\n", + " \"\"\"\n", + " # timing\n", + " start = time.time()\n", + "\n", + " # preparing input\n", + " board = board[np.newaxis, :, :]\n", + "\n", + " # run\n", + "\n", + " pi, v = self.nnet.model.predict(board, verbose=False)\n", + "\n", + " print('PREDICTION TIME TAKEN : {0:03f}'.format(time.time()-start))\n", + " return pi[0], v[0]\n", + "\n", + " def save_checkpoint(self, folder, filename):\n", + " # change extension\n", + " filename = filename.split(\".\")[0] + \".h5\"\n", + "\n", + " filepath = folder + filename\n", + " if not os.path.exists(filepath):\n", + " print(\"Checkpoint Directory does not exist! 
Making directory {}\".format(folder))\n", + " os.mkdir(filepath)\n", + " else:\n", + " print(\"Checkpoint Directory exists! \")\n", + " self.nnet.model.save_weights(filepath)\n", + "\n", + " def load_checkpoint(self, folder, filename):\n", + " # change extension\n", + " filename = filename.split(\".\")[0] + \".h5\"\n", + "\n", + " # https://github.com/pytorch/examples/blob/master/imagenet/main.py#L98\n", + " filepath = folder + filename\n", + " if not os.path.exists(filepath):\n", + " raise(\"No model in path {}\".format(filepath))\n", + "\n", + " self.nnet.model.load_weights(filepath)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MmIrV-WxX88H" + }, + "source": [ + "pytorch" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "mLarZIHfjpxi" + }, + "outputs": [], + "source": [ + "\n", + "import os\n", + "import sys\n", + "import time\n", + "\n", + "import numpy as np\n", + "from tqdm import tqdm\n", + "\n", + "sys.path.append('../../')\n", + "\n", + "import torch\n", + "import torch.optim as optim\n", + "\n", + "args = dotdict({\n", + " 'lr': 0.001,\n", + " 'dropout': 0.3,\n", + " 'epochs': 10,\n", + " 'batch_size': 64,\n", + " 'cuda': torch.cuda.is_available(),\n", + " 'num_channels': 512,\n", + "})\n", + "\n", + "\"\"\"\n", + "Copied from:\n", + "https://github.com/suragnair/alpha-zero-general/blob/master/othello/pytorch/NNet.py\n", + "Adapted the load and save methods to save and load in google drive from colab\n", + "\"\"\"\n", + "class NNetWrapper(NeuralNet):\n", + " def __init__(self, game):\n", + " self.nnet = NineMensMorrisNNet(game, args)\n", + " self.board_x, self.board_y = game.getBoardSize()\n", + " self.action_size = game.getActionSize()\n", + "\n", + " if args.cuda:\n", + " #self.nnet.cuda.set_device({\"cuda:0\"})\n", + " self.nnet.to(\"cuda:0\")\n", + "\n", + " def train(self, examples):\n", + " \"\"\"\n", + " examples: list of examples, each example is of form (board, pi, v)\n", 
+ " \"\"\"\n", + " optimizer = optim.Adam(self.nnet.parameters())\n", + "\n", + " for epoch in range(args.epochs):\n", + " print('EPOCH ::: ' + str(epoch + 1))\n", + " self.nnet.train()\n", + " pi_losses = AverageMeter()\n", + " v_losses = AverageMeter()\n", + "\n", + " batch_count = int(len(examples) / args.batch_size)\n", + "\n", + " t = tqdm(range(batch_count), desc='Training Net')\n", + " for _ in t:\n", + " sample_ids = np.random.randint(len(examples), size=args.batch_size)\n", + " boards, pis, vs = list(zip(*[examples[i] for i in sample_ids]))\n", + " boards = torch.FloatTensor(np.array(boards).astype(np.float64))\n", + " target_pis = torch.FloatTensor(np.array(pis))\n", + " target_vs = torch.FloatTensor(np.array(vs).astype(np.float64))\n", + "\n", + " # predict\n", + " if args.cuda:\n", + " boards, target_pis, target_vs = boards.contiguous().cuda(), target_pis.contiguous().cuda(), target_vs.contiguous().cuda()\n", + "\n", + " # compute output\n", + " out_pi, out_v = self.nnet(boards)\n", + " l_pi = self.loss_pi(target_pis, out_pi)\n", + " l_v = self.loss_v(target_vs, out_v)\n", + " total_loss = l_pi + l_v\n", + "\n", + " # record loss\n", + " pi_losses.update(l_pi.item(), boards.size(0))\n", + " v_losses.update(l_v.item(), boards.size(0))\n", + " t.set_postfix(Loss_pi=pi_losses, Loss_v=v_losses)\n", + "\n", + " # compute gradient and do SGD step\n", + " optimizer.zero_grad()\n", + " total_loss.backward()\n", + " optimizer.step()\n", + "\n", + " def predict(self, board):\n", + " \"\"\"\n", + " board: np array with board\n", + " \"\"\"\n", + " # timing\n", + " start = time.time()\n", + "\n", + " # preparing input\n", + " board = torch.FloatTensor(board.astype(np.float64))\n", + " if args.cuda: board = board.contiguous().cuda()\n", + " board = board.view(1, self.board_x, self.board_y)\n", + " self.nnet.eval()\n", + " with torch.no_grad():\n", + " pi, v = self.nnet(board)\n", + "\n", + " # print('PREDICTION TIME TAKEN : {0:03f}'.format(time.time()-start))\n", + 
" return torch.exp(pi).data.cpu().numpy()[0], v.data.cpu().numpy()[0]\n", + "\n", + " def loss_pi(self, targets, outputs):\n", + " return -torch.sum(targets * outputs) / targets.size()[0]\n", + "\n", + " def loss_v(self, targets, outputs):\n", + " return torch.sum((targets - outputs.view(-1)) ** 2) / targets.size()[0]\n", + "\n", + " def save_checkpoint(self, folder, filename):\n", + " filepath = folder + filename\n", + " if not os.path.exists(folder):\n", + " print(\"Checkpoint Directory does not exist! Making directory {}\".format(folder))\n", + " os.mkdir(folder)\n", + " else:\n", + " print(\"Checkpoint Directory exists! \")\n", + " print(\"saving to path '%s\", filepath)\n", + " torch.save({\n", + " 'state_dict': self.nnet.state_dict(),\n", + " }, filepath)\n", + "\n", + " def load_checkpoint(self, folder, filename):\n", + " # https://github.com/pytorch/examples/blob/master/imagenet/main.py#L98\n", + " filepath = folder + filename\n", + " print(\"Trying to load checkpoint\")\n", + " if not os.path.exists(filepath):\n", + " raise (\"No model in path {}\".format(filepath))\n", + " map_location = None if args.cuda else 'cpu'\n", + " checkpoint = torch.load(filepath, map_location=map_location)\n", + " self.nnet.load_state_dict(checkpoint['state_dict'])\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9yVzg3aNxCjq" + }, + "source": [ + "#arena" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "imADM0ifebT7" + }, + "outputs": [], + "source": [ + "import logging\n", + "\n", + "from tqdm import tqdm\n", + "\n", + "log = logging.getLogger(__name__)\n", + "\n", + "\"\"\"\n", + "Copied from:\n", + "https://github.com/suragnair/alpha-zero-general/blob/master/Arena.py\n", + "\"\"\"\n", + "class Arena():\n", + " \"\"\"\n", + " An Arena class where any 2 agents can be pit against each other.\n", + " \"\"\"\n", + "\n", + " def __init__(self, player1, player2, game, display=None):\n", + " 
\"\"\"\n", + " Input:\n", + " player 1,2: two functions that takes board as input, return action\n", + " game: Game object\n", + " display: a function that takes board as input and prints it (e.g.\n", + " display in othello/OthelloGame). Is necessary for verbose\n", + " mode.\n", + "\n", + " see othello/OthelloPlayers.py for an example. See pit.py for pitting\n", + " human players/other baselines with each other.\n", + " \"\"\"\n", + " self.player1 = player1\n", + " self.player2 = player2\n", + " self.game = game\n", + " self.display = display\n", + "\n", + " def playGame(self, verbose=False):\n", + " \"\"\"\n", + " Executes one episode of a game.\n", + "\n", + " Returns:\n", + " either\n", + " winner: player who won the game (1 if player1, -1 if player2)\n", + " or\n", + " draw result returned from the game that is neither 1, -1, nor 0.\n", + " \"\"\"\n", + " players = [self.player2, None, self.player1]\n", + " curPlayer = 1\n", + " board = self.game.getInitBoard()\n", + " it = 0\n", + " while self.game.getGameEnded(board, curPlayer) == 0:\n", + " it += 1\n", + " if verbose:\n", + " assert self.display\n", + " print(\"Turn \", str(it), \"Player \", str(curPlayer))\n", + " self.display(board)\n", + " action = players[curPlayer + 1](self.game.getCanonicalForm(board, curPlayer))\n", + "\n", + " valids = self.game.getValidMoves(self.game.getCanonicalForm(board, curPlayer), 1)\n", + "\n", + " if valids[action] == 0:\n", + " log.error(f'Action {action} is not valid!')\n", + " log.debug(f'valids = {valids}')\n", + " assert valids[action] > 0\n", + " board, curPlayer = self.game.getNextState(board, curPlayer, action)\n", + " if verbose:\n", + " assert self.display\n", + " print(\"Game over: Turn \", str(it), \"Result \", str(self.game.getGameEnded(board, 1)))\n", + " self.display(board)\n", + " return curPlayer * self.game.getGameEnded(board, curPlayer)\n", + "\n", + " def playGames(self, num, verbose=False):\n", + " \"\"\"\n", + " Plays num games in which player1 starts 
num/2 games and player2 starts\n", + " num/2 games.\n", + "\n", + " Returns:\n", + " oneWon: games won by player1\n", + " twoWon: games won by player2\n", + " draws: games won by nobody\n", + " \"\"\"\n", + "\n", + " num = int(num / 2)\n", + " oneWon = 0\n", + " twoWon = 0\n", + " draws = 0\n", + " for _ in tqdm(range(num), desc=\"Arena.playGames (1)\"):\n", + " gameResult = self.playGame(verbose=verbose)\n", + " if gameResult == 1:\n", + " oneWon += 1\n", + " elif gameResult == -1:\n", + " twoWon += 1\n", + " else:\n", + " draws += 1\n", + "\n", + " self.player1, self.player2 = self.player2, self.player1\n", + "\n", + " for _ in tqdm(range(num), desc=\"Arena.playGames (2)\"):\n", + " gameResult = self.playGame(verbose=verbose)\n", + " if gameResult == -1:\n", + " oneWon += 1\n", + " elif gameResult == 1:\n", + " twoWon += 1\n", + " else:\n", + " draws += 1\n", + "\n", + " return oneWon, twoWon, draws\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "PGxgTxQ-xE89" + }, + "source": [ + "#coach" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ezuDXRMAagtw" + }, + "outputs": [], + "source": [ + "import logging\n", + "import os\n", + "import sys\n", + "from collections import deque\n", + "from pickle import Pickler, Unpickler\n", + "from random import shuffle\n", + "\n", + "import numpy as np\n", + "from tqdm import tqdm\n", + "\n", + "log = logging.getLogger(__name__)\n", + "\n", + "\"\"\"\n", + "Copied from:\n", + "https://github.com/suragnair/alpha-zero-general/blob/master/Coach.py\n", + "\"\"\"\n", + "class Coach():\n", + " \"\"\"\n", + " This class executes the self-play + learning. It uses the functions defined\n", + " in Game and NeuralNet. 
args are specified in main.py.\n", + " \"\"\"\n", + "\n", + " def __init__(self, game, nnet, args):\n", + " self.game = game\n", + " self.nnet = nnet\n", + " self.pnet = self.nnet.__class__(self.game) # the competitor network\n", + " self.args = args\n", + " self.mcts = MCTS(self.game, self.nnet, self.args)\n", + " self.trainExamplesHistory = [] # history of examples from args.numItersForTrainExamplesHistory latest iterations\n", + " self.skipFirstSelfPlay = False # can be overriden in loadTrainExamples()\n", + "\n", + " def executeEpisode(self):\n", + " \"\"\"\n", + " This function executes one episode of self-play, starting with player 1.\n", + " As the game is played, each turn is added as a training example to\n", + " trainExamples. The game is played till the game ends. After the game\n", + " ends, the outcome of the game is used to assign values to each example\n", + " in trainExamples.\n", + "\n", + " It uses a temp=1 if episodeStep < tempThreshold, and thereafter\n", + " uses temp=0.\n", + "\n", + " Returns:\n", + " trainExamples: a list of examples of the form (canonicalBoard, currPlayer, pi,v)\n", + " pi is the MCTS informed policy vector, v is +1 if\n", + " the player eventually won the game, else -1.\n", + " \"\"\"\n", + " trainExamples = []\n", + " board = self.game.getInitBoard()\n", + " self.curPlayer = 1\n", + " episodeStep = 0\n", + "\n", + " while True:\n", + " episodeStep += 1\n", + " canonicalBoard = self.game.getCanonicalForm(board, self.curPlayer)\n", + " temp = int(episodeStep < self.args.tempThreshold)\n", + "\n", + " pi = self.mcts.getActionProb(canonicalBoard, temp=temp)\n", + " sym = self.game.getSymmetries(canonicalBoard, pi)\n", + " for b, p in sym:\n", + " trainExamples.append([b, self.curPlayer, p, None])\n", + "\n", + " action = np.random.choice(len(pi), p=pi)\n", + " board, self.curPlayer = self.game.getNextState(board, self.curPlayer, action)\n", + "\n", + " r = self.game.getGameEnded(board, self.curPlayer)\n", + "\n", + " if r != 
0:\n", + " return [(x[0], x[2], r * ((-1) ** (x[1] != self.curPlayer))) for x in trainExamples]\n", + "\n", + " def learn(self):\n", + " \"\"\"\n", + " Performs numIters iterations with numEps episodes of self-play in each\n", + " iteration. After every iteration, it retrains neural network with\n", + " examples in trainExamples (which has a maximum length of maxlenofQueue).\n", + " It then pits the new neural network against the old one and accepts it\n", + " only if it wins >= updateThreshold fraction of games.\n", + " \"\"\"\n", + "\n", + " for i in range(1, self.args.numIters + 1):\n", + " # bookkeeping\n", + " log.info(f'Starting Iter #{i} ...')\n", + " # examples of the iteration\n", + " if not self.skipFirstSelfPlay or i > 1:\n", + " iterationTrainExamples = deque([], maxlen=self.args.maxlenOfQueue)\n", + "\n", + " for _ in tqdm(range(self.args.numEps), desc=\"Self Play\"):\n", + " self.mcts = MCTS(self.game, self.nnet, self.args) # reset search tree\n", + " iterationTrainExamples += self.executeEpisode()\n", + "\n", + " # save the iteration examples to the history\n", + " self.trainExamplesHistory.append(iterationTrainExamples)\n", + "\n", + " if len(self.trainExamplesHistory) > self.args.numItersForTrainExamplesHistory:\n", + " log.warning(\n", + " f\"Removing the oldest entry in trainExamples. len(trainExamplesHistory) = {len(self.trainExamplesHistory)}\")\n", + " self.trainExamplesHistory.pop(0)\n", + " # backup history to a file\n", + " # NB! 
the examples were collected using the model from the previous iteration, so (i-1)\n", + " self.saveTrainExamples(i - 1)\n", + "\n", + " # shuffle examples before training\n", + " trainExamples = []\n", + " for e in self.trainExamplesHistory:\n", + " trainExamples.extend(e)\n", + " shuffle(trainExamples)\n", + "\n", + " # training new network, keeping a copy of the old one\n", + " self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')\n", + " self.pnet.load_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')\n", + " pmcts = MCTS(self.game, self.pnet, self.args)\n", + "\n", + " self.nnet.train(trainExamples)\n", + " nmcts = MCTS(self.game, self.nnet, self.args)\n", + "\n", + " log.info('PITTING AGAINST PREVIOUS VERSION')\n", + " arena = Arena(lambda x: np.argmax(pmcts.getActionProb(x, temp=0)),\n", + " lambda x: np.argmax(nmcts.getActionProb(x, temp=0)), self.game)\n", + " pwins, nwins, draws = arena.playGames(self.args.arenaCompare)\n", + "\n", + " log.info('NEW/PREV WINS : %d / %d ; DRAWS : %d' % (nwins, pwins, draws))\n", + " if pwins + nwins == 0 or float(nwins) / (pwins + nwins) < self.args.updateThreshold:\n", + " log.info('REJECTING NEW MODEL')\n", + " self.nnet.load_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')\n", + " else:\n", + " log.info('ACCEPTING NEW MODEL')\n", + " self.nnet.save_checkpoint(folder=self.args.checkpoint, filename=self.getCheckpointFile(i))\n", + " self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='best.pth.tar')\n", + "\n", + " def getCheckpointFile(self, iteration):\n", + " return 'checkpoint_' + str(iteration) + '.pth.tar'\n", + "\n", + " def saveTrainExamples(self, iteration):\n", + " folder = self.args.checkpoint\n", + " log.warning('trying to save')\n", + " if not os.path.exists(folder):\n", + " os.makedirs(folder)\n", + " filename = os.path.join(folder, self.getCheckpointFile(iteration) + \".examples\")\n", + " with open(filename, \"wb+\") as f:\n", + " 
Pickler(f).dump(self.trainExamplesHistory)\n", + " f.closed\n", + "\n", + " def loadTrainExamples(self):\n", + " log.warning('trying to load examples')\n", + " modelFile = os.path.join(self.args.load_folder_file[0], self.args.load_folder_file[1])\n", + " examplesFile = modelFile + \".examples\"\n", + " if not os.path.isfile(examplesFile):\n", + " log.warning(f'File \"{examplesFile}\" with trainExamples not found!')\n", + " r = input(\"Continue? [y|n]\")\n", + " if r != \"y\":\n", + " sys.exit()\n", + " else:\n", + " log.info(\"File with trainExamples found. Loading it...\")\n", + " with open(examplesFile, \"rb\") as f:\n", + " self.trainExamplesHistory = Unpickler(f).load()\n", + " log.info('Loading done!')\n", + "\n", + " # examples based on the model were already collected (loaded)\n", + " self.skipFirstSelfPlay = True\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "LUTd1ujbxKkn" + }, + "source": [ + "install log package" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "T03CQAKwwErN", + "outputId": "e46bec37-3e67-44f8-8a8d-79725bf630e0" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: coloredlogs in /usr/local/lib/python3.10/dist-packages (15.0.1)\n", + "Requirement already satisfied: humanfriendly>=9.1 in /usr/local/lib/python3.10/dist-packages (from coloredlogs) (10.0)\n" + ] + } + ], + "source": [ + "pip install coloredlogs" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rL_bh5IdxPRo" + }, + "source": [ + "#main" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "3oX53YeGZT4Q" + }, + "outputs": [], + "source": [ + "import logging\n", + "import coloredlogs\n", + "#NOTE -> TO SWITCH BETWEEN KERAS AND PYTORCH, CHANGE NAMES FROM NNET AND NNETWRAPPER\n", + "log = logging.getLogger(__name__)\n", + "\n", + 
"coloredlogs.install(level='INFO') # Change this to DEBUG to see more info.\n", + "\n", + "args = dotdict({\n", + " 'numIters': 10, # default 1000 -> takes too long\n", + " 'numEps': 1000, # Number of complete self-play games to simulate during a new iteration. default 100\n", + " 'tempThreshold': 15, # default 15\n", + " 'updateThreshold': 0.55, # During arena playoff, new neural net will be accepted if threshold or more of games are won. default 0.6\n", + " 'maxlenOfQueue': 200000, # Number of game examples to train the neural networks. default 200000\n", + " 'numMCTSSims': 25, # Number of games moves for MCTS to simulate. default 25\n", + " 'arenaCompare': 40, # Number of games to play during arena play to determine if new net will be accepted. default 40\n", + " 'cpuct': 1, # default 1\n", + "\n", + " 'checkpoint': '/content/drive/My Drive/training/checkpoint',\n", + " 'load_model': True,\n", + " 'load_folder_file': ('/content/drive/My Drive/training/20it/','best.pth.tar'),\n", + " 'numItersForTrainExamplesHistory': 20,\n", + "\n", + " 'lr': 0.005, #default 0.001\n", + " 'dropout': 0.3,\n", + " 'epochs': 10, #default 10\n", + " 'batch_size': 64,\n", + " #'cuda': False,\n", + " 'cuda': torch.cuda.is_available(),\n", + " 'num_channels': 512,\n", + "\n", + "})\n", + "\n", + "\n", + "def main():\n", + " log.info('Loading %s...', NineMensMorrisGame.__name__)\n", + " g = NineMensMorrisGame()\n", + "\n", + " log.info('Loading %s...', NNetWrapper.__name__)\n", + " nnet = NNetWrapper(g)\n", + " log.info('cuda available \"%s\"', torch.cuda.is_available())\n", + " if args.load_model:\n", + " log.info('Loading checkpoint \"%s/%s\"...', args.load_folder_file[0], args.load_folder_file[1])\n", + " nnet.load_checkpoint(args.load_folder_file[0], args.load_folder_file[1])\n", + " else:\n", + " log.warning('Not loading a checkpoint!')\n", + "\n", + " log.info('Loading the Coach...')\n", + " c = Coach(g, nnet, args)\n", + "\n", + " if args.load_model:\n", + " log.info(\"Loading 
'trainExamples' from file...\")\n", + " c.loadTrainExamples()\n", + "\n", + " log.info('Starting the learning process 🎉')\n", + " c.learn()\n", + "\n", + "\n", + "if __name__ == \"__main__\":\n", + " main()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hYtTsimZ1arH" + }, + "source": [ + "#players" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Z321Bzza1cfw" + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "import random\n", + "\n", + "\n", + "class RandomPlayer():\n", + " def __init__(self, game):\n", + " self.game = game\n", + "\n", + " def find_indices(self, list_to_check, item_to_find):\n", + " indices = []\n", + " for idx, value in enumerate(list_to_check):\n", + " if value == item_to_find:\n", + " indices.append(idx)\n", + " return indices\n", + "\n", + " def play(self, board):\n", + " valids = self.game.getValidMoves(board, 1)\n", + " indices = self.find_indices(valids, 1)\n", + " a = random.choice(indices)\n", + " return a\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2CKzIisn1MR_" + }, + "source": [ + "#pit" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "VpYpGlio1L8Y" + }, + "outputs": [], + "source": [ + "\n", + "\n", + "\"\"\"\n", + "use this script to play any two agents against each other, or play manually with\n", + "any agent.\n", + "\"\"\"\n", + "human_vs_cpu = False\n", + "\n", + "g = NineMensMorrisGame()\n", + "\n", + "# all players\n", + "rp = RandomPlayer(g).play\n", + "\n", + "# nnet players\n", + "n1 = NNetWrapper(g)\n", + "n1.load_checkpoint('/content/drive/My Drive/training/20it/','best.pth.tar')\n", + "args1 = dotdict({'numMCTSSims': 50, 'cpuct':1.0})\n", + "mcts1 = MCTS(g, n1, args1)\n", + "n1p = lambda x: np.argmax(mcts1.getActionProb(x, temp=0))\n", + "\n", + "player2 = rp # Player 2 is neural network if it's cpu vs cpu.\n", + "\n", + "arena = Arena(n1p, player2, g, 
display=NineMensMorrisGame.display)\n", + "\n", + "print(arena.playGames(20, verbose=True))\n" + ] + } + ], + "metadata": { + "colab": { + "authorship_tag": "ABX9TyPK4K/DQsOrPPCBwnVL1DmX", + "gpuType": "A100", + "include_colab_link": true, + "machine_shape": "hm", + "provenance": [], + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.9" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/ninemensmorris/NINEMENSMORRIS_README.md b/ninemensmorris/NINEMENSMORRIS_README.md new file mode 100644 index 000000000..0c007fa38 --- /dev/null +++ b/ninemensmorris/NINEMENSMORRIS_README.md @@ -0,0 +1,24 @@ +# Nine Mens Morris Rules and Structures + +Nine Men's Morris is a two-player board game; the rules of the game can be found here: +http://www.move-this.com/spielregeln/ +I used an additional tournament rule which determines that after 50 moves without a mill, the game ends in a draw. + +### Using this implementation +This implementation works like the other games in the repository. Pick the game in main.py to start training a model or use the pretrained model to test it out. I trained the pretrained model for 32 iterations, 100 episodes and 15 epochs; all other parameters were left at the default values. The model wins about 79% of the games against a random player, which I think is fairly impressive considering the number of possible moves of the game. The game has 13272 possible moves, so it should be trained with a higher number of episodes and for more iterations. + +I also created a Colab Notebook where you can run the code and save the models to your Google Drive.
Before doing that, you got to create the folders, where you want the models and checkpoints to be saved. + +To start the training, run the main.py file with the right game selected, and if you want to test it out, there is a random player available and the player that was trained for 32 iterations. +The easiest way to try it out, is to use the notebook. + +Both the Keras NNet and the PyTorch NNet are copied from othello, it can surely be optimized. I worked with the PyTorch version. + +### Understanding this implementation +The important files for the Game Logic are NineMensMorrisGame.py and NineMensMorrisLogic.py. + +### Training a model +To train a model, change the Game and NNetWrapper in the main.py file to the Ninemensmorris versions. +Or you can use the Notebook (NOTE: I used Google Drive to save the checkpoints, but had some issues in the process with saving checkpoints) + + diff --git a/ninemensmorris/NineMensMorrisGame.py b/ninemensmorris/NineMensMorrisGame.py new file mode 100644 index 000000000..41852267a --- /dev/null +++ b/ninemensmorris/NineMensMorrisGame.py @@ -0,0 +1,392 @@ +from __future__ import print_function +import sys +sys.path.append('..') +from Game import Game +from .NineMensMorrisLogic import Board +import numpy as np +import copy + +''' +Author: Jonas Jakob +Created: May 31, 2023 + +Implementation of the Game Class for NineMensMorris +Many of these functions are based on those from OthelloGame.py: +https://github.com/suragnair/alpha-zero-general/blob/master/othello/OthelloGame.py + +''' +class NineMensMorrisGame(Game): + + """ + inititalizes the list of all possible moves, the policy rotation vector and + the number of moves without a mill to determine a draw + """ + def __init__(self): + self.n = 6 + self.all_moves = self.get_all_moves() + self.policy_rotation_vector = self.get_policy_roation90() + self.MAX_MOVES_WITHOUT_MILL = 200 + + """ + Gets the list of all possible moves + """ + def get_all_moves(self): + """ + Returns: + 
moves: A list with all possible moves for the game + """ + moves = self.get_all_moves_phase_zero() + self.get_all_moves_phase_one_and_two() + return list(moves) + + """ + Gets the lookup list for the rotation of the vector of legal moves + """ + def get_policy_roation90(self): + """ + Returns: + rotation90: lookup list for the rotation of the legal moves vector + """ + + rotation90 = [-1] * len(self.all_moves) + + i = 0 + while i < len(self.all_moves): + + move = self.all_moves[i] + rotatedmove = self.rotate(move) + newindex = self.all_moves.index(rotatedmove) + rotation90[i] = newindex + + i+=1 + + return rotation90 + + """ + Rotates a move by 90 degrees + """ + def rotate(self, move): + """ + Input: + move: Tuple (origin, destination, piece to take) + Returns: + rot_move: Tuple (neworigin, newdestination, newpiece to take) + """ + if move[0] == 'none': + neworigin = 'none' + + elif move[0] in [6,7,14,15,22,23]: + neworigin = move[0] - 6 + + else: + neworigin = move[0] + 2 + + if move[1] in [6,7,14,15,22,23]: + newdestination = move[1] - 6 + + else: + newdestination = move[1] + 2 + + if move[2] == 'none': + newenemy = 'none' + + elif move[2] in [6,7,14,15,22,23]: + newenemy = move[2] - 6 + + else: + newenemy = move[2] + 2 + + return (neworigin, newdestination, newenemy) + + """ + Generates all possible moves for game phase zero + """ + def get_all_moves_phase_zero(self): + """ + Returns: + moves: list of all possible move Tuples + """ + + moves = [] + index = 0 + + while index < 24: + + moves.append(("none",index,"none")) + count = 0 + + while count < 24: + + if count != index: + + moves.append(("none",index,count)) + + count += 1 + + index += 1 + + return list(moves) + + """ + Generates all possible moves for game phase one and two + """ + def get_all_moves_phase_one_and_two(self): + """ + Returns: + moves: list of all possible move Tuples + """ + + moves = [] + index_origin = 0 + + while index_origin < 24: + + index_move = 0 + + while index_move < 24: + + if 
index_move != index_origin: + + moves.append((index_origin,index_move,"none")) + + count = 0 + + while count <24: + + if (count != index_move)and(count != index_origin): + + moves.append((index_origin,index_move,count)) + + count += 1 + + index_move += 1 + + index_origin += 1 + + return list(moves) + """ + based on Othellogame.py + Gets the initial form of the board in game phase zero + """ + def getInitBoard(self): + """ + Returns: + board: the initial board configuration + """ + b = Board() + + return np.array(b.pieces) + + """ + based on Othellogame.py + Gets the size of the board image in a Tuple (x, y) + """ + def getBoardSize(self): + """ + Returns: + dimensions: a Tuple with the board dimensions + """ + return (6, 6) + + """ + based on Othellogame.py + Gets the number of all possible actions + """ + def getActionSize(self): + """ + Returns: + actionssize: number of all moves + """ + return len(self.all_moves) + + """ + based on Othellogame.py + Returns the next state to given a board, player and move + """ + def getNextState(self, board, player, move): + """ + Input: + board: current board image + player: current player (1 or -1) + move: move Tuple + + Returns: + new_state: Tuple (new board, next player) + """ + b = Board() + b.pieces = np.copy(board) + + b.execute_move(player, move, self.all_moves) + + return (b.pieces, -player) + + """ + based on Othellogame.py + Gets a vector of size == ActionSize that marks legal moves for the current + board and player with 1 + """ + def getValidMoves(self, board, player): + """ + Input: + board: current board image + player current player (1 or -1) + Returns: + valid_moves: np array of ones and zeros marking the legal moves + """ + b = Board() + b.pieces = np.copy(board) + + valid_moves = b.get_legal_move_vector(player, self.all_moves) + + return np.array(valid_moves) + + """ + based on Othellogame.py + Determines if the game has ended for the given board and player. 
+ """ + def getGameEnded(self, board, player): + """ + Input: + board: current board + player: current player (1 or -1) + Returns: + game_ended: 0 if game has not ended. 1 if player won, -1 if player + lost, small non-zero value for draw. + """ + assert(not isinstance(board, str)) + + b = Board() + b.pieces = np.copy(board) + + if b.pieces[4][1] >= 50: + return 0.0001 + elif not b.has_legal_moves(player): + return -1 + elif not b.has_legal_moves(-player): + return 1 + elif len(b.get_player_pieces(player)) < 3 and b.pieces[4][0] == 18: + return -1 + elif len(b.get_player_pieces(-player)) < 3 and b.pieces[4][0] == 18: + return 1 + elif b.has_legal_moves(-player) and b.has_legal_moves(player): + return 0 + + """ + Based on Othellogame.py + Multiplies each element with the given player, resulting in a canonical + board from the perspective of the given player. The given players pieces + are always represented as 1 in the Canonical Form. + Note: no true canonical form + """ + def getCanonicalForm(self, board, player): + """ + Input: + board: current board + player: current player (1 or -1) + Returns: + b: canonical board + """ + b = np.zeros((6,6), dtype=int) + count_placements = copy.deepcopy(board[4][0]) + current_moves = copy.deepcopy(board[4][1]) + index = 0 + while index < 4: + item = 0 + while item < 6: + b[index][item] = board[index][item] * player + item += 1 + index += 1 + + b[4][0] = count_placements + b[4][1] = current_moves + return b + """ + Based on Othellogame.py + Gets some Symmetries by rotating the board three times, each time also + adapting the legal moves vector to the new board + """ + def getSymmetries(self, board, pi): + """ + Input: + board: the current board + pi: the legal moves vector for the current board + Returns: + results: three board rotations + """ + + assert(len(pi) == len(self.all_moves)) + b = Board() + b.pieces = np.copy(board) + + results = b.get_board_rotations(pi, self.all_moves, self.policy_rotation_vector) + + return results + 
+ """ + Gets a String representation for the board, used for hashing in mcts + """ + def stringRepresentation(self, board): + """ + Input: + board: the current board + Returns: + board_s: String representation of the board + """ + board_s = "" + index = 0 + i = 0 + while i < 4: + while index < 6: + board_s = board_s + str(board[i][index]) + "," + index += 1 + index = 0 + i += 1 + board_s = board_s + str(board[4][0]) + "," + board_s = board_s + str(board[4][1]) + + return board_s + + """ + Gets a readable String representation for the board + """ + def stringRepresentationReadable(self, board): + """ + Input: + board: the current board + Returns: + board_s: String representation of the board + """ + board_s = "" + index = 0 + i = 0 + while i < 4: + while index < 6: + board_s = board_s + str(board[i][index]) + "," + index += 1 + index = 0 + i += 1 + board_s = board_s + str(board[4][0]) + "," + board_s = board_s + str(board[4][1]) + + return board_s + + @staticmethod + def display(boardd): + board = Board() + board.pieces = np.copy(boardd) + board, stuff = board.piecesToArray() + assert(0 <= stuff[0] <= 18) + assert(len(board) == 24) + + print('{}________ {} ________{}'.format(board[0], board[1], board[2])) + print('| | | ') + print(' {} {} {} '.format(board[8], board[9], board[10])) + print('| | | | | ') + print('| | {}__ {} __{} '.format(board[16], board[17], board[18])) + print('| | | | | | ') + print('{}-{}-{} {}-{}-{}'.format(board[7], board[15], board[23], board[19], board[11], board[3])) + print('| | | | | | ') + print('| | {}__ {} __{} '.format(board[22], board[21], board[20])) + print('| | | | | ') + print('| {}_____ {} _____{} '.format(board[14], board[13], board[12])) + print('| | | ', ) + print('{} _______ {} ______ {} '.format(board[6], board[5], board[4])) diff --git a/ninemensmorris/NineMensMorrisLogic.py b/ninemensmorris/NineMensMorrisLogic.py new file mode 100644 index 000000000..96d685bb5 --- /dev/null +++ b/ninemensmorris/NineMensMorrisLogic.py @@ 
'''
Author: Jonas Jakob
Created: May 31, 2023

Implementation of the NineMensMorris game logic.
'''
import numpy as np


class Board():
    """
    A NineMensMorris board is a 6x6 integer image:

      rows 0-3 : the 24 board positions, 6 per row
                 (index 0-7 outer ring, 8-15 middle ring, 16-23 inner ring)
      [4][0]   : number of pieces placed so far in the placing phase (0..18)
      [4][1]   : number of moves since the last mill (draw counter)

    Pieces are 1 for the current player, -1 for the opponent, 0 for empty
    (in the canonical form the current player's pieces are always 1).

    Actions are tuples (piece_location, move_location, remove_piece);
    'none' marks a missing component (no origin in the placing phase,
    no capture when no mill is formed).
    """

    # The 16 possible mills as triples of board indices: per ring
    # (base 0/8/16) the four edges, including the wrap-around edge
    # (base+6, base+7, base), plus the four cross mills that connect the
    # rings at the mid-edge intersections. The ordering reproduces the
    # scan order of the original hand-rolled checks.
    MILLS = tuple(
        mill
        for base in (0, 8, 16)
        for mill in (
            (base, base + 1, base + 2),
            (base + 2, base + 3, base + 4),
            (base + 4, base + 5, base + 6),
            (base + 6, base + 7, base),
        )
    ) + tuple((k, k + 8, k + 16) for k in (1, 3, 5, 7))

    def __init__(self):
        "Set up initial board configuration."
        self.n = 6
        self.pieces = np.zeros((6, 6), dtype=int)

    def __getitem__(self, index):
        # Currently not used; forwards indexing to the piece image.
        return self.pieces[index]

    def get_legal_move_vector(self, player, all_moves):
        """
        Input:
            player: current player (1 or -1)
            all_moves: list with all possible moves
        Returns:
            legal_move_vector: 0/1 vector of length len(all_moves)
        """
        legal_move_vector = [0] * len(all_moves)
        for move in self.get_legal_moves(player):
            legal_move_vector[all_moves.index(move)] = 1
        return legal_move_vector

    def arrayToImage(self, array, placements_and_moves):
        """
        Transform the 24-entry array form plus the (placements, moves)
        counters into the 6x6 image used as neural-network input.
        """
        count_placements, current_moves = placements_and_moves
        assert len(array) == 24
        assert 0 <= count_placements <= 18
        board_image = np.zeros((6, 6), dtype=int)
        # Rows 0-3 hold the 24 positions, 6 per row.
        board_image[:4] = np.asarray(array, dtype=int).reshape(4, 6)
        board_image[4][0] = count_placements
        board_image[4][1] = current_moves
        return board_image

    def piecesToArray(self):
        """
        Returns:
            (re_board, placements_and_moves): the 24-entry position list and
            the (pieces placed, moves since last mill) counter tuple.
        """
        re_board = []
        for row in range(4):
            re_board.extend(self.pieces[row])
        assert 0 <= self.pieces[4][0] <= 18
        assert len(re_board) == 24
        return (re_board, (self.pieces[4][0], self.pieces[4][1]))

    def get_legal_moves(self, player):
        """
        Dispatch to the move generator for the player's current game phase.
        Returns a list of (origin, destination, capture) tuples.
        """
        game_phase = self.get_game_phase(player)
        assert 0 <= game_phase <= 2
        if game_phase == 0:
            return list(self.get_legal_moves_0(player))
        if game_phase == 1:
            return list(self.get_legal_moves_1(player))
        return list(self.get_legal_moves_2(player))

    def get_game_phase(self, player):
        """
        Returns:
            0 while fewer than 18 pieces have been placed (placing phase),
            2 if the player is down to 3 or fewer pieces (flying phase),
            1 otherwise (sliding phase).
        """
        _, placements_and_moves = self.piecesToArray()
        assert 0 <= placements_and_moves[0] <= 18
        if placements_and_moves[0] < 18:
            return 0
        if len(self.get_player_pieces(player)) <= 3:
            return 2
        return 1

    def get_player_pieces(self, player):
        """Return the board indices holding the given player's pieces."""
        board, _ = self.piecesToArray()
        return [index for index, value in enumerate(board) if value == player]

    def get_empty_positions(self):
        """Return the board indices that hold no piece."""
        board, placements = self.piecesToArray()
        assert 0 <= placements[0] <= 18
        assert len(board) == 24
        return [index for index, value in enumerate(board) if value == 0]

    def get_possible_mills(self, move_locations, player):
        """
        Return every move whose destination completes a mill for `player`.
        The moved piece itself (move[0]) must not be one of the two
        supporting pieces. A move completing two mills at once appears
        twice; callers only test membership, so duplicates are harmless.

        BUGFIX: the original hand-rolled per-corner checks contained
        `([move[1] + 7] == player)` (a bare list compared to the player,
        missing `board[...]`) and `elif move in [6,14,22]` /
        `elif move in [2,10,18,4,12,20]` (the move *tuple* compared to
        ints instead of `move[1]`), so mills through several corners were
        never detected. The table-driven check covers all 16 mills
        uniformly.
        """
        board, placements = self.piecesToArray()
        assert 0 <= placements[0] <= 18
        assert len(board) == 24
        move_forms_mill = []
        for move in move_locations:
            if move is None or not (0 <= move[1] < 24):
                continue
            for mill in self.MILLS:
                if move[1] not in mill:
                    continue
                support = [pos for pos in mill if pos != move[1]]
                if all(board[pos] == player and pos != move[0] for pos in support):
                    move_forms_mill.append(move)
        return list(move_forms_mill)

    def check_for_mills(self, player):
        """
        Returns:
            current_mills: all mills currently held by `player`, as index
            triples (same order and tuple layout as the original scan).
        """
        board, placements = self.piecesToArray()
        assert 0 <= placements[0] <= 18
        assert len(board) == 24
        return [mill for mill in self.MILLS
                if board[mill[0]] == board[mill[1]] == board[mill[2]] == player]

    def get_neighbours(self, position):
        """
        Return the tuple of adjacent board indices for `position`:
        the ring neighbours plus, at the four mid-edge spots, the
        connection(s) to the neighbouring ring(s).
        """
        assert 0 <= position <= 23
        if position % 2 == 0:  # corner
            if position % 8 == 0:  # top-left corner of a ring wraps around
                return (position + 1, position + 7)
            return (position - 1, position + 1)
        if position in (1, 3, 5, 7):  # outer ring mid-edge
            if position == 7:
                return (0, 6, 15)
            return (position - 1, position + 1, position + 8)
        if position in (9, 11, 13, 15):  # middle ring mid-edge
            if position == 15:
                return (7, 8, 14, 23)
            return (position - 8, position - 1, position + 1, position + 8)
        # inner ring mid-edge
        if position == 23:
            return (15, 16, 22)
        return (position - 8, position - 1, position + 1)

    def get_pieces_outside_mills(self, player):
        """Return the player's pieces that are not part of any of his mills."""
        mill_positions = {pos for mill in self.check_for_mills(player) for pos in mill}
        return [pos for pos in self.get_player_pieces(player)
                if pos not in mill_positions]

    def _capturable_enemies(self, player):
        """Enemy pieces removable on a mill: pieces outside mills, or all
        enemy pieces when every one of them sits inside a mill."""
        outside = self.get_pieces_outside_mills(-player)
        return outside if outside else self.get_player_pieces(-player)

    def get_legal_moves_0(self, player):
        """Legal moves in the placing phase: place on any empty position;
        a placement that forms a mill additionally removes an enemy piece."""
        enemies_to_take = self._capturable_enemies(player)
        placements = [('none', position) for position in self.get_empty_positions()]
        mill_moves = self.get_possible_mills(placements, player)
        moves = []
        for move in placements:
            if move in mill_moves:
                for enemy in enemies_to_take:
                    moves.append(('none', move[1], enemy))
            else:
                moves.append(('none', move[1], 'none'))
        return list(moves)

    def get_legal_moves_1(self, player):
        """Legal moves in the sliding phase: slide a piece to an empty
        neighbour; forming a mill additionally removes an enemy piece."""
        board, placements = self.piecesToArray()
        assert placements[0] == 18
        assert len(board) == 24
        enemies_to_take = self._capturable_enemies(player)
        part_moves = []
        for position in self.get_player_pieces(player):
            for neighbour in self.get_neighbours(position):
                if board[neighbour] == 0:
                    part_moves.append((position, neighbour))
        mill_moves = self.get_possible_mills(part_moves, player)
        moves = []
        for move in part_moves:
            if move in mill_moves:
                for enemy in enemies_to_take:
                    moves.append((move[0], move[1], enemy))
            else:
                moves.append((move[0], move[1], 'none'))
        return list(moves)

    def get_legal_moves_2(self, player):
        """Legal moves in the flying phase (<= 3 pieces): jump any own piece
        to any empty position; mills remove an enemy piece as usual."""
        enemies_to_take = self._capturable_enemies(player)
        empty_locations = self.get_empty_positions()
        part_moves = [(position, location)
                      for position in self.get_player_pieces(player)
                      for location in empty_locations]
        mill_moves = self.get_possible_mills(part_moves, player)
        moves = []
        for move in part_moves:
            if move in mill_moves:
                for enemy in enemies_to_take:
                    moves.append((move[0], move[1], enemy))
            else:
                moves.append((move[0], move[1], 'none'))
        return list(moves)

    def has_legal_moves(self, player):
        """Returns True iff the given player has at least one legal move."""
        return len(self.get_legal_moves(player)) > 0
'''
Rotates the board three times, each time creating a pair of the rotated
board and the rotated vector of legal moves.
Uses a shift vector for the board to calculate the new position for each
index in the array and a lookup list for the vector of legal moves.
'''
# (continuation of class Board)
def get_board_rotations(self, pi, all_moves, policy_rotation_vector):
    """
    Input:
        pi: the legal move vector
        all_moves: list with all possible moves
        policy_rotation_vector: lookup list mapping each policy index to
            its index after a 90-degree rotation
    Returns:
        rotated_results: list of three (image, rotated_pi) tuples
    """
    # Rotate 90 degrees: shift each ring of 8 positions by two spots
    # (the last two wrap around to the start of the ring).
    rot90_vector = [2, 2, 2, 2, 2, 2, -6, -6] * 3

    old_board, placements = self.piecesToArray()
    rotated_results = []

    for _ in range(3):
        # BUGFIX: allocate fresh buffers each rotation. The original
        # reused a single `new_pi` array and appended references to it,
        # so all three stored results ended up sharing the *last*
        # rotation's policy vector. (It also incremented the `for` loop
        # variable by hand, which has no effect.)
        new_board = np.zeros(24, dtype=int)
        new_pi = np.zeros(len(all_moves), dtype=int)

        for index in range(24):
            new_board[index + rot90_vector[index]] = old_board[index]
        for index in range(len(all_moves)):
            new_pi[policy_rotation_vector[index]] = pi[index]

        rotated_results.append((self.arrayToImage(new_board, placements), new_pi))
        old_board = np.copy(new_board)
        pi = np.copy(new_pi)

    return rotated_results


"""
Executes a move on the current board for the given player
"""
def execute_move(self, player, move_index, all_moves):
    """
    Input:
        player: current player (1 or -1)
        move_index: index for the move in the all_moves list
        all_moves: list with all possible moves
    """
    move = all_moves[move_index]
    assert len(move) == 3  # (origin, destination, piece_to_remove)
    board, placements = self.piecesToArray()
    assert 0 <= placements[0] <= 18
    assert len(board) == 24

    count_placements, current_moves = placements
    if self.get_game_phase(player) == 0:
        count_placements += 1      # placing phase: one more stone placed
    if move[0] != 'none':
        board[move[0]] = 0         # lift the moved piece from its origin
    if move[2] != 'none':
        board[move[2]] = 0         # mill formed: remove the enemy piece
        current_moves = 0          # a mill resets the draw counter
    else:
        current_moves += 1
    board[move[1]] = player
    if current_moves > 50:
        print(current_moves)       # debug trace; draw is detected upstream at >= 50

    self.pieces = np.copy(self.arrayToImage(board, (count_placements, current_moves)))


# --- ninemensmorris/NineMensMorrisPlayers.py ---
import numpy as np
import random


class RandomPlayer():
    """Baseline agent that plays a uniformly random legal move."""

    def __init__(self, game):
        self.game = game

    def find_indices(self, list_to_check, item_to_find):
        """Return all indices at which `item_to_find` occurs in `list_to_check`."""
        return [idx for idx, value in enumerate(list_to_check) if value == item_to_find]

    def play(self, board):
        """Pick a random action index among the moves valid for player 1
        (the board is assumed to be in canonical form)."""
        valids = self.game.getValidMoves(board, 1)
        return random.choice(self.find_indices(valids, 1))


# --- ninemensmorris/keras/NNet.py ---
import os
import shutil
import time
import math
import sys
sys.path.append('../..')
from utils import *
from NeuralNet import NeuralNet

# BUGFIX: the sibling module added to this package is NineMensMorrisNNet.py
# (which defines a class named OthelloNNet); the original imported
# `.OthelloNNet`, a module that does not exist here, which fails at import.
from .NineMensMorrisNNet import OthelloNNet as onnet

args = dotdict({
    'lr': 0.001,
    'dropout': 0.3,
    'epochs': 10,
    'batch_size': 64,
    'cuda': False,
    'num_channels': 512,
})


class NNetWrapper(NeuralNet):
    """Keras wrapper around the network: training, prediction, checkpoint I/O."""

    def __init__(self, game):
        self.nnet = onnet(game, args)
        self.board_x, self.board_y = game.getBoardSize()
        self.action_size = game.getActionSize()

    def train(self, examples):
        """
        examples: list of examples, each example is of form (board, pi, v)
        """
        input_boards, target_pis, target_vs = list(zip(*examples))
        input_boards = np.asarray(input_boards)
        target_pis = np.asarray(target_pis)
        target_vs = np.asarray(target_vs)
        self.nnet.model.fit(x=input_boards, y=[target_pis, target_vs],
                            batch_size=args.batch_size, epochs=args.epochs)

    def predict(self, board):
        """
        board: np array with board
        Returns (policy, value) for the single given board.
        """
        start = time.time()  # kept for the optional timing printout below
        board = board[np.newaxis, :, :]  # add the batch dimension
        pi, v = self.nnet.model.predict(board, verbose=False)
        # print('PREDICTION TIME TAKEN : {0:03f}'.format(time.time()-start))
        return pi[0], v[0]

    def save_checkpoint(self, folder='checkpoint', filename='checkpoint.pth.tar'):
        """Save the model weights; the extension is replaced by .h5."""
        filename = filename.split(".")[0] + ".h5"
        filepath = os.path.join(folder, filename)
        if not os.path.exists(folder):
            print("Checkpoint Directory does not exist! Making directory {}".format(folder))
            os.mkdir(folder)
        else:
            print("Checkpoint Directory exists! ")
        self.nnet.model.save_weights(filepath)

    def load_checkpoint(self, folder='checkpoint', filename='checkpoint.pth.tar'):
        """Load .h5 weights saved by save_checkpoint."""
        filename = filename.split(".")[0] + ".h5"
        filepath = os.path.join(folder, filename)
        if not os.path.exists(filepath):
            # BUGFIX: the original `raise("No model in path ...")` raises a
            # TypeError (a string is not an exception).
            raise FileNotFoundError("No model in path {}".format(filepath))
        self.nnet.model.load_weights(filepath)


# --- ninemensmorris/keras/NineMensMorrisNNet.py ---
import sys
sys.path.append('..')
from utils import *

import argparse
from tensorflow.keras.models import *
from tensorflow.keras.layers import *
from tensorflow.keras.optimizers import *


class OthelloNNet():
    """Conv net: four conv layers, two dense layers, policy + value heads."""

    def __init__(self, game, args):
        # game params
        self.board_x, self.board_y = game.getBoardSize()
        self.action_size = game.getActionSize()
        self.args = args

        # s: batch_size x board_x x board_y
        self.input_boards = Input(shape=(self.board_x, self.board_y))
        x_image = Reshape((self.board_x, self.board_y, 1))(self.input_boards)
        h_conv1 = Activation('relu')(BatchNormalization(axis=3)(Conv2D(args.num_channels, 3, padding='same', use_bias=False)(x_image)))
        h_conv2 = Activation('relu')(BatchNormalization(axis=3)(Conv2D(args.num_channels, 3, padding='same', use_bias=False)(h_conv1)))
        h_conv3 = Activation('relu')(BatchNormalization(axis=3)(Conv2D(args.num_channels, 3, padding='valid', use_bias=False)(h_conv2)))  # -> (x-2, y-2)
        h_conv4 = Activation('relu')(BatchNormalization(axis=3)(Conv2D(args.num_channels, 3, padding='valid', use_bias=False)(h_conv3)))  # -> (x-4, y-4)
        h_conv4_flat = Flatten()(h_conv4)
        s_fc1 = Dropout(args.dropout)(Activation('relu')(BatchNormalization(axis=1)(Dense(1024, use_bias=False)(h_conv4_flat))))
        s_fc2 = Dropout(args.dropout)(Activation('relu')(BatchNormalization(axis=1)(Dense(512, use_bias=False)(s_fc1))))
        self.pi = Dense(self.action_size, activation='softmax', name='pi')(s_fc2)  # policy head
        self.v = Dense(1, activation='tanh', name='v')(s_fc2)                      # value head

        self.model = Model(inputs=self.input_boards, outputs=[self.pi, self.v])
        self.model.compile(loss=['categorical_crossentropy', 'mean_squared_error'],
                           optimizer=Adam(args.lr))


# --- ninemensmorris/morris_main.py ---
import logging
import coloredlogs
import torch  # BUGFIX: the args dict below reads torch.cuda.is_available()
from Coach import Coach
from ninemensmorris.NineMensMorrisGame import NineMensMorrisGame as Game
from ninemensmorris.pytorch.NNet import NNetWrapper as nn
from utils import *

# NOTE -> TO SWITCH BETWEEN KERAS AND PYTORCH, CHANGE NAMES FROM NNET AND NNETWRAPPER

log = logging.getLogger(__name__)

coloredlogs.install(level='INFO')  # Change this to DEBUG to see more info.

args = dotdict({
    'numIters': 10,              # default 1000 -> takes too long
    'numEps': 1000,              # Number of complete self-play games to simulate during a new iteration. default 100
    'tempThreshold': 5,          # default 15
    'updateThreshold': 0.55,     # During arena playoff, new neural net will be accepted if threshold or more of games are won. default 0.6
    'maxlenOfQueue': 200000,     # Number of game examples to train the neural networks. default 200000
    'numMCTSSims': 25,           # Number of games moves for MCTS to simulate. default 25
    'arenaCompare': 40,          # Number of games to play during arena play to determine if new net will be accepted. default 40
    'cpuct': 1,                  # default 1

    'checkpoint': './temp/',
    'load_model': False,
    'load_folder_file': ('/dev/models/8x100x50', 'best.pth.tar'),
    'numItersForTrainExamplesHistory': 20,

    'lr': 0.005,                 # default 0.001
    'dropout': 0.3,
    'epochs': 10,                # default 10 -> try 15 or 20
    'batch_size': 64,
    'cuda': torch.cuda.is_available(),
    'num_channels': 512,
})


def main():
    # BUGFIX: the game and net classes are imported under the aliases
    # `Game` and `nn`; the original referenced the unaliased names
    # NineMensMorrisGame / NNetWrapper, which raises NameError.
    log.info('Loading %s...', Game.__name__)
    g = Game()

    log.info('Loading %s...', nn.__name__)
    nnet = nn(g)
    log.info('cuda available "%s"', torch.cuda.is_available())
    if args.load_model:
        log.info('Loading checkpoint "%s/%s"...', args.load_folder_file[0], args.load_folder_file[1])
        nnet.load_checkpoint(args.load_folder_file[0], args.load_folder_file[1])
    else:
        log.warning('Not loading a checkpoint!')

    log.info('Loading the Coach...')
    c = Coach(g, nnet, args)

    if args.load_model:
        log.info("Loading 'trainExamples' from file...")
        c.loadTrainExamples()

    log.info('Starting the learning process 🎉')
    c.learn()


if __name__ == "__main__":
    main()


# --- ninemensmorris/morris_pit.py ---
"""
use this script to play any two agents against each other, or play manually with
any agent.
"""
# NOTE(review): this script declares no imports; it assumes
# NineMensMorrisGame, RandomPlayer, NNetWrapper, MCTS, Arena, dotdict and
# np are already in scope (it was run inside the Colab notebook). Add the
# corresponding imports before running it standalone.
human_vs_cpu = False

g = NineMensMorrisGame()

# all players
rp = RandomPlayer(g).play

# nnet players
n1 = NNetWrapper(g)
n1.load_checkpoint('/content/drive/My Drive/training/20it/', 'best.pth.tar')
args1 = dotdict({'numMCTSSims': 50, 'cpuct': 1.0})
mcts1 = MCTS(g, n1, args1)
n1p = lambda x: np.argmax(mcts1.getActionProb(x, temp=0))

player2 = rp  # Player 2 is neural network if it's cpu vs cpu.

arena = Arena(n1p, player2, g, display=NineMensMorrisGame.display)

print(arena.playGames(20, verbose=True))


# --- ninemensmorris/pytorch/NNet.py ---
import os
import sys
import time

import numpy as np
from tqdm import tqdm

sys.path.append('../../')
from utils import *
from NeuralNet import NeuralNet

import torch
import torch.optim as optim

from .NineMensMorrisNNet import NineMensMorrisNNet

args = dotdict({
    'lr': 0.001,
    'dropout': 0.3,
    'epochs': 10,
    'batch_size': 64,
    'cuda': torch.cuda.is_available(),
    'num_channels': 512,
})

"""
Copied from:
https://github.com/suragnair/alpha-zero-general/blob/master/othello/pytorch/NNet.py
Adapted the load and save methods to save and load in google drive from colab
"""
class NNetWrapper(NeuralNet):
    """PyTorch wrapper: training loop, prediction, losses, checkpoint I/O."""

    def __init__(self, game):
        self.nnet = NineMensMorrisNNet(game, args)
        self.board_x, self.board_y = game.getBoardSize()
        self.action_size = game.getActionSize()

        if args.cuda:
            self.nnet.to("cuda:0")

    def train(self, examples):
        """
        examples: list of examples, each example is of form (board, pi, v)
        """
        optimizer = optim.Adam(self.nnet.parameters())

        for epoch in range(args.epochs):
            print('EPOCH ::: ' + str(epoch + 1))
            self.nnet.train()
            pi_losses = AverageMeter()
            v_losses = AverageMeter()

            batch_count = int(len(examples) / args.batch_size)

            t = tqdm(range(batch_count), desc='Training Net')
            for _ in t:
                # sample a random batch (with replacement)
                sample_ids = np.random.randint(len(examples), size=args.batch_size)
                boards, pis, vs = list(zip(*[examples[i] for i in sample_ids]))
                boards = torch.FloatTensor(np.array(boards).astype(np.float64))
                target_pis = torch.FloatTensor(np.array(pis))
                target_vs = torch.FloatTensor(np.array(vs).astype(np.float64))

                if args.cuda:
                    boards = boards.contiguous().cuda()
                    target_pis = target_pis.contiguous().cuda()
                    target_vs = target_vs.contiguous().cuda()

                # compute output and joint loss
                out_pi, out_v = self.nnet(boards)
                l_pi = self.loss_pi(target_pis, out_pi)
                l_v = self.loss_v(target_vs, out_v)
                total_loss = l_pi + l_v

                # record loss
                pi_losses.update(l_pi.item(), boards.size(0))
                v_losses.update(l_v.item(), boards.size(0))
                t.set_postfix(Loss_pi=pi_losses, Loss_v=v_losses)

                # compute gradient and do SGD step
                optimizer.zero_grad()
                total_loss.backward()
                optimizer.step()

    def predict(self, board):
        """
        board: np array with board
        Returns (policy probabilities, value) for the single given board.
        """
        start = time.time()  # kept for the optional timing printout below

        board = torch.FloatTensor(board.astype(np.float64))
        if args.cuda:
            board = board.contiguous().cuda()
        board = board.view(1, self.board_x, self.board_y)
        self.nnet.eval()
        with torch.no_grad():
            pi, v = self.nnet(board)

        # print('PREDICTION TIME TAKEN : {0:03f}'.format(time.time()-start))
        # the network outputs log-probabilities; exponentiate for MCTS
        return torch.exp(pi).data.cpu().numpy()[0], v.data.cpu().numpy()[0]

    def loss_pi(self, targets, outputs):
        # cross-entropy against log-probability outputs
        return -torch.sum(targets * outputs) / targets.size()[0]

    def loss_v(self, targets, outputs):
        # mean squared error on the scalar value head
        return torch.sum((targets - outputs.view(-1)) ** 2) / targets.size()[0]

    def save_checkpoint(self, folder, filename):
        # os.path.join works for folders given with or without a trailing '/'
        # (the original concatenated the strings directly).
        filepath = os.path.join(folder, filename)
        if not os.path.exists(folder):
            print("Checkpoint Directory does not exist! Making directory {}".format(folder))
            os.mkdir(folder)
        else:
            print("Checkpoint Directory exists! ")
        # BUGFIX: the original print left a stray '%s' placeholder in the text.
        print("saving to path '{}'".format(filepath))
        torch.save({
            'state_dict': self.nnet.state_dict(),
        }, filepath)

    def load_checkpoint(self, folder, filename):
        # https://github.com/pytorch/examples/blob/master/imagenet/main.py#L98
        filepath = os.path.join(folder, filename)
        print("Trying to load checkpoint")
        if not os.path.exists(filepath):
            # BUGFIX: the original `raise ("No model in path ...")` raises a
            # TypeError (a string is not an exception).
            raise FileNotFoundError("No model in path {}".format(filepath))
        # load onto CPU when CUDA is unavailable
        map_location = None if args.cuda else 'cpu'
        checkpoint = torch.load(filepath, map_location=map_location)
        self.nnet.load_state_dict(checkpoint['state_dict'])


# --- ninemensmorris/pytorch/NineMensMorrisNNet.py ---
import sys
sys.path.append('..')

import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

"""
Copied from:
https://github.com/suragnair/alpha-zero-general/blob/master/othello/pytorch/OthelloNNet.py
"""
class NineMensMorrisNNet(nn.Module):
    """Four conv layers + two dense layers with a log-softmax policy head
    and a tanh value head."""

    def __init__(self, game, args):
        # game params
        self.board_x, self.board_y = game.getBoardSize()
        self.action_size = game.getActionSize()
        self.args = args

        super(NineMensMorrisNNet, self).__init__()
        self.conv1 = nn.Conv2d(1, args.num_channels, 3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(args.num_channels, args.num_channels, 3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(args.num_channels, args.num_channels, 3, stride=1)  # shrinks H,W by 2
        self.conv4 = nn.Conv2d(args.num_channels, args.num_channels, 3, stride=1)  # shrinks H,W by 2

        self.bn1 = nn.BatchNorm2d(args.num_channels)
        self.bn2 = nn.BatchNorm2d(args.num_channels)
        self.bn3 = nn.BatchNorm2d(args.num_channels)
        self.bn4 = nn.BatchNorm2d(args.num_channels)

        self.fc1 = nn.Linear(args.num_channels * (self.board_x - 4) * (self.board_y - 4), 1024)
        self.fc_bn1 = nn.BatchNorm1d(1024)

        self.fc2 = nn.Linear(1024, 512)
        self.fc_bn2 = nn.BatchNorm1d(512)

        self.fc3 = nn.Linear(512, self.action_size)  # policy head
        self.fc4 = nn.Linear(512, 1)                 # value head

    def forward(self, s):
        # s: batch_size x board_x x board_y
        s = s.view(-1, 1, self.board_x, self.board_y)   # add the channel dim
        s = F.relu(self.bn1(self.conv1(s)))             # batch x C x board_x x board_y
        s = F.relu(self.bn2(self.conv2(s)))             # batch x C x board_x x board_y
        s = F.relu(self.bn3(self.conv3(s)))             # batch x C x (board_x-2) x (board_y-2)
        s = F.relu(self.bn4(self.conv4(s)))             # batch x C x (board_x-4) x (board_y-4)
        s = s.view(-1, self.args.num_channels * (self.board_x - 4) * (self.board_y - 4))

        s = F.dropout(F.relu(self.fc_bn1(self.fc1(s))), p=self.args.dropout, training=self.training)  # batch x 1024
        s = F.dropout(F.relu(self.fc_bn2(self.fc2(s))), p=self.args.dropout, training=self.training)  # batch x 512

        pi = self.fc3(s)  # batch x action_size
        v = self.fc4(s)   # batch x 1

        return F.log_softmax(pi, dim=1), torch.tanh(v)