diff --git a/ultimatetictactoe.py b/ultimatetictactoe.py
new file mode 100644
index 0000000..106d2e5
--- /dev/null
+++ b/ultimatetictactoe.py
@@ -0,0 +1,141 @@
+from vacuumDecay import *
+from collections import Counter
+import itertools
+
+class TTTState(State):
+    """Game state for Ultimate Tic-Tac-Toe.
+
+    The board is an 81-character string made of nine consecutive 9-cell
+    boxes. A cell holds 'O' (player 0), 'X' (player 1) or '.' (empty).
+    """
+
+    def __init__(self, curPlayer=0, generation=0, playersNum=2, board=None, lastMove=-1):
+        """Create a state; an omitted board is empty and lastMove=-1 means no move was made yet."""
+        if board is None:
+            board = "." * 81
+        self.curPlayer = curPlayer
+        self.generation = generation
+        self.playersNum = playersNum
+        self.board = board
+        self.last_move = lastMove
+        # Index triples that form a line inside a 3x3 grid:
+        # both diagonals, the three columns and the three rows.
+        self.possible_goals = [(0, 4, 8), (2, 4, 6)]
+        self.possible_goals += [(i, i+3, i+6) for i in range(3)]
+        self.possible_goals += [(3*i, 3*i+1, 3*i+2) for i in range(3)]
+        self.update_box_won()
+
+    def update_box_won(self):
+        """Recompute self.box_won: for each box its winner symbol, or '.' if undecided."""
+        self.box_won = [self.check_small_box(self.board[b*9: b*9 + 9]) for b in range(9)]
+
+    def indices_of_box(self, b):
+        """Return the nine board indices that belong to box b (0-8)."""
+        return list(range(b*9, b*9 + 9))
+
+    def check_small_box(self, box_str):
+        """Return the symbol that owns a full line in the 9-cell sequence box_str, else '.'."""
+        for (x, y, z) in self.possible_goals:
+            if (box_str[x] == box_str[y] == box_str[z]) and box_str[x] != ".":
+                return box_str[x]
+        return "."
+
+    def mutate(self, action):
+        """Return the successor state after the current player marks cell action.data."""
+        newBoard = self.board[:action.data] + ['O','X'][self.curPlayer] + self.board[action.data+1:]
+        # NOTE(review): generation is not forwarded, so the successor gets the default 0 - confirm this is intended.
+        return TTTState(curPlayer=(self.curPlayer+1)%self.playersNum, playersNum=self.playersNum, board=newBoard, lastMove=action.data)
+
+    def box(self, x, y):
+        """Return the number of the box containing the 1-based display coordinate (x, y)."""
+        # index is a method; calling it as a bare name raised NameError.
+        return self.index(x, y) // 9
+
+    def next_box(self, i):
+        """Return the box the next player is sent to after a move on board index i."""
+        return i % 9
+
+    def index(self, x, y):
+        """Map the 1-based display coordinate (row x, column y) to a board index."""
+        x -= 1
+        y -= 1
+        return ((x//3)*27) + ((x % 3)*3) + ((y//3)*9) + (y % 3)
+
+    def getAvaibleActions(self):
+        """Yield an Action for every cell the current player may mark.
+
+        The previous move selects the box to play in. If that box is already
+        won or has no empty cell left (or no move was made yet), every empty
+        cell of every undecided box is allowed instead.
+        """
+        possible_indices = None
+        if self.last_move != -1:
+            box_to_play = self.next_box(self.last_move)
+            idxs = self.indices_of_box(box_to_play)
+            # A full but undecided box must not trap the player with zero actions.
+            if self.box_won[box_to_play] == "." and any(self.board[i] == '.' for i in idxs):
+                possible_indices = idxs
+        if possible_indices is None:
+            pi_2d = [self.indices_of_box(b) for b in range(9) if self.box_won[b] == "."]
+            possible_indices = itertools.chain.from_iterable(pi_2d)
+
+        for ind in possible_indices:
+            if self.board[ind]=='.':
+                yield Action(self.curPlayer, ind)
+
+    def getScoreFor(self, player):
+        """Return a heuristic score for player; it shrinks as the player wins more boxes.
+
+        Starts from 5, adds 1 per box won by player and subtracts 0.5 per box
+        won by the opponent, then returns the reciprocal.
+        """
+        p = ['O','X'][player]
+        sco = 5
+        for w in self.box_won:
+            if w==p:
+                sco += 1
+            elif w!='.':
+                sco -= 0.5
+        return 1/sco
+
+    def checkWin(self):
+        """Return the winning player's index (0 for 'O', 1 for 'X'), or None if no three boxes form a line."""
+        self.update_box_won()
+        game_won = self.check_small_box(self.box_won)
+        if game_won == '.':
+            return None
+        # An int compares equal to the bool returned before (True == 1, False == 0).
+        return int(game_won == 'X')
+
+    def __str__(self):
+        """Render the board as text; when at most nine moves are available they are shown as digits 1-9."""
+        state = self.board
+        acts = list(self.getAvaibleActions())
+        if len(acts)<=9:
+            for i, act in enumerate(acts):
+                state = state[:act.data] + str(i+1) + state[act.data+1:]
+        s = []
+        for row in range(1, 10):
+            row_str = ["|"]
+            for col in range(1, 10):
+                row_str += [state[self.index(row, col)]]
+                if (col) % 3 == 0:
+                    row_str += ["|"]
+            if (row-1) % 3 == 0:
+                s.append("-"*(len(row_str)*2-1))
+            s.append(" ".join(row_str))
+        s.append("-"*(len(row_str)*2-1))
+        return '\n'.join(s)
+
+    def getTensor(self):
+        """Not implemented yet."""
+        pass
+
+    @classmethod
+    def getModel(cls):
+        """Not implemented yet."""
+        pass
+
+if __name__=="__main__":
+    run = Runtime(TTTState())
+    run.game(None, 4)
diff --git a/vacuumDecay.py b/vacuumDecay.py
index 7209dd5..6b7d629 100644
--- a/vacuumDecay.py
+++ b/vacuumDecay.py
@@ -2,7 +2,7 @@ import time
 import random
 import threading
 import torch
-from math import sqrt
+from math import sqrt, inf
 #from multiprocessing import Event
 from abc import ABC, abstractmethod
 from threading import Event
@@ -84,7 +84,11 @@ class State(ABC):
     @abstractmethod
     def getAvaibleActions(self):
         # Should return an array of all possible actions
         return []
+
+    def askUserForAction(self, actions):
+        """Prompt the current player to pick one of actions and return the choice."""
+        return choose('What does player '+str(self.curPlayer)+' want to do?', actions)
 
     # improveMe
     def getPriority(self, score, cascadeMemory=None):
@@ -176,7 +180,7 @@ class Node():
         for p in range(self.playersNum):
             cp = self.state.curPlayer
             if cp == p: # P owns the turn; controlls outcome
-                best = 1000000000
+                best = inf
                 for c in self.childs:
                     if c.getStrongFor(p) < best:
                         best = c.getStrongFor(p)
@@ -311,7 +315,8 @@ def choose(txt, options):
 
 class Runtime():
     def __init__(self, initState):
-        self.head = Node(initState)
+        universe = Universe()
+        self.head = Node(initState, universe=universe)
 
     def performAction(self, action):
         for c in self.head.childs:
@@ -323,16 +328,17 @@ class Runtime():
                 return
         raise Exception('No such action avaible...')
 
-    def turn(self, bot=None):
+    def turn(self, bot=None, calcDepth=7):
+        """Play one turn; calcDepth is the depth passed to forceStrong when the bot moves."""
         print(str(self.head))
         if bot==None:
-            c = choose('?', ['human', 'bot', 'undo'])
+            c = choose('Select action?', ['human', 'bot', 'undo'])
             if c=='undo':
                 self.head = self.head.parent
                 return
             bot = c=='bot'
         if bot:
-            self.head.forceStrong(7)
+            self.head.forceStrong(calcDepth)
             opts = []
             for c in self.head.childs:
                 opts.append((c, c.getStrongFor(self.head.curPlayer)))
@@ -344,11 +350,13 @@ class Runtime():
             print('[#] I choose to play: ' + str(opts[0][0].lastAction))
             self.performAction(opts[0][0].lastAction)
         else:
-            action = choose('What does player '+str(self.head.curPlayer)+' want to do?', self.head.avaibleActions)
+            # Ask the State directly; askUserForAction is defined on State.
+            action = self.head.state.askUserForAction(self.head.avaibleActions)
             self.performAction(action)
 
-    def game(self, bots=None):
+    def game(self, bots=None, calcDepth=7):
+        """Run turns in an endless loop; bots[i] is passed to turn() for player i, calcDepth is forwarded."""
         if bots==None:
             bots = [None]*self.head.playersNum
         while True:
-            self.turn(bots[self.head.curPlayer])
+            self.turn(bots[self.head.curPlayer], calcDepth)