From 072bc391d1606b9f5d2d2d932fbf7a415e0727e5 Mon Sep 17 00:00:00 2001
From: Dominik Roth
Date: Wed, 18 May 2022 19:44:56 +0200
Subject: [PATCH] Different net arch (back to fancy conv)

---
 ultimatetictactoe.py | 94 +++++++++++++++++++++++++-------------------
 1 file changed, 54 insertions(+), 40 deletions(-)

diff --git a/ultimatetictactoe.py b/ultimatetictactoe.py
index 025b539..8973a5d 100644
--- a/ultimatetictactoe.py
+++ b/ultimatetictactoe.py
@@ -6,6 +6,7 @@ from vacuumDecay import *
 from collections import Counter
 import itertools
 
+
 class TTTState(State):
     def __init__(self, curPlayer=0, generation=0, playersNum=2, board=None, lastMove=-1):
         if type(board) == type(None):
@@ -40,13 +41,13 @@ class TTTState(State):
         return "."
 
     def mutate(self, action):
-        newBoard = self.board[:action.data] + ['O','X'][self.curPlayer] + self.board[action.data+1:]
-        return TTTState(curPlayer=(self.curPlayer+1)%self.playersNum, playersNum=self.playersNum, board=newBoard, lastMove=action.data)
+        newBoard = self.board[:action.data] + ['O',
+                                               'X'][self.curPlayer] + self.board[action.data+1:]
+        return TTTState(curPlayer=(self.curPlayer+1) % self.playersNum, playersNum=self.playersNum, board=newBoard, lastMove=action.data)
 
     def box(self, x, y):
         return index(x, y) // 9
 
-
     def next_box(self, i):
         return i % 9
 
@@ -67,16 +68,17 @@ class TTTState(State):
             box_to_play = self.next_box(self.last_move)
             idxs = self.indices_of_box(box_to_play)
             if self.box_won[box_to_play] != ".":
-                pi_2d = [self.indices_of_box(b) for b in range(9) if self.box_won[b] == "."]
+                pi_2d = [self.indices_of_box(b) for b in range(
+                    9) if self.box_won[b] == "."]
                 possible_indices = list(itertools.chain.from_iterable(pi_2d))
             else:
                 possible_indices = idxs
         for ind in possible_indices:
-            if self.board[ind]=='.':
+            if self.board[ind] == '.':
                 yield Action(self.curPlayer, ind)
 
 
-    #def getScoreFor(self, player):
+    # def getScoreFor(self, player):
     #    p = ['O','X'][player]
     #    sco = 5
     #    for w in self.box_won:
@@ -86,7 +88,7 @@ class TTTState(State):
     #            sco -= 0.5
     #    return 1/sco
 
-    #def getPriority(self, score, cascadeMem):
+    # def getPriority(self, score, cascadeMem):
     #    return -cascadeMem*1 + 100
 
     def checkWin(self):
@@ -100,13 +102,13 @@ class TTTState(State):
 
     def checkDraw(self):
         for act in self.getAvaibleActions():
-            return False # at least one action avaible
+            return False  # at least one action avaible
         return True
 
     def __str__(self):
         state = self.board
         acts = list(self.getAvaibleActions())
-        if len(acts)<=9:
+        if len(acts) <= 9:
             for i, act in enumerate(acts):
                 state = state[:act.data] + str(i+1) + state[act.data+1:]
         s = []
@@ -128,11 +130,10 @@ class TTTState(State):
         elif b == 'O':
             return -1.0 + 2.0 * self.curPlayer
         else:
-            return 1.0 - 2.0 * self.curPlayer 
-
+            return 1.0 - 2.0 * self.curPlayer
 
     def getTensor(self, player=None, phase='default'):
-        if player==None:
+        if player == None:
             player = self.curPlayer
         s = ''
         for row in range(1, 10):
@@ -144,51 +145,60 @@ class TTTState(State):
     def getModel(cls, phase='default'):
         return Model()
 
+
 class Model(nn.Module):
     def __init__(self):
         super().__init__()
 
+        self.chansPerSmol = 24
+        self.chansPerSlot = 8
+        self.chansComp = 8
+
         self.smol = nn.Sequential(
             nn.Conv2d(
                 in_channels=1,
-                out_channels=16,
-                kernel_size=(3,3),
+                out_channels=self.chansPerSmol,
+                kernel_size=(3, 3),
                 stride=3,
                 padding=0,
             ),
             nn.ReLU()
        )
-        #self.comb = nn.Sequential(
-        #    nn.Conv1d(
-        #        in_channels=24,
-        #        out_channels=8,
-        #        kernel_size=1,
-        #        stride=1,
-        #        padding=0,
-        #    ),
-        #    nn.ReLU()
-        #)
+        self.comb = nn.Sequential(
+            nn.Conv1d(
+                in_channels=self.chansPerSmol,
+                out_channels=self.chansPerSlot,
+                kernel_size=1,
+                stride=1,
+                padding=0,
+            ),
+            nn.ReLU()
+        )
         self.out = nn.Sequential(
-            #nn.Linear(9*8, 32),
-            #nn.ReLU(),
-            #nn.Linear(32, 8),
-            #nn.ReLU(),
-            nn.Linear(16*9, 12),
+            nn.Linear(self.chansPerSlot*9, self.chansComp),
             nn.ReLU(),
-            nn.Linear(12, 1),
+            nn.Linear(self.chansComp, 1),
+            #nn.Linear(9*8, 32),
+            # nn.ReLU(),
+            #nn.Linear(32, 8),
+            # nn.ReLU(),
+            #nn.Linear(16*9, 12),
+            # nn.ReLU(),
+            #nn.Linear(12, 1),
             nn.Sigmoid()
         )
 
     def forward(self, x):
-        x = torch.reshape(x, (1,9,9))
+        x = torch.reshape(x, (1, 9, 9))
         x = self.smol(x)
-        #x = torch.reshape(x, (24,9))
-        #x = self.comb(x)
+        x = torch.reshape(x, (self.chansPerSmol, 9))
+        x = self.comb(x)
         x = torch.reshape(x, (-1,))
         y = self.out(x)
         return y
 
-def humanVsAi(train=True, remember=False, depth=3, bots=[0,1], noBg=False):
+
+def humanVsAi(train=True, remember=False, depth=3, bots=[0, 1], noBg=False):
     init = TTTState()
     run = NeuralRuntime(init)
     run.game(bots, depth, bg=not noBg)
@@ -199,27 +209,31 @@ def humanVsAi(train=True, remember=False, depth=3, bots=[0, 1], noBg=False):
         trainer.saveToMemoryBank(run.head)
         print('[!] Your cognitive and strategic destinctiveness was added to my own! (Game inserted into memoryBank)')
     if train:
-        print("[!] Your knowledge will be assimilated!!! Please stand by.... (Updating Neuristic)")
+        print(
+            "[!] Your knowledge will be assimilated!!! Please stand by.... (Updating Neuristic)")
         trainer.trainFromTerm(run.head)
         print('[!] I have become smart! Destroyer of human Ultimate-TicTacToe players! (Neuristic update completed)')
     print('[!] This marks the beginning of the end of humankind!')
     print('[i] Thanks for playing! Goodbye...')
 
+
 def aiVsAiLoop():
     init = TTTState()
     trainer = Trainer(init)
     trainer.train()
 
-if __name__=='__main__':
-    options = ['Play Against AI','Play Against AI (AI begins)','Play Against AI (Fast Play)','Playground','Let AI train']
+
+if __name__ == '__main__':
+    options = ['Play Against AI',
               'Play Against AI (AI begins)', 'Play Against AI (Fast Play)', 'Playground', 'Let AI train']
     opt = choose('?', options)
     if opt == options[0]:
         humanVsAi()
     elif opt == options[1]:
-        humanVsAi(bots[1,0])
+        humanVsAi(bots=[1, 0])
     elif opt == options[2]:
-        humanVsAi(depth=2,noBg=True)
+        humanVsAi(depth=2, noBg=True)
     elif opt == options[3]:
-        humanVsAi(bots=[None,None])
+        humanVsAi(bots=[None, None])
     else:
         aiVsAiLoop()
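
Note (reviewer's addition, not part of the patch): the sketch below is a minimal, standalone re-statement of the patched Model, intended only to sanity-check the tensor shapes the new smol -> comb -> out pipeline assumes. The channel sizes (24, 8, 8) and layer hyperparameters are taken from the patch; the explicit batch dimension and the module/variable names are illustrative additions, since the patch's forward() feeds Conv2d an unbatched (1, 9, 9) tensor, which older PyTorch releases reject.

import torch
import torch.nn as nn

# Values lifted from the patch: chansPerSmol=24, chansPerSlot=8, chansComp=8.
chans_per_smol, chans_per_slot, chans_comp = 24, 8, 8

# "smol": a 3x3 conv with stride 3 summarises each of the nine sub-boards.
smol = nn.Sequential(
    nn.Conv2d(1, chans_per_smol, kernel_size=(3, 3), stride=3, padding=0),
    nn.ReLU(),
)
# "comb": the re-enabled 1x1 Conv1d mixes channels per sub-board slot.
comb = nn.Sequential(
    nn.Conv1d(chans_per_smol, chans_per_slot, kernel_size=1, stride=1, padding=0),
    nn.ReLU(),
)
# "out": small dense head squashed into (0, 1) by the sigmoid.
out = nn.Sequential(
    nn.Linear(chans_per_slot * 9, chans_comp),
    nn.ReLU(),
    nn.Linear(chans_comp, 1),
    nn.Sigmoid(),
)

x = torch.randn(1, 1, 9, 9)           # one 9x9 board, explicit batch dimension
h = smol(x)                           # -> (1, 24, 3, 3): one feature vector per sub-board
h = h.reshape(1, chans_per_smol, 9)   # -> (1, 24, 9): the nine sub-board slots in a row
h = comb(h)                           # -> (1, 8, 9): per-slot channel compression
y = out(h.reshape(1, -1))             # -> (1, 1): board evaluation in (0, 1)
print(y.shape)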