diff --git a/brains/uttt.pth b/brains/uttt.pth
index edcaf6f..eae2edd 100644
Binary files a/brains/uttt.pth and b/brains/uttt.pth differ
diff --git a/ultimatetictactoe.py b/ultimatetictactoe.py
index 585136b..57e3e1f 100644
--- a/ultimatetictactoe.py
+++ b/ultimatetictactoe.py
@@ -127,7 +127,11 @@ class TTTState(State):
     def getTensor(self, player=None, phase='default'):
         if player==None:
             player = self.curPlayer
-        return torch.tensor([self.symbToNum(b) for b in self.board])
+        s = ''
+        for row in range(1, 10):
+            for col in range(1, 10):
+                s += self.board[self.index(row, col)]
+        return torch.tensor([self.symbToNum(b) for b in s])
 
     @classmethod
     def getModel(cls, phase='default'):
@@ -138,8 +142,7 @@ class Model(nn.Module):
         super().__init__()
 
         self.smolChan = 12
-        self.bigChan = 5
-        self.compChan = 3
+        self.compChan = 7
 
         self.smol = nn.Sequential(
             nn.Conv2d(
@@ -152,35 +155,26 @@ class Model(nn.Module):
             nn.ReLU()
         )
         self.big = nn.Sequential(
-            nn.Conv2d(
-                in_channels=self.smolChan,
-                out_channels=self.bigChan,
-                kernel_size=(3,3),
-                stride=3,
-                padding=0,
-            ),
-            nn.ReLU()
-        )
-        self.out = nn.Sequential(
-            #nn.Linear(bigChan, 1),
-            nn.Linear(self.bigChan, self.compChan),
+            nn.Linear(self.smolChan*9, self.compChan),
+            #nn.ReLU(),
+            #nn.Linear(self.compChan, 1),
             nn.ReLU(),
-            nn.Linear(self.compChan, 1),
+            nn.Linear(self.compChan, 3),
+            nn.ReLU(),
+            nn.Linear(3, 1),
             nn.Sigmoid()
         )
 
     def forward(self, x):
         x = torch.reshape(x, (1,9,9))
         x = self.smol(x)
-        x = self.big(x)
-        x = torch.reshape(x, (self.bigChan,))
-        #x = x.view(x.size(0), -1)
-        y = self.out(x)
+        x = torch.reshape(x, (self.smolChan*9,))
+        y = self.big(x)
         return y
 
 
 if __name__=="__main__":
     run = NeuralRuntime(TTTState())
     run.game(None, 4)
-    #trainer = Trainer(TTTState())
-    #trainer.train()
+    trainer = Trainer(TTTState())
+    trainer.train()
diff --git a/vacuumDecay.py b/vacuumDecay.py
index 05b1096..1fa68d9 100644
--- a/vacuumDecay.py
+++ b/vacuumDecay.py
@@ -186,6 +186,8 @@ class Node():
 
     def _expand(self):
         self._childs = []
+        if self.getWinner()!=None:
+            return
         actions = self.state.getAvaibleActions()
         for action in actions:
             newNode = Node(self.state.mutate(action), self.universe, self, action)
@@ -284,11 +286,17 @@ class Node():
             self._calcScore(p)
 
     def _calcScore(self, player):
+        winner = self.getWinner()
+        if winner!=None:
+            if winner==player:
+                self._scores[player] = 0.0
+            else:
+                self._scores[player] = 1.0
+            return
         if self.universe.scoreProvider == 'naive':
             self._scores[player] = self.state.getScoreFor(player)
         elif self.universe.scoreProvider == 'neural':
             self._scores[player] = self.state.getScoreNeural(self.universe.model, player)
-
         else:
             raise Exception('Uknown Score-Provider')
 
@@ -329,7 +337,7 @@ class Node():
         s.append("[ -> "+str(self.lastAction)+" ]")
         s.append("[ turn: "+str(self.state.curPlayer)+" ]")
         s.append(str(self.state))
-        s.append("[ score: "+str(self.getStrongFor(self.state.curPlayer))+" ]")
+        s.append("[ score: "+str(self.getScoreFor(0))+" ]")
         return '\n'.join(s)
 
 def choose(txt, options):
@@ -452,7 +460,7 @@ class Trainer(Runtime):
         self.rootNode = Node(initState, universe = self.universe)
         self.terminal = None
 
-    def buildDatasetFromModel(self, model, depth=4, refining=False):
+    def buildDatasetFromModel(self, model, depth=4, refining=True):
         print('[*] Building Timeline')
         term = self.linearPlay(model, calcDepth=depth)
         if refining:
@@ -462,8 +470,8 @@ class Trainer(Runtime):
             self.fanOut(term.parent.parent, depth=depth+1)
         return term
 
-    def fanOut(self, head, depth=10):
-        for d in range(max(3, depth-3)):
+    def fanOut(self, head, depth=4):
+        for d in range(max(1, depth-2)):
             head = head.parent
         head.forceStrong(depth)
 
@@ -499,7 +507,7 @@ class Trainer(Runtime):
         loss_func = nn.MSELoss()
         optimizer = optim.Adam(model.parameters(), lr)
         term = self.buildDatasetFromModel(model, depth=calcDepth)
-        for r in range(16):
+        for r in range(64):
            loss_sum = 0
            zeroLen = 0
            for i, node in enumerate(self.timelineIter(term)):