diff --git a/brains/uttt.pth b/brains/uttt.pth
index eae2edd..200b673 100644
Binary files a/brains/uttt.pth and b/brains/uttt.pth differ
diff --git a/ultimatetictactoe.py b/ultimatetictactoe.py
index 57e3e1f..7e327d0 100644
--- a/ultimatetictactoe.py
+++ b/ultimatetictactoe.py
@@ -141,40 +141,47 @@ class Model(nn.Module):
     def __init__(self):
         super().__init__()
 
-        self.smolChan = 12
-        self.compChan = 7
-
         self.smol = nn.Sequential(
             nn.Conv2d(
                 in_channels=1,
-                out_channels=self.smolChan,
+                out_channels=24,
                 kernel_size=(3,3),
                 stride=3,
                 padding=0,
             ),
             nn.ReLU()
         )
-        self.big = nn.Sequential(
-            nn.Linear(self.smolChan*9, self.compChan),
-            #nn.ReLU(),
-            #nn.Linear(self.compChan, 1),
+        self.comb = nn.Sequential(
+            nn.Conv1d(
+                in_channels=24,
+                out_channels=8,
+                kernel_size=1,
+                stride=1,
+                padding=0,
+            ),
+            nn.ReLU()
+        )
+        self.out = nn.Sequential(
+            nn.Linear(9*8, 32),
             nn.ReLU(),
-            nn.Linear(self.compChan, 3),
+            nn.Linear(32, 8),
             nn.ReLU(),
-            nn.Linear(3, 1),
+            nn.Linear(8, 1),
             nn.Sigmoid()
         )
 
     def forward(self, x):
         x = torch.reshape(x, (1,9,9))
         x = self.smol(x)
-        x = torch.reshape(x, (self.smolChan*9,))
-        y = self.big(x)
+        x = torch.reshape(x, (24,9))
+        x = self.comb(x)
+        x = torch.reshape(x, (-1,))
+        y = self.out(x)
         return y
 
 if __name__=="__main__":
     run = NeuralRuntime(TTTState())
-    run.game(None, 4)
+    run.game([0,1], 4)
 
-    trainer = Trainer(TTTState())
-    trainer.train()
+    #trainer = Trainer(TTTState())
+    #trainer.train()
diff --git a/vacuumDecay.py b/vacuumDecay.py
index 1fa68d9..2b20a2c 100644
--- a/vacuumDecay.py
+++ b/vacuumDecay.py
@@ -11,6 +11,7 @@ from threading import Event
 from queue import PriorityQueue, Empty
 from dataclasses import dataclass, field
 from typing import Any
+import random
 
 class Action():
     # Should hold the data representing an action
@@ -388,6 +389,7 @@ class Runtime():
     def __init__(self, initState):
         universe = QueueingUniverse()
         self.head = Node(initState, universe = universe)
+        _ = self.head.childs
         universe.newOpen(self.head)
 
     def spawnWorker(self):
@@ -460,9 +462,9 @@ class Trainer(Runtime):
         self.rootNode = Node(initState, universe = self.universe)
         self.terminal = None
 
-    def buildDatasetFromModel(self, model, depth=4, refining=True):
+    def buildDatasetFromModel(self, model, depth=4, refining=True, exacity=5):
         print('[*] Building Timeline')
-        term = self.linearPlay(model, calcDepth=depth)
+        term = self.linearPlay(model, calcDepth=depth, exacity=exacity)
         if refining:
             print('[*] Refining Timeline')
             self.fanOut(term, depth=depth+1)
@@ -475,7 +477,7 @@
             head = head.parent
         head.forceStrong(depth)
 
-    def linearPlay(self, model, calcDepth=7, verbose=True):
+    def linearPlay(self, model, calcDepth=7, exacity=5, verbose=True):
         head = self.rootNode
         self.universe.model = model
         while head.getWinner()==None:
@@ -490,7 +492,10 @@
             for c in head.childs:
                 opts.append((c, c.getStrongFor(head.curPlayer)))
             opts.sort(key=lambda x: x[1])
-            ind = int(pow(random.random(),5)*(len(opts)-1))
+            if exacity >= 10:
+                ind = 0
+            else:
+                ind = int(pow(random.random(),exacity)*(len(opts)-1))
             head = opts[ind][0]
         print('')
         return head
@@ -499,16 +504,20 @@
         head = term
         while True:
             yield head
+            if len(head.childs):
+                yield random.choice(head.childs)
             if head.parent == None:
                 return
             head = head.parent
 
-    def trainModel(self, model, lr=0.01, cut=0.01, calcDepth=4):
+    def trainModel(self, model, lr=0.00005, cut=0.01, calcDepth=4, exacity=5):
         loss_func = nn.MSELoss()
         optimizer = optim.Adam(model.parameters(), lr)
-        term = self.buildDatasetFromModel(model, depth=calcDepth)
+        term = self.buildDatasetFromModel(model, depth=calcDepth, exacity=exacity)
+        print('[*] Conditioning Brain...')
         for r in range(64):
             loss_sum = 0
+            lLoss = 0
             zeroLen = 0
             for i, node in enumerate(self.timelineIter(term)):
                 for p in range(self.rootNode.playersNum):
@@ -524,19 +533,20 @@
                     zeroLen+=1
             if zeroLen == 5:
                 break
-            print(loss_sum/i)
-            if loss_sum/i < cut:
+            #print(loss_sum/i)
+            if r > 16 and (loss_sum/i < cut or lLoss == loss_sum):
                 return
+            lLoss = loss_sum
 
-    def main(self, model=None, gens=64):
+    def main(self, model=None, gens=1024, startGen=12):
         newModel = False
         if model==None:
             newModel = True
             model = self.rootNode.state.getModel()
         self.universe.scoreProvider = ['neural','naive'][newModel]
-        for gen in range(gens):
+        for gen in range(startGen, startGen+gens):
             print('[#####] Gen '+str(gen)+' training:')
-            self.trainModel(model, calcDepth=3)
+            self.trainModel(model, calcDepth=min(5,3+int(gen/16)), exacity=int(gen/3+1))
             self.universe.scoreProvider = 'neural'
             torch.save(model.state_dict(), 'brains/uttt.pth')
 
@@ -544,4 +554,4 @@
         model = self.rootNode.state.getModel()
         model.load_state_dict(torch.load('brains/uttt.pth'))
         model.eval()
-        self.main(model)
+        self.main(model, startGen=0)
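
For reference, the revised Model maps one 9x9 board plane to a single sigmoid score. Below is a minimal shape-trace sketch of that flow, written with an explicit batch dimension instead of the unbatched reshapes used in forward(); the channel sizes (24 and 8) and the 72-unit head come from the diff above, while the random input board and the standalone layer names are purely illustrative.

import torch
import torch.nn as nn

# Sketch of the new architecture's shape flow (assumed single-plane 9x9 input,
# batch dimension added for clarity; variable names are illustrative, not from the repo).
smol = nn.Conv2d(in_channels=1, out_channels=24, kernel_size=(3, 3), stride=3, padding=0)
comb = nn.Conv1d(in_channels=24, out_channels=8, kernel_size=1)
out = nn.Sequential(
    nn.Linear(9 * 8, 32), nn.ReLU(),
    nn.Linear(32, 8), nn.ReLU(),
    nn.Linear(8, 1), nn.Sigmoid(),
)

board = torch.rand(1, 1, 9, 9)    # (batch, channel, 9, 9) board encoding
x = torch.relu(smol(board))       # -> (1, 24, 3, 3): 24 features per 3x3 sub-board
x = x.reshape(1, 24, 9)           # -> (1, 24, 9): the nine sub-boards laid out along the length axis
x = torch.relu(comb(x))           # -> (1, 8, 9): 1x1 conv compresses 24 -> 8 features per sub-board
score = out(x.reshape(1, -1))     # -> (1, 1): sigmoid score, as produced by self.out in the diff
print(score.shape)                # torch.Size([1, 1])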