Does not work

Dominik Moritz Roth 2022-04-15 00:32:48 +02:00
parent 5440c23378
commit de5137ecd3
3 changed files with 168 additions and 19 deletions

brains/uttt.pth (new binary file, not shown)
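This checkpoint holds the weights that NeuralRuntime (introduced further down in this commit) loads before play. A minimal loading sketch, not part of the commit; the module name is hypothetical, the calls mirror NeuralRuntime.__init__ below:

import torch
from uttt import TTTState   # hypothetical module name for the game file in this commit

model = TTTState.getModel()                            # returns the Model() defined below
model.load_state_dict(torch.load('brains/uttt.pth'))   # the weights stored in this binary file
model.eval()                                           # inference only, as NeuralRuntime.__init__ does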


@@ -115,13 +115,72 @@ class TTTState(State):
         s.append("-"*(len(row_str)*2-1))
         return '\n'.join(s)

-    def getTensor(self):
-        pass
+    def symbToNum(self, b):
+        if b == '.':
+            return 0.0
+        elif b == 'O':
+            return -1.0 + 2.0 * self.curPlayer
+        else:
+            return 1.0 - 2.0 * self.curPlayer
+
+    def getTensor(self, player=None, phase='default'):
+        if player==None:
+            player = self.curPlayer
+        return torch.tensor([self.symbToNum(b) for b in self.board])

     @classmethod
-    def getModel():
-        pass
+    def getModel(cls, phase='default'):
+        return Model()
+
+class Model(nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.smolChan = 12
+        self.bigChan = 5
+        self.compChan = 3
+        self.smol = nn.Sequential(
+            nn.Conv2d(
+                in_channels=1,
+                out_channels=self.smolChan,
+                kernel_size=(3,3),
+                stride=3,
+                padding=0,
+            ),
+            nn.ReLU()
+        )
+        self.big = nn.Sequential(
+            nn.Conv2d(
+                in_channels=self.smolChan,
+                out_channels=self.bigChan,
+                kernel_size=(3,3),
+                stride=3,
+                padding=0,
+            ),
+            nn.ReLU()
+        )
+        self.out = nn.Sequential(
+            #nn.Linear(bigChan, 1),
+            nn.Linear(self.bigChan, self.compChan),
+            nn.ReLU(),
+            nn.Linear(self.compChan, 1),
+            nn.Sigmoid()
+        )
+
+    def forward(self, x):
+        x = torch.reshape(x, (1,9,9))
+        x = self.smol(x)
+        x = self.big(x)
+        x = torch.reshape(x, (self.bigChan,))
+        #x = x.view(x.size(0), -1)
+        y = self.out(x)
+        return y

 if __name__=="__main__":
-    run = Runtime(TTTState())
-    run.game([0,1], 4)
+    run = NeuralRuntime(TTTState())
+    run.game(None, 4)
+    #trainer = Trainer(TTTState())
+    #trainer.train()
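Not part of the commit: a minimal shape walk-through of the Model above, using the same channel sizes (smolChan=12, bigChan=5, compChan=3). The first stride-3 convolution turns each of the nine 3x3 sub-boards into a 12-dim feature vector; the second collapses those nine vectors into a single 5-dim board summary, which the small MLP maps to a sigmoid score. The sketch feeds a batched (1, 1, 9, 9) tensor, whereas forward() above reshapes to an unbatched (1, 9, 9).

import torch
import torch.nn as nn

# Shape check only; weights are untrained and the layer names are local to this sketch.
smol = nn.Conv2d(1, 12, kernel_size=(3, 3), stride=3)
big = nn.Conv2d(12, 5, kernel_size=(3, 3), stride=3)
head = nn.Sequential(nn.Linear(5, 3), nn.ReLU(), nn.Linear(3, 1), nn.Sigmoid())

x = torch.zeros(1, 1, 9, 9)        # one board, one channel: the 81-cell tensor from getTensor()
h = torch.relu(smol(x))            # -> (1, 12, 3, 3): one 12-dim feature per 3x3 sub-board
h = torch.relu(big(h))             # -> (1, 5, 1, 1): a 5-dim summary of the whole board
y = head(h.reshape(1, 5))          # -> (1, 1): sigmoid score in [0, 1]
print(y.shape)                     # torch.Size([1, 1])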


@@ -2,7 +2,9 @@ import time
 import random
 import threading
 import torch
-from math import sqrt, inf
+import torch.nn as nn
+from torch import optim
+from math import sqrt, pow, inf
 #from multiprocessing import Event
 from abc import ABC, abstractmethod
 from threading import Event
@@ -91,15 +93,17 @@ class State(ABC):
         return "[#]"

     @abstractmethod
-    def getTensor(self, phase='default'):
+    def getTensor(self, player=None, phase='default'):
+        if player==None:
+            player = self.curPlayer
         return torch.tensor([0])

     @classmethod
-    def getModel():
+    def getModel(cls, phase='default'):
         pass

-    def getScoreNeural(self):
-        return self.model(self.getTensor())
+    def getScoreNeural(self, model, player=None, phase='default'):
+        return model(self.getTensor(player=player, phase=phase)).item()

 class Universe():
     def __init__(self):
@@ -282,6 +286,9 @@ class Node():

     def _calcScore(self, player):
         if self.universe.scoreProvider == 'naive':
             self._scores[player] = self.state.getScoreFor(player)
+        elif self.universe.scoreProvider == 'neural':
+            self._scores[player] = self.state.getScoreNeural(self.universe.model, player)
         else:
             raise Exception('Uknown Score-Provider')
@@ -322,7 +329,7 @@ class Node():
         s.append("[ -> "+str(self.lastAction)+" ]")
         s.append("[ turn: "+str(self.state.curPlayer)+" ]")
         s.append(str(self.state))
-        s.append("[ score: "+str(self.getSelfScore())+" ]")
+        s.append("[ score: "+str(self.getStrongFor(self.state.curPlayer))+" ]")
         return '\n'.join(s)

 def choose(txt, options):
@@ -428,22 +435,105 @@ class Runtime():
         print(self.head.getWinner() + ' won!')
         self.killWorker()

+class NeuralRuntime(Runtime):
+    def __init__(self, initState):
+        super().__init__(initState)
+
+        model = self.head.state.getModel()
+        model.load_state_dict(torch.load('brains/uttt.pth'))
+        model.eval()
+
+        self.head.universe.model = model
+        self.head.universe.scoreProvider = 'neural'
+
 class Trainer(Runtime):
     def __init__(self, initState):
         self.universe = Universe()
         self.rootNode = Node(initState, universe = self.universe)
         self.terminal = None

-    def linearPlay(self, calcDepth=8):
-        head = rootNode
+    def buildDatasetFromModel(self, model, depth=4, refining=False):
+        print('[*] Building Timeline')
+        term = self.linearPlay(model, calcDepth=depth)
+        if refining:
+            print('[*] Refining Timeline')
+            self.fanOut(term, depth=depth+1)
+            self.fanOut(term.parent, depth=depth+1)
+            self.fanOut(term.parent.parent, depth=depth+1)
+        return term
+
+    def fanOut(self, head, depth=10):
+        for d in range(max(3, depth-3)):
+            head = head.parent
+        head.forceStrong(depth)
+
+    def linearPlay(self, model, calcDepth=7, verbose=True):
+        head = self.rootNode
+        self.universe.model = model
         while head.getWinner()==None:
-            self.head.forceStrong(calcDepth)
+            if verbose:
+                print(head)
+            else:
+                print('.', end='', flush=True)
+            head.forceStrong(calcDepth)
             opts = []
-            for c in self.head.childs:
-                opts.append((c, c.getStrongFor(self.head.curPlayer)))
+            if len(head.childs)==0:
+                break
+            for c in head.childs:
+                opts.append((c, c.getStrongFor(head.curPlayer)))
             opts.sort(key=lambda x: x[1])
-            ind = int(math.pow(random.random(),5)*len(opts))
+            ind = int(pow(random.random(),5)*(len(opts)-1))
             head = opts[ind][0]
-        self.terminal = head
+        print('')
         return head
+
+    def timelineIter(self, term):
+        head = term
+        while True:
+            yield head
+            if head.parent == None:
+                return
+            head = head.parent
+
+    def trainModel(self, model, lr=0.01, cut=0.01, calcDepth=4):
+        loss_func = nn.MSELoss()
+        optimizer = optim.Adam(model.parameters(), lr)
+        term = self.buildDatasetFromModel(model, depth=calcDepth)
+        for r in range(16):
+            loss_sum = 0
+            zeroLen = 0
+            for i, node in enumerate(self.timelineIter(term)):
+                for p in range(self.rootNode.playersNum):
+                    inp = node.state.getTensor(player=p)
+                    gol = torch.tensor(node.getStrongFor(p), dtype=torch.float)
+                    out = model(inp)
+                    loss = loss_func(out, gol)
+                    optimizer.zero_grad()
+                    loss.backward()
+                    optimizer.step()
+                    loss_sum += loss.item()
+                    if loss.item() == 0.0:
+                        zeroLen+=1
+                if zeroLen == 5:
+                    break
+            print(loss_sum/i)
+            if loss_sum/i < cut:
+                return
+
+    def main(self, model=None, gens=64):
+        newModel = False
+        if model==None:
+            newModel = True
+            model = self.rootNode.state.getModel()
+        self.universe.scoreProvider = ['neural','naive'][newModel]
+        for gen in range(gens):
+            print('[#####] Gen '+str(gen)+' training:')
+            self.trainModel(model, calcDepth=3)
+            self.universe.scoreProvider = 'neural'
+            torch.save(model.state_dict(), 'brains/uttt.pth')
+
+    def train(self):
+        model = self.rootNode.state.getModel()
+        model.load_state_dict(torch.load('brains/uttt.pth'))
+        model.eval()
+        self.main(model)
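For orientation, a hypothetical driver script (module names assumed, not part of the commit) that mirrors the entry points visible in this diff: Trainer.main() bootstraps a fresh model with naive scoring for the first generation, then switches to neural self-play scoring and keeps saving to brains/uttt.pth; NeuralRuntime loads that file to play.

# Assumed module names; only the calls shown in the diff are used.
from uttt import TTTState                        # game file from this commit
from vacuumDecay import Trainer, NeuralRuntime   # engine file from this commit

def train_from_scratch():
    trainer = Trainer(TTTState())
    trainer.main(model=None, gens=64)    # None -> fresh Model(), first gen scored naively

def play_against_brain():
    run = NeuralRuntime(TTTState())      # loads brains/uttt.pth, scoreProvider='neural'
    run.game(None, 4)                    # as in the __main__ block of the game file

if __name__ == '__main__':
    train_from_scratch()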