Does not work
parent 5440c23378
commit de5137ecd3
BIN  brains/uttt.pth  Normal file
Binary file not shown.
@@ -115,13 +115,72 @@ class TTTState(State):
             s.append("-"*(len(row_str)*2-1))
         return '\n'.join(s)

-    def getTensor(self):
-        pass
+    def symbToNum(self, b):
+        if b == '.':
+            return 0.0
+        elif b == 'O':
+            return -1.0 + 2.0 * self.curPlayer
+        else:
+            return 1.0 - 2.0 * self.curPlayer
+
+    def getTensor(self, player=None, phase='default'):
+        if player==None:
+            player = self.curPlayer
+        return torch.tensor([self.symbToNum(b) for b in self.board])

     @classmethod
-    def getModel():
-        pass
+    def getModel(cls, phase='default'):
+        return Model()
+
+class Model(nn.Module):
+    def __init__(self):
+        super().__init__()
+
+        self.smolChan = 12
+        self.bigChan = 5
+        self.compChan = 3
+
+        self.smol = nn.Sequential(
+            nn.Conv2d(
+                in_channels=1,
+                out_channels=self.smolChan,
+                kernel_size=(3,3),
+                stride=3,
+                padding=0,
+            ),
+            nn.ReLU()
+        )
+        self.big = nn.Sequential(
+            nn.Conv2d(
+                in_channels=self.smolChan,
+                out_channels=self.bigChan,
+                kernel_size=(3,3),
+                stride=3,
+                padding=0,
+            ),
+            nn.ReLU()
+        )
+        self.out = nn.Sequential(
+            #nn.Linear(bigChan, 1),
+            nn.Linear(self.bigChan, self.compChan),
+            nn.ReLU(),
+            nn.Linear(self.compChan, 1),
+            nn.Sigmoid()
+        )
+
+    def forward(self, x):
+        x = torch.reshape(x, (1,9,9))
+        x = self.smol(x)
+        x = self.big(x)
+        x = torch.reshape(x, (self.bigChan,))
+        #x = x.view(x.size(0), -1)
+        y = self.out(x)
+        return y

 if __name__=="__main__":
-    run = Runtime(TTTState())
-    run.game([0,1], 4)
+    run = NeuralRuntime(TTTState())
+    run.game(None, 4)
+
+    #trainer = Trainer(TTTState())
+    #trainer.train()
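For reference, the new Model treats the 9x9 ultimate-tic-tac-toe board as a single-channel 9x9 image: the first 3x3, stride-3 convolution collapses each sub-board into one cell (smolChan x 3 x 3), the second 3x3, stride-3 convolution collapses the 3x3 grid of sub-boards into one cell (bigChan x 1 x 1), and the linear head maps bigChan -> compChan -> 1 through a sigmoid. A minimal standalone sketch of that shape flow, assuming the committed layer sizes (smolChan=12, bigChan=5, compChan=3) and using an explicit batch dimension for clarity (the commit itself reshapes the input to (1,9,9)):

    import torch
    import torch.nn as nn

    smolChan, bigChan, compChan = 12, 5, 3

    smol = nn.Sequential(nn.Conv2d(1, smolChan, kernel_size=3, stride=3), nn.ReLU())
    big = nn.Sequential(nn.Conv2d(smolChan, bigChan, kernel_size=3, stride=3), nn.ReLU())
    head = nn.Sequential(nn.Linear(bigChan, compChan), nn.ReLU(),
                         nn.Linear(compChan, 1), nn.Sigmoid())

    x = torch.randn(1, 1, 9, 9)    # one 9x9 board, one channel
    x = smol(x)                    # -> (1, 12, 3, 3): one cell per sub-board
    x = big(x)                     # -> (1, 5, 1, 1): whole-board features
    x = x.reshape(1, bigChan)      # flatten for the linear head
    y = head(x)                    # -> (1, 1): a sigmoid score in (0, 1)
    print(x.shape, y.shape)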
vacuumDecay.py  (116 changed lines)
@@ -2,7 +2,9 @@ import time
 import random
 import threading
 import torch
-from math import sqrt, inf
+import torch.nn as nn
+from torch import optim
+from math import sqrt, pow, inf
 #from multiprocessing import Event
 from abc import ABC, abstractmethod
 from threading import Event
@@ -91,15 +93,17 @@ class State(ABC):
         return "[#]"

     @abstractmethod
-    def getTensor(self, phase='default'):
+    def getTensor(self, player=None, phase='default'):
+        if player==None:
+            player = self.curPlayer
         return torch.tensor([0])

     @classmethod
-    def getModel():
+    def getModel(cls, phase='default'):
         pass

-    def getScoreNeural(self):
-        return self.model(self.getTensor())
+    def getScoreNeural(self, model, player=None, phase='default'):
+        return model(self.getTensor(player=player, phase=phase)).item()

 class Universe():
     def __init__(self):
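The getTensor and getScoreNeural hooks now take an explicit player, and TTTState.symbToNum in the first file encodes each cell relative to the side to move: empty cells become 0.0 and, assuming player 0 plays 'X' and player 1 plays 'O' (an assumption, not stated in the commit), the mover's own marks become +1.0 and the opponent's -1.0. A small standalone check of that encoding, copying the committed symbToNum logic with cur_player standing in for self.curPlayer:

    def symb_to_num(b, cur_player):
        if b == '.':
            return 0.0
        elif b == 'O':
            return -1.0 + 2.0 * cur_player
        else:  # assumed to be 'X'
            return 1.0 - 2.0 * cur_player

    print([symb_to_num(b, 0) for b in "X.O"])  # [1.0, 0.0, -1.0] from player 0's view
    print([symb_to_num(b, 1) for b in "X.O"])  # [-1.0, 0.0, 1.0] from player 1's view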
@@ -282,6 +286,9 @@ class Node():
     def _calcScore(self, player):
         if self.universe.scoreProvider == 'naive':
             self._scores[player] = self.state.getScoreFor(player)
+        elif self.universe.scoreProvider == 'neural':
+            self._scores[player] = self.state.getScoreNeural(self.universe.model, player)
+
         else:
             raise Exception('Uknown Score-Provider')

@@ -322,7 +329,7 @@ class Node():
         s.append("[ -> "+str(self.lastAction)+" ]")
         s.append("[ turn: "+str(self.state.curPlayer)+" ]")
         s.append(str(self.state))
-        s.append("[ score: "+str(self.getSelfScore())+" ]")
+        s.append("[ score: "+str(self.getStrongFor(self.state.curPlayer))+" ]")
         return '\n'.join(s)

 def choose(txt, options):
@@ -428,22 +435,105 @@ class Runtime():
         print(self.head.getWinner() + ' won!')
         self.killWorker()

+class NeuralRuntime(Runtime):
+    def __init__(self, initState):
+        super().__init__(initState)
+
+        model = self.head.state.getModel()
+        model.load_state_dict(torch.load('brains/uttt.pth'))
+        model.eval()
+
+        self.head.universe.model = model
+        self.head.universe.scoreProvider = 'neural'
+
 class Trainer(Runtime):
     def __init__(self, initState):
         self.universe = Universe()
         self.rootNode = Node(initState, universe = self.universe)
         self.terminal = None

-    def linearPlay(self, calcDepth=8):
-        head = rootNode
+    def buildDatasetFromModel(self, model, depth=4, refining=False):
+        print('[*] Building Timeline')
+        term = self.linearPlay(model, calcDepth=depth)
+        if refining:
+            print('[*] Refining Timeline')
+            self.fanOut(term, depth=depth+1)
+            self.fanOut(term.parent, depth=depth+1)
+            self.fanOut(term.parent.parent, depth=depth+1)
+        return term
+
+    def fanOut(self, head, depth=10):
+        for d in range(max(3, depth-3)):
+            head = head.parent
+        head.forceStrong(depth)
+
+    def linearPlay(self, model, calcDepth=7, verbose=True):
+        head = self.rootNode
+        self.universe.model = model
         while head.getWinner()==None:
-            self.head.forceStrong(calcDepth)
+            if verbose:
+                print(head)
+            else:
+                print('.', end='', flush=True)
+            head.forceStrong(calcDepth)
             opts = []
-            for c in self.head.childs:
-                opts.append((c, c.getStrongFor(self.head.curPlayer)))
+            if len(head.childs)==0:
+                break
+            for c in head.childs:
+                opts.append((c, c.getStrongFor(head.curPlayer)))
             opts.sort(key=lambda x: x[1])
-            ind = int(math.pow(random.random(),5)*len(opts))
+            ind = int(pow(random.random(),5)*(len(opts)-1))
             head = opts[ind][0]
-        self.terminal = head
+        print('')
         return head
+
+    def timelineIter(self, term):
+        head = term
+        while True:
+            yield head
+            if head.parent == None:
+                return
+            head = head.parent
+
+    def trainModel(self, model, lr=0.01, cut=0.01, calcDepth=4):
+        loss_func = nn.MSELoss()
+        optimizer = optim.Adam(model.parameters(), lr)
+        term = self.buildDatasetFromModel(model, depth=calcDepth)
+        for r in range(16):
+            loss_sum = 0
+            zeroLen = 0
+            for i, node in enumerate(self.timelineIter(term)):
+                for p in range(self.rootNode.playersNum):
+                    inp = node.state.getTensor(player=p)
+                    gol = torch.tensor(node.getStrongFor(p), dtype=torch.float)
+                    out = model(inp)
+                    loss = loss_func(out, gol)
+                    optimizer.zero_grad()
+                    loss.backward()
+                    optimizer.step()
+                    loss_sum += loss.item()
+                    if loss.item() == 0.0:
+                        zeroLen+=1
+                if zeroLen == 5:
+                    break
+            print(loss_sum/i)
+            if loss_sum/i < cut:
+                return
+
+    def main(self, model=None, gens=64):
+        newModel = False
+        if model==None:
+            newModel = True
+            model = self.rootNode.state.getModel()
+        self.universe.scoreProvider = ['neural','naive'][newModel]
+        for gen in range(gens):
+            print('[#####] Gen '+str(gen)+' training:')
+            self.trainModel(model, calcDepth=3)
+            self.universe.scoreProvider = 'neural'
+            torch.save(model.state_dict(), 'brains/uttt.pth')
+
+    def train(self):
+        model = self.rootNode.state.getModel()
+        model.load_state_dict(torch.load('brains/uttt.pth'))
+        model.eval()
+        self.main(model)
