Wider timeline iter & beautify

This commit is contained in:
Dominik Moritz Roth 2022-05-18 19:02:51 +02:00
parent 6967243ae2
commit 4a018638d5

View File

@ -1,3 +1,7 @@
# This module is a library; refuse to run when executed as a script.
if __name__ == '__main__':
    print('[!] VacuumDecay should not be started directly')
    # `exit` is a helper injected by the `site` module for interactive use
    # and is missing under `python -S` or in frozen builds. Raising
    # SystemExit without an argument terminates with the same status (0).
    raise SystemExit
import os import os
import io import io
import time import time
@ -17,6 +21,7 @@ import random
import datetime import datetime
import pickle import pickle
class Action(): class Action():
# Should hold the data representing an action # Should hold the data representing an action
# Actions are applied to a State in State.mutate # Actions are applied to a State in State.mutate
@ -37,6 +42,7 @@ class Action():
# should start with < and end with > # should start with < and end with >
return "<P"+str(self.player)+"-"+str(self.data)+">" return "<P"+str(self.player)+"-"+str(self.data)+">"
class State(ABC): class State(ABC):
# Hold a representation of the current game-state # Hold a representation of the current game-state
# Allows retriving avaible actions (getAvaibleActions) and applying them (mutate) # Allows retriving avaible actions (getAvaibleActions) and applying them (mutate)
@ -110,6 +116,7 @@ class State(ABC):
def getScoreNeural(self, model, player=None, phase='default'):
    # Score this state with `model`: the result of getTensor(player, phase)
    # is passed to the model and the output is unwrapped with .item().
    encoded = self.getTensor(player=player, phase=phase)
    prediction = model(encoded)
    return prediction.item()
class Universe(): class Universe():
def __init__(self): def __init__(self):
self.scoreProvider = 'naive' self.scoreProvider = 'naive'
@ -129,11 +136,13 @@ class Universe():
def activateEdge(self, head):
    # No-op in this class: `head` is ignored and None is returned.
    return None
@dataclass(order=True)
class PQItem:
    # Entry pairing a priority with an arbitrary payload.
    # Generated comparisons look at `priority` only, because `data` is
    # declared with compare=False.
    priority: int
    data: Any = field(compare=False)
class QueueingUniverse(Universe): class QueueingUniverse(Universe):
def __init__(self): def __init__(self):
super().__init__() super().__init__()
@ -193,7 +202,8 @@ class Node():
self._childs = [] self._childs = []
actions = self.state.getAvaibleActions() actions = self.state.getAvaibleActions()
for action in actions: for action in actions:
newNode = Node(self.state.mutate(action), self.universe, self, action) newNode = Node(self.state.mutate(action),
self.universe, self, action)
self._childs.append(self.universe.merge(newNode)) self._childs.append(self.universe.merge(newNode))
def getStrongFor(self, player): def getStrongFor(self, player):
@ -213,11 +223,13 @@ class Node():
best = c.getStrongFor(p) best = c.getStrongFor(p)
strongs[p] = best strongs[p] = best
else: else:
scos = [(c.getStrongFor(p), c.getStrongFor(cp)) for c in self.childs] scos = [(c.getStrongFor(p), c.getStrongFor(cp))
for c in self.childs]
scos.sort(key=lambda x: x[1]) scos.sort(key=lambda x: x[1])
betterHalf = scos[:max(3, int(len(scos)/3))] betterHalf = scos[:max(3, int(len(scos)/3))]
myScores = [bh[0]**2 for bh in betterHalf] myScores = [bh[0]**2 for bh in betterHalf]
strongs[p] = sqrt(myScores[0]*0.75 + sum(myScores)/(len(myScores)*4)) strongs[p] = sqrt(myScores[0]*0.75 +
sum(myScores)/(len(myScores)*4))
update = False update = False
for s in range(self.playersNum): for s in range(self.playersNum):
if strongs[s] != self._strongs[s]: if strongs[s] != self._strongs[s]:
@ -301,7 +313,8 @@ class Node():
if self.universe.scoreProvider == 'naive': if self.universe.scoreProvider == 'naive':
self._scores[player] = self.state.getScoreFor(player) self._scores[player] = self.state.getScoreFor(player)
elif self.universe.scoreProvider == 'neural': elif self.universe.scoreProvider == 'neural':
self._scores[player] = self.state.getScoreNeural(self.universe.model, player) self._scores[player] = self.state.getScoreNeural(
self.universe.model, player)
else: else:
raise Exception('Uknown Score-Provider') raise Exception('Uknown Score-Provider')
@ -350,6 +363,7 @@ class Node():
s.append("[ score: "+str(self.getScoreFor(0))+" ]") s.append("[ score: "+str(self.getScoreFor(0))+" ]")
return '\n'.join(s) return '\n'.join(s)
def choose(txt, options): def choose(txt, options):
while True: while True:
print('[*] '+txt) print('[*] '+txt)
@ -371,6 +385,7 @@ def choose(txt, options):
return opt return opt
print('[!] Invalid Input.') print('[!] Invalid Input.')
class Worker(): class Worker():
def __init__(self, universe): def __init__(self, universe):
self.universe = universe self.universe = universe
@ -396,6 +411,7 @@ class Worker():
def revive(self):
    # Set the worker's _alive flag back to True.
    alive = True
    self._alive = alive
class Runtime(): class Runtime():
def __init__(self, initState): def __init__(self, initState):
universe = QueueingUniverse() universe = QueueingUniverse()
@ -458,17 +474,42 @@ class Runtime():
if bg: if bg:
self.killWorker() self.killWorker()
def saveModel(self, model, gen):
    # Persist the model weights together with the generation counter.
    #
    # model: object exposing state_dict(); its result is pickled as-is
    # gen:   generation number stored next to the weights
    #
    # The file at self.getModelFileName() receives one pickled tuple
    # (gen, state_dict).
    path = self.getModelFileName()
    # Create the target directory on demand: without it open() raises
    # FileNotFoundError and the freshly trained weights are lost.
    directory = os.path.dirname(path)
    if directory:
        os.makedirs(directory, exist_ok=True)
    dat = model.state_dict()
    with open(path, 'wb') as f:
        pickle.dump((gen, dat), f)
def loadModelState(self, model):
    # Read the pickled (generation, state_dict) tuple from
    # self.getModelFileName(), load the weights into `model`, switch the
    # model to eval mode and return the stored generation.
    # NOTE: pickle.load can execute arbitrary code from the file; only load
    # checkpoints from a trusted source.
    with open(self.getModelFileName(), 'rb') as handle:
        checkpoint = pickle.load(handle)
    generation, weights = checkpoint
    model.load_state_dict(weights)
    model.eval()
    return generation
def loadModel(self):
    # Build a fresh model via the head state's getModel(), fill it from the
    # saved checkpoint and return (model, generation).
    fresh = self.head.state.getModel()
    return fresh, self.loadModelState(fresh)
def getModelFileName(self):
    # Location of the pickled model checkpoint, relative to the working
    # directory.
    # NOTE(review): the memory-bank path in this file spells the game
    # 'uttt' while this name uses 'utt' - confirm the spelling is intended
    # before renaming, since existing checkpoints depend on it.
    checkpoint_path = 'brains/utt.vac'
    return checkpoint_path
def saveToMemoryBank(self, term, enabled=False):
    # Pickle `term` into a uniquely named file under memoryBank/uttt/.
    #
    # term:    object to pickle
    # enabled: the dump is switched off by default, so calls that pass only
    #          `term` remain a no-op; pass True to actually write the file.
    #
    # Returns None in both cases.
    if not enabled:
        return
    directory = 'memoryBank/uttt/'
    # Create the target directory on demand so that the first dump does not
    # fail with FileNotFoundError.
    os.makedirs(directory, exist_ok=True)
    stamp = datetime.datetime.now().strftime('%Y-%m-%d_%H:%M:%S')
    # A random suffix keeps dumps written within the same second apart.
    suffix = str(int(random.random()*99999))
    with open(directory+stamp+'_'+suffix+'.vdm', 'wb') as f:
        pickle.dump(term, f)
class NeuralRuntime(Runtime):
    # Runtime whose universe scores nodes with the saved neural model
    # instead of the default score provider.
    def __init__(self, initState):
        super().__init__(initState)
        # loadModel() also returns the stored generation, which is not
        # needed here.
        model, _ = self.loadModel()
        universe = self.head.universe
        universe.model = model
        universe.scoreProvider = 'neural'
class Trainer(Runtime): class Trainer(Runtime):
def __init__(self, initState): def __init__(self, initState):
super().__init__(initState) super().__init__(initState)
@ -523,15 +564,35 @@ class Trainer(Runtime):
print(' => '+['O', 'X', 'No one'][head.getWinner()] + ' won!') print(' => '+['O', 'X', 'No one'][head.getWinner()] + ' won!')
return head return head
def timelineIterSingle(self, term):
    # Convenience wrapper around timelineIter for a single terminal node:
    # yields exactly what timelineIter([term]) yields.
    # timelineIter is called as a bound method, so `self` must not be passed
    # a second time (that would bind `terms` to the Trainer itself).
    yield from self.timelineIter([term])
def timelineIter(self, terms, altChildPerNode=-1):
    # Walk several timelines in lockstep, from each given node up to its
    # root, yielding nodes on the way.
    #
    # terms:           sequence of start nodes; each needs `.parent` (None
    #                  at the root) and `.childs`
    # altChildPerNode: -1 yields every child of each visited node; any other
    #                  value yields up to that many randomly chosen children
    #                  (capped at half the child count, repeats possible)
    #
    # Per round, every timeline that is not yet exhausted yields its current
    # node, then the selected children, then advances to the parent. The
    # generator ends once all timelines have passed their root.
    #
    # Work on a private copy: the cursor list is overwritten while walking,
    # and writing into the caller's list would leave it filled with None,
    # making any later pass over the same `terms` yield nothing.
    heads = list(terms)
    while True:
        exhausted = True
        for idx, head in enumerate(heads):
            if head is None:
                continue
            exhausted = False
            yield head
            childs = head.childs
            if len(childs):
                if altChildPerNode == -1:  # all
                    yield from childs
                else:
                    for _ in range(min(altChildPerNode, len(childs) // 2)):
                        yield random.choice(childs)
            # The parent of the root is None, which retires this timeline.
            heads[idx] = head.parent
        if exhausted:
            return
def timelineExpandUncertain(self, term, secs): def timelineExpandUncertain(self, term, secs):
self.rootNode.universe.clearPQ() self.rootNode.universe.clearPQ()
@ -544,20 +605,24 @@ class Trainer(Runtime):
self.killWorker() self.killWorker()
print('') print('')
def trainModel(self, model, lr=0.00005, cut=0.01, calcDepth=4, exacity=5, term=None): def trainModel(self, model, lr=0.00005, cut=0.01, calcDepth=4, exacity=5, terms=None, batch=16):
loss_func = nn.MSELoss() loss_func = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr) optimizer = optim.Adam(model.parameters(), lr)
if term==None: if terms == None:
term = self.buildDatasetFromModel(model, depth=calcDepth, exacity=exacity) terms = []
for i in range(batch):
terms.append(self.buildDatasetFromModel(
model, depth=calcDepth, exacity=exacity))
print('[*] Conditioning Brain') print('[*] Conditioning Brain')
for r in range(64): for r in range(64):
loss_sum = 0 loss_sum = 0
lLoss = 0 lLoss = 0
zeroLen = 0 zeroLen = 0
for i, node in enumerate(self.timelineIter(term)): for i, node in enumerate(self.timelineIter(terms)):
for p in range(self.rootNode.playersNum): for p in range(self.rootNode.playersNum):
inp = node.state.getTensor(player=p) inp = node.state.getTensor(player=p)
gol = torch.tensor([node.getStrongFor(p)], dtype=torch.float) gol = torch.tensor(
[node.getStrongFor(p)], dtype=torch.float)
out = model(inp) out = model(inp)
loss = loss_func(out, gol) loss = loss_func(out, gol)
optimizer.zero_grad() optimizer.zero_grad()
@ -584,27 +649,17 @@ class Trainer(Runtime):
model.train() model.train()
for gen in range(startGen, startGen+gens): for gen in range(startGen, startGen+gens):
print('[#####] Gen '+str(gen)+' training:') print('[#####] Gen '+str(gen)+' training:')
loss = self.trainModel(model, calcDepth=min(4,3+int(gen/16)), exacity=int(gen/3+1)) loss = self.trainModel(model, calcDepth=min(
4, 3+int(gen/16)), exacity=int(gen/3+1), batch=4)
print('[L] '+str(loss)) print('[L] '+str(loss))
self.universe.scoreProvider = 'neural' self.universe.scoreProvider = 'neural'
self.saveModel(model, gen) self.saveModel(model, gen)
def trainFromTerm(self, term):
    # Continue training the saved model on one finished game.
    #
    # term: terminal node of the game to learn from
    #
    # Loads the checkpoint, trains on the timeline of `term` and writes the
    # checkpoint back under the generation it was loaded with.
    model, gen = self.loadModel()
    # loadModel() leaves the model in eval mode; switch back to training
    # mode first, as main() does.
    model.train()
    self.universe.scoreProvider = 'neural'
    # trainModel takes a list of terminal nodes via `terms`.
    self.trainModel(model, calcDepth=4, exacity=10, terms=[term])
    # saveModel requires the generation number as its second argument.
    self.saveModel(model, gen)
def loadModelState(self, model):
with open(self.getModelFileName(), 'rb') as f:
gen, dat = pickle.load(f)
model.load_state_dict(dat)
model.eval()
return gen
def loadModel(self):
model = self.rootNode.state.getModel()
gen = self.loadModelState(model)
return model, gen
def train(self): def train(self):
if os.path.exists(self.getModelFileName()): if os.path.exists(self.getModelFileName()):
@ -612,20 +667,3 @@ class Trainer(Runtime):
self.main(model, startGen=gen+1) self.main(model, startGen=gen+1)
else: else:
self.main() self.main()
def getModelFileName(self):
return 'brains/utt.vac'
def trainFromTerm(self, term):
model = self.rootNode.state.getModel()
model.load_state_dict(torch.load('brains/uttt.vac'))
model.eval()
self.universe.scoreProvider = 'neural'
self.trainModel(model, calcDepth=4, exacity=10, term=term)
self.saveModel(model)
def saveToMemoryBank(self, term):
return
with open('memoryBank/uttt/'+datetime.datetime.now().strftime('%Y-%m-%d_%H:%M:%S')+'_'+str(int(random.random()*99999))+'.vdm', 'wb') as f:
pickle.dump(term, f)