Compare commits
No commits in common. "cac86ef6abe216b49b1ded903bcc6b142fb03b5f" and "6967243ae2f6cf48b5baf1837f948beb02aba707" have entirely different histories.
cac86ef6ab
...
6967243ae2
BIN
brains/utt.vac
BIN
brains/utt.vac
Binary file not shown.
BIN
brains/uttt.pth.bak
Normal file
BIN
brains/uttt.pth.bak
Normal file
Binary file not shown.
136
vacuumDecay.py
136
vacuumDecay.py
@ -1,7 +1,3 @@
|
||||
# This module is not an entry point: when it is run as a script it only
# prints a notice and stops.
if __name__ == '__main__':
    print('[!] VacuumDecay should not be started directly')
    # `exit` is a helper injected by the `site` module for interactive use and
    # is not guaranteed to exist (e.g. under `python -S`). Raising SystemExit
    # terminates with the same (zero) status without relying on it.
    raise SystemExit
|
||||
|
||||
import os
|
||||
import io
|
||||
import time
|
||||
@ -21,7 +17,6 @@ import random
|
||||
import datetime
|
||||
import pickle
|
||||
|
||||
|
||||
class Action():
|
||||
# Should hold the data representing an action
|
||||
# Actions are applied to a State in State.mutate
|
||||
@ -42,7 +37,6 @@ class Action():
|
||||
# should start with < and end with >
|
||||
return "<P"+str(self.player)+"-"+str(self.data)+">"
|
||||
|
||||
|
||||
class State(ABC):
|
||||
# Hold a representation of the current game-state
|
||||
# Allows retrieving available actions (getAvaibleActions) and applying them (mutate)
|
||||
@ -116,7 +110,6 @@ class State(ABC):
|
||||
def getScoreNeural(self, model, player=None, phase='default'):
|
||||
return model(self.getTensor(player=player, phase=phase)).item()
|
||||
|
||||
|
||||
class Universe():
|
||||
def __init__(self):
|
||||
self.scoreProvider = 'naive'
|
||||
@ -136,13 +129,11 @@ class Universe():
|
||||
def activateEdge(self, head):
|
||||
pass
|
||||
|
||||
|
||||
@dataclass(order=True)
class PQItem:
    # Entry for a priority queue.
    # Ordering and equality are decided by `priority` alone; `data` is
    # excluded from the generated comparison methods (compare=False), so the
    # payload never has to be orderable itself.
    priority: int
    data: Any = field(compare=False)
|
||||
|
||||
|
||||
class QueueingUniverse(Universe):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
@ -202,8 +193,7 @@ class Node():
|
||||
self._childs = []
|
||||
actions = self.state.getAvaibleActions()
|
||||
for action in actions:
|
||||
newNode = Node(self.state.mutate(action),
|
||||
self.universe, self, action)
|
||||
newNode = Node(self.state.mutate(action), self.universe, self, action)
|
||||
self._childs.append(self.universe.merge(newNode))
|
||||
|
||||
def getStrongFor(self, player):
|
||||
@ -223,13 +213,11 @@ class Node():
|
||||
best = c.getStrongFor(p)
|
||||
strongs[p] = best
|
||||
else:
|
||||
scos = [(c.getStrongFor(p), c.getStrongFor(cp))
|
||||
for c in self.childs]
|
||||
scos = [(c.getStrongFor(p), c.getStrongFor(cp)) for c in self.childs]
|
||||
scos.sort(key=lambda x: x[1])
|
||||
betterHalf = scos[:max(3,int(len(scos)/3))]
|
||||
myScores = [bh[0]**2 for bh in betterHalf]
|
||||
strongs[p] = sqrt(myScores[0]*0.75 +
|
||||
sum(myScores)/(len(myScores)*4))
|
||||
strongs[p] = sqrt(myScores[0]*0.75 + sum(myScores)/(len(myScores)*4))
|
||||
update = False
|
||||
for s in range(self.playersNum):
|
||||
if strongs[s] != self._strongs[s]:
|
||||
@ -313,8 +301,7 @@ class Node():
|
||||
if self.universe.scoreProvider == 'naive':
|
||||
self._scores[player] = self.state.getScoreFor(player)
|
||||
elif self.universe.scoreProvider == 'neural':
|
||||
self._scores[player] = self.state.getScoreNeural(
|
||||
self.universe.model, player)
|
||||
self._scores[player] = self.state.getScoreNeural(self.universe.model, player)
|
||||
else:
|
||||
raise Exception('Uknown Score-Provider')
|
||||
|
||||
@ -363,7 +350,6 @@ class Node():
|
||||
s.append("[ score: "+str(self.getScoreFor(0))+" ]")
|
||||
return '\n'.join(s)
|
||||
|
||||
|
||||
def choose(txt, options):
|
||||
while True:
|
||||
print('[*] '+txt)
|
||||
@ -385,7 +371,6 @@ def choose(txt, options):
|
||||
return opt
|
||||
print('[!] Invalid Input.')
|
||||
|
||||
|
||||
class Worker():
|
||||
def __init__(self, universe):
|
||||
self.universe = universe
|
||||
@ -411,7 +396,6 @@ class Worker():
|
||||
def revive(self):
|
||||
self._alive = True
|
||||
|
||||
|
||||
class Runtime():
|
||||
def __init__(self, initState):
|
||||
universe = QueueingUniverse()
|
||||
@ -474,42 +458,17 @@ class Runtime():
|
||||
if bg:
|
||||
self.killWorker()
|
||||
|
||||
def saveModel(self, model, gen):
    # Write a checkpoint to the path given by getModelFileName().
    # The file holds one pickled tuple: (gen, model.state_dict()).
    checkpoint = (gen, model.state_dict())
    with open(self.getModelFileName(), 'wb') as handle:
        pickle.dump(checkpoint, handle)
|
||||
|
||||
def loadModelState(self, model):
    # Read the checkpoint at getModelFileName(), load its state dict into
    # `model`, switch the model to eval mode and return the stored generation.
    # NOTE(review): pickle.load can execute arbitrary code from the file;
    # only load checkpoints that were produced locally.
    with open(self.getModelFileName(), 'rb') as handle:
        checkpoint = pickle.load(handle)
    # The checkpoint is expected to be exactly a (generation, state_dict) pair.
    generation, stateDict = checkpoint
    model.load_state_dict(stateDict)
    model.eval()
    return generation
|
||||
|
||||
def loadModel(self):
    # Create a model via the head node's state and fill it from the
    # checkpoint. Returns the pair (model, generation).
    freshModel = self.head.state.getModel()
    return freshModel, self.loadModelState(freshModel)
|
||||
|
||||
def getModelFileName(self):
    # Relative path of the checkpoint file used when saving and loading the model.
    checkpointPath = 'brains/utt.vac'
    return checkpointPath
|
||||
|
||||
def saveToMemoryBank(self, term):
    # Currently disabled: the method returns immediately and writes nothing.
    return
    # Unreachable while the return above is in place. The code below would
    # pickle `term` into memoryBank/uttt/ under a timestamped name with a
    # random numeric suffix.
    stamp = datetime.datetime.now().strftime('%Y-%m-%d_%H:%M:%S')
    suffix = str(int(random.random()*99999))
    with open('memoryBank/uttt/'+stamp+'_'+suffix+'.vdm', 'wb') as f:
        pickle.dump(term, f)
|
||||
|
||||
|
||||
class NeuralRuntime(Runtime):
|
||||
def __init__(self, initState):
|
||||
super().__init__(initState)
|
||||
|
||||
model, gen = self.loadModel()
|
||||
model = self.head.state.getModel()
|
||||
model.load_state_dict(torch.load('brains/uttt.pth'))
|
||||
model.eval()
|
||||
|
||||
self.head.universe.model = model
|
||||
self.head.universe.scoreProvider = 'neural'
|
||||
|
||||
|
||||
class Trainer(Runtime):
|
||||
def __init__(self, initState):
|
||||
super().__init__(initState)
|
||||
@ -564,35 +523,15 @@ class Trainer(Runtime):
|
||||
print(' => '+['O','X','No one'][head.getWinner()] + ' won!')
|
||||
return head
|
||||
|
||||
def timelineIterSingle(self, term):
    # Iterate the timeline of a single terminal node.
    # timelineIter works on a list of terminal nodes, so `term` is wrapped in
    # a one-element list. `self` is already bound by the method call and must
    # not be passed again: doing so shifted the arguments, handing the trainer
    # in as `terms` and the list in as `altChildPerNode`.
    yield from self.timelineIter([term])
|
||||
|
||||
def timelineIter(self, terms, altChildPerNode=-1):
|
||||
batch = len(terms)
|
||||
heads = terms
|
||||
def timelineIter(self, term):
|
||||
head = term
|
||||
while True:
|
||||
empty = True
|
||||
for b in range(batch):
|
||||
head = heads[b]
|
||||
if head == None:
|
||||
continue
|
||||
empty = False
|
||||
yield head
|
||||
if len(head.childs):
|
||||
if altChildPerNode == -1: # all
|
||||
for child in head.childs:
|
||||
yield child
|
||||
else:
|
||||
for j in range(min(altChildPerNode, int(len(head.childs)/2))):
|
||||
yield random.choice(head.childs)
|
||||
if head.parent == None:
|
||||
head = None
|
||||
else:
|
||||
head = head.parent
|
||||
heads[b] = head
|
||||
if empty:
|
||||
return
|
||||
head = head.parent
|
||||
|
||||
def timelineExpandUncertain(self, term, secs):
|
||||
self.rootNode.universe.clearPQ()
|
||||
@ -605,24 +544,20 @@ class Trainer(Runtime):
|
||||
self.killWorker()
|
||||
print('')
|
||||
|
||||
def trainModel(self, model, lr=0.00005, cut=0.01, calcDepth=4, exacity=5, terms=None, batch=16):
|
||||
def trainModel(self, model, lr=0.00005, cut=0.01, calcDepth=4, exacity=5, term=None):
|
||||
loss_func = nn.MSELoss()
|
||||
optimizer = optim.Adam(model.parameters(), lr)
|
||||
if terms == None:
|
||||
terms = []
|
||||
for i in range(batch):
|
||||
terms.append(self.buildDatasetFromModel(
|
||||
model, depth=calcDepth, exacity=exacity))
|
||||
if term==None:
|
||||
term = self.buildDatasetFromModel(model, depth=calcDepth, exacity=exacity)
|
||||
print('[*] Conditioning Brain')
|
||||
for r in range(64):
|
||||
loss_sum = 0
|
||||
lLoss = 0
|
||||
zeroLen = 0
|
||||
for i, node in enumerate(self.timelineIter(terms)):
|
||||
for i, node in enumerate(self.timelineIter(term)):
|
||||
for p in range(self.rootNode.playersNum):
|
||||
inp = node.state.getTensor(player=p)
|
||||
gol = torch.tensor(
|
||||
[node.getStrongFor(p)], dtype=torch.float)
|
||||
gol = torch.tensor([node.getStrongFor(p)], dtype=torch.float)
|
||||
out = model(inp)
|
||||
loss = loss_func(out, gol)
|
||||
optimizer.zero_grad()
|
||||
@ -649,17 +584,27 @@ class Trainer(Runtime):
|
||||
model.train()
|
||||
for gen in range(startGen, startGen+gens):
|
||||
print('[#####] Gen '+str(gen)+' training:')
|
||||
loss = self.trainModel(model, calcDepth=min(
|
||||
4, 3+int(gen/16)), exacity=int(gen/3+1), batch=4)
|
||||
loss = self.trainModel(model, calcDepth=min(4,3+int(gen/16)), exacity=int(gen/3+1))
|
||||
print('[L] '+str(loss))
|
||||
self.universe.scoreProvider = 'neural'
|
||||
self.saveModel(model, gen)
|
||||
|
||||
def trainFromTerm(self, term):
    # Load the stored model, train it on one terminal node and save it again.
    model, gen = self.loadModel()
    self.universe.scoreProvider = 'neural'
    # The batch-based trainModel takes its nodes through `terms` (a list) and
    # has no `term` keyword, so the single node is wrapped in a list.
    self.trainModel(model, calcDepth=4, exacity=10, terms=[term])
    # saveModel needs the generation to store with the weights; the loaded
    # generation is written back unchanged.
    self.saveModel(model, gen)
|
||||
def saveModel(self, model, gen):
    # Store the generation number and the model's state dict together, as a
    # single pickled (gen, state_dict) tuple, in the file named by
    # getModelFileName().
    stateDict = model.state_dict()
    record = (gen, stateDict)
    with open(self.getModelFileName(), 'wb') as out:
        pickle.dump(record, out)
|
||||
|
||||
def loadModelState(self, model):
    # Restore `model` from the checkpoint file and report its generation.
    # The file must contain a pickled two-element tuple (gen, state_dict);
    # the state dict is loaded into `model`, which is then put in eval mode.
    # NOTE(review): unpickling runs code embedded in the file, so this must
    # only be pointed at trusted, locally written checkpoints.
    with open(self.getModelFileName(), 'rb') as src:
        stored = pickle.load(src)
    storedGen, storedState = stored
    model.load_state_dict(storedState)
    model.eval()
    return storedGen
|
||||
|
||||
def loadModel(self):
    # Obtain a model from the root node's state, populate it from the
    # checkpoint file, and hand back (model, generation).
    restored = self.rootNode.state.getModel()
    generation = self.loadModelState(restored)
    result = (restored, generation)
    return result
|
||||
|
||||
def train(self):
|
||||
if os.path.exists(self.getModelFileName()):
|
||||
@ -667,3 +612,20 @@ class Trainer(Runtime):
|
||||
self.main(model, startGen=gen+1)
|
||||
else:
|
||||
self.main()
|
||||
|
||||
def getModelFileName(self):
    # Path (relative to the working directory) of the model checkpoint file.
    return 'brains/' + 'utt.vac'
|
||||
|
||||
def trainFromTerm(self, term):
    # Load the stored model, train it on one terminal node and save it again.
    # The checkpoint is read through loadModel() instead of
    # torch.load('brains/uttt.vac'): saveModel() writes a pickled
    # (gen, state_dict) tuple to getModelFileName() ('brains/utt.vac'), so the
    # former path and format did not match what this class itself saves.
    # loadModel() also leaves the model in eval mode, as the old code did.
    model, gen = self.loadModel()
    self.universe.scoreProvider = 'neural'
    self.trainModel(model, calcDepth=4, exacity=10, term=term)
    # saveModel requires the generation; the loaded value is stored back
    # unchanged.
    self.saveModel(model, gen)
|
||||
|
||||
def saveToMemoryBank(self, term):
    # Memory-bank persistence is switched off: nothing past this return runs.
    return
    # Kept for re-enabling later: pickles `term` to a file under
    # memoryBank/uttt/ named <timestamp>_<random number>.vdm.
    fileName = '_'.join([
        datetime.datetime.now().strftime('%Y-%m-%d_%H:%M:%S'),
        str(int(random.random()*99999)),
    ])
    with open('memoryBank/uttt/'+fileName+'.vdm', 'wb') as f:
        pickle.dump(term, f)
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user