Compare commits


No commits in common. "cac86ef6abe216b49b1ded903bcc6b142fb03b5f" and "6967243ae2f6cf48b5baf1837f948beb02aba707" have entirely different histories.

3 changed files with 96 additions and 134 deletions

Binary file not shown.

BIN brains/uttt.pth.bak (Normal file)

Binary file not shown.


@@ -1,7 +1,3 @@
-if __name__ == '__main__':
-    print('[!] VacuumDecay should not be started directly')
-    exit()
-
 import os
 import io
 import time
@@ -21,7 +17,6 @@ import random
 import datetime
 import pickle
-
 
 class Action():
     # Should hold the data representing an action
     # Actions are applied to a State in State.mutate
@@ -42,7 +37,6 @@ class Action():
         # should start with < and end with >
         return "<P"+str(self.player)+"-"+str(self.data)+">"
-
 
 class State(ABC):
     # Hold a representation of the current game-state
     # Allows retriving avaible actions (getAvaibleActions) and applying them (mutate)
@@ -105,7 +99,7 @@ class State(ABC):
 
     @abstractmethod
     def getTensor(self, player=None, phase='default'):
-        if player == None:
+        if player==None:
             player = self.curPlayer
         return torch.tensor([0])
 
@@ -116,7 +110,6 @@ class State(ABC):
     def getScoreNeural(self, model, player=None, phase='default'):
         return model(self.getTensor(player=player, phase=phase)).item()
-
 
 class Universe():
     def __init__(self):
         self.scoreProvider = 'naive'
@@ -136,12 +129,10 @@ class Universe():
     def activateEdge(self, head):
         pass
 
-
 @dataclass(order=True)
 class PQItem:
     priority: int
-    data: Any = field(compare=False)
+    data: Any=field(compare=False)
 
-
 class QueueingUniverse(Universe):
     def __init__(self):
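Both sides of the PQItem hunk above rely on the same dataclass mechanics: `order=True` generates comparison methods from the fields in declaration order, and `field(compare=False)` keeps the payload out of them, so queue items order by `priority` alone and never try to compare the `Node` objects they carry. A minimal standalone sketch with toy payloads in place of real nodes:

```python
from dataclasses import dataclass, field
from queue import PriorityQueue
from typing import Any


@dataclass(order=True)
class PQItem:
    priority: int
    data: Any = field(compare=False)  # excluded from ordering; payload stays opaque


pq = PriorityQueue()
pq.put(PQItem(3, {'node': 'c'}))  # dicts are not orderable, but never get compared
pq.put(PQItem(1, {'node': 'a'}))
pq.put(PQItem(2, {'node': 'b'}))

while not pq.empty():
    item = pq.get()
    print(item.priority, item.data['node'])  # 1 a, then 2 b, then 3 c
```

Without `compare=False`, two items with equal priority would fall back to comparing their payloads, which raises a TypeError for unorderable types.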
@@ -149,8 +140,8 @@ class QueueingUniverse(Universe):
         self.pq = PriorityQueue()
 
     def newOpen(self, node):
         item = PQItem(node.getPriority(), node)
         self.pq.put(item)
 
     def merge(self, node):
         self.newOpen(node)
@@ -173,7 +164,7 @@ class QueueingUniverse(Universe):
 class Node():
     def __init__(self, state, universe=None, parent=None, lastAction=None):
         self.state = state
-        if universe == None:
+        if universe==None:
             print('[!] No Universe defined. Spawning one...')
             universe = Universe()
         self.universe = universe
@@ -184,7 +175,7 @@ class Node():
         self._scores = [None]*self.state.playersNum
         self._strongs = [None]*self.state.playersNum
         self._alive = True
-        self._cascadeMemory = 0  # Used for our alternative to alpha-beta pruning
+        self._cascadeMemory = 0 # Used for our alternative to alpha-beta pruning
 
     def kill(self):
         self._alive = False
@@ -202,34 +193,31 @@ class Node():
         self._childs = []
         actions = self.state.getAvaibleActions()
         for action in actions:
-            newNode = Node(self.state.mutate(action),
-                           self.universe, self, action)
+            newNode = Node(self.state.mutate(action), self.universe, self, action)
             self._childs.append(self.universe.merge(newNode))
 
     def getStrongFor(self, player):
-        if self._strongs[player] != None:
+        if self._strongs[player]!=None:
             return self._strongs[player]
         else:
             return self.getScoreFor(player)
 
-    def _pullStrong(self):  # Currently Expecti-Max
+    def _pullStrong(self): # Currently Expecti-Max
         strongs = [None]*self.playersNum
         for p in range(self.playersNum):
             cp = self.state.curPlayer
-            if cp == p:  # P owns the turn; controlls outcome
+            if cp == p: # P owns the turn; controlls outcome
                 best = inf
                 for c in self.childs:
                     if c.getStrongFor(p) < best:
                         best = c.getStrongFor(p)
                 strongs[p] = best
             else:
-                scos = [(c.getStrongFor(p), c.getStrongFor(cp))
-                        for c in self.childs]
+                scos = [(c.getStrongFor(p), c.getStrongFor(cp)) for c in self.childs]
                 scos.sort(key=lambda x: x[1])
-                betterHalf = scos[:max(3, int(len(scos)/3))]
+                betterHalf = scos[:max(3,int(len(scos)/3))]
                 myScores = [bh[0]**2 for bh in betterHalf]
-                strongs[p] = sqrt(myScores[0]*0.75 +
-                                  sum(myScores)/(len(myScores)*4))
+                strongs[p] = sqrt(myScores[0]*0.75 + sum(myScores)/(len(myScores)*4))
         update = False
         for s in range(self.playersNum):
             if strongs[s] != self._strongs[s]:
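For context on the `_pullStrong` hunk: scores in this codebase run from 0.0 (win for the evaluated player) to 1.0 (loss), so on the turn-owner's own nodes the minimum child score is taken, while on other players' nodes the children are sorted by the owner's score, the best third (at least 3) is kept, and the evaluated player's squared scores are blended with 75% weight on the owner's favourite reply. A hedged rephrasing of just that opponent-turn computation, with a made-up helper name:

```python
from math import sqrt


def blend_opponent_scores(child_scores):
    """child_scores: one (score_for_p, score_for_turn_owner) pair per child.

    Mirrors the else-branch of _pullStrong: lower scores are better, so the
    turn-owner is assumed to pick from the low end of the second column.
    """
    scos = sorted(child_scores, key=lambda x: x[1])   # owner's preference order
    betterHalf = scos[:max(3, int(len(scos) / 3))]    # the replies worth considering
    myScores = [bh[0] ** 2 for bh in betterHalf]      # squaring punishes bad outcomes for p
    # 75% weight on the owner's best reply, the rest averaged in at 25%
    return sqrt(myScores[0] * 0.75 + sum(myScores) / (len(myScores) * 4))


print(blend_opponent_scores([(0.2, 0.9), (0.8, 0.1), (0.5, 0.4)]))  # ~0.75
```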
@@ -237,7 +225,7 @@ class Node():
                     break
         self._strongs = strongs
         if update:
-            if self.parent != None:
+            if self.parent!=None:
                 cascade = self.parent._pullStrong()
             else:
                 cascade = 2
@@ -247,7 +235,7 @@ class Node():
             return 0
 
     def forceStrong(self, depth=3):
-        if depth == 0:
+        if depth==0:
             self.strongDecay()
         else:
             if len(self.childs):
@@ -283,13 +271,13 @@ class Node():
 
     def scoresAvaible(self):
         for p in self._scores:
-            if p == None:
+            if p==None:
                 return False
         return True
 
     def strongScoresAvaible(self):
         for p in self._strongs:
-            if p == None:
+            if p==None:
                 return False
         return True
@@ -302,10 +290,10 @@ class Node():
 
     def _calcScore(self, player):
         winner = self._getWinner()
-        if winner != None:
-            if winner == player:
+        if winner!=None:
+            if winner==player:
                 self._scores[player] = 0.0
-            elif winner == -1:
+            elif winner==-1:
                 self._scores[player] = 2/3
             else:
                 self._scores[player] = 1.0
@@ -313,8 +301,7 @@ class Node():
         if self.universe.scoreProvider == 'naive':
             self._scores[player] = self.state.getScoreFor(player)
         elif self.universe.scoreProvider == 'neural':
-            self._scores[player] = self.state.getScoreNeural(
-                self.universe.model, player)
+            self._scores[player] = self.state.getScoreNeural(self.universe.model, player)
         else:
             raise Exception('Uknown Score-Provider')
 
@@ -340,7 +327,7 @@ class Node():
         return self.state.checkWin()
 
     def getWinner(self):
-        if len(self.childs) == 0:
+        if len(self.childs)==0:
             return -1
         return self._getWinner()
 
@@ -349,7 +336,7 @@ class Node():
             self.universe.newOpen(self)
         else:
             for c in self.childs:
-                if c._cascadeMemory > 0.001*(dist-2) or random.random() < 0.01:
+                if c._cascadeMemory > 0.001*(dist-2) or random.random()<0.01:
                     c._activateEdge(dist=dist+1)
 
     def __str__(self):
@@ -363,29 +350,27 @@ class Node():
         s.append("[ score: "+str(self.getScoreFor(0))+" ]")
         return '\n'.join(s)
 
-
 def choose(txt, options):
     while True:
         print('[*] '+txt)
-        for num, opt in enumerate(options):
+        for num,opt in enumerate(options):
             print('['+str(num+1)+'] ' + str(opt))
         inp = input('[> ')
         try:
             n = int(inp)
-            if n in range(1, len(options)+1):
+            if n in range(1,len(options)+1):
                 return options[n-1]
         except:
            pass
         for opt in options:
-            if inp == str(opt):
+            if inp==str(opt):
                 return opt
-        if len(inp) == 1:
+        if len(inp)==1:
             for opt in options:
-                if inp == str(opt)[0]:
+                if inp==str(opt)[0]:
                     return opt
         print('[!] Invalid Input.')
 
-
 class Worker():
     def __init__(self, universe):
         self.universe = universe
@@ -398,7 +383,7 @@ class Worker():
 
     def runLocal(self):
         for i, node in enumerate(self.universe.iter()):
-            if node == None:
+            if node==None:
                 time.sleep(1)
             if not self._alive:
                 return
@@ -411,11 +396,10 @@ class Worker():
     def revive(self):
         self._alive = True
 
-
 class Runtime():
     def __init__(self, initState):
         universe = QueueingUniverse()
-        self.head = Node(initState, universe=universe)
+        self.head = Node(initState, universe = universe)
         _ = self.head.childs
         universe.newOpen(self.head)
 
@@ -438,15 +422,15 @@ class Runtime():
 
     def turn(self, bot=None, calcDepth=3, bg=True):
         print(str(self.head))
-        if bot == None:
+        if bot==None:
             c = choose('Select action?', ['human', 'bot', 'undo', 'qlen'])
-            if c == 'undo':
+            if c=='undo':
                 self.head = self.head.parent
                 return
-            elif c == 'qlen':
+            elif c=='qlen':
                 print(self.head.universe.pq.qsize())
                 return
-            bot = c == 'bot'
+            bot = c=='bot'
         if bot:
             self.head.forceStrong(calcDepth)
             opts = []
@@ -466,50 +450,25 @@ class Runtime():
     def game(self, bots=None, calcDepth=7, bg=True):
         if bg:
             self.spawnWorker()
-        if bots == None:
+        if bots==None:
             bots = [None]*self.head.playersNum
-        while self.head.getWinner() == None:
+        while self.head.getWinner()==None:
             self.turn(bots[self.head.curPlayer], calcDepth, bg=True)
-        print(['O', 'X', 'No one'][self.head.getWinner()] + ' won!')
+        print(['O','X','No one'][self.head.getWinner()] + ' won!')
         if bg:
             self.killWorker()
 
-    def saveModel(self, model, gen):
-        dat = model.state_dict()
-        with open(self.getModelFileName(), 'wb') as f:
-            pickle.dump((gen, dat), f)
-
-    def loadModelState(self, model):
-        with open(self.getModelFileName(), 'rb') as f:
-            gen, dat = pickle.load(f)
-        model.load_state_dict(dat)
-        model.eval()
-        return gen
-
-    def loadModel(self):
-        model = self.head.state.getModel()
-        gen = self.loadModelState(model)
-        return model, gen
-
-    def getModelFileName(self):
-        return 'brains/utt.vac'
-
-    def saveToMemoryBank(self, term):
-        return
-        with open('memoryBank/uttt/'+datetime.datetime.now().strftime('%Y-%m-%d_%H:%M:%S')+'_'+str(int(random.random()*99999))+'.vdm', 'wb') as f:
-            pickle.dump(term, f)
-
-
 class NeuralRuntime(Runtime):
     def __init__(self, initState):
         super().__init__(initState)
-        model, gen = self.loadModel()
+        model = self.head.state.getModel()
+        model.load_state_dict(torch.load('brains/uttt.pth'))
+        model.eval()
         self.head.universe.model = model
         self.head.universe.scoreProvider = 'neural'
 
-
 class Trainer(Runtime):
     def __init__(self, initState):
         super().__init__(initState)
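The two histories also disagree on how the brain is persisted: the left-hand `Runtime` pickles a `(generation, state_dict)` tuple into `brains/utt.vac`, while the right-hand `NeuralRuntime` expects a bare state dict readable with `torch.load` from `brains/uttt.pth`. A minimal sketch of both round trips, using a stand-in module and assuming the `brains/` directory exists:

```python
import pickle

import torch
import torch.nn as nn

model = nn.Linear(4, 1)  # stand-in for state.getModel()

# Left side: saveModel/loadModelState pickle a (generation, state_dict) tuple
with open('brains/utt.vac', 'wb') as f:
    pickle.dump((7, model.state_dict()), f)
with open('brains/utt.vac', 'rb') as f:
    gen, dat = pickle.load(f)
model.load_state_dict(dat)
model.eval()

# Right side: NeuralRuntime reads a bare state_dict written by torch.save
torch.save(model.state_dict(), 'brains/uttt.pth')
model.load_state_dict(torch.load('brains/uttt.pth'))
model.eval()
```

The pickled tuple carries the generation counter along with the weights, which is what lets `train()` resume at `startGen=gen+1`; the bare `.pth` file holds weights only.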
@@ -518,7 +477,7 @@ class Trainer(Runtime):
         self.rootNode = self.head
         self.terminal = None
 
-    def buildDatasetFromModel(self, model, depth=4, refining=True, fanOut=[5, 5, 5, 5, 4, 4, 4, 4], uncertainSec=15, exacity=5):
+    def buildDatasetFromModel(self, model, depth=4, refining=True, fanOut=[5,5,5,5,4,4,4,4], uncertainSec=15, exacity=5):
         print('[*] Building Timeline')
         term = self.linearPlay(model, calcDepth=depth, exacity=exacity)
         if refining:
@@ -537,62 +496,42 @@ class Trainer(Runtime):
         head = self.rootNode
         self.universe.model = model
         self.spawnWorker()
-        while head.getWinner() == None:
+        while head.getWinner()==None:
             if verbose:
                 print(head)
             else:
                 print('.', end='', flush=True)
             head.forceStrong(calcDepth)
             opts = []
-            if len(head.childs) == 0:
+            if len(head.childs)==0:
                 break
             for c in head.childs:
                 opts.append((c, c.getStrongFor(head.curPlayer)))
             if firstNRandom:
-                firstNRandom -= 1
+                firstNRandom-=1
                 ind = int(random.random()*len(opts))
             else:
                 opts.sort(key=lambda x: x[1])
                 if exacity >= 10:
                     ind = 0
                 else:
-                    ind = int(pow(random.random(), exacity)*(len(opts)-1))
+                    ind = int(pow(random.random(),exacity)*(len(opts)-1))
             head = opts[ind][0]
         self.killWorker()
         if verbose:
             print(head)
-        print(' => '+['O', 'X', 'No one'][head.getWinner()] + ' won!')
+        print(' => '+['O','X','No one'][head.getWinner()] + ' won!')
         return head
 
-    def timelineIterSingle(self, term):
-        for i in self.timelineIter(self, [term]):
-            yield i
-
-    def timelineIter(self, terms, altChildPerNode=-1):
-        batch = len(terms)
-        heads = terms
+    def timelineIter(self, term):
+        head = term
         while True:
-            empty = True
-            for b in range(batch):
-                head = heads[b]
-                if head == None:
-                    continue
-                empty = False
-                yield head
-                if len(head.childs):
-                    if altChildPerNode == -1:  # all
-                        for child in head.childs:
-                            yield child
-                    else:
-                        for j in range(min(altChildPerNode, int(len(head.childs)/2))):
-                            yield random.choice(head.childs)
-                if head.parent == None:
-                    head = None
-                else:
-                    head = head.parent
-                heads[b] = head
-            if empty:
-                return
+            yield head
+            if len(head.childs):
+                yield random.choice(head.childs)
+            if head.parent == None:
                return
+            head = head.parent
 
     def timelineExpandUncertain(self, term, secs):
         self.rootNode.universe.clearPQ()
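The replacement `timelineIter` walks a finished game backwards: starting from the terminal node it yields every node on the path to the root, plus one randomly chosen child per visited node as an off-path contrast sample. A self-contained sketch of that traversal over a toy node type:

```python
import random


class ToyNode:
    def __init__(self, parent=None):
        self.parent = parent
        self.childs = []


def timeline_iter(term):
    """Yield every node from the terminal back to the root,
    plus one random child of each visited node."""
    head = term
    while True:
        yield head
        if len(head.childs):
            yield random.choice(head.childs)
        if head.parent is None:
            return
        head = head.parent


root = ToyNode()
mid = ToyNode(parent=root); root.childs.append(mid)
leaf = ToyNode(parent=mid); mid.childs.append(leaf)
print(len(list(timeline_iter(leaf))))  # 5: leaf, mid, a child of mid, root, a child of root
```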
@@ -605,24 +544,20 @@ class Trainer(Runtime):
         self.killWorker()
         print('')
 
-    def trainModel(self, model, lr=0.00005, cut=0.01, calcDepth=4, exacity=5, terms=None, batch=16):
+    def trainModel(self, model, lr=0.00005, cut=0.01, calcDepth=4, exacity=5, term=None):
         loss_func = nn.MSELoss()
         optimizer = optim.Adam(model.parameters(), lr)
-        if terms == None:
-            terms = []
-            for i in range(batch):
-                terms.append(self.buildDatasetFromModel(
-                    model, depth=calcDepth, exacity=exacity))
+        if term==None:
+            term = self.buildDatasetFromModel(model, depth=calcDepth, exacity=exacity)
         print('[*] Conditioning Brain')
         for r in range(64):
             loss_sum = 0
             lLoss = 0
             zeroLen = 0
-            for i, node in enumerate(self.timelineIter(terms)):
+            for i, node in enumerate(self.timelineIter(term)):
                 for p in range(self.rootNode.playersNum):
                     inp = node.state.getTensor(player=p)
-                    gol = torch.tensor(
-                        [node.getStrongFor(p)], dtype=torch.float)
+                    gol = torch.tensor([node.getStrongFor(p)], dtype=torch.float)
                     out = model(inp)
                     loss = loss_func(out, gol)
                     optimizer.zero_grad()
@@ -630,10 +565,10 @@ class Trainer(Runtime):
                     optimizer.step()
                     loss_sum += loss.item()
                     if loss.item() == 0.0:
-                        zeroLen += 1
+                        zeroLen+=1
                     if zeroLen == 5:
                         break
-            # print(loss_sum/i)
+            #print(loss_sum/i)
             if r > 16 and (loss_sum/i < cut or lLoss == loss_sum):
                 return loss_sum
             lLoss = loss_sum
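Both versions of `trainModel` turn the timeline into supervised data the same way: each node the iterator yields contributes one sample per player, with `state.getTensor(player=p)` as input and the searched `getStrongFor(p)` value as the MSE regression target under Adam. One such step in isolation, with toy tensors standing in for the real encodings:

```python
import torch
import torch.nn as nn
import torch.optim as optim

model = nn.Linear(8, 1)  # stand-in for state.getModel()
loss_func = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.00005)

inp = torch.rand(8)                            # stand-in for node.state.getTensor(player=p)
gol = torch.tensor([0.42], dtype=torch.float)  # stand-in for node.getStrongFor(p)

out = model(inp)
loss = loss_func(out, gol)
optimizer.zero_grad()
loss.backward()
optimizer.step()
print(loss.item())
```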
@@ -641,25 +576,35 @@ class Trainer(Runtime):
     def main(self, model=None, gens=1024, startGen=0):
         newModel = False
-        if model == None:
+        if model==None:
             print('[!] No brain found. Creating new one...')
             newModel = True
             model = self.rootNode.state.getModel()
-        self.universe.scoreProvider = ['neural', 'naive'][newModel]
+        self.universe.scoreProvider = ['neural','naive'][newModel]
         model.train()
         for gen in range(startGen, startGen+gens):
             print('[#####] Gen '+str(gen)+' training:')
-            loss = self.trainModel(model, calcDepth=min(
-                4, 3+int(gen/16)), exacity=int(gen/3+1), batch=4)
+            loss = self.trainModel(model, calcDepth=min(4,3+int(gen/16)), exacity=int(gen/3+1))
             print('[L] '+str(loss))
             self.universe.scoreProvider = 'neural'
             self.saveModel(model, gen)
 
-    def trainFromTerm(self, term):
-        model, gen = self.loadModel()
-        self.universe.scoreProvider = 'neural'
-        self.trainModel(model, calcDepth=4, exacity=10, term=term)
-        self.saveModel(model)
+    def saveModel(self, model, gen):
+        dat = model.state_dict()
+        with open(self.getModelFileName(), 'wb') as f:
+            pickle.dump((gen, dat), f)
+
+    def loadModelState(self, model):
+        with open(self.getModelFileName(), 'rb') as f:
+            gen, dat = pickle.load(f)
+        model.load_state_dict(dat)
+        model.eval()
+        return gen
+
+    def loadModel(self):
+        model = self.rootNode.state.getModel()
+        gen = self.loadModelState(model)
+        return model, gen
 
     def train(self):
         if os.path.exists(self.getModelFileName()):
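The generation loop in `main` encodes a small curriculum that both sides share: search depth ramps from 3 to 4 after generation 16, and `exacity` (how greedily `linearPlay` picks moves; values of 10 or more always take the best move) grows by one every three generations. The scaling, spelled out:

```python
# Curriculum used by main() on both sides of the diff:
for gen in [0, 8, 16, 27, 30]:
    calcDepth = min(4, 3 + int(gen / 16))  # search depth 3, rising to 4 from gen 16
    exacity = int(gen / 3 + 1)             # linearPlay greediness; >= 10 means always best move
    print(gen, calcDepth, exacity)         # (0,3,1) (8,3,3) (16,4,6) (27,4,10) (30,4,11)
```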
@@ -667,3 +612,20 @@ class Trainer(Runtime):
             self.main(model, startGen=gen+1)
         else:
             self.main()
+
+    def getModelFileName(self):
+        return 'brains/utt.vac'
+
+    def trainFromTerm(self, term):
+        model = self.rootNode.state.getModel()
+        model.load_state_dict(torch.load('brains/uttt.vac'))
+        model.eval()
+        self.universe.scoreProvider = 'neural'
+        self.trainModel(model, calcDepth=4, exacity=10, term=term)
+        self.saveModel(model)
+
+    def saveToMemoryBank(self, term):
+        return
+        with open('memoryBank/uttt/'+datetime.datetime.now().strftime('%Y-%m-%d_%H:%M:%S')+'_'+str(int(random.random()*99999))+'.vdm', 'wb') as f:
+            pickle.dump(term, f)
+