Wider timeline iter & beautify
This commit is contained in:
parent
6967243ae2
commit
4a018638d5
230
vacuumDecay.py
230
vacuumDecay.py
@ -1,3 +1,7 @@
|
|||||||
|
if __name__ == '__main__':
|
||||||
|
print('[!] VacuumDecay should not be started directly')
|
||||||
|
exit()
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import io
|
import io
|
||||||
import time
|
import time
|
||||||
@ -17,6 +21,7 @@ import random
|
|||||||
import datetime
|
import datetime
|
||||||
import pickle
|
import pickle
|
||||||
|
|
||||||
|
|
||||||
class Action():
|
class Action():
|
||||||
# Should hold the data representing an action
|
# Should hold the data representing an action
|
||||||
# Actions are applied to a State in State.mutate
|
# Actions are applied to a State in State.mutate
|
||||||
@ -37,6 +42,7 @@ class Action():
|
|||||||
# should start with < and end with >
|
# should start with < and end with >
|
||||||
return "<P"+str(self.player)+"-"+str(self.data)+">"
|
return "<P"+str(self.player)+"-"+str(self.data)+">"
|
||||||
|
|
||||||
|
|
||||||
class State(ABC):
|
class State(ABC):
|
||||||
# Hold a representation of the current game-state
|
# Hold a representation of the current game-state
|
||||||
# Allows retriving avaible actions (getAvaibleActions) and applying them (mutate)
|
# Allows retriving avaible actions (getAvaibleActions) and applying them (mutate)
|
||||||
@ -99,7 +105,7 @@ class State(ABC):
|
|||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def getTensor(self, player=None, phase='default'):
|
def getTensor(self, player=None, phase='default'):
|
||||||
if player==None:
|
if player == None:
|
||||||
player = self.curPlayer
|
player = self.curPlayer
|
||||||
return torch.tensor([0])
|
return torch.tensor([0])
|
||||||
|
|
||||||
@ -110,6 +116,7 @@ class State(ABC):
|
|||||||
def getScoreNeural(self, model, player=None, phase='default'):
|
def getScoreNeural(self, model, player=None, phase='default'):
|
||||||
return model(self.getTensor(player=player, phase=phase)).item()
|
return model(self.getTensor(player=player, phase=phase)).item()
|
||||||
|
|
||||||
|
|
||||||
class Universe():
|
class Universe():
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.scoreProvider = 'naive'
|
self.scoreProvider = 'naive'
|
||||||
@ -129,10 +136,12 @@ class Universe():
|
|||||||
def activateEdge(self, head):
|
def activateEdge(self, head):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
@dataclass(order=True)
|
@dataclass(order=True)
|
||||||
class PQItem:
|
class PQItem:
|
||||||
priority: int
|
priority: int
|
||||||
data: Any=field(compare=False)
|
data: Any = field(compare=False)
|
||||||
|
|
||||||
|
|
||||||
class QueueingUniverse(Universe):
|
class QueueingUniverse(Universe):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
@ -140,8 +149,8 @@ class QueueingUniverse(Universe):
|
|||||||
self.pq = PriorityQueue()
|
self.pq = PriorityQueue()
|
||||||
|
|
||||||
def newOpen(self, node):
|
def newOpen(self, node):
|
||||||
item = PQItem(node.getPriority(), node)
|
item = PQItem(node.getPriority(), node)
|
||||||
self.pq.put(item)
|
self.pq.put(item)
|
||||||
|
|
||||||
def merge(self, node):
|
def merge(self, node):
|
||||||
self.newOpen(node)
|
self.newOpen(node)
|
||||||
@ -164,7 +173,7 @@ class QueueingUniverse(Universe):
|
|||||||
class Node():
|
class Node():
|
||||||
def __init__(self, state, universe=None, parent=None, lastAction=None):
|
def __init__(self, state, universe=None, parent=None, lastAction=None):
|
||||||
self.state = state
|
self.state = state
|
||||||
if universe==None:
|
if universe == None:
|
||||||
print('[!] No Universe defined. Spawning one...')
|
print('[!] No Universe defined. Spawning one...')
|
||||||
universe = Universe()
|
universe = Universe()
|
||||||
self.universe = universe
|
self.universe = universe
|
||||||
@ -175,7 +184,7 @@ class Node():
|
|||||||
self._scores = [None]*self.state.playersNum
|
self._scores = [None]*self.state.playersNum
|
||||||
self._strongs = [None]*self.state.playersNum
|
self._strongs = [None]*self.state.playersNum
|
||||||
self._alive = True
|
self._alive = True
|
||||||
self._cascadeMemory = 0 # Used for our alternative to alpha-beta pruning
|
self._cascadeMemory = 0 # Used for our alternative to alpha-beta pruning
|
||||||
|
|
||||||
def kill(self):
|
def kill(self):
|
||||||
self._alive = False
|
self._alive = False
|
||||||
@ -193,31 +202,34 @@ class Node():
|
|||||||
self._childs = []
|
self._childs = []
|
||||||
actions = self.state.getAvaibleActions()
|
actions = self.state.getAvaibleActions()
|
||||||
for action in actions:
|
for action in actions:
|
||||||
newNode = Node(self.state.mutate(action), self.universe, self, action)
|
newNode = Node(self.state.mutate(action),
|
||||||
|
self.universe, self, action)
|
||||||
self._childs.append(self.universe.merge(newNode))
|
self._childs.append(self.universe.merge(newNode))
|
||||||
|
|
||||||
def getStrongFor(self, player):
    """Return the propagated ("strong") score for *player*.

    Falls back to ``self.getScoreFor(player)`` while no strong score has
    been stored for that player yet.
    """
    strong = self._strongs[player]
    # Identity test on purpose: 0.0 is a valid score and must not be
    # confused with the "not computed yet" marker None.
    if strong is not None:
        return strong
    return self.getScoreFor(player)
|
||||||
|
|
||||||
def _pullStrong(self): # Currently Expecti-Max
|
def _pullStrong(self): # Currently Expecti-Max
|
||||||
strongs = [None]*self.playersNum
|
strongs = [None]*self.playersNum
|
||||||
for p in range(self.playersNum):
|
for p in range(self.playersNum):
|
||||||
cp = self.state.curPlayer
|
cp = self.state.curPlayer
|
||||||
if cp == p: # P owns the turn; controlls outcome
|
if cp == p: # P owns the turn; controlls outcome
|
||||||
best = inf
|
best = inf
|
||||||
for c in self.childs:
|
for c in self.childs:
|
||||||
if c.getStrongFor(p) < best:
|
if c.getStrongFor(p) < best:
|
||||||
best = c.getStrongFor(p)
|
best = c.getStrongFor(p)
|
||||||
strongs[p] = best
|
strongs[p] = best
|
||||||
else:
|
else:
|
||||||
scos = [(c.getStrongFor(p), c.getStrongFor(cp)) for c in self.childs]
|
scos = [(c.getStrongFor(p), c.getStrongFor(cp))
|
||||||
|
for c in self.childs]
|
||||||
scos.sort(key=lambda x: x[1])
|
scos.sort(key=lambda x: x[1])
|
||||||
betterHalf = scos[:max(3,int(len(scos)/3))]
|
betterHalf = scos[:max(3, int(len(scos)/3))]
|
||||||
myScores = [bh[0]**2 for bh in betterHalf]
|
myScores = [bh[0]**2 for bh in betterHalf]
|
||||||
strongs[p] = sqrt(myScores[0]*0.75 + sum(myScores)/(len(myScores)*4))
|
strongs[p] = sqrt(myScores[0]*0.75 +
|
||||||
|
sum(myScores)/(len(myScores)*4))
|
||||||
update = False
|
update = False
|
||||||
for s in range(self.playersNum):
|
for s in range(self.playersNum):
|
||||||
if strongs[s] != self._strongs[s]:
|
if strongs[s] != self._strongs[s]:
|
||||||
@ -225,7 +237,7 @@ class Node():
|
|||||||
break
|
break
|
||||||
self._strongs = strongs
|
self._strongs = strongs
|
||||||
if update:
|
if update:
|
||||||
if self.parent!=None:
|
if self.parent != None:
|
||||||
cascade = self.parent._pullStrong()
|
cascade = self.parent._pullStrong()
|
||||||
else:
|
else:
|
||||||
cascade = 2
|
cascade = 2
|
||||||
@ -235,7 +247,7 @@ class Node():
|
|||||||
return 0
|
return 0
|
||||||
|
|
||||||
def forceStrong(self, depth=3):
|
def forceStrong(self, depth=3):
|
||||||
if depth==0:
|
if depth == 0:
|
||||||
self.strongDecay()
|
self.strongDecay()
|
||||||
else:
|
else:
|
||||||
if len(self.childs):
|
if len(self.childs):
|
||||||
@ -271,13 +283,13 @@ class Node():
|
|||||||
|
|
||||||
def scoresAvaible(self):
    """Return True when every entry of ``self._scores`` has been filled in.

    An entry counts as missing only when it is None; a score of 0 or 0.0
    is a real value.
    """
    return all(score is not None for score in self._scores)
|
||||||
|
|
||||||
def strongScoresAvaible(self):
    """Return True when every entry of ``self._strongs`` has been filled in.

    An entry counts as missing only when it is None; a score of 0 or 0.0
    is a real value.
    """
    return all(strong is not None for strong in self._strongs)
|
||||||
|
|
||||||
@ -290,10 +302,10 @@ class Node():
|
|||||||
|
|
||||||
def _calcScore(self, player):
|
def _calcScore(self, player):
|
||||||
winner = self._getWinner()
|
winner = self._getWinner()
|
||||||
if winner!=None:
|
if winner != None:
|
||||||
if winner==player:
|
if winner == player:
|
||||||
self._scores[player] = 0.0
|
self._scores[player] = 0.0
|
||||||
elif winner==-1:
|
elif winner == -1:
|
||||||
self._scores[player] = 2/3
|
self._scores[player] = 2/3
|
||||||
else:
|
else:
|
||||||
self._scores[player] = 1.0
|
self._scores[player] = 1.0
|
||||||
@ -301,7 +313,8 @@ class Node():
|
|||||||
if self.universe.scoreProvider == 'naive':
|
if self.universe.scoreProvider == 'naive':
|
||||||
self._scores[player] = self.state.getScoreFor(player)
|
self._scores[player] = self.state.getScoreFor(player)
|
||||||
elif self.universe.scoreProvider == 'neural':
|
elif self.universe.scoreProvider == 'neural':
|
||||||
self._scores[player] = self.state.getScoreNeural(self.universe.model, player)
|
self._scores[player] = self.state.getScoreNeural(
|
||||||
|
self.universe.model, player)
|
||||||
else:
|
else:
|
||||||
raise Exception('Uknown Score-Provider')
|
raise Exception('Uknown Score-Provider')
|
||||||
|
|
||||||
@ -327,7 +340,7 @@ class Node():
|
|||||||
return self.state.checkWin()
|
return self.state.checkWin()
|
||||||
|
|
||||||
def getWinner(self):
|
def getWinner(self):
|
||||||
if len(self.childs)==0:
|
if len(self.childs) == 0:
|
||||||
return -1
|
return -1
|
||||||
return self._getWinner()
|
return self._getWinner()
|
||||||
|
|
||||||
@ -336,7 +349,7 @@ class Node():
|
|||||||
self.universe.newOpen(self)
|
self.universe.newOpen(self)
|
||||||
else:
|
else:
|
||||||
for c in self.childs:
|
for c in self.childs:
|
||||||
if c._cascadeMemory > 0.001*(dist-2) or random.random()<0.01:
|
if c._cascadeMemory > 0.001*(dist-2) or random.random() < 0.01:
|
||||||
c._activateEdge(dist=dist+1)
|
c._activateEdge(dist=dist+1)
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
@ -350,27 +363,29 @@ class Node():
|
|||||||
s.append("[ score: "+str(self.getScoreFor(0))+" ]")
|
s.append("[ score: "+str(self.getScoreFor(0))+" ]")
|
||||||
return '\n'.join(s)
|
return '\n'.join(s)
|
||||||
|
|
||||||
|
|
||||||
def choose(txt, options):
    """Prompt on the console until the user picks one of *options*.

    The prompt *txt* and a 1-based numbered list of the options are printed
    before every read. An answer is accepted, in this order, when it is

    1. the number of an option,
    2. exactly the ``str()`` of an option,
    3. a single character equal to the first character of an option's
       ``str()`` (the first such option wins).

    Anything else prints an error and asks again. Returns the chosen
    element of *options* (the object itself, not its string form).
    """
    while True:
        print('[*] '+txt)
        for num, opt in enumerate(options):
            print('['+str(num+1)+'] ' + str(opt))
        inp = input('[> ')

        # Numeric selection. Only a failed int() conversion is expected
        # here, so nothing broader than ValueError is swallowed.
        try:
            n = int(inp)
        except ValueError:
            n = None
        if n is not None and 1 <= n <= len(options):
            return options[n-1]

        # Exact textual match.
        for opt in options:
            if inp == str(opt):
                return opt

        # Single-letter shortcut. Slicing instead of indexing keeps an
        # option whose string form is empty from raising IndexError.
        if len(inp) == 1:
            for opt in options:
                if inp == str(opt)[:1]:
                    return opt

        print('[!] Invalid Input.')
|
||||||
|
|
||||||
|
|
||||||
class Worker():
|
class Worker():
|
||||||
def __init__(self, universe):
|
def __init__(self, universe):
|
||||||
self.universe = universe
|
self.universe = universe
|
||||||
@ -383,7 +398,7 @@ class Worker():
|
|||||||
|
|
||||||
def runLocal(self):
|
def runLocal(self):
|
||||||
for i, node in enumerate(self.universe.iter()):
|
for i, node in enumerate(self.universe.iter()):
|
||||||
if node==None:
|
if node == None:
|
||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
if not self._alive:
|
if not self._alive:
|
||||||
return
|
return
|
||||||
@ -396,10 +411,11 @@ class Worker():
|
|||||||
def revive(self):
|
def revive(self):
|
||||||
self._alive = True
|
self._alive = True
|
||||||
|
|
||||||
|
|
||||||
class Runtime():
|
class Runtime():
|
||||||
def __init__(self, initState):
|
def __init__(self, initState):
|
||||||
universe = QueueingUniverse()
|
universe = QueueingUniverse()
|
||||||
self.head = Node(initState, universe = universe)
|
self.head = Node(initState, universe=universe)
|
||||||
_ = self.head.childs
|
_ = self.head.childs
|
||||||
universe.newOpen(self.head)
|
universe.newOpen(self.head)
|
||||||
|
|
||||||
@ -422,15 +438,15 @@ class Runtime():
|
|||||||
|
|
||||||
def turn(self, bot=None, calcDepth=3, bg=True):
|
def turn(self, bot=None, calcDepth=3, bg=True):
|
||||||
print(str(self.head))
|
print(str(self.head))
|
||||||
if bot==None:
|
if bot == None:
|
||||||
c = choose('Select action?', ['human', 'bot', 'undo', 'qlen'])
|
c = choose('Select action?', ['human', 'bot', 'undo', 'qlen'])
|
||||||
if c=='undo':
|
if c == 'undo':
|
||||||
self.head = self.head.parent
|
self.head = self.head.parent
|
||||||
return
|
return
|
||||||
elif c=='qlen':
|
elif c == 'qlen':
|
||||||
print(self.head.universe.pq.qsize())
|
print(self.head.universe.pq.qsize())
|
||||||
return
|
return
|
||||||
bot = c=='bot'
|
bot = c == 'bot'
|
||||||
if bot:
|
if bot:
|
||||||
self.head.forceStrong(calcDepth)
|
self.head.forceStrong(calcDepth)
|
||||||
opts = []
|
opts = []
|
||||||
@ -450,25 +466,50 @@ class Runtime():
|
|||||||
def game(self, bots=None, calcDepth=7, bg=True):
    """Play turns until ``self.head.getWinner()`` reports a result.

    bots      -- per-player value handed to ``turn`` as its ``bot``
                 argument; defaults to None for every player.
    calcDepth -- forwarded to ``turn`` unchanged.
    bg        -- when true, a worker is spawned before the first turn and
                 killed after the last one; the flag is also forwarded to
                 ``turn``.
    """
    if bg:
        self.spawnWorker()
    if bots is None:
        bots = [None]*self.head.playersNum
    while self.head.getWinner() is None:
        # Forward the caller's bg flag instead of a hard-coded True so that
        # game(bg=False) does not tell turn() a worker is running.
        self.turn(bots[self.head.curPlayer], calcDepth, bg=bg)
    # A winner of -1 indexes the last entry, i.e. 'No one'.
    print(['O', 'X', 'No one'][self.head.getWinner()] + ' won!')
    if bg:
        self.killWorker()
|
||||||
|
|
||||||
|
def saveModel(self, model, gen):
|
||||||
|
dat = model.state_dict()
|
||||||
|
with open(self.getModelFileName(), 'wb') as f:
|
||||||
|
pickle.dump((gen, dat), f)
|
||||||
|
|
||||||
|
def loadModelState(self, model):
|
||||||
|
with open(self.getModelFileName(), 'rb') as f:
|
||||||
|
gen, dat = pickle.load(f)
|
||||||
|
model.load_state_dict(dat)
|
||||||
|
model.eval()
|
||||||
|
return gen
|
||||||
|
|
||||||
|
def loadModel(self):
|
||||||
|
model = self.head.state.getModel()
|
||||||
|
gen = self.loadModelState(model)
|
||||||
|
return model, gen
|
||||||
|
|
||||||
|
def getModelFileName(self):
|
||||||
|
return 'brains/utt.vac'
|
||||||
|
|
||||||
|
def saveToMemoryBank(self, term):
|
||||||
|
return
|
||||||
|
with open('memoryBank/uttt/'+datetime.datetime.now().strftime('%Y-%m-%d_%H:%M:%S')+'_'+str(int(random.random()*99999))+'.vdm', 'wb') as f:
|
||||||
|
pickle.dump(term, f)
|
||||||
|
|
||||||
|
|
||||||
class NeuralRuntime(Runtime):
|
class NeuralRuntime(Runtime):
|
||||||
def __init__(self, initState):
|
def __init__(self, initState):
|
||||||
super().__init__(initState)
|
super().__init__(initState)
|
||||||
|
|
||||||
model = self.head.state.getModel()
|
model, gen = self.loadModel()
|
||||||
model.load_state_dict(torch.load('brains/uttt.pth'))
|
|
||||||
model.eval()
|
|
||||||
|
|
||||||
self.head.universe.model = model
|
self.head.universe.model = model
|
||||||
self.head.universe.scoreProvider = 'neural'
|
self.head.universe.scoreProvider = 'neural'
|
||||||
|
|
||||||
|
|
||||||
class Trainer(Runtime):
|
class Trainer(Runtime):
|
||||||
def __init__(self, initState):
|
def __init__(self, initState):
|
||||||
super().__init__(initState)
|
super().__init__(initState)
|
||||||
@ -477,7 +518,7 @@ class Trainer(Runtime):
|
|||||||
self.rootNode = self.head
|
self.rootNode = self.head
|
||||||
self.terminal = None
|
self.terminal = None
|
||||||
|
|
||||||
def buildDatasetFromModel(self, model, depth=4, refining=True, fanOut=[5,5,5,5,4,4,4,4], uncertainSec=15, exacity=5):
|
def buildDatasetFromModel(self, model, depth=4, refining=True, fanOut=[5, 5, 5, 5, 4, 4, 4, 4], uncertainSec=15, exacity=5):
|
||||||
print('[*] Building Timeline')
|
print('[*] Building Timeline')
|
||||||
term = self.linearPlay(model, calcDepth=depth, exacity=exacity)
|
term = self.linearPlay(model, calcDepth=depth, exacity=exacity)
|
||||||
if refining:
|
if refining:
|
||||||
@ -496,42 +537,62 @@ class Trainer(Runtime):
|
|||||||
head = self.rootNode
|
head = self.rootNode
|
||||||
self.universe.model = model
|
self.universe.model = model
|
||||||
self.spawnWorker()
|
self.spawnWorker()
|
||||||
while head.getWinner()==None:
|
while head.getWinner() == None:
|
||||||
if verbose:
|
if verbose:
|
||||||
print(head)
|
print(head)
|
||||||
else:
|
else:
|
||||||
print('.', end='', flush=True)
|
print('.', end='', flush=True)
|
||||||
head.forceStrong(calcDepth)
|
head.forceStrong(calcDepth)
|
||||||
opts = []
|
opts = []
|
||||||
if len(head.childs)==0:
|
if len(head.childs) == 0:
|
||||||
break
|
break
|
||||||
for c in head.childs:
|
for c in head.childs:
|
||||||
opts.append((c, c.getStrongFor(head.curPlayer)))
|
opts.append((c, c.getStrongFor(head.curPlayer)))
|
||||||
if firstNRandom:
|
if firstNRandom:
|
||||||
firstNRandom-=1
|
firstNRandom -= 1
|
||||||
ind = int(random.random()*len(opts))
|
ind = int(random.random()*len(opts))
|
||||||
else:
|
else:
|
||||||
opts.sort(key=lambda x: x[1])
|
opts.sort(key=lambda x: x[1])
|
||||||
if exacity >= 10:
|
if exacity >= 10:
|
||||||
ind = 0
|
ind = 0
|
||||||
else:
|
else:
|
||||||
ind = int(pow(random.random(),exacity)*(len(opts)-1))
|
ind = int(pow(random.random(), exacity)*(len(opts)-1))
|
||||||
head = opts[ind][0]
|
head = opts[ind][0]
|
||||||
self.killWorker()
|
self.killWorker()
|
||||||
if verbose:
|
if verbose:
|
||||||
print(head)
|
print(head)
|
||||||
print(' => '+['O','X','No one'][head.getWinner()] + ' won!')
|
print(' => '+['O', 'X', 'No one'][head.getWinner()] + ' won!')
|
||||||
return head
|
return head
|
||||||
|
|
||||||
def timelineIter(self, term):
|
def timelineIterSingle(self, term):
|
||||||
head = term
|
for i in self.timelineIter(self, [term]):
|
||||||
|
yield i
|
||||||
|
|
||||||
|
def timelineIter(self, terms, altChildPerNode=-1):
|
||||||
|
batch = len(terms)
|
||||||
|
heads = terms
|
||||||
while True:
|
while True:
|
||||||
yield head
|
empty = True
|
||||||
if len(head.childs):
|
for b in range(batch):
|
||||||
yield random.choice(head.childs)
|
head = heads[b]
|
||||||
if head.parent == None:
|
if head == None:
|
||||||
|
continue
|
||||||
|
empty = False
|
||||||
|
yield head
|
||||||
|
if len(head.childs):
|
||||||
|
if altChildPerNode == -1: # all
|
||||||
|
for child in head.childs:
|
||||||
|
yield child
|
||||||
|
else:
|
||||||
|
for j in range(min(altChildPerNode, int(len(head.childs)/2))):
|
||||||
|
yield random.choice(head.childs)
|
||||||
|
if head.parent == None:
|
||||||
|
head = None
|
||||||
|
else:
|
||||||
|
head = head.parent
|
||||||
|
heads[b] = head
|
||||||
|
if empty:
|
||||||
return
|
return
|
||||||
head = head.parent
|
|
||||||
|
|
||||||
def timelineExpandUncertain(self, term, secs):
|
def timelineExpandUncertain(self, term, secs):
|
||||||
self.rootNode.universe.clearPQ()
|
self.rootNode.universe.clearPQ()
|
||||||
@ -544,20 +605,24 @@ class Trainer(Runtime):
|
|||||||
self.killWorker()
|
self.killWorker()
|
||||||
print('')
|
print('')
|
||||||
|
|
||||||
def trainModel(self, model, lr=0.00005, cut=0.01, calcDepth=4, exacity=5, term=None):
|
def trainModel(self, model, lr=0.00005, cut=0.01, calcDepth=4, exacity=5, terms=None, batch=16):
|
||||||
loss_func = nn.MSELoss()
|
loss_func = nn.MSELoss()
|
||||||
optimizer = optim.Adam(model.parameters(), lr)
|
optimizer = optim.Adam(model.parameters(), lr)
|
||||||
if term==None:
|
if terms == None:
|
||||||
term = self.buildDatasetFromModel(model, depth=calcDepth, exacity=exacity)
|
terms = []
|
||||||
|
for i in range(batch):
|
||||||
|
terms.append(self.buildDatasetFromModel(
|
||||||
|
model, depth=calcDepth, exacity=exacity))
|
||||||
print('[*] Conditioning Brain')
|
print('[*] Conditioning Brain')
|
||||||
for r in range(64):
|
for r in range(64):
|
||||||
loss_sum = 0
|
loss_sum = 0
|
||||||
lLoss = 0
|
lLoss = 0
|
||||||
zeroLen = 0
|
zeroLen = 0
|
||||||
for i, node in enumerate(self.timelineIter(term)):
|
for i, node in enumerate(self.timelineIter(terms)):
|
||||||
for p in range(self.rootNode.playersNum):
|
for p in range(self.rootNode.playersNum):
|
||||||
inp = node.state.getTensor(player=p)
|
inp = node.state.getTensor(player=p)
|
||||||
gol = torch.tensor([node.getStrongFor(p)], dtype=torch.float)
|
gol = torch.tensor(
|
||||||
|
[node.getStrongFor(p)], dtype=torch.float)
|
||||||
out = model(inp)
|
out = model(inp)
|
||||||
loss = loss_func(out, gol)
|
loss = loss_func(out, gol)
|
||||||
optimizer.zero_grad()
|
optimizer.zero_grad()
|
||||||
@ -565,10 +630,10 @@ class Trainer(Runtime):
|
|||||||
optimizer.step()
|
optimizer.step()
|
||||||
loss_sum += loss.item()
|
loss_sum += loss.item()
|
||||||
if loss.item() == 0.0:
|
if loss.item() == 0.0:
|
||||||
zeroLen+=1
|
zeroLen += 1
|
||||||
if zeroLen == 5:
|
if zeroLen == 5:
|
||||||
break
|
break
|
||||||
#print(loss_sum/i)
|
# print(loss_sum/i)
|
||||||
if r > 16 and (loss_sum/i < cut or lLoss == loss_sum):
|
if r > 16 and (loss_sum/i < cut or lLoss == loss_sum):
|
||||||
return loss_sum
|
return loss_sum
|
||||||
lLoss = loss_sum
|
lLoss = loss_sum
|
||||||
@ -576,35 +641,25 @@ class Trainer(Runtime):
|
|||||||
|
|
||||||
def main(self, model=None, gens=1024, startGen=0):
|
def main(self, model=None, gens=1024, startGen=0):
|
||||||
newModel = False
|
newModel = False
|
||||||
if model==None:
|
if model == None:
|
||||||
print('[!] No brain found. Creating new one...')
|
print('[!] No brain found. Creating new one...')
|
||||||
newModel = True
|
newModel = True
|
||||||
model = self.rootNode.state.getModel()
|
model = self.rootNode.state.getModel()
|
||||||
self.universe.scoreProvider = ['neural','naive'][newModel]
|
self.universe.scoreProvider = ['neural', 'naive'][newModel]
|
||||||
model.train()
|
model.train()
|
||||||
for gen in range(startGen, startGen+gens):
|
for gen in range(startGen, startGen+gens):
|
||||||
print('[#####] Gen '+str(gen)+' training:')
|
print('[#####] Gen '+str(gen)+' training:')
|
||||||
loss = self.trainModel(model, calcDepth=min(4,3+int(gen/16)), exacity=int(gen/3+1))
|
loss = self.trainModel(model, calcDepth=min(
|
||||||
|
4, 3+int(gen/16)), exacity=int(gen/3+1), batch=4)
|
||||||
print('[L] '+str(loss))
|
print('[L] '+str(loss))
|
||||||
self.universe.scoreProvider = 'neural'
|
self.universe.scoreProvider = 'neural'
|
||||||
self.saveModel(model, gen)
|
self.saveModel(model, gen)
|
||||||
|
|
||||||
def saveModel(self, model, gen):
|
def trainFromTerm(self, term):
    """Train the stored model on the timeline of a single terminal node.

    Loads the model and its generation counter via ``loadModel``, switches
    the universe to the neural score provider, trains on *term* and writes
    the model back under the generation it was loaded with.
    """
    model, gen = self.loadModel()
    self.universe.scoreProvider = 'neural'
    # trainModel takes a list of terminals through its `terms` parameter.
    self.trainModel(model, calcDepth=4, exacity=10, terms=[term])
    # saveModel requires the generation alongside the model.
    self.saveModel(model, gen)
|
||||||
def loadModelState(self, model):
|
|
||||||
with open(self.getModelFileName(), 'rb') as f:
|
|
||||||
gen, dat = pickle.load(f)
|
|
||||||
model.load_state_dict(dat)
|
|
||||||
model.eval()
|
|
||||||
return gen
|
|
||||||
|
|
||||||
def loadModel(self):
|
|
||||||
model = self.rootNode.state.getModel()
|
|
||||||
gen = self.loadModelState(model)
|
|
||||||
return model, gen
|
|
||||||
|
|
||||||
def train(self):
|
def train(self):
|
||||||
if os.path.exists(self.getModelFileName()):
|
if os.path.exists(self.getModelFileName()):
|
||||||
@ -612,20 +667,3 @@ class Trainer(Runtime):
|
|||||||
self.main(model, startGen=gen+1)
|
self.main(model, startGen=gen+1)
|
||||||
else:
|
else:
|
||||||
self.main()
|
self.main()
|
||||||
|
|
||||||
def getModelFileName(self):
|
|
||||||
return 'brains/utt.vac'
|
|
||||||
|
|
||||||
def trainFromTerm(self, term):
|
|
||||||
model = self.rootNode.state.getModel()
|
|
||||||
model.load_state_dict(torch.load('brains/uttt.vac'))
|
|
||||||
model.eval()
|
|
||||||
self.universe.scoreProvider = 'neural'
|
|
||||||
self.trainModel(model, calcDepth=4, exacity=10, term=term)
|
|
||||||
self.saveModel(model)
|
|
||||||
|
|
||||||
def saveToMemoryBank(self, term):
|
|
||||||
return
|
|
||||||
with open('memoryBank/uttt/'+datetime.datetime.now().strftime('%Y-%m-%d_%H:%M:%S')+'_'+str(int(random.random()*99999))+'.vdm', 'wb') as f:
|
|
||||||
pickle.dump(term, f)
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user