This commit is contained in:
Dominik Moritz Roth 2022-04-14 11:38:08 +02:00
parent 5ba277a2aa
commit f9a5ba1964

View File

@ -2,6 +2,7 @@ import time
import random
import threading
import torch
from math import sqrt
#from multiprocessing import Event
from abc import ABC, abstractmethod
from threading import Event
@ -86,12 +87,13 @@ class State(ABC):
return []
# improveMe
def getPriority(self, score):
def getPriority(self, score, cascadeMemory=None):
# Used for ordering the priority queue
# Priority should not change for the same root
# Lower prioritys get worked on first
# Higher generations should have higher priority
return score + self.generation*0.5
# Higher cascadeMemory (more influence on higher-order-scores) should have lower priority
return score + self.generation*0.5 - cascadeMemory*0.35
@abstractmethod
def checkWin(self):
@ -142,10 +144,14 @@ class Node():
self._scores = [None]*self.state.playersNum
self._strongs = [None]*self.state.playersNum
self._alive = True
self._cascadeMemory = 0 # Used for our alternative to alpha-beta pruning
def kill(self):
self._alive = False
def revive(self):
self._alive = True
@property
def childs(self):
if self._childs == None:
@ -159,26 +165,28 @@ class Node():
newNode = Node(self.state.mutate(action), self.universe, self, action)
self._childs.append(self.universe.merge(newNode))
@property
def strongs(self):
return self._strongs
def getStrongFor(self, player):
if self._strongs[player]!=None:
return self._strongs[player]
else:
return self.getScoreFor(player)
def _pullStrong(self): # Currently Expecti-Max
strongs = [None]*self.playersNum
for p in range(self.playersNum):
cp = self.state.curPlayer
if cp == p: # P owns the turn; controlls outcome
best = 10000000
best = 1000000000
for c in self.childs:
if c._strongs[cp] < best:
best = c._strongs[p]
if c.getStrongFor(p) < best:
best = c.getStrongFor(p)
strongs[p] = best
else:
scos = [(c._strongs[cp], c._strongs[p]) for c in self.childs]
scos.sort(key=lambda x: x[0])
betterHalf = scos[:max(3,int(len(scos)/2))]
myScores = [bh[1] for bh in betterHalf]
strongs[p] = sum(myScores)/len(myScores)
scos = [(c.getStrongFor(p), c.getStrongFor(cp)) for c in self.childs]
scos.sort(key=lambda x: x[1])
betterHalf = scos[:max(3,int(len(scos)/3))]
myScores = [bh[0]**2 for bh in betterHalf]
strongs[p] = sqrt(myScores[0]*0.75 + sum(myScores)/(len(myScores)*4))
update = False
for s in range(self.playersNum):
if strongs[s] != self._strongs[s]:
@ -186,21 +194,32 @@ class Node():
break
self._strongs = strongs
if update:
self.parent._pullStrong()
if self.parent!=None:
cascade = self.parent._pullStrong()
else:
cascade = 2
self._cascadeMemory = self._cascadeMemory/2 + cascade
return cascade + 1
self._cascadeMemory /= 2
return 0
def forceStrong(self, depth=3):
if depth==0:
self.strongDecay()
else:
for c in self.childs:
c.forceStrong(depth-1)
if len(self.childs):
for c in self.childs:
c.forceStrong(depth-1)
else:
self.strongDecay()
def strongDecay(self):
if self._strongs == [None]*self.playersNum:
if not self.scoresAvaible():
self._calcScores()
self._strongs = self._scores
self.parent._pullStrong()
return self.parent._pullStrong()
return None
def getSelfScore(self):
return self.getScoreFor(self.curPlayer)
@ -219,6 +238,12 @@ class Node():
return False
return True
def strongScoresAvaible(self):
for p in self._strongs:
if p==None:
return False
return True
def _calcScores(self):
for p in range(self.state.playersNum):
self._calcScore(p)
@ -307,9 +332,10 @@ class Runtime():
return
bot = c=='bot'
if bot:
self.head.forceStrong(7)
opts = []
for c in self.head.childs:
opts.append((c, c.getStrongScore(self.head.curPlayer, -1)[0]))
opts.append((c, c.getStrongFor(self.head.curPlayer)))
opts.sort(key=lambda x: x[1])
print('[i] Evaluated Options:')
for o in opts: