This commit is contained in:
Dominik Moritz Roth 2022-04-14 21:05:45 +02:00
parent 2ced953c77
commit 5440c23378
2 changed files with 77 additions and 71 deletions

View File

@ -1,3 +1,7 @@
"""
A lot of this code was stolen from Pulkit Maloo (https://github.com/pulkitmaloo/Ultimate-Tic-Tac-Toe)
"""
from vacuumDecay import *
from collections import Counter
import itertools

View File

@ -7,7 +7,8 @@ from math import sqrt, inf
from abc import ABC, abstractmethod
from threading import Event
from queue import PriorityQueue, Empty
from dataclasses import dataclass, field
from typing import Any
class Action():
    # Should hold the data representing an action
@ -29,60 +30,6 @@ class Action():
        # should start with < and end with >
        return "<P"+str(self.player)+"-"+str(self.data)+">"
class Universe():
    """Do-nothing base universe.

    Provides the hook interface that node-tracking universes (e.g. a
    priority-queue-backed variant) override; every hook here is a no-op.
    """

    def __init__(self):
        # Identifier of the scoring backend to use.
        self.scoreProvider = 'naive'

    def newOpen(self, node):
        """Hook: a node was newly opened. Base universe ignores it."""
        pass

    def merge(self, node):
        """Hook: merge *node* into the universe; returns the node unchanged."""
        return node

    def clearPQ(self):
        """Hook: discard all queued nodes. Nothing is queued here."""
        pass

    def iter(self):
        """Hook: iterate queued nodes. The base universe holds none."""
        return []

    def activateEdge(self, head):
        """Hook: an edge at *head* became active. Base universe ignores it."""
        pass
from dataclasses import dataclass, field
from typing import Any
@dataclass(order=True)
class PQItem:
    """Priority-queue entry: ordered by priority only, payload excluded."""
    priority: int
    data: Any = field(compare=False)  # payload never participates in ordering
class QueueingUniverse(Universe):
    """Universe that tracks open nodes in a priority queue."""

    def __init__(self):
        super().__init__()
        self.pq = PriorityQueue()

    def newOpen(self, node):
        """Queue *node* keyed by its current priority."""
        self.pq.put(PQItem(node.getPriority(), node))

    def merge(self, node):
        """Re-queue *node* as open, then hand it back to the caller."""
        self.newOpen(node)
        return node

    def clearPQ(self):
        """Throw away the queue (and everything in it) wholesale."""
        self.pq = PriorityQueue()

    def iter(self):
        """Yield queued nodes forever, sleeping 1s whenever the queue is empty."""
        while True:
            try:
                yield self.pq.get(False).data
            except Empty:
                time.sleep(1)

    def activateEdge(self, head):
        head._activateEdge()
class State(ABC):
    # Hold a representation of the current game-state
    # Allows retriving avaible actions (getAvaibleActions) and applying them (mutate)
@ -105,7 +52,7 @@ class State(ABC):
    @abstractmethod
    def getAvaibleActions(self):
        # Should return an array of all possible actions
        return []
    def askUserForAction(self, actions):
        return choose('What does player '+str(self.curPlayer)+' want to do?', actions)
@ -154,6 +101,56 @@ class State(ABC):
    def getScoreNeural(self):
        return self.model(self.getTensor())
class Universe():
    """Passive universe with no bookkeeping of its own.

    Serves as the interface for universes that track open search nodes;
    here every operation either does nothing or returns its input.
    """

    def __init__(self):
        # Name of the score provider the search should consult.
        self.scoreProvider = 'naive'

    def newOpen(self, node):
        # A passive universe does not record opened nodes.
        pass

    def merge(self, node):
        # No merging logic: the given node is returned as-is.
        return node

    def clearPQ(self):
        # Nothing is queued, so there is nothing to clear.
        pass

    def iter(self):
        # No queue means an empty iteration.
        return []

    def activateEdge(self, head):
        # Edge activation is ignored by the passive universe.
        pass
@dataclass(order=True)
class PQItem:
    """Wrapper making arbitrary payloads sortable by an integer priority."""
    priority: int
    # compare=False keeps non-comparable payloads from breaking the ordering.
    data: Any = field(compare=False)
class QueueingUniverse(Universe):
    """Universe whose open nodes live in a PriorityQueue, best-priority first."""

    def __init__(self):
        super().__init__()
        self.pq = PriorityQueue()

    def newOpen(self, node):
        """Enqueue *node* under the priority it reports right now."""
        item = PQItem(node.getPriority(), node)
        self.pq.put(item)

    def merge(self, node):
        """Treat a merge as a re-open; the same node is returned."""
        self.newOpen(node)
        return node

    def clearPQ(self):
        """Reset to a brand-new, empty queue."""
        self.pq = PriorityQueue()

    def iter(self):
        """Generator over queued nodes; idles (1s naps) while empty."""
        while True:
            try:
                item = self.pq.get(False)
            except Empty:
                time.sleep(1)
            else:
                yield item.data

    def activateEdge(self, head):
        head._activateEdge()
class Node():
    def __init__(self, state, universe=None, parent=None, lastAction=None):
@ -372,21 +369,6 @@ class Worker():
    def revive(self):
        self._alive = True
class Trainer():
    """Drives training games on a Runtime instance."""

    def __init__(self):
        pass

    def spawnRuntime(self, initState):
        # Create and own a fresh Runtime for the given initial state.
        self._runtime = Runtime(initState)

    def setRuntime(self, runtime):
        # Adopt an externally constructed runtime instead of spawning one.
        self._runtime = runtime

    def playFrom(self, start=None):
        # Default to the runtime's current head when no start node is given.
        # FIX: identity comparison with None (`is None`), not `== None`.
        if start is None:
            start = self._runtime.head
        # NOTE(review): `start` is computed but never passed to game() — the
        # requested starting node appears to be ignored; confirm intent before
        # threading it through, since game()'s signature is not visible here.
        self._runtime.game([1]*self._runtime.head.playersNum)
class Runtime():
    def __init__(self, initState):
        universe = QueueingUniverse()
@ -445,3 +427,23 @@ class Runtime():
        self.turn(bots[self.head.curPlayer], calcDepth)
        print(self.head.getWinner() + ' won!')
        self.killWorker()
class Trainer(Runtime):
    """Runtime variant that plays a game out linearly for training.

    Uses a passive Universe (no queue, no workers) instead of the
    QueueingUniverse a normal Runtime would set up.
    """

    def __init__(self, initState):
        # Deliberately does not call Runtime.__init__: this trainer wants a
        # passive Universe rather than the parent's QueueingUniverse/worker.
        self.universe = Universe()
        self.rootNode = Node(initState, universe=self.universe)
        self.terminal = None

    def linearPlay(self, calcDepth=8):
        """Play one game from the root; returns (and stores) the terminal node.

        Each step strongly evaluates all children, sorts them by their
        strong score for the mover, and draws an index biased heavily toward
        the front of that sorted list (uniform draw raised to the 5th power).
        """
        # Local import: the module only does `from math import sqrt, inf`,
        # and `random` is not imported at module level.
        import random
        # FIX: was `head = rootNode` — a NameError; the root lives on self.
        head = self.rootNode
        while head.getWinner() is None:
            # FIX: the original mixed the local `head` with `self.head`,
            # which this class never assigns (AttributeError at runtime).
            head.forceStrong(calcDepth)
            opts = [(c, c.getStrongFor(head.curPlayer)) for c in head.childs]
            opts.sort(key=lambda x: x[1])
            # random()**5 replicates math.pow(random.random(), 5) without math.
            ind = int(random.random()**5 * len(opts))
            head = opts[ind][0]
        self.terminal = head
        return head