Initial commit

2022-03-21 14:27:16 +01:00 · 2022-03-21 14:27:16 +01:00 · a46557a635
commit a46557a635
7 changed files with 667 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,2 @@
 __pycache__
 *.~*
--- a/README.md
+++ b/README.md
@ -0,0 +1,21 @@
 # Project vacuumDecay
 Project vacuumDecay is a framework for building AIs for games.  
 Available architectures are
 - those used in Deep Blue (mini-max / expecti-max)
 - advanced expecti-max exploration based on utility heuristics
 - those used in AlphaGo Zero (knowledge distillation using neural networks)
 A new AI is created by subclassing the State-class and defining the following functionality (mycelia.py provides a template):
 - initialization (generating the gameboard or similar)
 - getting the available actions for the current situation (returns Action-objects; Action can be subclassed to add additional functionality)
 - applying an action (the state itself should be immutable, a new state should be returned)
 - checking for a winning-condition (should return None if game has not yet ended)
 - (optional) a getter for a string-representation of the current state
 - (optional) a heuristic for the winning-condition (greatly improves capability)
 - (optional) a getter for a tensor that describes the current game state (required for knowledge distillation)
 - (optional) interface to allow a human to select an action
 ### Current state of the project
 It currently does not work and implements none of the named functionality in a working fashion.  
 Experiments for TicTacToe, Dikehiker and an encryption-breaker for RC4 are being worked on.
--- a/dikehiker.py
+++ b/dikehiker.py
@ -0,0 +1,61 @@
 from vacuumDecay import *
 import numpy as np
 class TTTState(State):
    """Game state of the betting game in dikehiker.py (the class keeps the name TTTState).

    Every player owns a bank and places a bet when it is their turn. Once the
    last player has bet, the lowest bet is eliminated and the highest bet
    receives 500.

    Args:
        turn: index of the player who acts next.
        generation: number of mutations that led to this state.
        playersNum: number of players.
        bank: per-player bank; defaults to [2904, 3135, 2563, 0].
        bet: per-player last bet; defaults to four zeros.
        alive: per-player alive flags; defaults to everybody alive.
    """
    def __init__(self, turn=0, generation=0, playersNum=4, bank=None, bet=None, alive=None):
        # Containers are created per instance: mutable default arguments would be
        # shared by every state constructed without explicit values.
        self.turn = turn
        self.generation = generation
        self.playersNum = playersNum
        self.bank = [2904, 3135, 2563, 0] if bank is None else bank
        self.bet = [0]*4 if bet is None else bet
        self.alive = [1]*playersNum if alive is None else alive
        self.score = self.getScore()
    def mutate(self, action):
        """Return the state that results from the current player betting action.data."""
        newBank = np.copy(self.bank)
        newBet = np.copy(self.bet)
        # Work on a copy so that this state stays untouched and the successor
        # actually inherits the eliminations.
        newAlive = list(self.alive)
        newBet[self.turn] = action.data
        # Negative bets cost nothing.
        newBank[self.turn] = newBank[self.turn]-max(0, newBet[self.turn])
        if self.turn == self.playersNum-1:
            # Round is complete: lowest bet is eliminated, highest bet is rewarded.
            loser = min(range(len(newBet)), key=newBet.__getitem__)
            winer = max(range(len(newBet)), key=newBet.__getitem__)
            newAlive[loser] = False
            newBank[winer] += 500
        return TTTState(turn=(self.turn+1) % self.playersNum, generation=self.generation+1,
                        playersNum=self.playersNum, bank=newBank, bet=newBet, alive=newAlive)
    def getAvaibleActions(self):
        """Yield one Action per allowed bet; eliminated players can only bet 0."""
        if self.alive[self.turn]:
            for b in range(-self.playersNum-1, self.bank[self.turn]+1):
                yield Action(self.turn, b)
        else:
            yield Action(self.turn, 0)
    def checkWin(self):
        """Return the index of the last player alive, or None while more than one is alive."""
        if sum(self.alive) == 1:
            for p, a in enumerate(self.alive):
                if a:
                    return p
        return None
    def getScore(self):
        """Heuristic: largest bank plus the sum of all banks minus twice the current player's bank."""
        return max(self.bank) + sum(self.bank) - self.bank[self.turn]*2
    def __str__(self):
        """One line per player: '<bet> -> <bank>', or '<dead>' for eliminated players."""
        s = []
        for l in range(len(self.bank)):
            if self.alive[l]:
                s.append(str(self.bet[l])+' -> '+str(self.bank[l]))
            else:
                s.append('<dead>')
        return "\n".join(s)
    def getTensor(self):
        """No tensor representation is implemented for this game."""
        return None
    @classmethod
    def getModel(cls):
        """No neural model is implemented for this game."""
        return None
 if __name__=="__main__":
    # WeakSolver exposes play(); it has no selfPlay() method.
    vd = WeakSolver(TTTState())
    vd.play()
--- a/encBreaker.py
+++ b/encBreaker.py
@ -0,0 +1,72 @@
 from vacuumDecay import *
 from arc4 import ARC4
 import copy
 class KnownPlaintextAndKeylen(State, ABC):
    """Single-player search state for recovering a key from a known plaintext/ciphertext pair.

    The key candidate is stored as a list of bits (keyBits); each action flips
    one bit. Subclasses provide the cipher by implementing _decrypt.

    Args:
        plaintext: the known plaintext (string).
        ciphertext: the matching ciphertext.
        keyLenBits: key length in bits.
        keyBits: current key candidate as a list of 0/1; defaults to all zeros.
        turn, generation, playersNum: bookkeeping shared with other State classes.
        lastChange: index of the bit flipped by the previous action, if any.
    """
    def __init__(self, plaintext, ciphertext, keyLenBits, keyBits=None, turn=0, generation=0, playersNum=1, lastChange=None):
        if keyBits is None:
            keyBits = [0]*keyLenBits
        self.turn = turn
        self.generation = generation
        self.playersNum = playersNum
        self.keyBits = keyBits
        self.keyLenBits = keyLenBits
        self.plaintext = plaintext
        self.ciphertext = ciphertext
        self.lastChange = lastChange
        # Decrypt once; score, win check and string form all reuse the result.
        self.decrypt = self._decrypt()
        self.score = self.getScore()
    def mutate(self, action):
        """Return a new state of the same concrete class with bit action.data flipped."""
        newKeyBits = copy.copy(self.keyBits)
        newKeyBits[action.data] = int(not newKeyBits[action.data])
        # type(self) keeps the cipher of the current state instead of always
        # switching to the XOR variant.
        return type(self)(self.plaintext, self.ciphertext, self.keyLenBits, newKeyBits,
                          generation=self.generation+1, lastChange=action.data)
    def getAvaibleActions(self):
        """Yield a flip action for every bit except the one flipped last (which would undo it)."""
        for i in range(self.keyLenBits):
            if self.lastChange != i:
                yield Action(0, i)
    def getKey(self):
        """Return the key candidate as a string, one character per 8 key bits."""
        chars = []
        for i in range(self.keyLenBits // 8):
            byteBits = self.keyBits[i*8:i*8+8]
            chars.append(chr(int("".join(str(c) for c in byteBits), 2)))
        return "".join(chars)
    @abstractmethod
    def _decrypt(self):
        """Decrypt self.ciphertext with the current key candidate and return a string."""
        pass
    def checkWin(self):
        """Return 0 (the only player) once the decryption equals the plaintext, else None."""
        if self.decrypt == self.plaintext:
            return 0
        return None
    def getScore(self):
        """Fraction of differing bits between decryption and plaintext (0 means identical).

        Only the overlapping prefix of both strings is compared (zip stops at
        the shorter one); the result is normalised by the plaintext length.
        """
        totalBits = len(self.plaintext)*8
        if totalBits == 0:
            # Nothing to compare; avoids a division by zero.
            return 0
        diff = sum(bin(ord(a) ^ ord(b)).count("1") for a, b in zip(self.decrypt, self.plaintext))
        return diff / totalBits
    def __str__(self):
        return "{"+self.getKey()+"}["+self.decrypt+"]"
    def getTensor(self):
        """Return key bits followed by the bits (8 per character) of the current decryption."""
        decryptBits = [int(bit) for ch in self.decrypt for bit in format(ord(ch), '08b')]
        return torch.tensor(self.keyBits + decryptBits)
    def getModel(self):
        pass
    def getPriority(self, score):
        """Queue priority: the state's own score plus a small penalty per generation.

        The score argument is ignored; self.score is used instead.
        """
        return self.score + (1/self.keyLenBits)*0.01*self.generation
 class XorKnownPlaintextAndKeylen(KnownPlaintextAndKeylen):
    """Key-recovery state whose cipher XORs ciphertext and key character by character."""
    def _decrypt(self):
        # zip() pairs characters up to the shorter of ciphertext and key.
        decoded = []
        for cipherChar, keyChar in zip(self.ciphertext, self.getKey()):
            decoded.append(chr(ord(cipherChar) ^ ord(keyChar)))
        return ''.join(decoded)
 class RC4KnownPlaintextAndKeylen(KnownPlaintextAndKeylen):
    """Key-recovery state that decrypts the ciphertext with ARC4 and the current key candidate."""
    def _decrypt(self):
        rc4 = ARC4(self.getKey())
        # latin-1 maps every byte value to exactly one character, so a wrong key
        # guess produces a comparable string instead of a UnicodeDecodeError
        # (decoding as "ascii" fails on any byte >= 0x80). ASCII output is unchanged.
        return rc4.decrypt(self.ciphertext).decode("latin-1")
 # Backwards-compatible alias for the original, misspelled class name.
 RC4KnownPlayintextAndKeylen = RC4KnownPlaintextAndKeylen
 if __name__=="__main__":
    # NOTE(review): KnownPlaintextAndKeylen.__init__ requires plaintext, ciphertext
    # and keyLenBits, none of which are passed here - supply real inputs.
    # NOTE(review): confirm the class name used here matches the spelling of the
    # RC4 subclass definition in this file.
    vd = WeakSolver(RC4KnownPlaintextAndKeylen())
 # TODO:
 # - Should use bytes for everything (not array of ints / string)
--- a/mycelia.py
+++ b/mycelia.py
@ -0,0 +1,18 @@
 class State:
    """Empty placeholder for a game state."""
 class Action:
    """Empty placeholder for an action."""
 class BotAction:
    """Empty placeholder for an action taken by the bot."""
 class PlayerAction:
    """Empty placeholder for an action taken by a player."""
 class EnvAction:
    """Empty placeholder for an action taken by the environment."""
--- a/tictactoe.py
+++ b/tictactoe.py
@ -0,0 +1,61 @@
 from vacuumDecay import *
 import numpy as np
 class TTTState(State):
    """Tic-tac-toe state on a flat board of 9 cells.

    A cell holds None while empty, otherwise the index of the player who
    marked it. Cells are numbered row by row (0-2 is the first row).

    Args:
        turn: index of the player who moves next.
        generation: number of mutations that led to this state.
        playersNum: number of players.
        board: 9-element array of cells; defaults to an empty board.
    """
    def __init__(self, turn=0, generation=0, playersNum=2, board=None):
        if board is None:
            board = np.array([None]*9)
        self.turn = turn
        self.generation = generation
        self.playersNum = playersNum
        self.board = board
        self.score = self.getScore()
    def mutate(self, action):
        """Return a new state in which the current player has marked cell action.data."""
        newBoard = np.copy(self.board)
        newBoard[action.data] = self.turn
        return TTTState(turn=(self.turn+1) % self.playersNum, generation=self.generation+1,
                        playersNum=self.playersNum, board=newBoard)
    def getAvaibleActions(self):
        """Yield one Action for every empty cell."""
        for i in range(9):
            if self.board[i] is None:
                yield Action(self.turn, i)
    def checkWin(self):
        """Return the winning player's index, -1 for a draw, or None if the game is still open."""
        s = self.board
        lines = []
        for i in range(3):
            lines.append((i, i+3, i+6))        # column i
            lines.append((i*3, i*3+1, i*3+2))  # row i
        lines.append((0, 4, 8))
        lines.append((2, 4, 6))
        for a, b, c in lines:
            if s[a] is not None and s[a] == s[b] == s[c]:
                return s[a]
        if any(cell is None for cell in s):
            return None
        return -1
    def __str__(self):
        """Three lines of three cells each; empty cells are shown as '.'."""
        s = []
        for l in range(3):
            s.append(" ".join([str(p) if p is not None else '.' for p in self.board[l*3:l*3+3]]))
        return "\n".join(s)
    def getTensor(self):
        """Return a float tensor of length 10: the turn followed by the 9 cells.

        Empty cells are encoded as -1, because None cannot be stored in a tensor.
        """
        cells = [-1 if c is None else c for c in self.board]
        return torch.tensor([self.turn] + cells, dtype=torch.float32)
    @classmethod
    def getModel(cls):
        """Build the network that maps the 10-element state tensor to a single value."""
        return torch.nn.Sequential(
            torch.nn.Linear(10, 10),
            torch.nn.ReLU(),
            torch.nn.Linear(10, 3),
            torch.nn.Sigmoid(),
            torch.nn.Linear(3, 1),
        )
 if __name__=="__main__":
    # The interactive solver class is WeakSolver and its entry point is play().
    vd = WeakSolver(TTTState())
    vd.play()
--- a/vacuumDecay.py
+++ b/vacuumDecay.py
@ -0,0 +1,432 @@
 import time
 import random
 import threading
 import torch
 #from multiprocessing import Event
 from abc import ABC, abstractmethod
 from threading import Event
 from queue import PriorityQueue, Empty
 class Action():
    """Holds the data representing one action; actions are applied to a State in State.mutate.

    Args:
        player: index of the player performing the action.
        data: payload describing the action (its meaning is defined by the State subclass).
    """
    def __init__(self, player, data):
        self.player = player
        self.data = data
    def __eq__(self, other):
        # This should be implemented differently
        # Two actions of different generations will never be compared
        # Only the payload is compared (by its string form); the player is ignored.
        if type(other) != type(self):
            return False
        return str(self.data) == str(other.data)
    def __hash__(self):
        # Defining __eq__ alone makes instances unhashable; hash exactly what
        # __eq__ compares so that equal actions share a hash.
        return hash(str(self.data))
    def __str__(self):
        # should return visual representation of this action
        # should start with < and end with >
        return "<P"+str(self.player)+"-"+str(self.data)+">"
 class NaiveUniverse:
    """Universe that performs no merging: every branch stays its own branch."""
    def __init__(self):
        """Nothing to set up; this universe keeps no state."""
    def merge(self, branch):
        """Hand the given branch back unchanged."""
        unchanged = branch
        return unchanged
 class BranchUniverse():
    """Universe that merges branches whose resulting states have the same tensor contents."""
    def __init__(self):
        # Maps a hashable snapshot of a state's tensor to the first branch seen for it.
        self.branches = {}
    @staticmethod
    def _freeze(value):
        # Recursively convert (nested) lists into tuples so they can be dict keys.
        if isinstance(value, (list, tuple)):
            return tuple(BranchUniverse._freeze(v) for v in value)
        return value
    def merge(self, branch):
        """Return the already known branch for this state, or register and return branch.

        States whose getTensor() returns None cannot be compared and are never merged.
        """
        tensor = branch.node.state.getTensor()
        if tensor is None:
            return branch
        # Key by content: tensor objects hash by identity, so two equal tensors
        # would otherwise never be found in the dict.
        toList = getattr(tensor, "tolist", None)
        key = self._freeze(toList() if callable(toList) else tensor)
        match = self.branches.get(key)
        if match is not None:
            return match
        self.branches[key] = branch
        return branch
 class Branch():
    """Edge of the game tree: the action taken in preState and the Node it leads to.

    Constructing a Branch may return a different, already existing Branch,
    because the result is passed through universe.merge.
    """
    def __new__(cls, universe, preState, action):
        # Create a real instance; storing the attributes on the class itself
        # would make every branch share the same node.
        branch = super().__new__(cls)
        branch.preState = preState
        branch.action = action
        postState = preState.mutate(action)
        # NOTE(review): parent receives the State preState, while Node._pushScore
        # calls parent._updateScore(), which State does not define - confirm
        # which object belongs here.
        branch.node = Node(postState, universe=universe,
                           parent=preState, lastAction=action)
        return universe.merge(branch)
 class State(ABC):
    """Abstract representation of the current game state.

    Allows retrieving the available actions (getAvaibleActions) and applying
    them (mutate). Mutations return a new State and should not have any effect
    on the current State. A state can check itself for a win (checkWin) or
    score itself with a simple heuristic (getScore): the score should be 0
    when won, higher in a worse state and highest when losing. getPriority is
    used for prioritising Nodes / States when expanding / walking the tree.

    Args:
        turn: index of the player who acts next.
        generation: number of mutations that led to this state.
        playersNum: number of players.
    """
    def __init__(self, turn=0, generation=0, playersNum=2):
        self.turn = turn
        self.generation = generation
        self.playersNum = playersNum
        self.score = self.getScore()
    @abstractmethod
    def mutate(self, action):
        # Returns a new state with supplied action performed
        # self should not be changed
        return State(turn=(self.turn+1) % self.playersNum, generation=self.generation+1, playersNum=self.playersNum)
    @abstractmethod
    def getAvaibleActions(self):
        # Should return an array of all possible actions
        return []
    # improveMe
    def getPriority(self, score):
        # Used for ordering the priority queue
        # Priority should not change for the same root
        # Lower priorities get worked on first
        # Higher generations should have slightly higher priority
        return score + self.generation*0.1
    @abstractmethod
    def checkWin(self):
        # -1 -> Draw
        # None -> Not ended
        # n e N -> player n won
        return None
    # improveMe
    def getScore(self):
        # 0 <= score <= 1; should return close to zero when we are winning
        w = self.checkWin()
        if w is None:
            return 0.5
        if w == 0:
            return 0
        if w == -1:
            return 0.9
        return 1
    @abstractmethod
    def __str__(self):
        # return visual rep of state
        return "[#]"
    @abstractmethod
    def getTensor(self):
        return torch.tensor([0])
    @classmethod
    def getModel(cls):
        # Subclasses return their neural model here; the base class has none.
        pass
    def getScoreNeural(self):
        """Score this state with a neural model applied to getTensor().

        Uses self.model when the instance has one, otherwise the model
        returned by getModel().
        """
        model = getattr(self, "model", None)
        if model is None:
            model = self.getModel()
        return model(self.getTensor())
 class Node():
    """Node of the game tree: wraps a State and manages expansion, scoring and tree walking.

    Args:
        state: the State this node represents.
        universe: object whose merge() is applied to new branches; a NaiveUniverse if omitted.
        parent: the node this one was reached from (None for a root).
        lastAction: the action that led to this node (None for a root).
        playersNum: number of players, used when aggregating child scores.
    """
    def __init__(self, state, universe=None, parent=None, lastAction=None, playersNum=2):
        self.state = state
        if not universe:
            universe = NaiveUniverse()
            # TODO: Maybe add self to new BranchUniverse?
        self.universe = universe
        self.parent = parent
        self.lastAction = lastAction
        self.playersNum = playersNum
        self.childs = None          # None: not expanded yet; []: no actions available
        self.score = state.getScore()
        self.done = Event()         # signals the worker threads to stop
        self.threads = []
        self.walking = False
        self.alive = True
    def expand(self, shuffle=True):
        """Create the child branches for all available actions.

        Returns:
            False if the node has no available actions, True otherwise
            (including when it was already expanded).
        """
        if self.childs is not None:
            return True
        self.childs = []
        for action in self.state.getAvaibleActions():
            branch = Branch(self.universe, self.state, action)
            # Branch only knows the parent State, but score propagation
            # (_pushScore) needs the parent Node.
            if branch.node.parent is self.state:
                branch.node.parent = self
            self.childs.append(branch)
        if not self.childs:
            return False
        if shuffle:
            random.shuffle(self.childs)
        return True
    def _perform(self, action):
        """Return the child branch reached by action, stopping a running walk first.

        Raises:
            PerformOnUnexpandedNodeException: the node was never expanded.
            PerformOnTerminalNodeException: the node has no children.
            IllegalActionException: no child matches the action.
        """
        if self.childs is None:
            raise PerformOnUnexpandedNodeException()
        if not self.childs:
            raise PerformOnTerminalNodeException()
        for child in self.childs:
            if child.node.lastAction == action:
                # endWalk raises when no walk is running, so only call it if needed.
                if self.walking:
                    self.endWalk()
                return child
        raise IllegalActionException()
    def performBot(self):
        """Return the child branch with the lowest score (ties go to the later child).

        Raises:
            NotBotsTurnException: state.turn is not 0.
            PerformOnUnexpandedNodeException: the node was never expanded.
            PerformOnTerminalNodeException: the node has no children.
        """
        if self.state.turn != 0:
            raise NotBotsTurnException()
        if self.childs is None:
            raise PerformOnUnexpandedNodeException()
        if not self.childs:
            raise PerformOnTerminalNodeException()
        if self.walking:
            self.endWalk()
        bChild = self.childs[0]
        for child in self.childs[1:]:
            if not child:
                print(self)
            if child.node.score <= bChild.node.score:
                bChild = child
        return bChild
    def performPlayer(self, action):
        """Return the child branch for a non-bot player's action.

        Raises:
            NotPlayersTurnException: state.turn is 0 (the bot's turn).
        """
        if self.state.turn == 0:
            raise NotPlayersTurnException()
        return self._perform(action)
    def getAvaibleActions(self):
        return self.state.getAvaibleActions()
    def getLastAction(self):
        return self.lastAction
    def beginWalk(self, threadNum=1):
        """Start threadNum worker threads that expand the tree below this node.

        Raises:
            Exception: a walk is already running.
        """
        if self.walking:
            raise Exception("Already Walking")
        self.walking = True
        self.queue = PriorityQueue()
        self.done.clear()
        self.expand()
        self._activateEdge()
        for i in range(threadNum):
            t = threading.Thread(target=self._worker)
            t.start()
            self.threads.append(t)
    def endWalk(self):
        """Stop the worker threads and wait for them to finish.

        Raises:
            Exception: no walk is running.
        """
        if not self.walking:
            raise Exception("Not Walking")
        self.done.set()
        for t in self.threads:
            t.join()
        # Forget the finished threads so a later walk does not re-join them.
        self.threads = []
        self.walking = False
    def walkUntilDone(self):
        """Start a walk if none is running and block until all workers have exited."""
        if not self.walking:
            self.beginWalk()
        for t in self.threads:
            t.join()
        self.done.set()
    def syncWalk(self, time, threads=16):
        """Walk the tree with the given number of threads for time seconds, then stop."""
        # The parameter named time shadows the time module, so import sleep locally.
        from time import sleep
        self.beginWalk(threadNum=threads)
        sleep(time)
        self.endWalk()
    def _worker(self):
        # Worker loop: take the highest-priority node, expand it, refresh the
        # scores and queue its children until the done event is set.
        while not self.done.is_set():
            try:
                # Block briefly instead of spinning on an empty queue.
                node = self.queue.get(timeout=0.05)
            except Empty:
                continue
            try:
                if node.alive and node.expand():
                    node._updateScore()
                    if self.done.is_set():
                        break
                    # Only queue children of states whose game has not ended.
                    if node.state.checkWin() is None:
                        for c in node.childs:
                            self.queue.put(c.node)
            finally:
                # Exactly one task_done per fetched item, also when breaking out.
                self.queue.task_done()
    def _activateEdge(self, node=None):
        # Queue every unexpanded node below (and including) the given node.
        if node is None:
            node = self
        if node.childs is None:
            self.queue.put(node)
        elif node.alive:
            for c in node.childs:
                self._activateEdge(node=c.node)
    def __lt__(self, other):
        # Used for ordering the priority queue
        return self.state.getPriority(self.score) < other.state.getPriority(other.score)
    # improveMe
    def _calcAggScore(self):
        # Aggregate the children's scores: the bot (turn 0) picks the minimum,
        # a single opponent the maximum, several opponents a max/mean blend.
        if self.childs is not None and self.childs != []:
            scores = [c.node.score for c in self.childs]
            if self.state.turn == 0:
                self.score = min(scores)
            elif self.playersNum == 2:
                self.score = max(scores)
            else:
                # Note: This might be tweaked
                self.score = (max(scores) + sum(scores)/len(scores)) / 2
    def _updateScore(self):
        # Recompute the score and propagate upwards only if it changed.
        oldScore = self.score
        self._calcAggScore()
        if self.score != oldScore:
            self._pushScore()
    def _pushScore(self):
        # Propagate to the parent; a parentless node with score 0 stops its walk.
        if self.parent is not None:
            self.parent._updateScore()
        elif self.score == 0:
            self.done.set()
    def __str__(self):
        s = []
        if self.lastAction is None:
            s.append("[ {ROOT} ]")
        else:
            s.append("[ -> "+str(self.lastAction)+" ]")
        s.append("[ turn: "+str(self.state.turn)+" ]")
        s.append(str(self.state))
        s.append("[ score: "+str(self.score)+" ]")
        return '\n'.join(s)
 class WeakSolver():
    """Interactive game loop: the bot plays as player 0, every other turn is read from stdin."""
    def __init__(self, state):
        self.node = Node(state)
    def play(self):
        """Alternate bot and player steps until checkWin() reports a result, then print it."""
        while self.node.state.checkWin() is None:
            self.step()
        print(self.node)
        print("[*] " + str(self.node.state.checkWin()) + " won!")
        if self.node.walking:
            self.node.endWalk()
    def step(self):
        """Perform one move for whoever's turn it is."""
        if self.node.state.turn == 0:
            self.botStep()
        else:
            self.playerStep()
    def botStep(self):
        """Let the bot choose the best-scored child and move there."""
        if self.node.walking:
            self.node.endWalk()
        self.node.expand()
        self.node = self.node.performBot().node
        print("[*] Bot did "+str(self.node.lastAction))
    def playerStep(self):
        """Prompt until a legal action is entered, then move to the resulting node.

        The tree is walked in the background while waiting for input.
        """
        self.node.beginWalk()
        print(self.node)
        while True:
            try:
                # int() raises ValueError on non-numeric input; treat it like an illegal move.
                choice = int(input("[#]> "))
                newBranch = self.node.performPlayer(
                    Action(self.node.state.turn, choice))
            except (IllegalActionException, ValueError):
                print("[!] Illegal Action")
            else:
                break
        # Performing the action may already have stopped the walk, and endWalk
        # raises when none is running.
        if self.node.walking:
            self.node.endWalk()
        # performPlayer returns a branch; the solver tracks its node.
        self.node = newBranch.node
 class NeuralTrainer():
    """Trains the model provided by a State class on (state tensor, score) pairs.

    Args:
        StateClass: class offering getModel(), which returns the torch model to train.
    """
    def __init__(self, StateClass):
        self.State = StateClass
        # State classes expose their model through getModel().
        self.model = self.State.getModel()
    def train(self, states, scores, rounds=2000, learning_rate=1e-6):
        """Run rounds steps of plain gradient descent with a summed MSE loss.

        Args:
            states: sequence of input tensors.
            scores: sequence of target tensors, parallel to states.
            rounds: number of single-sample update steps; samples are cycled through.
            learning_rate: step size of each parameter update.
        """
        loss_fn = torch.nn.MSELoss(reduction='sum')
        for t in range(rounds):
            y_pred = self.model(states[t % len(states)])
            y = scores[t % len(states)]
            loss = loss_fn(y_pred, y)
            print(t, loss.item())
            # Clear gradients from the previous step before computing new ones.
            self.model.zero_grad()
            loss.backward()
            # Update the weights outside of autograd tracking.
            with torch.no_grad():
                for param in self.model.parameters():
                    param -= learning_rate * param.grad
    def setWeights(self):
        pass
    def getWeights(self):
        pass
    def loadWeights(self):
        pass
    def storeWeights(self):
        pass
 class SelfPlayDataGen():
    """Generates training data by letting one search tree per player play against the others.

    Args:
        StateClass: State subclass constructed with turn, generation and playersNum.
        playersNum: number of players (and of search trees).
        compTime: seconds every tree is walked before each move.
    """
    def __init__(self, StateClass, playersNum, compTime=30):
        self.State = StateClass
        self.playersNum = playersNum
        self.compTime = compTime
        self.gameStates = []
    def game(self):
        """Play one game and return the checkWin() result seen from node 0.

        After every move the pair [tensor, score] of node 0 is appended to
        self.gameStates.
        """
        # Node p sees the game from player p's perspective: its turn counter is
        # rotated so that turn 0 means "player p moves".
        self.nodes = []
        for p in range(self.playersNum):
            self.nodes.append(Node(self.State(
                turn=(-p) % self.playersNum, generation=0, playersNum=self.playersNum)))
        while True:
            if (winner := self.nodes[0].state.checkWin()) is not None:
                return winner
            for n in self.nodes:
                n.beginWalk()
            time.sleep(self.compTime)
            for n in self.nodes:
                n.endWalk()
            self.step()
            self.gameStates.append(
                [self.nodes[0].state.getTensor(), self.nodes[0].score])
    def step(self):
        """Let the player to move choose an action and replay it in every other tree."""
        turn = self.nodes[0].state.turn
        # performBot / performPlayer return branches; the trees track their nodes.
        self.nodes[turn] = self.nodes[turn].performBot().node
        action = self.nodes[turn].lastAction
        for n in range(self.playersNum):
            if n != turn:
                action.player = 0
                self.nodes[n] = self.nodes[n].performPlayer(action).node
        return self.nodes[0].state.checkWin()
 class VacuumDecayException(Exception):
    """Base class of the exceptions defined in this module."""
 class IllegalActionException(VacuumDecayException):
    """Raised by Node._perform when no child matches the requested action."""
 class PerformOnUnexpandedNodeException(VacuumDecayException):
    """Raised when an action is performed on a node whose children were never generated."""
 class PerformOnTerminalNodeException(VacuumDecayException):
    """Raised when an action is performed on a node that has no children."""
 class IllegalTurnException(VacuumDecayException):
    """Base class for errors about acting on the wrong turn."""
 class NotBotsTurnException(IllegalTurnException):
    """Raised by Node.performBot when state.turn is not 0."""
 class NotPlayersTurnException(IllegalTurnException):
    """Raised by Node.performPlayer when state.turn is 0."""