240 lines
7.2 KiB
Python
240 lines
7.2 KiB
Python
"""
|
|
A lot of this code was stolen from Pulkit Maloo (https://github.com/pulkitmaloo/Ultimate-Tic-Tac-Toe)
|
|
"""
|
|
|
|
from vacuumDecay import *
|
|
from collections import Counter
|
|
import itertools
|
|
|
|
|
|
class TTTState(State):
    """Game state for Ultimate Tic-Tac-Toe.

    The board is an 81-character string.  Characters ``9*b .. 9*b+8`` hold
    small box ``b`` (boxes and the cells inside a box are both numbered
    row-major, 0..8).  ``'.'`` is an empty cell, ``'O'`` belongs to player 0
    and ``'X'`` to player 1.

    Attributes set by ``__init__``:
        curPlayer, generation, playersNum: stored as passed in.
        board: the 81-character board string.
        last_move: board index of the previous move, ``-1`` if there is none.
        possible_goals: the eight winning index triples of a 3x3 grid.
        box_won: list of nine symbols, the winner of each small box
            (``'.'`` while a box is undecided).
    """

    # Winning lines of a 3x3 grid: diagonals, columns, rows.
    _LINES = (
        (0, 4, 8), (2, 4, 6),
        (0, 3, 6), (1, 4, 7), (2, 5, 8),
        (0, 1, 2), (3, 4, 5), (6, 7, 8),
    )
    # Symbol placed by each player, indexed by player number.
    _SYMBOLS = ('O', 'X')

    def __init__(self, curPlayer=0, generation=0, playersNum=2, board=None, lastMove=-1):
        """Create a state; an omitted ``board`` means the empty board."""
        if board is None:
            board = "." * 81
        self.curPlayer = curPlayer
        self.generation = generation
        self.playersNum = playersNum
        self.board = board
        self.last_move = lastMove
        # Kept as a per-instance list for compatibility with existing users.
        self.possible_goals = list(self._LINES)
        self.update_box_won()

    def update_box_won(self):
        """Recompute ``self.box_won`` from the current board."""
        self.box_won = [
            self.check_small_box(self.board[9 * b: 9 * b + 9])
            for b in range(9)
        ]

    def indices_of_box(self, b):
        """Return the nine board indices that make up small box ``b``."""
        return list(range(b * 9, b * 9 + 9))

    def check_small_box(self, box_str):
        """Return the symbol owning a full line in ``box_str``, else ``'.'``.

        ``box_str`` is any 9-element sequence of symbols; it is used both for
        a single small box and for the list of small-box winners.
        """
        for (x, y, z) in self.possible_goals:
            if box_str[x] != "." and box_str[x] == box_str[y] == box_str[z]:
                return box_str[x]
        return "."

    def mutate(self, action):
        """Return the successor state after the current player plays ``action``.

        ``action.data`` is the board index to occupy.  The turn passes to the
        next player; ``generation`` is not forwarded (the successor gets the
        constructor default).
        """
        pos = action.data
        newBoard = self.board[:pos] + self._SYMBOLS[self.curPlayer] + self.board[pos + 1:]
        return TTTState(
            curPlayer=(self.curPlayer + 1) % self.playersNum,
            playersNum=self.playersNum,
            board=newBoard,
            lastMove=pos,
        )

    def box(self, x, y):
        """Return the small box (0..8) containing 1-based row ``x``, column ``y``."""
        # Fixed: this used to call a non-existent global ``index``.
        return self.index(x, y) // 9

    def next_box(self, i):
        """Return the small box the opponent is sent to after a move at index ``i``."""
        return i % 9

    def index(self, x, y):
        """Map 1-based (row ``x``, column ``y``) of the 9x9 grid to a board index."""
        x -= 1
        y -= 1
        return ((x // 3) * 27) + ((x % 3) * 3) + ((y // 3) * 9) + (y % 3)

    def getAvaibleActions(self):
        """Yield an ``Action`` for every legal move of the current player.

        * No previous move (``last_move == -1``): any empty cell.
        * Otherwise the move must go into the box selected by the previous
          move.  If that box is already won or completely full, the player
          may choose any empty cell in any box that is not yet won.  (The
          "full" case previously produced no moves at all, which made
          ``checkDraw`` report a draw while the game was still playable.)
        """
        if self.last_move == -1:
            candidates = range(9 * 9)
        else:
            target = self.next_box(self.last_move)
            target_cells = self.indices_of_box(target)
            target_playable = self.box_won[target] == "." and any(
                self.board[i] == "." for i in target_cells
            )
            if target_playable:
                candidates = target_cells
            else:
                candidates = itertools.chain.from_iterable(
                    self.indices_of_box(b)
                    for b in range(9)
                    if self.box_won[b] == "."
                )

        for ind in candidates:
            if self.board[ind] == '.':
                yield Action(self.curPlayer, ind)

    # Disabled heuristic overrides, kept for reference:
    # def getScoreFor(self, player):
    #     p = ['O','X'][player]
    #     sco = 5
    #     for w in self.box_won:
    #         if w==p:
    #             sco += 1
    #         elif w!='.':
    #             sco -= 0.5
    #     return 1/sco

    # def getPriority(self, score, cascadeMem):
    #     return -cascadeMem*1 + 100

    def checkWin(self):
        """Report the game result.

        Returns:
            ``None`` while the game is running, ``-1`` for a draw, otherwise
            a bool that is ``True`` if ``'X'`` (player 1) won and ``False``
            if ``'O'`` (player 0) won.
        """
        self.update_box_won()
        game_won = self.check_small_box(self.box_won)
        if game_won != '.':
            return game_won == 'X'
        if self.checkDraw():
            return -1
        return None

    def checkDraw(self):
        """Return ``True`` when the current player has no legal move left."""
        return not any(True for _ in self.getAvaibleActions())

    def __str__(self):
        """Render the 9x9 grid; with at most nine legal moves, label them 1..9."""
        cells = list(self.board)
        acts = list(self.getAvaibleActions())
        if len(acts) <= 9:
            for i, act in enumerate(acts):
                cells[act.data] = str(i + 1)

        lines = []
        rule = ""
        for row in range(1, 10):
            row_str = ["|"]
            for col in range(1, 10):
                row_str.append(cells[self.index(row, col)])
                if col % 3 == 0:
                    row_str.append("|")
            rule = "-" * (len(row_str) * 2 - 1)
            # Horizontal rule above every band of three rows.
            if (row - 1) % 3 == 0:
                lines.append(rule)
            lines.append(" ".join(row_str))
        lines.append(rule)
        return '\n'.join(lines)

    def symbToNum(self, b):
        """Encode a board symbol as a float relative to ``self.curPlayer``.

        ``'.'`` maps to ``0.0``.  ``'O'`` maps to ``-1.0`` when ``curPlayer``
        is 0 and ``+1.0`` when it is 1; any other symbol gets the opposite
        sign of ``'O'``.
        """
        if b == '.':
            return 0.0
        elif b == 'O':
            return -1.0 + 2.0 * self.curPlayer
        else:
            return 1.0 - 2.0 * self.curPlayer

    def getTensor(self, player=None, phase='default'):
        """Return the board as an 81-element tensor in row-major grid order.

        ``player`` and ``phase`` are accepted for interface compatibility but
        do not influence the result: the encoding always follows
        ``symbToNum``, i.e. it is relative to ``self.curPlayer``.
        """
        symbols = (
            self.board[self.index(row, col)]
            for row in range(1, 10)
            for col in range(1, 10)
        )
        return torch.tensor([self.symbToNum(b) for b in symbols])

    @classmethod
    def getModel(cls, phase='default'):
        """Return a freshly constructed ``Model``; ``phase`` is not used."""
        return Model()
|
|
|
|
|
|
class Model(nn.Module):
    """Small convolutional value network over an 81-element board encoding.

    Pipeline (single, unbatched sample):
      1. ``smol``: 3x3 convolution with stride 3 over the 9x9 grid, giving
         ``chansPerSmol`` features for each of the nine 3x3 tiles.
      2. ``comb``: 1x1 convolution reducing each tile to ``chansPerSlot``
         features.
      3. ``out``: two linear layers followed by a sigmoid, producing one
         value in (0, 1).
    """

    def __init__(self):
        super().__init__()

        # Channel widths of the three stages.
        self.chansPerSmol = 24
        self.chansPerSlot = 8
        self.chansComp = 8

        tile_conv = nn.Conv2d(
            in_channels=1,
            out_channels=self.chansPerSmol,
            kernel_size=(3, 3),
            stride=3,
            padding=0,
        )
        self.smol = nn.Sequential(tile_conv, nn.ReLU())

        slot_conv = nn.Conv1d(
            in_channels=self.chansPerSmol,
            out_channels=self.chansPerSlot,
            kernel_size=1,
            stride=1,
            padding=0,
        )
        self.comb = nn.Sequential(slot_conv, nn.ReLU())

        flat_features = self.chansPerSlot * 9
        self.out = nn.Sequential(
            nn.Linear(flat_features, self.chansComp),
            nn.ReLU(),
            nn.Linear(self.chansComp, 1),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Score one board.

        Args:
            x: tensor with exactly 81 elements (any shape); presumably the
               row-major encoding produced by the game state - confirm
               against the caller.

        Returns:
            Tensor of shape ``(1,)`` holding the sigmoid output.
        """
        grid = x.reshape(1, 9, 9)                       # (channels, rows, cols)
        tiles = self.smol(grid).reshape(self.chansPerSmol, 9)
        slots = self.comb(tiles)                        # (chansPerSlot, 9)
        return self.out(slots.reshape(-1))
|
|
|
|
|
|
def humanVsAi(train=True, remember=False, depth=3, bots=None, noBg=False):
    """Play one game and optionally learn from it.

    Args:
        train: if true, call ``Trainer.trainFromTerm`` on the finished game.
        remember: if true, store the finished game via
            ``Trainer.saveToMemoryBank``.
        depth: passed through to ``NeuralRuntime.game``.
        bots: passed through to ``NeuralRuntime.game``; defaults to
            ``[0, 1]``.  A fresh list is created per call so the default can
            never be shared or mutated across calls.
        noBg: inverted and passed to ``NeuralRuntime.game`` as ``bg``.
    """
    if bots is None:
        bots = [0, 1]

    init = TTTState()
    run = NeuralRuntime(init)
    run.game(bots, depth, bg=not noBg)

    if remember or train:
        trainer = Trainer(init)
        if remember:
            trainer.saveToMemoryBank(run.head)
            print('[!] Your cognitive and strategic destinctiveness was added to my own! (Game inserted into memoryBank)')
        if train:
            print(
                "[!] Your knowledge will be assimilated!!! Please stand by.... (Updating Neuristic)")
            trainer.trainFromTerm(run.head)
            # Only announce a completed update when training actually ran.
            print('[!] I have become smart! Destroyer of human Ultimate-TicTacToe players! (Neuristic update completed)')
            print('[!] This marks the beginning of the end of humankind!')
    print('[i] Thanks for playing! Goodbye...')
|
|
|
|
|
|
def aiVsAiLoop():
    """Start self-play training from a fresh initial state."""
    Trainer(TTTState()).train()
|
|
|
|
|
|
if __name__ == '__main__':
    # Interactive entry point: ask for a mode, then run it.
    options = ['Play Against AI',
               'Play Against AI (AI begins)', 'Play Against AI (Fast Play)', 'Playground', 'Let AI train']
    opt = choose('?', options)
    if opt == options[0]:
        humanVsAi()
    elif opt == options[1]:
        # Fixed: was `humanVsAi(bots[1, 0])`, a NameError on the undefined
        # name `bots`; the bot order must be passed as a keyword argument.
        humanVsAi(bots=[1, 0])
    elif opt == options[2]:
        humanVsAi(depth=2, noBg=True)
    elif opt == options[3]:
        humanVsAi(bots=[None, None])
    else:
        aiVsAiLoop()
|