Compare commits

...

2 Commits

Author SHA1 Message Date
072bc391d1 Different net arch (back to fancy conv) 2022-05-18 19:44:56 +02:00
9c6077e213 Loss seems to low? 2022-05-18 19:44:40 +02:00
2 changed files with 59 additions and 45 deletions

View File

@ -6,6 +6,7 @@ from vacuumDecay import *
from collections import Counter from collections import Counter
import itertools import itertools
class TTTState(State): class TTTState(State):
def __init__(self, curPlayer=0, generation=0, playersNum=2, board=None, lastMove=-1): def __init__(self, curPlayer=0, generation=0, playersNum=2, board=None, lastMove=-1):
if type(board) == type(None): if type(board) == type(None):
@ -40,13 +41,13 @@ class TTTState(State):
return "." return "."
def mutate(self, action):
    """Return the successor state produced by playing ``action``.

    The mark for the current player ('O' for player index 0, 'X' for
    index 1) is placed at board position ``action.data``. The returned
    state has the turn advanced modulo ``playersNum`` and ``lastMove``
    set to the played position. ``self`` is left untouched.
    """
    pos = action.data
    mark = ('O', 'X')[self.curPlayer]
    # Slicing + concatenation builds a fresh board instead of editing in place.
    board_after = self.board[:pos] + mark + self.board[pos + 1:]
    following_player = (self.curPlayer + 1) % self.playersNum
    return TTTState(
        curPlayer=following_player,
        playersNum=self.playersNum,
        board=board_after,
        lastMove=pos,
    )
def box(self, x, y):
    """Map coordinates ``(x, y)`` to a box number.

    The flat position comes from the module-level ``index`` helper and is
    integer-divided by 9.
    """
    flat = index(x, y)
    return flat // 9
def next_box(self, i):
    """Return ``i`` modulo 9.

    The action generator of this class passes the last move here and uses
    the result as the box the following move has to be played in.
    """
    remainder = i % 9
    return remainder
@ -67,16 +68,17 @@ class TTTState(State):
box_to_play = self.next_box(self.last_move) box_to_play = self.next_box(self.last_move)
idxs = self.indices_of_box(box_to_play) idxs = self.indices_of_box(box_to_play)
if self.box_won[box_to_play] != ".": if self.box_won[box_to_play] != ".":
pi_2d = [self.indices_of_box(b) for b in range(9) if self.box_won[b] == "."] pi_2d = [self.indices_of_box(b) for b in range(
9) if self.box_won[b] == "."]
possible_indices = list(itertools.chain.from_iterable(pi_2d)) possible_indices = list(itertools.chain.from_iterable(pi_2d))
else: else:
possible_indices = idxs possible_indices = idxs
for ind in possible_indices: for ind in possible_indices:
if self.board[ind]=='.': if self.board[ind] == '.':
yield Action(self.curPlayer, ind) yield Action(self.curPlayer, ind)
#def getScoreFor(self, player): # def getScoreFor(self, player):
# p = ['O','X'][player] # p = ['O','X'][player]
# sco = 5 # sco = 5
# for w in self.box_won: # for w in self.box_won:
@ -86,7 +88,7 @@ class TTTState(State):
# sco -= 0.5 # sco -= 0.5
# return 1/sco # return 1/sco
#def getPriority(self, score, cascadeMem): # def getPriority(self, score, cascadeMem):
# return -cascadeMem*1 + 100 # return -cascadeMem*1 + 100
def checkWin(self): def checkWin(self):
@ -100,13 +102,13 @@ class TTTState(State):
def checkDraw(self):
    """Return True when ``getAvaibleActions()`` yields no action at all."""
    # any() stops at the first yielded action (at least one action is
    # available), so the generator is never fully consumed.
    has_move = any(True for _ in self.getAvaibleActions())
    return not has_move
def __str__(self): def __str__(self):
state = self.board state = self.board
acts = list(self.getAvaibleActions()) acts = list(self.getAvaibleActions())
if len(acts)<=9: if len(acts) <= 9:
for i, act in enumerate(acts): for i, act in enumerate(acts):
state = state[:act.data] + str(i+1) + state[act.data+1:] state = state[:act.data] + str(i+1) + state[act.data+1:]
s = [] s = []
@ -128,11 +130,10 @@ class TTTState(State):
elif b == 'O': elif b == 'O':
return -1.0 + 2.0 * self.curPlayer return -1.0 + 2.0 * self.curPlayer
else: else:
return 1.0 - 2.0 * self.curPlayer return 1.0 - 2.0 * self.curPlayer
def getTensor(self, player=None, phase='default'): def getTensor(self, player=None, phase='default'):
if player==None: if player == None:
player = self.curPlayer player = self.curPlayer
s = '' s = ''
for row in range(1, 10): for row in range(1, 10):
@ -144,51 +145,60 @@ class TTTState(State):
def getModel(cls, phase='default'): def getModel(cls, phase='default'):
return Model() return Model()
class Model(nn.Module):
    """Small convolutional value network over a 9x9 board tensor.

    Shape flow for one (unbatched) position:

    * input is reshaped to ``(1, 9, 9)``;
    * ``smol``: 3x3 convolution with stride 3 and no padding, so the 9x9
      plane is covered by a 3x3 grid of non-overlapping windows
      (presumably one per small board -- confirm against ``getTensor``),
      giving ``(chansPerSmol, 3, 3)``;
    * reshaped to ``(chansPerSmol, 9)`` and passed through ``comb``, a
      kernel-size-1 ``Conv1d`` that mixes channels per window, giving
      ``(chansPerSlot, 9)``;
    * flattened to ``chansPerSlot * 9`` values and fed to ``out``, two
      linear layers ending in a sigmoid, giving one value in ``[0, 1]``.

    Submodule names (``smol``, ``comb``, ``out``) and the order of layers
    inside them determine the state-dict keys, so they must stay stable
    for previously saved models to load.
    """

    def __init__(self):
        super().__init__()

        # Channel widths of the three stages.
        self.chansPerSmol = 24
        self.chansPerSlot = 8
        self.chansComp = 8

        self.smol = nn.Sequential(
            nn.Conv2d(
                in_channels=1,
                out_channels=self.chansPerSmol,
                kernel_size=(3, 3),
                stride=3,
                padding=0,
            ),
            nn.ReLU(),
        )
        self.comb = nn.Sequential(
            nn.Conv1d(
                in_channels=self.chansPerSmol,
                out_channels=self.chansPerSlot,
                kernel_size=1,
                stride=1,
                padding=0,
            ),
            nn.ReLU(),
        )
        self.out = nn.Sequential(
            nn.Linear(self.chansPerSlot * 9, self.chansComp),
            nn.ReLU(),
            nn.Linear(self.chansComp, 1),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Evaluate a single position.

        Args:
            x: tensor with exactly 81 elements (any shape that reshapes to
                ``(1, 9, 9)``).

        Returns:
            A tensor of shape ``(1,)`` holding the sigmoid output.
        """
        # Unbatched input: the leading 1 is the channel dimension of Conv2d.
        x = torch.reshape(x, (1, 9, 9))
        x = self.smol(x)
        # Flatten the 3x3 grid of windows into a length-9 sequence for Conv1d.
        x = torch.reshape(x, (self.chansPerSmol, 9))
        x = self.comb(x)
        x = torch.reshape(x, (-1,))
        return self.out(x)
def humanVsAi(train=True, remember=False, depth=3, bots=[0,1], noBg=False):
def humanVsAi(train=True, remember=False, depth=3, bots=[0, 1], noBg=False):
init = TTTState() init = TTTState()
run = NeuralRuntime(init) run = NeuralRuntime(init)
run.game(bots, depth, bg=not noBg) run.game(bots, depth, bg=not noBg)
@ -199,27 +209,31 @@ def humanVsAi(train=True, remember=False, depth=3, bots=[0,1], noBg=False):
trainer.saveToMemoryBank(run.head) trainer.saveToMemoryBank(run.head)
print('[!] Your cognitive and strategic destinctiveness was added to my own! (Game inserted into memoryBank)') print('[!] Your cognitive and strategic destinctiveness was added to my own! (Game inserted into memoryBank)')
if train: if train:
print("[!] Your knowledge will be assimilated!!! Please stand by.... (Updating Neuristic)") print(
"[!] Your knowledge will be assimilated!!! Please stand by.... (Updating Neuristic)")
trainer.trainFromTerm(run.head) trainer.trainFromTerm(run.head)
print('[!] I have become smart! Destroyer of human Ultimate-TicTacToe players! (Neuristic update completed)') print('[!] I have become smart! Destroyer of human Ultimate-TicTacToe players! (Neuristic update completed)')
print('[!] This marks the beginning of the end of humankind!') print('[!] This marks the beginning of the end of humankind!')
print('[i] Thanks for playing! Goodbye...') print('[i] Thanks for playing! Goodbye...')
def aiVsAiLoop():
    """Build a ``Trainer`` on a default-constructed ``TTTState`` and run ``train()``."""
    Trainer(TTTState()).train()
if __name__ == '__main__':
    # Interactive entry point: let the user pick a mode and dispatch on the
    # label that `choose` returns.
    options = [
        'Play Against AI',
        'Play Against AI (AI begins)',
        'Play Against AI (Fast Play)',
        'Playground',
        'Let AI train',
    ]
    opt = choose('?', options)
    if opt == options[0]:
        humanVsAi()
    elif opt == options[1]:
        # Fixed: this used to read `humanVsAi(bots[1, 0])`, which subscripts
        # an undefined name `bots` and raises NameError. The swapped seat
        # order has to be passed as the `bots` keyword argument.
        humanVsAi(bots=[1, 0])
    elif opt == options[2]:
        humanVsAi(depth=2, noBg=True)
    elif opt == options[3]:
        humanVsAi(bots=[None, None])
    else:
        aiVsAiLoop()

View File

@ -492,7 +492,7 @@ class Runtime():
return model, gen return model, gen
def getModelFileName(self):
    """Return the model file name, a fixed relative path."""
    model_path = 'brains/uttt.vac'
    return model_path
def saveToMemoryBank(self, term):
    """Do nothing: ``term`` is ignored and ``None`` is returned."""
    return None
@ -605,7 +605,7 @@ class Trainer(Runtime):
self.killWorker() self.killWorker()
print('') print('')
def trainModel(self, model, lr=0.00005, cut=0.01, calcDepth=4, exacity=5, terms=None, batch=16): def trainModel(self, model, lr=0.000001, cut=0.01, calcDepth=4, exacity=5, terms=None, batch=16):
loss_func = nn.MSELoss() loss_func = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr) optimizer = optim.Adam(model.parameters(), lr)
if terms == None: if terms == None:
@ -631,9 +631,9 @@ class Trainer(Runtime):
loss_sum += loss.item() loss_sum += loss.item()
if loss.item() == 0.0: if loss.item() == 0.0:
zeroLen += 1 zeroLen += 1
if zeroLen == 5: if zeroLen == 5:
break break
# print(loss_sum/i) print(loss_sum/i)
if r > 16 and (loss_sum/i < cut or lLoss == loss_sum): if r > 16 and (loss_sum/i < cut or lLoss == loss_sum):
return loss_sum return loss_sum
lLoss = loss_sum lLoss = loss_sum