From de5137ecd382c593fe9d6ed9b629d63d9da62a89 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Fri, 15 Apr 2022 00:32:48 +0200 Subject: [PATCH] Does not work --- brains/uttt.pth | Bin 0 -> 5327 bytes ultimatetictactoe.py | 71 +++++++++++++++++++++++--- vacuumDecay.py | 116 ++++++++++++++++++++++++++++++++++++++----- 3 files changed, 168 insertions(+), 19 deletions(-) create mode 100644 brains/uttt.pth diff --git a/brains/uttt.pth b/brains/uttt.pth new file mode 100644 index 0000000000000000000000000000000000000000..edcaf6fc632409066c81cef7bb60d3043ddc7a59 GIT binary patch literal 5327 zcmbVQ3s{X=7e48#%jqJNT%*J&T~wyC*M}xar&6b)6qPQDQoBxR#+*>3qC_!845Lsk zCHKy^K9MO5<31#2x z^qiE$CQwgH-r2wp1fXmH$R2rBY3C zyp(GBi3L_-U9pN-Rjewd-L1sk`9D>$ij->S#wuj#NU5%b>ILb1(l#S5B83WLdB?G8 zu^q&AR6kZztShAkJfT>3F*OvcET%>s6g{NWSVDUS>GBja;%0t|u9p&rJ?$N<>D{-R zl$x}wQs;HGt1`6`$N#0OPY0!$l=hX-enHwFDceQ$S0Z$v19(yDQaVr}N_{aMq!4A^ z!7x}#he)VJyQsue=_gT^N*tr;P@Y3yO05(es$%tQI!vrCR>`KrJ9s{m(h(AB-L9mA zr(KDS5{n}p$+PeiY}?MVnA$0n*mp2ENa-jEb^NHr{;$P%QsQu;&K(@16&(D6Wz)|U zE0s-MI(S4e-%iY0=3kd^S_DSS2Nh-yQAQOTA>&JGR}V@xf$c zOMPT?QWv8g^_5Y-4x=I~`P`rq8TIe5sCQU+Xn>3cD%Pt6stx2!o>)!97Dl>=M+NjUDH3Trly`SLasAT5%{pQT%FVaYwoedK|XboP^ zGvRu@{*H-kPGgMSmcf|3DCUW^E@yCe5MyV%oO@{W5)Gwdw)$o+)XKj@r)dj0^AXvw ze0dA}@RJ6&TGP!3<~4+Ey?26dY#8V^Rk(;9c#Jg*IVW5r7_Ia zY$3NPZ3s*=FlTIrZei^T@|b(Jxy*3`eMW2dEUstf4#tQWa{XL%;ABl7CjaqVhB2K9 z_D5owgYPzQ%-Ku^Ogx#Jzc!Ps!vmNZ>XmGGP66lfq#DKyOCh_`j-$MO4R_#GA!GN` zY|dF`cic-N`GSeKHMMr<={7Z(&93U+jYS;ic>$(hTP=wUFw1XOUO>cF>R% ziej$Ti%1bC#RZ1hcm7g$cv1)mbtF8Ajh2o7Ns=eWw^fg-fA)+`2}#_pk_Xc$^{stX>8` ztoxA^u>INFeM>On)Dt$mv_PajF9Jh^xo}=D6SMBzU=LQ?GkR497++Egy%tr0dH7f+ zpjHRu1G1pk^GwKpuMIXgQ{*9m8|8i9UI1p1iTsXUINV!O34ZY}Sk&E5YR(?TBIg0j ziJJ2uyifw#N%!GdFB!N`+yKH7C%n{n4!oS|A-6{kI!5=vK580hwPriJ>2W@3{<$yv zXJ`NxdX{269A!Q4m&1O8DqL%2jeCA-t*gj0gMk+hv1Kvdu){5ljR;x*%V#*DykRez z^j^-|B}-Vxl|pcxP=o+4`j%Gs4m$Ckz*^+ zV`wc@o^!&l^T_At&9p#I_>f3?${@D;AiQ!p1Y*2O$?L-_uuyVXJ~kx)H(vS*+DaG< zNj%M_K6YXIC(VHy!j*XN1|e@&Y?o6FO{78=x2X(7S*QcPDLDusI;%Fi`4Nq;l{| zw3IUm{&+HByPOqoK#Bk#Qk~Y5*JWdRi>*N)~5c(ZNzkLBQ*_OEM)FfF5|Qhv8I3K5)%8Ospv)?h8sVHTVqnE_}v%e%>aEu(JV8z02grndj{0oJ>(m z^VhhO@x;7Uy@=?!H`fxq8*?w@L*gHD5_)Jn7!Nte`aInMacvTK8>LT*yoZvg^bEY1 zz7N$*%wbqs6|D9d4R*#oz^s=JTXrB7mTD!r6jYAI>O()Xe)qnFmg}RjX-p%V?mG)s z9}|MNmmm6+aX2&;puu=8nc8$2YzzVB_}9X`HcdWvx3O*$i!gemA#|H@2eR#4eBp#lTj>V;C6!f{Yt?7#y-P zaZtl4@H=K93cflNbMN&A+FZICuUFEoPZZUbjUq~ve<6-^Q4QOFL0E@MJ zaEy2%xNQi;x|df;{cVKT57d}1g;&{x7;WaoB2%*D;xcyf{Z(@Bs{5i9P2IuHsDce1 zc?SZPT!Nz->g=ecZD1H(Be$5L0wtR-f2rn_(@~gzv*YowP|2G_VXWVA&EhFZwC8s0T!`b^D-7Eu{^%*@1Y=X9Ad34NH*Gpoq` zV?OK`^LsO!{wT!QhrhD*2i8C~-e#|On4)&TYWUhTK=d{y4<;quV~6+O zfUOv80O{YGvflj$!odTt;qn5)W#zE_RV5v-Zz8|Fx57q^Td_WLFpl$RVDn3iU`*+GJSouN_PyMWr``l& zzDgpDJFbhb0@lH}%LOkN;SBS#t_pv488vDnjShU-`oJ0=YiZ_f?@Hasd7XNaboiL&qGgI#1 z=>ctQ;E#Lo;Qa2Kho4yPopTr6i*Dj0#sUSs@zs20C7x28P z5k8^7=GgyADi*o2u~mpV!M*X#`E#V%I|mi_mmlZ$wo2kFW<~!8_ZNr%b$>Y>W{sAf z_1NEQ3!!!dGM8^@_i?mfbAvH|KT`C+^I@xeM;qntI^w_XXi5cpmmmFomodI)dNJ0n zMa*D_W87hfeT;pTFSl=(HYW;y$7S@J%bBY^#D{NXT-5gm@X&8NL4R^J=XpV^{VwV4 z5qgR68WsKTbZRNzlU2<*%y+m42WWGe<+ABP*l{4{&#$%mHC|9T}S<+ zxc{1lhMR9V?;4duN19?eh5tLM%V+)z|IYu`|K{@2_VqSf8I!3fH5L8;$E`0I^D*uZ zRjRwI=wAi?VFDbq6;YpEKQpi!?^)4(#VU%=$2XzQ1)_oCA4;w33sF~e=UPLkgw*3h z?Tj92+ZEfnPEf`Q_!3+RYu6Rqd5RvBB?73X;(w=| "+str(self.lastAction)+" ]") s.append("[ turn: "+str(self.state.curPlayer)+" ]") s.append(str(self.state)) - s.append("[ score: "+str(self.getSelfScore())+" ]") + s.append("[ score: "+str(self.getStrongFor(self.state.curPlayer))+" ]") return '\n'.join(s) def choose(txt, options): @@ -428,22 +435,105 @@ class Runtime(): print(self.head.getWinner() + ' won!') self.killWorker() +class NeuralRuntime(Runtime): + def __init__(self, initState): + super().__init__(initState) + + model = self.head.state.getModel() + model.load_state_dict(torch.load('brains/uttt.pth')) + model.eval() + + self.head.universe.model = model + self.head.universe.scoreProvider = 'neural' + class Trainer(Runtime): def __init__(self, initState): self.universe = Universe() self.rootNode = Node(initState, universe = self.universe) self.terminal = None - def linearPlay(self, calcDepth=8): - head = rootNode + def buildDatasetFromModel(self, model, depth=4, refining=False): + print('[*] Building Timeline') + term = self.linearPlay(model, calcDepth=depth) + if refining: + print('[*] Refining Timeline') + self.fanOut(term, depth=depth+1) + self.fanOut(term.parent, depth=depth+1) + self.fanOut(term.parent.parent, depth=depth+1) + return term + + def fanOut(self, head, depth=10): + for d in range(max(3, depth-3)): + head = head.parent + head.forceStrong(depth) + + def linearPlay(self, model, calcDepth=7, verbose=True): + head = self.rootNode + self.universe.model = model while head.getWinner()==None: - self.head.forceStrong(calcDepth) + if verbose: + print(head) + else: + print('.', end='', flush=True) + head.forceStrong(calcDepth) opts = [] - for c in self.head.childs: - opts.append((c, c.getStrongFor(self.head.curPlayer))) + if len(head.childs)==0: + break + for c in head.childs: + opts.append((c, c.getStrongFor(head.curPlayer))) opts.sort(key=lambda x: x[1]) - ind = int(math.pow(random.random(),5)*len(opts)) + ind = int(pow(random.random(),5)*(len(opts)-1)) head = opts[ind][0] - self.terminal = head + print('') return head + def timelineIter(self, term): + head = term + while True: + yield head + if head.parent == None: + return + head = head.parent + + def trainModel(self, model, lr=0.01, cut=0.01, calcDepth=4): + loss_func = nn.MSELoss() + optimizer = optim.Adam(model.parameters(), lr) + term = self.buildDatasetFromModel(model, depth=calcDepth) + for r in range(16): + loss_sum = 0 + zeroLen = 0 + for i, node in enumerate(self.timelineIter(term)): + for p in range(self.rootNode.playersNum): + inp = node.state.getTensor(player=p) + gol = torch.tensor(node.getStrongFor(p), dtype=torch.float) + out = model(inp) + loss = loss_func(out, gol) + optimizer.zero_grad() + loss.backward() + optimizer.step() + loss_sum += loss.item() + if loss.item() == 0.0: + zeroLen+=1 + if zeroLen == 5: + break + print(loss_sum/i) + if loss_sum/i < cut: + return + + def main(self, model=None, gens=64): + newModel = False + if model==None: + newModel = True + model = self.rootNode.state.getModel() + self.universe.scoreProvider = ['neural','naive'][newModel] + for gen in range(gens): + print('[#####] Gen '+str(gen)+' training:') + self.trainModel(model, calcDepth=3) + self.universe.scoreProvider = 'neural' + torch.save(model.state_dict(), 'brains/uttt.pth') + + def train(self): + model = self.rootNode.state.getModel() + model.load_state_dict(torch.load('brains/uttt.pth')) + model.eval() + self.main(model)