{VERSION 3 0 "IBM INTEL NT" "3.0" }
{USTYLETAB
{CSTYLE "Maple Input" -1 0 "Courier" 0 1 255 0 0 1 0 1 0 0 1 0 0 0 0 }
{CSTYLE "2D Math" -1 2 "Times" 0 1 0 0 0 0 0 0 2 0 0 0 0 0 0 }
{CSTYLE "2D Output" 2 20 "" 0 1 0 0 255 1 0 0 0 0 0 0 0 0 0 }
{PSTYLE "Normal" -1 0 1
{CSTYLE "" -1 -1 "" 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 }
0 0 0 -1 -1 -1 0 0 0 0 0 0 -1 0 }
{PSTYLE "Text Output" -1 2 1
{CSTYLE "" -1 -1 "Courier" 1 10 0 0 255 1 0 0 0 0 0 1 3 0 3 }
1 0 0 -1 -1 -1 0 0 0 0 0 0 -1 0 }
{PSTYLE "Heading 1" 0 3 1
{CSTYLE "" -1 -1 "" 1 18 0 0 0 0 0 1 0 0 0 0 0 0 0 }
1 0 0 0 8 4 0 0 0 0 0 0 -1 0 }
{PSTYLE "Warning" 2 7 1
{CSTYLE "" -1 -1 "" 0 1 0 0 255 1 0 0 0 0 0 0 1 0 0 }
0 0 0 -1 -1 -1 0 0 0 0 0 0 -1 0 }
{PSTYLE "Maple Output" 0 11 1
{CSTYLE "" -1 -1 "" 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 }
3 3 0 -1 -1 -1 0 0 0 0 0 0 -1 0 }
}
{SECT 0
{SECT 0
{PARA 3 "" 0 "" {TEXT -1 26 "Solving Bellman's Equation" }}
{PARA 0 "" 0 "" {TEXT -1 58
"Solving Bellman's equation for the following state problem" }}
{PARA 0 "" 0 "" {TEXT -1 63
" (reward 1) win <- 1 <-> 2 <-> 3 <-> 4 <-> 5 -> lose (reward 0)" }}
{PARA 0 "" 0 "" {TEXT -1 62
"assuming the random policy (left and right are equally likely)" }}
{EXCHG {PARA 0 "> " 0 "" {MPLTEXT 1 0 13 "with(linalg):" }}
{PARA 7 "" 1 "" {TEXT -1 32 "Warning, new definition for norm" }}
{PARA 7 "" 1 "" {TEXT -1 33 "Warning, new definition for trace" }}}
{EXCHG {PARA 0 "> " 0 "" {MPLTEXT 1 0 19 "g := 1; c := - g/2;" }}
{PARA 11 "" 1 "" {XPPMATH 20 "6#>%\"gG\"\"\"" }}
{PARA 11 "" 1 "" {XPPMATH 20 "6#>%\"cG#!\"\"\"\"#" }}}
{EXCHG {PARA 0 "> " 0 "" {MPLTEXT 1 0 23 "a := [ [1, c, 0, 0, 0]," }}
{PARA 0 "> " 0 "" {MPLTEXT 1 0 23 "       [c, 1, c, 0, 0]," }}
{PARA 0 "> " 0 "" {MPLTEXT 1 0 23 "       [0, c, 1, c, 0]," }}
{PARA 0 "> " 0 "" {MPLTEXT 1 0 23 "       [0, 0, c, 1, c]," }}
{PARA 0 "> " 0 "" {MPLTEXT 1 0 24 "       [0, 0, 0, c, 1]];" }}
{PARA 11 "" 1 "" {XPPMATH 20
"6#>%\"aG7'7'\"\"\"#!\"\"\"\"#\"\"!F+F+7'F(F'F(F+F+7'F+F(F'F(F+
7'F+F+F(F'F(7'F+F+F+F(F'" }}}
{EXCHG {PARA 0 "> " 0 "" {MPLTEXT 1 0 24 "b := convert(a, matrix);" }}
{PARA 11 "" 1 "" {XPPMATH 20
"6#>%\"bG-%'matrixG6#7'7'\"\"\"#!\"\"\"\"#\"\"!F.F.7'F+F*F+F.F.
7'F.F+F*F+F.7'F.F.F+F*F+7'F.F.F.F+F*" }}}
{EXCHG {PARA 0 "> " 0 "" {MPLTEXT 1 0 11 "inverse(b);" }}
{PARA 11 "" 1 "" {XPPMATH 20
"6#-%'matrixG6#7'7'#\"\"&\"\"$#\"\"%F*\"\"\"#\"\"#F*#F-F*
7'F+#\"\")F*F/F+F.7'F-F/F*F/F-7'F.F+F/F2F+7'F0F.F-F+F(" }}}
{EXCHG {PARA 0 "> " 0 "" {MPLTEXT 1 0 21 "w := [ 1/2, 0,0,0,0];" }}
{PARA 11 "" 1 "" {XPPMATH 20 "6#>%\"wG7'#\"\"\"\"\"#\"\"!F)F)F)" }}}
{EXCHG {PARA 0 "> " 0 "" {MPLTEXT 1 0 54
"val := evalm(inverse(b) &* w); map(x->evalf(x,5),val);" }}
{PARA 11 "" 1 "" {XPPMATH 20
"6#>%$valG-%'vectorG6#7'#\"\"&\"\"'#\"\"#\"\"$#\"\"\"F-#F0F.#F0F+" }}
{PARA 11 "" 1 "" {XPPMATH 20
"6#-%'vectorG6#7'$\"&LL)!\"&$\"&nm'F)$\"&++&F)$\"&LL$F)$\"&nm\"F)" }}
}}
{SECT 0
{PARA 3 "" 0 "" {TEXT -1 14 "TD(0) Learning" }}
{PARA 0 "" 0 "" {TEXT -1 102 "The states are renumbered 1..7: 1 is wi
n, 7 is lose, and 2..6 are states 1..5 of the previous section." }}
{EXCHG {PARA 0 "> " 0 "" {MPLTEXT 1 0 39
"v := [0,0,0,0,0,0,0];  # initial values" }}
{PARA 11 "" 1 "" {XPPMATH 20 "6#>%\"vG7)\"\"!F&F&F&F&F&F&" }}}
{EXCHG {PARA 0 "> " 0 "" {MPLTEXT 1 0 33
"train := proc(iter, alpha, gamma)" }}
{PARA 0 "> " 0 "" {MPLTEXT 1 0 60
"  # TD(0) learning of the state values in the global list v." }}
{PARA 0 "> " 0 "" {MPLTEXT 1 0 64
"  # iter: number of episodes; alpha: step size; gamma: discount." }}
{PARA 0 "> " 0 "" {MPLTEXT 1 0 47
"  local i, state, isNotDone, pick, old, reward;" }}
{PARA 0 "> " 0 "" {MPLTEXT 1 0 11 "  global v;" }}
{PARA 0 "> " 0 "" {MPLTEXT 1 0 63
"  pick := rand(0..1);  # fair coin: 0 steps left, 1 steps right" }}
{PARA 0 "> " 0 "" {MPLTEXT 1 0 25 "  for i from 1 to iter do" }}
{PARA 0 "> " 0 "" {MPLTEXT 1 0 58
"    state := 4;  # each episode starts in the middle state" }}
{PARA 0 "> " 0 "" {MPLTEXT 1 0 22 "    isNotDone := true;" }}
{PARA 0 "> " 0 "" {MPLTEXT 1 0 22 "    while isNotDone do" }}
{PARA 0 "> " 0 "" {MPLTEXT 1 0 19 "      old := state;" }}
{PARA 0 "> " 0 "" {MPLTEXT 1 0 36
"      state := state + (2*pick()-1);" }}
{PARA 0 "> " 0 "" {MPLTEXT 1 0 18 "      reward := 0;" }}
{PARA 0 "> " 0 "" {MPLTEXT 1 0 50
"      if state = 1 then  # win: terminal, reward 1" }}
{PARA 0 "> " 0 "" {MPLTEXT 1 0 20 "        reward := 1;" }}
{PARA 0 "> " 0 "" {MPLTEXT 1 0 27 "        isNotDone := false;" }}
{PARA 0 "> " 0 "" {MPLTEXT 1 0 53
"      elif state = 7 then  # lose: terminal, reward 0" }}
{PARA 0 "> " 0 "" {MPLTEXT 1 0 27 "        isNotDone := false;" }}
{PARA 0 "> " 0 "" {MPLTEXT 1 0 9 "      fi;" }}
{PARA 0 "> " 0 "" {MPLTEXT 1 0 22 "      v[old] := v[old]" }}
{PARA 0 "> " 0 "" {MPLTEXT 1 0 51
"        + alpha*(reward + gamma*v[state] - v[old]);" }}
{PARA 0 "> " 0 "" {MPLTEXT 1 0 23 "    od;  # end of while" }}
{PARA 0 "> " 0 "" {MPLTEXT 1 0 24 "  od;  # end of for-loop" }}
{PARA 0 "> " 0 "" {MPLTEXT 1 0 36
"  v;  # return the updated estimates" }}
{PARA 0 "> " 0 "" {MPLTEXT 1 0 4 "end:" }}}
{EXCHG {PARA 0 "> " 0 "" {MPLTEXT 1 0 20 "train(1000, .02, 1);" }}
{PARA 11 "" 1 "" {XPPMATH 20
"6#7)\"\"!$\"+`[1y$)!#5$\"+3I8RnF'$\"+\\=6)z%F'
$\"+?BF%)HF'$\"+P4?k9F'F$" }}}
{EXCHG {PARA 11 "" 1 "" {TEXT -1 0 "" }}}
{EXCHG {PARA 0 "> " 0 "" {MPLTEXT 1 0 23 "map(x->evalf(x,5),val);" }}
{PARA 11 "" 1 "" {XPPMATH 20
"6#-%'vectorG6#7'$\"&LL)!\"&$\"&nm'F)$\"&++&F)$\"&LL$F)$\"&nm\"F)" }}}
{EXCHG {PARA 0 "> " 0 "" {MPLTEXT 1 0 0 "" }}}
}}
{MARK "1 6 0 0" 0 }
{VIEWOPTS 1 1 0 1 1 1803 }