1 | package games.scenarios; |
---|
2 | |
---|
3 | import ec.util.MersenneTwisterFast; |
---|
4 | import games.Board; |
---|
5 | import games.BoardGame; |
---|
6 | import games.GameMove; |
---|
7 | import games.Player; |
---|
8 | |
---|
9 | import java.util.List; |
---|
10 | |
---|
11 | import cecj.app.othello.OthelloBoard; |
---|
12 | |
---|
13 | |
---|
14 | public class SelfPlayTDLScenario implements GameScenario { |
---|
15 | |
---|
16 | private double prob; |
---|
17 | private Player player; |
---|
18 | private double learningRate; |
---|
19 | private MersenneTwisterFast random; |
---|
20 | |
---|
21 | public SelfPlayTDLScenario(MersenneTwisterFast random, Player player, double prob, |
---|
22 | double learningRate) { |
---|
23 | this.prob = prob; |
---|
24 | this.player = player; |
---|
25 | this.random = random; |
---|
26 | this.learningRate = learningRate; |
---|
27 | } |
---|
28 | |
---|
29 | public int play(BoardGame game) { |
---|
30 | while (!game.endOfGame()) { |
---|
31 | List<? extends GameMove> moves = game.findMoves(); |
---|
32 | if (!moves.isEmpty()) { |
---|
33 | GameMove bestMove = null; |
---|
34 | if (random.nextBoolean(prob)) { |
---|
35 | game.makeMove(moves.get(random.nextInt(moves.size()))); |
---|
36 | } else { |
---|
37 | double bestEval = Float.NEGATIVE_INFINITY; |
---|
38 | for (GameMove move : moves) { |
---|
39 | double eval = game.evalMove(player, move); |
---|
40 | if (eval > bestEval) { |
---|
41 | bestEval = eval; |
---|
42 | bestMove = move; |
---|
43 | } |
---|
44 | } |
---|
45 | |
---|
46 | Board previousBoard = game.getBoard().clone(); |
---|
47 | game.makeMove(bestMove); |
---|
48 | updateEvaluationFunction(previousBoard, game); |
---|
49 | } |
---|
50 | } |
---|
51 | game.switchPlayer(); |
---|
52 | } |
---|
53 | |
---|
54 | return game.getOutcome(); |
---|
55 | } |
---|
56 | |
---|
57 | private void updateEvaluationFunction(Board previousBoard, BoardGame game) { |
---|
58 | double evalBefore = tanh(previousBoard.evaluate(player)); |
---|
59 | double derivative = (1 - (evalBefore * evalBefore)); |
---|
60 | double error; |
---|
61 | |
---|
62 | if (game.endOfGame()) { |
---|
63 | int result; |
---|
64 | if (game.getOutcome() > 0) { |
---|
65 | result = 1; |
---|
66 | } else if (game.getOutcome() < 0) { |
---|
67 | result = -1; |
---|
68 | } else { |
---|
69 | result = 0; |
---|
70 | } |
---|
71 | error = result - evalBefore; |
---|
72 | } else { |
---|
73 | double evalAfter = tanh(game.getBoard().evaluate(player)); |
---|
74 | error = evalAfter - evalBefore; |
---|
75 | } |
---|
76 | |
---|
77 | double delta = learningRate * error * derivative; |
---|
78 | for (int row = 1; row <= OthelloBoard.size(); row++) { |
---|
79 | for (int col = 1; col <= OthelloBoard.size(); col++) { |
---|
80 | double w = player.getValue(row, col); |
---|
81 | player.setValue(row, col, w + (delta * previousBoard.getValueAt(row, col))); |
---|
82 | } |
---|
83 | } |
---|
84 | } |
---|
85 | |
---|
86 | private static double tanh(double x) { |
---|
87 | return 2 / (1 + Math.exp(-2 * x)) - 1; |
---|
88 | } |
---|
89 | } |
---|