[44] | 1 | package games.scenarios; |
---|
| 2 | |
---|
| 3 | import ec.util.MersenneTwisterFast; |
---|
| 4 | import games.Board; |
---|
| 5 | import games.BoardGame; |
---|
| 6 | import games.GameMove; |
---|
| 7 | import games.Player; |
---|
| 8 | |
---|
| 9 | import java.util.List; |
---|
| 10 | |
---|
| 11 | import cecj.app.othello.OthelloBoard; |
---|
| 12 | |
---|
| 13 | |
---|
| 14 | public class SelfPlayTDLScenario implements GameScenario { |
---|
| 15 | |
---|
| 16 | private double prob; |
---|
| 17 | private Player player; |
---|
| 18 | private double learningRate; |
---|
| 19 | private MersenneTwisterFast random; |
---|
| 20 | |
---|
| 21 | public SelfPlayTDLScenario(MersenneTwisterFast random, Player player, double prob, |
---|
| 22 | double learningRate) { |
---|
| 23 | this.prob = prob; |
---|
| 24 | this.player = player; |
---|
| 25 | this.random = random; |
---|
| 26 | this.learningRate = learningRate; |
---|
| 27 | } |
---|
| 28 | |
---|
| 29 | public int play(BoardGame game) { |
---|
| 30 | while (!game.endOfGame()) { |
---|
| 31 | List<? extends GameMove> moves = game.findMoves(); |
---|
| 32 | if (!moves.isEmpty()) { |
---|
| 33 | GameMove bestMove = null; |
---|
| 34 | if (random.nextBoolean(prob)) { |
---|
| 35 | game.makeMove(moves.get(random.nextInt(moves.size()))); |
---|
| 36 | } else { |
---|
| 37 | double bestEval = Float.NEGATIVE_INFINITY; |
---|
| 38 | for (GameMove move : moves) { |
---|
| 39 | double eval = game.evalMove(player, move); |
---|
| 40 | if (eval > bestEval) { |
---|
| 41 | bestEval = eval; |
---|
| 42 | bestMove = move; |
---|
| 43 | } |
---|
| 44 | } |
---|
| 45 | |
---|
| 46 | Board previousBoard = game.getBoard().clone(); |
---|
| 47 | game.makeMove(bestMove); |
---|
| 48 | updateEvaluationFunction(previousBoard, game); |
---|
| 49 | } |
---|
| 50 | } |
---|
| 51 | game.switchPlayer(); |
---|
| 52 | } |
---|
| 53 | |
---|
| 54 | return game.getOutcome(); |
---|
| 55 | } |
---|
| 56 | |
---|
| 57 | private void updateEvaluationFunction(Board previousBoard, BoardGame game) { |
---|
| 58 | double evalBefore = tanh(previousBoard.evaluate(player)); |
---|
| 59 | double derivative = (1 - (evalBefore * evalBefore)); |
---|
| 60 | double error; |
---|
| 61 | |
---|
| 62 | if (game.endOfGame()) { |
---|
| 63 | int result; |
---|
| 64 | if (game.getOutcome() > 0) { |
---|
| 65 | result = 1; |
---|
| 66 | } else if (game.getOutcome() < 0) { |
---|
| 67 | result = -1; |
---|
| 68 | } else { |
---|
| 69 | result = 0; |
---|
| 70 | } |
---|
| 71 | error = result - evalBefore; |
---|
| 72 | } else { |
---|
| 73 | double evalAfter = tanh(game.getBoard().evaluate(player)); |
---|
| 74 | error = evalAfter - evalBefore; |
---|
| 75 | } |
---|
| 76 | |
---|
| 77 | double delta = learningRate * error * derivative; |
---|
| 78 | for (int row = 1; row <= OthelloBoard.size(); row++) { |
---|
| 79 | for (int col = 1; col <= OthelloBoard.size(); col++) { |
---|
| 80 | double w = player.getValue(row, col); |
---|
| 81 | player.setValue(row, col, w + (delta * previousBoard.getValueAt(row, col))); |
---|
| 82 | } |
---|
| 83 | } |
---|
| 84 | } |
---|
| 85 | |
---|
| 86 | private static double tanh(double x) { |
---|
| 87 | return 2 / (1 + Math.exp(-2 * x)) - 1; |
---|
| 88 | } |
---|
| 89 | } |
---|