Context Navigation

SelfPlayTDLScenario.java @ 339

Last change on this file since 339 was 193, checked in by Maciej Komosinski, 11 years ago
Set svn:eol-style native for all textual files
Property svn:eol-style set to `native`
File size: 2.2 KB

Line
1	package games.scenarios;
2
3	import ec.util.MersenneTwisterFast;
4	import games.Board;
5	import games.BoardGame;
6	import games.GameMove;
7	import games.Player;
8
9	import java.util.List;
10
11	import cecj.app.othello.OthelloBoard;
12
13
14	public class SelfPlayTDLScenario implements GameScenario {
15
16	private double prob;
17	private Player player;
18	private double learningRate;
19	private MersenneTwisterFast random;
20
21	public SelfPlayTDLScenario(MersenneTwisterFast random, Player player, double prob,
22	double learningRate) {
23	this.prob = prob;
24	this.player = player;
25	this.random = random;
26	this.learningRate = learningRate;
27	}
28
29	public int play(BoardGame game) {
30	while (!game.endOfGame()) {
31	List<? extends GameMove> moves = game.findMoves();
32	if (!moves.isEmpty()) {
33	GameMove bestMove = null;
34	if (random.nextBoolean(prob)) {
35	game.makeMove(moves.get(random.nextInt(moves.size())));
36	} else {
37	double bestEval = Float.NEGATIVE_INFINITY;
38	for (GameMove move : moves) {
39	double eval = game.evalMove(player, move);
40	if (eval > bestEval) {
41	bestEval = eval;
42	bestMove = move;
43	}
44	}
45
46	Board previousBoard = game.getBoard().clone();
47	game.makeMove(bestMove);
48	updateEvaluationFunction(previousBoard, game);
49	}
50	}
51	game.switchPlayer();
52	}
53
54	return game.getOutcome();
55	}
56
57	private void updateEvaluationFunction(Board previousBoard, BoardGame game) {
58	double evalBefore = tanh(previousBoard.evaluate(player));
59	double derivative = (1 - (evalBefore * evalBefore));
60	double error;
61
62	if (game.endOfGame()) {
63	int result;
64	if (game.getOutcome() > 0) {
65	result = 1;
66	} else if (game.getOutcome() < 0) {
67	result = -1;
68	} else {
69	result = 0;
70	}
71	error = result - evalBefore;
72	} else {
73	double evalAfter = tanh(game.getBoard().evaluate(player));
74	error = evalAfter - evalBefore;
75	}
76
77	double delta = learningRate * error * derivative;
78	for (int row = 1; row <= OthelloBoard.size(); row++) {
79	for (int col = 1; col <= OthelloBoard.size(); col++) {
80	double w = player.getValue(row, col);
81	player.setValue(row, col, w + (delta * previousBoard.getValueAt(row, col)));
82	}
83	}
84	}
85
86	private static double tanh(double x) {
87	return 2 / (1 + Math.exp(-2 * x)) - 1;
88	}
89	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: java/ecj/games/scenarios/SelfPlayTDLScenario.java @ 339

Download in other formats: