Context Navigation

SelfPlayTDLScenario.java @ 339

Last change on this file since 339 was 193, checked in by Maciej Komosinski, 11 years ago
Set svn:eol-style native for all textual files
Property svn:eol-style set to `native`
File size: 2.2 KB

Rev	Line
[44]	1	package games.scenarios;
	2
	3	import ec.util.MersenneTwisterFast;
	4	import games.Board;
	5	import games.BoardGame;
	6	import games.GameMove;
	7	import games.Player;
	8
	9	import java.util.List;
	10
	11	import cecj.app.othello.OthelloBoard;
	12
	13
	14	public class SelfPlayTDLScenario implements GameScenario {
	15
	16	private double prob;
	17	private Player player;
	18	private double learningRate;
	19	private MersenneTwisterFast random;
	20
	21	public SelfPlayTDLScenario(MersenneTwisterFast random, Player player, double prob,
	22	double learningRate) {
	23	this.prob = prob;
	24	this.player = player;
	25	this.random = random;
	26	this.learningRate = learningRate;
	27	}
	28
	29	public int play(BoardGame game) {
	30	while (!game.endOfGame()) {
	31	List<? extends GameMove> moves = game.findMoves();
	32	if (!moves.isEmpty()) {
	33	GameMove bestMove = null;
	34	if (random.nextBoolean(prob)) {
	35	game.makeMove(moves.get(random.nextInt(moves.size())));
	36	} else {
	37	double bestEval = Float.NEGATIVE_INFINITY;
	38	for (GameMove move : moves) {
	39	double eval = game.evalMove(player, move);
	40	if (eval > bestEval) {
	41	bestEval = eval;
	42	bestMove = move;
	43	}
	44	}
	45
	46	Board previousBoard = game.getBoard().clone();
	47	game.makeMove(bestMove);
	48	updateEvaluationFunction(previousBoard, game);
	49	}
	50	}
	51	game.switchPlayer();
	52	}
	53
	54	return game.getOutcome();
	55	}
	56
	57	private void updateEvaluationFunction(Board previousBoard, BoardGame game) {
	58	double evalBefore = tanh(previousBoard.evaluate(player));
	59	double derivative = (1 - (evalBefore * evalBefore));
	60	double error;
	61
	62	if (game.endOfGame()) {
	63	int result;
	64	if (game.getOutcome() > 0) {
	65	result = 1;
	66	} else if (game.getOutcome() < 0) {
	67	result = -1;
	68	} else {
	69	result = 0;
	70	}
	71	error = result - evalBefore;
	72	} else {
	73	double evalAfter = tanh(game.getBoard().evaluate(player));
	74	error = evalAfter - evalBefore;
	75	}
	76
	77	double delta = learningRate * error * derivative;
	78	for (int row = 1; row <= OthelloBoard.size(); row++) {
	79	for (int col = 1; col <= OthelloBoard.size(); col++) {
	80	double w = player.getValue(row, col);
	81	player.setValue(row, col, w + (delta * previousBoard.getValueAt(row, col)));
	82	}
	83	}
	84	}
	85
	86	private static double tanh(double x) {
	87	return 2 / (1 + Math.exp(-2 * x)) - 1;
	88	}
	89	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: java/ecj/games/scenarios/SelfPlayTDLScenario.java @ 339

Download in other formats: