/*
 * Copyright 2009 by Marcin Szubert
 * Licensed under the Academic Free License version 3.0
 */
package cecj.eval;

import ec.EvolutionState;
import ec.Individual;
import ec.util.Parameter;

/**
 * Wrapper for {@link CoevolutionaryEvaluator} performing an additional learning phase before the
 * actual evaluation.
 * 
 * The only role of this class is to improve each individual of the population by running a
 * specific temporal difference learning (TDL) algorithm before the evaluation. Since the exact
 * implementation of this algorithm depends on the problem, the evaluator delegates the learning
 * task to the provided realization of the {@link TDLImprover} interface. At the beginning of the
 * evolutionary process, individuals may require some preparation for running TDL. For this
 * reason, the appropriate interface method is invoked before the first evaluation. Clearly, not
 * every problem can be approached with the reinforcement learning paradigm, so this class also
 * has a limited scope of applicability.
 * 
 * Note that this evaluator realizes the Coevolutionary Reinforcement Learning idea.
 * 
 * @author Marcin Szubert
 * @see TDLImprover
 */
public class TDLImprovingEvaluator extends CoevolutionaryEvaluator {

	private static final String P_INNER_EVALUATOR = "inner-evaluator";
	private static final String P_TDL_IMPROVER = "tdl-improver";
	private static final String P_TDL_FREQUENCY = "tdl-frequency";

	private CoevolutionaryEvaluator innerEvaluator;

	private TDLImprover temporalDifferenceImprover;

	private boolean firstEvaluation = true;

	private int tdlFrequency;

	@Override
	public void setup(EvolutionState state, Parameter base) {
		super.setup(state, base);

		// The evaluator which performs the actual coevolutionary evaluation.
		Parameter innerEvaluatorParam = base.push(P_INNER_EVALUATOR);
		innerEvaluator = (CoevolutionaryEvaluator) (state.parameters.getInstanceForParameter(
			innerEvaluatorParam, null, CoevolutionaryEvaluator.class));
		innerEvaluator.setup(state, innerEvaluatorParam);

		// The problem-specific TDL algorithm used to improve individuals.
		Parameter tdlImproverParam = base.push(P_TDL_IMPROVER);
		temporalDifferenceImprover = (TDLImprover) (state.parameters.getInstanceForParameter(
			tdlImproverParam, null, TDLImprover.class));
		temporalDifferenceImprover.setup(state, tdlImproverParam);

		// How often (in generations) the learning phase runs; defaults to every generation.
		Parameter tdlImprovingFrequency = base.push(P_TDL_FREQUENCY);
		tdlFrequency = state.parameters.getIntWithDefault(tdlImprovingFrequency, null, 1);
	}

	@Override
	public void evaluatePopulation(EvolutionState state) {
		// Before the first evaluation, let the improver prepare each individual for TDL.
		if (firstEvaluation) {
			for (int subpop = 0; subpop < numSubpopulations; subpop++) {
				Individual[] inds = state.population.subpops[subpop].individuals;
				for (Individual ind : inds) {
					temporalDifferenceImprover.prepareForImproving(state, ind);
				}
			}
			firstEvaluation = false;
		}

		// Run the TDL learning phase every tdlFrequency generations.
		if ((state.generation % tdlFrequency) == 0) {
			for (int subpop = 0; subpop < numSubpopulations; subpop++) {
				Individual[] inds = state.population.subpops[subpop].individuals;
				for (Individual ind : inds) {
					temporalDifferenceImprover.improve(state, ind);
				}
			}
		}

		// Delegate the actual fitness evaluation to the wrapped evaluator.
		innerEvaluator.evaluatePopulation(state);
	}
}
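
/*
 * A minimal sketch of how this evaluator could be wired up in an ECJ parameter file.
 * Only the parameter names ("inner-evaluator", "tdl-improver", "tdl-frequency") come
 * from the constants above, and "eval" is ECJ's standard base parameter for the
 * evaluator; the class names on the right-hand sides of the last three lines are
 * hypothetical and depend on the problem at hand:
 *
 *   eval = cecj.eval.TDLImprovingEvaluator
 *   eval.inner-evaluator = cecj.eval.TournamentCoevolutionaryEvaluator
 *   eval.tdl-improver = cecj.eval.OthelloTDLImprover
 *   eval.tdl-frequency = 5
 *
 * With such a configuration, every individual would undergo a TDL learning phase
 * every fifth generation before the population is passed to the inner evaluator.
 */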