/*
Copyright 2009 by Marcin Szubert
Licensed under the Academic Free License version 3.0
*/
package cecj.eval;

import ec.EvolutionState;
import ec.Individual;
import ec.util.Parameter;

/**
 * Wrapper for CoevolutionaryEvaluator which performs an additional learning phase before the
 * actual evaluation.
 *
 * The only role of this class is to improve each individual of the population by running a
 * specific temporal difference learning (TDL) algorithm before the evaluation. Since the exact
 * implementation of this algorithm depends on the problem, the evaluator delegates the learning
 * task to the provided realization of the TDLImprover interface. At the beginning of the
 * evolutionary process, individuals may require some preparation for running TDL; for this
 * reason, the appropriate interface method is invoked before the first evaluation. Clearly, not
 * every problem can be approached with the reinforcement learning paradigm, so this class also
 * has a limited scope of applicability.
 *
 * Note that this evaluator realizes the idea of Coevolutionary Reinforcement Learning.
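 *
 * <p>
 * A minimal sketch of how this evaluator could be configured in an ECJ parameter file
 * (assuming the standard "eval" parameter base; the inner evaluator and improver class
 * names below are hypothetical placeholders):
 *
 * <pre>
 * eval = cecj.eval.TDLImprovingEvaluator
 * eval.inner-evaluator = cecj.eval.SomeCoevolutionaryEvaluator
 * eval.tdl-improver = my.app.MyGameTDLImprover
 * eval.tdl-frequency = 1
 * </pre>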
 *
 * @author Marcin Szubert
 * @see TDLImprover
 *
 */
public class TDLImprovingEvaluator extends CoevolutionaryEvaluator {
	private static final String P_INNER_EVALUATOR = "inner-evaluator";
	private static final String P_TDL_IMPROVER = "tdl-improver";
	private static final String P_TDL_FREQUENCY = "tdl-frequency";

	private CoevolutionaryEvaluator innerEvaluator; // delegate performing the actual evaluation
	private TDLImprover temporalDifferenceImprover; // problem-specific TDL algorithm
	private boolean firstEvaluation = true; // individuals need preparation before the first run
	private int tdlFrequency; // generations between consecutive improvement phases
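
	/**
	 * Sets up this evaluator and its collaborators: the inner evaluator to which the actual
	 * evaluation is delegated, the problem-specific TDL improver, and the frequency of the
	 * improvement phase.
	 */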
	@Override
	public void setup(EvolutionState state, Parameter base) {
		super.setup(state, base);

		// Instantiate and set up the wrapped evaluator performing the actual evaluation.
		Parameter innerEvaluatorParam = base.push(P_INNER_EVALUATOR);
		innerEvaluator = (CoevolutionaryEvaluator) (state.parameters.getInstanceForParameter(
			innerEvaluatorParam, null, CoevolutionaryEvaluator.class));
		innerEvaluator.setup(state, innerEvaluatorParam);

		// Instantiate and set up the problem-specific TDL improver.
		Parameter tdlImproverParam = base.push(P_TDL_IMPROVER);
		temporalDifferenceImprover = (TDLImprover) (state.parameters.getInstanceForParameter(
			tdlImproverParam, null, TDLImprover.class));
		temporalDifferenceImprover.setup(state, tdlImproverParam);

		// Read how often (in generations) the improvement phase runs; by default it precedes
		// every evaluation. Guard against non-positive values to avoid a modulo by zero later.
		Parameter tdlImprovingFrequency = base.push(P_TDL_FREQUENCY);
		tdlFrequency = state.parameters.getIntWithDefault(tdlImprovingFrequency, null, 1);
		if (tdlFrequency < 1) {
			state.output.fatal("TDL improving frequency must be a positive integer.",
				tdlImprovingFrequency);
		}
	}
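
	/**
	 * Runs the TDL improvement phase on every individual (every tdlFrequency generations) and
	 * then delegates the population evaluation to the inner evaluator. Before the first
	 * evaluation, each individual is additionally prepared for improving.
	 */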
	@Override
	public void evaluatePopulation(EvolutionState state) {
		// Before the very first evaluation, give the improver a chance to prepare each
		// individual for learning.
		if (firstEvaluation) {
			for (int subpop = 0; subpop < numSubpopulations; subpop++) {
				Individual[] inds = state.population.subpops[subpop].individuals;
				for (Individual ind : inds) {
					temporalDifferenceImprover.prepareForImproving(state, ind);
				}
			}
			firstEvaluation = false;
		}

		// Run the TDL improvement phase only every tdlFrequency generations.
		if ((state.generation % tdlFrequency) == 0) {
			for (int subpop = 0; subpop < numSubpopulations; subpop++) {
				Individual[] inds = state.population.subpops[subpop].individuals;
				for (Individual ind : inds) {
					temporalDifferenceImprover.improve(state, ind);
				}
			}
		}

		// Delegate the actual evaluation to the wrapped coevolutionary evaluator.
		innerEvaluator.evaluatePopulation(state);
	}
}
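
/**
 * A minimal sketch of what a problem-specific TDLImprover realization could look like. It
 * assumes the interface declares exactly the three methods invoked by the evaluator above:
 * setup(), prepareForImproving() and improve(). The class name and method bodies are
 * illustrative only; the actual learning logic is problem-dependent and merely stubbed out.
 */
class ExampleTDLImprover implements TDLImprover {

	public void setup(EvolutionState state, Parameter base) {
		// Read problem-specific learning parameters, such as a learning rate, from base.
	}

	public void prepareForImproving(EvolutionState state, Individual ind) {
		// One-time preparation, e.g. deriving a learnable representation from the genotype.
	}

	public void improve(EvolutionState state, Individual ind) {
		// Run self-play episodes and apply temporal difference updates to the individual.
	}
}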