[44] | 1 | /* |
---|
| 2 | Copyright 2009 by Marcin Szubert |
---|
| 3 | Licensed under the Academic Free License version 3.0 |
---|
| 4 | */ |
---|
| 5 | |
---|
| 6 | package cecj.eval; |
---|
| 7 | |
---|
| 8 | import ec.EvolutionState; |
---|
| 9 | import ec.Individual; |
---|
| 10 | import ec.util.Parameter; |
---|
| 11 | |
---|
| 12 | /** |
---|
| 13 | * Wrapper for <code>CoevolutionaryEvaluator</code> performing an additional learning phase before |
---|
| 14 | * actual evaluation. |
---|
| 15 | * |
---|
| 16 | * The only role of this class is improving each individual of the population by running a specific |
---|
| 17 | * temporal difference learning (TDL) algorithm before the evaluation. Since the exact |
---|
| 18 | * implementation of this algorithm depends on the problem, evaluator delegates the learning task to |
---|
| 19 | * the provided <code>TDLImprover</code> interface realization. At the beginning of evolutionary |
---|
| 20 | * process individuals may require some preparation for running TDL. For this reason, appropriate |
---|
| 21 | * interface methods are invoked before the first evaluation. Clearly, not every problem can be |
---|
| 22 | * approached by reinforcement learning paradigm so this class has also a limited scope of |
---|
| 23 | * applicability. |
---|
| 24 | * |
---|
| 25 | * Note that this evaluator realizes the Coevolutionary Reinforcement Learning idea. |
---|
| 26 | * |
---|
| 27 | * @author Marcin Szubert |
---|
| 28 | * @see TDLImprover |
---|
| 29 | * |
---|
| 30 | */ |
---|
| 31 | public class TDLImprovingEvaluator extends CoevolutionaryEvaluator { |
---|
| 32 | |
---|
| 33 | private static final String P_INNER_EVALUATOR = "inner-evaluator"; |
---|
| 34 | private static final String P_TDL_IMPROVER = "tdl-improver"; |
---|
| 35 | private static final String P_TDL_FREQUENCY = "tdl-frequency"; |
---|
| 36 | |
---|
| 37 | private CoevolutionaryEvaluator innerEvaluator; |
---|
| 38 | private TDLImprover temporalDifferenceImprover; |
---|
| 39 | private boolean firstEvaluation = true; |
---|
| 40 | private int tdlFrequency; |
---|
| 41 | |
---|
| 42 | @Override |
---|
| 43 | public void setup(EvolutionState state, Parameter base) { |
---|
| 44 | super.setup(state, base); |
---|
| 45 | |
---|
| 46 | Parameter innerEvaluatorParam = base.push(P_INNER_EVALUATOR); |
---|
| 47 | innerEvaluator = (CoevolutionaryEvaluator) (state.parameters.getInstanceForParameter( |
---|
| 48 | innerEvaluatorParam, null, CoevolutionaryEvaluator.class)); |
---|
| 49 | innerEvaluator.setup(state, innerEvaluatorParam); |
---|
| 50 | |
---|
| 51 | Parameter tdlImproverParam = base.push(P_TDL_IMPROVER); |
---|
| 52 | temporalDifferenceImprover = (TDLImprover) (state.parameters.getInstanceForParameter( |
---|
| 53 | tdlImproverParam, null, TDLImprover.class)); |
---|
| 54 | temporalDifferenceImprover.setup(state, tdlImproverParam); |
---|
| 55 | |
---|
| 56 | Parameter tdlImprovingFrequency = base.push(P_TDL_FREQUENCY); |
---|
| 57 | tdlFrequency = state.parameters.getIntWithDefault(tdlImprovingFrequency, null, 1); |
---|
| 58 | } |
---|
| 59 | |
---|
| 60 | @Override |
---|
| 61 | public void evaluatePopulation(EvolutionState state) { |
---|
| 62 | if (firstEvaluation) { |
---|
| 63 | for (int subpop = 0; subpop < numSubpopulations; subpop++) { |
---|
| 64 | Individual[] inds = state.population.subpops[subpop].individuals; |
---|
| 65 | for (Individual ind : inds) { |
---|
| 66 | temporalDifferenceImprover.prepareForImproving(state, ind); |
---|
| 67 | } |
---|
| 68 | } |
---|
| 69 | firstEvaluation = false; |
---|
| 70 | } |
---|
| 71 | |
---|
| 72 | if ((state.generation % tdlFrequency) == 0) { |
---|
| 73 | for (int subpop = 0; subpop < numSubpopulations; subpop++) { |
---|
| 74 | Individual[] inds = state.population.subpops[subpop].individuals; |
---|
| 75 | for (Individual ind : inds) { |
---|
| 76 | temporalDifferenceImprover.improve(state, ind); |
---|
| 77 | } |
---|
| 78 | } |
---|
| 79 | } |
---|
| 80 | |
---|
| 81 | innerEvaluator.evaluatePopulation(state); |
---|
| 82 | } |
---|
| 83 | } |
---|