| 1 | /* |
|---|
| 2 | Copyright 2009 by Marcin Szubert |
|---|
| 3 | Licensed under the Academic Free License version 3.0 |
|---|
| 4 | */ |
|---|
| 5 | |
|---|
| 6 | package cecj.eval; |
|---|
| 7 | |
|---|
| 8 | import ec.EvolutionState; |
|---|
| 9 | import ec.Individual; |
|---|
| 10 | import ec.util.Parameter; |
|---|
| 11 | |
|---|
| 12 | /** |
|---|
| 13 | * Wrapper for <code>CoevolutionaryEvaluator</code> performing an additional learning phase before |
|---|
| 14 | * actual evaluation. |
|---|
| 15 | * |
|---|
| 16 | * The only role of this class is improving each individual of the population by running a specific |
|---|
| 17 | * temporal difference learning (TDL) algorithm before the evaluation. Since the exact |
|---|
| 18 | * implementation of this algorithm depends on the problem, evaluator delegates the learning task to |
|---|
| 19 | * the provided <code>TDLImprover</code> interface realization. At the beginning of evolutionary |
|---|
| 20 | * process individuals may require some preparation for running TDL. For this reason, appropriate |
|---|
| 21 | * interface methods are invoked before the first evaluation. Clearly, not every problem can be |
|---|
| 22 | * approached by reinforcement learning paradigm so this class has also a limited scope of |
|---|
| 23 | * applicability. |
|---|
| 24 | * |
|---|
| 25 | * Note that this evaluator realizes the Coevolutionary Reinforcement Learning idea. |
|---|
| 26 | * |
|---|
| 27 | * @author Marcin Szubert |
|---|
| 28 | * @see TDLImprover |
|---|
| 29 | * |
|---|
| 30 | */ |
|---|
| 31 | public class TDLImprovingEvaluator extends CoevolutionaryEvaluator { |
|---|
| 32 | |
|---|
| 33 | private static final String P_INNER_EVALUATOR = "inner-evaluator"; |
|---|
| 34 | private static final String P_TDL_IMPROVER = "tdl-improver"; |
|---|
| 35 | private static final String P_TDL_FREQUENCY = "tdl-frequency"; |
|---|
| 36 | |
|---|
| 37 | private CoevolutionaryEvaluator innerEvaluator; |
|---|
| 38 | private TDLImprover temporalDifferenceImprover; |
|---|
| 39 | private boolean firstEvaluation = true; |
|---|
| 40 | private int tdlFrequency; |
|---|
| 41 | |
|---|
| 42 | @Override |
|---|
| 43 | public void setup(EvolutionState state, Parameter base) { |
|---|
| 44 | super.setup(state, base); |
|---|
| 45 | |
|---|
| 46 | Parameter innerEvaluatorParam = base.push(P_INNER_EVALUATOR); |
|---|
| 47 | innerEvaluator = (CoevolutionaryEvaluator) (state.parameters.getInstanceForParameter( |
|---|
| 48 | innerEvaluatorParam, null, CoevolutionaryEvaluator.class)); |
|---|
| 49 | innerEvaluator.setup(state, innerEvaluatorParam); |
|---|
| 50 | |
|---|
| 51 | Parameter tdlImproverParam = base.push(P_TDL_IMPROVER); |
|---|
| 52 | temporalDifferenceImprover = (TDLImprover) (state.parameters.getInstanceForParameter( |
|---|
| 53 | tdlImproverParam, null, TDLImprover.class)); |
|---|
| 54 | temporalDifferenceImprover.setup(state, tdlImproverParam); |
|---|
| 55 | |
|---|
| 56 | Parameter tdlImprovingFrequency = base.push(P_TDL_FREQUENCY); |
|---|
| 57 | tdlFrequency = state.parameters.getIntWithDefault(tdlImprovingFrequency, null, 1); |
|---|
| 58 | } |
|---|
| 59 | |
|---|
| 60 | @Override |
|---|
| 61 | public void evaluatePopulation(EvolutionState state) { |
|---|
| 62 | if (firstEvaluation) { |
|---|
| 63 | for (int subpop = 0; subpop < numSubpopulations; subpop++) { |
|---|
| 64 | Individual[] inds = state.population.subpops[subpop].individuals; |
|---|
| 65 | for (Individual ind : inds) { |
|---|
| 66 | temporalDifferenceImprover.prepareForImproving(state, ind); |
|---|
| 67 | } |
|---|
| 68 | } |
|---|
| 69 | firstEvaluation = false; |
|---|
| 70 | } |
|---|
| 71 | |
|---|
| 72 | if ((state.generation % tdlFrequency) == 0) { |
|---|
| 73 | for (int subpop = 0; subpop < numSubpopulations; subpop++) { |
|---|
| 74 | Individual[] inds = state.population.subpops[subpop].individuals; |
|---|
| 75 | for (Individual ind : inds) { |
|---|
| 76 | temporalDifferenceImprover.improve(state, ind); |
|---|
| 77 | } |
|---|
| 78 | } |
|---|
| 79 | } |
|---|
| 80 | |
|---|
| 81 | innerEvaluator.evaluatePopulation(state); |
|---|
| 82 | } |
|---|
| 83 | } |
|---|