1 | /* |
---|
2 | Copyright 2009 by Marcin Szubert |
---|
3 | Licensed under the Academic Free License version 3.0 |
---|
4 | */ |
---|
5 | |
---|
6 | package cecj.eval; |
---|
7 | |
---|
8 | import ec.EvolutionState; |
---|
9 | import ec.Individual; |
---|
10 | import ec.util.Parameter; |
---|
11 | |
---|
12 | /** |
---|
13 | * Wrapper for <code>CoevolutionaryEvaluator</code> performing an additional learning phase before |
---|
14 | * actual evaluation. |
---|
15 | * |
---|
16 | * The only role of this class is improving each individual of the population by running a specific |
---|
17 | * temporal difference learning (TDL) algorithm before the evaluation. Since the exact |
---|
18 | * implementation of this algorithm depends on the problem, evaluator delegates the learning task to |
---|
19 | * the provided <code>TDLImprover</code> interface realization. At the beginning of evolutionary |
---|
20 | * process individuals may require some preparation for running TDL. For this reason, appropriate |
---|
21 | * interface methods are invoked before the first evaluation. Clearly, not every problem can be |
---|
22 | * approached by reinforcement learning paradigm so this class has also a limited scope of |
---|
23 | * applicability. |
---|
24 | * |
---|
25 | * Note that this evaluator realizes the Coevolutionary Reinforcement Learning idea. |
---|
26 | * |
---|
27 | * @author Marcin Szubert |
---|
28 | * @see TDLImprover |
---|
29 | * |
---|
30 | */ |
---|
31 | public class TDLImprovingEvaluator extends CoevolutionaryEvaluator { |
---|
32 | |
---|
33 | private static final String P_INNER_EVALUATOR = "inner-evaluator"; |
---|
34 | private static final String P_TDL_IMPROVER = "tdl-improver"; |
---|
35 | private static final String P_TDL_FREQUENCY = "tdl-frequency"; |
---|
36 | |
---|
37 | private CoevolutionaryEvaluator innerEvaluator; |
---|
38 | private TDLImprover temporalDifferenceImprover; |
---|
39 | private boolean firstEvaluation = true; |
---|
40 | private int tdlFrequency; |
---|
41 | |
---|
42 | @Override |
---|
43 | public void setup(EvolutionState state, Parameter base) { |
---|
44 | super.setup(state, base); |
---|
45 | |
---|
46 | Parameter innerEvaluatorParam = base.push(P_INNER_EVALUATOR); |
---|
47 | innerEvaluator = (CoevolutionaryEvaluator) (state.parameters.getInstanceForParameter( |
---|
48 | innerEvaluatorParam, null, CoevolutionaryEvaluator.class)); |
---|
49 | innerEvaluator.setup(state, innerEvaluatorParam); |
---|
50 | |
---|
51 | Parameter tdlImproverParam = base.push(P_TDL_IMPROVER); |
---|
52 | temporalDifferenceImprover = (TDLImprover) (state.parameters.getInstanceForParameter( |
---|
53 | tdlImproverParam, null, TDLImprover.class)); |
---|
54 | temporalDifferenceImprover.setup(state, tdlImproverParam); |
---|
55 | |
---|
56 | Parameter tdlImprovingFrequency = base.push(P_TDL_FREQUENCY); |
---|
57 | tdlFrequency = state.parameters.getIntWithDefault(tdlImprovingFrequency, null, 1); |
---|
58 | } |
---|
59 | |
---|
60 | @Override |
---|
61 | public void evaluatePopulation(EvolutionState state) { |
---|
62 | if (firstEvaluation) { |
---|
63 | for (int subpop = 0; subpop < numSubpopulations; subpop++) { |
---|
64 | Individual[] inds = state.population.subpops[subpop].individuals; |
---|
65 | for (Individual ind : inds) { |
---|
66 | temporalDifferenceImprover.prepareForImproving(state, ind); |
---|
67 | } |
---|
68 | } |
---|
69 | firstEvaluation = false; |
---|
70 | } |
---|
71 | |
---|
72 | if ((state.generation % tdlFrequency) == 0) { |
---|
73 | for (int subpop = 0; subpop < numSubpopulations; subpop++) { |
---|
74 | Individual[] inds = state.population.subpops[subpop].individuals; |
---|
75 | for (Individual ind : inds) { |
---|
76 | temporalDifferenceImprover.improve(state, ind); |
---|
77 | } |
---|
78 | } |
---|
79 | } |
---|
80 | |
---|
81 | innerEvaluator.evaluatePopulation(state); |
---|
82 | } |
---|
83 | } |
---|