source: cpp/frams/genetics/genooperators.h @ 1334

Last change on this file since 1334 was 1313, checked in by Maciej Komosinski, 7 months ago

Color mutations in f1 and f4, and a new syntax for "allowed modifiers" (opposite to previous "excluded modifiers") with optional probabilities for each modifier

  • Property svn:eol-style set to native
File size: 18.7 KB
Line 
1// This file is a part of Framsticks SDK.  http://www.framsticks.com/
2// Copyright (C) 1999-2024  Maciej Komosinski and Szymon Ulatowski.
3// See LICENSE.txt for details.
4
5#ifndef _GENO_OPERATORS_H_
6#define _GENO_OPERATORS_H_
7
8#include <common/nonstd.h>
9#include <frams/model/model.h>
10
11/** @file */
12
/** \name Return codes for genetic operators */
//@{
#define GENOPER_OK      0    ///<the operation completed successfully
#define GENOPER_OPFAIL  (-1) ///<the operation failed or could not be completed
#define GENOPER_REPAIR  (-2) ///<do not use in Geno_fx. GenMan uses it in checkValidity()... but will not. Only f4 uses it
#define GENOPER_NOOPER  (-3) ///<do not use in Geno_fx. GenMan uses it for "no suitable operator for this genotype format"
//@}
20
/** \name gene/character predefined styles (for style() method) */
//@{
// The non-zero values are distinct single bits.
#define GENSTYLE_NONE       0x00 ///<no style specified (=normal font)
#define GENSTYLE_INVALID    0x01 ///<this char cannot be accepted
#define GENSTYLE_BOLD       0x02 ///<bold
#define GENSTYLE_ITALIC     0x04 ///<italic
#define GENSTYLE_STRIKEOUT  0x08 ///<strikeout (not recommended)
//UNDERLINE used to mark errors
//@}
30
/** \name other useful style/color macros */
//@{
// All macro arguments are fully parenthesized so that compound expressions
// (e.g. "a|b" or "cond ? x : y") can be passed safely.

///packs three 8-bit components into one 32-bit value: red in bits 0-7, green in bits 8-15, blue in bits 16-23
#define GENRGB(r,g,b) ((uint32_t)(((uint32_t)(uint8_t)(r)) | (((uint32_t)(uint8_t)(g))<<8) | (((uint32_t)(uint8_t)(b))<<16)))
///packs color components and a style (see GENSTYLE_* macros) into one value; the style occupies bits 24-31
#define GENSTYLE_RGBS(r,g,b,s) ((((uint32_t)(uint8_t)(s))<<24) | GENRGB(r,g,b))
///combines an already packed color \a rgb (see GENRGB) with a style placed in bits 24-31
#define GENSTYLE_CS(rgb,s) ((((uint32_t)(uint8_t)(s))<<24) | (rgb))

#define GENGETSTYLE(style) ((style)>>24) ///<extracts the style part (bits 24 and above)
#define GENGETCOLOR(style) ((style)&0x00ffffff) ///<extracts the packed color (bits 0-23)
#define GENGET_R(style) ((style)&0xff) ///<extracts the red component
#define GENGET_G(style) (((style)>>8)&0xff) ///<extracts the green component
#define GENGET_B(style) (((style)>>16)&0xff) ///<extracts the blue component

#define GENCOLOR_TEXT    GENRGB(0,0,0) ///<recommended color to use for text genes
#define GENCOLOR_NUMBER  GENRGB(200,0,0) ///<recommended color to use for number genes
//@}
46
47///Base class for genetic operations on genotypes of some genetic format
48/**\author Maciej Komosinski
49
50When designing genetic operations on some representation, inherit your class
51(for example GenoOper_fMy) from GenoOperators. Define some methods,
52like mutate(), in your class, to allow for evolution.
53Ensure they have the same names and arguments as the corresponding
54virtual methods in Geno_fx. Set the 'supported_format' variable to the
55appropriate genetic representation ID.
56Whenever arguments are genotypes, they are without
57trailing characters which describe genetic format
58(for example, "p:", not "//0\np:").
59When allocating/reallocating char* parameters, use malloc, free, realloc, strdup, etc.
60Do not use new and delete.
61
62All the methods you might define are:
63- checkValidity()
64- validate()
65- mutate()
66- crossOver()
67- getSimplest()
68- style()
69
70Your code must not cause errors (like invalid memory access, memory
71leaks) on any arguments, even 'random' ones. GENOPER_OPFAIL should
72be returned when an operator cannot cope with its argument genotype.
73
74To compile your code, you may also need some SDK files.
75A simple example is Geno_ftest class (see \ref geno_ftest_example "C++ code" for details).
76A more realistic example is Geno_f4 derived from Geno_fx: refer to
77the available source on developmental encoding and f4 genotype format.*/
78
79class GenoOperators
80{
81public:
82        Param par;
83        SString supported_format; ///<genotype format which is supported by this class ("6" for GenoOper_f6, "Latent" for GenoOper_fLatent, etc.). Must be initialized in constructor.
84        string name; ///<short human-friendly name of this genetic representation/set of genetic operators
85        const char **mutation_method_names; ///<array of names for mutation methods. If initialized (by new const char*[]), must have entries for each method index returned by mutate(geno,chg,METHOD).  If initialized, it is automatically freed by this destructor.
86        GenoOperators() : par(empty_paramtab) { supported_format = "x"; name = "Default"; mutation_method_names = NULL; setDefaults(); }
87
88        /**Used to perform initializations of Param parameters that are not handled by the Param itself
89        (i.e. string parameters or fields that require some complex logic may be initialized here)*/
90        virtual void setDefaults() {}
91
92        /**Checks a genotype for minor mistakes and major errors.
93        \param geno genotype to be checked
94        \param genoname name of the genotype to be checked
95        \retval error_position 1-based (or 1 if no exact error position known)
96        \retval GENOPER_OK when the genotype is fully valid, and can be translated by the converter with \b no modifications nor tweaks*/
97        virtual int checkValidity(const char *geno, const char *genoname) { return GENOPER_NOOPER; }
98
99        /**Validates a genotype. The purpose of this function is to validate
100        obvious/minor errors (range overruns, invalid links, etc.). Do not try
101        to introduce entirely new genes in place of an error.
102        \param geno input/output: genotype to be validated
103        \param genoname name of the genotype to be validated
104        \retval GENOPER_OK must be returned in any case ("did my best to validate")*/
105        virtual int validate(char *&geno, const char *genoname) { return GENOPER_NOOPER; }
106
107        /**Mutates a genotype. Mutation should always change something.
108
109        Avoid unnecessary calls in your code. Every genotype argument passed to this
110        function is first checked, and validated if checkValidity() reported an error (or
111        if there is no checkValidity() implemented). Every resulting genotype is subject
112        to the same procedure, unless GENOPER_OPFAIL was returned. Thus you do not have
113        to call these functions on input and output genotypes, because they are validated
114        if needed.
115        \param geno input/output: genotype to be mutated
116        \param chg output: initialize with a value (in most cases 0..1) corresponding
117        to the amount of genotype mutated. For example, it could be the number of changed
118        genes divided by the total number of genes before mutation.
119        \param chg method: initialize with the ID (number) of mutation method used.
120        \retval GENOPER_OK
121        \retval GENOPER_OPFAIL
122        \sa
123        Mutation example to illustrate the exchange of pointers for \a geno.
124        The mutation adds random letter at the beginning or removes last letter from \a geno.
125        \code
126        {
127        int len=strlen(geno);
128        if (len==0 || random(2)==0) //add
129        {
130        method=0;
131        char* mutated=(char*)malloc(mutated,len+2); //allocate for mutated genotype
132        mutated[0]='A'+random(10); //first char random
133        strcpy(mutated+1,geno); //the rest is original
134        free(geno); //must take care of the original allocation
135        geno=mutated;
136        } else
137        {
138        method=1;
139        geno[len-1]=0; //simply shorten the string - remove last char
140        }
141        chg=1.0/max(len,1); //estimation of mutation strength, divby0-safe
142        } \endcode
143        */
144        virtual int mutate(char *&geno, float& chg, int &method) { method = -1; chg = -1; return GENOPER_NOOPER; }
145
146        /**Crosses over two genotypes. It is sufficient to return only one child (in \a g1) and set \a chg1 only, then \a g2 must be "".
147
148        Avoid unnecessary calls in your code. Every genotype argument passed to this
149        function is first checked, and validated if checkValidity() reported an error (or
150        if there is no checkValidity() implemented). Every resulting genotype is subject
151        to the same procedure, unless GENOPER_OPFAIL was returned. Thus you do not have
152        to call these functions on input and output genotypes, because they are validated
153        if needed.
154        \param g1 input/output: parent1 genotype, initialize with child1
155        \param g2 input/output: parent2 genotype, initialize with child2 if both children are available
156        \param chg1 output: initialize with the fraction of parent1 genes in child1 (parent2 has the rest)
157        \param chg2 output: initialize with the fraction of parent2 genes in child2 (parent1 has the rest)
158        \retval GENOPER_OK
159        \retval GENOPER_OPFAIL
160        \sa mutate() for an example*/
161        virtual int crossOver(char *&g1, char *&g2, float& chg1, float& chg2) { chg1 = chg2 = -1; return GENOPER_NOOPER; }
162
163        /**\return a pointer to the simplest genotype string*/
164        virtual const char* getSimplest() { return NULL; }
165
166        /**Provides color styles for individual characters of the genotype. For efficiency,
167        this function may be approximate and do not perform the full, proper analysis of the syntax.
168        \param geno genotype
169        \param pos 0-based char offset
170        \retval number-encoded visual style (and validity) of the genotype char at \a geno[pos]. Assume white background.
171        \sa GENSTYLE_* macros, like GENSTYLE_BOLD*/
172        virtual uint32_t style(const char *geno, int pos) { return GENSTYLE_RGBS(0, 0, 0, GENSTYLE_NONE); }
173
174        ///currently not used (similarity of two genotypes)
175        virtual float similarity(const char*, const char*) { return GENOPER_NOOPER; }
176        virtual ~GenoOperators() { if (mutation_method_names) { delete[]mutation_method_names; mutation_method_names = NULL; } }
177        //   virtual char getFormat() {return 255;} //returns supported genotype format, for ex. '1'
178        //   virtual int enabled() {return 1;} // should be enabled by default
179
180        /** \name Some helpful methods for you */
181        //@{
182
183        static const int NEUROCLASS_PROP_OFFSET = 100; //a NeuroClass property is identified by some functions below as a single-value integer index, yet a property is either "standard" or "extra" (two separate lists), hence this offset to tell one case from the other.
184
185        static int roulette(const double *probtab, const int count); ///<returns a random index according to probabilities in the \a probtab table or -1 if all probs are zero. \a count is the number of elements in \a probtab.
186        static int roulette(const vector<double> &probtab); ///<returns a random index according to probabilities in the \a probtab table or -1 if all probs are zero.
187        static bool getMinMaxDef(ParamInterface *p, int propindex, double &mn, double &mx, double &def); ///<perhaps a more useful (higher-level) way to obtain min/max/def info for integer and double properties. Returns true if min/max/def was really available (otherwise it is just invented).
188        static bool mutateRandomNeuroClassProperty(Neuro* n); ///<high-level neuron mutation function, will select and mutate a random property of Neuron's NeuroClass. Returns true if successful and some property was actually mutated. Could return false when the NeuroClass of the Neuron have no properties, or when a randomly selected property was not suitable for mutation (for example a string or another non-number type).
189        static int selectRandomNeuroClassProperty(Neuro* n); ///<selects random property (either 0-based extraproperty of NeuroClass or NEUROCLASS_PROP_OFFSET-based standard property of NeuroClass). -1 if Neuroclass has no properties.
190        static double getMutatedNeuroClassProperty(double current, Neuro *n, int propindex); ///<returns value \a current mutated for the property \a propindex of Neuron's NeuroClass or for extraproperty (\a propindex - NEUROCLASS_PROP_OFFSET) of Neuron's NeuroClass. Neuro \a n is used as read-only.
191        static double getMutatedNeuronConnectionWeight(double current); ///<returns mutated value of \a current.
192        static bool mutatePropertyNaive(ParamInterface &p, int propindex); ///<creep-mutate selected property. Returns true when success. mutateProperty() should be used instead of this function.
193        static bool mutateProperty(ParamInterface &p, int propindex); ///<like mutatePropertyNaive(), but uses special probability distributions for some neuron properties.
194        static bool getMutatedProperty(ParamInterface &p, int i, double oldval, double &newval); ///<like mutateProperty(), but just returns \a newval, does not get nor set it using \a p.
195        static double mutateCreepNoLimit(char type, double current, double stddev, bool limit_precision_3digits); ///<returns \a current value creep-mutated with Gaussian distribution and \a stddev standard deviation. Precision limited to 3 digits after comma when \a limit_precision_3digits is true. \a type must be either 'd' (integer) or 'f' (float/double).
196        static double mutateCreep(char type, double current, double mn, double mx, double stddev, bool limit_precision_3digits); ///<just as mutateCreepNoLimit(), but forces mutated value into the [mn,mx] range using the 'reflect' approach.
197        static double mutateCreep(char type, double current, double mn, double mx, bool limit_precision_3digits); ///<just as mutateCreepNoLimit(), but forces mutated value into the [\a mn,\a mx] range using the 'reflect' approach and assumes standard deviation to be a fraction of the mx-mn interval width.
198        static void setIntFromDoubleWithProbabilisticDithering(ParamInterface &p, int index, double value); ///<sets a double value in an integer field; when a value is non-integer, applies stochastic rounding (random "dithering") so that both lower and higher integer value have some chance to be set.
199        static void linearMix(vector<double> &p1, vector<double> &p2, double proportion); ///<mixes two real-valued vectors; inherited proportion should be within [0,1]; 1.0 does not change values (all inherited), 0.5 causes both vectors to become their average, 0.0 swaps values (none inherited).
200        static void linearMix(ParamInterface &p1, int i1, ParamInterface &p2, int i2, double proportion); ///<mixes i1'th and i2'th properties of p1 and p2; inherited proportion should be within [0,1]; 1.0 does not change values (all inherited), 0.5 causes both properties to become their average, 0.0 swaps values (none inherited). For integer properties applies random "dithering" when necessary.
201
202        static int getActiveNeuroClassCount(Model::ShapeType for_shape_type); ///<returns active class count
203        static NeuroClass* getRandomNeuroClass(Model::ShapeType for_shape_type); ///<returns random neuroclass or NULL when no active classes.
204        static NeuroClass* getRandomNeuroClassWithOutput(Model::ShapeType for_shape_type); ///<returns random neuroclass with output or NULL when no active classes.
205        static NeuroClass* getRandomNeuroClassWithInput(Model::ShapeType for_shape_type); ///<returns random neuroclass with input or NULL when no active classes.
206        static NeuroClass* getRandomNeuroClassWithOutputAndWantingNoInputs(Model::ShapeType for_shape_type); ///<returns random sensor or NULL when no active classes. Note: only neuroclasses that prefer 0 inputs are considered, not those that prefer any number of inputs (thus including 0) - see getRandomNeuroClassWithOutputAndWantingNoOrAnyInputs().
207        static NeuroClass* getRandomNeuroClassWithOutputAndWantingNoOrAnyInputs(Model::ShapeType for_shape_type); ///<returns random neuron or NULL when no active classes. Note: both neuroclasses that prefer 0 inputs and those that prefer any number of inputs (thus including 0) are considered.
208        static int getRandomNeuroClassWithOutput(const vector<NeuroClass*>& NClist); ///<returns index of random NeuroClass from the NClist or -1 when no neurons in the list provide output \a NClist list of available neuron classes
209        static int getRandomNeuroClassWithInput(const vector<NeuroClass*>& NClist); ///<returns index of random NeuroClass from the NClist or -1 when no neurons in the list want input(s) \a NClist list of available neuron classes
210        static NeuroClass* parseNeuroClass(char *&s, ModelEnum::ShapeType for_shape_type); ///<returns the longest matching neuroclass that supports for_shape_type (ModelEnum::SHAPETYPE_BALL_AND_STICK or ModelEnum::SHAPETYPE_SOLIDS) or NULL if the string does not begin with an appropriate neuroclass name. Advances the \a s pointer if the neuroclass is found.
211        static Neuro* findNeuro(const Model *m, const NeuroClass *nc); ///<returns pointer to first Neuro of class \a nc, or NULL if there is no such Neuro.
212        static int neuroClassProp(char *&s, NeuroClass *nc, bool also_v1_N_props = false); ///<returns 0-based extraproperty of NeuroClass or NEUROCLASS_PROP_OFFSET-based standard property of NeuroClass, or -1 if the string does not begin with a valid property name. Advance the \a s pointer if success.
213        static bool canStartNeuroClassName(const char firstchar); ///<determines if \a firstchar may start NeuroClass name. If not, it might start NeuroClass' (or Neuro's) property name.
214
215        static bool isWS(const char c); ///<is \a c a whitespace char?
216        static void skipWS(char *&s); ///<advances pointer \a s skipping whitespaces.
217        static bool areAlike(char*, char*); ///<compares two text strings skipping whitespaces. Returns 1 when equal, 0 when different.
218        static char* strchr_no0(const char *str, char ch); ///<like strchr, but does not find ascii=0 char in \a str.
219
220        static double probOfModifier(const char* mod_def); //returns a probability of a modifier: either 1.0 (default) or parsed value if the probability is given in the appended parentheses (...). For example, "G(0.3)" will return 0.3, and "G" will return 1.0.
221        static char getRandomModifier(const char *choices); ///<returns a random character from \a choices (note that the special syntax with probabilities in parentheses is supported), or 0 when \a choices is empty or probabilities were insufficient for a random chance to choose some character.
222        static char getRandomColorModifier(const char *choices, const char *color_modifiers); //finds all color_modifiers in choices and returns a color modifier drawn randomly proportionally to the optional probabilities defined in choices. Returns 0 when \a choices does not have any color modifier with a positive probability.
223        static string simplifiedModifiers_rR(const string& str); ///<finds all 'r' and 'R' in \a str and returns the shortest sequence of 'r' and 'R that is equivalent to all these found in \a str.
224        static string simplifiedModifiersFixedOrder(const char *str_of_char_pairs, vector<int> &char_counts); ///<returns a sequence of chars from \a str_of_char_pairs based on how many times each char occurred in \a char_counts. Assume that an even-index char and the following odd-index char have the opposite influence, so they cancel out. We don't use this function, because a fixed order imposed by this function means that the number of different parameter values produced by a sequence of modifiers is lowered (N same-letter upper- and lower-case chars yield only 2*N different values). Due to how modifiers work, the effect of aaA, aAa, Aaa etc. is different (N same-letter upper- and lower-case chars yield 2^N different values), so simplifying modifiers should not impose any order, should not interfere with their original order, and should not cancel out antagonistic modifiers - see \a simplifiedModifiers() and geneprops_test.cpp.
225        //@}
226        static string simplifiedModifiers(const string &original, const char* colorgenes); ///<from the \a original sequence removes modifiers that are too numerous (exceeding a defined threshold number), starting the removal from the least-significant, leftmost (="oldest" when interpreting the sequence from left to right) ones. Contrary to \a simplifiedModifiersFixedOrder(), this kind of simplification preserves 2^N different sequences for each upper/lower-case modifier and thus 2^N different values of a given property (see geneprops.cpp), but the values resulting from these sequences constitute a landscape not as easy for optimization as in the case of 2*N, where the effect of each mutation could be independent and additive (no epistasis). So for a given sequence length, the 2^N case allows for a higher resolution at the cost of a more rugged fitness landscape than the 2*N case.
227};
228
229#endif
Note: See TracBrowser for help on using the repository browser.