[286] | 1 | // This file is a part of Framsticks SDK. http://www.framsticks.com/ |
---|
[1313] | 2 | // Copyright (C) 1999-2024 Maciej Komosinski and Szymon Ulatowski. |
---|
[286] | 3 | // See LICENSE.txt for details. |
---|
[109] | 4 | |
---|
[121] | 5 | #ifndef _GENO_OPERATORS_H_ |
---|
| 6 | #define _GENO_OPERATORS_H_ |
---|
[109] | 7 | |
---|
| 8 | #include <common/nonstd.h> |
---|
| 9 | #include <frams/model/model.h> |
---|
| 10 | |
---|
| 11 | /** @file */ |
---|
| 12 | |
---|
| 13 | /** \name Return codes for genetic operators */ |
---|
| 14 | //@{ |
---|
| 15 | #define GENOPER_OK 0 ///<operation successful |
---|
| 16 | #define GENOPER_OPFAIL -1 ///<operation failed or could not be completed |
---|
| 17 | #define GENOPER_REPAIR -2 ///<do not use in Geno_fx. GenMan uses it in checkValidity()... but will not. only f4 uses it |
---|
| 18 | #define GENOPER_NOOPER -3 ///<do not use in Geno_fx. GenMan uses it for "no suitable operator for this genotype format". It is also the value returned by the default (not overridden) virtual operator methods of the GenoOperators base class. |
---|
| 19 | //@} |
---|
| 20 | |
---|
| 21 | /** \name gene/character predefined styles (for style() method); the non-zero values are distinct single bits */ |
---|
| 22 | //@{ |
---|
| 23 | #define GENSTYLE_NONE 0 ///<no style specified (=normal font) |
---|
| 24 | #define GENSTYLE_INVALID 1 ///<this char cannot be accepted |
---|
| 25 | #define GENSTYLE_BOLD 2 ///<bold |
---|
| 26 | #define GENSTYLE_ITALIC 4 ///<italic |
---|
| 27 | #define GENSTYLE_STRIKEOUT 8 ///<strikeout (not recommended) |
---|
| 28 | //UNDERLINE used to mark errors |
---|
| 29 | //@} |
---|
| 30 | |
---|
| 31 | /** \name other useful style/color macros. All macro arguments and all expansions are fully parenthesized, so the macros can be safely used with compound arguments and inside larger expressions. */ |
---|
| 32 | //@{ |
---|
[247] | 33 | #define GENRGB(r,g,b) ((uint32_t)(((uint8_t)(r)|((uint16_t)((uint8_t)(g))<<8))|(((uint32_t)(uint8_t)(b))<<16))) ///<packs three 8-bit components into one uint32_t: r in bits 0-7, g in bits 8-15, b in bits 16-23 |
---|
| 34 | #define GENSTYLE_RGBS(r,g,b,s) ((((uint32_t)((uint8_t)(s)))<<24) | GENRGB(r,g,b)) ///<packs the 8-bit style \a s into bits 24-31 and the color components into bits 0-23 |
---|
| 35 | #define GENSTYLE_CS(rgb,s) ((((uint32_t)((uint8_t)(s)))<<24) | (rgb)) ///<packs the 8-bit style \a s into bits 24-31 and combines it (bitwise OR) with the already packed color \a rgb |
---|
[109] | 36 | |
---|
| 37 | #define GENGETSTYLE(style) ((style)>>24) ///<extracts the style part (bits 24 and above) |
---|
| 38 | #define GENGETCOLOR(style) ((style)&0x00ffffff) ///<extracts the packed color (bits 0-23) |
---|
| 39 | #define GENGET_R(style) ((style)&0xff) ///<extracts bits 0-7 (the r component) |
---|
| 40 | #define GENGET_G(style) (((style)>>8)&0xff) ///<extracts bits 8-15 (the g component) |
---|
| 41 | #define GENGET_B(style) (((style)>>16)&0xff) ///<extracts bits 16-23 (the b component) |
---|
| 42 | |
---|
| 43 | #define GENCOLOR_TEXT GENRGB(0,0,0) ///<recommended color to use for text genes |
---|
| 44 | #define GENCOLOR_NUMBER GENRGB(200,0,0) ///<recommended color to use for number genes |
---|
| 45 | //@} |
---|
| 46 | |
---|
| 47 | ///Base class for genetic operations on genotypes of some genetic format |
---|
| 48 | /**\author Maciej Komosinski |
---|
| 49 | |
---|
| 50 | When designing genetic operations on some representation, inherit your class |
---|
[121] | 51 | (for example GenoOper_fMy) from GenoOperators. Define some methods, |
---|
[109] | 52 | like mutate(), in your class, to allow for evolution. |
---|
| 53 | Ensure they have the same names and arguments as the corresponding |
---|
| 54 | virtual methods in GenoOperators. Set the 'supported_format' variable to the |
---|
| 55 | appropriate genetic representation ID. |
---|
| 56 | Whenever arguments are genotypes, they are without |
---|
| 57 | trailing characters which describe genetic format |
---|
| 58 | (for example, "p:", not "//0\np:"). |
---|
| 59 | When allocating/reallocating char* parameters, use malloc, free, realloc, strdup, etc. |
---|
| 60 | Do not use new and delete. |
---|
| 61 | |
---|
| 62 | All the methods you might define are: |
---|
| 63 | - checkValidity() |
---|
| 64 | - validate() |
---|
| 65 | - mutate() |
---|
| 66 | - crossOver() |
---|
| 67 | - getSimplest() |
---|
| 68 | - style() |
---|
| 69 | |
---|
| 70 | Your code must not cause errors (like invalid memory access, memory |
---|
| 71 | leaks) on any arguments, even 'random' ones. GENOPER_OPFAIL should |
---|
| 72 | be returned when an operator cannot cope with its argument genotype. |
---|
| 73 | |
---|
[287] | 74 | To compile your code, you may also need some SDK files. |
---|
[109] | 75 | A simple example is Geno_ftest class (see \ref geno_ftest_example "C++ code" for details). |
---|
| 76 | A more realistic example is Geno_f4 derived from Geno_fx: refer to |
---|
| 77 | the available source on developmental encoding and f4 genotype format.*/ |
---|
| 78 | |
---|
[121] | 79 | class GenoOperators |
---|
[109] | 80 | { |
---|
[675] | 81 | public: |
---|
| 82 | Param par; |
---|
[955] | 83 | SString supported_format; ///<genotype format which is supported by this class ("6" for GenoOper_f6, "Latent" for GenoOper_fLatent, etc.). Must be initialized in constructor. |
---|
| 84 | string name; ///<short human-friendly name of this genetic representation/set of genetic operators |
---|
[675] | 85 | const char **mutation_method_names; ///<array of names for mutation methods. If initialized (by new const char*[]), must have entries for each method index returned by mutate(geno,chg,METHOD). If initialized, the array itself is automatically freed (with delete[]) by the destructor of this class. |
---|
[955] | 86 | GenoOperators() : par(empty_paramtab) { supported_format = "x"; name = "Default"; mutation_method_names = NULL; setDefaults(); } |
---|
[109] | 87 | |
---|
[675] | 88 | /**Used to perform initializations of Param parameters that are not handled by the Param itself |
---|
| 89 | (i.e. string parameters or fields that require some complex logic may be initialized here). Note that the call made by the GenoOperators constructor runs this empty base version, not an override from a derived class, because C++ does not dispatch virtual calls to derived classes while the base class is being constructed.*/ |
---|
| 90 | virtual void setDefaults() {} |
---|
[109] | 91 | |
---|
[675] | 92 | /**Checks a genotype for minor mistakes and major errors. This default implementation performs no check and returns GENOPER_NOOPER. |
---|
| 93 | \param geno genotype to be checked |
---|
| 94 | \param genoname name of the genotype to be checked |
---|
| 95 | \retval error_position 1-based (or 1 if no exact error position known) |
---|
| 96 | \retval GENOPER_OK when the genotype is fully valid, and can be translated by the converter with \b no modifications nor tweaks*/ |
---|
| 97 | virtual int checkValidity(const char *geno, const char *genoname) { return GENOPER_NOOPER; } |
---|
[109] | 98 | |
---|
[675] | 99 | /**Validates a genotype. The purpose of this function is to validate |
---|
| 100 | obvious/minor errors (range overruns, invalid links, etc.). Do not try |
---|
| 101 | to introduce entirely new genes in place of an error. |
---|
| 102 | \param geno input/output: genotype to be validated |
---|
| 103 | \param genoname name of the genotype to be validated |
---|
| 104 | \retval GENOPER_OK must be returned in any case ("did my best to validate"); only this default, unimplemented version returns GENOPER_NOOPER*/ |
---|
| 105 | virtual int validate(char *&geno, const char *genoname) { return GENOPER_NOOPER; } |
---|
[109] | 106 | |
---|
[675] | 107 | /**Mutates a genotype. Mutation should always change something. This default implementation leaves \a geno untouched, sets \a method and \a chg to -1 and returns GENOPER_NOOPER. |
---|
[109] | 108 | |
---|
[675] | 109 | Avoid unnecessary calls in your code. Every genotype argument passed to this |
---|
| 110 | function is first checked, and validated if checkValidity() reported an error (or |
---|
| 111 | if there is no checkValidity() implemented). Every resulting genotype is subject |
---|
| 112 | to the same procedure, unless GENOPER_OPFAIL was returned. Thus you do not have |
---|
| 113 | to call these functions on input and output genotypes, because they are validated |
---|
| 114 | if needed. |
---|
| 115 | \param geno input/output: genotype to be mutated |
---|
| 116 | \param chg output: initialize with a value (in most cases 0..1) corresponding |
---|
| 117 | to the amount of genotype mutated. For example, it could be the number of changed |
---|
| 118 | genes divided by the total number of genes before mutation. |
---|
| 119 | \param method output: initialize with the ID (number) of mutation method used. |
---|
| 120 | \retval GENOPER_OK |
---|
| 121 | \retval GENOPER_OPFAIL |
---|
| 122 | \sa |
---|
[1233] | 123 | Mutation example to illustrate the exchange of pointers for \a geno. |
---|
| 124 | The mutation adds random letter at the beginning or removes last letter from \a geno. |
---|
[675] | 125 | \code |
---|
| 126 | { |
---|
[749] | 127 | int len=strlen(geno); |
---|
| 128 | if (len==0 || random(2)==0) //add |
---|
| 129 | { |
---|
| 130 | method=0; |
---|
| 131 | char* mutated=(char*)malloc(len+2); //allocate for mutated genotype: one extra char plus the terminating zero |
---|
| 132 | mutated[0]='A'+random(10); //first char random |
---|
| 133 | strcpy(mutated+1,geno); //the rest is original |
---|
| 134 | free(geno); //must take care of the original allocation |
---|
| 135 | geno=mutated; |
---|
| 136 | } else |
---|
| 137 | { |
---|
| 138 | method=1; |
---|
| 139 | geno[len-1]=0; //simply shorten the string - remove last char |
---|
| 140 | } |
---|
| 141 | chg=1.0/max(len,1); //estimation of mutation strength, divby0-safe |
---|
[675] | 142 | } \endcode |
---|
| 143 | */ |
---|
| 144 | virtual int mutate(char *&geno, float& chg, int &method) { method = -1; chg = -1; return GENOPER_NOOPER; } |
---|
[109] | 145 | |
---|
[1313] | 146 | /**Crosses over two genotypes. It is sufficient to return only one child (in \a g1) and set \a chg1 only, then \a g2 must be "". This default implementation leaves both genotypes untouched, sets \a chg1 and \a chg2 to -1 and returns GENOPER_NOOPER. |
---|
[109] | 147 | |
---|
[675] | 148 | Avoid unnecessary calls in your code. Every genotype argument passed to this |
---|
| 149 | function is first checked, and validated if checkValidity() reported an error (or |
---|
| 150 | if there is no checkValidity() implemented). Every resulting genotype is subject |
---|
| 151 | to the same procedure, unless GENOPER_OPFAIL was returned. Thus you do not have |
---|
| 152 | to call these functions on input and output genotypes, because they are validated |
---|
| 153 | if needed. |
---|
| 154 | \param g1 input/output: parent1 genotype, initialize with child1 |
---|
| 155 | \param g2 input/output: parent2 genotype, initialize with child2 if both children are available |
---|
| 156 | \param chg1 output: initialize with the fraction of parent1 genes in child1 (parent2 has the rest) |
---|
| 157 | \param chg2 output: initialize with the fraction of parent2 genes in child2 (parent1 has the rest) |
---|
| 158 | \retval GENOPER_OK |
---|
| 159 | \retval GENOPER_OPFAIL |
---|
| 160 | \sa mutate() for an example*/ |
---|
| 161 | virtual int crossOver(char *&g1, char *&g2, float& chg1, float& chg2) { chg1 = chg2 = -1; return GENOPER_NOOPER; } |
---|
[109] | 162 | |
---|
[675] | 163 | /**\return a pointer to the simplest genotype string (this default implementation returns NULL)*/ |
---|
| 164 | virtual const char* getSimplest() { return NULL; } |
---|
[109] | 165 | |
---|
[1273] | 166 | /**Provides color styles for individual characters of the genotype. For efficiency, |
---|
| 167 | this function may be approximate and need not perform the full, proper analysis of the syntax. |
---|
[675] | 168 | \param geno genotype |
---|
| 169 | \param pos 0-based char offset |
---|
[1273] | 170 | \return number-encoded visual style (and validity) of the genotype char at \a geno[pos]. Assume white background. This default implementation returns black color with GENSTYLE_NONE for every char. |
---|
[675] | 171 | \sa GENSTYLE_* macros, like GENSTYLE_BOLD*/ |
---|
| 172 | virtual uint32_t style(const char *geno, int pos) { return GENSTYLE_RGBS(0, 0, 0, GENSTYLE_NONE); } |
---|
[109] | 173 | |
---|
[675] | 174 | ///currently not used (similarity of two genotypes); this default implementation returns GENOPER_NOOPER converted to float |
---|
| 175 | virtual float similarity(const char*, const char*) { return GENOPER_NOOPER; } |
---|
| 176 | virtual ~GenoOperators() { if (mutation_method_names) { delete[]mutation_method_names; mutation_method_names = NULL; } } |
---|
| 177 | // virtual char getFormat() {return 255;} //returns supported genotype format, for ex. '1' |
---|
| 178 | // virtual int enabled() {return 1;} // should be enabled by default |
---|
[109] | 179 | |
---|
[675] | 180 | /** \name Some helpful methods for you */ |
---|
| 181 | //@{ |
---|
[957] | 182 | |
---|
[967] | 183 | static const int NEUROCLASS_PROP_OFFSET = 100; ///<a NeuroClass property is identified by some functions below as a single-value integer index, yet a property is either "standard" or "extra" (two separate lists), hence this offset to tell one case from the other. |
---|
[957] | 184 | |
---|
[1313] | 185 | static int roulette(const double *probtab, const int count); ///<returns a random index according to probabilities in the \a probtab table or -1 if all probs are zero. \a count is the number of elements in \a probtab. |
---|
| 186 | static int roulette(const vector<double> &probtab); ///<returns a random index according to probabilities in the \a probtab table or -1 if all probs are zero. |
---|
[675] | 187 | static bool getMinMaxDef(ParamInterface *p, int propindex, double &mn, double &mx, double &def); ///<perhaps a more useful (higher-level) way to obtain min/max/def info for integer and double properties. Returns true if min/max/def was really available (otherwise it is just invented). |
---|
[967] | 188 | static bool mutateRandomNeuroClassProperty(Neuro* n); ///<high-level neuron mutation function, will select and mutate a random property of Neuron's NeuroClass. Returns true if successful and some property was actually mutated. Could return false when the NeuroClass of the Neuron has no properties, or when a randomly selected property was not suitable for mutation (for example a string or another non-number type). |
---|
| 189 | static int selectRandomNeuroClassProperty(Neuro* n); ///<selects random property (either 0-based extraproperty of NeuroClass or NEUROCLASS_PROP_OFFSET-based standard property of NeuroClass). -1 if NeuroClass has no properties. |
---|
[1233] | 190 | static double getMutatedNeuroClassProperty(double current, Neuro *n, int propindex); ///<returns value \a current mutated for the property \a propindex of Neuron's NeuroClass or for extraproperty (\a propindex - NEUROCLASS_PROP_OFFSET) of Neuron's NeuroClass. Neuro \a n is used as read-only. |
---|
| 191 | static double getMutatedNeuronConnectionWeight(double current); ///<returns mutated value of \a current. |
---|
[675] | 192 | static bool mutatePropertyNaive(ParamInterface &p, int propindex); ///<creep-mutate selected property. Returns true when success. mutateProperty() should be used instead of this function. |
---|
| 193 | static bool mutateProperty(ParamInterface &p, int propindex); ///<like mutatePropertyNaive(), but uses special probability distributions for some neuron properties. |
---|
[1233] | 194 | static bool getMutatedProperty(ParamInterface &p, int i, double oldval, double &newval); ///<like mutateProperty(), but just returns \a newval, does not get nor set it using \a p. |
---|
| 195 | static double mutateCreepNoLimit(char type, double current, double stddev, bool limit_precision_3digits); ///<returns \a current value creep-mutated with Gaussian distribution and \a stddev standard deviation. Precision limited to 3 digits after comma when \a limit_precision_3digits is true. \a type must be either 'd' (integer) or 'f' (float/double). |
---|
[751] | 196 | static double mutateCreep(char type, double current, double mn, double mx, double stddev, bool limit_precision_3digits); ///<just as mutateCreepNoLimit(), but forces mutated value into the [mn,mx] range using the 'reflect' approach. |
---|
[1233] | 197 | static double mutateCreep(char type, double current, double mn, double mx, bool limit_precision_3digits); ///<just as mutateCreepNoLimit(), but forces mutated value into the [\a mn,\a mx] range using the 'reflect' approach and assumes standard deviation to be a fraction of the mx-mn interval width. |
---|
[1254] | 198 | static void setIntFromDoubleWithProbabilisticDithering(ParamInterface &p, int index, double value); ///<sets a double value in an integer field; when a value is non-integer, applies stochastic rounding (random "dithering") so that both lower and higher integer value have some chance to be set. |
---|
[749] | 199 | static void linearMix(vector<double> &p1, vector<double> &p2, double proportion); ///<mixes two real-valued vectors; inherited proportion should be within [0,1]; 1.0 does not change values (all inherited), 0.5 causes both vectors to become their average, 0.0 swaps values (none inherited). |
---|
| 200 | static void linearMix(ParamInterface &p1, int i1, ParamInterface &p2, int i2, double proportion); ///<mixes i1'th and i2'th properties of p1 and p2; inherited proportion should be within [0,1]; 1.0 does not change values (all inherited), 0.5 causes both properties to become their average, 0.0 swaps values (none inherited). For integer properties applies random "dithering" when necessary. |
---|
[1233] | 201 | |
---|
[935] | 202 | static int getActiveNeuroClassCount(Model::ShapeType for_shape_type); ///<returns active class count |
---|
| 203 | static NeuroClass* getRandomNeuroClass(Model::ShapeType for_shape_type); ///<returns random neuroclass or NULL when no active classes. |
---|
| 204 | static NeuroClass* getRandomNeuroClassWithOutput(Model::ShapeType for_shape_type); ///<returns random neuroclass with output or NULL when no active classes. |
---|
| 205 | static NeuroClass* getRandomNeuroClassWithInput(Model::ShapeType for_shape_type); ///<returns random neuroclass with input or NULL when no active classes. |
---|
[1226] | 206 | static NeuroClass* getRandomNeuroClassWithOutputAndWantingNoInputs(Model::ShapeType for_shape_type); ///<returns random sensor or NULL when no active classes. Note: only neuroclasses that prefer 0 inputs are considered, not those that prefer any number of inputs (thus including 0) - see getRandomNeuroClassWithOutputAndWantingNoOrAnyInputs(). |
---|
| 207 | static NeuroClass* getRandomNeuroClassWithOutputAndWantingNoOrAnyInputs(Model::ShapeType for_shape_type); ///<returns random neuron or NULL when no active classes. Note: both neuroclasses that prefer 0 inputs and those that prefer any number of inputs (thus including 0) are considered. |
---|
[1233] | 208 | static int getRandomNeuroClassWithOutput(const vector<NeuroClass*>& NClist); ///<returns index of random NeuroClass from the NClist or -1 when no neurons in the list provide output \a NClist list of available neuron classes |
---|
| 209 | static int getRandomNeuroClassWithInput(const vector<NeuroClass*>& NClist); ///<returns index of random NeuroClass from the NClist or -1 when no neurons in the list want input(s) \a NClist list of available neuron classes |
---|
| 210 | static NeuroClass* parseNeuroClass(char *&s, ModelEnum::ShapeType for_shape_type); ///<returns the longest matching neuroclass that supports for_shape_type (ModelEnum::SHAPETYPE_BALL_AND_STICK or ModelEnum::SHAPETYPE_SOLIDS) or NULL if the string does not begin with an appropriate neuroclass name. Advances the \a s pointer if the neuroclass is found. |
---|
| 211 | static Neuro* findNeuro(const Model *m, const NeuroClass *nc); ///<returns pointer to first Neuro of class \a nc, or NULL if there is no such Neuro. |
---|
| 212 | static int neuroClassProp(char *&s, NeuroClass *nc, bool also_v1_N_props = false); ///<returns 0-based extraproperty of NeuroClass or NEUROCLASS_PROP_OFFSET-based standard property of NeuroClass, or -1 if the string does not begin with a valid property name. Advance the \a s pointer if success. |
---|
| 213 | static bool canStartNeuroClassName(const char firstchar); ///<determines if \a firstchar may start NeuroClass name. If not, it might start NeuroClass' (or Neuro's) property name. |
---|
| 214 | |
---|
| 215 | static bool isWS(const char c); ///<is \a c a whitespace char? |
---|
| 216 | static void skipWS(char *&s); ///<advances pointer \a s skipping whitespaces. |
---|
[675] | 217 | static bool areAlike(char*, char*); ///<compares two text strings skipping whitespaces. Returns true when equal, false when different. |
---|
[1313] | 218 | static char* strchr_no0(const char *str, char ch); ///<like strchr, but does not find ascii=0 char in \a str. |
---|
[1023] | 219 | |
---|
[1313] | 220 | static double probOfModifier(const char* mod_def); ///<returns a probability of a modifier: either 1.0 (default) or parsed value if the probability is given in the appended parentheses (...). For example, "G(0.3)" will return 0.3, and "G" will return 1.0. |
---|
| 221 | static char getRandomModifier(const char *choices); ///<returns a random character from \a choices (note that the special syntax with probabilities in parentheses is supported), or 0 when \a choices is empty or probabilities were insufficient for a random chance to choose some character. |
---|
| 222 | static char getRandomColorModifier(const char *choices, const char *color_modifiers); ///<finds all color_modifiers in choices and returns a color modifier drawn randomly proportionally to the optional probabilities defined in choices. Returns 0 when \a choices does not have any color modifier with a positive probability. |
---|
[1243] | 223 | static string simplifiedModifiers_rR(const string& str); ///<finds all 'r' and 'R' in \a str and returns the shortest sequence of 'r' and 'R' that is equivalent to all these found in \a str. |
---|
| 224 | static string simplifiedModifiersFixedOrder(const char *str_of_char_pairs, vector<int> &char_counts); ///<returns a sequence of chars from \a str_of_char_pairs based on how many times each char occurred in \a char_counts. Assume that an even-index char and the following odd-index char have the opposite influence, so they cancel out. We don't use this function, because a fixed order imposed by this function means that the number of different parameter values produced by a sequence of modifiers is lowered (N same-letter upper- and lower-case chars yield only 2*N different values). Due to how modifiers work, the effect of aaA, aAa, Aaa etc. is different (N same-letter upper- and lower-case chars yield 2^N different values), so simplifying modifiers should not impose any order, should not interfere with their original order, and should not cancel out antagonistic modifiers - see \a simplifiedModifiers() and geneprops_test.cpp. |
---|
[675] | 225 | //@} |
---|
[1313] | 226 | static string simplifiedModifiers(const string &original, const char* colorgenes); ///<from the \a original sequence removes modifiers that are too numerous (exceeding a defined threshold number), starting the removal from the least-significant, leftmost (="oldest" when interpreting the sequence from left to right) ones. Contrary to \a simplifiedModifiersFixedOrder(), this kind of simplification preserves 2^N different sequences for each upper/lower-case modifier and thus 2^N different values of a given property (see geneprops.cpp), but the values resulting from these sequences constitute a landscape not as easy for optimization as in the case of 2*N, where the effect of each mutation could be independent and additive (no epistasis). So for a given sequence length, the 2^N case allows for a higher resolution at the cost of a more rugged fitness landscape than the 2*N case. |
---|
[109] | 227 | }; |
---|
| 228 | |
---|
| 229 | #endif |
---|