1 | // This file is a part of Framsticks SDK. http://www.framsticks.com/ |
---|
2 | // Copyright (C) 1999-2023 Maciej Komosinski and Szymon Ulatowski. |
---|
3 | // See LICENSE.txt for details. |
---|
4 | |
---|
#include <ctype.h> //isupper()
#include "genooperators.h"
#include <common/log.h>
#include <common/nonstd_math.h>
#include <frams/util/rndutil.h>
#include <math.h> // floor(), fmod()
#include <algorithm> // std::min, std::max
---|
11 | |
---|
12 | // |
---|
13 | // custom distributions for mutations of various parameters |
---|
14 | // |
---|
// Tables below are consumed by CustomRnd() (see rndutil.h). Judging by their layout,
// the first element is the number of (min,max) range pairs that follow; one pair is
// selected at random and a uniform value is drawn from it — TODO confirm against CustomRnd().
static double distrib_force[] = // for '!'
{
	3, // distribution 0 -__/ +1
	0.001, 0.2, // "slow" neurons
	0.001, 1,
	1, 1, // "fast" neurons
};
static double distrib_inertia[] = // for '='
{
	2, // distribution 0 |..- +1
	0, 0, // "fast" neurons
	0.7, 0.98,
};
static double distrib_sigmo[] = // for '/'
{
	5, // distribution -999 -..-^-..- +999
	-999, -999, //"perceptron"
	999, 999,
	-5, -1, // nonlinear
	1, 5,
	-1, 1, // ~linear
};
---|
37 | /* |
---|
38 | static double distrib_weight[] = |
---|
39 | { |
---|
40 | 5, // distribution -999 _-^_^-_ +999 |
---|
41 | -999, 999, // each weight value may be useful, especially... |
---|
42 | -5, -0.3, // ...little non-zero values |
---|
43 | -3, -0.6, |
---|
44 | 0.6, 3, |
---|
45 | 0.3, 5, |
---|
46 | }; |
---|
47 | */ |
---|
48 | |
---|
49 | int GenoOperators::roulette(const double *probtab, const int count) |
---|
50 | { |
---|
51 | double sum = 0; |
---|
52 | int i; |
---|
53 | for (i = 0; i < count; i++) sum += probtab[i]; |
---|
54 | double sel = rndDouble(sum); |
---|
55 | for (sum = 0, i = 0; i < count; i++) { sum += probtab[i]; if (sel < sum) return i; } |
---|
56 | return -1; |
---|
57 | } |
---|
58 | |
---|
59 | bool GenoOperators::getMinMaxDef(ParamInterface *p, int i, double &mn, double &mx, double &def) |
---|
60 | { |
---|
61 | mn = mx = def = 0; |
---|
62 | int defined = 0; |
---|
63 | if (p->type(i)[0] == 'f') |
---|
64 | { |
---|
65 | double _mn = 0, _mx = 1, _def = 0.5; |
---|
66 | defined = p->getMinMaxDouble(i, _mn, _mx, _def); |
---|
67 | if (defined == 1) _mx = _mn + 1000.0; //only min was defined, so let's set some arbitrary range, just to have some freedom. Assumes _mn is not close to maxdouble... |
---|
68 | if (_mx < _mn && defined == 3) //only default was defined, so let's assume some arbitrary range. Again, no check for min/maxdouble... |
---|
69 | { |
---|
70 | _mn = _def - 500.0; |
---|
71 | _mx = _def + 500.0; |
---|
72 | } |
---|
73 | if (defined < 3) _def = (_mn + _mx) / 2.0; |
---|
74 | mn = _mn; mx = _mx; def = _def; |
---|
75 | } |
---|
76 | if (p->type(i)[0] == 'd') |
---|
77 | { |
---|
78 | paInt _mn = 0, _mx = 1, _def = 0; |
---|
79 | defined = p->getMinMaxInt(i, _mn, _mx, _def); |
---|
80 | if (defined == 1) _mx = _mn + 1000; //only min was defined, so let's set some arbitrary range, just to have some freedom. Assumes _mn is not close to maxint... |
---|
81 | if (_mx < _mn && defined == 3) //only default was defined, so let's assume some arbitrary range. Again, no check for min/maxint... |
---|
82 | { |
---|
83 | _mn = _def - 500; |
---|
84 | _mx = _def + 500; |
---|
85 | } |
---|
86 | if (defined < 3) _def = (_mn + _mx) / 2; |
---|
87 | mn = _mn; mx = _mx; def = _def; |
---|
88 | } |
---|
89 | return defined == 3; |
---|
90 | } |
---|
91 | |
---|
92 | bool GenoOperators::mutateRandomNeuroClassProperty(Neuro* n) |
---|
93 | { |
---|
94 | bool mutated = false; |
---|
95 | int prop = selectRandomNeuroClassProperty(n); |
---|
96 | if (prop >= 0) |
---|
97 | { |
---|
98 | if (prop >= GenoOperators::NEUROCLASS_PROP_OFFSET) |
---|
99 | { |
---|
100 | SyntParam par = n->classProperties(); //commits changes when this object is destroyed |
---|
101 | mutated = mutateProperty(par, prop - GenoOperators::NEUROCLASS_PROP_OFFSET); |
---|
102 | } |
---|
103 | else |
---|
104 | { |
---|
105 | Param par = n->extraProperties(); |
---|
106 | mutated = mutateProperty(par, prop); |
---|
107 | } |
---|
108 | } |
---|
109 | return mutated; |
---|
110 | } |
---|
111 | |
---|
112 | int GenoOperators::selectRandomNeuroClassProperty(Neuro *n) |
---|
113 | { |
---|
114 | int neuext = n->extraProperties().getPropCount(), |
---|
115 | neucls = n->getClass() == NULL ? 0 : n->getClass()->getProperties().getPropCount(); |
---|
116 | if (neuext + neucls == 0) return -1; //no properties in this neuron |
---|
117 | int index = rndUint(neuext + neucls); |
---|
118 | if (index >= neuext) index = index - neuext + NEUROCLASS_PROP_OFFSET; |
---|
119 | return index; |
---|
120 | } |
---|
121 | |
---|
122 | double GenoOperators::getMutatedNeuroClassProperty(double current, Neuro *n, int i) |
---|
123 | { |
---|
124 | if (i == -1) |
---|
125 | { |
---|
126 | logPrintf("GenoOperators", "getMutatedNeuroClassProperty", LOG_WARN, "Deprecated usage in C++ source: to mutate connection weight, use getMutatedNeuronConnectionWeight()."); |
---|
127 | return getMutatedNeuronConnectionWeight(current); |
---|
128 | } |
---|
129 | Param p; |
---|
130 | if (i >= NEUROCLASS_PROP_OFFSET) { i -= NEUROCLASS_PROP_OFFSET; p = n->getClass()->getProperties(); } |
---|
131 | else p = n->extraProperties(); |
---|
132 | double newval = current; |
---|
133 | /*bool ok=*/getMutatedProperty(p, i, current, newval); |
---|
134 | return newval; |
---|
135 | } |
---|
136 | |
---|
137 | double GenoOperators::getMutatedNeuronConnectionWeight(double current) |
---|
138 | { |
---|
139 | return mutateCreepNoLimit('f', current, 2, true); |
---|
140 | } |
---|
141 | |
---|
142 | bool GenoOperators::mutatePropertyNaive(ParamInterface &p, int i) |
---|
143 | { |
---|
144 | double mn, mx, df; |
---|
145 | if (p.type(i)[0] != 'f' && p.type(i)[0] != 'd') return false; //don't know how to mutate |
---|
146 | getMinMaxDef(&p, i, mn, mx, df); |
---|
147 | |
---|
148 | ExtValue ev; |
---|
149 | p.get(i, ev); |
---|
150 | ev.setDouble(mutateCreep(p.type(i)[0], ev.getDouble(), mn, mx, true)); |
---|
151 | p.set(i, ev); |
---|
152 | return true; |
---|
153 | } |
---|
154 | |
---|
155 | bool GenoOperators::mutateProperty(ParamInterface &p, int i) |
---|
156 | { |
---|
157 | double newval; |
---|
158 | ExtValue ev; |
---|
159 | p.get(i, ev); |
---|
160 | bool ok = getMutatedProperty(p, i, ev.getDouble(), newval); |
---|
161 | if (ok) { ev.setDouble(newval); p.set(i, ev); } |
---|
162 | return ok; |
---|
163 | } |
---|
164 | |
---|
165 | bool GenoOperators::getMutatedProperty(ParamInterface &p, int i, double oldval, double &newval) |
---|
166 | { |
---|
167 | newval = 0; |
---|
168 | if (p.type(i)[0] != 'f' && p.type(i)[0] != 'd') return false; //don't know how to mutate |
---|
169 | const char *n = p.id(i), *na = p.name(i); |
---|
170 | if (strcmp(n, "si") == 0 && strcmp(na, "Sigmoid") == 0) newval = round(CustomRnd(distrib_sigmo), 3); else |
---|
171 | if (strcmp(n, "in") == 0 && strcmp(na, "Inertia") == 0) newval = round(CustomRnd(distrib_inertia), 3); else |
---|
172 | if (strcmp(n, "fo") == 0 && strcmp(na, "Force") == 0) newval = round(CustomRnd(distrib_force), 3); else |
---|
173 | { |
---|
174 | double mn, mx, df; |
---|
175 | getMinMaxDef(&p, i, mn, mx, df); |
---|
176 | newval = mutateCreep(p.type(i)[0], oldval, mn, mx, true); |
---|
177 | } |
---|
178 | return true; |
---|
179 | } |
---|
180 | |
---|
181 | double GenoOperators::mutateCreepNoLimit(char type, double current, double stddev, bool limit_precision_3digits) |
---|
182 | { |
---|
183 | double result = RndGen.Gauss(current, stddev); |
---|
184 | if (type == 'd') |
---|
185 | { |
---|
186 | result = int(result + 0.5); |
---|
187 | if (result == current) result += rndUint(2) * 2 - 1; //force some change |
---|
188 | } |
---|
189 | else |
---|
190 | { |
---|
191 | if (limit_precision_3digits) |
---|
192 | result = round(result, 3); |
---|
193 | } |
---|
194 | return result; |
---|
195 | } |
---|
196 | |
---|
// Gaussian creep mutation of 'current' with the given stddev, keeping the result within
// [mn,mx]: an out-of-range result is first reflected off the violated boundary, and then,
// if the deviation was larger than the interval itself, wrapped back into the range.
double GenoOperators::mutateCreep(char type, double current, double mn, double mx, double stddev, bool limit_precision_3digits)
{
	double result = mutateCreepNoLimit(type, current, stddev, limit_precision_3digits);
	if (result<mn || result>mx) //exceeds boundary, so bring to the allowed range
	{
		//reflect:
		if (result > mx) result = mx - (result - mx); else
			if (result < mn) result = mn + (mn - result);
		//wrap (just in case 'result' exceeded the allowed range so much that after reflection above it exceeded the other boundary):
		if (result > mx) result = mn + fmod(result - mx, mx - mn); else
			if (result < mn) result = mn + fmod(mn - result, mx - mn);
		if (limit_precision_3digits)
		{
			//reflect and wrap above may have changed the (limited) precision, so try to round again (maybe unnecessarily, because we don't know if reflect+wrap above were triggered)
			double result_try = round(result, 3);
			if (mn <= result_try && result_try <= mx) result = result_try; //after rounding still within allowed range, so keep rounded value
		}
	}
	return result;
}
---|
217 | |
---|
218 | double GenoOperators::mutateCreep(char type, double current, double mn, double mx, bool limit_precision_3digits) |
---|
219 | { |
---|
220 | double stddev = (mx - mn) / 2 / 5; // magic arbitrary formula for stddev, which becomes /halfinterval, 5 times narrower |
---|
221 | return mutateCreep(type, current, mn, mx, stddev, limit_precision_3digits); |
---|
222 | } |
---|
223 | |
---|
224 | void GenoOperators::setIntFromDoubleWithProbabilisticDithering(ParamInterface &p, int index, double value) //TODO |
---|
225 | { |
---|
226 | p.setInt(index, (paInt)(value + 0.5)); //TODO value=2.499 will result in 2 and 2.5 will result in 3, but we want these cases to be 2 or 3 with almost equal probability. value=2.1 should be mostly 2, rarely 3. Careful with negative values (test it!) |
---|
227 | } |
---|
228 | |
---|
229 | void GenoOperators::linearMix(vector<double> &p1, vector<double> &p2, double proportion) |
---|
230 | { |
---|
231 | if (p1.size() != p2.size()) |
---|
232 | { |
---|
233 | logPrintf("GenoOperators", "linearMix", LOG_ERROR, "Cannot mix vectors of different length (%d and %d)", p1.size(), p2.size()); |
---|
234 | return; |
---|
235 | } |
---|
236 | for (unsigned int i = 0; i < p1.size(); i++) |
---|
237 | { |
---|
238 | double v1 = p1[i]; |
---|
239 | double v2 = p2[i]; |
---|
240 | p1[i] = v1 * proportion + v2 * (1 - proportion); |
---|
241 | p2[i] = v2 * proportion + v1 * (1 - proportion); |
---|
242 | } |
---|
243 | } |
---|
244 | |
---|
245 | void GenoOperators::linearMix(ParamInterface &p1, int i1, ParamInterface &p2, int i2, double proportion) |
---|
246 | { |
---|
247 | char type1 = p1.type(i1)[0]; |
---|
248 | char type2 = p2.type(i2)[0]; |
---|
249 | if (type1 == 'f' && type2 == 'f') |
---|
250 | { |
---|
251 | double v1 = p1.getDouble(i1); |
---|
252 | double v2 = p2.getDouble(i2); |
---|
253 | p1.setDouble(i1, v1 * proportion + v2 * (1 - proportion)); |
---|
254 | p2.setDouble(i2, v2 * proportion + v1 * (1 - proportion)); |
---|
255 | } |
---|
256 | else |
---|
257 | if (type1 == 'd' && type2 == 'd') |
---|
258 | { |
---|
259 | int v1 = p1.getInt(i1); |
---|
260 | int v2 = p2.getInt(i2); |
---|
261 | setIntFromDoubleWithProbabilisticDithering(p1, i1, v1 * proportion + v2 * (1 - proportion)); |
---|
262 | setIntFromDoubleWithProbabilisticDithering(p2, i2, v2 * proportion + v1 * (1 - proportion)); |
---|
263 | } |
---|
264 | else |
---|
265 | logPrintf("GenoOperators", "linearMix", LOG_WARN, "Cannot mix values of types '%c' and '%c'", type1, type2); |
---|
266 | } |
---|
267 | |
---|
268 | int GenoOperators::getActiveNeuroClassCount(Model::ShapeType for_shape_type) |
---|
269 | { |
---|
270 | int count = 0; |
---|
271 | for (int i = 0; i < Neuro::getClassCount(); i++) |
---|
272 | { |
---|
273 | NeuroClass *nc = Neuro::getClass(i); |
---|
274 | if (nc->isShapeTypeSupported(for_shape_type) && nc->genactive) |
---|
275 | count++; |
---|
276 | } |
---|
277 | return count; |
---|
278 | } |
---|
279 | |
---|
280 | NeuroClass *GenoOperators::getRandomNeuroClass(Model::ShapeType for_shape_type) |
---|
281 | { |
---|
282 | vector<NeuroClass *> active; |
---|
283 | for (int i = 0; i < Neuro::getClassCount(); i++) |
---|
284 | { |
---|
285 | NeuroClass *nc = Neuro::getClass(i); |
---|
286 | if (nc->isShapeTypeSupported(for_shape_type) && nc->genactive) |
---|
287 | active.push_back(nc); |
---|
288 | } |
---|
289 | if (active.size() == 0) return NULL; else return active[rndUint(active.size())]; |
---|
290 | } |
---|
291 | |
---|
292 | NeuroClass *GenoOperators::getRandomNeuroClassWithOutput(Model::ShapeType for_shape_type) |
---|
293 | { |
---|
294 | vector<NeuroClass *> active; |
---|
295 | for (int i = 0; i < Neuro::getClassCount(); i++) |
---|
296 | { |
---|
297 | NeuroClass *nc = Neuro::getClass(i); |
---|
298 | if (nc->isShapeTypeSupported(for_shape_type) && nc->genactive && nc->getPreferredOutput() != 0) |
---|
299 | active.push_back(nc); |
---|
300 | } |
---|
301 | if (active.size() == 0) return NULL; else return active[rndUint(active.size())]; |
---|
302 | } |
---|
303 | |
---|
304 | NeuroClass *GenoOperators::getRandomNeuroClassWithInput(Model::ShapeType for_shape_type) |
---|
305 | { |
---|
306 | vector<NeuroClass *> active; |
---|
307 | for (int i = 0; i < Neuro::getClassCount(); i++) |
---|
308 | { |
---|
309 | NeuroClass *nc = Neuro::getClass(i); |
---|
310 | if (nc->isShapeTypeSupported(for_shape_type) && nc->genactive && nc->getPreferredInputs() != 0) |
---|
311 | active.push_back(nc); |
---|
312 | } |
---|
313 | if (active.size() == 0) return NULL; else return active[rndUint(active.size())]; |
---|
314 | } |
---|
315 | |
---|
316 | NeuroClass *GenoOperators::getRandomNeuroClassWithOutputAndWantingNoInputs(Model::ShapeType for_shape_type) |
---|
317 | { |
---|
318 | vector<NeuroClass *> active; |
---|
319 | for (int i = 0; i < Neuro::getClassCount(); i++) |
---|
320 | { |
---|
321 | NeuroClass *nc = Neuro::getClass(i); |
---|
322 | if (nc->isShapeTypeSupported(for_shape_type) && nc->genactive && nc->getPreferredOutput() != 0 && nc->getPreferredInputs() == 0) |
---|
323 | active.push_back(nc); |
---|
324 | } |
---|
325 | if (active.size() == 0) return NULL; else return active[rndUint(active.size())]; |
---|
326 | } |
---|
327 | |
---|
328 | NeuroClass *GenoOperators::getRandomNeuroClassWithOutputAndWantingNoOrAnyInputs(Model::ShapeType for_shape_type) |
---|
329 | { |
---|
330 | vector<NeuroClass *> active; |
---|
331 | for (int i = 0; i < Neuro::getClassCount(); i++) |
---|
332 | { |
---|
333 | NeuroClass *nc = Neuro::getClass(i); |
---|
334 | if (nc->isShapeTypeSupported(for_shape_type) && nc->genactive && nc->getPreferredOutput() != 0 && nc->getPreferredInputs() <= 0) // getPreferredInputs() should be 0 or -1 (any) |
---|
335 | active.push_back(nc); |
---|
336 | } |
---|
337 | if (active.size() == 0) return NULL; else return active[rndUint(active.size())]; |
---|
338 | } |
---|
339 | |
---|
340 | int GenoOperators::getRandomNeuroClassWithOutput(const vector<NeuroClass *> &NClist) |
---|
341 | { |
---|
342 | vector<int> allowed; |
---|
343 | for (size_t i = 0; i < NClist.size(); i++) |
---|
344 | if (NClist[i]->getPreferredOutput() != 0) //this NeuroClass provides output |
---|
345 | allowed.push_back(i); |
---|
346 | if (allowed.size() == 0) return -1; else return allowed[rndUint(allowed.size())]; |
---|
347 | } |
---|
348 | |
---|
349 | int GenoOperators::getRandomNeuroClassWithInput(const vector<NeuroClass *> &NClist) |
---|
350 | { |
---|
351 | vector<int> allowed; |
---|
352 | for (size_t i = 0; i < NClist.size(); i++) |
---|
353 | if (NClist[i]->getPreferredInputs() != 0) //this NeuroClass wants one input connection or more |
---|
354 | allowed.push_back(i); |
---|
355 | if (allowed.size() == 0) return -1; else return allowed[rndUint(allowed.size())]; |
---|
356 | } |
---|
357 | |
---|
// Finds the neuro class whose name is the longest match for a prefix of 's'
// (classes not supported by 'for_shape_type' are skipped). Advances 's' past the
// matched name and returns the class; returns NULL (and leaves 's' unchanged)
// when no class name matches.
NeuroClass *GenoOperators::parseNeuroClass(char *&s, ModelEnum::ShapeType for_shape_type)
{
	int maxlen = (int)strlen(s);
	int NClen = 0; //length of the longest match found so far
	NeuroClass *NC = NULL;
	for (int i = 0; i < Neuro::getClassCount(); i++)
	{
		NeuroClass *nci = Neuro::getClass(i);
		if (!nci->isShapeTypeSupported(for_shape_type))
			continue;
		const char *nciname = nci->name.c_str();
		int ncinamelen = (int)strlen(nciname);
		if (maxlen >= ncinamelen && ncinamelen > NClen && (strncmp(s, nciname, ncinamelen) == 0)) //prefix match, longer than any previous match
		{
			NC = nci;
			NClen = ncinamelen;
		}
	}
	s += NClen;
	return NC;
}
---|
379 | |
---|
380 | Neuro *GenoOperators::findNeuro(const Model *m, const NeuroClass *nc) |
---|
381 | { |
---|
382 | if (!m) return NULL; |
---|
383 | for (int i = 0; i < m->getNeuroCount(); i++) |
---|
384 | if (m->getNeuro(i)->getClass() == nc) return m->getNeuro(i); |
---|
385 | return NULL; //neuron of class 'nc' was not found |
---|
386 | } |
---|
387 | |
---|
// Matches the longest property id at the beginning of 's': first among the class
// properties of 'nc' (optionally also recognizing the old framsticks-v1 one-character
// aliases '/', '=', '!' for si/in/fo), then among the extra properties common to all
// neurons. Advances 's' past the matched id. Returns the property index — class
// property indexes are shifted by NEUROCLASS_PROP_OFFSET — or -1 when nothing matches.
int GenoOperators::neuroClassProp(char *&s, NeuroClass *nc, bool also_v1_N_props)
{
	int len = (int)strlen(s);
	int Len = 0, I = -1; //length and index of the longest match found so far
	if (nc)
	{
		Param p = nc->getProperties();
		for (int i = 0; i < p.getPropCount(); i++)
		{
			const char *n = p.id(i);
			int l = (int)strlen(n);
			if (len >= l && l > Len && (strncmp(s, n, l) == 0)) { I = NEUROCLASS_PROP_OFFSET + i; Len = l; }
			if (also_v1_N_props) //recognize old symbols of properties: /=!
			{
				if (strcmp(n, "si") == 0) n = "/"; else
					if (strcmp(n, "in") == 0) n = "="; else
						if (strcmp(n, "fo") == 0) n = "!";
				l = (int)strlen(n);
				if (len >= l && l > Len && (strncmp(s, n, l) == 0)) { I = NEUROCLASS_PROP_OFFSET + i; Len = l; }
			}
		}
	}
	Neuro n; //temporary neuron, used only to access the extra properties interface
	Param p = n.extraProperties();
	for (int i = 0; i < p.getPropCount(); i++)
	{
		const char *n = p.id(i);
		int l = (int)strlen(n);
		if (len >= l && l > Len && (strncmp(s, n, l) == 0)) { I = i; Len = l; }
	}
	s += Len;
	return I;
}
---|
421 | |
---|
422 | bool GenoOperators::canStartNeuroClassName(const char firstchar) |
---|
423 | { |
---|
424 | return isupper(firstchar) || firstchar == '|' || firstchar == '@' || firstchar == '*'; |
---|
425 | } |
---|
426 | |
---|
427 | bool GenoOperators::isWS(const char c) |
---|
428 | { |
---|
429 | return c == ' ' || c == '\n' || c == '\t' || c == '\r'; |
---|
430 | } |
---|
431 | |
---|
432 | void GenoOperators::skipWS(char *&s) |
---|
433 | { |
---|
434 | if (s == NULL) |
---|
435 | logMessage("GenoOperators", "skipWS", LOG_WARN, "NULL reference!"); |
---|
436 | else |
---|
437 | while (isWS(*s)) s++; |
---|
438 | } |
---|
439 | |
---|
440 | bool GenoOperators::areAlike(char *g1, char *g2) |
---|
441 | { |
---|
442 | while (*g1 || *g2) |
---|
443 | { |
---|
444 | skipWS(g1); |
---|
445 | skipWS(g2); |
---|
446 | if (*g1 != *g2) return false; //when difference |
---|
447 | if (!*g1 && !*g2) break; //both end |
---|
448 | g1++; |
---|
449 | g2++; |
---|
450 | } |
---|
451 | return true; //equal |
---|
452 | } |
---|
453 | |
---|
454 | char *GenoOperators::strchrn0(const char *str, char ch) |
---|
455 | { |
---|
456 | return ch == 0 ? NULL : strchr((char *)str, ch); |
---|
457 | } |
---|
458 | |
---|
459 | int GenoOperators::getRandomChar(const char *choices, const char *excluded) |
---|
460 | { |
---|
461 | int allowed_count = 0; |
---|
462 | for (size_t i = 0; i < strlen(choices); i++) if (!strchrn0(excluded, choices[i])) allowed_count++; |
---|
463 | if (allowed_count == 0) return -1; //no char is allowed |
---|
464 | int rnd_index = rndUint(allowed_count) + 1; |
---|
465 | allowed_count = 0; |
---|
466 | for (size_t i = 0; i < strlen(choices); i++) |
---|
467 | { |
---|
468 | if (!strchrn0(excluded, choices[i])) allowed_count++; |
---|
469 | if (allowed_count == rnd_index) return int(i); |
---|
470 | } |
---|
471 | return -1; //never happens |
---|
472 | } |
---|
473 | |
---|
474 | string GenoOperators::simplifiedModifiers_rR(const string& str) |
---|
475 | { |
---|
476 | int R = 0; //positive means more 'R', negative means more 'r' |
---|
477 | for (char c : str) |
---|
478 | { |
---|
479 | if (c == 'R') R++; else |
---|
480 | if (c == 'r') R--; |
---|
481 | } |
---|
482 | R %= 8; // 8 * 45 degrees = 360 degrees. After this, we get R=-7..+7 |
---|
483 | |
---|
484 | /* now, simplify homogeneous sequences of rR longer than 4: for example, rrrrr == RRR and RRRRRR == rr |
---|
485 | -7 1 |
---|
486 | -6 2 |
---|
487 | -5 3 |
---|
488 | -4 -4 (or 4; we choose +4 meaning we will never see rrrr) |
---|
489 | -3..3 (no changes) |
---|
490 | 4 4 (or -4) |
---|
491 | 5 -3 |
---|
492 | 6 -2 |
---|
493 | 7 -1 |
---|
494 | */ |
---|
495 | if (R <= -4) R += 8; //-4 => +4 |
---|
496 | else if (R >= 5) R -= 8; |
---|
497 | |
---|
498 | return R == 0 ? "" : (R > 0 ? string(R, 'R') : string(-R, 'r')); |
---|
499 | } |
---|
500 | |
---|
501 | //#include <cassert> |
---|
// Simplifies a sequence of modifier characters assuming their order does not matter.
// 'str_of_char_pairs' lists the modifier characters in antagonistic pairs (even index and
// the following odd index cancel each other out, e.g. "rR"); 'char_counts[i]' holds the
// number of occurrences of str_of_char_pairs[i]. Returns a minimal equivalent sequence
// (net counts capped at MAX_NUMBER_SAME_TYPE per pair), with the surviving modifiers
// interleaved across pairs unless CLUMP_IDENTICAL_MODIFIERS is defined.
// NOTE: modifies 'char_counts' in place (even entries become the clamped net difference).
string GenoOperators::simplifiedModifiersFixedOrder(const char *str_of_char_pairs, vector<int> &char_counts)
{
	// assert(strlen(str_of_char_pairs) == char_counts.size());
	// assert(char_counts.size() % 2 == 0);
	const int MAX_NUMBER_SAME_TYPE = 8; // max. number of modifiers of each type (case-sensitive) - mainly for rR, even though for rR, 4 would be sufficient if we assume lower or upper can be chosen as required for minimal length just as simplifiedModifiers_rR() does, e.g. rrrrr==RRR, RRRRRR==rr
	string simplified;
	//#define CLUMP_IDENTICAL_MODIFIERS //if GeneProps::normalizeBiol4() is used, this is not good because properties are calculated incrementally, non-linearly, their values are updated after each modifier character and some properties interact with each other due to normalization so they can saturate when clumped, therefore it is better keep the modifiers dispersed to equalize their effects
#ifdef CLUMP_IDENTICAL_MODIFIERS
	for (size_t i = 0; i < strlen(str_of_char_pairs); i++)
		if ((i % 2) == 0) //only even index "i" in str_of_char_pairs
			for (int j = 0; j < std::min(MAX_NUMBER_SAME_TYPE, abs(char_counts[i] - char_counts[i + 1])); j++) //assume that an even-index char and the following odd-index char have the opposite influence, so they cancel out.
				simplified += str_of_char_pairs[i + (char_counts[i + 1] > char_counts[i])]; //inner loop adds a sequence of same chars such as rrrrr or QQQ
#else
	for (size_t i = 0; i < strlen(str_of_char_pairs); i++)
		if ((i % 2) == 0) //only even index "i" in str_of_char_pairs
		{
			char_counts[i] -= char_counts[i + 1]; //from now on, even items in the vector store the difference between antagonistic modifier symbols; odd items are not needed
			char_counts[i] = std::min(std::max(char_counts[i], -MAX_NUMBER_SAME_TYPE), MAX_NUMBER_SAME_TYPE);
		}
	int remaining;
	do {
		remaining = 0;
		//each pass emits at most one character per pair, which interleaves the output across pairs
		for (size_t i = 0; i < strlen(str_of_char_pairs); i++)
			if ((i % 2) == 0) //only even index "i" in str_of_char_pairs
				if (char_counts[i] != 0)
				{
					simplified += str_of_char_pairs[i + (char_counts[i] < 0)];
					char_counts[i] += char_counts[i] > 0 ? -1 : +1; //decrease the difference towards zero
					remaining += abs(char_counts[i]);
				}
	} while (remaining > 0);
#endif
	return simplified;
}
---|
536 | |
---|
537 | string GenoOperators::simplifiedModifiers(const string & original) |
---|
538 | { |
---|
539 | const int MAX_NUMBER_SAME_TYPE = 6; // max. number of modifiers of each type (case-insensitive). rR is treated separately in simplification because their influence follows different (i.e., simple additive) logic - so the simplifiedModifiersFixedOrder() logic with cancelling out antagonistic modifiers is appropriate for rR. |
---|
540 | int counter[256] = {}; //initialize with zeros; 256 is unnecessarily too big and redundant, but enables very fast access (indexed directly by the ascii code) |
---|
541 | string simplified = ""; |
---|
542 | for (int i = original.size() - 1; i >= 0; i--) //iterate from end to begin so it is easier to remove "oldest" = first modifiers |
---|
543 | { |
---|
544 | unsigned char c = original[i]; |
---|
545 | if (!std::isalpha(c) || c == 'r' || c == 'R') //ignore non-alphabet characters; also, 'r' and 'R' are handled separately by simplifiedModifiers_rR() |
---|
546 | continue; |
---|
547 | unsigned char lower = std::tolower(c); |
---|
548 | counter[lower]++; |
---|
549 | if (counter[lower] <= MAX_NUMBER_SAME_TYPE) //get rid of modifiers that are too numerous, but get rid of the first ones in the string (="oldest", the last ones looking from the end), because their influence on the parameter value is the smallest |
---|
550 | simplified += c; |
---|
551 | } |
---|
552 | std::reverse(simplified.begin(), simplified.end()); //"simplified" was built in reverse order, so need to restore the order that corresponds to "original" |
---|
553 | return simplifiedModifiers_rR(original) + simplified; |
---|
554 | } |
---|