- Timestamp:
- 05/04/23 01:45:37 (20 months ago)
- Location:
- cpp/frams/genetics/f4
- Files:
-
- 4 edited
Legend:
- Unmodified
- Added
- Removed
-
cpp/frams/genetics/f4/f4_general.cpp
r1232 r1234 7 7 8 8 #include "f4_general.h" 9 #include "../genooperators.h" // for GENOPER_ constants9 #include "../genooperators.h" // for GENOPER_ constants 10 10 #include <common/nonstd_stl.h> 11 11 #include <common/log.h> … … 306 306 break; 307 307 } 308 case 'r': case 'R': 308 case 'r': 309 case 'R': 309 310 { 310 311 // error: if neuron … … 340 341 // error: if neuron 341 342 if (type == CELL_NEURON) //some neurons have the same single-letter names as modifiers (for example G,S,D), but they are supposed to have is_neuroclass==true so they should indeed not be handled here 342 {//however, what we see here is actually modifiers such as IdqEbWL (so not valid neuroclasses) that occurred within an already differentiated cell type==CELL_NEURON.343 {//however, what we see here is actually modifiers such as IdqEbWL (so not valid neuroclasses) that occurred within an already differentiated cell of type==CELL_NEURON. 343 344 //printf("Handled as a modifier, but type==CELL_NEURON: '%c'\n", name); 344 345 // fix: delete it … … 691 692 int f4_Cells::simulate() 692 693 { 693 const expr bool print_debugging = false; //print the state of cells during development694 const bool PRINT_CELLS_DEVELOPMENT = false; //print the state of cells 694 695 errorcode = GENOPER_OK; 695 696 696 697 for (int i = 0; i < cell_count; i++) C[i]->active = true; 697 698 698 if ( print_debugging) f4_Node::print_tree(C[0]->genot, 0);699 if ( print_debugging) print_cells("Initialization");699 if (PRINT_CELLS_DEVELOPMENT) f4_Node::print_tree(C[0]->genot, 0); 700 if (PRINT_CELLS_DEVELOPMENT) print_cells("Initialization"); 700 701 701 702 // execute oneStep() in a cycle 702 while (oneStep()) if ( print_debugging) print_cells("Development step");703 if ( print_debugging) print_cells("After last development step");703 while (oneStep()) if (PRINT_CELLS_DEVELOPMENT) print_cells("Development step"); 704 if (PRINT_CELLS_DEVELOPMENT) print_cells("After last development step"); 704 705 705 706 #ifdef 
EXTRA_STEP_CELL_DEVELOPMENT 706 707 if (errorcode == GENOPER_OK) 707 708 { 708 oneStep(); if ( print_debugging) print_cells("After extra step"); //for these "halted" (yielding) cells (they have active==false) that wait for other cells to develop. Without this step, the last, recently halted one(s) may miss the "retrying" step if all active==true cells became active==false in the last step.709 oneStep(); if (PRINT_CELLS_DEVELOPMENT) print_cells("After extra step"); //for these "halted" (yielding) cells (they have active==false) that wait for other cells to develop. Without this step, the last, recently halted one(s) may miss the "retrying" step if all active==true cells became active==false in the last step. 709 710 } 710 711 #endif … … 745 746 //DB( printf("Cell simulation done, %d cells. \n", nc); ) 746 747 747 if ( print_debugging) print_cells("Final");748 if (PRINT_CELLS_DEVELOPMENT) print_cells("Final"); 748 749 749 750 return errorcode; … … 1058 1059 { 1059 1060 for (int i = 0; i < indent; i++) printf(" "); 1060 printf("%s (%d)", root->name.c_str(), root->count());1061 printf("%s%s%s (%d)", root->neuclass != NULL ? "N:" : "", root->name.c_str(), root->name == "#" ? 
std::to_string(root->reps).c_str() : "", root->count() - 1); 1061 1062 if (root->name == "[") 1062 1063 printf(" from=%-3d weight=%g", root->conn_from, root->conn_weight); … … 1253 1254 } 1254 1255 1255 // scan genotype string and build tree1256 // scan genotype string and build a tree 1256 1257 // return >1 for error (errorpos) 1257 int f4_processRecur(const char* genot, int &pos_inout, f4_Node *parent) 1258 { 1258 int f4_processRecur(const char* genot, const int genot_len, int &pos_inout, f4_Node *parent) 1259 { 1260 static const char *all_modifiers_no_comma = F14_MODIFIERS; //I did experiments with added comma (see all_modifiers_for_simplify below) which had the advantage of commas not breaking sequences of modifiers, thus longer sequences of modifiers (including commas) could be simplified and genetic bloat was further reduced. But since we impose a limit on the number of modifier chars in GenoOperators::simplifiedModifiers(), it would also influence commas (e.g. no more than 8 commas per sequence), so in order to leave commas entirely unlimited let's exclude them from simplification. Note that currently 'X' or any other non-F14_MODIFIERS char also separates the sequence to be simplified, so if we wanted a really intensive simplification, it should occur during development, when we know precisely which genes influence each f4_Cell. 1261 //const char *Geno_f4::all_modifiers_for_simplify = F14_MODIFIERS ",\1"; //'\1' added to keep the number of chars even, avoid exceptions in logic and save the simple rule that the sequence is made of pairs (gene,contradictory gene), where a comma has no contradictory gene and \1 is unlikely to occur in the f4 genotype (and not allowed), so no risk it will cancel out a comma during simplification. 
1262 1263 1259 1264 f4_Node *par = parent; 1260 1265 1261 if (pos_inout >= (int)strlen(genot)) 1262 return (int)strlen(genot) + 1; 1263 1264 while (pos_inout < (int)strlen(genot)) 1265 { 1266 //#define PRINT_PARSING_LOCATION 1267 #ifdef PRINT_PARSING_LOCATION 1268 printf("%s\n", genot); 1269 for (int i = 0; i < pos_inout; i++) printf(" "); 1270 printf("^\n"); 1271 #endif 1266 if (pos_inout >= genot_len) 1267 return genot_len + 1; 1268 1269 while (pos_inout < genot_len) 1270 { 1271 const bool PRINT_PARSING_LOCATION = false; 1272 if (PRINT_PARSING_LOCATION) 1273 { 1274 printf("%s\n", genot); 1275 for (int i = 0; i < pos_inout; i++) printf(" "); 1276 printf("^\n"); 1277 } 1272 1278 switch (genot[pos_inout]) 1273 1279 { … … 1277 1283 par = node; 1278 1284 pos_inout++; //move after '<' 1279 int res = f4_processRecur(genot, pos_inout, par);1285 int res = f4_processRecur(genot, genot_len, pos_inout, par); 1280 1286 if (res) return res; 1281 if (pos_inout < (int)strlen(genot))1282 { 1283 res = f4_processRecur(genot, pos_inout, par);1287 if (pos_inout < genot_len) 1288 { 1289 res = f4_processRecur(genot, genot_len, pos_inout, par); 1284 1290 if (res) return res; 1285 1291 } … … 1287 1293 { 1288 1294 //MacKo 2023-04, more strict behavior: instead of silent repair (no visible effect to the user, genotype stays invalid but is interpreted and reported as valid), we now point out where the error is. 
For example <X> or <X><X or <X><N:N> 1289 return (int)strlen(genot)+ 1;1295 return genot_len + 1; 1290 1296 //old silent repair: 1291 //node = new f4_Node(">", par, int(strlen(genot))- 1);1297 //node = new f4_Node(">", par, genot_len - 1); 1292 1298 } 1293 1299 return 0; // OK … … 1310 1316 // skip number 1311 1317 pos_inout += end - (genot + pos_inout); 1312 int res = f4_processRecur(genot, pos_inout, node);1318 int res = f4_processRecur(genot, genot_len, pos_inout, node); 1313 1319 if (res) return res; 1314 if (pos_inout < (int)strlen(genot))1315 { 1316 res = f4_processRecur(genot, pos_inout, node);1320 if (pos_inout < genot_len) 1321 { 1322 res = f4_processRecur(genot, genot_len, pos_inout, node); 1317 1323 if (res) return res; 1318 1324 } 1319 1325 else // ran out 1320 1326 { 1321 return (int)strlen(genot)+ 1; //MacKo 2023-04: report an error, better to be more strict instead of a silent repair (genotype stays invalid but is interpreted and reported as valid) with non-obvious consequences?1327 return genot_len + 1; //MacKo 2023-04: report an error, better to be more strict instead of a silent repair (genotype stays invalid but is interpreted and reported as valid) with non-obvious consequences? 1322 1328 //earlier apporach - silently treating this problem (we don't ever see where the error is because it gets corrected in some way here, while parsing the genotype, and error location in the genotype is never reported): 1323 //node = new f4_Node(">", par, int(strlen(genot))- 1); // check if needed and if this is really the best repair operation; seemed to happen too many times in succession for some genotypes even though they were only a result of f4 operators, not manually created... and the operators should not generate invalid genotypes, right? Or maybe crossover does? Seems like too many #N's for closing >'s; removing #N or adding > helped. Operators somehow don't do it properly sometimes? But F4_ADD_REP adds '>'... 
(TODO)1329 //node = new f4_Node(">", par, genot_len - 1); // check if needed and if this is really the best repair operation; seemed to happen too many times in succession for some genotypes even though they were only a result of f4 operators, not manually created... and the operators should not generate invalid genotypes, right? Or maybe crossover does? Seems like too many #N's for closing >'s; removing #N or adding > helped. Operators somehow don't do it properly sometimes? But F4_ADD_REP adds '>'... (TODO) 1324 1330 } 1325 1331 return 0; // OK … … 1391 1397 break; 1392 1398 } 1393 default: // 'X' and ',' and all modifiers and also invalid symbols - add a node , for invalid symbols buildwill give the error or repair1399 default: // 'X' and ',' and all modifiers and also invalid symbols - add a node. For symbols that are not valid in f4, the cell development process will give the error or repair 1394 1400 { 1395 1401 //printf("any regular character '%c'\n", genot[pos_inout]); 1396 //TODO here: read a continuous sequence of modifiers, sort and optimize ("collapse") it like in f1, then add to tree 1402 #define F4_SIMPLIFY_MODIFIERS //avoid long sequences like ...<X>llmlIilImmimiimmimifmfl<fifmmimilimmmiimiliffmfliIfififlliflimfliffififmiffmflllfflimlififfiiffifIr<r<... - another option, instead of simplifying while parsing here, would be mutations: when they add/modify/remove a modifier node, they could "clean" the tree by removing nodes when they encounter contradictory modifiers on the same subpath, and also limit the number of modifiers just as GenoOperators::simplifiedModifiers() does. 
1403 #ifdef F4_SIMPLIFY_MODIFIERS 1404 char *ptr = (char*)(genot + pos_inout); 1405 1406 #ifdef __BORLANDC__ // "[bcc32c Error] cannot compile this non-trivial TLS destruction yet" (C++B 10.4u2) 1407 static 1408 #else 1409 thread_local 1410 #endif 1411 vector<int> modifs_counts(strlen(all_modifiers_no_comma)); ///<an array with a known constant size storing counters of each modifier symbol from all_modifiers_no_comma, created once to avoid reallocation every time when modifier genes are simplified during parsing. Initialization of required size; it will never be resized. 1412 std::fill(modifs_counts.begin(), modifs_counts.end(), 0); //zeroing only needed if we encountered a char from all_modifiers_no_comma and enter the 'while' loop below 1413 1414 while (char *m = GenoOperators::strchrn0(all_modifiers_no_comma, *ptr)) //only processes a section of chars known in all_modifiers_no_comma, other characters will exit the loop 1415 { 1416 modifs_counts[m - all_modifiers_no_comma]++; 1417 GenoOperators::skipWS(++ptr); //advance and ignore whitespace 1418 } 1419 int advanced = ptr - (genot + pos_inout); 1420 if (advanced > 0) //found modifiers 1421 { 1422 string simplified = GenoOperators::simplifiedModifiers(all_modifiers_no_comma, modifs_counts); 1423 // add a node for each char in "simplified" 1424 for (size_t i = 0; i < simplified.length(); i++) 1425 { 1426 int pos = GenoOperators::strchrn0(genot + pos_inout, simplified[i]) - genot; //unnecessarily finding the same char, if it occurrs multiple times in simplified 1427 f4_Node *node = new f4_Node(simplified[i], par, pos); //location is approximate. In the simplification process we don't trace where the origin(s) of the simplified[i] gene were. We provide 'pos' as the first occurrence of simplified[i] (for example, all 'L' will have the same location assigned, but at least this is where 'L' occurred in the genotype, so in case of any modification of a node (repair, removal, whatever... 
even mapping of genes) the indicated gene will be one of the responsible ones) 1428 par = node; 1429 } 1430 pos_inout += advanced; 1431 } 1432 else // genot[pos_inout] is a character not present in all_modifiers_no_comma, so treat it as a regular individual char just as it would be without simplification 1433 { 1434 f4_Node *node = new f4_Node(genot[pos_inout], par, pos_inout); 1435 par = node; 1436 pos_inout++; 1437 } 1438 #else 1397 1439 f4_Node *node = new f4_Node(genot[pos_inout], par, pos_inout); 1398 1440 par = node; 1399 1441 pos_inout++; 1442 #endif // F4_SIMPLIFY_MODIFIERS 1400 1443 break; 1401 1444 } … … 1406 1449 if (par && par->name != ">") 1407 1450 { 1408 //happens when pos_inout == strlen(genot)1451 //happens when pos_inout == genot_len 1409 1452 //return pos_inout; //MacKo 2023-04: could report an error instead of silent repair, but repair operators only work in Cells (i.e., after the f4_Node tree has been parsed without errors and Cells can start developing) so we don't want to make a fatal error because of missing '>' here. Also after conversions from Cells to text, trailing '>' is deliberately removed... and also the simplest genotype is officially X, not X>. 1410 new f4_Node('>', par, int(strlen(genot))- 1);1453 new f4_Node('>', par, genot_len - 1); 1411 1454 } 1412 1455 … … 1417 1460 { 1418 1461 int pos = 0; 1419 int res = f4_processRecur(genot, pos, root);1462 int res = f4_processRecur(genot, (int)strlen(genot), pos, root); 1420 1463 if (res > 0) 1421 1464 return res; //parsing error -
cpp/frams/genetics/f4/f4_general.h
r1232 r1234 471 471 * This is an internal function; for regular cases, use f4_process(). 472 472 * @param genot the string with the entire genotype 473 * @param genot_len length of genot (precomputed for efficiency) 473 474 * @param pos_inout the current position of processing in string (advanced by the function) 474 475 * @param parent current parent of the analysed branch of the genotype 475 476 * @return 0 if processing was successful, otherwise returns the position of an error in the genotype 476 477 */ 477 int f4_processRecur(const char *genot, int &pos_inout, f4_Node *parent);478 int f4_processRecur(const char *genot, const int genot_len, int &pos_inout, f4_Node *parent); 478 479 479 480 /** -
cpp/frams/genetics/f4/f4_oper.cpp
r1232 r1234 11 11 // may help, but it would be better to improve the source code to make genetic operators neutral in terms of genotype length. Adding such a penalty 12 12 // removes "work in progress" changes in genotypes thus promoting immediate, straightforward improvements while hindering slower, multifaceted progress. 13 // TODO getting rid of redundancy (valid genotypes with a lot of "junk code") in this representation looks like a good idea; many improvements to this end have already been done in April & May 2023 .13 // TODO getting rid of redundancy (valid genotypes with a lot of "junk code") in this representation looks like a good idea; many improvements to this end have already been done in April & May 2023, so maybe it is not a big problem now? 14 14 // 15 15 // 16 16 // TODO the behavior of neuron input indexes during mutation seems badly implemented (see also TREAT_BAD_CONNECTIONS_AS_INVALID_GENO). Are they kept properly maintained when nodes are added and removed? This could be done well because during mutation we operate on the tree structure with cross-references between nodes (so they should not be affected by local changes in the tree), and then convert the tree back to string. Yet, the f4_Node.conn_from is an integer and these fields in nodes do not seem to be maintained on tree node adding/removal... change these integer offsets to references to node objects? But actually, do the offsets that constitute relative connection references concern the f4_Node tree structure (and all these sophisticated calculations of offsets during mutation are useful) or rather they concern the f4_Cells development? verify all situations in f4_Cell::oneStep(), case '['. 17 // TODO add simplifying sequences of modifiers (so capital and small letter cancel out, like in f1) - but seems like each single modifier is a separate f4_Node? and perhaps we don't want to use the repair mechanism for this... 
maybe mutations, when they add/modify/remove a modifier node, should be "cleaning" the tree by removing nodes when they encounter contradictory modifiers on the same subpath, and also limit the number of modifiers of each type just like in f1? To avoid sequences like ...<X>llmlIilImmimiimmimifmfl<fifmmimilimmmiimiliffmfliIfififlliflimfliffififmiffmfliflifmIlimimiflimfiffmllliflmimifllifliliflifmIlimimiflimfiffmllliflmimifllfmIlimimiflimfiffmllliflmimiflliflimimmiflimfliffmiflifmfiffllIlififliffififmiffmfliflifIliflimimflimflfflimimifllfflifllfflimlififfiiffifIr<r<...18 17 // TODO in mutation, adding the '#' gene does not seem to be effective. The gene is added and genotypes are valid, but hardly ever #n is effective, i.e., it hardly ever multiplicates body or brain parts... investigate! 19 18 // TODO add support for properties of (any class of) neurons - not just sigmoid/force/intertia (':' syntax) for N 20 19 // TODO add mapping genotype character ranges for neural [connections] 21 20 // TODO change the default branching plane (to match f1) so they do not grow perfectly vertical (cheating vertpos) so easily? (so they require Rr or other modifiers) 21 // TODO for some genotypes, #defining/undefining F4_SIMPLIFY_MODIFIERS produces significantly different phenotypes (e.g. length of some Joint changes from 1.25 to 1.499, coordinates of Parts change, friction of some part changes from 1.28 to 0.32). Comparing f4_Node trees, the simplification works as intended, there are no huge changes apart from removing contradicting modifiers like 'R' and 'r' or 'L' and 'l', and dispersing the modifiers (changed order). There is no reason for such a significant influence of this. A hypothesis is that something may be wrong with calculating the influence of individual modifiers, e.g. 
some strong nonlinearity is introduced where it should not be, or some compensation between modifiers that should not influence each other (like L and R), or some modifier f4_Nodes are skipped/ignored when applying? Investigate. Example genotype: /*4*/,i<qlM,C<X>N:*#1>>,r<MRF<Xcm>N:Gpart>#5#1#2MLL#1>#1>>>>#5ML#2L#1>>>Lf,r<#1>rM<CqmLlCfqiFLqXFfl><F,<<XI>iN:|[-1:4.346]><XF><<XrRQ>N:G#3>>QiXFMR>fXM#2MfcR>R#3>>X 22 22 23 23 24 … … 32 33 33 34 34 const char *Geno_f4::all_modifiers = F14_MODIFIERS ","; //comma in f4 is handled the same way (simple node, F4_ADD_SIMP) as modifiers 35 const char *Geno_f4::all_modifiers = F14_MODIFIERS ","; //comma in f4 is handled the same way (simple node, F4_ADD_SIMP) as modifiers. See also all_modifiers_no_comma in f4_general.cpp. 35 36 36 37 // codes that can be changed (apart from being added/deleted) … … 294 295 { 295 296 // add simple node 296 // choose a simple node from ADD_SIMPLE_CODES 297 int modifier_index = GenoOperators::getRandomChar(all_modifiers, excluded_modifiers.c_str()); 298 if (modifier_index < 0) 299 return GENOPER_OPFAIL; 297 300 node_mutated->parent->removeChild(node_mutated); 298 // f4_Node *n2 = new f4_Node(ADD_SIMPLE_CODES[rndUint(strlen(ADD_SIMPLE_CODES))], n1->parent, n1->parent->pos);299 int modifierid = GenoOperators::getRandomChar(all_modifiers, excluded_modifiers.c_str());300 f4_Node *n2 = new f4_Node(all_modifiers[modifier id], node_mutated->parent, node_mutated->parent->pos);301 // old source: choose a simple node from ADD_SIMPLE_CODES 302 //f4_Node *n2 = new f4_Node(ADD_SIMPLE_CODES[rndUint(strlen(ADD_SIMPLE_CODES))], node_mutated->parent, node_mutated->parent->pos); 303 f4_Node *n2 = new f4_Node(all_modifiers[modifier_index], node_mutated->parent, node_mutated->parent->pos); 301 304 n2->addChild(node_mutated); 302 305 node_mutated->parent = n2; -
cpp/frams/genetics/f4/f4_oper.h
r1231 r1234 62 62 static const char *all_modifiers; 63 63 64 protected: 64 private: 65 65 66 66 /**
Note: See TracChangeset for help on using the changeset viewer.