Changeset 1230 for cpp/frams/genetics
- Timestamp: 05/01/23 02:14:27 (21 months ago)
- Location: cpp/frams/genetics/f4
- Files: 3 edited
Legend:
- Unmodified
- Added
- Removed
cpp/frams/genetics/f4/f4_general.cpp
r1229 r1230 31 31 { 32 32 *v += 0.7853; // 0.7853981 45 degrees 33 }34 35 int scanRecur(const char* s, int slen, char stopchar)36 {37 int i = 0;38 //DB( printf(" scan('%s', '%c')\n", s, stopchar); )39 while (1)40 {41 if (i >= slen) // ran out the string, should never happen with a correct string42 return 1; //TODO MacKo 2023-04: interesting: why was this situation made undistinguishable from s[1]==stopchar ? does this have any bad consequences or is "1" just used to tell "advance as little as possible"? Anyway, this function can be eliminated when parsing is simplified.43 if (stopchar == s[i]) // bumped into stopchar44 return int(i);45 if (i < slen - 1) // s[i] is not the last char46 {47 if (s[i] == '(') //not an allowed char in f4, perhaps a remnant of old experiments with code48 {49 i += 2 + scanRecur(s + i + 1, slen - i - 1, ')');50 continue;51 }52 if (s[i] == '<')53 {54 i += 2 + scanRecur(s + i + 1, slen - i - 1, '>');55 continue;56 }57 if (s[i] == '#')58 {59 i += 2 + scanRecur(s + i + 1, slen - i - 1, '>');60 continue;61 }62 }63 // s[i] is a non-special character64 i++;65 }66 return i;67 33 } 68 34 … … 703 669 // transform geno from string to nodes 704 670 f4rootnode = new f4_Node(); 705 int res = f4_processRecur(genome.c_str(), 0, f4rootnode); 671 int _ = 0; 672 int res = f4_processRecur(genome.c_str(), _, f4rootnode); 706 673 if (res || (f4rootnode->childCount() != 1)) 707 674 { … … 1329 1296 // scan genotype string and build tree 1330 1297 // return >1 for error (errorpos) 1331 int f4_processRecur(const char* genot, unsigned int pos0, f4_Node *parent) 1332 { 1333 unsigned int gpos = pos0; //MacKo 2023-04 (TODO): these two variables are often updated before return which has no effect since they are local. Seems like a half step towards making them (or just gpos) in/out parameter which would solve many issues and simplify parsing (getting rid of scanRecur()) while making it more strict. 
1298 int f4_processRecur(const char* genot, int &pos_inout, f4_Node *parent) 1299 { 1334 1300 f4_Node *par = parent; 1335 1301 1336 if ( gpos >=strlen(genot))1302 if (pos_inout >= (int)strlen(genot)) 1337 1303 return (int)strlen(genot) + 1; 1338 1304 1339 while (gpos < strlen(genot)) 1340 { 1341 // first switch across cell dividers and old semantics 1342 switch (genot[gpos]) 1305 while (pos_inout < (int)strlen(genot)) 1306 { 1307 //#define PRINT_PARSING_LOCATION 1308 #ifdef PRINT_PARSING_LOCATION 1309 printf("%s\n", genot); 1310 for (int i = 0; i < pos_inout; i++) printf(" "); 1311 printf("^\n"); 1312 #endif 1313 switch (genot[pos_inout]) 1343 1314 { 1344 1315 case '<': 1345 1316 { 1346 // find out genotype start for child 1347 int stopchar_offset = scanRecur(genot + gpos + 1, (int)strlen(genot + gpos + 1), '>'); 1348 1349 f4_Node *node = new f4_Node("<", par, gpos); 1317 f4_Node *node = new f4_Node("<", par, pos_inout); 1350 1318 par = node; 1351 int res = f4_processRecur(genot, gpos + 1, par); 1319 pos_inout++; //move after '<' 1320 int res = f4_processRecur(genot, pos_inout, par); 1352 1321 if (res) return res; 1353 if ( gpos + stopchar_offset + 2 <strlen(genot))1354 { 1355 res = f4_processRecur(genot, gpos + stopchar_offset + 2, par);1322 if (pos_inout < (int)strlen(genot)) 1323 { 1324 res = f4_processRecur(genot, pos_inout, par); 1356 1325 if (res) return res; 1357 1326 } … … 1359 1328 { 1360 1329 //MacKo 2023-04, more strict behavior: instead of silent repair (no visible effect to the user, genotype stays invalid but is interpreted and reported as valid), we now point out where the error is. 
For example <X> or <X><X or <X><N:N> 1361 return gpos + 1; //the problem starts here, occurs because second child (branch) <1..>2..> is not completed1330 return (int)strlen(genot) + 1; 1362 1331 //old silent repair: 1363 1332 //node = new f4_Node(">", par, int(strlen(genot)) - 1); 1364 //par = node; 1365 } 1366 gpos++; 1333 } 1367 1334 return 0; // OK 1368 1335 } 1369 1336 case '>': 1370 1337 { 1371 f4_Node *node = new f4_Node(">", par, gpos); 1372 par = node; 1373 //gpos = (unsigned int)strlen(genot); //MacKo 2023-04: first of all, 'gpos' is a local variable so no effect; second, '>' may be internal (i.e., not the last one in the genotype), so it is a bad hint to assign strlen(). 'par' above is also local... 1338 new f4_Node(">", par, pos_inout); 1339 pos_inout++; //move after '>' 1374 1340 return 0; // OK 1375 1341 } … … 1378 1344 // repetition marker, 1 by default 1379 1345 ExtValue val; 1380 const char* end = val.parseNumber(genot + gpos + 1, ExtPType::TInt); 1346 const char* end = val.parseNumber(genot + pos_inout + 1, ExtPType::TInt); 1347 //TODO end==NULL? -> error! 1381 1348 int reps = (end == NULL) ? 
1 : val.getInt(); 1382 // find out genotype start for continuation1383 int stopchar_offset = scanRecur(genot + gpos + 1, (int)strlen(genot + gpos + 1), '>');1349 f4_Node *node = new f4_Node("#", par, pos_inout); 1350 node->reps = reps; 1384 1351 // skip number 1385 unsigned int oldpos = gpos; 1386 gpos += end - (genot + gpos); 1387 //gpos++; 1388 //while ((genot[gpos] >= '0') && (genot[gpos] <= '9')) gpos++; node1 = new f4_Node("#", par, oldpos); 1389 f4_Node *node = new f4_Node("#", par, oldpos); 1390 node->reps = reps; 1391 par = node; 1392 int res = f4_processRecur(genot, gpos, node); 1352 pos_inout += end - (genot + pos_inout); 1353 int res = f4_processRecur(genot, pos_inout, node); 1393 1354 if (res) return res; 1394 if ( oldpos + stopchar_offset + 2 <strlen(genot))1395 { 1396 res = f4_processRecur(genot, oldpos + stopchar_offset + 2, node);1355 if (pos_inout < (int)strlen(genot)) 1356 { 1357 res = f4_processRecur(genot, pos_inout, node); 1397 1358 if (res) return res; 1398 1359 } 1399 1360 else // ran out 1400 1361 { 1401 return gpos; //MacKo 2023-04: report an error, better to be more strict instead of a silent repair (genotype stays invalid but is interpreted and reported as valid) with non-obvious consequences?1362 return (int)strlen(genot) + 1; //MacKo 2023-04: report an error, better to be more strict instead of a silent repair (genotype stays invalid but is interpreted and reported as valid) with non-obvious consequences? 1402 1363 //earlier apporach - silently treating this problem (we don't ever see where the error is because it gets corrected in some way here, while parsing the genotype, and error location in the genotype is never reported): 1403 1364 //node = new f4_Node(">", par, int(strlen(genot)) - 1); // check if needed and if this is really the best repair operation; seemed to happen too many times in succession for some genotypes even though they were only a result of f4 operators, not manually created... 
and the operators should not generate invalid genotypes, right? Or maybe crossover does? Seems like too many #N's for closing >'s; removing #N or adding > helped. Operators somehow don't do it properly sometimes? But F4_ADD_REP adds '>'... (TODO) … … 1411 1372 { 1412 1373 // whitespace: ignore 1413 gpos++;1374 pos_inout++; 1414 1375 break; 1415 1376 } 1416 1377 case 'N': 1417 1378 { 1418 int forgenorange = gpos;1419 if (genot[ gpos+ 1] != ':')1420 return gpos+ 1; //error1421 gpos+= 2; //skipping "N:"1422 unsigned int begin_index = gpos;1423 char* end = (char*)genot + begin_index;1424 NeuroClass *neuclass = GenoOperators::parseNeuroClass( end, ModelEnum::SHAPETYPE_BALL_AND_STICK);1379 int forgenorange = pos_inout; 1380 if (genot[pos_inout + 1] != ':') 1381 return pos_inout + 1; //error 1382 pos_inout += 2; //skipping "N:" 1383 unsigned int neuroclass_begin = pos_inout; 1384 char* neuroclass_end = (char*)genot + neuroclass_begin; 1385 NeuroClass *neuclass = GenoOperators::parseNeuroClass(neuroclass_end, ModelEnum::SHAPETYPE_BALL_AND_STICK); //advances neuroclass_end 1425 1386 if (neuclass == NULL) 1426 return gpos+ 1; //error1427 gpos += end - genot - begin_index;1428 string neutype = string(genot + begin_index, genot + gpos);1387 return pos_inout + 1; //error 1388 pos_inout += neuroclass_end - genot - neuroclass_begin; 1389 string neutype = string(genot + neuroclass_begin, genot + pos_inout); 1429 1390 f4_Node *node = new f4_Node(neutype, par, forgenorange); 1430 1391 node->neuclass = neuclass; … … 1438 1399 // in the future this could be generalized to all neuron properties, for example N:|:power:0.6:range:1.4, or can even use '=' or ',' instead of ':' if no ambiguity 1439 1400 char prop_dir, prop_symbol, prop_end[2]; // prop_end is only to ensure that neuron parameter definition is completed 1440 if (sscanf(genot + gpos, ":%c%c%1[:]", &prop_dir, &prop_symbol, &prop_end) != 3)1401 if (sscanf(genot + pos_inout, ":%c%c%1[:]", &prop_dir, &prop_symbol, &prop_end) != 3) 
1441 1402 // error: incorrect format 1442 return gpos+ 1 + 1;1403 return pos_inout + 1 + 1; 1443 1404 if (prop_dir != '-' && prop_dir != '+') 1444 return gpos+ 1 + 1; //error1405 return pos_inout + 1 + 1; //error 1445 1406 switch (prop_symbol) 1446 1407 { 1447 1408 case '!': case '=': case '/': break; 1448 1409 default: 1449 return gpos+ 1 + 1; //error1450 } 1451 f4_Node *node = new f4_Node(":", par, gpos);1410 return pos_inout + 1 + 1; //error 1411 } 1412 f4_Node *node = new f4_Node(":", par, pos_inout); 1452 1413 node->prop_symbol = prop_symbol; 1453 1414 node->prop_increase = prop_dir == '+' ? true : false; // + or - 1454 1415 par = node; 1455 int stopchar_offset = scanRecur(genot + gpos + 1, (int)strlen(genot + gpos + 1), ':'); 1456 gpos += stopchar_offset + 2; 1416 pos_inout += 4; //skipping :ds: 1457 1417 break; 1458 1418 } … … 1461 1421 double weight = 0; 1462 1422 int relfrom; 1463 const char *end = parseConnection(genot + gpos, relfrom, weight);1423 const char *end = parseConnection(genot + pos_inout, relfrom, weight); 1464 1424 if (end == NULL) 1465 return gpos+ 1; //error1466 1467 f4_Node *node = new f4_Node("[", par, gpos);1425 return pos_inout + 1; //error 1426 1427 f4_Node *node = new f4_Node("[", par, pos_inout); 1468 1428 node->conn_from = relfrom; 1469 1429 node->conn_weight = weight; 1470 1430 par = node; 1471 int stopchar_offset = scanRecur(genot + gpos + 1, (int)strlen(genot + gpos + 1), ']'); 1472 gpos += stopchar_offset + 2; 1431 pos_inout += end - (genot + pos_inout); 1473 1432 break; 1474 1433 } 1475 1434 default: // 'X' and ',' and all modifiers and also invalid symbols - add a node, for invalid symbols build will give the error or repair 1476 1435 { 1477 //printf("any regular character '%c'\n", genot[gpos]); 1478 f4_Node *node = new f4_Node(genot[gpos], par, gpos); 1436 //printf("any regular character '%c'\n", genot[pos_inout]); 1437 //TODO here: read a continuous sequence of modifiers, sort and optimize ("collapse") it like in f1, then 
add to tree 1438 f4_Node *node = new f4_Node(genot[pos_inout], par, pos_inout); 1479 1439 par = node; 1480 gpos++;1440 pos_inout++; 1481 1441 break; 1482 1442 } … … 1487 1447 if (par && par->name != ">") 1488 1448 { 1489 //happens when gpos == strlen(genot) 1490 //return gpos; //MacKo 2023-04: could report an error instead of silent repair, but repair operators only work in Cells (i.e., after the f4_Node tree has been parsed without errors and Cells can start developing) so we don't want to make a fatal error because of missing '>' here. Also after conversions from Cells to text, trailing '>' is deliberately removed... and also the simplest genotype is officially X, not X>. 1491 f4_Node *node = new f4_Node('>', par, int(strlen(genot)) - 1); 1492 par = node; 1493 } 1494 1495 return 0; 1449 //happens when pos_inout == strlen(genot) 1450 //return pos_inout; //MacKo 2023-04: could report an error instead of silent repair, but repair operators only work in Cells (i.e., after the f4_Node tree has been parsed without errors and Cells can start developing) so we don't want to make a fatal error because of missing '>' here. Also after conversions from Cells to text, trailing '>' is deliberately removed... and also the simplest genotype is officially X, not X>. 1451 new f4_Node('>', par, int(strlen(genot)) - 1); 1452 } 1453 1454 return 0; // OK 1496 1455 } 1497 1456 -
cpp/frams/genetics/f4/f4_general.h
r1229 r1230 40 40 #define CELL_NEURON 42 ///<differentiated to neuron, can divide 41 41 //@} 42 43 /**44 * TODO MacKo 2023-04: not sure if this function is needed and if f4_processRecur() would not suffice45 * if it advanced the string pointer (in/out parameter) while processing. Its returned value is always used after46 * f4_processRecur() anyway, and in two cases likely incorrectly (for [...] to detect closing ']'47 * and for :...: to detect closing ':') - we don't need recursion in these cases, a simple linear48 * scan would suffice, but even this would not be needed - since we are parsing the actual characters in these cases,49 * we do scanning anyway. So looks like this function doubles the work already done more thoroughly by f4_processRecur().50 *51 * Scans f4 genotype string for a stopping character and returns the position of52 * this stopping character or 1 if the end of string was reached. This method is used53 * for closing braces, like ), >, ]. It runs recursively when opening braces54 * like (, <, # are found.55 * @param s string with the f4 genotype56 * @param slen length of a given string57 * @param stopchar character to be found58 * @return 1 if end of string was reached, or position of found character in sequence59 */60 int scanRecur(const char* s, int slen, char stopchar);61 42 62 43 … … 501 482 * @return 0 if processing was successful, otherwise returns the position of an error in the genotype 502 483 */ 503 int f4_processRecur(const char *genot, unsigned int pos0, f4_Node *parent);484 int f4_processRecur(const char *genot, int &pos_inout, f4_Node *parent); 504 485 505 486 /** -
cpp/frams/genetics/f4/f4_oper.cpp
r1229 r1230 16 16 // 17 17 // TODO the behavior of neuron input indexes during mutation seems badly implemented (see also TREAT_BAD_CONNECTIONS_AS_INVALID_GENO). Are they kept properly maintained when nodes are added and removed? This could be done well because during mutation we operate on the tree structure with cross-references between nodes (so they should not be affected by local changes in the tree), and then convert the tree back to string. Yet, the f4_Node.conn_from is an integer and these fields in nodes do not seem to be maintained on tree node adding/removal... change these integer offsets to references to node objects? But actually, do the offsets that constitute relative connection references concern the f4_Node tree structure (and all these sophisticated calculations of offsets during mutation are useful) or rather they concern the f4_Cells development? verify all situations in f4_Cell::oneStep(), case '['. 18 // TODO add simplifying sequences of modifiers (so capital and small letter cancel out, like in f1) - but seems like each single modifier is a separate f4_Node? and perhaps we don't want to use the repair mechanism for this... maybe mutations, when they add/modify/remove a modifier node, should be "cleaning" the tree by removing nodes when they encounter contradictory modifiers on the same subpath, and also limit the number of modifiers of each type just like in f1? To avoid s quences like ...<X>llmlIilImmimiimmimifmfl<fifmmimilimmmiimiliffmfliIfififlliflimfliffififmiffmfliflifmIlimimiflimfiffmllliflmimifllifliliflifmIlimimiflimfiffmllliflmimifllfmIlimimiflimfiffmllliflmimiflliflimimmiflimfliffmiflifmfiffllIlififliffififmiffmfliflifIliflimimflimflfflimimifllfflifllfflimlififfiiffifIr<r<...18 // TODO add simplifying sequences of modifiers (so capital and small letter cancel out, like in f1) - but seems like each single modifier is a separate f4_Node? and perhaps we don't want to use the repair mechanism for this... 
maybe mutations, when they add/modify/remove a modifier node, should be "cleaning" the tree by removing nodes when they encounter contradictory modifiers on the same subpath, and also limit the number of modifiers of each type just like in f1? To avoid sequences like ...<X>llmlIilImmimiimmimifmfl<fifmmimilimmmiimiliffmfliIfififlliflimfliffififmiffmfliflifmIlimimiflimfiffmllliflmimifllifliliflifmIlimimiflimfiffmllliflmimifllfmIlimimiflimfiffmllliflmimiflliflimimmiflimfliffmiflifmfiffllIlififliffififmiffmfliflifIliflimimflimflfflimimifllfflifllfflimlififfiiffifIr<r<... 19 19 // TODO add support for properties of (any class of) neurons - not just sigmoid/force/intertia (':' syntax) for N 20 20 // TODO add mapping genotype character ranges for neural [connections] 21 // TODO change the default branching plane (to match f1) so they do not grow perfectly vertical (cheating vertpos) so easily? (so they require Rr or other modifiers) 21 22 22 23 … … 122 123 // convert geno to a tree, then try to validate 123 124 f4_Node root; 124 if (f4_processRecur(geno, 0, &root) || root.childCount() != 1) return GENOPER_OK; // cannot repair 125 int _ = 0; 126 if (f4_processRecur(geno, _, &root) || root.childCount() != 1) return GENOPER_OK; // cannot repair 125 127 126 128 const int VALIDATE_TRIALS = 20; … … 137 139 { 138 140 f4_Node root; 139 int res = f4_processRecur(geno, 0, &root); 141 int _ = 0; 142 int res = f4_processRecur(geno, _, &root); 140 143 if (res) return res; // errorpos, >0 141 144 if (root.childCount() != 1) return 1; //earlier: GENOPER_OPFAIL … … 549 552 { 550 553 f4_Node *root = new f4_Node; 551 if (f4_processRecur(g, 0, root) || root->childCount() != 1) 554 int _ = 0; 555 if (f4_processRecur(g, _, root) || root->childCount() != 1) 552 556 { 553 557 delete root; … … 673 677 674 678 // convert genotype strings into tree structures 675 if (f4_processRecur(g1, 0, &root1) || (root1.childCount() != 1)) return GENOPER_OPFAIL; 676 if (f4_processRecur(g2, 0, &root2) || 
(root2.childCount() != 1)) return GENOPER_OPFAIL; 679 int _1 = 0, _2 = 0; 680 if (f4_processRecur(g1, _1, &root1) || (root1.childCount() != 1)) return GENOPER_OPFAIL; 681 if (f4_processRecur(g2, _2, &root2) || (root2.childCount() != 1)) return GENOPER_OPFAIL; 677 682 678 683 // decide amounts of crossover, 0.1-0.9
Note: See TracChangeset for help on using the changeset viewer.