Changeset 1229 for cpp


Ignore:
Timestamp:
04/30/23 02:11:46 (19 months ago)
Author:
Maciej Komosinski
Message:
  • More strict parsing (reporting errors instead of implicit fixes)
  • Simplified and optimized parsing of neuron class names
  • Added a number of comments on parsing peculiarities
Location:
cpp/frams/genetics/f4
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • cpp/frams/genetics/f4/f4_general.cpp

    r1228 r1229  
    2525void rolling_dec(double *v)
    2626{
    27         *v -= 0.7853;  // 0.7853981  45 degrees
     27        *v -= 0.7853;  // 0.7853981  45 degrees = pi/4 like in f1
    2828}
    2929
     
    4040        {
    4141                if (i >= slen)  // ran out the string, should never happen with a correct string
    42                         return 1;
     42                        return 1; //TODO MacKo 2023-04: interesting: why was this situation made undistinguishable from s[1]==stopchar ? does this have any bad consequences or is "1" just used to tell "advance as little as possible"? Anyway, this function can be eliminated when parsing is simplified.
    4343                if (stopchar == s[i])  // bumped into stopchar
    4444                        return int(i);
    4545                if (i < slen - 1) // s[i] is not the last char
    4646                {
    47                         if (s[i] == '(')
     47                        if (s[i] == '(') //not an allowed char in f4, perhaps a remnant of old experiments with code
    4848                        {
    4949                                i += 2 + scanRecur(s + i + 1, slen - i - 1, ')');
     
    192192                // the current genotype code is processed
    193193                //genoRange.add(gcur->pos,gcur->pos+gcur->name.length()-1);
    194                 bool neuclasshandler = false; // if set to true, then a separate neuron handler below will identify the neuroclass and assign the cell to the neuron type
    195194
    196195                // To detect what genes are valid neuroclass names, but do NOT have is_neuroclass==true
    197                 // (just as a curiosity to ensure we properly distinguish between, for example, the "G" neuron and "G" modifier):
     196                // (just as a curiosity to ensure we properly distinguish between, for example, the "G" neuron and the "G" modifier):
    198197                //char *TMP = (char*)gcur->name.c_str();
    199                 //if (gcur->is_neuroclass==false && GenoOperators::parseNeuroClass(TMP,ModelEnum::SHAPETYPE_BALL_AND_STICK))
     198                //if (gcur->is_neuroclass==false && GenoOperators::parseNeuroClass(TMP, ModelEnum::SHAPETYPE_BALL_AND_STICK))
    200199                //      printf("Could be a valid neuroclass, but is_neuroclass==false: %s\n", gcur->name.c_str());
    201200
    202                 if (gcur->name.length() == 1 && gcur->neuclass == NULL) //one-character genes and not neuroclass names
    203                 {
     201                if (gcur->neuclass == NULL) //not a neuron
     202                {
     203                        if (gcur->name.length() > 1)
     204                                logPrintf("f4_Cell", "oneStep", LOG_WARN, "Multiple-character code that is not a neuron class name: '%s'", gcur->name.c_str()); //let's see an example of such a code...
     205
    204206                        genoRange.add(gcur->pos, gcur->pos);
    205207                        char name = gcur->name[0];
     
    542544                        default:
    543545                        {
    544                                 // because there are one-character neuron classes, default is move control to neuclasshandler
    545                                 neuclasshandler = true;
    546                         }
    547                         }
    548                 }
    549                 else
    550                 {
    551                         // if many characters or single character but is_neuroclass, then it will be handled below
    552                         neuclasshandler = true;
    553                 }
    554 
    555                 if (neuclasshandler)
    556                 {
    557                         genoRange.add(gcur->pos, gcur->pos + int(gcur->name.length()) + 2 - 1); // +2 for N:
    558                         if (type != CELL_UNDIFF)
    559                         {
    560                                 // fix: delete this node
    561                                 org->setRepairRemove(gcur->pos, gcur);
    562                                 return 1;  // stop
    563                         }
    564                         // error: if no previous
    565                         if (dadlink == NULL)
    566                         {
    567                                 // fix: delete it
    568                                 org->setRepairRemove(gcur->pos, gcur);
    569                                 return 1;  // stop
    570                         }
    571                         // multiple characters are neuron types. Let's check if exists in the current configuration of Framsticks
    572                         char *temp = (char*)gcur->name.c_str();
    573                         neuclass = GenoOperators::parseNeuroClass(temp, ModelEnum::SHAPETYPE_BALL_AND_STICK);
    574                         if (neuclass == NULL)
    575                         {
    576546                                // error: unknown code
    577547                                string buf = "Unknown code '" + gcur->name + "'";
     
    580550                                return 1;
    581551                        }
     552                        }
     553                }
     554                else
     555                {
     556                        genoRange.add(gcur->pos, gcur->pos + int(gcur->name.length()) + 2 - 1); // +2 for N:
     557                        if (type != CELL_UNDIFF)
     558                        {
     559                                // fix: delete this node
     560                                org->setRepairRemove(gcur->pos, gcur);
     561                                return 1;  // stop
     562                        }
     563                        // error: if no previous
     564                        if (dadlink == NULL)
     565                        {
     566                                // fix: delete it
     567                                org->setRepairRemove(gcur->pos, gcur);
     568                                return 1;  // stop
     569                        }
     570                        neuclass = gcur->neuclass;
    582571                        type = CELL_NEURON;
    583572                        // change of type also halts development, to give other
     
    13001289        len = out.length();
    13011290        if (len > 1)
    1302                 if (out[len - 1] == '>') { (out.directWrite())[len - 1] = 0; out.endWrite(); };
     1291                if (out[len - 1] == '>') { (out.directWrite())[len - 1] = 0; out.endWrite(); }; //Macko 2023-04 TODO "can be omitted", but should we remove it as a rule even in generated genotypes? see if I can somehow detect junk characters after top-level '>' ends properly: /*4*/<X>N:N>whatever
    13031292        // copy back to string
    13041293        // if new is longer, reallocate buf
     
    13401329// scan genotype string and build tree
    13411330// return >1 for error (errorpos)
    1342 int f4_processRecur(const char* genot, unsigned pos0, f4_Node *parent)
    1343 {
    1344         unsigned int gpos;
    1345         f4_Node *par;
    1346 
    1347         gpos = pos0;
    1348         par = parent;
    1349         if (gpos >= strlen(genot)) return 1;
     1331int f4_processRecur(const char* genot, unsigned int pos0, f4_Node *parent)
     1332{
     1333        unsigned int gpos = pos0; //MacKo 2023-04 (TODO): these two variables are often updated before return which has no effect since they are local. Seems like a half step towards making them (or just gpos) in/out parameter which would solve many issues and simplify parsing (getting rid of scanRecur()) while making it more strict.
     1334        f4_Node *par = parent;
     1335
     1336        if (gpos >= strlen(genot))
     1337                return (int)strlen(genot) + 1;
     1338
    13501339        while (gpos < strlen(genot))
    13511340        {
     
    13691358                        else // ran out
    13701359                        {
    1371                                 node = new f4_Node(">", par, int(strlen(genot)) - 1);
    1372                                 par = node;
     1360                                //MacKo 2023-04, more strict behavior: instead of silent repair (no visible effect to the user, genotype stays invalid but is interpreted and reported as valid), we now point out where the error is. For example <X> or <X><X or <X><N:N>
     1361                                return gpos + 1; //the problem starts here, occurs because second child (branch) <1..>2..> is not completed
     1362                                //old silent repair:
     1363                                //node = new f4_Node(">", par, int(strlen(genot)) - 1);
     1364                                //par = node;
    13731365                        }
    13741366                        gpos++;
     
    13791371                        f4_Node *node = new f4_Node(">", par, gpos);
    13801372                        par = node;
    1381                         gpos = (unsigned int)strlen(genot);
     1373                        //gpos = (unsigned int)strlen(genot); //MacKo 2023-04: first of all, 'gpos' is a local variable so no effect; second, '>' may be internal (i.e., not the last one in the genotype), so it is a bad hint to assign strlen(). 'par' above is also local...
    13821374                        return 0;  // OK
    13831375                }
     
    13941386                        gpos += end - (genot + gpos);
    13951387                        //gpos++;
    1396                         //while ((genot[gpos] >= '0') && (genot[gpos] <= '9')) gpos++;node1 = new f4_Node("#", par, oldpos);
     1388                        //while ((genot[gpos] >= '0') && (genot[gpos] <= '9')) gpos++; node1 = new f4_Node("#", par, oldpos);
    13971389                        f4_Node *node = new f4_Node("#", par, oldpos);
    13981390                        node->reps = reps;
     
    14071399                        else // ran out
    14081400                        {
    1409                                 node = new f4_Node(">", par, int(strlen(genot)) - 1);
     1401                                return gpos; //MacKo 2023-04: report an error, better to be more strict instead of a silent repair (genotype stays invalid but is interpreted and reported as valid) with non-obvious consequences?
     1402                                //earlier approach - silently treating this problem (we don't ever see where the error is because it gets corrected in some way here, while parsing the genotype, and error location in the genotype is never reported):
     1403                                //node = new f4_Node(">", par, int(strlen(genot)) - 1); // check if needed and if this is really the best repair operation; seemed to happen too many times in succession for some genotypes even though they were only a result of f4 operators, not manually created... and the operators should not generate invalid genotypes, right? Or maybe crossover does? Seems like too many #N's for closing >'s; removing #N or adding > helped. Operators somehow don't do it properly sometimes? But F4_ADD_REP adds '>'... (TODO)
    14101404                        }
    14111405                        return 0;  // OK
     
    14911485
    14921486        // should end with a '>'
    1493         if (par)
    1494         {
    1495                 if (par->name != ">")
    1496                 {
    1497                         f4_Node *node = new f4_Node('>', par, int(strlen(genot)) - 1);
    1498                         par = node;
    1499                 }
     1487        if (par && par->name != ">")
     1488        {
     1489                //happens when gpos == strlen(genot)
     1490                //return gpos; //MacKo 2023-04: could report an error instead of silent repair, but repair operators only work in Cells (i.e., after the f4_Node tree has been parsed without errors and Cells can start developing) so we don't want to make a fatal error because of missing '>' here. Also after conversions from Cells to text, trailing '>' is deliberately removed... and also the simplest genotype is officially X, not X>.
     1491                f4_Node *node = new f4_Node('>', par, int(strlen(genot)) - 1);
     1492                par = node;
    15001493        }
    15011494
  • cpp/frams/genetics/f4/f4_general.h

    r1228 r1229  
    4242
    4343/**
     44 * TODO MacKo 2023-04: not sure if this function is needed and if f4_processRecur() would not suffice
     45 * if it advanced the string pointer (in/out parameter) while processing. Its returned value is always used after
     46 * f4_processRecur() anyway, and in two cases likely incorrectly (for [...] to detect closing ']'
     47 * and for :...: to detect closing ':') - we don't need recursion in these cases, a simple linear
     48 * scan would suffice, but even this would not be needed - since we are parsing the actual characters in these cases,
     49 * we do scanning anyway. So looks like this function doubles the work already done more thoroughly by f4_processRecur().
     50 *
    4451 * Scans f4 genotype string for a stopping character and returns the position of
    4552 * this stopping character or 1 if the end of string was reached. This method is used
     
    494501 * @return 0 if processing was successful, otherwise returns the position of an error in the genotype
    495502 */
    496 int f4_processRecur(const char *genot, unsigned pos0, f4_Node *parent);
     503int f4_processRecur(const char *genot, unsigned int pos0, f4_Node *parent);
    497504
    498505/**
  • cpp/frams/genetics/f4/f4_oper.cpp

    r1228 r1229  
    1616//
    1717// TODO the behavior of neuron input indexes during mutation seems badly implemented (see also TREAT_BAD_CONNECTIONS_AS_INVALID_GENO). Are they kept properly maintained when nodes are added and removed? This could be done well because during mutation we operate on the tree structure with cross-references between nodes (so they should not be affected by local changes in the tree), and then convert the tree back to string. Yet, the f4_Node.conn_from is an integer and these fields in nodes do not seem to be maintained on tree node adding/removal... change these integer offsets to references to node objects? But actually, do the offsets that constitute relative connection references concern the f4_Node tree structure (and all these sophisticated calculations of offsets during mutation are useful) or rather they concern the f4_Cells development? verify all situations in f4_Cell::oneStep(), case '['.
    18 // TODO add simplifying sequences of modifiers (so capital and small letter cancel out, like in f1) - but seems like each single modifier is a separate f4_Node? and perhaps we don't want to use the repair mechanism for this... maybe mutations, when they add/modify/remove a modifier node, should be "cleaning" the tree by removing nodes when they encounter contradictory modifiers on the same subpath?
     18// TODO add simplifying sequences of modifiers (so capital and small letter cancel out, like in f1) - but seems like each single modifier is a separate f4_Node? and perhaps we don't want to use the repair mechanism for this... maybe mutations, when they add/modify/remove a modifier node, should be "cleaning" the tree by removing nodes when they encounter contradictory modifiers on the same subpath, and also limit the number of modifiers of each type just like in f1? To avoid sequences like ...<X>llmlIilImmimiimmimifmfl<fifmmimilimmmiimiliffmfliIfififlliflimfliffififmiffmfliflifmIlimimiflimfiffmllliflmimifllifliliflifmIlimimiflimfiffmllliflmimifllfmIlimimiflimfiffmllliflmimiflliflimimmiflimfliffmiflifmfiffllIlififliffififmiffmfliflifIliflimimflimflfflimimifllfflifllfflimlififfiiffifIr<r<...
    1919// TODO add support for properties of (any class of) neurons - not just sigmoid/force/inertia (':' syntax) for N
    2020// TODO add mapping genotype character ranges for neural [connections]
     
    120120int Geno_f4::validate(char *& geno, const char *genoname)
    121121{
    122         // convert geno to tree, then try to validate 20 times
     122        // convert geno to a tree, then try to validate
    123123        f4_Node root;
    124124        if (f4_processRecur(geno, 0, &root) || root.childCount() != 1) return GENOPER_OK; // cannot repair
    125         if (ValidateRec(&root, 20) == GENOPER_REPAIR) // if repaired, make it back to string
     125
     126        const int VALIDATE_TRIALS = 20;
     127        if (ValidateRec(&root, VALIDATE_TRIALS) == GENOPER_REPAIR) // if repaired, make it back to string
    126128        {
    127129                geno[0] = 0;
     
    674676        if (f4_processRecur(g2, 0, &root2) || (root2.childCount() != 1)) return GENOPER_OPFAIL;
    675677
    676         // decide amounts of crossover, 0.25-0.75
    677         // adam: seems 0.1-0.9 -- MacKo
     678        // decide amounts of crossover, 0.1-0.9
    678679        chg1 = 0.1 + rndDouble(0.8);
    679680        chg2 = 0.1 + rndDouble(0.8);
Note: See TracChangeset for help on using the changeset viewer.