Changeset 1231 for cpp/frams/genetics/f4


Ignore:
Timestamp:
05/02/23 01:36:15 (21 months ago)
Author:
Maciej Komosinski
Message:
  • Thanks to r1230, it is possible to detect (and repair, i.e., remove) junk trailing genes that are left after successful parsing (after the last '>')
  • The validate() function may attempt to repair a genotype where earlier it would give up
  • Stricter parsing of the '#' gene
Location:
cpp/frams/genetics/f4
Files:
5 edited

Legend:

Unmodified
Added
Removed
  • cpp/frams/genetics/f4/f4_conv.cpp

    r1227 r1231  
    2727SString GenoConv_f40::convert(SString &in, MultiMap *map, bool using_checkpoints)
    2828{
    29         int res;
    3029        f4_Model *model = new f4_Model();
    31         res = model->buildFromF4(in, using_checkpoints);
    32         if (GENOPER_OK != res)
     30        int res = model->buildFromF4(in, using_checkpoints);
     31        if (res != GENOPER_OK)
    3332        {
    3433                delete model;
     
    5554SString GenoConv_F41_TestOnly::convert(SString &in, MultiMap *map, bool using_checkpoints)
    5655{
    57         int res;
    5856        f4_Model *model = new f4_Model();
    59         res = model->buildFromF4(in, using_checkpoints);
    60         if (GENOPER_OK != res)
     57        int res = model->buildFromF4(in, using_checkpoints);
     58        if (res != GENOPER_OK)
    6159        {
    6260                delete model;
     
    8280int f4_Model::buildFromF4(SString &geno, bool using_checkpoints)
    8381{
    84         int i;
    85 
    8682        error = GENOPER_OK;
    8783        errorpos = -1;
    8884
     85        // transform geno from string to nodes
     86        f4_Node f4rootnode;
     87        int res = f4_process(geno.c_str(), &f4rootnode);
     88        if (res || (f4rootnode.childCount() != 1)) //consider any error fatal, preventing building a model
     89        {
     90                error = GENOPER_OPFAIL;
     91                errorpos = res;
     92                return error;
     93        }
     94
    8995        // build cells, and simulate
    9096        if (cells) delete cells;
    91         cells = new f4_Cells(geno, 0);
    92         if (GENOPER_OK != cells->getErrorCode())
     97        cells = new f4_Cells(f4rootnode.child, false);
     98        if (cells->getErrorCode() != GENOPER_OK)
    9399        {
    94100                error = cells->getErrorCode();
     
    99105
    100106        cells->simulate();
    101         if (GENOPER_OK != cells->getErrorCode())
     107        if (cells->getErrorCode() != GENOPER_OK)
    102108        {
    103109                error = cells->getErrorCode();
     
    107113
    108114        // reset recursive traverse flags
    109         for (i = 0; i < cells->cell_count; i++)
     115        for (int i = 0; i < cells->cell_count; i++)
    110116                cells->C[i]->recProcessedFlag = 0;
    111117
     
    113119
    114120        // process every cell
    115         int res;
    116         for (i = 0; i < cells->cell_count; i++)
    117         {
    118                 res = buildModelRec(cells->C[i]);
     121        for (int i = 0; i < cells->cell_count; i++)
     122        {
     123                int res = buildModelRec(cells->C[i]);
    119124                if (res)
    120125                {
    121                         logMessage("f4_Model", "buildFromF4", LOG_ERROR, "Error in building a Model");
     126                        logPrintf("f4_Model", "buildFromF4", LOG_ERROR, "Error %d when building a Model", res);
    122127                        error = res;
    123128                        break;
     
    125130        }
    126131
    127         res = close();
    128         if (0 == res) // invalid
     132        int res_close = close();
     133        if (res_close == 0) // invalid
     134        {
     135                logPrintf("f4_Model", "buildFromF4", LOG_ERROR, "Error %d when closing a Model", res_close);
    129136                error = -10;
     137        }
    130138
    131139        return error;
  • cpp/frams/genetics/f4/f4_general.cpp

    r1230 r1231  
    640640
    641641
    642 f4_Cells::f4_Cells(f4_Node *genome, int nrepair)
    643 {
    644         // create ancestor cell
     642f4_Cells::f4_Cells(f4_Node *genome, bool nrepair)
     643{
    645644        repair = nrepair;
    646645        errorcode = GENOPER_OK;
     
    650649        repair_insert = NULL;
    651650        tmpcel = NULL;
    652         f4rootnode = NULL;
     651
     652        // create ancestor cell
    653653        C[0] = new f4_Cell(this, 0, genome, genome, NULL, 0, GeneProps::standard_values);
    654654        cell_count = 1;
     
    656656
    657657
    658 f4_Cells::f4_Cells(SString & genome, int nrepair)
    659 {
    660         repair = nrepair;
    661         errorcode = GENOPER_OK;
    662         errorpos = -1;
    663         repair_remove = NULL;
    664         repair_parent = NULL;
    665         repair_insert = NULL;
    666         tmpcel = NULL;
    667         f4rootnode = NULL;
    668 
    669         // transform geno from string to nodes
    670         f4rootnode = new f4_Node();
    671         int _ = 0;
    672         int res = f4_processRecur(genome.c_str(), _, f4rootnode);
    673         if (res || (f4rootnode->childCount() != 1))
    674         {
    675                 errorcode = GENOPER_OPFAIL;
    676                 errorpos = -1;
    677         }
    678 
    679         // create ancestor cell
    680         C[0] = new f4_Cell(this, 0, f4rootnode->child, f4rootnode->child, NULL, 0, GeneProps::standard_values);
    681         cell_count = 1;
    682 }
    683658
    684659f4_Cells::~f4_Cells()
    685660{
    686661        // release cells
    687         int i;
    688662        if (cell_count)
    689663        {
    690                 for (i = cell_count - 1; i >= 0; i--)
     664                for (int i = cell_count - 1; i >= 0; i--)
    691665                        delete C[i];
    692666                cell_count = 0;
    693667        }
    694         if (f4rootnode)
    695                 delete f4rootnode;
    696668}
    697669
     
    11301102int f4_Node::childCount()
    11311103{
    1132         if (child != NULL)
    1133         {
    1134                 if (child2 != NULL) return 2;
    1135                 else return 1;
    1136         }
    1137         else
    1138         {
    1139                 if (child2 != NULL) return 1;
    1140                 else return 0;
    1141         }
     1104        return int(child != NULL) + int(child2 != NULL); //0, 1 or 2
    11421105}
    11431106
     
    12561219        len = out.length();
    12571220        if (len > 1)
    1258                 if (out[len - 1] == '>') { (out.directWrite())[len - 1] = 0; out.endWrite(); }; //Macko 2023-04 TODO "can be omitted", but should we remove it as a rule even in generated genotypes? see if I can somehow detect junk characters after top-level '>' ends properly: /*4*/<X>N:N>whatever
     1221                if (out[len - 1] == '>') { (out.directWrite())[len - 1] = 0; out.endWrite(); }; //Macko 2023-04 "can be omitted", but it is removed as a rule even in generated genotypes :)
    12591222        // copy back to string
    12601223        // if new is longer, reallocate buf
     
    13051268        while (pos_inout < (int)strlen(genot))
    13061269        {
    1307                 //#define PRINT_PARSING_LOCATION
     1270//#define PRINT_PARSING_LOCATION
    13081271#ifdef PRINT_PARSING_LOCATION
    13091272                printf("%s\n", genot);
     
    13421305                case '#':
    13431306                {
    1344                         // repetition marker, 1 by default
    1345                         ExtValue val;
    1346                         const char* end = val.parseNumber(genot + pos_inout + 1, ExtPType::TInt);
    1347                         //TODO end==NULL? -> error!
    1348                         int reps = (end == NULL) ? 1 : val.getInt();
     1307                        // repetition marker
     1308                        ExtValue reps;
     1309                        const char* end = reps.parseNumber(genot + pos_inout + 1, ExtPType::TInt);
     1310                        if (end == NULL)
     1311                                return pos_inout + 1; //error
    13491312                        f4_Node *node = new f4_Node("#", par, pos_inout);
    1350                         node->reps = reps;
     1313                        node->reps = reps.getInt();
    13511314                        // skip number
    13521315                        pos_inout += end - (genot + pos_inout);
     
    14551418}
    14561419
     1420int f4_process(const char *genot, f4_Node *root)
     1421{
     1422        int pos = 0;
     1423        int res = f4_processRecur(genot, pos, root);
     1424        if (res > 0)
     1425                return res; //error
     1426        else if (genot[pos] == 0) //parsed until the end - OK!
     1427                return 0;
     1428        else return pos + 1; //junk, unparsed genes after successful parsing, for example /*4*/<X>N:N>whatever or /*4*/<X>X>>>
     1429}
     1430
    14571431const char* parseConnection(const char *fragm, int& relfrom, double &weight)
    14581432{
  • cpp/frams/genetics/f4/f4_general.h

    r1230 r1231  
    242242         * Constructor taking genotype in a form of a tree.
    243243         * @param genome genotype tree
    244          * @param nrepair 0 if nothing to repair
    245          */
    246         f4_Cells(f4_Node *genome, int nrepair);
    247 
    248         /**
    249          * Constructor taking genotype in a form of a string.
    250          * @param genome genotype string
    251          * @param nrepair 0 if nothing to repair
    252          */
    253         f4_Cells(SString &genome, int nrepair);
     244         * @param nrepair false if nothing to repair
     245         */
     246        f4_Cells(f4_Node *genome, bool nrepair);
    254247
    255248        /**
     
    336329private:
    337330        // for error reporting / genotype fixing
    338         int repair;
     331        bool repair;
    339332        int errorcode;
    340333        int errorpos;
     
    343336        f4_Node *repair_insert;
    344337        void toF1GenoRec(int curc, SString &out);
    345         f4_Cell *tmpcel;                // needed by toF1Geno
    346         f4_Node *f4rootnode;          // used by constructor
     338        f4_Cell *tmpcel;  // needed by toF1Geno
    347339};
    348340
     
    477469 * semantic aspect is neuron class name extraction, where the GenoOperators
    478470 * class is used to parse the potential neuron class name.
    479  * @param genot the string holding all the genotype
    480  * @param pos0 the current position of processing in string
     471 * This is an internal function; for regular cases, use f4_process().
     472 * @param genot the string with the entire genotype
     473 * @param pos_inout the current position of processing in string (advanced by the function)
    481474 * @param parent current parent of the analysed branch of the genotype
    482475 * @return 0 if processing was successful, otherwise returns the position of an error in the genotype
    483476 */
    484477int f4_processRecur(const char *genot, int &pos_inout, f4_Node *parent);
     478
     479/**
     480 * A wrapper for f4_processRecur(). Creates a tree of f4_Node objects corresponding to
     481 * the provided genotype.
     482 * @param genot the string with the entire genotype
     483 * @param root root of the tree corresponding to the genotype
     484 * @return 0 if processing was successful, otherwise returns the position of an error in the genotype
     485 */
     486int f4_process(const char *genot, f4_Node *root);
    485487
    486488/**
  • cpp/frams/genetics/f4/f4_oper.cpp

    r1230 r1231  
    1111// may help, but it would be better to improve the source code to make genetic operators neutral in terms of genotype length. Adding such a penalty
    1212// removes "work in progress" changes in genotypes thus promoting immediate, straightforward improvements while hindering slower, multifaceted progress.
    13 // TODO getting rid of redundancy (having valid genotypes with a lot of "junk code") in this representation looks like a good idea.
     13// TODO getting rid of redundancy (valid genotypes with a lot of "junk code") in this representation looks like a good idea; many improvements to this end have already been done in April & May 2023.
    1414//
    15 // Note: symbols after the last > are ignored, for example /*4*/<X>N:N>blablaN:N[2:-0.5]XXXwhatever but since they are not parsed into the f4_Node tree, they will be lost after any mutation.
    1615//
    1716// TODO the behavior of neuron input indexes during mutation seems badly implemented (see also TREAT_BAD_CONNECTIONS_AS_INVALID_GENO). Are they kept properly maintained when nodes are added and removed? This could be done well because during mutation we operate on the tree structure with cross-references between nodes (so they should not be affected by local changes in the tree), and then convert the tree back to string. Yet, the f4_Node.conn_from is an integer and these fields in nodes do not seem to be maintained on tree node adding/removal... change these integer offsets to references to node objects? But actually, do the offsets that constitute relative connection references concern the f4_Node tree structure (and all these sophisticated calculations of offsets during mutation are useful) or rather they concern the f4_Cells development? verify all situations in f4_Cell::oneStep(), case '['.
     
    8786}
    8887
    89 int Geno_f4::ValidateRec(f4_Node *geno, int retrycount) const
     88int Geno_f4::ValidateRecur(f4_Node *geno, int retrycount) const
    9089{
    9190        // ! the genotype is geno->child (not geno) !
    9291        // build from it with repair on
    9392
    94         f4_Cells cells(geno->child, 1);
     93        f4_Cells cells(geno->child, true);
    9594        cells.simulate();  //we should simulate?!
    9695
     
    109108                int res2 = GENOPER_OK;
    110109                if (retrycount > 0)
    111                         res2 = ValidateRec(geno, retrycount - 1);
     110                        res2 = ValidateRecur(geno, retrycount - 1);
    112111
    113112                if (res2 == GENOPER_OK) return GENOPER_REPAIR;
     
    123122        // convert geno to a tree, then try to validate
    124123        f4_Node root;
    125         int _ = 0;
    126         if (f4_processRecur(geno, _, &root) || root.childCount() != 1) return GENOPER_OK; // cannot repair
    127 
     124        int res = f4_process(geno, &root);
     125        if (res == 0 || root.childCount() != 1) return GENOPER_OK; // either parsing says the genotype is OK or the resulting tree will not be repairable (fatal flaw; root must have exactly one child) - do not attempt repair
     126
     127        // here we have a genotype with res>0 (for sure has some error) and root.childCount()==1 (still something was parsed into a tree)
    128128        const int VALIDATE_TRIALS = 20;
    129         if (ValidateRec(&root, VALIDATE_TRIALS) == GENOPER_REPAIR) // if repaired, make it back to string
     129        res = ValidateRecur(&root, VALIDATE_TRIALS);
     130        if (res != GENOPER_OPFAIL) // if repaired (GENOPER_REPAIR) or had no errors (GENOPER_OK, e.g. the genotype had some errors that were ignored during tree creation or had junk genes appended at the end, so the tree was OK but the genotype was not),
    130131        {
    131132                geno[0] = 0;
    132                 root.child->sprintAdj(geno);
     133                root.child->sprintAdj(geno); //make it back to string
    133134        }
    134135        return GENOPER_OK;
     
    139140{
    140141        f4_Node root;
    141         int _ = 0;
    142         int res = f4_processRecur(geno, _, &root);
     142        int res = f4_process(geno, &root);
    143143        if (res) return res;  // errorpos, >0
    144         if (root.childCount() != 1) return 1; //earlier: GENOPER_OPFAIL
    145         f4_Cells cells(root.child, 0);
     144        if (root.childCount() != 1) return 1; // fatal flaw; root must have exactly one child
     145        f4_Cells cells(root.child, false);
    146146        cells.simulate();
    147147        if (cells.getErrorCode() == GENOPER_OPFAIL || cells.getErrorCode() == GENOPER_REPAIR)
    148148        {
    149149                if (cells.getErrorPos() >= 0) return 1 + cells.getErrorPos();
    150                 else return 1; //earlier: GENOPER_OPFAIL;
     150                else return 1; //error, no known position
    151151        }
    152152        else return GENOPER_OK;
     
    525525                }
    526526                // try to validate it
    527                 res = ValidateRec(gcopy, 10);
     527                res = ValidateRecur(gcopy, 10);
    528528                // accept if it is OK, or was repaired
    529529                if (GENOPER_OK == res)
     
    552552{
    553553        f4_Node *root = new f4_Node;
    554         int _ = 0;
    555         if (f4_processRecur(g, _, root) || root->childCount() != 1)
     554        if (f4_process(g, root) || root->childCount() != 1)
    556555        {
    557556                delete root;
     
    677676
    678677        // convert genotype strings into tree structures
    679         int _1 = 0, _2 = 0;
    680         if (f4_processRecur(g1, _1, &root1) || (root1.childCount() != 1)) return GENOPER_OPFAIL;
    681         if (f4_processRecur(g2, _2, &root2) || (root2.childCount() != 1)) return GENOPER_OPFAIL;
     678        if (f4_process(g1, &root1) || (root1.childCount() != 1)) return GENOPER_OPFAIL;
     679        if (f4_process(g2, &root2) || (root2.childCount() != 1)) return GENOPER_OPFAIL;
    682680
    683681        // decide amounts of crossover, 0.1-0.9
  • cpp/frams/genetics/f4/f4_oper.h

    r1227 r1231  
    7272         * @return GENOPER_OK if genotype is valid, GENOPER_REPAIR if genotype can be repaired, GENOPER_OPFAIL if genotype can't be repaired
    7373         */
    74         int  ValidateRec(f4_Node *geno, int retrycount) const;
     74        int  ValidateRecur(f4_Node *geno, int retrycount) const;
    7575
    7676        /**
Note: See TracChangeset for help on using the changeset viewer.