Changeset 1273 for cpp/frams/genetics/fB


Ignore:
Timestamp:
09/09/23 15:10:49 (8 months ago)
Author:
Maciej Komosinski
Message:

fH, fB, fL: improved default parameter values, syntax coloring and code logic

Location:
cpp/frams/genetics/fB
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • cpp/frams/genetics/fB/fB_oper.cpp

    r1130 r1273  
    11// This file is a part of Framsticks SDK.  http://www.framsticks.com/
    2 // Copyright (C) 1999-2021  Maciej Komosinski and Szymon Ulatowski.
     2// Copyright (C) 1999-2023  Maciej Komosinski and Szymon Ulatowski.
    33// See LICENSE.txt for details.
     4
     5//TODO parsing quotes/neurons seems too relaxed, for example the genotype aag"S""acalaaaafbc is considered valid
     6//TODO Same with numbers: "----1.23" is valid
     7//TODO reconsider: Horizontal gene transfer - copying a single random gene from each parent to the beginning of the other parent: should the gene be copied (seems to cause bloat!) or rather moved?
     8//Neurons ("N") can grow even without using quotes and providing neuron classname in the genotype, for example aaaaabbccvaaapdgfddaalaandwddbaajt (this works likely as designed, but investigate and reconsider); also valid neuron definitions inside the genotype are sometimes not expressed
    49
    510#include <frams/util/sstring.h>
     
    1924        { "Genetics: fB: Mutation", },
    2025        { "Genetics: fB: Crossover", },
    21         { "fB_mut_substitution", 1, 0, "Substitution", "f 0 1 0.6", FIELD(mutationprobs[FB_SUBSTITUTION]), "Probability of mutation by changing single random letter in genotype", },
    22         { "fB_mut_insertion", 1, 0, "Insertion", "f 0 1 0.095", FIELD(mutationprobs[FB_INSERTION]), "Probability of mutation by inserting characters in random place of genotype", },
    23         { "fB_mut_nclassins", 1, 0, "Insertion of neuron class definition", "f 0 1 0.005", FIELD(mutationprobs[FB_NCLASSINS]), "Probability of mutation by inserting neuron class definition in random place of genotype", },
    24         { "fB_mut_deletion", 1, 0, "Deletion", "f 0 1 0.1", FIELD(mutationprobs[FB_DELETION]), "Probability of mutation by deleting random characters in genotype", },
    25         { "fB_mut_duplication", 1, 0, "Duplication", "f 0 1 0.0", FIELD(mutationprobs[FB_DUPLICATION]), "Probability of mutation by copying single *gene* of genotype and appending it to the beginning of this genotype", },
    26         { "fB_mut_translocation", 1, 0, "Translocation", "f 0 1 0.15", FIELD(mutationprobs[FB_TRANSLOCATION]), "Probability of mutation by replacing two substrings in genotype", },
    27         { "fB_cross_gene_transfer", 2, 0, "Horizontal gene transfer", "f 0 1 0.0", FIELD(crossoverprobs[FB_GENE_TRANSFER]), "Probability of crossing over by transferring single genes from both parents to beginning of each other", },
    28         { "fB_cross_crossover", 2, 0, "Crossing over", "f 0 1 1.0", FIELD(crossoverprobs[FB_CROSSING_OVER]), "Probability of crossing over by random distribution of genes from both parents to both children", },
     26        { "fB_mut_substitute", 1, 0, "Substitution", "f 0 100 1", FIELD(mutationprobs[FB_SUBSTITUTION]), "Relative probability of changing a single random character (or a neuron) in the genotype", },
     27        { "fB_mut_insert", 1, 0, "Insertion", "f 0 100 3", FIELD(mutationprobs[FB_INSERTION]), "Relative probability of inserting a random character in a random place of the genotype", },
     28        { "fB_mut_insert_neuron", 1, 0, "Insertion of a neuron", "f 0 100 3", FIELD(mutationprobs[FB_INSERTION_NEURON]), "Relative probability of inserting a neuron in a random place of genotype", },
     29        { "fB_mut_delete", 1, 0, "Deletion", "f 0 100 4", FIELD(mutationprobs[FB_DELETION]), "Relative probability of deleting a random character (or a neuron) in the genotype", },
     30        { "fB_mut_duplicate", 1, 0, "Duplication", "f 0 100 0", FIELD(mutationprobs[FB_DUPLICATION]), "Relative probability of copying a single *gene* of the genotype and appending it to the beginning of this genotype", },
     31        { "fB_mut_translocate", 1, 0, "Translocation", "f 0 100 4", FIELD(mutationprobs[FB_TRANSLOCATION]), "Relative probability of swapping two substrings in the genotype", },
     32        { "fB_cross_gene_transfer", 2, 0, "Horizontal gene transfer", "f 0 100 0", FIELD(crossoverprobs[FB_GENE_TRANSFER]), "Relative probability of crossing over by copying a single random gene from each parent to the beginning of the other parent", },
     33        { "fB_cross_crossover", 2, 0, "Crossing over", "f 0 100 100", FIELD(crossoverprobs[FB_CROSSING_OVER]), "Relative probability of crossing over by a random distribution of genes from both parents to both children", },
    2934        { 0, },
    3035};
     
    128133        if (!genotype.getNextToken(pos, strdims, '\n'))
    129134        {
    130                 return GENOPER_OPFAIL;
     135                return GENOPER_OK;
    131136        }
    132137        // parse dimension
     
    134139        if (!ExtValue::parseInt(strdims.c_str(), dims, true, false))
    135140        {
    136                 return GENOPER_OPFAIL;
     141                return GENOPER_OK;
    137142        }
    138143        SString line;
     
    171176                        else
    172177                        {
    173                                 return GENOPER_OPFAIL;
     178                                return GENOPER_OK;
    174179                        }
    175180                }
     
    247252        {
    248253                std::list<SString> tokenized = tokenizeSequence(line);
    249                 int rndid = rndUint(tokenized.size()); // select random letter from genotype
     254                int rndid = rndUint((int)tokenized.size()); // select random letter from genotype
    250255                // increment/decrement character - when overflow happens, this method
    251256                // uses the "reflect" approach
     
    282287                break;
    283288        }
    284         case FB_NCLASSINS:
     289        case FB_INSERTION_NEURON:
    285290        {
    286291                std::list<SString> tokenized = tokenizeSequence(line);
    287292                std::list<SString>::iterator it = tokenized.begin();
    288                 int rndid = rndUint(tokenized.size()); // select random insertion point
     293                int rndid = rndUint((int)tokenized.size()); // select random insertion point
    289294                std::advance(it, rndid);
    290295                NeuroClass *cls = getRandomNeuroClass(Model::SHAPETYPE_BALL_AND_STICK);
     
    307312                chg = 1.0 / line.length();
    308313                std::list<SString> tokenized = tokenizeSequence(line);
    309                 int rndid = rndUint(tokenized.size()); // select random insertion point
     314                int rndid = rndUint((int)tokenized.size()); // select random insertion point
    310315                std::list<SString>::iterator it = tokenized.begin();
    311316                std::advance(it, rndid);
     
    321326                std::list<SString> tokenized = tokenizeSequence(line);
    322327                std::list<SString>::iterator it = tokenized.begin();
    323                 int rndid = rndUint(tokenized.size()); // select random deletion point
     328                int rndid = rndUint((int)tokenized.size()); // select random deletion point
    324329                std::advance(it, rndid);
    325330                tokenized.erase(it);
     
    343348                for (int i = 0; i < 4; i++)
    344349                {
    345                         cuts[i] = rndUint(tokenized.size());
     350                        cuts[i] = rndUint((int)tokenized.size());
    346351                }
    347352                std::sort(cuts.begin(), cuts.end());
     
    407412        case FB_GENE_TRANSFER:
    408413        {
    409                 // get random gene from first parent
     414                // get a random gene from the first parent
    410415                int choice = rndUint(fB_GenoHelpers::geneCount(parent1));
    411416                int start, end;
     
    414419                child2 = gene + parent2;
    415420                chg2 = (float)parent2.length() / (float)child2.length();
    416                 // do the same for second parent
     421                // do the same for the second parent
    417422                choice = rndUint(fB_GenoHelpers::geneCount(parent2));
    418423                gene = fB_GenoHelpers::getGene(choice, parent2, start, end);
     
    554559                {
    555560                        pos--;
    556                         if (isdigit(geno[pos]) == 0)
    557                         {
    558                                 return GENSTYLE_CS(0, GENSTYLE_INVALID);
    559                         }
    560                 }
    561                 return GENSTYLE_RGBS(0, 0, 200, GENSTYLE_BOLD);
    562         }
    563         if (islower(ch) == 0)
    564         {
    565                 return GENSTYLE_CS(0, GENSTYLE_INVALID);
    566         }
    567         uint32_t style = GENSTYLE_CS(GENCOLOR_TEXT, GENSTYLE_NONE);
    568         if (ch == 'a' && pos > 0 && (geno[pos - 1] == 'a' || geno[pos - 1] == '\n'))
     561                        if (isdigit(geno[pos]) == 0) //going left we encountered some non-digit character
     562                        {
     563                                return GENSTYLE_CS(GENCOLOR_NUMBER, GENSTYLE_NONE); //so 'ch' is any digit in the genotype (neural property value etc.); for simplicity, digits as parts of neuroclass name or property name also get included here
     564                        }
     565                }
     566                return GENSTYLE_RGBS(0, 0, 200, GENSTYLE_BOLD); //only digits up to the beginning, so this is the dimensionality value
     567        }
     568        if (ch == '-' || ch == '.')
     569                return GENSTYLE_CS(GENCOLOR_NUMBER, GENSTYLE_NONE);
     570        if (ch == '"')
     571                return GENSTYLE_RGBS(150, 0, 150, GENSTYLE_BOLD); //quotes encompass neuron definitions. To further distinguish the text inside quotes from the text outside quotes, we would need to determine the number of '"' from the beginning, i.e. linear search through the entire genotype. We don't want to do it - it would mean the complexity of len(geno)^2 if performed for each symbol in the genotype independently, like this function does. Below we perform an approximate partial scan.
     572        if (isupper(ch) || strchr("@|*", ch))
     573                return GENSTYLE_RGBS(150, 0, 150, GENSTYLE_BOLD); //neuroclass
     574        if (strchr(":,=", ch))
     575                return GENSTYLE_RGBS(150, 0, 150, GENSTYLE_NONE); //these symbols occur exclusively inside "...neuron...", so let's make the entire neuron section "...neuron..." more visually uniform by using the same violet color as the neuroclass name and quotes have
     576        if (islower(ch)) //how to color the current lower-case letter?
     577        {
     578                static const int SCAN_RANGE = 8; //how many characters before the current one to scan to discover some context and find out if we are likely in the neuroclass name or the property name. Reduces computational complexity. Example genotype fragments: abcabc"T:r=0.9, ry=4.088, rz=1.213"abcabc or abc"N:in=0.0, fo=0.17, si=999.0"abc
     579                int i = pos;
     580                while (i > 0 && pos - i < SCAN_RANGE)
     581                {
     582                        i--; //go back one char
     583                        if (isupper(geno[i]))
     584                                return GENSTYLE_RGBS(150, 0, 150, GENSTYLE_BOLD); //neuroclass
     585                        if (geno[i] == ',' || geno[i] == ':') //this is what must occur before property name starts
     586                                return GENSTYLE_RGBS(255, 140, 0, GENSTYLE_BOLD); //property
     587                        if (!(isalpha(geno[i]) || isspace(geno[i]))) //going left we encountered any char that is not a letter or space
     588                                break;
     589                }
     590        }
     591
     592        uint32_t style = GENSTYLE_CS(GENCOLOR_TEXT, GENSTYLE_NONE); //if the current character did not fall into any of the above cases, assume default black style
     593        if (ch == 'a' && (geno[pos + 1] == 'a' || (pos > 0 && geno[pos - 1] == 'a'))) //start codon, "aa"
    569594        {
    570595                style = GENSTYLE_RGBS(0, 200, 0, GENSTYLE_BOLD);
    571596        }
    572         else if (ch == 'z' && pos > 0 && geno[pos - 1] == 'z')
     597        else if (ch == 'z' && (geno[pos + 1] == 'z' || (pos > 0 && geno[pos - 1] == 'z'))) //stop codon, "zz"
    573598        {
    574599                style = GENSTYLE_RGBS(200, 0, 0, GENSTYLE_BOLD);
  • cpp/frams/genetics/fB/fB_oper.h

    r821 r1273  
    11// This file is a part of Framsticks SDK.  http://www.framsticks.com/
    2 // Copyright (C) 1999-2018  Maciej Komosinski and Szymon Ulatowski.
     2// Copyright (C) 1999-2023  Maciej Komosinski and Szymon Ulatowski.
    33// See LICENSE.txt for details.
    44
     
    1111/** @name Codes for general fB mutation types */
    1212//@{
    13 #define FB_SUBSTITUTION  0 ///<Relative probability of mutation by changing single random letter in genotype (substitution)
    14 #define FB_INSERTION     1 ///<Relative probability of mutation by inserting characters in random place of genotype
    15 #define FB_NCLASSINS     2 ///<Relative probability of mutation by inserting neuron class definition in random place of genotype
    16 #define FB_DELETION      3 ///<Relative probability of mutation by deleting random characters in genotype
    17 #define FB_DUPLICATION   4 ///<Relative probability of mutation by copying single *gene* of genotype and appending it to the beginning of this genotype
    18 #define FB_TRANSLOCATION 5 ///<Relative probability of mutation by replacing two substrings in genotype
    19 #define FB_MUT_COUNT     6 ///<Count of mutation types
     13#define FB_SUBSTITUTION     0 ///<Relative probability of changing a single random character (or a neuron) in the genotype
     14#define FB_INSERTION        1 ///<Relative probability of inserting a random character in a random place of the genotype
     15#define FB_INSERTION_NEURON 2 ///<Relative probability of inserting a neuron in a random place of genotype
     16#define FB_DELETION         3 ///<Relative probability of deleting a random character (or a neuron) in the genotype
     17#define FB_DUPLICATION      4 ///<Relative probability of copying a single *gene* of the genotype and appending it to the beginning of this genotype
     18#define FB_TRANSLOCATION    5 ///<Relative probability of swapping two substrings in the genotype
     19#define FB_MUT_COUNT        6 ///<Count of mutation types
    2020//@}
    2121
    2222/** @name Codes for fB cross over types */
    2323//@{
    24 #define FB_GENE_TRANSFER 0 ///<Relative probability of crossing over by transferring single genes from both parents to beginning of each other
    25 #define FB_CROSSING_OVER 1 ///<Relative probability of crossing over by random distribution of genes from both parents to both children
     24#define FB_GENE_TRANSFER 0 ///<Relative probability of crossing over by copying a single random gene from each parent to the beginning of the other parent
     25#define FB_CROSSING_OVER 1 ///<Relative probability of crossing over by a random distribution of genes from both parents to both children
    2626#define FB_XOVER_COUNT   2 ///<Count of crossing over types
    2727//@}
     
    4848        int crossOver(char *&g1, char *&g2, float& chg1, float& chg2);
    4949
    50         virtual const char* getSimplest() { return "5\naaazz"; }
     50        virtual const char* getSimplest() { return "3\naaazz"; }
    5151
    5252        uint32_t style(const char *geno, int pos);
Note: See TracChangeset for help on using the changeset viewer.