Changeset 1243
- Timestamp:
- 05/19/23 19:12:22 (20 months ago)
- Location:
- cpp/frams/genetics
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
cpp/frams/genetics/genooperators.cpp
r1241 r1243 472 472 } 473 473 474 string GenoOperators::simplifiedModifiers_rR(const string& str) 475 { 476 int R = 0; //positive means more 'R', negative means more 'r' 477 for (char c : str) 478 { 479 if (c == 'R') R++; else 480 if (c == 'r') R--; 481 } 482 R %= 8; // 8 * 45 degrees = 360 degrees. After this, we get R=-7..+7 483 484 /* now, simplify homogeneous sequences of rR longer than 4: for example, rrrrr == RRR and RRRRRR == rr 485 -7 1 486 -6 2 487 -5 3 488 -4 -4 (or 4; we choose +4 meaning we will never see rrrr) 489 -3..3 (no changes) 490 4 4 (or -4) 491 5 -3 492 6 -2 493 7 -1 494 */ 495 if (R <= -4) R += 8; //-4 => +4 496 else if (R >= 5) R -= 8; 497 498 return R == 0 ? "" : (R > 0 ? string(R, 'R') : string(-R, 'r')); 499 } 500 474 501 //#include <cassert> 475 502 string GenoOperators::simplifiedModifiersFixedOrder(const char *str_of_char_pairs, vector<int> &char_counts) 476 503 { 477 // assert(strlen(str_of_char_pairs) == char_counts.size());478 // assert(char_counts.size() % 2 == 0);479 const int MAX_NUMBER_SAME_TYPE = 8; // max. number of modifiers of each type (case-sensitive) - mainly for rR, even though for rR, 4 would be sufficient if we assume lower or upper can be chosen as required for minimal length , e.g. rrrrr==RRR, RRRRRR==rr504 // assert(strlen(str_of_char_pairs) == char_counts.size()); 505 // assert(char_counts.size() % 2 == 0); 506 const int MAX_NUMBER_SAME_TYPE = 8; // max. number of modifiers of each type (case-sensitive) - mainly for rR, even though for rR, 4 would be sufficient if we assume lower or upper can be chosen as required for minimal length just as simplifiedModifiers_rR() does, e.g. rrrrr==RRR, RRRRRR==rr 480 507 string simplified; 481 //#define CLUMP_IDENTICAL_MODIFIERS // not good because with the exception of rR properties are calculated incrementally, non-linearly, and their values are updated after each modifier character, so these values may for example saturate after a large number of identical modifier symbols. 
The order of modifiers is (with the exception of rR) relevant and extreme values of properties increase this relevance, so better keep the modifiers dispersed.508 //#define CLUMP_IDENTICAL_MODIFIERS //if GeneProps::normalizeBiol4() is used, this is not good because properties are calculated incrementally, non-linearly, their values are updated after each modifier character and some properties interact with each other due to normalization so they can saturate when clumped, therefore it is better to keep the modifiers dispersed to equalize their effects 482 509 #ifdef CLUMP_IDENTICAL_MODIFIERS 483 510 for (size_t i = 0; i < strlen(str_of_char_pairs); i++) … … 510 537 string GenoOperators::simplifiedModifiers(const string & original) 511 538 { 512 const int MAX_NUMBER_SAME_TYPE = 6; // max. number of modifiers of each type (case-insensitive). rR could be treated separately in simplification because their influence follows different (i.e., simple additive) logic - so the simplifiedModifiersFixedOrder() logic with cancelling out is appropriate for rR. However in this function, making no exception to rR does not cause any harm to these modifiers either - the only consequence is that we will not remove antagonistic letters and will not simplify sequences of rR longer than 4, while they could be simplified (e.g. rrrrr==RRR, RRRRRR==rr).539 const int MAX_NUMBER_SAME_TYPE = 6; // max. number of modifiers of each type (case-insensitive). rR is treated separately in simplification because their influence follows different (i.e., simple additive) logic - so the simplifiedModifiersFixedOrder() logic with cancelling out antagonistic modifiers is appropriate for rR. 
513 540 int counter[256] = {}; //initialize with zeros; 256 is unnecessarily too big and redundant, but enables very fast access (indexed directly by the ascii code) 514 541 string simplified = ""; 515 for (int i = original.size() - 1; i >= 0; i--) //iterate from end to begin -easier to remove "oldest" = first modifiers542 for (int i = original.size() - 1; i >= 0; i--) //iterate from end to begin so it is easier to remove "oldest" = first modifiers 516 543 { 517 544 unsigned char c = original[i]; 518 if (!std::isalpha(c) )545 if (!std::isalpha(c) || c == 'r' || c == 'R') //ignore non-alphabet characters; also, 'r' and 'R' are handled separately by simplifiedModifiers_rR() 519 546 continue; 520 547 unsigned char lower = std::tolower(c); … … 524 551 } 525 552 std::reverse(simplified.begin(), simplified.end()); //"simplified" was built in reverse order, so need to restore the order that corresponds to "original" 526 return simplified ;527 } 553 return simplifiedModifiers_rR(original) + simplified; 554 } -
cpp/frams/genetics/genooperators.h
r1241 r1243 218 218 219 219 static int getRandomChar(const char *choices, const char *excluded); ///<returns index of a random character from \a choices excluding \a excluded, or -1 when everything is excluded or \a choices is empty. 220 static string simplifiedModifiersFixedOrder(const char *str_of_char_pairs, vector<int> &char_counts); ///<returns a sequence of chars from \a str_of_char_pairs based on how many times each char occurred in \a char_counts. Assume that an even-index char and the following odd-index char have the opposite influence, so they cancel out. We don't use this function, because a fixed order imposed by this function means that the number of different parameter values produced by a sequence of modifiers is lowered (N same-letter upper- and lower-case chars yield only 2*N different values). Due to how modifiers work, the effect of aaA, aAa, Aaa etc. is different (N same-letter upper- and lower-case chars yield 2^N different values), so simplifying modifiers should not impose any order and should not interfere with their original order - see \a simplifiedModifiers(). 220 static string simplifiedModifiers_rR(const string& str); ///<finds all 'r' and 'R' in \a str and returns the shortest sequence of 'r' and 'R' that is equivalent to all these found in \a str. 221 static string simplifiedModifiersFixedOrder(const char *str_of_char_pairs, vector<int> &char_counts); ///<returns a sequence of chars from \a str_of_char_pairs based on how many times each char occurred in \a char_counts. Assume that an even-index char and the following odd-index char have the opposite influence, so they cancel out. We don't use this function, because a fixed order imposed by this function means that the number of different parameter values produced by a sequence of modifiers is lowered (N same-letter upper- and lower-case chars yield only 2*N different values). Due to how modifiers work, the effect of aaA, aAa, Aaa etc. 
is different (N same-letter upper- and lower-case chars yield 2^N different values), so simplifying modifiers should not impose any order, should not interfere with their original order, and should not cancel out antagonistic modifiers - see \a simplifiedModifiers() and geneprops_test.cpp. 221 222 //@} 222 223 static string simplifiedModifiers(const string &original); ///<from the \a original sequence removes modifiers that are too numerous (exceeding a defined threshold number), starting the removal from the leftmost (="oldest" when interpreting the sequence from left to right) ones.
Note: See TracChangeset
for help on using the changeset viewer.