Changeset 1243 for cpp/frams/genetics/genooperators.cpp
- Timestamp:
- 05/19/23 19:12:22 (12 months ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
cpp/frams/genetics/genooperators.cpp
r1241 r1243 472 472 } 473 473 474 string GenoOperators::simplifiedModifiers_rR(const string& str) 475 { 476 int R = 0; //positive means more 'R', negative means more 'r' 477 for (char c : str) 478 { 479 if (c == 'R') R++; else 480 if (c == 'r') R--; 481 } 482 R %= 8; // 8 * 45 degrees = 360 degrees. After this, we get R=-7..+7 483 484 /* now, simplify homogeneous sequences of rR longer than 4: for example, rrrrr == RRR and RRRRRR == rr 485 -7 1 486 -6 2 487 -5 3 488 -4 -4 (or 4; we choose +4 meaning we will never see rrrr) 489 -3..3 (no changes) 490 4 4 (or -4) 491 5 -3 492 6 -2 493 7 -1 494 */ 495 if (R <= -4) R += 8; //-4 => +4 496 else if (R >= 5) R -= 8; 497 498 return R == 0 ? "" : (R > 0 ? string(R, 'R') : string(-R, 'r')); 499 } 500 474 501 //#include <cassert> 475 502 string GenoOperators::simplifiedModifiersFixedOrder(const char *str_of_char_pairs, vector<int> &char_counts) 476 503 { 477 // assert(strlen(str_of_char_pairs) == char_counts.size());478 // assert(char_counts.size() % 2 == 0);479 const int MAX_NUMBER_SAME_TYPE = 8; // max. number of modifiers of each type (case-sensitive) - mainly for rR, even though for rR, 4 would be sufficient if we assume lower or upper can be chosen as required for minimal length , e.g. rrrrr==RRR, RRRRRR==rr504 // assert(strlen(str_of_char_pairs) == char_counts.size()); 505 // assert(char_counts.size() % 2 == 0); 506 const int MAX_NUMBER_SAME_TYPE = 8; // max. number of modifiers of each type (case-sensitive) - mainly for rR, even though for rR, 4 would be sufficient if we assume lower or upper can be chosen as required for minimal length just as simplifiedModifiers_rR() does, e.g. rrrrr==RRR, RRRRRR==rr 480 507 string simplified; 481 //#define CLUMP_IDENTICAL_MODIFIERS // not good because with the exception of rR properties are calculated incrementally, non-linearly, and their values are updated after each modifier character, so these values may for example saturate after a large number of identical modifier symbols. 
The order of modifiers is (with the exception of rR) relevant and extreme values of properties increase this relevance, so better keep the modifiers dispersed.508 //#define CLUMP_IDENTICAL_MODIFIERS //if GeneProps::normalizeBiol4() is used, this is not good because properties are calculated incrementally, non-linearly, their values are updated after each modifier character and some properties interact with each other due to normalization so they can saturate when clumped, therefore it is better to keep the modifiers dispersed to equalize their effects 482 509 #ifdef CLUMP_IDENTICAL_MODIFIERS 483 510 for (size_t i = 0; i < strlen(str_of_char_pairs); i++) … … 510 537 string GenoOperators::simplifiedModifiers(const string & original) 511 538 { 512 const int MAX_NUMBER_SAME_TYPE = 6; // max. number of modifiers of each type (case-insensitive). rR could be treated separately in simplification because their influence follows different (i.e., simple additive) logic - so the simplifiedModifiersFixedOrder() logic with cancelling out is appropriate for rR. However in this function, making no exception to rR does not cause any harm to these modifiers either - the only consequence is that we will not remove antagonistic letters and will not simplify sequences of rR longer than 4, while they could be simplified (e.g. rrrrr==RRR, RRRRRR==rr).539 const int MAX_NUMBER_SAME_TYPE = 6; // max. number of modifiers of each type (case-insensitive). rR are treated separately in simplification because their influence follows different (i.e., simple additive) logic - so the simplifiedModifiersFixedOrder() logic with cancelling out antagonistic modifiers is appropriate for rR. 
513 540 int counter[256] = {}; //initialize with zeros; 256 is unnecessarily too big and redundant, but enables very fast access (indexed directly by the ascii code) 514 541 string simplified = ""; 515 for (int i = original.size() - 1; i >= 0; i--) //iterate from end to begin -easier to remove "oldest" = first modifiers542 for (int i = original.size() - 1; i >= 0; i--) //iterate from end to begin so it is easier to remove "oldest" = first modifiers 516 543 { 517 544 unsigned char c = original[i]; 518 if (!std::isalpha(c) )545 if (!std::isalpha(c) || c == 'r' || c == 'R') //ignore non-alphabet characters; also, 'r' and 'R' are handled separately by simplifiedModifiers_rR() 519 546 continue; 520 547 unsigned char lower = std::tolower(c); … … 524 551 } 525 552 std::reverse(simplified.begin(), simplified.end()); //"simplified" was built in reverse order, so need to restore the order that corresponds to "original" 526 return simplified ;527 } 553 return simplifiedModifiers_rR(original) + simplified; 554 }
Note: See TracChangeset
for help on using the changeset viewer.