Changeset 1177


Ignore:
Timestamp:
04/26/22 00:52:58 (3 years ago)
Author:
Maciej Komosinski
Message:

FramsticksLib.dissimilarity() now has a mandatory argument to select a method of dissimilarity calculation

File:
1 edited

Legend:

Unmodified
Added
Removed
  • framspy/FramsticksLib.py

    r1170 r1177  
    5757                print('OK.')
    5858                if not self.DETERMINISTIC:
    59                         frams.Math.randomize();
     59                        frams.Math.randomize()
    6060                frams.Simulator.expdef = "standard-eval"  # this expdef (or fully compatible) must be used by EVALUATION_SETTINGS_FILE
    6161                if sim_settings_files is not None:
     
    7878                        partially empty and may not have the fields you expected, so handle such cases properly.
    7979                """
    80                 assert isinstance(genotype_list, list)  # because in python str has similar capabilities as list and here it would pretend to work too, so to avoid any ambiguity
     80                assert isinstance(genotype_list, list)  # because in python, str has similar capabilities as list and here it would pretend to work too, so to avoid any ambiguity
    8181
    8282                if not self.PRINT_FRAMSTICKS_OUTPUT:
     
    107107                for g in frams.GenePools[0]:
    108108                        serialized_dict = frams.String.serialize(g.data[frams.ExpProperties.evalsavedata._value()])
    109                         evaluations = json.loads(serialized_dict._string()) # Framsticks native ExtValue's get converted to native python types such as int, float, list, str.
     109                        evaluations = json.loads(serialized_dict._string())  # Framsticks native ExtValue's get converted to native python types such as int, float, list, str.
    110110                        # now, for consistency with FramsticksCLI.py, add "num" and "name" keys that are missing because we got data directly from Genotype, not from the file produced by standard-eval.expdef's function printStats(). What we do below is what printStats() does.
    111111                        result = {"num": g.num._value(), "name": g.name._value(), "evaluations": evaluations}
     
    120120                        The genotype(s) of the mutated source genotype(s). self.GENOTYPE_INVALID for genotypes whose mutation failed (for example because the source genotype was invalid).
    121121                """
    122                 assert isinstance(genotype_list, list)  # because in python str has similar capabilities as list and here it would pretend to work too, so to avoid any ambiguity
     122                assert isinstance(genotype_list, list)  # because in python, str has similar capabilities as list and here it would pretend to work too, so to avoid any ambiguity
    123123
    124124                mutated = []
     
    137137
    138138
    139         def dissimilarity(self, genotype_list: List[str]) -> np.ndarray:
    140                 """
    141                 Returns:
    142                         A square array with dissimilarities of each pair of genotypes.
    143                 """
    144                 assert isinstance(genotype_list, list)  # because in python str has similar capabilities as list and here it would pretend to work too, so to avoid any ambiguity
     139        def dissimilarity(self, genotype_list: List[str], method: int) -> np.ndarray:
     140                """
     141                        :param method: -1 = genetic Levenshtein distance; 0, 1, 2 = phenetic dissimilarity (SimilMeasureGreedy, SimilMeasureHungarian, SimilMeasureDistribution)
     142                        :return: A square array with dissimilarities of each pair of genotypes.
     143                """
     144                assert isinstance(genotype_list, list)  # because in python, str has similar capabilities as list and here it would pretend to work too, so to avoid any ambiguity
    145145
    146146                # if you want to override what EVALUATION_SETTINGS_FILE sets, you can do it below:
    147                 # frams.SimilMeasure.simil_type = 1
    148147                # frams.SimilMeasureHungarian.simil_partgeom = 1
    149148                # frams.SimilMeasureHungarian.simil_weightedMDS = 1
     
    151150                n = len(genotype_list)
    152151                square_matrix = np.zeros((n, n))
    153                 genos = []  # prepare an array of Geno objects so that we don't need to convert raw strings to Geno objects all the time in loops
    154                 for g in genotype_list:
    155                         genos.append(frams.Geno.newFromString(g))
    156                 frams_evaluateDistance = frams.SimilMeasure.evaluateDistance  # cache function reference for better performance in loops
    157                 for i in range(n):
    158                         for j in range(n):  # maybe calculate only one triangle if you really need a 2x speedup
    159                                 square_matrix[i][j] = frams_evaluateDistance(genos[i], genos[j])._double()
     152
     153                if method in (0, 1, 2):  # Framsticks phenetic dissimilarity methods
     154                        frams.SimilMeasure.simil_type = method
     155                        genos = []  # prepare an array of Geno objects so that we don't need to convert raw strings to Geno objects all the time in loops
     156                        for g in genotype_list:
     157                                genos.append(frams.Geno.newFromString(g))
     158                        frams_evaluateDistance = frams.SimilMeasure.evaluateDistance  # cache function reference for better performance in loops
     159                        for i in range(n):
     160                                for j in range(n):  # maybe calculate only one triangle if you really need a 2x speedup
     161                                        square_matrix[i][j] = frams_evaluateDistance(genos[i], genos[j])._double()
     162                elif method == -1:
     163                        import Levenshtein
     164                        for i in range(n):
     165                                for j in range(n):  # maybe calculate only one triangle if you really need a 2x speedup
     166                                        square_matrix[i][j] = Levenshtein.distance(genotype_list[i], genotype_list[j])
     167                else:
     168                        raise Exception("Don't know what to do with dissimilarity method = %d" % method)
    160169
    161170                for i in range(n):
     
    165174                if non_symmetric_count > 0:
    166175                        non_symmetric_diff_abs = np.abs(non_symmetric_diff)
    167                         max_pos1d = np.argmax(non_symmetric_diff_abs)  # location of largest discrepancy
    168                         max_pos2d_XY = np.unravel_index(max_pos1d, non_symmetric_diff_abs.shape)  # 2D coordinates of largest discrepancy
    169                         max_pos2d_YX = max_pos2d_XY[1], max_pos2d_XY[0]  # 2D coordinates of largest discrepancy mirror
     176                        max_pos1d = np.argmax(non_symmetric_diff_abs)  # location of the largest discrepancy
     177                        max_pos2d_XY = np.unravel_index(max_pos1d, non_symmetric_diff_abs.shape)  # 2D coordinates of the largest discrepancy
     178                        max_pos2d_YX = max_pos2d_XY[1], max_pos2d_XY[0]  # 2D coordinates of the largest discrepancy mirror
    170179                        worst_guy_XY = square_matrix[max_pos2d_XY]  # this distance and the other below (its mirror) are most different
    171180                        worst_guy_YX = square_matrix[max_pos2d_YX]
     
    179188
    180189        def isValid(self, genotype_list: List[str]) -> List[bool]:
    181                 assert isinstance(genotype_list, list)  # because in python str has similar capabilities as list and here it would pretend to work too, so to avoid any ambiguity
     190                assert isinstance(genotype_list, list)  # because in python, str has similar capabilities as list and here it would pretend to work too, so to avoid any ambiguity
    182191                valid = []
    183192                for g in genotype_list:
     
    226235        offspring = framsLib.crossOver(parent1, parent2)
    227236        print("\tCrossover (Offspring):", offspring)
    228         print('\tDissimilarity of Parent1 and Offspring:', framsLib.dissimilarity([parent1, offspring])[0, 1])
     237        print('\tDissimilarity of Parent1 and Offspring:', framsLib.dissimilarity([parent1, offspring], 1)[0, 1])
    229238        print('\tPerformance of Offspring:', framsLib.evaluate([offspring]))
    230239        print('\tValidity of Parent1, Parent 2, and Offspring:', framsLib.isValid([parent1, parent2, offspring]))
Note: See TracChangeset for help on using the changeset viewer.