source: framspy/FramsticksLib.py @ 1309

Last change on this file since 1309 was 1309, checked in by Maciej Komosinski, 8 days ago

Print genotypes that may have caused problems when being evaluated - useful when reproducing the problem

File size: 22.7 KB
Line 
1from typing import List  # to be able to specify a type hint of list(something)
2from enum import Enum, auto, unique
3import json
4import sys, os
5import argparse
6import numpy as np
7import frams
8
9
@unique
class DissimMethod(Enum):
        """Identifiers of the dissimilarity measures accepted by FramsticksLib.dissimilarity().

        Only the identity of a member matters; the numeric values are irrelevant and therefore generated by auto().
        """

        # genetic: Levenshtein distance between genotypes
        GENE_LEVENSHTEIN = auto()

        # phenetic, graph structure: fast but approximate
        PHENE_STRUCT_GREEDY = auto()
        # phenetic, graph structure: slower for complex creatures but optimal
        PHENE_STRUCT_OPTIM = auto()

        # phenetic: shape descriptors
        PHENE_DESCRIPTORS = auto()

        # phenetic, density distribution: count of samples
        PHENE_DENSITY_COUNT = auto()
        # phenetic, density distribution: frequency of count of samples
        PHENE_DENSITY_FREQ = auto()

        # fitness value
        FITNESS = auto()
19
20
21
22class FramsticksLib:
23        """Communicates directly with Framsticks library (.dll or .so or .dylib).
24        You can perform basic operations like mutation, crossover, and evaluation of genotypes.
25        This way you can perform evolution controlled by python as well as access and manipulate genotypes.
26        You can even design and use in evolution your own genetic representation implemented entirely in python,
27        or access and control the simulation and simulated creatures step by step.
28
29        Should you want to modify or extend this class, first see and test the examples in frams-test.py.
30
31        You need to provide one or two parameters when you run this class: the path to Framsticks where .dll/.so/.dylib resides
32        and, optionally, the name of the Framsticks dll/so/dylib (if it is non-standard). See::
33                FramsticksLib.py -h"""
34
35        PRINT_FRAMSTICKS_OUTPUT: bool = False  # set to True for debugging
36        DETERMINISTIC: bool = False  # set to True to have the same results in each run
37
38        GENOTYPE_INVALID = "/*invalid*/"  # this is how genotype invalidity is represented in Framsticks
39        EVALUATION_SETTINGS_FILE = [  # all files MUST be compatible with the standard-eval expdef. The order they are loaded in is important!
40                "eval-allcriteria.sim",  # a good trade-off in performance sampling period ("perfperiod") for vertpos and velocity
41                # "deterministic.sim",  # turns off random noise (added for robustness) so that each evaluation yields identical performance values (causes "overfitting")
42                # "sample-period-2.sim", # short performance sampling period so performance (e.g. vertical position) is sampled more often
43                # "sample-period-longest.sim",  # increased performance sampling period so distance and velocity are measured rectilinearly
44        ]
45
46
47        # This function is not needed because in Python, "For efficiency reasons, each module is only imported once per interpreter session."
48        # @staticmethod
49        # def getFramsModuleInstance():
50        #       """If some other party needs access to the frams module to directly access or modify Framsticks objects,
51        #       use this function to avoid importing the "frams" module multiple times and avoid potentially initializing
52        #       it many times."""
53        #       return frams
54
55        def __init__(self, frams_path, frams_lib_name, sim_settings_files):
56                self.dissim_measure_density_distribution = None  # will be initialized only when necessary (for rare dissimilarity methods)
57
58                if frams_lib_name is None:
59                        frams.init(frams_path)  # could add support for setting alternative directories using -D and -d
60                else:
61                        frams.init(frams_path, "-L" + frams_lib_name)  # could add support for setting alternative directories using -D and -d
62
63                print('Available objects:', dir(frams))
64                print()
65
66                simplest = self.getSimplest("1")
67                if not (simplest == "X" and type(simplest) is str):
68                        raise RuntimeError('Failed getSimplest() test.')
69                if not (self.isValid(["X[0:0],", "X[0:0]", "X[1:0]"]) == [False, True, False]):
70                        raise RuntimeError('Failed isValid() test.')
71
72                if not self.DETERMINISTIC:
73                        frams.Math.randomize()
74                frams.Simulator.expdef = "standard-eval"  # this expdef (or fully compatible) must be used by EVALUATION_SETTINGS_FILE
75                if sim_settings_files is not None:
76                        self.EVALUATION_SETTINGS_FILE = sim_settings_files.split(";")  # override defaults. str becomes list
77                print('Basic tests OK. Using settings:', self.EVALUATION_SETTINGS_FILE)
78                print()
79
80                for simfile in self.EVALUATION_SETTINGS_FILE:
81                        ec = frams.MessageCatcher.new()  # catch potential errors, warnings, messages - just to detect if there are ERRORs
82                        ec.store = 2  # store all, because they are caught by MessageCatcher and will not appear in output (which we want)
83                        frams.Simulator.ximport(simfile, 4 + 8 + 16)
84                        ec.close()
85                        print(ec.messages)  # output all caught messages
86                        if ec.error_count._value() > 0:
87                                raise ValueError("Problem while importing file '%s'" % simfile)  # make missing files or incorrect paths fatal because error messages are easy to overlook in output, and these errors would not prevent Framsticks simulator from performing genetic operations, starting and running in evaluate()
88
89
90        def getSimplest(self, genetic_format: str) -> str:
91                return frams.GenMan.getSimplest(genetic_format).genotype._string()
92
93
94        def getPJNC(self, genotype: str):
95                """
96                Returns the number of elements of a phenotype built from the provided genotype (without any simulation).
97
98                :param genotype: the genotype to assess
99                :return: a tuple of (numparts,numjoints,numneurons,numconnections) or None if the genotype is invalid.
100                """
101                model = frams.Model.newFromString(genotype)
102                if model.is_valid._int() == 0:
103                        return None
104                return (model.numparts._int(), model.numjoints._int(), model.numneurons._int(), model.numconnections._int())
105
106
107        def satisfiesConstraints(self, genotype: str, max_numparts: int, max_numjoints: int, max_numneurons: int, max_numconnections: int, max_numgenochars: int) -> bool:
108                """
109                Verifies if the genotype satisfies complexity constraints without actually simulating it.
110                For example, if the genotype represents a phenotype with 1000 Parts, it will be much faster to check it using this function than to simulate the resulting creature using evaluate() only to learn that the number of its Parts exceeds your defined limit.
111
112                :param genotype: the genotype to check
113                :return: False if any constraint is violated or the genotype is invalid, else True. The constraint value of None means no constraint.
114                """
115
116
117                def value_within_constraint(actual_value, constraint_value):
118                        if constraint_value is not None:
119                                if actual_value > constraint_value:
120                                        return False
121                        return True
122
123
124                PJNC = self.getPJNC(genotype)
125                if PJNC is None:
126                        return False  # Let's treat invalid genotypes as not satisfying constraints
127                P, J, N, C = PJNC
128
129                valid = True
130                valid &= value_within_constraint(len(genotype), max_numgenochars)
131                valid &= value_within_constraint(P, max_numparts)
132                valid &= value_within_constraint(J, max_numjoints)
133                valid &= value_within_constraint(N, max_numneurons)
134                valid &= value_within_constraint(C, max_numconnections)
135                return valid
136
137
138        def evaluate(self, genotype_list: List[str]):
139                """
140                Returns:
141                        List of dictionaries containing the performance of genotypes evaluated using self.EVALUATION_SETTINGS_FILE.
142                        Note that for whatever reason (e.g. incorrect genotype), the dictionaries you will get may be empty or
143                        partially empty and may not have the fields you expected, so handle such cases properly.
144                """
145                assert isinstance(genotype_list, list)  # because in python, str has similar capabilities as list and here it would pretend to work too, so to avoid any ambiguity
146
147                if not self.PRINT_FRAMSTICKS_OUTPUT:
148                        ec = frams.MessageCatcher.new()  # mute potential errors, warnings, messages
149                        ec.store = 2  # store all, because they are caught by MessageCatcher and will not appear in output
150
151                frams.GenePools[0].clear()
152                for g in genotype_list:
153                        frams.GenePools[0].add(g)
154                frams.ExpProperties.evalsavefile = ""  # no need to store results in a file - we will get evaluations directly from Genotype's "data" field
155                frams.Simulator.init()
156                frams.Simulator.start()
157
158                # step = frams.Simulator.step  # cache reference to avoid repeated lookup in the loop (just for performance)
159                # while frams.Simulator.running._int():  # standard-eval.expdef sets running to 0 when the evaluation is complete
160                #       step()
161                frams.Simulator.eval("while(Simulator.running) Simulator.step();")  # fastest
162                # Timing for evaluating a single simple creature 100x:
163                # - python step without caching: 2.2s
164                # - python step with caching   : 1.6s
165                # - pure FramScript and eval() : 0.4s
166
167                if not self.PRINT_FRAMSTICKS_OUTPUT:
168                        ec.close()
169                        if ec.error_count._value() > 0:
170                                print('\nErrors while evaluating this genotype list:\n',genotype_list,sep='\t')
171                                print(ec.messages)  # if errors occurred, output all caught messages for debugging
172                                raise RuntimeError("[ERROR] %d error(s) and %d warning(s) while evaluating %d genotype(s)" % (ec.error_count._value(), ec.warning_count._value(), len(genotype_list)))  # make errors fatal; by default they stop the simulation anyway so let's not use potentially incorrect or partial results and fix the cause first.
173
174                results = []
175                for g in frams.GenePools[0]:
176                        serialized_dict = frams.String.serialize(g.data[frams.ExpProperties.evalsavedata._value()])
177                        evaluations = json.loads(serialized_dict._string())  # Framsticks native ExtValue's get converted to native python types such as int, float, list, str.
178                        # now, for consistency with FramsticksCLI.py, add "num" and "name" keys that are missing because we got data directly from Genotype, not from the file produced by standard-eval.expdef's function printStats(). What we do below is what printStats() does.
179                        result = {"num": g.num._value(), "name": g.name._value(), "evaluations": evaluations}
180                        results.append(result)
181
182                return results
183
184
185        def mutate(self, genotype_list: List[str]) -> List[str]:
186                """
187                Returns:
188                        The genotype(s) of the mutated source genotype(s). self.GENOTYPE_INVALID for genotypes whose mutation failed (for example because the source genotype was invalid).
189                """
190                assert isinstance(genotype_list, list)  # because in python, str has similar capabilities as list and here it would pretend to work too, so to avoid any ambiguity
191
192                mutated = []
193                for g in genotype_list:
194                        mutated.append(frams.GenMan.mutate(frams.Geno.newFromString(g)).genotype._string())
195                if len(genotype_list) != len(mutated):
196                        raise RuntimeError("Submitted %d genotypes, received %d mutants" % (len(genotype_list), len(mutated)))
197                return mutated
198
199
200        def crossOver(self, genotype_parent1: str, genotype_parent2: str) -> str:
201                """
202                Returns:
203                        The genotype of the offspring. self.GENOTYPE_INVALID if the crossing over failed.
204                """
205                return frams.GenMan.crossOver(frams.Geno.newFromString(genotype_parent1), frams.Geno.newFromString(genotype_parent2)).genotype._string()
206
207
208        def dissimilarity(self, genotype_list: List[str], method: DissimMethod) -> np.ndarray:
209                """
210                        :param method, see DissimMethod.
211                        :return: A square array with dissimilarities of each pair of genotypes.
212                """
213                assert isinstance(genotype_list, list)  # because in python, str has similar capabilities as list and here it would pretend to work too, so to avoid any ambiguity
214
215                # if you want to override what EVALUATION_SETTINGS_FILE sets, you can do it below:
216                # frams.SimilMeasureHungarian.simil_partgeom = 1
217                # frams.SimilMeasureHungarian.simil_weightedMDS = 1
218
219                n = len(genotype_list)
220                square_matrix = np.zeros((n, n))
221
222                if method in (DissimMethod.PHENE_STRUCT_GREEDY, DissimMethod.PHENE_STRUCT_OPTIM, DissimMethod.PHENE_DESCRIPTORS):  # Framsticks phenetic dissimilarity methods
223                        frams.SimilMeasure.simil_type = 0 if method == DissimMethod.PHENE_STRUCT_GREEDY else 1 if method == DissimMethod.PHENE_STRUCT_OPTIM else 2
224                        genos = []  # prepare an array of Geno objects so that we don't need to convert raw strings to Geno objects all the time in loops
225                        for g in genotype_list:
226                                genos.append(frams.Geno.newFromString(g))
227                        frams_evaluateDistance = frams.SimilMeasure.evaluateDistance  # cache function reference for better performance in loops
228                        for i in range(n):
229                                for j in range(n):  # maybe calculate only one triangle if you really need a 2x speedup
230                                        square_matrix[i][j] = frams_evaluateDistance(genos[i], genos[j])._double()
231                elif method == DissimMethod.GENE_LEVENSHTEIN:
232                        import Levenshtein
233                        for i in range(n):
234                                for j in range(n):  # maybe calculate only one triangle if you really need a 2x speedup
235                                        square_matrix[i][j] = Levenshtein.distance(genotype_list[i], genotype_list[j])
236                elif method in (DissimMethod.PHENE_DENSITY_COUNT, DissimMethod.PHENE_DENSITY_FREQ):
237                        if self.dissim_measure_density_distribution is None:
238                                from dissimilarity.density_distribution import DensityDistribution
239                                self.dissim_measure_density_distribution = DensityDistribution(frams)
240                        self.dissim_measure_density_distribution.frequency = (method == DissimMethod.PHENE_DENSITY_FREQ)
241                        square_matrix = self.dissim_measure_density_distribution.getDissimilarityMatrix(genotype_list)
242                else:
243                        raise ValueError("Don't know what to do with dissimilarity method = %s" % method)
244
245                for i in range(n):
246                        assert square_matrix[i][i] == 0, "Not a correct dissimilarity matrix, diagonal expected to be 0"
247                non_symmetric_diff = square_matrix - square_matrix.T
248                non_symmetric_count = np.count_nonzero(non_symmetric_diff)
249                if non_symmetric_count > 0:
250                        non_symmetric_diff_abs = np.abs(non_symmetric_diff)
251                        max_pos1d = np.argmax(non_symmetric_diff_abs)  # location of the largest discrepancy
252                        max_pos2d_XY = np.unravel_index(max_pos1d, non_symmetric_diff_abs.shape)  # 2D coordinates of the largest discrepancy
253                        max_pos2d_YX = max_pos2d_XY[1], max_pos2d_XY[0]  # 2D coordinates of the largest discrepancy mirror
254                        worst_guy_XY = square_matrix[max_pos2d_XY]  # this distance and the other below (its mirror) are most different
255                        worst_guy_YX = square_matrix[max_pos2d_YX]
256                        print("[WARN] Dissimilarity matrix: expecting symmetry, but %g out of %d pairs were asymmetrical, max difference was %g (%g %%)" %
257                              (non_symmetric_count / 2,
258                               n * (n - 1) / 2,
259                               non_symmetric_diff_abs[max_pos2d_XY],
260                               non_symmetric_diff_abs[max_pos2d_XY] * 100 / ((worst_guy_XY + worst_guy_YX) / 2)))  # max diff is not necessarily max %
261                return square_matrix
262
263
264        def getRandomGenotype(self, initial_genotype: str, parts_min: int, parts_max: int, neurons_min: int, neurons_max: int, iter_max: int, return_even_if_failed: bool):
265                """
266                Some algorithms require a "random solution". To this end, this method generates a random framstick genotype.
267
268                :param initial_genotype: if not a specific genotype (which could facilitate greater variability of returned genotypes), try `getSimplest(format)`.
269                :param iter_max: how many mutations can be used to generate a random genotype that fullfills target numbers of parts and neurons.
270                :param return_even_if_failed: if the target numbers of parts and neurons was not achieved, return the closest genotype that was found? Set it to False first to experimentally adjust `iter_max` so that in most calls this function returns a genotype with target numbers of parts and neurons, and then you can set this parameter to True if target numbers of parts and neurons are not absolutely required.
271                :returns: a valid genotype or None if failed and `return_even_if_failed` is False.
272                """
273
274
275                def estimate_diff(g: str):
276                        if not self.isValidCreature([g])[0]:
277                                return None, None
278                        m = frams.Model.newFromString(g)
279                        numparts = m.numparts._value()
280                        numneurons = m.numneurons._value()
281                        diff_parts = abs(target_parts - numparts)
282                        diff_neurons = abs(target_neurons - numneurons)
283                        in_target_range = (parts_min <= numparts <= parts_max) and (neurons_min <= numneurons <= neurons_max)  # less demanding than precisely reaching target_parts and target_neurons
284                        return diff_parts + diff_neurons, in_target_range
285
286
287                # try to find a genotype that matches the number of parts and neurons randomly selected from the provided min..max range
288                # (even if we fail to match this precise target, the goal will be achieved if the found genotype manages to be within min..max ranges for parts and neurons)
289                target_parts = np.random.default_rng().integers(parts_min, parts_max + 1)
290                target_neurons = np.random.default_rng().integers(neurons_min, neurons_max + 1)
291
292                if not self.isValidCreature([initial_genotype])[0]:
293                        raise ValueError("Initial genotype '%s' is invalid" % initial_genotype)
294
295                g = initial_genotype
296                for i in range(iter_max // 2):  # a sequence of iter_max/2 undirected mutations starting from initial_genotype
297                        g_new = self.mutate([g])[0]
298                        if self.isValidCreature([g_new])[0]:  # valid mutation
299                                g = g_new
300
301                best_diff, best_in_target_range = estimate_diff(g)
302                for i in range(iter_max // 2):  # a sequence of iter_max/2 mutations, only accepting those which approach target numbers of parts and neurons
303                        g_new = self.mutate([g])[0]
304                        diff, in_target_range = estimate_diff(g_new)
305                        if diff is not None and diff <= best_diff:  # valid mutation and better or as good as current
306                                g = g_new
307                                best_diff = diff
308                                best_in_target_range = in_target_range
309                # print(diff, best_diff) # print progress approaching target numbers of parts and neurons
310
311                if best_in_target_range or return_even_if_failed:
312                        return g  # best found so far (closest to target numbers of parts and neurons)
313                return None
314
315
316        def isValid(self, genotype_list: List[str]) -> List[bool]:
317                """
318                :returns: genetic validity (i.e., not based on trying to build creatures from provided genotypes). For a more thorough check, see isValidCreature().
319                """
320                assert isinstance(genotype_list, list)  # because in python, str has similar capabilities as list and here it would pretend to work too, so to avoid any ambiguity
321                valid = []
322                for g in genotype_list:
323                        valid.append(frams.Geno.newFromString(g).is_valid._int() == 1)
324                if len(genotype_list) != len(valid):
325                        raise RuntimeError("Tested %d genotypes, received %d validity values" % (len(genotype_list), len(valid)))
326                return valid
327
328
329        def isValidCreature(self, genotype_list: List[str]) -> List[bool]:
330                """
331                :returns: validity of the genotype when revived. Apart from genetic validity, this includes detecting problems that may arise when building a Creature from Genotype, such as multiple muscles of the same type in the same location in body, e.g. 'X[@][@]'.
332                """
333
334                # Genetic validity and simulator validity are two separate properties (in particular, genetic validity check is implemented by the author of a given genetic format and operators).
335                # Thus, the subset of genotypes valid genetically and valid in simulation may be overlapping.
336                # For example, 'X[]' or 'Xr' are considered invalid by the genetic checker, but the f1->f0 converter will ignore meaningless genes and produce a valid f0 genotype.
337                # On the other hand, 'X[@][@]' or 'X[|][|]' are valid genetically, but not possible to simulate.
338                # For simplicity of usage (so that one does not need to check both properties separately using both functions), let's make one validity a subset of the other.
339                # The genetic check in the first lines of the "for" loop makes this function at least as demanding as isValid().
340
341                assert isinstance(genotype_list, list)  # because in python, str has similar capabilities as list and here it would pretend to work too, so to avoid any ambiguity
342
343                pop = frams.Populations[0]  # assuming rules from population #0 (self-colision settings are population-dependent and can influence creature build success/failure)
344
345                valid = []
346                for g in genotype_list:
347                        if frams.Geno.newFromString(g).is_valid._int() != 1:
348                                valid.append(False)  # invalid according to genetic check
349                        else:
350                                can_add = pop.canAdd(g, 1, 1)  # First "1" means to treat warnings during build as build failures - this allows detecting problems when building Creature from Genotype. Second "1" means mute emitted errors, warnings, messages. Returns 1 (ok, could add) or 0 (there were some problems building Creature from Genotype)
351                                valid.append(can_add._int() == 1)
352
353                if len(genotype_list) != len(valid):
354                        raise RuntimeError("Tested %d genotypes, received %d validity values" % (len(genotype_list), len(valid)))
355                return valid
356
357
def parseArguments():
        """Defines the command-line options of this module and returns the result of parsing the actual command line."""
        parser = argparse.ArgumentParser(description='Run this program with "python -u %s" if you want to disable buffering of its output.' % sys.argv[0])
        # each entry: the option name and the keyword arguments for add_argument()
        option_specs = (
                ('-path', dict(type=ensureDir, required=True, help='Path to the Framsticks library (.dll or .so or .dylib) without trailing slash.')),
                ('-lib', dict(required=False, help='Library name. If not given, "frams-objects.dll" (or .so or .dylib) is assumed depending on the platform.')),
                ('-simsettings', dict(required=False, help="The name of the .sim file with settings for evaluation, mutation, crossover, and similarity estimation. If not given, \"eval-allcriteria.sim\" is assumed by default. Must be compatible with the \"standard-eval\" expdef. If you want to provide more files, separate them with a semicolon ';'.")),
                ('-genformat', dict(required=False, help='Genetic format for the demo run, for example 4, 9, or S. If not given, f1 is assumed.')),
        )
        for option_name, option_settings in option_specs:
                parser.add_argument(option_name, **option_settings)
        return parser.parse_args()
365
366
def ensureDir(string):
        """
        Returns the argument unchanged if it is a path to an existing directory.

        :raises NotADirectoryError: when the argument is not an existing directory.
        """
        # NOTE(review): this function is used as an argparse "type" converter; argparse only turns ArgumentTypeError, TypeError and ValueError
        # into a usage message, so NotADirectoryError most likely ends with a traceback - confirm that this is intended.
        if not os.path.isdir(string):
                raise NotADirectoryError(string)
        return string
372
373
if __name__ == "__main__":
        # A demo run: initializes the library and then shows the basic operations one by one.

        # TODO ideas:
        # - check_validity with three levels (invalid, corrected, valid)
        # - a pool of binaries running simultaneously, balance load - in particular evaluation

        parsed_args = parseArguments()
        framsLib = FramsticksLib(parsed_args.path, parsed_args.lib, parsed_args.simsettings)

        print("Sending a direct command to Framsticks library that calculates \"4\"+2 yields", frams.Simulator.eval("return \"4\"+2;"))

        genetic_format = parsed_args.genformat
        if genetic_format is None:
                genetic_format = '1'  # f1 is the default format of the demo
        simplest = framsLib.getSimplest(genetic_format)
        print("\tSimplest genotype:", simplest)

        MUTATE_COUNT = 10
        parent1 = framsLib.mutate([simplest])[0]
        parent2 = parent1
        for _ in range(MUTATE_COUNT):  # example of a chain of 10 mutations
                parent2 = framsLib.mutate([parent2])[0]
        print("\tParent1 (mutated simplest):", parent1)
        print("\tParent2 (Parent1 mutated %d times):" % MUTATE_COUNT, parent2)

        offspring = framsLib.crossOver(parent1, parent2)
        print("\tCrossover (Offspring):", offspring)

        dissimilarity_matrix = framsLib.dissimilarity([parent1, offspring], DissimMethod.PHENE_STRUCT_OPTIM)
        print('\tDissimilarity of Parent1 and Offspring:', dissimilarity_matrix[0, 1])
        print('\tPerformance of Offspring:', framsLib.evaluate([offspring]))

        family = [parent1, parent2, offspring]
        print('\tValidity (genetic) of Parent1, Parent 2, and Offspring:', framsLib.isValid(family))
        print('\tValidity (simulation) of Parent1, Parent 2, and Offspring:', framsLib.isValidCreature(family))
        print('\tValidity (constraints) of Offspring:', framsLib.satisfiesConstraints(offspring, 2, None, 5, 10, None))
        print('\tRandom genotype:', framsLib.getRandomGenotype(simplest, 2, 6, 2, 4, 100, True))
Note: See TracBrowser for help on using the repository browser.