source: framspy/FramsticksLib.py @ 1292

Last change on this file since 1292 was 1265, checked in by Maciej Komosinski, 17 months ago

Added a function to assess the number of Parts, Joints, Neurons and Connections of a genotype, and a function to assess the fulfillment of complexity constraints of a genotype

File size: 21.9 KB
Line 
1from typing import List  # to be able to specify a type hint of list(something)
2import json
3import sys, os
4import argparse
5import numpy as np
6import frams
7
8
class FramsticksLib:
        """Communicates directly with the Framsticks library (.dll or .so or .dylib).

        You can perform basic operations like mutation, crossover, and evaluation of genotypes.
        This way you can perform evolution controlled by python as well as access and manipulate genotypes.
        You can even design and use in evolution your own genetic representation implemented entirely in python,
        or access and control the simulation and simulated creatures step by step.

        Should you want to modify or extend this class, first see and test the examples in frams-test.py.

        You need to provide one or two parameters when you run this class: the path to Framsticks where .dll/.so/.dylib resides
        and, optionally, the name of the Framsticks dll/so/dylib (if it is non-standard). See::
                FramsticksLib.py -h"""

        # When False, evaluate() captures Framsticks messages with a MessageCatcher and only prints them if errors occurred.
        PRINT_FRAMSTICKS_OUTPUT: bool = False  # set to True for debugging
        # When False, __init__() calls frams.Math.randomize().
        DETERMINISTIC: bool = False  # set to True to have the same results in each run

        GENOTYPE_INVALID: str = "/*invalid*/"  # this is how genotype invalidity is represented in Framsticks
        # Default list of settings files imported by __init__(); it is replaced per instance when the "sim_settings_files" argument is not None.
        EVALUATION_SETTINGS_FILE: List[str] = [  # all files MUST be compatible with the standard-eval expdef. The order they are loaded in is important!
                "eval-allcriteria.sim",  # a good trade-off in performance sampling period ("perfperiod") for vertpos and velocity
                # "deterministic.sim",  # turns off random noise (added for robustness) so that each evaluation yields identical performance values (causes "overfitting")
                # "sample-period-2.sim", # short performance sampling period so performance (e.g. vertical position) is sampled more often
                # "sample-period-longest.sim",  # increased performance sampling period so distance and velocity are measured rectilinearly
        ]
32
33
34        # This function is not needed because in Python, "For efficiency reasons, each module is only imported once per interpreter session."
35        # @staticmethod
36        # def getFramsModuleInstance():
37        #       """If some other party needs access to the frams module to directly access or modify Framsticks objects,
38        #       use this function to avoid importing the "frams" module multiple times and avoid potentially initializing
39        #       it many times."""
40        #       return frams
41
42        def __init__(self, frams_path, frams_lib_name, sim_settings_files):
43                self.dissim_measure_density_distribution = None  # will be initialized only when necessary (for rare dissimilarity methods)
44
45                if frams_lib_name is None:
46                        frams.init(frams_path)  # could add support for setting alternative directories using -D and -d
47                else:
48                        frams.init(frams_path, "-L" + frams_lib_name)  # could add support for setting alternative directories using -D and -d
49
50                print('Available objects:', dir(frams))
51                print()
52
53                simplest = self.getSimplest("1")
54                if not (simplest == "X" and type(simplest) is str):
55                        raise RuntimeError('Failed getSimplest() test.')
56                if not (self.isValid(["X[0:0],", "X[0:0]", "X[1:0]"]) == [False, True, False]):
57                        raise RuntimeError('Failed isValid() test.')
58
59                if not self.DETERMINISTIC:
60                        frams.Math.randomize()
61                frams.Simulator.expdef = "standard-eval"  # this expdef (or fully compatible) must be used by EVALUATION_SETTINGS_FILE
62                if sim_settings_files is not None:
63                        self.EVALUATION_SETTINGS_FILE = sim_settings_files.split(";")  # override defaults. str becomes list
64                print('Basic tests OK. Using settings:', self.EVALUATION_SETTINGS_FILE)
65                print()
66
67                for simfile in self.EVALUATION_SETTINGS_FILE:
68                        ec = frams.MessageCatcher.new()  # catch potential errors, warnings, messages - just to detect if there are ERRORs
69                        ec.store = 2;  # store all, because they are caught by MessageCatcher and will not appear in output (which we want)
70                        frams.Simulator.ximport(simfile, 4 + 8 + 16)
71                        ec.close()
72                        print(ec.messages)  # output all caught messages
73                        if ec.error_count._value() > 0:
74                                raise ValueError("Problem while importing file '%s'" % simfile)  # make missing files or incorrect paths fatal because error messages are easy to overlook in output, and these errors would not prevent Framsticks simulator from performing genetic operations, starting and running in evaluate()
75
76
77        def getSimplest(self, genetic_format) -> str:
78                return frams.GenMan.getSimplest(genetic_format).genotype._string()
79
80
81        def getPJNC(self, genotype: str):
82                """
83                Returns the number of elements of a phenotype built from the provided genotype (without any simulation).
84
85                :param genotype: the genotype to assess
86                :return: a tuple of (numparts,numjoints,numneurons,numconnections) or None if the genotype is invalid.
87                """
88                model = frams.Model.newFromString(genotype)
89                if model.is_valid._int() == 0:
90                        return None
91                return (model.numparts._int(), model.numjoints._int(), model.numneurons._int(), model.numconnections._int())
92
93
94        def satisfiesConstraints(self, genotype: str, max_numparts: int, max_numjoints: int, max_numneurons: int, max_numconnections: int, max_numgenochars: int) -> bool:
95                """
96                Verifies if the genotype satisfies complexity constraints without actually simulating it.
97                For example, if the genotype represents a phenotype with 1000 Parts, it will be much faster to check it using this function than to simulate the resulting creature using evaluate() only to learn that the number of its Parts exceeds your defined limit.
98
99                :param genotype: the genotype to check
100                :return: False if any constraint is violated or the genotype is invalid, else True. The constraint value of None means no constraint.
101                """
102
103
104                def value_within_constraint(actual_value, constraint_value):
105                        if constraint_value is not None:
106                                if actual_value > constraint_value:
107                                        return False
108                        return True
109
110
111                PJNC = self.getPJNC(genotype)
112                if PJNC is None:
113                        return False  # Let's treat invalid genotypes as not satisfying constraints
114                P, J, N, C = PJNC
115
116                valid = True
117                valid &= value_within_constraint(len(genotype), max_numgenochars)
118                valid &= value_within_constraint(P, max_numparts)
119                valid &= value_within_constraint(J, max_numjoints)
120                valid &= value_within_constraint(N, max_numneurons)
121                valid &= value_within_constraint(C, max_numconnections)
122                return valid
123
124
125        def evaluate(self, genotype_list: List[str]):
126                """
127                Returns:
128                        List of dictionaries containing the performance of genotypes evaluated using self.EVALUATION_SETTINGS_FILE.
129                        Note that for whatever reason (e.g. incorrect genotype), the dictionaries you will get may be empty or
130                        partially empty and may not have the fields you expected, so handle such cases properly.
131                """
132                assert isinstance(genotype_list, list)  # because in python, str has similar capabilities as list and here it would pretend to work too, so to avoid any ambiguity
133
134                if not self.PRINT_FRAMSTICKS_OUTPUT:
135                        ec = frams.MessageCatcher.new()  # mute potential errors, warnings, messages
136                        ec.store = 2;  # store all, because they are caught by MessageCatcher and will not appear in output
137
138                frams.GenePools[0].clear()
139                for g in genotype_list:
140                        frams.GenePools[0].add(g)
141                frams.ExpProperties.evalsavefile = ""  # no need to store results in a file - we will get evaluations directly from Genotype's "data" field
142                frams.Simulator.init()
143                frams.Simulator.start()
144
145                # step = frams.Simulator.step  # cache reference to avoid repeated lookup in the loop (just for performance)
146                # while frams.Simulator.running._int():  # standard-eval.expdef sets running to 0 when the evaluation is complete
147                #       step()
148                frams.Simulator.eval("while(Simulator.running) Simulator.step();")  # fastest
149                # Timing for evaluating a single simple creature 100x:
150                # - python step without caching: 2.2s
151                # - python step with caching   : 1.6s
152                # - pure FramScript and eval() : 0.4s
153
154                if not self.PRINT_FRAMSTICKS_OUTPUT:
155                        ec.close()
156                        if ec.error_count._value() > 0:
157                                print(ec.messages)  # if errors occurred, output all caught messages for debugging
158                                raise RuntimeError("[ERROR] %d error(s) and %d warning(s) while evaluating %d genotype(s)" % (ec.error_count._value(), ec.warning_count._value(), len(genotype_list)))  # make errors fatal; by default they stop the simulation anyway so let's not use potentially incorrect or partial results and fix the cause first.
159
160                results = []
161                for g in frams.GenePools[0]:
162                        serialized_dict = frams.String.serialize(g.data[frams.ExpProperties.evalsavedata._value()])
163                        evaluations = json.loads(serialized_dict._string())  # Framsticks native ExtValue's get converted to native python types such as int, float, list, str.
164                        # now, for consistency with FramsticksCLI.py, add "num" and "name" keys that are missing because we got data directly from Genotype, not from the file produced by standard-eval.expdef's function printStats(). What we do below is what printStats() does.
165                        result = {"num": g.num._value(), "name": g.name._value(), "evaluations": evaluations}
166                        results.append(result)
167
168                return results
169
170
171        def mutate(self, genotype_list: List[str]) -> List[str]:
172                """
173                Returns:
174                        The genotype(s) of the mutated source genotype(s). self.GENOTYPE_INVALID for genotypes whose mutation failed (for example because the source genotype was invalid).
175                """
176                assert isinstance(genotype_list, list)  # because in python, str has similar capabilities as list and here it would pretend to work too, so to avoid any ambiguity
177
178                mutated = []
179                for g in genotype_list:
180                        mutated.append(frams.GenMan.mutate(frams.Geno.newFromString(g)).genotype._string())
181                if len(genotype_list) != len(mutated):
182                        raise RuntimeError("Submitted %d genotypes, received %d mutants" % (len(genotype_list), len(mutated)))
183                return mutated
184
185
186        def crossOver(self, genotype_parent1: str, genotype_parent2: str) -> str:
187                """
188                Returns:
189                        The genotype of the offspring. self.GENOTYPE_INVALID if the crossing over failed.
190                """
191                return frams.GenMan.crossOver(frams.Geno.newFromString(genotype_parent1), frams.Geno.newFromString(genotype_parent2)).genotype._string()
192
193
194        def dissimilarity(self, genotype_list: List[str], method: int) -> np.ndarray:
195                """
196                        :param method: -1 = genetic Levenshtein distance; 0, 1, 2 = phenetic dissimilarity (SimilMeasureGreedy, SimilMeasureHungarian, SimilMeasureDistribution); -2, -3 = phenetic density distribution (count, frequency).
197                        :return: A square array with dissimilarities of each pair of genotypes.
198                """
199                assert isinstance(genotype_list, list)  # because in python, str has similar capabilities as list and here it would pretend to work too, so to avoid any ambiguity
200
201                # if you want to override what EVALUATION_SETTINGS_FILE sets, you can do it below:
202                # frams.SimilMeasureHungarian.simil_partgeom = 1
203                # frams.SimilMeasureHungarian.simil_weightedMDS = 1
204
205                n = len(genotype_list)
206                square_matrix = np.zeros((n, n))
207
208                if method in (0, 1, 2):  # Framsticks phenetic dissimilarity methods
209                        frams.SimilMeasure.simil_type = method
210                        genos = []  # prepare an array of Geno objects so that we don't need to convert raw strings to Geno objects all the time in loops
211                        for g in genotype_list:
212                                genos.append(frams.Geno.newFromString(g))
213                        frams_evaluateDistance = frams.SimilMeasure.evaluateDistance  # cache function reference for better performance in loops
214                        for i in range(n):
215                                for j in range(n):  # maybe calculate only one triangle if you really need a 2x speedup
216                                        square_matrix[i][j] = frams_evaluateDistance(genos[i], genos[j])._double()
217                elif method == -1:
218                        import Levenshtein
219                        for i in range(n):
220                                for j in range(n):  # maybe calculate only one triangle if you really need a 2x speedup
221                                        square_matrix[i][j] = Levenshtein.distance(genotype_list[i], genotype_list[j])
222                elif method in (-2, -3):
223                        if self.dissim_measure_density_distribution is None:
224                                from dissimilarity.densityDistribution import DensityDistribution
225                                self.dissim_measure_density_distribution = DensityDistribution(frams)
226                        self.dissim_measure_density_distribution.frequency = (method == -3)
227                        square_matrix = self.dissim_measure_density_distribution.getDissimilarityMatrix(genotype_list)
228                else:
229                        raise ValueError("Don't know what to do with dissimilarity method = %d" % method)
230
231                for i in range(n):
232                        assert square_matrix[i][i] == 0, "Not a correct dissimilarity matrix, diagonal expected to be 0"
233                non_symmetric_diff = square_matrix - square_matrix.T
234                non_symmetric_count = np.count_nonzero(non_symmetric_diff)
235                if non_symmetric_count > 0:
236                        non_symmetric_diff_abs = np.abs(non_symmetric_diff)
237                        max_pos1d = np.argmax(non_symmetric_diff_abs)  # location of the largest discrepancy
238                        max_pos2d_XY = np.unravel_index(max_pos1d, non_symmetric_diff_abs.shape)  # 2D coordinates of the largest discrepancy
239                        max_pos2d_YX = max_pos2d_XY[1], max_pos2d_XY[0]  # 2D coordinates of the largest discrepancy mirror
240                        worst_guy_XY = square_matrix[max_pos2d_XY]  # this distance and the other below (its mirror) are most different
241                        worst_guy_YX = square_matrix[max_pos2d_YX]
242                        print("[WARN] Dissimilarity matrix: expecting symmetry, but %g out of %d pairs were asymmetrical, max difference was %g (%g %%)" %
243                              (non_symmetric_count / 2,
244                               n * (n - 1) / 2,
245                               non_symmetric_diff_abs[max_pos2d_XY],
246                               non_symmetric_diff_abs[max_pos2d_XY] * 100 / ((worst_guy_XY + worst_guy_YX) / 2)))  # max diff is not necessarily max %
247                return square_matrix
248
249
250        def getRandomGenotype(self, initial_genotype: str, parts_min: int, parts_max: int, neurons_min: int, neurons_max: int, iter_max: int, return_even_if_failed: bool):
251                """
252                Some algorithms require a "random solution". To this end, this method generates a random framstick genotype.
253
254                :param initial_genotype: if not a specific genotype (which could facilitate greater variability of returned genotypes), try `getSimplest(format)`.
255                :param iter_max: how many mutations can be used to generate a random genotype that fullfills target numbers of parts and neurons.
256                :param return_even_if_failed: if the target numbers of parts and neurons was not achieved, return the closest genotype that was found? Set it to False first to experimentally adjust `iter_max` so that in most calls this function returns a genotype with target numbers of parts and neurons, and then you can set this parameter to True if target numbers of parts and neurons are not absolutely required.
257                :returns: a valid genotype or None if failed and `return_even_if_failed` is False.
258                """
259
260
261                def estimate_diff(g: str):
262                        if not self.isValidCreature([g])[0]:
263                                return None, None
264                        m = frams.Model.newFromString(g)
265                        numparts = m.numparts._value()
266                        numneurons = m.numneurons._value()
267                        diff_parts = abs(target_parts - numparts)
268                        diff_neurons = abs(target_neurons - numneurons)
269                        in_target_range = (parts_min <= numparts <= parts_max) and (neurons_min <= numneurons <= neurons_max)  # less demanding than precisely reaching target_parts and target_neurons
270                        return diff_parts + diff_neurons, in_target_range
271
272
273                # try to find a genotype that matches the number of parts and neurons randomly selected from the provided min..max range
274                # (even if we fail to match this precise target, the goal will be achieved if the found genotype manages to be within min..max ranges for parts and neurons)
275                target_parts = np.random.default_rng().integers(parts_min, parts_max + 1)
276                target_neurons = np.random.default_rng().integers(neurons_min, neurons_max + 1)
277
278                if not self.isValidCreature([initial_genotype])[0]:
279                        raise ValueError("Initial genotype '%s' is invalid" % initial_genotype)
280
281                g = initial_genotype
282                for i in range(iter_max // 2):  # a sequence of iter_max/2 undirected mutations starting from initial_genotype
283                        g_new = self.mutate([g])[0]
284                        if self.isValidCreature([g_new])[0]:  # valid mutation
285                                g = g_new
286
287                best_diff, best_in_target_range = estimate_diff(g)
288                for i in range(iter_max // 2):  # a sequence of iter_max/2 mutations, only accepting those which approach target numbers of parts and neurons
289                        g_new = self.mutate([g])[0]
290                        diff, in_target_range = estimate_diff(g_new)
291                        if diff is not None and diff <= best_diff:  # valid mutation and better or as good as current
292                                g = g_new
293                                best_diff = diff
294                                best_in_target_range = in_target_range
295                # print(diff, best_diff) # print progress approaching target numbers of parts and neurons
296
297                if best_in_target_range or return_even_if_failed:
298                        return g  # best found so far (closest to target numbers of parts and neurons)
299                return None
300
301
302        def isValid(self, genotype_list: List[str]) -> List[bool]:
303                """
304                :returns: genetic validity (i.e., not based on trying to build creatures from provided genotypes). For a more thorough check, see isValidCreature().
305                """
306                assert isinstance(genotype_list, list)  # because in python, str has similar capabilities as list and here it would pretend to work too, so to avoid any ambiguity
307                valid = []
308                for g in genotype_list:
309                        valid.append(frams.Geno.newFromString(g).is_valid._int() == 1)
310                if len(genotype_list) != len(valid):
311                        raise RuntimeError("Tested %d genotypes, received %d validity values" % (len(genotype_list), len(valid)))
312                return valid
313
314
315        def isValidCreature(self, genotype_list: List[str]) -> List[bool]:
316                """
317                :returns: validity of the genotype when revived. Apart from genetic validity, this includes detecting problems that may arise when building a Creature from Genotype, such as multiple muscles of the same type in the same location in body, e.g. 'X[@][@]'.
318                """
319
320                # Genetic validity and simulator validity are two separate properties (in particular, genetic validity check is implemented by the author of a given genetic format and operators).
321                # Thus, the subset of genotypes valid genetically and valid in simulation may be overlapping.
322                # For example, 'X[]' or 'Xr' are considered invalid by the genetic checker, but the f1->f0 converter will ignore meaningless genes and produce a valid f0 genotype.
323                # On the other hand, 'X[@][@]' or 'X[|][|]' are valid genetically, but not possible to simulate.
324                # For simplicity of usage (so that one does not need to check both properties separately using both functions), let's make one validity a subset of the other.
325                # The genetic check in the first lines of the "for" loop makes this function at least as demanding as isValid().
326
327                assert isinstance(genotype_list, list)  # because in python, str has similar capabilities as list and here it would pretend to work too, so to avoid any ambiguity
328
329                pop = frams.Populations[0]  # assuming rules from population #0 (self-colision settings are population-dependent and can influence creature build success/failure)
330
331                valid = []
332                for g in genotype_list:
333                        if frams.Geno.newFromString(g).is_valid._int() != 1:
334                                valid.append(False)  # invalid according to genetic check
335                        else:
336                                can_add = pop.canAdd(g, 1, 1)  # First "1" means to treat warnings during build as build failures - this allows detecting problems when building Creature from Genotype. Second "1" means mute emitted errors, warnings, messages. Returns 1 (ok, could add) or 0 (there were some problems building Creature from Genotype)
337                                valid.append(can_add._int() == 1)
338
339                if len(genotype_list) != len(valid):
340                        raise RuntimeError("Tested %d genotypes, received %d validity values" % (len(genotype_list), len(valid)))
341                return valid
342
343
def parseArguments():
        """Defines the command-line options of this program, parses the actual command line, and returns the parsed arguments."""
        option_specs = (
                ('-path', dict(type=ensureDir, required=True, help='Path to the Framsticks library (.dll or .so or .dylib) without trailing slash.')),
                ('-lib', dict(required=False, help='Library name. If not given, "frams-objects.dll" (or .so or .dylib) is assumed depending on the platform.')),
                ('-simsettings', dict(required=False, help="The name of the .sim file with settings for evaluation, mutation, crossover, and similarity estimation. If not given, \"eval-allcriteria.sim\" is assumed by default. Must be compatible with the \"standard-eval\" expdef. If you want to provide more files, separate them with a semicolon ';'.")),
                ('-genformat', dict(required=False, help='Genetic format for the demo run, for example 4, 9, or S. If not given, f1 is assumed.')),
        )
        parser = argparse.ArgumentParser(description='Run this program with "python -u %s" if you want to disable buffering of its output.' % sys.argv[0])
        for option_name, option_settings in option_specs:  # options are registered in the order listed above
                parser.add_argument(option_name, **option_settings)
        return parser.parse_args()
351
352
def ensureDir(string):
        """
        Checks that the argument is a path to an existing directory (used as the argparse "type" of the -path option).

        :param string: the path to check.
        :returns: the unchanged argument.
        :raises NotADirectoryError: when the argument is not an existing directory.
        """
        if not os.path.isdir(string):
                raise NotADirectoryError(string)
        return string
358
359
if __name__ == "__main__":
        # A demo run: initializes the library using command-line arguments, then shows each basic operation once.

        # TODO ideas:
        # - check_validity with three levels (invalid, corrected, valid)
        # - a pool of binaries running simultaneously, balance load - in particular evaluation

        parsed_args = parseArguments()
        framsLib = FramsticksLib(parsed_args.path, parsed_args.lib, parsed_args.simsettings)

        direct_result = frams.Simulator.eval("return \"4\"+2;")
        print("Sending a direct command to Framsticks library that calculates \"4\"+2 yields", direct_result)

        genformat = parsed_args.genformat
        if genformat is None:
                genformat = '1'  # f1 is the default genetic format of this demo
        simplest = framsLib.getSimplest(genformat)
        print("\tSimplest genotype:", simplest)

        MUTATE_COUNT = 10
        parent1 = framsLib.mutate([simplest])[0]
        parent2 = parent1
        for _ in range(MUTATE_COUNT):  # example of a chain of mutations
                parent2 = framsLib.mutate([parent2])[0]
        print("\tParent1 (mutated simplest):", parent1)
        print("\tParent2 (Parent1 mutated %d times):" % MUTATE_COUNT, parent2)

        offspring = framsLib.crossOver(parent1, parent2)
        print("\tCrossover (Offspring):", offspring)

        dissimilarity_matrix = framsLib.dissimilarity([parent1, offspring], 1)
        print('\tDissimilarity of Parent1 and Offspring:', dissimilarity_matrix[0, 1])
        print('\tPerformance of Offspring:', framsLib.evaluate([offspring]))

        family = [parent1, parent2, offspring]
        print('\tValidity (genetic) of Parent1, Parent 2, and Offspring:', framsLib.isValid(family))
        print('\tValidity (simulation) of Parent1, Parent 2, and Offspring:', framsLib.isValidCreature(family))
        print('\tValidity (constraints) of Offspring:', framsLib.satisfiesConstraints(offspring, 2, None, 5, 10, None))
        print('\tRandom genotype:', framsLib.getRandomGenotype(simplest, 2, 6, 2, 4, 100, True))
Note: See TracBrowser for help on using the repository browser.