source: framspy/FramsticksCLI.py @ 1331

Last change on this file since 1331 was 1105, checked in by Maciej Komosinski, 4 years ago

Using the "framsfiles" module to write genotypes to a file instead of an ad-hoc code

File size: 16.4 KB
Line 
1from subprocess import Popen, PIPE, check_output
2from enum import Enum
3from typing import List  # to be able to specify a type hint of list(something)
4from itertools import count  # for tracking multiple instances
5import json
6import sys, os
7import argparse
8import numpy as np
9from framsfiles import reader as framsreader
10from framsfiles import writer as framswriter
11
12
class FramsticksCLI:
    """Note: instead of this class, you should use the simpler, faster, and more reliable FramsticksLib.py.

    This class runs Framsticks CLI (command-line) executable and communicates with it using standard input and output.
    You can perform basic operations like mutation, crossover, and evaluation of genotypes.
    This way you can perform evolution controlled by python as well as access and manipulate genotypes.
    You can even design and use in evolution your own genetic representation implemented entirely in python.

    You need to provide one or two parameters when you run this class: the path to Framsticks CLI
    and, optionally, the name of the Framsticks CLI executable (if it is non-standard). See::
            FramsticksCLI.py -h"""

    PRINT_FRAMSTICKS_OUTPUT: bool = False  # set to True for debugging
    DETERMINISTIC: bool = False  # set to True to have the same results in each run

    GENO_SAVE_FILE_FORMAT = Enum('GENO_SAVE_FILE_FORMAT', 'NATIVEFRAMS RAWGENO')  # how to save genotypes
    OUTPUT_DIR = "scripts_output"
    GENOTYPE_INVALID = "/*invalid*/"  # this is how genotype invalidity is represented in Framsticks
    STDOUT_ENDOPER_MARKER = "FileObject.write:"  # we look for this message on Framsticks CLI stdout to detect when Framsticks created a file with the result we expect

    FILE_PREFIX = 'framspy_'

    RANDOMIZE_CMD = "Math.randomize();"
    SETEXPEDEF_CMD = "Simulator.expdef=\"standard-eval\";"
    GETSIMPLEST_CMD = "getsimplest"
    GETSIMPLEST_FILE = "simplest.gen"
    EVALUATE_CMD = "evaluate eval-allcriteria.sim"  # the .sim file must be compatible with the standard-eval expdef
    EVALUATE_FILE = "genos_eval.json"
    CROSSOVER_CMD = "crossover"
    CROSSOVER_FILE = "crossover_child.gen"
    DISSIMIL_CMD = "dissimil"
    DISSIMIL_FILE = "dissimilarity_matrix.tsv"  # tab-separated values
    ISVALID_CMD = "isvalid_many"
    ISVALID_FILE = "validity.txt"
    MUTATE_CMD = "mutate_many"
    MUTATE_FILE = "mutation_results.gen"

    CLI_INPUT_FILE = "genotypes.gen"

    _next_instance_id = count(0)  # "static" counter incremented when a new instance is created. Used to ensure unique filenames for each instance.

    sendDirectCommand_counter = count(0)  # an internal counter for the sendDirectCommand() method; should be static within that method but python does not allow

    def __init__(self, framspath, framsexe, pid=""):
        """Locate the Framsticks CLI executable, detect its writable directory and spawn it.

        Args:
            framspath: Directory that contains the Framsticks CLI executable.
            framsexe: Executable name; when None, 'frams.exe' (Windows) or 'frams.linux' is used.
            pid: Optional identifier embedded in the names of created files (None is treated as "").

        Exits the interpreter with status 1 when the executable cannot be found.
        """
        self.pid = "" if pid is None else str(pid)  # str() so that a numeric id does not break filename concatenation
        self.id = next(FramsticksCLI._next_instance_id)
        self.frams_path = framspath
        self.frams_exe = framsexe if framsexe is not None else 'frams.exe' if os.name == "nt" else 'frams.linux'
        self.writing_path = None
        mainpath = os.path.join(self.frams_path, self.frams_exe)
        exe_call = [mainpath, '-Q', '-s', '-c', '-icliutils.ini']  # -c will be ignored in Windows Framsticks (this option is meaningless because the Windows version does not support color console, so no need to deactivate this feature using -c)
        exe_call_to_get_version = [mainpath, '-V']
        exe_call_to_get_path = [mainpath, '-?']
        try:
            print("\n".join(self.__readAllOutput(exe_call_to_get_version)))
            # The help text contains a line mentioning 'dDIRECTORY' with the writable directory quoted in apostrophes.
            for helpline in self.__readAllOutput(exe_call_to_get_path):
                if 'dDIRECTORY' in helpline:
                    self.writing_path = helpline.split("'")[1]
        except FileNotFoundError:
            print("Could not find Framsticks executable ('%s') in the given location ('%s')." % (self.frams_exe, self.frams_path))
            sys.exit(1)
        print("Temporary files with results will be saved in detected writable working directory '%s'" % self.writing_path)
        self.__spawnFramsticksCLI(exe_call)

    def __readAllOutput(self, command):
        """Run `command` to completion and return its stdout as a list of decoded, right-stripped lines."""
        # communicate() drains both pipes and waits for the process; the context manager closes the pipes,
        # so no file descriptors or zombie processes are left behind.
        with Popen(command, stdout=PIPE, stderr=PIPE, stdin=PIPE) as frams_process:
            stdout_data, _ = frams_process.communicate()
        return [line.decode('utf-8').rstrip() for line in stdout_data.splitlines()]

    def __spawnFramsticksCLI(self, args):
        """Spawn the interactive Framsticks CLI child process and run two basic self-tests on it."""
        # the child app (Framsticks CLI) should not buffer outputs and we need to immediately read its stdout, hence we use pexpect/wexpect
        print('Spawning Framsticks CLI for continuous stdin/stdout communication... ', end='')
        # NOTE(review): the arguments are joined with spaces, so a path containing spaces will likely be split - confirm and quote if needed
        if os.name == "nt":  # Windows:
            import wexpect  # https://pypi.org/project/wexpect/
            # https://github.com/raczben/wexpect/tree/master/examples
            self.child = wexpect.spawn(' '.join(args))
        else:
            import pexpect  # https://pexpect.readthedocs.io/en/stable/
            self.child = pexpect.spawn(' '.join(args))
        self.child.setecho(False)  # ask the communication to not copy to stdout what we write to stdin
        print('OK.')

        self.__readFromFramsCLIUntil("UserScripts.autoload")
        print('Performing a basic test 1/2... ', end='')
        assert self.getSimplest("1") == "X"
        print('OK.')
        print('Performing a basic test 2/2... ', end='')
        assert self.isValid(["X[0:0],", "X[0:0]", "X[1:0]"]) == [False, True, False]
        print('OK.')
        if not self.DETERMINISTIC:
            self.sendDirectCommand(self.RANDOMIZE_CMD)
        self.sendDirectCommand(self.SETEXPEDEF_CMD)

    def closeFramsticksCLI(self):
        """Ask the child Framsticks CLI process to terminate."""
        # End gracefully by sending end-of-file character: ^Z or ^D
        # Without the -Q argument ("quiet mode"), Framsticks CLI would print "Shell closed." for goodbye.
        self.child.sendline(chr(26 if os.name == "nt" else 4))

    def __getPrefixedFilename(self, filename: str) -> str:
        """Return `filename` prefixed with FILE_PREFIX, the pid and a letter code of this instance.

        The instance id is appended so there is no clash when many instances of this class use the
        same Framsticks CLI executable. Ids 0..25 map to 'A'..'Z'; larger ids use more letters
        (base 26), so the name never contains characters such as a backslash.
        """
        letters = ""
        remaining = self.id
        while True:
            remaining, digit = divmod(remaining, 26)
            letters = chr(ord('A') + digit) + letters
            if remaining == 0:
                break
        return FramsticksCLI.FILE_PREFIX + self.pid + letters + '_' + filename

    def __saveGenotypeToFile(self, genotype, name, mode, saveformat):
        """Write (or delete) a genotype file in the writable directory.

        Args:
            genotype: Genotype text; ignored in the 'd' mode.
            name: File name without the instance prefix.
            mode: A mode for open() ("w", "a", ...) or the special value 'd' meaning "delete the file if it exists".
            saveformat: A GENO_SAVE_FILE_FORMAT member; RAWGENO writes the genotype verbatim,
                anything else writes it as an "org" object using the framsfiles writer.

        Returns:
            A tuple (prefixed file name, absolute path).
        """
        relname = self.__getPrefixedFilename(name)
        absname = os.path.join(self.writing_path, relname)
        if mode == 'd':  # special mode, 'delete'
            if os.path.exists(absname):
                os.remove(absname)
            return relname, absname
        with open(absname, mode) as outfile:  # closed even if writing fails
            if saveformat == self.GENO_SAVE_FILE_FORMAT["RAWGENO"]:
                outfile.write(genotype)
            else:
                outfile.write(framswriter.from_collection({"_classname": "org", "genotype": genotype}))
                outfile.write("\n")
        return relname, absname

    def __readFromFramsCLIUntil(self, until_marker: str) -> str:
        """Read lines from the child process until one contains `until_marker`.

        Returns:
            All lines read before the marker line, each terminated with '\\n'. The marker line itself is not included.
        """
        line_end = '\r\n' if os.name == "nt" else '\n'
        collected = []
        while True:
            self.child.expect(line_end)
            raw = self.child.before
            # pexpect yields bytes unless an encoding was requested; str(bytes) would produce "b'...'"
            # and the "[ERROR]"/"[CRITICAL]" prefix checks below could never match.
            if isinstance(raw, bytes):
                msg = raw.decode('utf-8', errors='replace')
            else:
                msg = str(raw)
            msg = msg.rstrip('\r')  # a terminal may deliver "\r\n" while we only expected "\n"
            if self.PRINT_FRAMSTICKS_OUTPUT or msg.startswith(("[ERROR]", "[CRITICAL]")):
                print(msg)
            if until_marker in msg:
                break
            collected.append(msg + '\n')
        return "".join(collected)

    def __runCommand(self, command, genotypes, result_file_name, saveformat) -> List[str]:
        """Save genotypes to input file(s), send `command` to Framsticks CLI and wait until it writes its result file.

        Returns:
            Absolute paths of the input files followed by the path of the result file (always the last element).

        Raises:
            ValueError: when the NATIVEFRAMS format is requested for an empty list of genotypes.
        """
        filenames_rel = []  # list of file names with input data for the command
        filenames_abs = []  # same list but absolute paths actually used
        if saveformat == self.GENO_SAVE_FILE_FORMAT["RAWGENO"]:
            for i, genotype in enumerate(genotypes):
                # plain text format = must have a separate file for each genotype
                rel, abs_path = self.__saveGenotypeToFile(genotype, "genotype" + str(i) + ".gen", "w", self.GENO_SAVE_FILE_FORMAT["RAWGENO"])
                filenames_rel.append(rel)
                filenames_abs.append(abs_path)
        elif saveformat == self.GENO_SAVE_FILE_FORMAT["NATIVEFRAMS"]:
            if not genotypes:
                # without this check no input file would be created and the file names below would be undefined
                raise ValueError("At least one genotype is required for the native Framsticks file format.")
            self.__saveGenotypeToFile(None, self.CLI_INPUT_FILE, 'd', None)  # 'd'elete: ensure there is nothing left from the last run of the program because we "a"ppend to file in the loop below
            for genotype in genotypes:
                rel, abs_path = self.__saveGenotypeToFile(genotype, self.CLI_INPUT_FILE, "a", self.GENO_SAVE_FILE_FORMAT["NATIVEFRAMS"])
            #  since we use the same file in the loop above, add this file only once (i.e., outside of the loop)
            filenames_rel.append(rel)
            filenames_abs.append(abs_path)

        result_file_name = self.__getPrefixedFilename(result_file_name)
        cmd = command + " " + " ".join(filenames_rel) + " " + result_file_name
        self.child.sendline(cmd)
        self.__readFromFramsCLIUntil(self.STDOUT_ENDOPER_MARKER)
        filenames_abs.append(os.path.join(self.writing_path, self.OUTPUT_DIR, result_file_name))
        return filenames_abs  # last element is a path to the file containing results

    def __cleanUpCommandResults(self, filenames):
        """Deletes files with results just created by the command."""
        for name in filenames:
            os.remove(name)

    def sendDirectCommand(self, command: str) -> str:
        """Sends any command to Framsticks CLI. Use when you know Framsticks and its scripting language, Framscript.

        Returns:
            The output of the command, likely with extra \\n because for each entered command, Framsticks CLI responds with a (muted in Quiet mode) prompt and a \\n.
        """
        self.child.sendline(command.strip())
        # Use the number returned by next(); converting the counter object itself to str would embed "count(N)" in the marker.
        marker = "uniqe-marker-" + str(next(FramsticksCLI.sendDirectCommand_counter))
        self.child.sendline("Simulator.print(\"%s\");" % marker)
        return self.__readFromFramsCLIUntil(marker)

    def getSimplest(self, genetic_format) -> str:
        """Return the simplest genotype that Framsticks provides for the given genetic format."""
        files = self.__runCommand(self.GETSIMPLEST_CMD + " " + genetic_format + " ", [], self.GETSIMPLEST_FILE, self.GENO_SAVE_FILE_FORMAT["RAWGENO"])
        with open(files[-1]) as f:
            genotype = f.read()
        self.__cleanUpCommandResults(files)
        return genotype

    def evaluate(self, genotype_list: List[str]):
        """
        Returns:
            List of dictionaries containing the performance of genotypes evaluated with self.EVALUATE_CMD.
            Note that for whatever reason (e.g. incorrect genotype), the dictionaries you will get may be empty or
            partially empty and may not have the fields you expected, so handle such cases properly.
            An empty input list yields an empty list without contacting Framsticks.
            None is returned when Framsticks produced no performance data at all.
        """
        assert isinstance(genotype_list, list)  # because in python str has similar capabilities as list and here it would pretend to work too, so to avoid any ambiguity
        if not genotype_list:
            return []
        files = self.__runCommand(self.EVALUATE_CMD, genotype_list, self.EVALUATE_FILE, self.GENO_SAVE_FILE_FORMAT["NATIVEFRAMS"])
        with open(files[-1]) as f:
            data = json.load(f)
        if len(data) > 0:
            self.__cleanUpCommandResults(files)
            assert len(genotype_list) == len(data), f"After evaluating {len(genotype_list)} genotype(s) got {len(data)} result(s)."
            return data
        else:
            print("Evaluating genotype: no performance data was returned in", self.EVALUATE_FILE)  # we do not delete files here
            return None

    def mutate(self, genotype_list: List[str]) -> List[str]:
        """
        Returns:
            The genotype(s) of the mutated source genotype(s). self.GENOTYPE_INVALID for genotypes whose mutation failed (for example because the source genotype was invalid).
            An empty input list yields an empty list without contacting Framsticks.
        """
        assert isinstance(genotype_list, list)  # because in python str has similar capabilities as list and here it would pretend to work too, so to avoid any ambiguity
        if not genotype_list:
            return []
        files = self.__runCommand(self.MUTATE_CMD, genotype_list, self.MUTATE_FILE, self.GENO_SAVE_FILE_FORMAT["NATIVEFRAMS"])
        genos = framsreader.load(files[-1], "gen file")
        self.__cleanUpCommandResults(files)
        return [g["genotype"] for g in genos]

    def crossOver(self, genotype_parent1: str, genotype_parent2: str) -> str:
        """
        Returns:
            The genotype of the offspring. self.GENOTYPE_INVALID if the crossing over failed.
        """
        files = self.__runCommand(self.CROSSOVER_CMD, [genotype_parent1, genotype_parent2], self.CROSSOVER_FILE, self.GENO_SAVE_FILE_FORMAT["RAWGENO"])
        with open(files[-1]) as f:
            child_genotype = f.read()
        self.__cleanUpCommandResults(files)
        return child_genotype

    def dissimilarity(self, genotype_list: List[str]) -> np.ndarray:
        """
        Returns:
            A square array with dissimilarities of each pair of genotypes.
            An empty input list yields an array of shape (0, 0) without contacting Framsticks.
        """
        assert isinstance(genotype_list, list)  # because in python str has similar capabilities as list and here it would pretend to work too, so to avoid any ambiguity
        if not genotype_list:
            return np.zeros((0, 0), dtype=np.float64)
        files = self.__runCommand(self.DISSIMIL_CMD, genotype_list, self.DISSIMIL_FILE, self.GENO_SAVE_FILE_FORMAT["NATIVEFRAMS"])
        with open(files[-1]) as f:
            # genfromtxt returns a 1-D array for a single-row file; atleast_2d keeps the column slicing below valid for one genotype
            dissimilarity_matrix = np.atleast_2d(np.genfromtxt(f, dtype=np.float64, comments='#', encoding=None, delimiter='\t'))
        # We would like to skip column #1 while reading and read everything else, but... https://stackoverflow.com/questions/36091686/exclude-columns-from-genfromtxt-with-numpy
        # This would be too complicated, so strings (names) in column #1 become NaN as floats (unless they accidentally are valid numbers) - not great, not terrible
        square_matrix = dissimilarity_matrix[:, 2:]  # get rid of two first columns (fitness and name)
        EXPECTED_SHAPE = (len(genotype_list), len(genotype_list))
        assert square_matrix.shape == EXPECTED_SHAPE, f"Not a correct dissimilarity matrix, expected {EXPECTED_SHAPE}"
        assert (np.diagonal(square_matrix) == 0).all(), "Not a correct dissimilarity matrix, diagonal expected to be 0"
        assert (square_matrix == square_matrix.T).all(), "Probably not a correct dissimilarity matrix, expecting symmetry, verify this"  # could introduce tolerance in comparison (e.g. class field DISSIMIL_DIFF_TOLERANCE=10^-5) so that miniscule differences do not fail here
        self.__cleanUpCommandResults(files)
        return square_matrix

    def isValid(self, genotype_list: List[str]) -> List[bool]:
        """
        Returns:
            One boolean per submitted genotype: True when the corresponding line of the result file is "1".
            An empty input list yields an empty list without contacting Framsticks.
        """
        assert isinstance(genotype_list, list)  # because in python str has similar capabilities as list and here it would pretend to work too, so to avoid any ambiguity
        if not genotype_list:
            return []
        files = self.__runCommand(self.ISVALID_CMD, genotype_list, self.ISVALID_FILE, self.GENO_SAVE_FILE_FORMAT["NATIVEFRAMS"])
        with open(files[-1]) as f:
            valid = [line.strip() == "1" for line in f]
        self.__cleanUpCommandResults(files)
        assert len(genotype_list) == len(valid), "Submitted %d genotypes, received %d validity values" % (len(genotype_list), len(valid))
        return valid
281
282
def parseArguments():
    """Define the command-line options of the demo run and return the parsed arguments."""
    description = 'Run this program with "python -u %s" if you want to disable buffering of its output.' % sys.argv[0]
    cli = argparse.ArgumentParser(description=description)
    # The only mandatory option; ensureDir rejects values that are not existing directories.
    cli.add_argument('-path', type=ensureDir, required=True, help='Path to Framsticks CLI without trailing slash.')
    optional_flags = (
        ('-exe', 'Executable name. If not given, "frams.exe" or "frams.linux" is assumed depending on the platform.'),
        ('-genformat', 'Genetic format for the demo run, for example 4, 9, or S. If not given, f1 is assumed.'),
        ('-pid', 'Unique ID of this process. Only relevant when you run multiple instances of this class simultaneously but as separate processes, and they use the same Framsticks CLI executable. This value will be appended to the names of created files to avoid conflicts.'),
    )
    for flag, help_text in optional_flags:
        cli.add_argument(flag, required=False, help=help_text)
    return cli.parse_args()
290
291
def ensureDir(string):
    """Return `string` unchanged when it names an existing directory.

    Raises:
        NotADirectoryError: when `string` is not an existing directory.
    """
    if not os.path.isdir(string):
        raise NotADirectoryError(string)
    return string
297
298
if __name__ == "__main__":
    # A demo run: obtains the simplest genotype, mutates it, crosses over two parents,
    # then reports dissimilarity, performance and validity of the results.

    # TODO ideas:
    # - check_validity with three levels (invalid, corrected, valid)
    # - "vectorize" crossover so that many genotypes is handled in one call. Even better, use .so/.dll direct communication to CLI
    # - use threads for non-blocking reading from frams' stdout and thus not relying on specific strings printed by frams
    # - a pool of binaries running simultaneously, balance load - in particular, evaluation

    parsed_args = parseArguments()
    framsCLI = FramsticksCLI(parsed_args.path, parsed_args.exe, parsed_args.pid)

    try:
        print("Sending a direct command to Framsticks CLI that calculates \"4\"+2 yields", repr(framsCLI.sendDirectCommand("Simulator.print(\"4\"+2);")))

        simplest = framsCLI.getSimplest('1' if parsed_args.genformat is None else parsed_args.genformat)
        print("\tSimplest genotype:", simplest)
        parent1 = framsCLI.mutate([simplest])[0]
        parent2 = parent1
        MUTATE_COUNT = 10
        for _ in range(MUTATE_COUNT):  # example of a chain of 10 mutations
            parent2 = framsCLI.mutate([parent2])[0]
        print("\tParent1 (mutated simplest):", parent1)
        print("\tParent2 (Parent1 mutated %d times):" % MUTATE_COUNT, parent2)
        offspring = framsCLI.crossOver(parent1, parent2)
        print("\tCrossover (Offspring):", offspring)
        print('\tDissimilarity of Parent1 and Offspring:', framsCLI.dissimilarity([parent1, offspring])[0, 1])
        print('\tPerformance of Offspring:', framsCLI.evaluate([offspring]))
        print('\tValidity of Parent1, Parent 2, and Offspring:', framsCLI.isValid([parent1, parent2, offspring]))
    finally:
        # Always ask the child Framsticks CLI process to terminate, even when one of the demo steps fails.
        framsCLI.closeFramsticksCLI()
Note: See TracBrowser for help on using the repository browser.