source: framspy/FramsticksCLI.py @ 977

Last change on this file since 977 was 971, checked in by Maciej Komosinski, 5 years ago

Added support for calculation of dissimilarity of more than two genotypes at once (returns a full square matrix)

File size: 13.8 KB
Line 
1from math import sqrt
2from subprocess import Popen, PIPE, check_output
3from enum import Enum
4from typing import List  # to be able to specify a type hint of list(something)
5from itertools import count  # for tracking multiple instances
6import json
7import sys, os
8import argparse
9import numpy as np
10
11
class FramsticksCLI:
        """Runs Framsticks CLI (command-line) executable and communicates with it using standard input and output.
        You can perform basic operations like mutation, crossover, and evaluation of genotypes.
        This way you can perform evolution controlled by python as well as access and manipulate genotypes.
        You can even design and use in evolution your own genetic representation implemented entirely in python.

        You need to provide one or two parameters when you run this class: the path to Framsticks CLI
        and the name of the Framsticks CLI executable (if it is non-standard). See::
                FramsticksCLI.py -h"""

        PRINT_FRAMSTICKS_OUTPUT: bool = False  # set to True for debugging
        DETERMINISTIC: bool = False  # set to True to have the same results on each run

        GENO_SAVE_FILE_FORMAT = Enum('GENO_SAVE_FILE_FORMAT', 'NATIVEFRAMS RAWGENO')  # how to save genotypes
        OUTPUT_DIR = "scripts_output"
        STDOUT_ENDOPER_MARKER = "FileObject.write"  # we look for this message on Framsticks CLI stdout to detect when Framsticks created a file with the result we expect

        FILE_PREFIX = 'framspy_'

        RANDOMIZE_CMD = "rnd" + "\n"
        SETEXPEDEF_CMD = "expdef standard-eval" + "\n"
        GETSIMPLEST_CMD = "getsimplest"
        GETSIMPLEST_FILE = "simplest.gen"
        EVALUATE_CMD = "evaluate eval-allcriteria.sim"
        EVALUATE_FILE = "genos_eval.json"
        CROSSOVER_CMD = "crossover"
        CROSSOVER_FILE = "child.gen"
        DISSIMIL_CMD = "dissimil"
        DISSIMIL_FILE = "dissimilarity_matrix.gen"
        ISVALID_CMD = "isvalid"
        ISVALID_FILE = "validity.gen"
        MUTATE_CMD = "mutate"
        MUTATE_FILE = "mutant.gen"

        CLI_INPUT_FILE = "genotypes.gen"

        _next_instance_id = count(0)  # "static" counter incremented when a new instance is created. Used for unique filenames


        def __init__(self, framspath, framsexe, pid=""):
                """Locates the Framsticks executable, detects its writable directory and starts the CLI session.

                Args:
                        framspath: directory that contains the Framsticks CLI executable.
                        framsexe: name of the executable; None selects 'frams.exe' on Windows and 'frams.linux' elsewhere.
                        pid: optional unique string embedded in the names of created files (None is treated as "").

                The program is terminated with sys.exit(1) when the executable cannot be found
                or when its working directory cannot be detected from the help output.
                """
                self.pid = pid if pid is not None else ""
                self.id = next(FramsticksCLI._next_instance_id)
                self.frams_path = framspath
                if framsexe is not None:
                        self.frams_exe = framsexe
                else:
                        self.frams_exe = 'frams.exe' if os.name == "nt" else 'frams.linux'
                self.writing_path = None
                mainpath = os.path.join(self.frams_path, self.frams_exe)
                exe_call = [mainpath, '-Q', '-s', '-c', '-icliutils.ini']  # -c will be ignored in Windows Framsticks (this option is meaningless because the Windows version does not support color console, so no need to deactivate this feature using -c)
                exe_call_to_get_version = [mainpath, '-V']
                exe_call_to_get_path = [mainpath, '-?']
                try:
                        print("\n".join(self.__readAllOutput(exe_call_to_get_version)))
                        help_lines = self.__readAllOutput(exe_call_to_get_path)
                except FileNotFoundError:
                        print("Could not find Framsticks executable ('%s') in the given location ('%s')." % (self.frams_exe, self.frams_path))
                        sys.exit(1)
                for helpline in help_lines:
                        if 'dDIRECTORY' in helpline:
                                # the directory is the first single-quoted fragment of the help line
                                self.writing_path = helpline.split("'")[1]
                if self.writing_path is None:
                        # without this directory every later os.path.join() would fail with an obscure TypeError, so stop early
                        print("Could not detect the writable working directory of Framsticks executable ('%s')." % mainpath)
                        sys.exit(1)
                print("Temporary files with results will be saved in detected writable working directory '%s'" % self.writing_path)
                self.__spawnFramsticksCLI(exe_call)


        def __readAllOutput(self, command):
                """Runs `command` until it finishes and returns its standard output as a list of right-stripped lines.

                Raises FileNotFoundError (from Popen) when the executable does not exist.
                """
                # the context manager closes all pipes and waits for the process; communicate() drains
                # stdout and stderr together so the child cannot block on a full stderr pipe
                with Popen(command, stdout=PIPE, stderr=PIPE, stdin=PIPE) as frams_process:
                        stdout_data, _ = frams_process.communicate()
                raw_lines = stdout_data.split(b'\n')
                if raw_lines and raw_lines[-1] == b'':
                        raw_lines.pop()  # artifact of split(): output was empty or ended with a newline
                return [line.decode('utf-8').rstrip() for line in raw_lines]


        def __spawnFramsticksCLI(self, args):
                """Starts the interactive Framsticks CLI process, runs three sanity checks and initializes the session.

                Args:
                        args: the executable path followed by its command-line options.
                """
                # the child app (Framsticks CLI) should not buffer outputs and we need to immediately read its stdout, hence we use pexpect/wexpect
                print('Spawning Framsticks CLI for continuous stdin/stdout communication... ', end='')
                if os.name == "nt":  # Windows:
                        import wexpect  # https://pypi.org/project/wexpect/
                        # https://github.com/raczben/wexpect/tree/master/examples
                        self.child = wexpect.spawn(' '.join(args))
                else:
                        import pexpect  # https://pexpect.readthedocs.io/en/stable/
                        # pass the arguments as a list so that a path containing spaces is not split into separate arguments
                        self.child = pexpect.spawn(args[0], args[1:])
                        self.child.setecho(False)  # linux only
                print('OK.')

                self.__readFromFramsCLIUntil("UserScripts.autoload")
                print('Performing a basic test 1/3... ', end='')
                assert self.getSimplest("1") == "X"
                print('OK.')
                print('Performing a basic test 2/3... ', end='')
                assert self.isValid("X[0:0]") is True
                print('OK.')
                print('Performing a basic test 3/3... ', end='')
                assert self.isValid("X[0:0],") is False
                print('OK.')
                if not self.DETERMINISTIC:
                        self.child.sendline(self.RANDOMIZE_CMD)
                self.child.sendline(self.SETEXPEDEF_CMD)


        def closeFramsticksCLI(self):
                """Asks Framsticks CLI to terminate by sending the end-of-file character."""
                # End gracefully by sending end-of-file character: ^Z or ^D
                # Without -Q argument ("quiet mode"), Framsticks CLI would print "Shell closed." for goodbye.
                self.child.sendline(chr(26 if os.name == "nt" else 4))


        def __getPrefixedFilename(self, filename: str) -> str:
                """Returns `filename` preceded by the common prefix, the pid and a letter tag unique for this instance."""
                # Returns filename with unique instance id appended so there is no clash when many instances of this class use the same Framsticks CLI executable
                # Instance ids 0..25 map to 'A'..'Z' (as before); larger ids continue with 'AA', 'AB', ...
                # instead of running into characters like '[' or '\' or into lowercase letters.
                remaining = self.id
                tag = ""
                while True:
                        remaining, digit = divmod(remaining, 26)
                        tag = chr(ord('A') + digit) + tag
                        if remaining == 0:
                                break
                        remaining -= 1
                return FramsticksCLI.FILE_PREFIX + self.pid + tag + '_' + filename


        def __saveGenotypeToFile(self, genotype, name, mode, saveformat):
                """Writes (or deletes) a genotype file in the Framsticks writable directory.

                Args:
                        genotype: genotype text; ignored in the 'd' mode.
                        name: base file name; it gets the instance-specific prefix.
                        mode: a mode for open() such as "w" or "a", or the special value 'd' which deletes the file if it exists.
                        saveformat: RAWGENO writes the genotype as is; any other value writes an "org:" object with a multiline genotype field.

                Returns:
                        A tuple (name relative to the writable directory, absolute path).
                """
                relname = self.__getPrefixedFilename(name)
                absname = os.path.join(self.writing_path, relname)
                if mode == 'd':  # special mode, 'delete'
                        if os.path.exists(absname):
                                os.remove(absname)
                        return relname, absname
                with open(absname, mode) as outfile:  # closed even when writing fails
                        if saveformat == self.GENO_SAVE_FILE_FORMAT["RAWGENO"]:
                                outfile.write(genotype)
                        else:
                                outfile.write("org:\n")
                                outfile.write("genotype:~\n")
                                outfile.write(genotype + "~\n\n")  # TODO proper quoting of special characters in genotype...
                return relname, absname


        def __readFromFramsCLIUntil(self, until_marker: str):
                """Consumes Framsticks CLI output line by line until a line containing `until_marker` is read.

                Each line is printed when PRINT_FRAMSTICKS_OUTPUT is set or when it starts with "[ERROR]".
                """
                while True:
                        self.child.expect('\n')
                        raw = self.child.before
                        # 'before' may be bytes (str() of bytes would yield "b'...'" and the "[ERROR]" prefix
                        # would never be recognized), so decode it explicitly when needed
                        msg = raw.decode('utf-8', errors='replace') if isinstance(raw, bytes) else str(raw)
                        msg = msg.rstrip('\r\n')  # drop the carriage return left by a terminal-style line ending
                        if self.PRINT_FRAMSTICKS_OUTPUT or msg.startswith("[ERROR]"):
                                print(msg)
                        if until_marker in msg:
                                break


        def __runCommand(self, command, genotypes, result_file_name, saveformat) -> List[str]:
                """Saves genotypes to input file(s), sends the command to Framsticks CLI and waits until the result file is reported.

                Args:
                        command: CLI command (without file names).
                        genotypes: list of genotypes that are input data for the command (may be empty for RAWGENO).
                        result_file_name: base name of the file the command writes its result to.
                        saveformat: RAWGENO (one file per genotype) or NATIVEFRAMS (all genotypes in one file).

                Returns:
                        Absolute paths of all input files followed by the path of the result file (the last element).

                Raises:
                        ValueError: when NATIVEFRAMS is requested for an empty list of genotypes.
                """
                raw_format = self.GENO_SAVE_FILE_FORMAT["RAWGENO"]
                native_format = self.GENO_SAVE_FILE_FORMAT["NATIVEFRAMS"]
                filenames_rel = []  # list of file names with input data for the command
                filenames_abs = []  # same list but absolute paths actually used
                if saveformat == raw_format:
                        for i, genotype in enumerate(genotypes):
                                # plain text format = must have a separate file for each genotype
                                rel, absname = self.__saveGenotypeToFile(genotype, "genotype" + str(i) + ".gen", "w", raw_format)
                                filenames_rel.append(rel)
                                filenames_abs.append(absname)
                elif saveformat == native_format:
                        if len(genotypes) == 0:
                                raise ValueError("At least one genotype is required for the command '%s'" % command)
                        # 'd'elete: ensure there is nothing left from the last run of the program because we "a"ppend to file in the loop below
                        rel, absname = self.__saveGenotypeToFile(None, self.CLI_INPUT_FILE, 'd', None)
                        for genotype in genotypes:
                                self.__saveGenotypeToFile(genotype, self.CLI_INPUT_FILE, "a", native_format)
                        #  since we use the same file in the loop above, add this file only once (i.e., outside of the loop)
                        filenames_rel.append(rel)
                        filenames_abs.append(absname)

                result_file_name = self.__getPrefixedFilename(result_file_name)
                cmd = command + " " + " ".join(filenames_rel) + " " + result_file_name
                self.child.sendline(cmd + '\n')
                self.__readFromFramsCLIUntil(self.STDOUT_ENDOPER_MARKER)
                filenames_abs.append(os.path.join(self.writing_path, self.OUTPUT_DIR, result_file_name))
                return filenames_abs  # last element is a path to the file containing results


        def __cleanUpCommandResults(self, filenames):
                """Deletes files with results just created by the command."""
                for name in filenames:
                        os.remove(name)


        def __runAndReadText(self, command, genotypes, result_file_name) -> str:
                """Runs a command on genotypes saved in the raw format, returns the whole result file as text and removes all files."""
                files = self.__runCommand(command, genotypes, result_file_name, self.GENO_SAVE_FILE_FORMAT["RAWGENO"])
                with open(files[-1]) as f:
                        text = f.read()
                self.__cleanUpCommandResults(files)
                return text


        def getSimplest(self, genetic_format) -> str:
                """Returns the content of the file produced by the 'getsimplest' command for the given single-character genetic format."""
                assert len(genetic_format) == 1, "Genetic format should be a single character"
                return self.__runAndReadText(self.GETSIMPLEST_CMD + " " + genetic_format + " ", [], self.GETSIMPLEST_FILE)


        def evaluate(self, genotype: str):
                """
                Returns:
                        Dictionary -- genotype evaluated with self.EVALUATE_CMD. Note that for whatever reason (e.g. incorrect genotype),
                        the dictionary you will get may be empty or partially empty and may not have the fields you expected, so handle such cases properly.
                        When the result file contains no data at all, a message is printed, the files are kept and None is returned.
                """
                files = self.__runCommand(self.EVALUATE_CMD, [genotype], self.EVALUATE_FILE, self.GENO_SAVE_FILE_FORMAT["NATIVEFRAMS"])
                with open(files[-1]) as f:
                        data = json.load(f)
                if len(data) > 0:
                        self.__cleanUpCommandResults(files)
                        return data
                else:
                        print("Evaluating genotype: no performance data was returned in", self.EVALUATE_FILE)  # we do not delete files here
                        return None


        def mutate(self, genotype: str) -> str:
                """Returns the content of the file produced by the 'mutate' command for `genotype`."""
                return self.__runAndReadText(self.MUTATE_CMD, [genotype], self.MUTATE_FILE)


        def crossOver(self, genotype1: str, genotype2: str) -> str:
                """Returns the content of the file produced by the 'crossover' command for the two parent genotypes."""
                return self.__runAndReadText(self.CROSSOVER_CMD, [genotype1, genotype2], self.CROSSOVER_FILE)


        def dissimilarity(self, genotype_list: List[str]) -> np.ndarray:
                """
                Returns:
                        A square array with dissimilarities of each pair of genotypes.
                        For an empty list, an empty (0, 0) array is returned without calling Framsticks.
                """
                if len(genotype_list) == 0:
                        return np.zeros((0, 0), dtype=np.float64)
                files = self.__runCommand(self.DISSIMIL_CMD, genotype_list, self.DISSIMIL_FILE, self.GENO_SAVE_FILE_FORMAT["NATIVEFRAMS"])
                with open(files[-1]) as f:
                        dissimilarity_matrix = np.genfromtxt(f, dtype=np.float64, comments='#', encoding=None, delimiter='\t')
                # genfromtxt() returns a 1-D array for a single-row file (one genotype); make it 2-D so that column slicing works
                dissimilarity_matrix = np.atleast_2d(dissimilarity_matrix)
                # We would like to skip column #1 while reading and read everything else, but... https://stackoverflow.com/questions/36091686/exclude-columns-from-genfromtxt-with-numpy
                # This would be too complicated, so strings (names) in column #1 become NaN as floats (unless they accidentally are valid numbers) - not great, not terrible
                square_matrix = dissimilarity_matrix[:, 2:]  # get rid of two first columns (fitness and name)
                EXPECTED_SHAPE = (len(genotype_list), len(genotype_list))
                # print(square_matrix)
                assert square_matrix.shape == EXPECTED_SHAPE, f"Not a correct dissimilarity matrix, expected {EXPECTED_SHAPE} "
                assert (np.diagonal(square_matrix) == 0).all(), "Not a correct dissimilarity matrix, diagonal expected to be 0"
                assert (square_matrix == square_matrix.T).all(), "Probably not a correct dissimilarity matrix, expecting symmetry, verify this"
                self.__cleanUpCommandResults(files)
                return square_matrix


        def isValid(self, genotype: str) -> bool:
                """Returns True when the first line of the file produced by the 'isvalid' command is "1" (surrounding whitespace ignored)."""
                files = self.__runCommand(self.ISVALID_CMD, [genotype], self.ISVALID_FILE, self.GENO_SAVE_FILE_FORMAT["RAWGENO"])
                with open(files[-1]) as f:
                        valid = f.readline().strip() == "1"  # strip() so that a trailing newline does not turn a valid genotype into an invalid one
                self.__cleanUpCommandResults(files)
                return valid
246
247
def parseArguments():
        """Defines the command-line options of the demo run and returns the arguments parsed from the command line."""
        description = 'Run this program with "python -u %s" if you want to disable buffering of its output.' % sys.argv[0]
        # one entry per option: (flag, keyword arguments for add_argument), in the order shown in the help
        option_specs = (
                ('-path', dict(type=ensureDir, required=True, help='Path to Framsticks CLI without trailing slash.')),
                ('-exe', dict(required=False, help='Executable name. If not given, "frams.exe" or "frams.linux" is assumed.')),
                ('-genformat', dict(required=False, help='Genetic format for the demo run, for example 4, 9, or S. If not given, f1 is assumed.')),
                ('-pid', dict(required=False, help='Unique ID of this process. Only relevant when you run multiple instances of this class simultaneously but as separate processes, and they use the same Framsticks CLI executable. This value will be appended to the names of created files to avoid conflicts.')),
        )
        parser = argparse.ArgumentParser(description=description)
        for flag, options in option_specs:
                parser.add_argument(flag, **options)
        return parser.parse_args()
255
256
def ensureDir(string):
        """Returns `string` unchanged when it is a path of an existing directory; raises NotADirectoryError otherwise."""
        if not os.path.isdir(string):
                raise NotADirectoryError(string)
        return string
262
263
if __name__ == "__main__":
        # A demo run: creates the simplest genotype, mutates it, crosses over two parents,
        # and prints dissimilarity, performance and validity of the offspring.

        # TODO ideas:
        # - check_validity with three levels (invalid, corrected, valid)
        # - "vectorize" some operations (isvalid, evaluate) so that a number of genotypes is handled in one call
        # - use threads for non-blocking reading from frams' stdout and thus not relying on specific strings printed by frams
        # - a pool of binaries run at the same time, balance load - in particular evaluation
        # - if we read genotypes in "org:" format anywhere: import https://pypi.org/project/framsreader/0.1.2/ and use it if successful,
        #    if not then print a message "framsreader not available, using simple internal method to save a genotype" and proceed as it is now.
        #    So far we don't read, but we should use the proper writer to handle all special cases like quoting etc.

        parsed_args = parseArguments()
        framsCLI = FramsticksCLI(parsed_args.path, parsed_args.exe, parsed_args.pid)

        try:
                simplest = framsCLI.getSimplest('1' if parsed_args.genformat is None else parsed_args.genformat)
                print("\tSimplest genotype:", simplest)
                parent1 = framsCLI.mutate(simplest)
                parent2 = parent1
                MUTATE_COUNT = 10
                for _ in range(MUTATE_COUNT):  # example of a chain of MUTATE_COUNT mutations
                        parent2 = framsCLI.mutate(parent2)
                print("\tParent1 (mutated simplest):", parent1)
                print("\tParent2 (Parent1 mutated %d times):" % MUTATE_COUNT, parent2)
                offspring = framsCLI.crossOver(parent1, parent2)
                print("\tCrossover (Offspring):", offspring)
                print('\tDissimilarity of Parent1 and Offspring:', framsCLI.dissimilarity([parent1, offspring])[0, 1])
                print('\tPerformance of Offspring:', framsCLI.evaluate(offspring))
                print('\tValidity of Offspring:', framsCLI.isValid(offspring))
        finally:
                # ask Framsticks CLI to terminate even when one of the demo steps above failed
                framsCLI.closeFramsticksCLI()
Note: See TracBrowser for help on using the repository browser.