Ignore:
Timestamp:
01/24/21 13:29:42 (4 years ago)
Author:
Maciej Komosinski
Message:

"Vectorized" mutation for better performance due to decreased time of communication py<->frams (mutate many genotypes in one call)

File:
1 edited

Legend:

Unmodified
Added
Removed
  • framspy/FramsticksCLI.py

    r1059 r1060  
    77import argparse
    88import numpy as np
     9import framsreader  # only needed for mutation: https://pypi.org/project/framsreader
    910
    1011
     
    2425        GENO_SAVE_FILE_FORMAT = Enum('GENO_SAVE_FILE_FORMAT', 'NATIVEFRAMS RAWGENO')  # how to save genotypes
    2526        OUTPUT_DIR = "scripts_output"
     27        GENOTYPE_INVALID = "/*invalid*/"  # this is how genotype invalidity is represented in Framsticks
    2628        STDOUT_ENDOPER_MARKER = "FileObject.write"  # we look for this message on Framsticks CLI stdout to detect when Framsticks created a file with the result we expect
    2729
     
    3537        EVALUATE_FILE = "genos_eval.json"
    3638        CROSSOVER_CMD = "crossover"
    37         CROSSOVER_FILE = "child.gen"
     39        CROSSOVER_FILE = "crossover_child.gen"
    3840        DISSIMIL_CMD = "dissimil"
    3941        DISSIMIL_FILE = "dissimilarity_matrix.tsv"  # tab-separated values
    40         ISVALID_CMD = "arevalid"
     42        ISVALID_CMD = "isvalid_many"
    4143        ISVALID_FILE = "validity.txt"
    42         MUTATE_CMD = "mutate"
    43         MUTATE_FILE = "mutant.gen"
     44        MUTATE_CMD = "mutate_many"
     45        MUTATE_FILE = "mutation_results.gen"
    4446
    4547        CLI_INPUT_FILE = "genotypes.gen"
     
    219221
    220222
    221         def mutate(self, genotype: str) -> str:
    222                 """
    223                 Returns:
    224                         The genotype of the mutated individual. Empty string if the mutation failed.
    225                 """
    226                 files = self.__runCommand(self.MUTATE_CMD, [genotype], self.MUTATE_FILE, self.GENO_SAVE_FILE_FORMAT["RAWGENO"])
    227                 with open(files[-1]) as f:
    228                         newgenotype = "".join(f.readlines())
    229                 self.__cleanUpCommandResults(files)
    230                 return newgenotype
     223        def mutate(self, genotype_list: List[str]) -> List[str]:
     224                """
     225                Returns:
     226                        The genotype(s) of the mutated source genotype(s). self.GENOTYPE_INVALID for genotypes whose mutation failed (for example because the source genotype was invalid).
     227                """
     228                assert isinstance(genotype_list, list)  # a str behaves much like a list in Python and would appear to work here too, so insist on a real list to avoid any ambiguity
     229                files = self.__runCommand(self.MUTATE_CMD, genotype_list, self.MUTATE_FILE, self.GENO_SAVE_FILE_FORMAT["NATIVEFRAMS"])
     230                genos = framsreader.load(files[-1], "gen file")
     231                self.__cleanUpCommandResults(files)
     232                return [g["genotype"] for g in genos]
    231233
    232234
     
    298300        # TODO ideas:
    299301        # - check_validity with three levels (invalid, corrected, valid)
    300         # - "vectorize" some operations (isvalid, evaluate) so that a number of genotypes is handled in one call
     302        # - "vectorize" crossover so that many genotypes are handled in one call. Even better, use .so/.dll direct communication to CLI
    301303        # - use threads for non-blocking reading from frams' stdout and thus not relying on specific strings printed by frams
    302304        # - a pool of binaries run at the same time, balance load - in particular evaluation
    303         # - if we read genotypes in "org:" format anywhere: import https://pypi.org/project/framsreader/0.1.2/ and use it if successful,
     305        # - if we read genotypes in "org:" format anywhere: import https://pypi.org/project/framsreader and use it if successful,
    304306        #    if not then print a message "framsreader not available, using simple internal method to save a genotype" and proceed as it is now.
    305         #    So far we don't read, but we should use the proper writer to handle all special cases like quoting etc.
     307        #    We should use the proper writer to handle all special cases like quoting special characters etc.
    306308
    307309        parsed_args = parseArguments()
     
    312314        simplest = framsCLI.getSimplest('1' if parsed_args.genformat is None else parsed_args.genformat)
    313315        print("\tSimplest genotype:", simplest)
    314         parent1 = framsCLI.mutate(simplest)
     316        parent1 = framsCLI.mutate([simplest])[0]
    315317        parent2 = parent1
    316318        MUTATE_COUNT = 10
    317319        for x in range(MUTATE_COUNT):  # example of a chain of MUTATE_COUNT mutations
    318                 parent2 = framsCLI.mutate(parent2)
     320                parent2 = framsCLI.mutate([parent2])[0]
    319321        print("\tParent1 (mutated simplest):", parent1)
    320322        print("\tParent2 (Parent1 mutated %d times):" % MUTATE_COUNT, parent2)
Note: See TracChangeset for help on using the changeset viewer.