| 1 | // This file is a part of Framsticks SDK. http://www.framsticks.com/ |
|---|
| 2 | // Copyright (C) 1999-2020 Maciej Komosinski and Szymon Ulatowski. |
|---|
| 3 | // See LICENSE.txt for details. |
|---|
| 4 | |
|---|
| 5 | |
|---|
| 6 | #include <vector> |
|---|
| 7 | #include <string> |
|---|
| 8 | #include "common/loggers/loggertostdout.h" |
|---|
| 9 | #include "frams/_demos/genotypeloader.h" |
|---|
| 10 | #include "frams/genetics/preconfigured.h" |
|---|
| 11 | #include "common/virtfile/stdiofile.h" |
|---|
| 12 | #include "frams/model/similarity/measure-distribution.h" |
|---|
| 13 | #include "frams/model/similarity/measure-greedy.h" |
|---|
| 14 | #include "frams/model/similarity/measure-hungarian.h" |
|---|
| 15 | #include <stdexcept> //std::invalid_argument |
|---|
| 16 | |
|---|
| 17 | using namespace std; |
|---|
| 18 | |
|---|
/** Parses consecutive entries of an argument list as doubles.

	Starting at index `pos` of `args`, one entry is consumed for every element
	of `params_names`, and each parsed value is appended to `params`.

	@param args          list of argument strings to read from
	@param pos           index in `args` of the first value to parse
	@param params        output vector; parsed values are appended in order
	@param params_names  display names of the expected values (one per value),
	                     used only in error messages
	@return 0 when every expected value was parsed; -1 after printing a message
	        to stdout when a value is missing, is not a number, or does not fit
	        in a double. Values parsed before the failure remain in `params`. */
int add_double_param(std::vector<std::string> *args, int pos, std::vector<double> *params, std::vector<std::string> *params_names)
{
	for (std::size_t i = 0; i < params_names->size(); i++, pos++)
	{
		const char *name = params_names->at(i).c_str();

		// Check the index explicitly: vector::at() would throw std::out_of_range,
		// which the handler below would misreport as a numeric range problem.
		if (pos < 0 || static_cast<std::size_t>(pos) >= args->size())
		{
			printf("%s is missing\n", name);
			return -1;
		}

		const std::string &text = args->at(static_cast<std::size_t>(pos));
		try
		{
			std::size_t consumed = 0;
			const double value = std::stod(text, &consumed);
			// std::stod stops at the first character it cannot use, so "1,5" or "2x"
			// would be silently accepted as 1 and 2 unless the whole text is required.
			if (consumed != text.size())
			{
				printf("%s should be a number\n", name);
				return -1;
			}
			params->push_back(value);
		}
		catch (const std::invalid_argument&)
		{
			printf("%s should be a number\n", name);
			return -1;
		}
		catch (const std::out_of_range&)
		{
			printf("%s should be inside double range\n", name);
			return -1;
		}
	}
	return 0;
}
|---|
| 41 | |
|---|
| 42 | /** Computes a matrix of distances between all genotypes in the specified |
|---|
| 43 | .gen file, using the matching and measure weights as specified in the |
|---|
| 44 | command line. */ |
|---|
| 45 | int main(int argc, char *argv[]) |
|---|
| 46 | { |
|---|
| 47 | typedef double *pDouble; |
|---|
| 48 | LoggerToStdout messages_to_stdout(LoggerBase::Enable); |
|---|
| 49 | SimilMeasureBase *simil_measure = nullptr; |
|---|
| 50 | if (argc < 5) |
|---|
| 51 | { |
|---|
| 52 | printf("Too few parameters!\n"); |
|---|
| 53 | printf("Command line: [-names] <genotypesFile> <measure (greedy/hungarian)> <w_dP> <w_dDEG> <w_dNEU> <w_dGEO> <fixZaxis?>\n\n"); |
|---|
| 54 | printf("Command line: [-names] <genotypesFile> <measure (distribution)> <desc> <simil> <dens> <bins> <samp_num>\n\n"); |
|---|
| 55 | printf("Parameters:\n"); |
|---|
| 56 | printf(" <genotypesFile> name of a file with genotypes\n"); |
|---|
| 57 | printf(" <measure> similarity measure name (greedy/hungarian/distribution)\n"); |
|---|
| 58 | printf("\n"); |
|---|
| 59 | printf("Parameters of greedy and hungarian measures:\n"); |
|---|
| 60 | printf(" <w_dP> weight of the difference in the number of parts\n"); |
|---|
| 61 | printf(" <w_dDEG> weight of the difference in degrees of matched parts\n"); |
|---|
| 62 | printf(" <w_dNEU> weight of the difference in neurons of matched parts\n"); |
|---|
| 63 | printf(" <w_dGEO> weight of the distance of matched parts\n"); |
|---|
| 64 | printf(" <fixZaxis?> should the 'z' (vertical) coordinate be fixed during the alignment? (0 or 1)\n\n"); |
|---|
| 65 | printf("Parameters of distribution measure:\n"); |
|---|
| 66 | printf(" <dens> sampling density\n"); |
|---|
| 67 | printf(" <bins> number of histogram bins\n"); |
|---|
| 68 | printf(" <samp_num> number of samples taken\n\n"); |
|---|
| 69 | |
|---|
| 70 | printf("Switches:\n"); |
|---|
| 71 | printf(" -names specifies that the number and names of genotypes are to be printed to output\n"); |
|---|
| 72 | printf(" before the distance matrix; by default the number and names are not printed\n\n"); |
|---|
| 73 | |
|---|
| 74 | printf("Outputs a symmetric distance matrix in the format:\n"); |
|---|
| 75 | printf(" <row_1> (columns in a row are separated by TABs)\n"); |
|---|
| 76 | printf(" ...\n"); |
|---|
| 77 | printf(" <row_n>\n"); |
|---|
| 78 | |
|---|
| 79 | return -1; |
|---|
| 80 | } |
|---|
| 81 | |
|---|
| 82 | std::vector<string> args; |
|---|
| 83 | for (int i = 1; i < argc; i++) |
|---|
| 84 | args.push_back(std::string(argv[i])); |
|---|
| 85 | |
|---|
| 86 | bool print_names = false; |
|---|
| 87 | |
|---|
| 88 | int pos = 1; |
|---|
| 89 | if (args.at(0).compare("-names")==0) |
|---|
| 90 | { |
|---|
| 91 | print_names = true; |
|---|
| 92 | pos = 2; |
|---|
| 93 | } |
|---|
| 94 | |
|---|
| 95 | string measure_name = args.at(pos); |
|---|
| 96 | pos++; |
|---|
| 97 | std::vector<double> params; |
|---|
| 98 | |
|---|
| 99 | if (measure_name.compare("greedy")==0 || measure_name.compare("hungarian")==0) |
|---|
| 100 | { |
|---|
| 101 | std::vector<string> params_names{ "<w_dP>", "<w_dDEG>", "<w_dNEU>", "<w_dGEO>", "<fixZaxis?>" }; |
|---|
| 102 | |
|---|
| 103 | if (add_double_param(&args, pos, ¶ms, ¶ms_names) == -1) |
|---|
| 104 | return -1; |
|---|
| 105 | |
|---|
| 106 | if (measure_name.compare("greedy")==0) |
|---|
| 107 | simil_measure = new SimilMeasureGreedy(); |
|---|
| 108 | else |
|---|
| 109 | simil_measure = new SimilMeasureHungarian(); |
|---|
| 110 | } |
|---|
| 111 | |
|---|
| 112 | |
|---|
| 113 | else if (measure_name.compare("distribution")==0) |
|---|
| 114 | { |
|---|
| 115 | std::vector<string> params_names{ "<dens>", "<bins>", "<samp_num>" }; |
|---|
| 116 | |
|---|
| 117 | if (add_double_param(&args, pos, ¶ms, ¶ms_names)==-1) |
|---|
| 118 | return -1; |
|---|
| 119 | |
|---|
| 120 | simil_measure = new SimilMeasureDistribution(); |
|---|
| 121 | } |
|---|
| 122 | |
|---|
| 123 | else |
|---|
| 124 | { |
|---|
| 125 | printf("Measure type should be greedy (flexible criteria order and optimal matching), hungarian (vertex degree order and greedy matching) or distribution!\n"); |
|---|
| 126 | return -1; |
|---|
| 127 | } |
|---|
| 128 | |
|---|
| 129 | simil_measure->setParams(params); |
|---|
| 130 | |
|---|
| 131 | // read the input file |
|---|
| 132 | // prepare loading of genotypes from a .gen file |
|---|
| 133 | // create some basic genotype converters |
|---|
| 134 | PreconfiguredGenetics genetics; |
|---|
| 135 | StdioFileSystem_autoselect stdiofilesys; |
|---|
| 136 | |
|---|
| 137 | // prepare output parameters from .gen file |
|---|
| 138 | vector<Geno *> pvGenos; |
|---|
| 139 | vector<char *> pvNames; |
|---|
| 140 | |
|---|
| 141 | long count = 0, totalsize = 0; |
|---|
| 142 | GenotypeMiniLoader loader(args.at(0).c_str()); |
|---|
| 143 | GenotypeMini *loaded; |
|---|
| 144 | while (loaded = loader.loadNextGenotype()) |
|---|
| 145 | { |
|---|
| 146 | // while a valid genotype was loaded |
|---|
| 147 | count++; |
|---|
| 148 | totalsize += loaded->genotype.length(); |
|---|
| 149 | // create a Geno object based on the MiniGenotype |
|---|
| 150 | Geno *pNextGenotype = new Geno(loaded->genotype); |
|---|
| 151 | if ((pNextGenotype != NULL) && (pNextGenotype->isValid())) |
|---|
| 152 | { |
|---|
| 153 | pvGenos.push_back(pNextGenotype); |
|---|
| 154 | char *szNewName = new char[loaded->name.length() + 1]; |
|---|
| 155 | strcpy(szNewName, loaded->name.c_str()); |
|---|
| 156 | pvNames.push_back(szNewName); |
|---|
| 157 | } |
|---|
| 158 | else |
|---|
| 159 | { |
|---|
| 160 | printf("Genotype %2li is not valid\n", count); |
|---|
| 161 | if (pNextGenotype != NULL) delete pNextGenotype; |
|---|
| 162 | } |
|---|
| 163 | } |
|---|
| 164 | if (loader.getStatus() == GenotypeMiniLoader::OnError) |
|---|
| 165 | { |
|---|
| 166 | printf("Error: %s", loader.getError().c_str()); |
|---|
| 167 | } |
|---|
| 168 | |
|---|
| 169 | double dSimilarity = 0.0; |
|---|
| 170 | double **aaSimil = NULL; // array of similarities |
|---|
| 171 | |
|---|
| 172 | // create the empty array of similarities |
|---|
| 173 | aaSimil = new pDouble[pvGenos.size()]; |
|---|
| 174 | for (unsigned int k = 0; k < pvGenos.size(); k++) |
|---|
| 175 | { |
|---|
| 176 | aaSimil[k] = new double[pvGenos.size()]; |
|---|
| 177 | for (unsigned int l = 0; l < pvGenos.size(); l++) |
|---|
| 178 | aaSimil[k][l] = 0.0; |
|---|
| 179 | } |
|---|
| 180 | |
|---|
| 181 | |
|---|
| 182 | |
|---|
| 183 | // compute and remember similarities |
|---|
| 184 | for (unsigned int i = 0; i < pvGenos.size(); i++) |
|---|
| 185 | { |
|---|
| 186 | for (unsigned int j = 0; j < pvGenos.size(); j++) |
|---|
| 187 | { |
|---|
| 188 | dSimilarity = simil_measure->evaluateDistance(pvGenos.operator[](i), pvGenos.operator[](j)); |
|---|
| 189 | aaSimil[i][j] = dSimilarity; |
|---|
| 190 | } |
|---|
| 191 | } |
|---|
| 192 | |
|---|
| 193 | if (print_names) |
|---|
| 194 | { |
|---|
| 195 | // if "-names" switch was given, print the number of genotypes and their names |
|---|
| 196 | printf("%li\n", pvGenos.size()); |
|---|
| 197 | for (unsigned int iGen = 0; iGen < pvNames.size(); iGen++) |
|---|
| 198 | { |
|---|
| 199 | printf("%s\n", pvNames.at(iGen)); |
|---|
| 200 | } |
|---|
| 201 | } |
|---|
| 202 | |
|---|
| 203 | // print out the matrix of similarities |
|---|
| 204 | for (unsigned int i = 0; i < pvGenos.size(); i++) |
|---|
| 205 | { |
|---|
| 206 | for (unsigned int j = 0; j < pvGenos.size(); j++) |
|---|
| 207 | { |
|---|
| 208 | printf("%.2lf\t", aaSimil[i][j]); |
|---|
| 209 | } |
|---|
| 210 | printf("\n"); |
|---|
| 211 | } |
|---|
| 212 | |
|---|
| 213 | // delete vectors and arrays |
|---|
| 214 | for (unsigned int i = 0; i < pvGenos.size(); i++) |
|---|
| 215 | { |
|---|
| 216 | delete pvGenos.operator[](i); |
|---|
| 217 | delete[] pvNames.operator[](i); |
|---|
| 218 | delete[] aaSimil[i]; |
|---|
| 219 | } |
|---|
| 220 | |
|---|
| 221 | delete[] aaSimil; |
|---|
| 222 | delete simil_measure; |
|---|
| 223 | |
|---|
| 224 | return 0; |
|---|
| 225 | } |
|---|