[349] | 1 | // This file is a part of Framsticks SDK. http://www.framsticks.com/ |
---|
[973] | 2 | // Copyright (C) 1999-2020 Maciej Komosinski and Szymon Ulatowski. |
---|
[349] | 3 | // See LICENSE.txt for details. |
---|
| 4 | |
---|
| 5 | |
---|
| 6 | #include <vector> |
---|
[391] | 7 | #include "common/loggers/loggertostdout.h" |
---|
[349] | 8 | #include "frams/_demos/genotypeloader.h" |
---|
| 9 | #include "frams/genetics/preconfigured.h" |
---|
[382] | 10 | #include "common/virtfile/stdiofile.h" |
---|
[349] | 11 | #include "frams/model/similarity/simil_model.h" |
---|
| 12 | |
---|
| 13 | |
---|
| 14 | |
---|
| 15 | /** Computes a matrix of distances between all genotypes in the specified |
---|
[606] | 16 | .gen file, using the matching and measure weights as specified in the |
---|
| 17 | command line. */ |
---|
[349] | 18 | int main(int argc, char *argv[]) |
---|
| 19 | { |
---|
[606] | 20 | LoggerToStdout messages_to_stdout(LoggerBase::Enable); |
---|
| 21 | int iCurrParam = 0; // index of the currently processed parameter |
---|
| 22 | char *szCurrParam = NULL; |
---|
| 23 | ModelSimil M; // similarity computing object |
---|
| 24 | bool bPrintNames = false; // specifies if names of genotypes are to be printed |
---|
| 25 | int nResult = 0; // a temporary result |
---|
[349] | 26 | |
---|
[893] | 27 | if (argc < 8) |
---|
[606] | 28 | { |
---|
| 29 | printf("Too few parameters!\n"); |
---|
[893] | 30 | printf("Command line: [-names] <genotypesFile> <measure> <w_dP> <w_dDEG> <w_dNEU> <w_dGEO> <fixZaxis?>\n\n"); |
---|
[349] | 31 | |
---|
[606] | 32 | printf("Parameters:\n"); |
---|
| 33 | printf(" <genotypesFile> name of a file with genotypes\n"); |
---|
[893] | 34 | printf(" <measure> similarity measure\n"); |
---|
[606] | 35 | printf(" <w_dP> weight of the difference in the number of parts\n"); |
---|
| 36 | printf(" <w_dDEG> weight of the difference in degrees of matched parts\n"); |
---|
| 37 | printf(" <w_dNEU> weight of the difference in neurons of matched parts\n"); |
---|
| 38 | printf(" <w_dGEO> weight of the distance of matched parts\n"); |
---|
| 39 | printf(" <fixZaxis?> should the 'z' (vertical) coordinate be fixed during the alignment? (0 or 1)\n\n"); |
---|
[349] | 40 | |
---|
[606] | 41 | printf("Switches:\n"); |
---|
| 42 | printf(" -names specifies that the number and names of genotypes are to be printed to output\n"); |
---|
| 43 | printf(" before the distance matrix; by default the number and names are not printed\n\n"); |
---|
[349] | 44 | |
---|
[606] | 45 | printf("Outputs a symmetric distance matrix in the format:\n"); |
---|
| 46 | printf(" <row_1> (columns in a row are separated by TABs)\n"); |
---|
| 47 | printf(" ...\n"); |
---|
| 48 | printf(" <row_n>\n"); |
---|
[349] | 49 | |
---|
[606] | 50 | return -1; |
---|
| 51 | } |
---|
[349] | 52 | |
---|
[606] | 53 | // prepare output parameters from .gen file |
---|
| 54 | vector<Geno *> pvGenos; |
---|
| 55 | vector<char *> pvNames; |
---|
[349] | 56 | |
---|
[606] | 57 | // check if there is a switch |
---|
| 58 | iCurrParam = 1; |
---|
| 59 | szCurrParam = argv[iCurrParam]; |
---|
| 60 | if (strcmp(szCurrParam, "-names") == 0) |
---|
| 61 | { |
---|
| 62 | // switch "-names" was given; print names also |
---|
| 63 | bPrintNames = true; |
---|
| 64 | // pass to the next parameter |
---|
| 65 | iCurrParam++; |
---|
| 66 | } |
---|
[349] | 67 | |
---|
[606] | 68 | // check the parameters |
---|
| 69 | // get <genotypesFile> name from command line |
---|
| 70 | char *szFileName = argv[iCurrParam]; |
---|
[349] | 71 | |
---|
[606] | 72 | // initially set measure components' weights to invalid values (negative) |
---|
| 73 | for (int i = 0; i < M.GetNOFactors(); i++) |
---|
| 74 | { |
---|
| 75 | M.m_adFactors[i] = -1.0; |
---|
| 76 | } |
---|
[973] | 77 | |
---|
[893] | 78 | iCurrParam++; |
---|
| 79 | szCurrParam = argv[iCurrParam]; |
---|
[973] | 80 | int measure_type = -1; |
---|
[895] | 81 | nResult = sscanf(szCurrParam, "%d", &measure_type); |
---|
[893] | 82 | if (nResult != 1) |
---|
| 83 | { |
---|
| 84 | printf("Measure type should be a number!\n"); |
---|
| 85 | return -1; |
---|
| 86 | } |
---|
[973] | 87 | |
---|
[893] | 88 | if (measure_type != 0 && measure_type != 1) |
---|
| 89 | { |
---|
| 90 | printf("Measure type should be 0 (flexible criteria order and optimal matching) or 1 (vertex degree order and greedy matching)!\n"); |
---|
[973] | 91 | return -1; |
---|
[893] | 92 | } |
---|
[973] | 93 | |
---|
[893] | 94 | M.matching_method = measure_type; |
---|
[349] | 95 | |
---|
[606] | 96 | const char *params[] = { "<w_dP>", "<w_dDEG>", "<w_dNEU>", "<w_dGEO>" }; |
---|
| 97 | for (int i = 0; i < M.GetNOFactors(); i++) |
---|
| 98 | { |
---|
| 99 | iCurrParam++; |
---|
| 100 | szCurrParam = argv[iCurrParam]; |
---|
[895] | 101 | nResult = sscanf(szCurrParam, "%lf", &M.m_adFactors[i]); |
---|
[606] | 102 | if (nResult != 1) |
---|
| 103 | { |
---|
| 104 | // <w_dX> is not a number -- error |
---|
| 105 | printf("%s", params[i]); |
---|
| 106 | printf(" should be a number\n"); |
---|
| 107 | return -1; |
---|
| 108 | } |
---|
| 109 | else |
---|
| 110 | { |
---|
| 111 | // <w_dX> is a number; check if nonnegative |
---|
| 112 | if (M.m_adFactors[i] < 0.0) |
---|
| 113 | { |
---|
| 114 | printf("%s", params[i]); |
---|
| 115 | printf(" should be a nonnegative number\n"); |
---|
| 116 | return -1; |
---|
| 117 | } |
---|
| 118 | } |
---|
| 119 | } |
---|
[349] | 120 | |
---|
[606] | 121 | iCurrParam++; |
---|
| 122 | szCurrParam = argv[iCurrParam]; |
---|
[895] | 123 | nResult = sscanf(szCurrParam, "%d", &M.fixedZaxis); |
---|
[606] | 124 | if (nResult != 1) |
---|
| 125 | { |
---|
| 126 | // <isZFixed> is not a number -- error |
---|
| 127 | printf("<isZFixed> should be a number\n"); |
---|
| 128 | return -1; |
---|
| 129 | } |
---|
[612] | 130 | else if (M.fixedZaxis != 0 && M.fixedZaxis != 1) |
---|
[606] | 131 | { |
---|
[612] | 132 | printf("<isZFixed>=%d. <isZFixed> should be equal to 0 or 1\n", M.fixedZaxis); |
---|
[606] | 133 | return -1; |
---|
| 134 | } |
---|
[349] | 135 | |
---|
[606] | 136 | // read the input file |
---|
| 137 | // prepare loading of genotypes from a .gen file |
---|
| 138 | // create some basic genotype converters |
---|
| 139 | PreconfiguredGenetics genetics; |
---|
| 140 | StdioFileSystem_autoselect stdiofilesys; |
---|
| 141 | |
---|
| 142 | long count = 0, totalsize = 0; |
---|
[732] | 143 | GenotypeMiniLoader loader(szFileName); |
---|
| 144 | GenotypeMini *loaded; |
---|
[606] | 145 | while (loaded = loader.loadNextGenotype()) |
---|
| 146 | { |
---|
| 147 | // while a valid genotype was loaded |
---|
| 148 | count++; |
---|
[973] | 149 | totalsize += loaded->genotype.length(); |
---|
[606] | 150 | // create a Geno object based on the MiniGenotype |
---|
| 151 | Geno *pNextGenotype = new Geno(loaded->genotype); |
---|
| 152 | if ((pNextGenotype != NULL) && (pNextGenotype->isValid())) |
---|
| 153 | { |
---|
| 154 | pvGenos.push_back(pNextGenotype); |
---|
[973] | 155 | char *szNewName = new char[loaded->name.length() + 1]; |
---|
[606] | 156 | strcpy(szNewName, loaded->name.c_str()); |
---|
| 157 | pvNames.push_back(szNewName); |
---|
| 158 | } |
---|
| 159 | else |
---|
| 160 | { |
---|
| 161 | printf("Genotype %2li is not valid\n", count); |
---|
| 162 | if (pNextGenotype != NULL) delete pNextGenotype; |
---|
| 163 | } |
---|
| 164 | } |
---|
[732] | 165 | if (loader.getStatus() == GenotypeMiniLoader::OnError) |
---|
[606] | 166 | { |
---|
| 167 | printf("Error: %s", loader.getError().c_str()); |
---|
| 168 | } |
---|
| 169 | |
---|
| 170 | double dSimilarity = 0.0; |
---|
| 171 | double **aaSimil = NULL; // array of similarities |
---|
| 172 | |
---|
[1005] | 173 | // create an empty array of similarities |
---|
| 174 | aaSimil = new double*[pvGenos.size()]; |
---|
[606] | 175 | for (unsigned int k = 0; k < pvGenos.size(); k++) |
---|
| 176 | { |
---|
| 177 | aaSimil[k] = new double[pvGenos.size()]; |
---|
[455] | 178 | for (unsigned int l = 0; l < pvGenos.size(); l++) |
---|
[606] | 179 | aaSimil[k][l] = 0.0; |
---|
| 180 | } |
---|
[349] | 181 | |
---|
[1005] | 182 | // compute and store similarities |
---|
[455] | 183 | for (unsigned int i = 0; i < pvGenos.size(); i++) |
---|
[606] | 184 | { |
---|
[455] | 185 | for (unsigned int j = 0; j < pvGenos.size(); j++) |
---|
[606] | 186 | { |
---|
| 187 | dSimilarity = M.EvaluateDistance(pvGenos.operator[](i), pvGenos.operator[](j)); |
---|
| 188 | aaSimil[i][j] = dSimilarity; |
---|
| 189 | } |
---|
| 190 | } |
---|
[349] | 191 | |
---|
[606] | 192 | if (bPrintNames) |
---|
| 193 | { |
---|
[1005] | 194 | // if the "-names" switch was given, print the number of genotypes and their names |
---|
[606] | 195 | printf("%li\n", pvGenos.size()); |
---|
[455] | 196 | for (unsigned int iGen = 0; iGen < pvNames.size(); iGen++) |
---|
[606] | 197 | { |
---|
| 198 | printf("%s\n", pvNames.at(iGen)); |
---|
| 199 | } |
---|
| 200 | } |
---|
[349] | 201 | |
---|
[606] | 202 | // print out the matrix of similarities |
---|
[455] | 203 | for (unsigned int i = 0; i < pvGenos.size(); i++) |
---|
[606] | 204 | { |
---|
[455] | 205 | for (unsigned int j = 0; j < pvGenos.size(); j++) |
---|
[606] | 206 | { |
---|
| 207 | printf("%.2lf\t", aaSimil[i][j]); |
---|
| 208 | } |
---|
| 209 | printf("\n"); |
---|
| 210 | } |
---|
[349] | 211 | |
---|
[606] | 212 | // delete vectors and arrays |
---|
[455] | 213 | for (unsigned int i = 0; i < pvGenos.size(); i++) |
---|
[606] | 214 | { |
---|
| 215 | delete pvGenos.operator[](i); |
---|
| 216 | delete[] pvNames.operator[](i); |
---|
| 217 | delete[] aaSimil[i]; |
---|
| 218 | } |
---|
[349] | 219 | |
---|
[606] | 220 | delete[] aaSimil; |
---|
[349] | 221 | |
---|
[606] | 222 | return 0; |
---|
[372] | 223 | } |
---|