[349] | 1 | // This file is a part of Framsticks SDK. http://www.framsticks.com/ |
---|
| 2 | // Copyright (C) 1999-2015 Maciej Komosinski and Szymon Ulatowski. |
---|
| 3 | // See LICENSE.txt for details. |
---|
| 4 | |
---|
| 5 | |
---|
#include <cstdio>
#include <cstring>
#include <string>
#include <vector>
#include "common/loggers/loggertostdout.h"
#include "frams/_demos/genotypeloader.h"
#include "frams/genetics/preconfigured.h"
#include "common/virtfile/stdiofile.h"
#include "frams/model/similarity/simil_model.h"
---|
| 12 | |
---|
| 13 | |
---|
| 14 | |
---|
| 15 | using namespace std; |
---|
| 16 | |
---|
| 17 | /** Computes a matrix of distances between all genotypes in the specified |
---|
| 18 | .gen file, using the matching and measure weights as specified in the |
---|
| 19 | command line. |
---|
| 20 | |
---|
| 21 | Command line parameters: [-names] <genotypesFile> <w_dP> <w_dDEG> <w_dNEU> <w_dGEO> |
---|
| 22 | |
---|
| 23 | Parameters: |
---|
[359] | 24 | <genotypesFile> name of a file with genotypes |
---|
[349] | 25 | <w_dP> weight of the difference in the number of parts |
---|
| 26 | <w_dDEG> weight of the difference in degrees of matched parts |
---|
| 27 | <w_dNEU> weight of the difference in neurons of matched parts |
---|
[359] | 28 | <w_dGEO> weight of the distance between matched parts |
---|
[349] | 29 | |
---|
| 30 | Switches: |
---|
| 31 | -names specifies that the number and names of genotypes are to be printed to output |
---|
| 32 | before the distance matrix; by default the number and names are not printed |
---|
| 33 | |
---|
| 34 | Outputs a distance matrix in the format: |
---|
| 35 | <row_1> (columns in a row are separated by TABs) |
---|
| 36 | ... |
---|
| 37 | <row_n> |
---|
| 38 | */ |
---|
| 39 | int main(int argc, char *argv[]) |
---|
| 40 | { |
---|
[375] | 41 | LoggerToStdout messages_to_stdout(LoggerBase::Enable); |
---|
[349] | 42 | typedef double *pDouble; |
---|
| 43 | int iCurrParam = 0; // index of the currently processed parameter |
---|
| 44 | char *szCurrParam = NULL; |
---|
| 45 | ModelSimil M; // similarity computing object |
---|
| 46 | bool bPrintNames = false; // specifies if names of genotypes are to be printed |
---|
| 47 | int nResult = 0; // a temporary result |
---|
| 48 | |
---|
| 49 | if (argc < 6) |
---|
| 50 | { |
---|
| 51 | // too few parameters |
---|
| 52 | printf("Too few parameters!\n"); |
---|
| 53 | printf("Command line: [-names] <genotypesFile> <matchType> <w_dP> <w_dDEG> <w_dNEU> <w_dGEO> <ifFUZZY>\n\n"); |
---|
| 54 | printf("Parameters:\n"); |
---|
| 55 | printf(" <genotypesFile> name of a file with genotypes (only f1 format is allowed)\n"); |
---|
| 56 | printf(" <w_dP> weight of the difference in the number of parts\n"); |
---|
| 57 | printf(" <w_dDEG> weight of the difference in degrees of matched parts\n"); |
---|
| 58 | printf(" <w_dNEU> weight of the difference in neurons of matched parts\n"); |
---|
| 59 | printf(" <w_dGEO> weight of the distance of matched parts\n\n"); |
---|
| 60 | printf("Switches:\n"); |
---|
| 61 | printf(" -names specifies that the number and names of genotypes are to be printed to output\n"); |
---|
| 62 | printf(" before the distance matrix; by default the number and names are not printed\n\n"); |
---|
| 63 | |
---|
| 64 | printf("Outputs a symmetric distance matrix in the format:\n"); |
---|
| 65 | printf(" <row_1> (columns in a row are separated by TABs)\n"); |
---|
| 66 | printf(" ...\n"); |
---|
| 67 | printf(" <row_n>\n"); |
---|
| 68 | |
---|
| 69 | return -1; |
---|
| 70 | } |
---|
| 71 | |
---|
| 72 | // prepare output parameters from .gen file |
---|
| 73 | vector<Geno *> *pvGenos = new vector<Geno *>(); |
---|
| 74 | vector<char *> *pvNames = new vector<char *>(); |
---|
| 75 | |
---|
| 76 | // check if there is a switch |
---|
| 77 | iCurrParam = 1; |
---|
| 78 | szCurrParam = argv[ iCurrParam ]; |
---|
| 79 | if (strcmp(szCurrParam, "-names") == 0) |
---|
| 80 | { |
---|
| 81 | // switch "-names" was given; print names also |
---|
| 82 | bPrintNames = true; |
---|
| 83 | // pass to the next parameter |
---|
| 84 | iCurrParam++; |
---|
| 85 | } |
---|
| 86 | |
---|
| 87 | // check the parameters |
---|
| 88 | // get <genotypesFile> name from command line |
---|
| 89 | char *szFileName = argv[ iCurrParam ]; |
---|
| 90 | |
---|
| 91 | // initially set measure components' weights to invalid values (negative) |
---|
| 92 | for (int i = 0; i < M.GetNOFactors(); i++) |
---|
| 93 | { |
---|
| 94 | M.m_adFactors[i] = -1.0; |
---|
| 95 | } |
---|
| 96 | |
---|
[352] | 97 | const char *params[] = {"<w_dP>", "<w_dDEG>", "<w_dNEU>", "<w_dGEO>"}; |
---|
[349] | 98 | for (int i = 0; i < M.GetNOFactors(); i++) |
---|
| 99 | { |
---|
| 100 | iCurrParam++; |
---|
| 101 | szCurrParam = argv[ iCurrParam ]; |
---|
| 102 | nResult = sscanf(szCurrParam, " %lf ", & M.m_adFactors[ i ]); |
---|
| 103 | if (nResult != 1) |
---|
| 104 | { |
---|
| 105 | // <w_dP> is not a number -- error |
---|
| 106 | printf("%s", params[i]); |
---|
| 107 | printf(" should be a number\n"); |
---|
| 108 | return -1; |
---|
| 109 | } |
---|
| 110 | else |
---|
| 111 | { |
---|
| 112 | // <w_dP> is a number; check if nonnegative |
---|
| 113 | if (M.m_adFactors[ i ] < 0.0) |
---|
| 114 | { |
---|
| 115 | printf("%s", params[i]); |
---|
| 116 | printf(" should be a nonnegative number\n"); |
---|
| 117 | return -1; |
---|
| 118 | } |
---|
| 119 | } |
---|
| 120 | } |
---|
| 121 | |
---|
| 122 | // read the input file |
---|
| 123 | // prepare loading of genotypes from a .gen file |
---|
| 124 | // create some basic genotype converters |
---|
| 125 | PreconfiguredGenetics genetics; |
---|
| 126 | StdioFileSystem_autoselect stdiofilesys; |
---|
| 127 | |
---|
| 128 | long count = 0, totalsize = 0; |
---|
| 129 | MiniGenotypeLoader loader(szFileName); |
---|
| 130 | MiniGenotype *loaded; |
---|
| 131 | while (loaded = loader.loadNextGenotype()) |
---|
| 132 | { |
---|
| 133 | // while a valid genotype was loaded |
---|
| 134 | count++; |
---|
| 135 | totalsize += loaded->genotype.len(); |
---|
| 136 | // create a Geno object based on the MiniGenotype |
---|
| 137 | Geno *pNextGenotype = new Geno(loaded->genotype); |
---|
| 138 | if ((pNextGenotype != NULL) && (pNextGenotype->isValid())) |
---|
| 139 | { |
---|
| 140 | pvGenos->push_back(pNextGenotype); |
---|
| 141 | char *szNewName = new char [ loaded->name.len() + 1]; |
---|
| 142 | strcpy(szNewName, loaded->name.c_str()); |
---|
| 143 | pvNames->push_back(szNewName); |
---|
| 144 | } |
---|
| 145 | else |
---|
| 146 | { |
---|
| 147 | printf("Genotype %2li is not valid\n", count); |
---|
| 148 | } |
---|
| 149 | } |
---|
| 150 | if (loader.getStatus() == MiniGenotypeLoader::OnError) |
---|
| 151 | { |
---|
| 152 | printf("Error: %s", loader.getError().c_str()); |
---|
| 153 | } |
---|
| 154 | |
---|
| 155 | double dSimilarity = 0.0; |
---|
| 156 | double **aaSimil = NULL; // array of similarities |
---|
| 157 | |
---|
| 158 | // create the empty array of similarities |
---|
| 159 | aaSimil = new pDouble [pvGenos->size()]; |
---|
[361] | 160 | for (unsigned int k = 0; k < pvGenos->size(); k++) |
---|
[349] | 161 | { |
---|
| 162 | aaSimil[k] = new double [pvGenos->size()]; |
---|
[361] | 163 | for (unsigned int l = 0; l < pvGenos->size(); l++) |
---|
[349] | 164 | aaSimil[k][l] = 0.0; |
---|
| 165 | } |
---|
| 166 | |
---|
| 167 | // compute and remember similarities |
---|
[361] | 168 | for (unsigned int i = 0; i < pvGenos->size(); i++) |
---|
[349] | 169 | { |
---|
[361] | 170 | for (unsigned int j = 0; j < pvGenos->size(); j++) |
---|
[349] | 171 | { |
---|
| 172 | dSimilarity = M.EvaluateDistance(pvGenos->operator[](i), pvGenos->operator[](j)); |
---|
| 173 | aaSimil[i][j] = dSimilarity; |
---|
| 174 | } |
---|
| 175 | } |
---|
| 176 | |
---|
| 177 | if (bPrintNames) |
---|
| 178 | { |
---|
| 179 | // if "-names" switch was given, |
---|
| 180 | // print the number of genotypes and their names |
---|
| 181 | printf("%li\n", pvGenos->size()); |
---|
[361] | 182 | for (unsigned int iGen = 0; iGen < pvNames->size(); iGen++) |
---|
[349] | 183 | { |
---|
| 184 | printf("%s\n", pvNames->at(iGen)); |
---|
| 185 | } |
---|
[361] | 186 | } |
---|
[349] | 187 | |
---|
| 188 | // print out the matrix of similarities |
---|
[361] | 189 | for (unsigned int i = 0; i < pvGenos->size(); i++) |
---|
[349] | 190 | { |
---|
[361] | 191 | for (unsigned int j = 0; j < pvGenos->size(); j++) |
---|
[349] | 192 | { |
---|
| 193 | printf("%.2lf\t", aaSimil[i][j]); |
---|
| 194 | } |
---|
| 195 | printf("\n"); |
---|
| 196 | } |
---|
| 197 | |
---|
| 198 | // delete vectors and arrays |
---|
[361] | 199 | for (unsigned int i = 0; i < pvGenos->size(); i++) |
---|
[349] | 200 | { |
---|
| 201 | delete pvGenos->operator[](i); |
---|
| 202 | delete [] pvNames->operator[](i); |
---|
| 203 | delete [] aaSimil[i]; |
---|
| 204 | } |
---|
| 205 | |
---|
| 206 | delete pvGenos; |
---|
| 207 | delete pvNames; |
---|
| 208 | delete [] aaSimil; |
---|
| 209 | |
---|
| 210 | return 0; |
---|
[372] | 211 | } |
---|