1 | // This file is a part of Framsticks SDK. http://www.framsticks.com/ |
---|
2 | // Copyright (C) 1999-2015 Maciej Komosinski and Szymon Ulatowski. |
---|
3 | // See LICENSE.txt for details. |
---|
4 | |
---|
5 | |
---|
#include <cstdio>
#include <cstring>
#include <string>
#include <vector>
#include "common/loggers/loggertostdout.h"
#include "frams/_demos/genotypeloader.h"
#include "frams/genetics/preconfigured.h"
#include "common/virtfile/stdiofile.h"
#include "frams/model/similarity/simil_model.h"
---|
12 | |
---|
13 | |
---|
14 | |
---|
15 | using namespace std; |
---|
16 | |
---|
17 | /** Computes a matrix of distances between all genotypes in the specified |
---|
18 | .gen file, using the matching and measure weights as specified in the |
---|
19 | command line. |
---|
20 | |
---|
21 | Command line parameters: [-names] <genotypesFile> <w_dP> <w_dDEG> <w_dNEU> <w_dGEO> |
---|
22 | |
---|
23 | Parameters: |
---|
24 | <genotypesFile> name of a file with genotypes |
---|
25 | <w_dP> weight of the difference in the number of parts |
---|
26 | <w_dDEG> weight of the difference in degrees of matched parts |
---|
27 | <w_dNEU> weight of the difference in neurons of matched parts |
---|
28 | <w_dGEO> weight of the distance between matched parts |
---|
29 | |
---|
30 | Switches: |
---|
31 | -names specifies that the number and names of genotypes are to be printed to output |
---|
32 | before the distance matrix; by default the number and names are not printed |
---|
33 | |
---|
34 | Outputs a distance matrix in the format: |
---|
35 | <row_1> (columns in a row are separated by TABs) |
---|
36 | ... |
---|
37 | <row_n> |
---|
38 | */ |
---|
39 | int main(int argc, char *argv[]) |
---|
40 | { |
---|
41 | LoggerToStdout messages_to_stdout(LoggerBase::Enable); |
---|
42 | typedef double *pDouble; |
---|
43 | int iCurrParam = 0; // index of the currently processed parameter |
---|
44 | char *szCurrParam = NULL; |
---|
45 | ModelSimil M; // similarity computing object |
---|
46 | bool bPrintNames = false; // specifies if names of genotypes are to be printed |
---|
47 | int nResult = 0; // a temporary result |
---|
48 | |
---|
49 | if (argc < 7) |
---|
50 | { |
---|
51 | // too few parameters |
---|
52 | printf("Too few parameters!\n"); |
---|
53 | printf("Command line: [-names] <genotypesFile> <w_dP> <w_dDEG> <w_dNEU> <w_dGEO> <isZfixed>\n\n"); |
---|
54 | printf("Parameters:\n"); |
---|
55 | printf(" <genotypesFile> name of a file with genotypes\n"); |
---|
56 | printf(" <w_dP> weight of the difference in the number of parts\n"); |
---|
57 | printf(" <w_dDEG> weight of the difference in degrees of matched parts\n"); |
---|
58 | printf(" <w_dNEU> weight of the difference in neurons of matched parts\n"); |
---|
59 | printf(" <w_dGEO> weight of the distance of matched parts\n\n"); |
---|
60 | printf(" <isZFixed> should z cooridante be fixed during the alignment\n"); |
---|
61 | printf("Switches:\n"); |
---|
62 | printf(" -names specifies that the number and names of genotypes are to be printed to output\n"); |
---|
63 | printf(" before the distance matrix; by default the number and names are not printed\n\n"); |
---|
64 | |
---|
65 | printf("Outputs a symmetric distance matrix in the format:\n"); |
---|
66 | printf(" <row_1> (columns in a row are separated by TABs)\n"); |
---|
67 | printf(" ...\n"); |
---|
68 | printf(" <row_n>\n"); |
---|
69 | |
---|
70 | return -1; |
---|
71 | } |
---|
72 | |
---|
73 | // prepare output parameters from .gen file |
---|
74 | vector<Geno *> pvGenos; |
---|
75 | vector<char *> pvNames; |
---|
76 | |
---|
77 | // check if there is a switch |
---|
78 | iCurrParam = 1; |
---|
79 | szCurrParam = argv[ iCurrParam ]; |
---|
80 | if (strcmp(szCurrParam, "-names") == 0) |
---|
81 | { |
---|
82 | // switch "-names" was given; print names also |
---|
83 | bPrintNames = true; |
---|
84 | // pass to the next parameter |
---|
85 | iCurrParam++; |
---|
86 | } |
---|
87 | |
---|
88 | // check the parameters |
---|
89 | // get <genotypesFile> name from command line |
---|
90 | char *szFileName = argv[ iCurrParam ]; |
---|
91 | |
---|
92 | // initially set measure components' weights to invalid values (negative) |
---|
93 | for (int i = 0; i < M.GetNOFactors(); i++) |
---|
94 | { |
---|
95 | M.m_adFactors[i] = -1.0; |
---|
96 | } |
---|
97 | |
---|
98 | const char *params[] = {"<w_dP>", "<w_dDEG>", "<w_dNEU>", "<w_dGEO>"}; |
---|
99 | for (int i = 0; i < M.GetNOFactors(); i++) |
---|
100 | { |
---|
101 | iCurrParam++; |
---|
102 | szCurrParam = argv[ iCurrParam ]; |
---|
103 | nResult = sscanf(szCurrParam, " %lf ", & M.m_adFactors[ i ]); |
---|
104 | if (nResult != 1) |
---|
105 | { |
---|
106 | // <w_dX> is not a number -- error |
---|
107 | printf("%s", params[i]); |
---|
108 | printf(" should be a number\n"); |
---|
109 | return -1; |
---|
110 | } |
---|
111 | else |
---|
112 | { |
---|
113 | // <w_dX> is a number; check if nonnegative |
---|
114 | if (M.m_adFactors[ i ] < 0.0) |
---|
115 | { |
---|
116 | printf("%s", params[i]); |
---|
117 | printf(" should be a nonnegative number\n"); |
---|
118 | return -1; |
---|
119 | } |
---|
120 | } |
---|
121 | } |
---|
122 | |
---|
123 | iCurrParam++; |
---|
124 | szCurrParam = argv[ iCurrParam ]; |
---|
125 | nResult = sscanf(szCurrParam, " %d", & M.zFixed); |
---|
126 | if (nResult != 1) |
---|
127 | { |
---|
128 | // <isZFixed> is not a number -- error |
---|
129 | printf("<isZFixed> should be a number\n"); |
---|
130 | return -1; |
---|
131 | } |
---|
132 | else if (M.zFixed != 0 && M.zFixed !=1) |
---|
133 | { |
---|
134 | printf("<isZFixed>=%d. <isZFixed> should be equal to 0 or 1\n", M.zFixed); |
---|
135 | return -1; |
---|
136 | } |
---|
137 | |
---|
138 | // read the input file |
---|
139 | // prepare loading of genotypes from a .gen file |
---|
140 | // create some basic genotype converters |
---|
141 | PreconfiguredGenetics genetics; |
---|
142 | StdioFileSystem_autoselect stdiofilesys; |
---|
143 | |
---|
144 | long count = 0, totalsize = 0; |
---|
145 | MiniGenotypeLoader loader(szFileName); |
---|
146 | MiniGenotype *loaded; |
---|
147 | while (loaded = loader.loadNextGenotype()) |
---|
148 | { |
---|
149 | // while a valid genotype was loaded |
---|
150 | count++; |
---|
151 | totalsize += loaded->genotype.len(); |
---|
152 | // create a Geno object based on the MiniGenotype |
---|
153 | Geno *pNextGenotype = new Geno(loaded->genotype); |
---|
154 | if ((pNextGenotype != NULL) && (pNextGenotype->isValid())) |
---|
155 | { |
---|
156 | pvGenos.push_back(pNextGenotype); |
---|
157 | char *szNewName = new char [ loaded->name.len() + 1]; |
---|
158 | strcpy(szNewName, loaded->name.c_str()); |
---|
159 | pvNames.push_back(szNewName); |
---|
160 | } |
---|
161 | else |
---|
162 | { |
---|
163 | printf("Genotype %2li is not valid\n", count); |
---|
164 | if (pNextGenotype!=NULL) delete pNextGenotype; |
---|
165 | } |
---|
166 | } |
---|
167 | if (loader.getStatus() == MiniGenotypeLoader::OnError) |
---|
168 | { |
---|
169 | printf("Error: %s", loader.getError().c_str()); |
---|
170 | } |
---|
171 | |
---|
172 | double dSimilarity = 0.0; |
---|
173 | double **aaSimil = NULL; // array of similarities |
---|
174 | |
---|
175 | // create the empty array of similarities |
---|
176 | aaSimil = new pDouble [pvGenos.size()]; |
---|
177 | for (unsigned int k = 0; k < pvGenos.size(); k++) |
---|
178 | { |
---|
179 | aaSimil[k] = new double [pvGenos.size()]; |
---|
180 | for (unsigned int l = 0; l < pvGenos.size(); l++) |
---|
181 | aaSimil[k][l] = 0.0; |
---|
182 | } |
---|
183 | |
---|
184 | // compute and remember similarities |
---|
185 | for (unsigned int i = 0; i < pvGenos.size(); i++) |
---|
186 | { |
---|
187 | for (unsigned int j = 0; j < pvGenos.size(); j++) |
---|
188 | { |
---|
189 | dSimilarity = M.EvaluateDistance(pvGenos.operator[](i), pvGenos.operator[](j)); |
---|
190 | aaSimil[i][j] = dSimilarity; |
---|
191 | } |
---|
192 | } |
---|
193 | |
---|
194 | if (bPrintNames) |
---|
195 | { |
---|
196 | // if "-names" switch was given, |
---|
197 | // print the number of genotypes and their names |
---|
198 | printf("%li\n", pvGenos.size()); |
---|
199 | for (unsigned int iGen = 0; iGen < pvNames.size(); iGen++) |
---|
200 | { |
---|
201 | printf("%s\n", pvNames.at(iGen)); |
---|
202 | } |
---|
203 | } |
---|
204 | |
---|
205 | // print out the matrix of similarities |
---|
206 | for (unsigned int i = 0; i < pvGenos.size(); i++) |
---|
207 | { |
---|
208 | for (unsigned int j = 0; j < pvGenos.size(); j++) |
---|
209 | { |
---|
210 | printf("%.2lf\t", aaSimil[i][j]); |
---|
211 | } |
---|
212 | printf("\n"); |
---|
213 | } |
---|
214 | |
---|
215 | // delete vectors and arrays |
---|
216 | for (unsigned int i = 0; i < pvGenos.size(); i++) |
---|
217 | { |
---|
218 | delete pvGenos.operator[](i); |
---|
219 | delete [] pvNames.operator[](i); |
---|
220 | delete [] aaSimil[i]; |
---|
221 | } |
---|
222 | |
---|
223 | delete [] aaSimil; |
---|
224 | |
---|
225 | return 0; |
---|
226 | } |
---|