source: cpp/frams/_demos/simil_test.cpp @ 349

Last change on this file since 349 was 349, checked in by oriona, 10 years ago

implementation of the similarity measure

File size: 6.9 KB
Line 
1// This file is a part of Framsticks SDK.  http://www.framsticks.com/
2// Copyright (C) 1999-2015  Maciej Komosinski and Szymon Ulatowski.
3// See LICENSE.txt for details.
4
5
#include <cstdio>
#include <cstring>
#include <string>
#include <vector>
#include "frams/errmgr/stdouterr.h"
#include "frams/_demos/genotypeloader.h"
#include "frams/genetics/preconfigured.h"
#include "frams/virtfile/stdiofile.h"
#include "frams/model/similarity/simil_model.h"
12
13
14
15using namespace std;
16
17/** Computes a matrix of distances between all genotypes in the specified
18    .gen file, using the matching and measure weights as specified in the
19    command line.
20
21    Command line parameters: [-names] <genotypesFile> <w_dP> <w_dDEG> <w_dNEU> <w_dGEO>
22
23    Parameters:
24      <genotypesFile> name of a file with genotypes (only f1 format)
25      <w_dP> weight of the difference in the number of parts
26      <w_dDEG> weight of the difference in degrees of matched parts
27      <w_dNEU> weight of the difference in neurons of matched parts
28      <w_dGEO> weight of the distance of matched parts
29
30    Switches:
31      -names specifies that the number and names of genotypes are to be printed to output
32        before the distance matrix; by default the number and names are not printed
33
34    Outputs a distance matrix in the format:
35    <row_1> (columns in a row are separated by TABs)
36    ...
37    <row_n>
38
39    Last modified: 2015-04-16
40 */
41int main(int argc, char *argv[])
42{
43    //StdoutErrorHandler err; //?TODO
44    typedef double *pDouble;
45    int iCurrParam = 0; // index of the currently processed parameter
46    char *szCurrParam = NULL;
47    ModelSimil M; // similarity computing object
48    bool bPrintNames = false; // specifies if names of genotypes are to be printed
49    int nResult = 0; // a temporary result
50
51    if (argc < 6)
52    {
53        // too few parameters
54        printf("Too few parameters!\n");
55        printf("Command line: [-names] <genotypesFile> <matchType> <w_dP> <w_dDEG> <w_dNEU> <w_dGEO> <ifFUZZY>\n\n");
56        printf("Parameters:\n");
57        printf(" <genotypesFile> name of a file with genotypes (only f1 format is allowed)\n");
58        printf(" <w_dP> weight of the difference in the number of parts\n");
59        printf(" <w_dDEG> weight of the difference in degrees of matched parts\n");
60        printf(" <w_dNEU> weight of the difference in neurons of matched parts\n");
61        printf(" <w_dGEO> weight of the distance of matched parts\n\n");
62        printf("Switches:\n");
63        printf(" -names specifies that the number and names of genotypes are to be printed to output\n");
64        printf("   before the distance matrix; by default the number and names are not printed\n\n");
65
66        printf("Outputs a symmetric distance matrix in the format:\n");
67        printf(" <row_1> (columns in a row are separated by TABs)\n");
68        printf(" ...\n");
69        printf(" <row_n>\n");
70
71        return -1;
72    }
73
74    // prepare output parameters from .gen file
75    vector<Geno *> *pvGenos = new vector<Geno *>();
76    vector<char *> *pvNames = new vector<char *>();
77
78    // check if there is a switch
79    iCurrParam = 1;
80    szCurrParam = argv[ iCurrParam ];
81    if (strcmp(szCurrParam, "-names") == 0)
82    {
83        // switch "-names" was given; print names also
84        bPrintNames = true;
85        // pass to the next parameter
86        iCurrParam++;
87    }
88
89    // check the parameters
90    // get <genotypesFile> name from command line
91    char *szFileName = argv[ iCurrParam ];
92
93    // initially set measure components' weights to invalid values (negative)
94    for (int i = 0; i < M.GetNOFactors(); i++)
95    {
96        M.m_adFactors[i] = -1.0;
97    }
98
99    char *params[] = {"<w_dP>", "<w_dDEG>", "<w_dNEU>", "<w_dGEO>"};
100    for (int i = 0; i < M.GetNOFactors(); i++)
101    {
102        iCurrParam++;
103        szCurrParam = argv[ iCurrParam ];
104        nResult = sscanf(szCurrParam, " %lf ", & M.m_adFactors[ i ]);
105        if (nResult != 1)
106        {
107            // <w_dP> is not a number -- error
108            printf("%s", params[i]);
109            printf(" should be a number\n");
110            return -1;
111        }
112        else
113        {
114            // <w_dP> is a number; check if nonnegative
115            if (M.m_adFactors[ i ] < 0.0)
116            {
117                printf("%s", params[i]);
118                printf(" should be a nonnegative number\n");
119                return -1;
120            }
121        }
122    }
123
124    // read the input file
125    // prepare loading of genotypes from a .gen file
126    // create some basic genotype converters
127    PreconfiguredGenetics genetics;
128    StdioFileSystem_autoselect stdiofilesys;
129
130    long count = 0, totalsize = 0;
131    MiniGenotypeLoader loader(szFileName);
132    MiniGenotype *loaded;
133    while (loaded = loader.loadNextGenotype())
134    {
135        // while a valid genotype was loaded
136        count++;
137        totalsize += loaded->genotype.len();
138        // create a Geno object based on the MiniGenotype
139        Geno *pNextGenotype = new Geno(loaded->genotype);
140        if ((pNextGenotype != NULL) && (pNextGenotype->isValid()))
141        {
142            pvGenos->push_back(pNextGenotype);
143            char *szNewName = new char [ loaded->name.len() + 1];
144            strcpy(szNewName, loaded->name.c_str());
145            pvNames->push_back(szNewName);
146        }
147        else
148        {
149            printf("Genotype %2li is not valid\n", count);
150        }
151    }
152    if (loader.getStatus() == MiniGenotypeLoader::OnError)
153    {
154        printf("Error: %s", loader.getError().c_str());
155    }
156
157    double dSimilarity = 0.0;
158    double **aaSimil = NULL; // array of similarities
159
160    // create the empty array of similarities
161    aaSimil = new pDouble [pvGenos->size()];
162    for (unsigned int k = 0; k < pvGenos->size(); k++)
163    {
164        aaSimil[k] = new double [pvGenos->size()];
165        for (unsigned int l = 0; l < pvGenos->size(); l++)
166            aaSimil[k][l] = 0.0;
167    }
168
169    // compute and remember similarities
170    unsigned int i, j;
171    for (i = 0; i < pvGenos->size(); i++)
172    {
173        for (j = 0; j < pvGenos->size(); j++)
174        {
175            dSimilarity = M.EvaluateDistance(pvGenos->operator[](i), pvGenos->operator[](j));
176            aaSimil[i][j] = dSimilarity;
177        }
178    }
179
180    if (bPrintNames)
181    {
182        // if "-names" switch was given,
183        // print the number of genotypes and their names
184        printf("%li\n", pvGenos->size());
185        unsigned int iGen;
186        for (iGen = 0; iGen < pvNames->size(); iGen++)
187        {
188            printf("%s\n", pvNames->at(iGen));
189        }
190    } // if (bPrintNames)
191
192    // print out the matrix of similarities
193    for (i = 0; i < pvGenos->size(); i++)
194    {
195        for (j = 0; j < pvGenos->size(); j++)
196        {
197            printf("%.2lf\t", aaSimil[i][j]);
198        }
199        printf("\n");
200    }
201
202    // delete vectors and arrays
203    for (i = 0; i < pvGenos->size(); i++)
204    {
205        delete pvGenos->operator[](i);
206        delete [] pvNames->operator[](i);
207        delete [] aaSimil[i];
208    }
209
210    delete pvGenos;
211    delete pvNames;
212    delete [] aaSimil;
213
214    return 0;
215}
Note: See TracBrowser for help on using the repository browser.