source: cpp/frams/_demos/simil_test.cpp @ 1044

Last change on this file since 1044 was 1044, checked in by oriona, 4 years ago

Similarity measures code refactored. Distribution-based similarity measure added.

File size: 7.3 KB
Line 
1// This file is a part of Framsticks SDK.  http://www.framsticks.com/
2// Copyright (C) 1999-2020  Maciej Komosinski and Szymon Ulatowski.
3// See LICENSE.txt for details.
4
5
6#include <vector>
7#include <string>
8#include "common/loggers/loggertostdout.h"
9#include "frams/_demos/genotypeloader.h"
10#include "frams/genetics/preconfigured.h"
11#include "common/virtfile/stdiofile.h"
12#include "frams/model/similarity/measure-distribution.h"
13#include "frams/model/similarity/measure-greedy.h"
14#include "frams/model/similarity/measure-hungarian.h"
15
16using namespace std;
17
/** Parses consecutive command-line arguments as floating-point parameters.

    One value is read from @p args for every entry of @p params_names, starting
    at index @p pos, and appended to @p params in the same order.

    @param args          all command-line arguments (program name excluded)
    @param pos           index in @p args of the first value to parse
    @param params        output vector; successfully parsed values are appended,
                         so it may be partially extended when parsing fails
    @param params_names  display names of the expected values, used in
                         diagnostics; their count decides how many values are read
    @return 0 when every value was parsed, -1 (after printing a diagnostic to
            stdout) when a value is missing, is not a number, or does not fit
            in a double. */
int add_double_param(std::vector<std::string> *args, int pos, std::vector<double> *params, std::vector<std::string> *params_names)
{
    for (const std::string &name : *params_names)
    {
        // Report an absent argument explicitly; without this check the failure
        // would surface as an out_of_range from the container and be reported
        // as a numeric range problem.
        if (pos < 0 || static_cast<std::size_t>(pos) >= args->size())
        {
            printf("%s is missing\n", name.c_str());
            return -1;
        }

        const std::string &text = (*args)[pos];
        try
        {
            std::size_t consumed = 0;
            const double value = std::stod(text, &consumed);
            // std::stod stops at the first character it cannot use, so "1abc"
            // would otherwise be silently accepted as 1.
            if (consumed != text.size())
            {
                printf("%s should be a number\n", name.c_str());
                return -1;
            }
            params->push_back(value);
            pos++;
        }
        catch (const std::invalid_argument&)
        {
            printf("%s should be a number\n", name.c_str());
            return -1;
        }
        catch (const std::out_of_range&)
        {
            printf("%s should be inside double range\n", name.c_str());
            return -1;
        }
    }
    return 0;
}
40
41/** Computes a matrix of distances between all genotypes in the specified
42        .gen file, using the matching and measure weights as specified in the
43        command line. */
44int main(int argc, char *argv[])
45{
46    typedef double *pDouble;
47    LoggerToStdout messages_to_stdout(LoggerBase::Enable);
48    SimilMeasure *simil_measure = nullptr;
49    if (argc < 5)
50    {
51        printf("Too few parameters!\n");
52        printf("Command line: [-names] <genotypesFile> <measure (greedy/hungarian)> <w_dP> <w_dDEG> <w_dNEU> <w_dGEO> <fixZaxis?>\n\n");
53        printf("Command line: [-names] <genotypesFile> <measure (distribution)> <desc> <simil> <dens> <bins> <samp_num>\n\n");
54        printf("Parameters:\n");
55        printf("  <genotypesFile> name of a file with genotypes\n");
56        printf("  <measure> similarity measure name (greedy/hungarian/distribution)\n");
57        printf("\n");
58        printf("Parameters of greedy and hungarian measures:\n");
59        printf("  <w_dP> weight of the difference in the number of parts\n");
60        printf("  <w_dDEG> weight of the difference in degrees of matched parts\n");
61        printf("  <w_dNEU> weight of the difference in neurons of matched parts\n");
62        printf("  <w_dGEO> weight of the distance of matched parts\n");
63        printf("  <fixZaxis?> should the 'z' (vertical) coordinate be fixed during the alignment? (0 or 1)\n\n");
64        printf("Parameters of distribution measure:\n");
65        printf("  <dens> sampling density\n");
66        printf("  <bins> number of histogram bins\n");
67        printf("  <samp_num> number of samples taken\n\n");
68
69        printf("Switches:\n");
70        printf("  -names specifies that the number and names of genotypes are to be printed to output\n");
71        printf("  before the distance matrix; by default the number and names are not printed\n\n");
72
73        printf("Outputs a symmetric distance matrix in the format:\n");
74        printf("  <row_1> (columns in a row are separated by TABs)\n");
75        printf("  ...\n");
76        printf("  <row_n>\n");
77
78        return -1;
79    }
80
81    std::vector<string> args;
82    for (int i = 1; i < argc; i++)
83        args.push_back(std::string(argv[i]));
84
85    bool print_names = false;
86
87    int pos = 1;
88    if (args.at(0).compare("-names")==0)
89    {
90        print_names = true;
91        pos = 2;
92    }
93
94    string measure_name = args.at(pos);
95    pos++;
96    std::vector<double> params;
97
98    if (measure_name.compare("greedy")==0 || measure_name.compare("hungarian")==0)
99    {
100        std::vector<string> params_names{ "<w_dP>", "<w_dDEG>", "<w_dNEU>", "<w_dGEO>",  "<fixZaxis?>" };
101
102        if (add_double_param(&args, pos, &params, &params_names) == -1)
103            return -1;
104
105        if (measure_name.compare("greedy")==0)
106            simil_measure = new SimilMeasureGreedy();
107        else
108            simil_measure = new SimilMeasureHungarian();
109    }
110
111
112    else if (measure_name.compare("distribution")==0)
113    {
114        std::vector<string> params_names{ "<dens>", "<bins>", "<samp_num>" };
115
116        if (add_double_param(&args, pos, &params, &params_names)==-1)
117            return -1;
118
119        simil_measure = new SimilMeasureDistribution();
120    }
121
122    else
123    {
124        printf("Measure type should be greedy (flexible criteria order and optimal matching), hungarian (vertex degree order and greedy matching) or distribution!\n");
125        return -1;
126    }
127
128    simil_measure->setParams(params);
129
130    // read the input file
131    // prepare loading of genotypes from a .gen file
132    // create some basic genotype converters
133    PreconfiguredGenetics genetics;
134    StdioFileSystem_autoselect stdiofilesys;
135
136    // prepare output parameters from .gen file
137    vector<Geno *> pvGenos;
138    vector<char *> pvNames;
139
140    long count = 0, totalsize = 0;
141    GenotypeMiniLoader loader(args.at(0).c_str());
142    GenotypeMini *loaded;
143    while (loaded = loader.loadNextGenotype())
144    {
145        // while a valid genotype was loaded
146        count++;
147        totalsize += loaded->genotype.length();
148        // create a Geno object based on the MiniGenotype
149        Geno *pNextGenotype = new Geno(loaded->genotype);
150        if ((pNextGenotype != NULL) && (pNextGenotype->isValid()))
151        {
152            pvGenos.push_back(pNextGenotype);
153            char *szNewName = new char[loaded->name.length() + 1];
154            strcpy(szNewName, loaded->name.c_str());
155            pvNames.push_back(szNewName);
156        }
157        else
158        {
159            printf("Genotype %2li is not valid\n", count);
160            if (pNextGenotype != NULL) delete pNextGenotype;
161        }
162    }
163    if (loader.getStatus() == GenotypeMiniLoader::OnError)
164    {
165        printf("Error: %s", loader.getError().c_str());
166    }
167
168    double dSimilarity = 0.0;
169    double **aaSimil = NULL; // array of similarities
170
171    // create the empty array of similarities
172    aaSimil = new pDouble[pvGenos.size()];
173    for (unsigned int k = 0; k < pvGenos.size(); k++)
174    {
175        aaSimil[k] = new double[pvGenos.size()];
176        for (unsigned int l = 0; l < pvGenos.size(); l++)
177            aaSimil[k][l] = 0.0;
178    }
179
180
181
182    // compute and remember similarities
183    for (unsigned int i = 0; i < pvGenos.size(); i++)
184    {
185        for (unsigned int j = 0; j < pvGenos.size(); j++)
186        {
187            dSimilarity = simil_measure->evaluateDistance(pvGenos.operator[](i), pvGenos.operator[](j));
188            aaSimil[i][j] = dSimilarity;
189        }
190    }
191
192    if (print_names)
193    {
194        // if "-names" switch was given, print the number of genotypes and their names
195        printf("%li\n", pvGenos.size());
196        for (unsigned int iGen = 0; iGen < pvNames.size(); iGen++)
197        {
198            printf("%s\n", pvNames.at(iGen));
199        }
200    }
201
202    // print out the matrix of similarities
203    for (unsigned int i = 0; i < pvGenos.size(); i++)
204    {
205        for (unsigned int j = 0; j < pvGenos.size(); j++)
206        {
207            printf("%.2lf\t", aaSimil[i][j]);
208        }
209        printf("\n");
210    }
211
212    // delete vectors and arrays
213    for (unsigned int i = 0; i < pvGenos.size(); i++)
214    {
215        delete pvGenos.operator[](i);
216        delete[] pvNames.operator[](i);
217        delete[] aaSimil[i];
218    }
219
220    delete[] aaSimil;
221    delete simil_measure;
222
223    return 0;
224}
Note: See TracBrowser for help on using the repository browser.