1 | #include <frams/util/sstring.h> |
---|
2 | #include <vector> |
---|
3 | #include <frams/param/param.h> |
---|
4 | #include "fB_conv.h" |
---|
5 | #include "fB_general.h" |
---|
6 | #include "fB_oper.h" |
---|
7 | |
---|
8 | #define FIELDSTRUCT Geno_fB |
---|
9 | |
---|
10 | static ParamEntry GENOfBparam_tab[] = |
---|
11 | { |
---|
12 | { "Genetics: fB", 3, FB_MUT_COUNT + FB_XOVER_COUNT, }, // ask about it |
---|
13 | { "Genetics: fB: Mutation", }, |
---|
14 | { "Genetics: fB: Crossover", }, |
---|
15 | { "fB_mut_substitution", 1, 0, "Substitution", "f 0 1 0.6", FIELD(mutationprobs[FB_SUBSTITUTION]), "Probability of mutation by changing single random letter in genotype", }, |
---|
16 | { "fB_mut_insertion", 1, 0, "Insertion", "f 0 1 0.1", FIELD(mutationprobs[FB_INSERTION]), "Probability of mutation by inserting characters in random place of genotype", }, |
---|
17 | { "fB_mut_deletion", 1, 0, "Deletion", "f 0 1 0.1", FIELD(mutationprobs[FB_DELETION]), "Probability of mutation by deleting random characters in genotype", }, |
---|
18 | { "fB_mut_duplication", 1, 0, "Duplication", "f 0 1 0.05", FIELD(mutationprobs[FB_DUPLICATION]), "Probability of mutation by copying single *gene* of genotype and appending it to the beginning of this genotype", }, |
---|
19 | { "fB_mut_translocation", 1, 0, "Translocation", "f 0 1 0.15", FIELD(mutationprobs[FB_TRANSLOCATION]), "Probability of mutation by replacing two substrings in genotype", }, |
---|
20 | { "fB_cross_gene_transfer", 2, 0, "Horizontal gene transfer", "f 0 1 0.8", FIELD(crossoverprobs[FB_GENE_TRANSFER]), "Probability of crossing over by transferring single genes from both parents to beginning of each other", }, |
---|
21 | { "fB_cross_crossover", 2, 0, "Crossing over", "f 0 1 0.2", FIELD(crossoverprobs[FB_CROSSING_OVER]), "Probability of crossing over by random distribution of genes from both parents to both children", }, |
---|
22 | { 0, }, |
---|
23 | }; |
---|
24 | |
---|
25 | #undef FIELDSTRUCT |
---|
26 | |
---|
27 | Geno_fB::Geno_fB() |
---|
28 | { |
---|
29 | par.setParamTab(GENOfBparam_tab); |
---|
30 | par.select(this); |
---|
31 | par.setDefault(); |
---|
32 | supported_format = 'B'; |
---|
33 | } |
---|
34 | |
---|
35 | bool Geno_fB::hasStick(SString genotype) |
---|
36 | { |
---|
37 | for (int i = 0; i < fB_GenoHelpers::geneCount(genotype); i++) |
---|
38 | { |
---|
39 | int start, end; |
---|
40 | SString gene = fB_GenoHelpers::getGene(i, genotype, start, end); |
---|
41 | int endoffset = 0; |
---|
42 | if (gene.indexOf("zz", 0) != -1) endoffset = 2; |
---|
43 | if (gene.len() - endoffset < 3) |
---|
44 | { |
---|
45 | return true; // genes with length < 3 are always sticks |
---|
46 | } |
---|
47 | else if (gene[2] >= 'a' && gene[2] <= 'i') |
---|
48 | { |
---|
49 | return true; // gene within this range is stick |
---|
50 | } |
---|
51 | } |
---|
52 | return false; |
---|
53 | } |
---|
54 | |
---|
55 | int Geno_fB::checkValidity(const char *geno, const char *genoname) |
---|
56 | { |
---|
57 | // load genotype |
---|
58 | SString genotype(geno); |
---|
59 | SString line; |
---|
60 | int pos = 0; |
---|
61 | // if there is no genotype to load, then return error |
---|
62 | if (!genotype.getNextToken(pos, line, '\n')) |
---|
63 | { |
---|
64 | return pos + 1; |
---|
65 | } |
---|
66 | // extract dimensions |
---|
67 | int dims = 0; |
---|
68 | if (!ExtValue::parseInt(line.c_str(), dims, true, false)) |
---|
69 | { |
---|
70 | return 1; |
---|
71 | } |
---|
72 | // extract next token in order to check if next line starts with "aa" |
---|
73 | int genstart = genotype.indexOf("aa", 0); |
---|
74 | if (genstart != pos) |
---|
75 | { |
---|
76 | return pos + 1; |
---|
77 | } |
---|
78 | // check if rest of characters are lowercase |
---|
79 | for (int i = genstart; i < genotype.len(); i++) |
---|
80 | { |
---|
81 | if (!islower(genotype[i])) |
---|
82 | { |
---|
83 | return i + 1; |
---|
84 | } |
---|
85 | } |
---|
86 | if (!hasStick(genotype)) |
---|
87 | { |
---|
88 | return 1; |
---|
89 | } |
---|
90 | return GENOPER_OK; |
---|
91 | } |
---|
92 | |
---|
93 | int Geno_fB::validate(char *&geno, const char *genoname) |
---|
94 | { |
---|
95 | // load genotype |
---|
96 | SString genotype(geno); |
---|
97 | SString strdims; |
---|
98 | int pos = 0; |
---|
99 | if (!genotype.getNextToken(pos, strdims, '\n')) |
---|
100 | { |
---|
101 | return GENOPER_OPFAIL; |
---|
102 | } |
---|
103 | // parse dimension |
---|
104 | int dims = 0; |
---|
105 | if (!ExtValue::parseInt(strdims.c_str(), dims, true, false)) |
---|
106 | { |
---|
107 | return GENOPER_OPFAIL; |
---|
108 | } |
---|
109 | SString line; |
---|
110 | bool fix = false; |
---|
111 | int genstart = genotype.indexOf("aa", 0); |
---|
112 | // if there is no "aa" codon in the beginning of a genotype, then add it |
---|
113 | if (genstart != pos) |
---|
114 | { |
---|
115 | genotype = strdims + "\naa" + genotype.substr(pos); |
---|
116 | fix = true; |
---|
117 | } |
---|
118 | for (int i = pos; i < genotype.len(); i++) |
---|
119 | { |
---|
120 | // if character is not alphabetic - error |
---|
121 | if (!isalpha(genotype[i])) |
---|
122 | { |
---|
123 | return GENOPER_OPFAIL; |
---|
124 | } |
---|
125 | // if character is uppercase, then convert it to lowercase |
---|
126 | if (isupper(genotype[i])) |
---|
127 | { |
---|
128 | genotype.directWrite()[i] = tolower(genotype[i]); |
---|
129 | fix = true; |
---|
130 | } |
---|
131 | } |
---|
132 | // if the genotype does not contain any stick - add it |
---|
133 | if (!hasStick(genotype)) |
---|
134 | { |
---|
135 | genotype = SString("aaazz") + genotype; |
---|
136 | } |
---|
137 | // if there were any changes - save them |
---|
138 | if (fix) |
---|
139 | { |
---|
140 | free(geno); |
---|
141 | geno = strdup(genotype.c_str()); |
---|
142 | } |
---|
143 | return GENOPER_OK; |
---|
144 | } |
---|
145 | |
---|
146 | int Geno_fB::mutate(char *&geno, float &chg, int &method) |
---|
147 | { |
---|
148 | SString genotype(geno); |
---|
149 | SString strdims; |
---|
150 | int pos = 0; |
---|
151 | genotype.getNextToken(pos, strdims, '\n'); |
---|
152 | SString line; |
---|
153 | genotype.getNextToken(pos, line, '\n'); |
---|
154 | method = roulette(mutationprobs, FB_MUT_COUNT); |
---|
155 | switch (method) |
---|
156 | { |
---|
157 | case FB_SUBSTITUTION: |
---|
158 | { |
---|
159 | int rndid = randomN(line.len()); // select random letter from genotype |
---|
160 | // increment/decrement character - when overflow happens, this method |
---|
161 | // uses reflect method |
---|
162 | if (randomN(2) == 0) |
---|
163 | { |
---|
164 | if (line[rndid] == 'a') line.directWrite()[rndid] = 'b'; |
---|
165 | else line.directWrite()[rndid] = line[rndid] - 1; |
---|
166 | } |
---|
167 | else |
---|
168 | { |
---|
169 | if (line[rndid] == 'z') line.directWrite()[rndid] = 'y'; |
---|
170 | else line.directWrite()[rndid] = line[rndid] + 1; |
---|
171 | } |
---|
172 | chg = 1.0 / line.len(); |
---|
173 | break; |
---|
174 | } |
---|
175 | case FB_INSERTION: |
---|
176 | { |
---|
177 | chg = 1.0 / line.len(); |
---|
178 | int rndid = randomN(genotype.len()); // select random insertion point |
---|
179 | char letter = 'a' + randomN(26); |
---|
180 | SString result = line.substr(0, rndid); |
---|
181 | result += letter; |
---|
182 | result += line.substr(rndid); |
---|
183 | line = result; |
---|
184 | break; |
---|
185 | } |
---|
186 | case FB_DELETION: |
---|
187 | { |
---|
188 | chg = 1.0 / line.len(); |
---|
189 | int rndid = randomN(line.len()); // select random insertion point |
---|
190 | if (rndid == line.len() - 1) |
---|
191 | { |
---|
192 | line = line.substr(0, line.len() - 1); |
---|
193 | } |
---|
194 | else |
---|
195 | { |
---|
196 | line = line.substr(0, rndid) + line.substr(rndid + 1); |
---|
197 | } |
---|
198 | break; |
---|
199 | } |
---|
200 | case FB_DUPLICATION: |
---|
201 | { |
---|
202 | int rndgene = randomN(fB_GenoHelpers::geneCount(line)); |
---|
203 | int start, end; |
---|
204 | SString gene = fB_GenoHelpers::getGene(rndgene, line, start, end); |
---|
205 | if (gene.indexOf("zz", 0) == -1) gene += "zz"; |
---|
206 | chg = (float)gene.len() / line.len(); |
---|
207 | line = gene + line; |
---|
208 | break; |
---|
209 | } |
---|
210 | case FB_TRANSLOCATION: |
---|
211 | { |
---|
212 | std::vector<int> cuts(4); |
---|
213 | for (int i = 0; i < 4; i++) |
---|
214 | { |
---|
215 | cuts[i] = randomN(line.len()); |
---|
216 | } |
---|
217 | std::sort(cuts.begin(), cuts.end()); |
---|
218 | SString first = line.substr(cuts[0], cuts[1] - cuts[0]); |
---|
219 | SString second = line.substr(cuts[2], cuts[3] - cuts[2]); |
---|
220 | SString result = line.substr(0, cuts[0]) + second + |
---|
221 | line.substr(cuts[1], cuts[2] - cuts[1]) + first + line.substr(cuts[3]); |
---|
222 | line = result; |
---|
223 | chg = (float)(cuts[3] - cuts[2] + cuts[1] - cuts[0]) / line.len(); |
---|
224 | break; |
---|
225 | } |
---|
226 | } |
---|
227 | SString result = strdims + "\n" + line; |
---|
228 | free(geno); |
---|
229 | geno = strdup(result.c_str()); |
---|
230 | return GENOPER_OK; |
---|
231 | } |
---|
232 | |
---|
233 | int Geno_fB::crossOver(char *&g1, char *&g2, float& chg1, float& chg2) |
---|
234 | { |
---|
235 | SString p1(g1); |
---|
236 | SString p2(g2); |
---|
237 | |
---|
238 | int dims1 = 0, dims2 = 0; |
---|
239 | int pos = 0; |
---|
240 | SString strdims; |
---|
241 | p1.getNextToken(pos, strdims, '\n'); |
---|
242 | ExtValue::parseInt(strdims.c_str(), dims1, true, false); |
---|
243 | SString parent1; |
---|
244 | p1.getNextToken(pos, parent1, '\n'); |
---|
245 | |
---|
246 | pos = 0; |
---|
247 | p2.getNextToken(pos, strdims, '\n'); |
---|
248 | ExtValue::parseInt(strdims.c_str(), dims2, true, false); |
---|
249 | |
---|
250 | if (dims1 != dims2) |
---|
251 | { |
---|
252 | return GENOPER_OPFAIL; |
---|
253 | } |
---|
254 | |
---|
255 | SString parent2; |
---|
256 | p2.getNextToken(pos, parent2, '\n'); |
---|
257 | |
---|
258 | SString child1 = ""; |
---|
259 | SString child2 = ""; |
---|
260 | |
---|
261 | switch (roulette(crossoverprobs, FB_XOVER_COUNT)) |
---|
262 | { |
---|
263 | case FB_GENE_TRANSFER: |
---|
264 | { |
---|
265 | // get random gene from first parent |
---|
266 | int choice = randomN(fB_GenoHelpers::geneCount(parent1)); |
---|
267 | int start, end; |
---|
268 | SString gene = fB_GenoHelpers::getGene(choice, parent1, start, end); |
---|
269 | // add this gene to the beginning of the second parent genotype |
---|
270 | child2 = gene + parent2; |
---|
271 | chg2 = (float)parent2.len() / (float)child2.len(); |
---|
272 | // do the same for second parent |
---|
273 | choice = randomN(fB_GenoHelpers::geneCount(parent2)); |
---|
274 | gene = fB_GenoHelpers::getGene(choice, parent2, start, end); |
---|
275 | child1 = gene + parent1; |
---|
276 | chg1 = (float)parent1.len() / (float)child1.len(); |
---|
277 | break; |
---|
278 | } |
---|
279 | case FB_CROSSING_OVER: |
---|
280 | { |
---|
281 | // iterate through all genes of the first parent and assign them |
---|
282 | // randomly to children |
---|
283 | for (int i = 0; i < fB_GenoHelpers::geneCount(parent1); i++) |
---|
284 | { |
---|
285 | int start, end; |
---|
286 | SString gene = fB_GenoHelpers::getGene(i, parent1, start, end); |
---|
287 | if (randomN(2) == 0) |
---|
288 | { |
---|
289 | child1 += gene; |
---|
290 | chg1 += 1.0f; |
---|
291 | } |
---|
292 | else |
---|
293 | { |
---|
294 | child2 += gene; |
---|
295 | } |
---|
296 | } |
---|
297 | chg1 /= fB_GenoHelpers::geneCount(parent1); |
---|
298 | |
---|
299 | // do the same with second parent |
---|
300 | for (int i = 0; i < fB_GenoHelpers::geneCount(parent2); i++) |
---|
301 | { |
---|
302 | int start, end; |
---|
303 | SString gene = fB_GenoHelpers::getGene(i, parent2, start, end); |
---|
304 | if (randomN(2) == 0) |
---|
305 | { |
---|
306 | child1 += gene; |
---|
307 | } |
---|
308 | else |
---|
309 | { |
---|
310 | child2 += gene; |
---|
311 | chg2 += 1.0f; |
---|
312 | } |
---|
313 | } |
---|
314 | chg2 /= fB_GenoHelpers::geneCount(parent2); |
---|
315 | break; |
---|
316 | } |
---|
317 | } |
---|
318 | |
---|
319 | free(g1); |
---|
320 | free(g2); |
---|
321 | if (child1.len() > 0 && child2.len() == 0) |
---|
322 | { |
---|
323 | child1 = strdims + "\n" + child1; |
---|
324 | g1 = strdup(child1.c_str()); |
---|
325 | g2 = strdup(""); |
---|
326 | } |
---|
327 | else if (child2.len() > 0 && child1.len() == 0) |
---|
328 | { |
---|
329 | child2 = strdims + "\n" + child2; |
---|
330 | g1 = strdup(child2.c_str()); |
---|
331 | g2 = strdup(""); |
---|
332 | } |
---|
333 | else |
---|
334 | { |
---|
335 | child1 = strdims + "\n" + child1; |
---|
336 | child2 = strdims + "\n" + child2; |
---|
337 | g1 = strdup(child1.c_str()); |
---|
338 | g2 = strdup(child2.c_str()); |
---|
339 | } |
---|
340 | return GENOPER_OK; |
---|
341 | } |
---|
342 | |
---|
343 | uint32_t Geno_fB::style(const char *geno, int pos) |
---|
344 | { |
---|
345 | char ch = geno[pos]; |
---|
346 | if (isdigit(ch)) |
---|
347 | { |
---|
348 | while (pos > 0) |
---|
349 | { |
---|
350 | pos--; |
---|
351 | if (isdigit(geno[pos]) == 0) |
---|
352 | { |
---|
353 | return GENSTYLE_CS(0, GENSTYLE_INVALID); |
---|
354 | } |
---|
355 | } |
---|
356 | return GENSTYLE_RGBS(0, 0, 200, GENSTYLE_BOLD); |
---|
357 | } |
---|
358 | if (islower(ch) == 0) |
---|
359 | { |
---|
360 | return GENSTYLE_CS(0, GENSTYLE_INVALID); |
---|
361 | } |
---|
362 | uint32_t style = GENSTYLE_CS(GENCOLOR_TEXT, GENSTYLE_NONE); |
---|
363 | if (ch == 'a' && pos > 0 && (geno[pos - 1] == 'a' || geno[pos - 1] == '\n')) |
---|
364 | { |
---|
365 | style = GENSTYLE_RGBS(0, 200, 0, GENSTYLE_BOLD); |
---|
366 | } |
---|
367 | else if (ch == 'z' && pos > 0 && geno[pos - 1] == 'z') |
---|
368 | { |
---|
369 | style = GENSTYLE_RGBS(200, 0, 0, GENSTYLE_BOLD); |
---|
370 | } |
---|
371 | return style; |
---|
372 | } |
---|