source: mds-and-trees/tree-genealogy.py @ 615

Last change on this file since 615 was 615, checked in by Maciej Komosinski, 8 years ago

Improved descriptions

File size: 19.9 KB
Line 
1# Draws a genealogical tree (generates a SVG file) based on parent-child relationship information.
2# Supports files generated by Framsticks experiments.
3
4import json
5import random
6import math
7import argparse
8
# Run-time options. They start out empty and are overwritten in main() with
# the values parsed from the command line.
TIME = "" # vertical axis mode, compared against "BIRTHS" / "GENERATIONAL" / "REAL"
BALANCE = "" # horizontal placement strategy read by xmin_crowd(): "RANDOM" / "MIN" / "DENSITY"

DOT_STYLE = "" # dot drawing mode: "NONE" skips dots, "TYPE" colors by kind, anything else draws the plain ("NORMAL") dot

JITTER = "" # set from the --jitter flag in main(); when True, horizontal positions receive Gaussian noise

# ------SVG---------
svg_file = 0 # placeholder; main() replaces it with the opened output file object

# Attribute strings that are spliced verbatim into the generated SVG elements.
svg_line_style = 'stroke="rgb(90%,10%,16%)" stroke-width="1" stroke-opacity="0.7"'
# The two styles below only carry the width; main() appends the stroke color
# produced by hex_to_style() from --color-mut / --color-cross.
svg_mutation_line_style = 'stroke-width="1"'
svg_crossover_line_style = 'stroke-width="1"'
svg_spine_line_style = 'stroke="rgb(0%,90%,40%)" stroke-width="2" stroke-opacity="1"'
svg_scale_line_style = 'stroke="black" stroke-width="0.5" stroke-opacity="1" stroke-dasharray="5, 5"'

# Attribute strings for <circle> elements (individuals).
svg_dot_style = 'r="2" stroke="black" stroke-width="0.2" fill="red"'
svg_clear_dot_style = 'r="2" stroke="black" stroke-width="0.4" fill="none"'
svg_spine_dot_style = 'r="1" stroke="black" stroke-width="0.2" fill="rgb(50%,50%,100%)"'

# Attribute string for <text> elements (default style of svg_add_text()).
svg_scale_text_style = 'style="font-family: Arial; font-size: 12; fill: #000000;"'
30
def hex_to_style(hex):
    """Convert an RGB or RGBA hex color string into SVG stroke attributes.

    Accepts 6 (RRGGBB) or 8 (RRGGBBAA) hexadecimal digits with an optional
    leading '#'. Color channels are emitted as percentages; the alpha pair,
    when present, becomes the stroke opacity (otherwise 0.5 is used).

    Any malformed value (empty string, wrong length, non-hex characters such
    as a "0x" prefix or a sign) prints a warning and yields a black,
    half-transparent stroke instead of raising.

    Returns a string of the form ' stroke="rgb(R%,G%,B%)" stroke-opacity="A" '.
    """
    default_style = ' stroke="black" stroke-opacity="0.5" '

    # startswith() is safe for an empty string, unlike indexing hex[0].
    if hex.startswith("#"):
        hex = hex[1:]

    if len(hex) not in (6, 8):
        print("Invalid number of digits in the color's hex #" + hex + "! Assuming black.")
        return default_style

    # Check digit by digit: int(text, 16) alone would accept "0x1234" or
    # "+12345" as a whole and then fail on the two-character slices below.
    if any(digit not in "0123456789abcdefABCDEF" for digit in hex):
        print("Invalid characters in the color's hex #" + hex + "! Assuming black.")
        return default_style

    red = 100*int(hex[0:2], 16)/255
    green = 100*int(hex[2:4], 16)/255
    blue = 100*int(hex[4:6], 16)/255
    opacity = 0.5
    if len(hex) == 8:
        opacity = int(hex[6:8], 16)/255
    return ' stroke="rgb(' +str(red)+ '%,' +str(green)+ '%,' +str(blue)+ '%)" stroke-opacity="' +str(opacity)+ '" '
53
def svg_add_line(from_pos, to_pos, style=svg_line_style):
    """Append a <line> element to the open SVG file.

    from_pos and to_pos are indexable (x, y) pairs; style is a string of SVG
    attributes inserted verbatim into the element.
    """
    x_start, y_start = from_pos[0], from_pos[1]
    x_end, y_end = to_pos[0], to_pos[1]
    element = '<line {} x1="{}" x2="{}" y1="{}" y2="{}"  fill="none"/>'.format(
        style, x_start, x_end, y_start, y_end)
    svg_file.write(element)
57
def svg_add_text(text, pos, anchor, style=svg_scale_text_style):
    """Append a <text> element to the open SVG file.

    text   -- the label; it is XML-escaped so that characters such as '&'
              or '<' (e.g. coming from an input file name) cannot produce a
              malformed SVG document
    pos    -- indexable (x, y) pair
    anchor -- value for the text-anchor attribute (callers in this file pass
              "start" or "end")
    style  -- string of SVG attributes inserted verbatim into the element
    """
    # Imported locally so the block does not depend on the file's import list.
    from xml.sax.saxutils import escape

    svg_file.write('<text ' + style + ' text-anchor="' + anchor + '" x="' + str(pos[0]) + '" y="' + str(pos[1]) + '" >' + escape(text) + '</text>')
60
def svg_add_dot(pos, style=svg_dot_style):
    """Append a <circle> element centered at pos to the open SVG file.

    pos is an indexable (x, y) pair; style is a string of SVG attributes
    (radius, fill, stroke, ...) inserted verbatim into the element.
    """
    center_x = str(pos[0])
    center_y = str(pos[1])
    pieces = ('<circle ', style, ' cx="', center_x, '" cy="', center_y, '" />')
    svg_file.write(''.join(pieces))
63
def svg_generate_line_style(percent):
    """Build SVG stroke attributes for a line of the given relative importance.

    percent is expected in the range 0..1. Stroke opacity (0.2 to 1.0) and
    stroke width (1 to 3) are interpolated linearly; the color is
    interpolated between the two palette ends after percent has been eased
    with 1-(1-percent)**20, which moves the color towards the end color
    already for small values of percent.
    """
    # "hotdog" palette (RGB in percent). Alternatives tried by the author:
    #   lava: [100, 80, 0] -> [100, 0, 0]
    #   neon: [30, 200, 255] -> [240, 0, 220]
    start_rgb = (100, 70, 0)
    end_rgb = (60, 0, 0)

    def blend(low, high, t):
        # plain linear interpolation between low (t=0) and high (t=1)
        return low*(1-t) + high*t

    opacity = blend(0.2, 1.0, percent)
    width = blend(1, 3, percent)

    eased = 1 - ((1-percent)**20)
    channels = [str(blend(lo, hi, eased)) for lo, hi in zip(start_rgb, end_rgb)]

    return 'stroke="rgb(' + '%,'.join(channels) + '%)" stroke-width="' + str(width) \
           + '" stroke-opacity="' + str(opacity) + '"'
88
def svg_generate_dot_style(kind):
    """Build SVG circle attributes for an individual of the given kind.

    kind is an integer index into a fixed eight-color palette. Indices
    outside the palette wrap around (modulo its length) instead of raising
    IndexError; negative indices in -8..-1 keep selecting the same color as
    Python's negative indexing did before.

    The radius shrinks as the number of entries in the global `nodes` grows
    (2500/len(nodes), capped at 10); an empty `nodes` is treated as a single
    node so that no division by zero occurs. Stroke width is a tenth of the
    radius.
    """
    kinds = ["red", "lawngreen", "royalblue", "magenta", "yellow", "cyan", "white", "black"]

    r = min(2500/max(len(nodes), 1), 10)
    fill = kinds[kind % len(kinds)]

    return 'fill="' + fill + '" r="' + str(r) + '" stroke="black" stroke-width="' + str(r/10) + '" fill-opacity="1.0" ' \
           'stroke-opacity="1.0"'
96
97# -------------------
98
def load_data(dir):
    """Load parent-child data from a log file with "[OFFSPRING] {json}" lines.

    dir is the path of the input file (the name is historical). Every line
    whose first space-separated token is "[OFFSPRING]" is parsed as JSON and
    may contain:
      "ID"      -- identifier of the individual
      "FromIDs" -- list of parent identifiers
      "Time"    -- stored in the global `time`
      "Kind"    -- stored in the global `kind`
    Other lines are ignored.

    Fills the globals:
      nodes[child]      -- {parent: contribution}; every parent currently
                           gets a contribution of 1
      inv_nodes[parent] -- list of children, built in sorted order
      firstnode         -- the most recently seen first parent that had no
                           entry of its own yet (used as the tree root)

    A repeated ID is reported and terminates the program with exit status 1.
    An empty "FromIDs" list is accepted and creates a node without parents.
    """
    global firstnode, nodes, inv_nodes, time

    # The context manager guarantees the file is closed, also on errors.
    with open(dir) as f:
        for line in f:
            sline = line.split(' ', 1)
            if len(sline) != 2 or sline[0] != "[OFFSPRING]":
                continue

            creature = json.loads(sline[1])

            if "FromIDs" in creature:
                parents = creature["FromIDs"]
                if creature["ID"] in nodes:
                    print("Duplicated entry for " + str(creature["ID"]))
                    # SystemExit instead of quit(): quit() is only injected
                    # by the site module and would exit with status 0.
                    raise SystemExit(1)

                # we assign to each parent its contribution to the genotype
                # of the child (fixed to 1 for now)
                nodes[creature["ID"]] = {parent: 1 for parent in parents}

                # a first parent that has no record of its own is a root candidate
                if parents and parents[0] not in nodes:
                    firstnode = parents[0]

            if "Time" in creature:
                time[creature["ID"]] = creature["Time"]

            if "Kind" in creature:
                kind[creature["ID"]] = creature["Kind"]

    # Build the inverse (parent -> children) mapping in a deterministic order.
    for child in sorted(nodes):
        for parent in sorted(nodes[child]):
            inv_nodes.setdefault(parent, []).append(child)
132
133
def load_simple_data(dir):
    """Load parent-child data from a file in the simple "#child #parent" format.

    dir is the path of the input file (the name is historical). Lines are
    split on whitespace:
      * a line with a single token sets the global `firstnode` (the root);
      * a line with two or more tokens maps child (first token) to parent
        (second token); the parent number is clamped from below to the root
        number, and a line whose child equals the root is skipped;
      * blank or whitespace-only lines are ignored.

    Fills the globals nodes[child] = parent (as a string) and
    inv_nodes[parent] = list of children, built in sorted order.
    """
    global firstnode, nodes, inv_nodes

    # The context manager guarantees the file is closed, also on errors.
    with open(dir) as f:
        for line in f:
            sline = line.split()
            if not sline:
                # an empty line has no tokens; indexing sline[0] would fail
                continue
            if len(sline) > 1:
                if sline[0] == firstnode:
                    continue
                nodes[sline[0]] = str(max(int(sline[1]), int(firstnode)))
            else:
                firstnode = sline[0]

    for child, parent in sorted(nodes.items()):
        inv_nodes.setdefault(parent, []).append(child)
154
def compute_depth(node):
    """Compute, for node and all of its descendants, the distance to the farthest leaf.

    A node without children (no entry in the global `inv_nodes`) has depth 0;
    otherwise its depth is one more than the largest depth among its
    children. Results are stored in the global `depth` and the depth of
    `node` is returned.

    The traversal is iterative, so lineages longer than the interpreter's
    recursion limit are handled, and every node is evaluated only once even
    when it is reachable through several parents.

    Raises ValueError if the parent-child data contain a cycle.
    """
    finished = {}      # node -> depth, for nodes whose whole subtree is done
    expanded = set()   # nodes whose children have already been scheduled
    stack = [node]

    while stack:
        current = stack[-1]
        if current in finished:
            # duplicate stack entry of a node reached through another parent
            stack.pop()
            continue

        children = inv_nodes.get(current, ())
        if current not in expanded:
            expanded.add(current)
            pending = [c for c in children if c not in finished]
            # an expanded but unfinished child is an ancestor of `current`
            if any(c in expanded for c in pending):
                raise ValueError("Cycle detected in parent-child data at node " + str(current))
            if pending:
                stack.extend(pending)
                continue

        my_depth = max((finished[c] + 1 for c in children), default=0)
        finished[current] = my_depth
        depth[current] = my_depth
        stack.pop()

    return finished[node]
162
163# ------------------------------------
164
def xmin_crowd(x1, x2, y):
    """Choose the less crowded of two candidate x positions for a node at height y.

    The strategy is selected by the global BALANCE:
      "RANDOM"  -- pick x1 or x2 with equal probability;
      "MIN"     -- among already placed nodes with exactly the same y, pick
                   the candidate whose nearest neighbor is farther away;
      "DENSITY" -- among already placed nodes whose y lies strictly within
                   10 units of y, pick the candidate with the larger sum of
                   Euclidean distances to those nodes.
    Ties are resolved in favor of x2.

    Raises ValueError for any other BALANCE value (previously None was
    returned silently and broke the layout later on).
    """
    if BALANCE == "RANDOM":
        return (x1 if random.randrange(2) == 0 else x2)

    if BALANCE == "MIN":
        x1_closest = 999999
        x2_closest = 999999
        for pos in positions.values():
            if pos[1] == y:
                x1_closest = min(x1_closest, abs(x1-pos[0]))
                x2_closest = min(x2_closest, abs(x2-pos[0]))
        return (x1 if x1_closest > x2_closest else x2)

    if BALANCE == "DENSITY":
        x1_dist = 0
        x2_dist = 0
        for pos in positions.values():
            # Both bounds must hold; with "or" the test was always true and
            # every placed node was counted regardless of its height.
            if y-10 < pos[1] < y+10:
                dy = pos[1]-y
                dx1 = pos[0]-x1
                dx2 = pos[0]-x2

                x1_dist += math.sqrt(dy**2 + dx1**2)
                x2_dist += math.sqrt(dy**2 + dx2**2)
        return (x1 if x1_dist > x2_dist else x2)

    raise ValueError("Unknown balance method: " + str(BALANCE) + " (expected RANDOM, MIN or DENSITY)")
190
191# ------------------------------------
192
def prepos_children_reccurent(node):
    """Assign layout positions to the descendants of node, depth first.

    A child is positioned only once all of its parents have been visited, so
    each individual is placed exactly one time. The vertical coordinate
    depends on the global TIME:
      "BIRTHS"       -- the number contained in the child's ID (an optional
                        leading "c" is stripped);
      "GENERATIONAL" -- one more than the y of `node`;
      "REAL"         -- the value stored for the child in the global `time`;
      anything else  -- 0.
    A child with a single parent is placed to the left or right of `node`
    (side chosen by xmin_crowd, offset 1 or Gaussian when JITTER is on);
    a child with several parents is placed at the contribution-weighted mean
    of its parents' x positions (plus small Gaussian noise when JITTER is on).
    """
    anchor = positions[node]
    jitter_on = JITTER == True

    for child in inv_nodes[node]:

        # we want to visit the node just once, after all of its parents
        if not all_parents_visited(child):
            continue
        visited[child] = True

        if TIME == "BIRTHS":
            digits = child[1:] if child[0] == "c" else child
            row = int(digits)
        elif TIME == "GENERATIONAL":
            row = anchor[1]+1
        elif TIME == "REAL":
            row = time[child]
        else:
            row = 0

        contributions = nodes[child]
        if len(contributions) == 1:
            # the offset is drawn before xmin_crowd() runs, which keeps the
            # sequence of random numbers unchanged
            offset = random.gauss(0,1) if jitter_on else 1
            column = xmin_crowd(anchor[0]-offset, anchor[0]+offset, row)
        else:
            total = sum(contributions.values())
            column = sum(positions[parent][0]*share/total for parent, share in contributions.items())
            if jitter_on:
                column = column + random.gauss(0, 0.1)
        positions[child] = [column, row]

        if child in inv_nodes:
            prepos_children_reccurent(child)
233
def prepos_children():
    """Compute layout positions for the whole tree and its bounding values.

    Places the root (`firstnode`) at [0, 0], positions all descendants via
    prepos_children_reccurent(), and then updates the globals max_height,
    max_width and min_width from the resulting positions.

    If REAL time was requested but the input provided no time data, a
    warning is printed and the global TIME is switched to "BIRTHS".
    """
    # TIME must be declared global: otherwise the fallback assignment below
    # would only create a local variable and have no effect.
    global max_height, max_width, min_width, visited, TIME

    if TIME == "REAL" and not time:
        print("REAL time requested, but no real time data provided. Assuming BIRTHS time instead.")
        TIME = "BIRTHS"

    positions[firstnode] = [0, 0]

    visited = {firstnode: True}
    prepos_children_reccurent(firstnode)

    for pos in positions.values():
        max_height = max(max_height, pos[1])
        max_width = max(max_width, pos[0])
        min_width = min(min_width, pos[0])
251
252# ------------------------------------
253
def all_parents_visited(node):
    """Return True when every parent of node is already a key of the global `visited`.

    Parents are the keys of nodes[node]; a node without parents yields True.
    """
    return all(parent in visited for parent in nodes[node])
261# ------------------------------------
262
def draw_children_recurrent(node, max_depth):
    """Draw the edges and dots of all descendants of node, depth first.

    A child is handled only once all of its parents are marked as visited,
    so each individual is drawn exactly one time. Its own descendants are
    drawn first, then one line per parent, then (unless DOT_STYLE is "NONE")
    its dot. Line style follows the global COLORING: "NONE" uses the default
    line style, "TYPE" distinguishes single-parent from multi-parent
    children, anything else colors by depth[child]/max_depth.
    """
    def to_canvas(point):
        # map layout coordinates into the drawing area inside the margins
        return (w_margin+w_no_margs*(point[0]-min_width)/(max_width-min_width),
                h_margin+h_no_margs*point[1]/max_height)

    for child in inv_nodes[node]:

        # we want to draw the node just once
        if not all_parents_visited(child):
            continue
        visited[child] = True

        if child in inv_nodes:
            draw_children_recurrent(child, max_depth)

        if COLORING == "NONE":
            line_style = svg_line_style
        elif COLORING == "TYPE":
            if len(nodes[child]) == 1:
                line_style = svg_mutation_line_style
            else:
                line_style = svg_crossover_line_style
        else: # IMPORTANCE, default
            line_style = svg_generate_line_style(depth[child]/max_depth)

        for parent in sorted(nodes[child]):
            svg_add_line(to_canvas(positions[parent]), to_canvas(positions[child]), line_style)

        if DOT_STYLE == "NONE":
            continue
        if DOT_STYLE == "TYPE":
            dot_style = svg_generate_dot_style(kind.get(child, 0))
        else: # NORMAL, default
            dot_style = svg_clear_dot_style
        svg_add_dot(to_canvas(positions[child]), dot_style)
297
def draw_children():
    """Draw the full tree: every edge and dot below the root, then the root's dot.

    Resets the global `visited` so that only the root is marked, determines
    the largest value stored in the global `depth` (at least 0) and passes
    it to draw_children_recurrent(). The root's dot is skipped when
    DOT_STYLE is "NONE".
    """
    global visited
    visited = {firstnode: True}

    deepest = max([0] + list(depth.values()))
    draw_children_recurrent(firstnode, deepest)

    if DOT_STYLE == "NONE":
        return

    if DOT_STYLE == "TYPE":
        root_style = svg_generate_dot_style(kind.get(firstnode, 0))
    else: # NORMAL, default
        root_style = svg_clear_dot_style

    root = positions[firstnode]
    root_on_canvas = (w_margin+w_no_margs*(root[0]-min_width)/(max_width-min_width),
                      h_margin+h_no_margs*root[1]/max_height)
    svg_add_dot(root_on_canvas, root_style)
315
def draw_spine_recurrent(node):
    """Draw the spine edges below node.

    An edge from node to a child belongs to the spine when the child's depth
    is exactly one less than the depth of node. Such an edge is always
    drawn; the traversal continues into the child only when all of its
    parents have been visited and the child has children of its own.
    """
    def to_canvas(point):
        # map layout coordinates into the drawing area inside the margins
        return (w_margin+w_no_margs*(point[0]-min_width)/(max_width-min_width),
                h_margin+h_no_margs*point[1]/max_height)

    for child in inv_nodes[node]:
        continues_spine = depth[child] == depth[node] - 1

        # we want to descend into the node just once
        if all_parents_visited(child):
            visited[child] = True
            if continues_spine and child in inv_nodes:
                draw_spine_recurrent(child)

        if continues_spine:
            svg_add_line(to_canvas(positions[node]), to_canvas(positions[child]), svg_spine_line_style)
333
def draw_spine():
    """Draw the spine of the tree, starting from the root.

    Resets the global `visited` so that only the root (`firstnode`) is
    marked, then delegates to draw_spine_recurrent().
    """
    global visited
    visited = {firstnode: True}
    draw_spine_recurrent(firstnode)
341
def draw_skeleton_reccurent(node):
    """Draw the skeleton edges below node.

    An edge from node to a child belongs to the skeleton when the child's
    depth is at least the global min_skeleton_depth. Such an edge is always
    drawn; the traversal continues into the child only when all of its
    parents have been visited and the child has children of its own.
    """
    def to_canvas(point):
        # map layout coordinates into the drawing area inside the margins
        return (w_margin+w_no_margs*(point[0]-min_width)/(max_width-min_width),
                h_margin+h_no_margs*point[1]/max_height)

    for child in inv_nodes[node]:
        in_skeleton = depth[child] >= min_skeleton_depth

        if all_parents_visited(child):
            visited[child] = True
            if in_skeleton and child in inv_nodes:
                draw_skeleton_reccurent(child)

        if in_skeleton:
            svg_add_line(to_canvas(positions[node]), to_canvas(positions[child]), svg_spine_line_style)
359
def draw_skeleton():
    """Draw the skeleton of the tree, starting from the root.

    Resets the global `visited` so that only the root (`firstnode`) is
    marked, then delegates to draw_skeleton_reccurent().
    """
    global visited
    visited = {firstnode: True}
    draw_skeleton_reccurent(firstnode)
367
368# ------------------------------------
369
def draw_scale(filename, type):
    """Draw the caption and, unless disabled, a simple time scale.

    filename -- path of the input file; only its last component is shown in
                the "Generated from ..." caption (both "\\" and "/" are
                treated as separators)
    type     -- scale type from the command line; "NONE" suppresses the
                scale, any other value draws the simple scale

    The scale consists of two dashed lines at the top and bottom margins,
    labelled with the smallest and largest value of the quantity selected by
    the global TIME (birth number, real time or depth). A label is left
    empty when TIME is unknown or when there are no data to label it with.
    """
    def birth_number(node_id):
        # same convention as the layout code: an optional leading "c"
        node_id = str(node_id)
        return int(node_id[1:]) if node_id[0] == "c" else int(node_id)

    caption_source = filename.replace("\\", "/").split("/")[-1]
    svg_add_text("Generated from " + caption_source, (5, 15), "start")

    # the scale type was previously ignored, so NONE still drew a scale
    if type == "NONE":
        return

    if TIME == "BIRTHS":
        label = "Birth #"
        values = [birth_number(k) for k in nodes]
    elif TIME == "REAL":
        label = "Time "
        values = list(time.values())
    elif TIME == "GENERATIONAL":
        label = "Depth "
        values = list(depth.values())
    else:
        label = ""
        values = []

    # min()/max() of an empty sequence would raise; leave the labels empty
    start_text = label + str(min(values)) if values else ""
    end_text = label + str(max(values)) if values else ""

    svg_add_line( (w*0.7, h_margin), (w, h_margin), svg_scale_line_style)
    svg_add_text( start_text, (w, h_margin + 15), "end")

    svg_add_line( (w*0.7, h-h_margin), (w, h-h_margin), svg_scale_line_style)
    svg_add_text( end_text, (w, h-h_margin + 15), "end")
393
394
395##################################################### main #####################################################
396
args = 0 # placeholder; main() replaces it with the parsed command-line arguments

# Canvas geometry of the generated SVG.
h = 800 # written as the height attribute of the <svg> element
w = 600 # written as the width attribute of the <svg> element
h_margin = 20
w_margin = 10
h_no_margs = h - 2* h_margin # drawable height between the top and bottom margins
w_no_margs = w - 2* w_margin # drawable width between the left and right margins

# Extents of the computed layout; updated in prepos_children() and used by
# the drawing functions to scale positions into the drawable area.
max_height = 0
max_width = 0
min_width = 9999999999 # large sentinel so that the first min() replaces it

min_skeleton_depth = 0 # overwritten in main() from --min-skeleton-depth

# Genealogy data shared by the loading, layout and drawing functions.
firstnode = "" # identifier of the root; set by load_data() / load_simple_data()
nodes = {} # child -> its parent(s); filled by the loaders
inv_nodes = {} # parent -> list of children; filled by the loaders
positions = {} # node -> [x, y] in layout coordinates
visited= {} # nodes already handled by the current traversal
depth = {} # node -> distance to its farthest leaf; filled by compute_depth()
time = {} # node -> "Time" value from the input; filled by load_data()
kind = {} # node -> "Kind" value from the input; filled by load_data()
420
def main():
    """Parse the command line, load the genealogy and write the SVG tree.

    Steps: parse arguments and copy the drawing options into the module
    globals, seed the random number generator (the seed is printed so a run
    can be reproduced), load the data in the structured or simple format,
    compute node depths and positions, and finally write the requested
    layers (tree, skeleton, spine, scale) to the output file.
    """
    global svg_file, min_skeleton_depth, args, \
        TIME, BALANCE, DOT_STYLE, COLORING, JITTER, \
        svg_mutation_line_style, svg_crossover_line_style

    def str_to_bool(value):
        # argparse's type=bool treats every non-empty string (including
        # "False") as True, so the text is interpreted explicitly.
        lowered = value.strip().lower()
        if lowered in ("1", "true", "t", "yes", "y", "on"):
            return True
        if lowered in ("", "0", "false", "f", "no", "n", "off"):
            return False
        raise argparse.ArgumentTypeError("expected a boolean value, got '" + value + "'")

    parser = argparse.ArgumentParser(description='Draws a genealogical tree (generates a SVG file) based on parent-child relationship information from a text file. Supports files generated by Framsticks experiments.')
    parser.add_argument('-i', '--in', dest='input', required=True, help='input file name with structured evolutionary data')
    parser.add_argument('-o', '--out', dest='output', required=True, help='output file name for the evolutionary tree (SVG format)')
    draw_tree_parser = parser.add_mutually_exclusive_group(required=False)
    draw_tree_parser.add_argument('--draw-tree', dest='draw_tree', action='store_true', help='whether the full tree should be drawn')
    draw_tree_parser.add_argument('--no-draw-tree', dest='draw_tree', action='store_false')

    draw_skeleton_parser = parser.add_mutually_exclusive_group(required=False)
    draw_skeleton_parser.add_argument('--draw-skeleton', dest='draw_skeleton', action='store_true', help='whether the skeleton of the tree should be drawn')
    draw_skeleton_parser.add_argument('--no-draw-skeleton', dest='draw_skeleton', action='store_false')

    draw_spine_parser = parser.add_mutually_exclusive_group(required=False)
    draw_spine_parser.add_argument('--draw-spine', dest='draw_spine', action='store_true', help='whether the spine of the tree should be drawn')
    draw_spine_parser.add_argument('--no-draw-spine', dest='draw_spine', action='store_false')

    #TODO: better names for those parameters
    parser.add_argument('-t', '--time', default='GENERATIONAL', dest='time', help='values on vertical axis (BIRTHS/GENERATIONAL/REAL); '
                                                                      'BIRTHS: time measured as the number of births since the beginning; '
                                                                      'GENERATIONAL: time measured as number of ancestors; '
                                                                      'REAL: real time of the simulation')
    parser.add_argument('-b', '--balance', default='DENSITY', dest='balance', help='method of placing nodes in the tree (RANDOM/MIN/DENSITY)')
    parser.add_argument('-s', '--scale', default='NONE', dest='scale', help='type of timescale added to the tree (NONE/SIMPLE)')
    parser.add_argument('-c', '--coloring', default='IMPORTANCE', dest="coloring", help='method of coloring the tree (NONE/IMPORTANCE/TYPE)')
    parser.add_argument('-d', '--dots', default='TYPE', dest='dots', help='method of drawing dots (individuals) (NONE/NORMAL/TYPE)')
    parser.add_argument('-j', '--jitter', dest="jitter", action='store_true', help='draw horizontal positions of children from the normal distribution')

    parser.add_argument('--color-mut', default="#000000", dest="color_mut", help='color of clone/mutation lines in rgba (e.g. #FF60B240) for TYPE coloring')
    parser.add_argument('--color-cross', default="#660198", dest="color_cross", help='color of crossover lines in rgba (e.g. #FF60B240) for TYPE coloring')

    parser.add_argument('--min-skeleton-depth', type=int, default=2, dest='min_skeleton_depth', help='minimal distance from the leafs for the nodes in the skeleton')
    parser.add_argument('--seed', type=int, dest='seed', help='seed for the random number generator (-1 for random)')

    # "--simple-data" alone and "--simple-data True" both enable the simple
    # format; "--simple-data False" now really disables it.
    parser.add_argument('--simple-data', type=str_to_bool, nargs='?', const=True, default=False, dest='simple_data',
                        help='input data are given in a simple format (#child #parent)')

    parser.set_defaults(draw_tree=True)
    parser.set_defaults(draw_skeleton=False)
    parser.set_defaults(draw_spine=False)

    parser.set_defaults(seed=-1)

    args = parser.parse_args()

    TIME = args.time
    BALANCE = args.balance
    DOT_STYLE = args.dots
    COLORING = args.coloring
    JITTER = args.jitter

    # the TYPE coloring styles get their stroke color from the command line
    svg_mutation_line_style += hex_to_style(args.color_mut)
    svg_crossover_line_style += hex_to_style(args.color_cross)

    dir = args.input
    min_skeleton_depth = args.min_skeleton_depth
    seed = args.seed
    if seed == -1:
        seed = random.randint(0, 10000)
    random.seed(seed)
    print("seed:", seed)

    if args.simple_data:
        load_simple_data(dir)
    else:
        load_data(dir)

    compute_depth(firstnode)

    # svg_file is declared global above, so the "as" target rebinds the
    # module-level name used by the svg_add_* helpers; the context manager
    # closes the file even if drawing fails.
    with open(args.output, "w") as svg_file:
        svg_file.write('<svg xmlns:svg="http://www.w3.org/2000/svg" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.0" '
                       'width="' + str(w) + '" height="' + str(h) + '">')

        prepos_children()

        if args.draw_tree:
            draw_children()
        if args.draw_skeleton:
            draw_skeleton()
        if args.draw_spine:
            draw_spine()

        draw_scale(dir, args.scale)

        svg_file.write("</svg>")
509
# Run only when executed as a script, so that importing this module (e.g. to
# reuse its functions) does not trigger command-line parsing.
if __name__ == "__main__":
    main()
Note: See TracBrowser for help on using the repository browser.