source: mds-and-trees/tree-genealogy.py @ 589

Last change on this file since 589 was 589, checked in by konrad, 8 years ago

Fixed dark lines inside lines in tree-genealogy.py

File size: 19.2 KB
RevLine 
[562]1# Draws a genealogical tree (generates a SVG file) based on parent-child relationship information.
2
3import json
4import random
5import math
6import argparse
7
# ------ runtime options ------
# Placeholders only: main() overwrites every one of them from the command line.
TIME = ""  # BIRTHS / GENERATIONAL / REAL
BALANCE = ""  # RANDOM / MIN / DENSITY

DOT_STYLE = ""  # NONE / NORMAL / TYPE

# Declared here like the other options so that the drawing code can always
# resolve the name, even before main() has assigned it.
COLORING = ""  # NONE / IMPORTANCE / TYPE

JITTER = False  # True: draw horizontal offsets from a normal distribution

# ------SVG---------
# Output file object; main() replaces it with the opened SVG file.
svg_file = None

# Attribute strings pasted verbatim into the generated SVG elements.
svg_line_style = 'stroke="rgb(90%,10%,16%)" stroke-width="1" stroke-opacity="0.7"'
# main() appends the stroke color/opacity to the next two styles.
svg_mutation_line_style = 'stroke-width="1"'
svg_crossover_line_style = 'stroke-width="1"'
svg_spine_line_style = 'stroke="rgb(0%,90%,40%)" stroke-width="2" stroke-opacity="1"'
svg_scale_line_style = 'stroke="black" stroke-width="0.5" stroke-opacity="1" stroke-dasharray="5, 5"'

svg_dot_style = 'r="2" stroke="black" stroke-width="0.2" fill="red"'
svg_clear_dot_style = 'r="2" stroke="black" stroke-width="0.4" fill="none"'
svg_spine_dot_style = 'r="1" stroke="black" stroke-width="0.2" fill="rgb(50%,50%,100%)"'

svg_scale_text_style = 'style="font-family: Arial; font-size: 12; fill: #000000;"'
29
def hex_to_style(hex):
    """Convert an RRGGBB or RRGGBBAA hex color into SVG stroke attributes.

    Args:
        hex: color string, with or without a leading '#'.

    Returns:
        A string of the form ' stroke="rgb(R%,G%,B%)" stroke-opacity="A" '.
        The opacity is 0.5 unless an alpha byte is given. For malformed input
        a warning is printed and a half-transparent black style is returned.
    """
    default_style = ' stroke="black" stroke-opacity="0.5" '

    # startswith() is safe for an empty string, unlike indexing hex[0].
    if hex.startswith("#"):
        hex = hex[1:]

    if len(hex) not in (6, 8):
        print("Wrong number of digits in the color's hex #" + hex + "! Assuming black.")
        return default_style

    # Check every character: int(hex, 16) alone would accept strings such as
    # "0x1234" or "+12345" whose two-character slices cannot be parsed below.
    if any(ch not in "0123456789abcdefABCDEF" for ch in hex):
        print("Wrong characters in the color's hex #" + hex + "! Assuming black.")
        return default_style

    red = 100*int(hex[0:2], 16)/255
    green = 100*int(hex[2:4], 16)/255
    blue = 100*int(hex[4:6], 16)/255
    opacity = 0.5
    if len(hex) == 8:
        opacity = int(hex[6:8], 16)/255
    return ' stroke="rgb(' +str(red)+ '%,' +str(green)+ '%,' +str(blue)+ '%)" stroke-opacity="' +str(opacity)+ '" '
[585]52
def svg_add_line(from_pos, to_pos, style=svg_line_style):
    """Write one <line> element from from_pos to to_pos to the open SVG file.

    Both positions are indexable as (x, y); style is a ready-made string of
    SVG attributes.
    """
    element = '<line {} x1="{}" x2="{}" y1="{}" y2="{}"  fill="none"/>'.format(
        style, from_pos[0], to_pos[0], from_pos[1], to_pos[1])
    svg_file.write(element)
[562]56
def svg_add_text(text, pos, anchor, style=svg_scale_text_style):
    """Write one <text> element to the open SVG file.

    Args:
        text: label content; XML special characters are escaped.
        pos: (x, y) position of the text anchor.
        anchor: value for the text-anchor attribute (e.g. "start", "end").
        style: ready-made string of SVG attributes.
    """
    # Local import so that this function stays self-contained.
    from xml.sax.saxutils import escape

    # Labels may contain arbitrary characters (e.g. a file name with '&' or
    # '<'); left unescaped they would make the SVG document malformed.
    svg_file.write('<text ' + style + ' text-anchor="' + anchor + '" x="' + str(pos[0]) + '" y="' + str(pos[1]) + '" >' + escape(text) + '</text>')
59
def svg_add_dot(pos, style=svg_dot_style):
    """Write one <circle> element centered at pos (x, y) to the open SVG file."""
    element = '<circle {} cx="{}" cy="{}" />'.format(style, pos[0], pos[1])
    svg_file.write(element)
62
def svg_generate_line_style(percent):
    """Build SVG stroke attributes for a line of the given importance.

    Args:
        percent: importance; 0 selects the "from" end and 1 the "to" end of
            the color, opacity and width ranges.

    Returns:
        A string with stroke (rgb percentages), stroke-width and
        stroke-opacity attributes.
    """
    # Active palette: black -> purple. The earlier "hotdog" assignments were
    # dead code (overwritten right away) and have been dropped.
    # Alternative palettes kept for reference:
    #   hotdog: [100, 70, 0] -> [60, 0, 0]
    #   lava:   [100, 80, 0] -> [100, 0, 0]
    #   neon:   [30, 200, 255] -> [240, 0, 220]
    from_col = [0, 0, 0]
    to_col = [80, 0, 80]

    from_opa = 0.2
    to_opa = 1.0
    from_stroke = 1
    to_stroke = 3

    # Opacity and width are interpolated linearly...
    opa = from_opa*(1-percent) + to_opa*percent
    stroke = from_stroke*(1-percent) + to_stroke*percent

    # ...while the color uses a steep curve that approaches the "to" color
    # already for small values of percent.
    percent = 1 - ((1-percent)**20)

    channels = [str(lo*(1-percent) + hi*percent) for lo, hi in zip(from_col, to_col)]

    return 'stroke="rgb(' + channels[0] + '%,' + channels[1] + '%,' + channels[2] + '%)" stroke-width="' \
           + str(stroke) + '" stroke-opacity="' + str(opa) + '"'
[562]89
def svg_generate_dot_style(kind):
    """Build SVG circle attributes for an individual of the given kind.

    Args:
        kind: integer index selecting the fill color; values outside the
            palette wrap around instead of raising IndexError.

    Returns:
        A string with fill, r, stroke, stroke-width and opacity attributes.
        The radius shrinks as the global `nodes` dict grows, capped at 10.
    """
    kinds = ["red", "lawngreen", "royalblue", "magenta", "yellow", "cyan", "white", "black"]

    # Guard against an empty tree: fall back to the maximum radius instead
    # of dividing by zero.
    count = len(nodes)
    r = min(2500/count, 10) if count else 10

    color = kinds[kind % len(kinds)]

    return 'fill="' + color + '" r="' + str(r) + '" stroke="black" stroke-width="' + str(r/10) + '" fill-opacity="1.0" ' \
           'stroke-opacity="1.0"'
[564]97
[562]98# -------------------
99
def load_data(dir):
    """Load "[OFFSPRING] {json}" records from a log file into the global tables.

    For every record with "FromIDs", `nodes[ID]` becomes a dict mapping each
    parent ID to its contribution (currently always 1). "Time" and "Kind"
    values, when present, are stored in `time` and `kind`. Afterwards
    `inv_nodes` (parent -> list of children) is derived from `nodes`.
    `firstnode` is set to the most recently seen first parent that had no
    record of its own at that point.

    Args:
        dir: path of the input file.

    Raises:
        SystemExit: when the same ID appears in two records.
    """
    global firstnode, nodes, inv_nodes, time
    # The context manager closes the file even if parsing fails midway.
    with open(dir) as f:
        for line in f:
            sline = line.split(' ', 1)
            if len(sline) != 2 or sline[0] != "[OFFSPRING]":
                continue

            creature = json.loads(sline[1])
            if "FromIDs" in creature:
                if creature["ID"] in nodes:
                    print("Doubled entry for " + str(creature["ID"]))
                    # quit() is an interactive helper that may be absent;
                    # raise SystemExit directly and signal failure.
                    raise SystemExit(1)

                # we assign to each parent its contribution to the genotype of the child
                # (fixed to 1 for now; the "Inherited" field is not used yet)
                nodes[creature["ID"]] = {parent: 1 for parent in creature["FromIDs"]}

                if creature["FromIDs"][0] not in nodes:
                    firstnode = creature["FromIDs"][0]

            if "Time" in creature:
                time[creature["ID"]] = creature["Time"]

            if "Kind" in creature:
                kind[creature["ID"]] = creature["Kind"]

    # Build the parent -> children index in a deterministic (sorted) order.
    for child in sorted(nodes):
        for parent in sorted(nodes[child]):
            inv_nodes.setdefault(parent, []).append(child)
[562]133
134
def load_simple_data(dir):
    """Load a tree given in the simple "#child #parent" text format.

    A line with a single token names the root (`firstnode`); a line with two
    or more tokens is a child followed by its parent. Parent numbers smaller
    than the root are clamped to the root. Blank lines are ignored.

    `nodes[child]` is stored as a {parent: 1} dict, the same shape load_data()
    produces, because the layout and drawing code iterates over
    `nodes[child].items()`.

    Args:
        dir: path of the input file.
    """
    global firstnode, nodes, inv_nodes
    with open(dir) as f:
        for line in f:
            sline = line.split()
            if not sline:
                # blank line (e.g. at the end of the file)
                continue
            if len(sline) == 1:
                firstnode = sline[0]
                continue
            if sline[0] == firstnode:
                continue
            parent = str(max(int(sline[1]), int(firstnode)))
            nodes[sline[0]] = {parent: 1}

    # Build the parent -> children index in a deterministic (sorted) order.
    for child in sorted(nodes):
        for parent in sorted(nodes[child]):
            inv_nodes.setdefault(parent, []).append(child)
152
153    #print(str(inv_nodes))
154    #quit()
155
def compute_depth(node):
    """Recursively record in `depth` the height of the subtree under node.

    A node without children gets 0; any other node gets one more than its
    deepest child. Returns the value stored for node.
    """
    children = inv_nodes[node] if node in inv_nodes else []
    height = max([compute_depth(child) + 1 for child in children], default=0)
    depth[node] = height
    return height
163
164# ------------------------------------
165
def xmin_crowd(x1, x2, y):
    """Choose between two candidate x positions for a node placed at height y.

    The strategy is selected by the global BALANCE:
      RANDOM  - pick either candidate with equal probability;
      MIN     - pick the candidate whose nearest already placed node at
                exactly height y is farther away;
      DENSITY - pick the candidate with the larger summed distance to the
                already placed nodes within 10 units of y vertically.
    Ties go to x2.

    Raises:
        ValueError: for an unknown BALANCE value (previously None was
            returned silently, which corrupted the layout later on).
    """
    if BALANCE == "RANDOM":
        return (x1 if random.randrange(2) == 0 else x2)
    elif BALANCE == "MIN":
        x1_closest = 999999
        x2_closest = 999999
        for pos in positions.values():
            if pos[1] == y:
                x1_closest = min(x1_closest, abs(x1-pos[0]))
                x2_closest = min(x2_closest, abs(x2-pos[0]))
        return (x1 if x1_closest > x2_closest else x2)
    elif BALANCE == "DENSITY":
        x1_dist = 0
        x2_dist = 0
        for pos in positions.values():
            # Both bounds must hold; the former "or" was true for every node,
            # so the vertical window had no effect.
            if y-10 < pos[1] < y+10:
                dy = pos[1]-y
                dx1 = pos[0]-x1
                dx2 = pos[0]-x2

                x1_dist += math.sqrt(dy**2 + dx1**2)
                x2_dist += math.sqrt(dy**2 + dx2**2)
        return (x1 if x1_dist > x2_dist else x2)
    raise ValueError("Unknown balance method: " + str(BALANCE))
191
192# ------------------------------------
193
def prepos_children_reccurent(node):
    """Assign [x, y] positions to the descendants of node, depth-first.

    A child is handled only once all of its parents are marked in `visited`.
    Its y coordinate follows the global TIME mode. A child with one parent is
    placed beside that parent, on the side chosen by xmin_crowd(); a child
    with several parents is placed at the weighted mean of their x values.
    """
    global visited
    for child in inv_nodes[node]:

        # we want to visit the node just once, after all of its parents
        if not all_parents_visited(child):
            continue
        visited[child] = True

        if TIME == "BIRTHS":
            # the ID itself is the birth number, optionally prefixed with "c"
            child_y = int(child[1:]) if child[0] == "c" else int(child)
        elif TIME == "GENERATIONAL":
            child_y = positions[node][1]+1
        elif TIME == "REAL":
            child_y = time[child]
        else:
            child_y = 0

        parents = nodes[child]
        if len(parents) == 1:
            spread = random.gauss(0,1) if JITTER == True else 1
            left = positions[node][0]-spread
            right = positions[node][0]+spread
            positions[child] = [xmin_crowd(left, right, child_y), child_y]
        else:
            total = sum(parents.values())
            child_x = sum([positions[p][0]*share/total for p, share in parents.items()])
            if JITTER == True:
                child_x = child_x + random.gauss(0, 0.1)
            positions[child] = [child_x, child_y]

        if child in inv_nodes:
            prepos_children_reccurent(child)
234
def prepos_children():
    """Lay out the whole tree and record the extent of the layout.

    Places `firstnode` at [0, 0], positions all of its descendants and then
    updates the globals max_height, max_width and min_width from the
    resulting positions. If REAL time was requested but the input carried no
    time data, the global TIME is switched to BIRTHS.
    """
    # TIME must be declared global: without it the fallback below only bound
    # a local variable and the layout still tried to use REAL time.
    global max_height, max_width, min_width, visited, TIME

    # Warn only when REAL time was actually requested.
    if TIME == "REAL" and not time:
        print("REAL time requested, but no real time data provided. Assuming BIRTHS time instead.")
        TIME = "BIRTHS"

    positions[firstnode] = [0, 0]

    visited = {}
    visited[firstnode] = True
    prepos_children_reccurent(firstnode)

    for pos in positions.values():
        max_height = max(max_height, pos[1])
        max_width = max(max_width, pos[0])
        min_width = min(min_width, pos[0])
252
253# ------------------------------------
254
def all_parents_visited(node):
    """Return True when every parent of node is already a key of `visited`."""
    return all(parent in visited for parent in nodes[node])
262# ------------------------------------
263
def draw_children_recurrent(node, max_depth):
    """Draw the descendants of node: subtree first, then lines, then the dot.

    For each child whose parents are all marked in `visited`, the child's own
    subtree is drawn, followed by one line from every parent to the child
    (styled according to the global COLORING) and finally the child's dot
    (styled according to the global DOT_STYLE).
    """
    global visited

    def to_canvas(key):
        # map a layout position onto the canvas, inside the margins
        x, y = positions[key][0], positions[key][1]
        return (w_margin+w_no_margs*(x-min_width)/(max_width-min_width), h_margin+h_no_margs*y/max_height)

    for child in inv_nodes[node]:

        # we want to draw the node just once
        if not all_parents_visited(child):
            continue
        visited[child] = True

        if child in inv_nodes:
            draw_children_recurrent(child, max_depth)

        if COLORING == "NONE":
            line_style = svg_line_style
        elif COLORING == "TYPE":
            # one parent: clone/mutation; several parents: crossover
            if len(nodes[child]) == 1:
                line_style = svg_mutation_line_style
            else:
                line_style = svg_crossover_line_style
        else: # IMPORTANCE, default
            line_style = svg_generate_line_style(depth[child]/max_depth)

        for parent in sorted(nodes[child]):
            svg_add_line(to_canvas(parent), to_canvas(child), line_style)

        if DOT_STYLE == "NONE":
            continue
        if DOT_STYLE == "TYPE":
            dot_style = svg_generate_dot_style(kind[child] if child in kind else 0)
        else: # NORMAL, default
            dot_style = svg_clear_dot_style
        svg_add_dot(to_canvas(child), dot_style)
def draw_children():
    """Draw the full tree below `firstnode`, then the dot of `firstnode`."""
    global visited
    visited = {firstnode: True}

    # largest recorded depth, never below 0
    max_depth = max([0] + list(depth.values()))
    draw_children_recurrent(firstnode, max_depth)

    if DOT_STYLE == "NONE":
        return
    if DOT_STYLE == "TYPE":
        dot_style = svg_generate_dot_style(kind[firstnode] if firstnode in kind else 0)
    else: # NORMAL, default
        dot_style = svg_clear_dot_style

    root_x = w_margin+w_no_margs*(positions[firstnode][0]-min_width)/(max_width-min_width)
    root_y = h_margin+h_no_margs*positions[firstnode][1]/max_height
    svg_add_dot((root_x, root_y), dot_style)
[562]314
def draw_spine_recurrent(node):
    """Draw the spine below node using the spine line style.

    Only children whose depth is exactly one less than the depth of node are
    followed; the child's subtree is drawn before the line node -> child.
    (Dot drawing for spine nodes is currently disabled.)
    """
    def to_canvas(key):
        # map a layout position onto the canvas, inside the margins
        x, y = positions[key][0], positions[key][1]
        return (w_margin+w_no_margs*(x-min_width)/(max_width-min_width), h_margin+h_no_margs*y/max_height)

    for child in inv_nodes[node]:
        if depth[child] != depth[node] - 1:
            continue
        if child in inv_nodes:
            draw_spine_recurrent(child)

        svg_add_line(to_canvas(node), to_canvas(child), svg_spine_line_style)
def draw_spine():
    """Draw the spine of the tree, starting from `firstnode`.

    (Dot drawing for the root of the spine is currently disabled.)
    """
    draw_spine_recurrent(firstnode)
328
def draw_skeleton_reccurent(node, max_depth):
    """Draw the skeleton below node using the spine line style.

    A child belongs to the skeleton when its depth is at least
    `min_skeleton_depth`, or when it is (one of) the deepest children of
    node. The child's subtree is drawn before the line node -> child.

    Args:
        node: ID of the node whose children are examined.
        max_depth: passed through the recursion unchanged; not used otherwise.
    """
    def to_canvas(key):
        # map a layout position onto the canvas, inside the margins
        x, y = positions[key][0], positions[key][1]
        return (w_margin+w_no_margs*(x-min_width)/(max_width-min_width), h_margin+h_no_margs*y/max_height)

    children = inv_nodes[node]
    # The deepest sibling does not change during the loop, so compute it once
    # instead of once per child.
    deepest = max([depth[q] for q in children]) if children else None

    for c in children:
        if depth[c] >= min_skeleton_depth or depth[c] == deepest:
            if c in inv_nodes:
                draw_skeleton_reccurent(c, max_depth)

            svg_add_line(to_canvas(node), to_canvas(c), svg_spine_line_style)
def draw_skeleton():
    """Draw the skeleton of the tree, starting from `firstnode`.

    (Dot drawing for the root of the skeleton is currently disabled.)
    """
    # largest recorded depth, never below 0
    deepest = max([0] + list(depth.values()))
    draw_skeleton_reccurent(firstnode, deepest)
348
[576]349# ------------------------------------
[562]350
def draw_scale(filename ,type):
    """Draw the caption and the dashed start/end markers of the time scale.

    Args:
        filename: path of the input file; only its last component is shown.
        type: scale type from the command line; "NONE" draws nothing
            (previously this argument was ignored and the scale always drawn).
    """
    if type == "NONE":
        return

    def birth_number(node_id):
        # same convention as the layout code: an optional "c" prefix
        return int(node_id[1:]) if node_id[0] == "c" else int(node_id)

    if TIME == "BIRTHS":
        label, values = "Birth #", [birth_number(k) for k in nodes]
    elif TIME == "REAL":
        label, values = "Time ", list(time.values())
    elif TIME == "GENERATIONAL":
        label, values = "Depth ", list(depth.values())
    else:
        label, values = "", []

    # With no data there is nothing to label; avoid min()/max() of an empty list.
    start_text = label + str(min(values)) if values else ""
    end_text = label + str(max(values)) if values else ""

    # Accept both Windows and POSIX path separators.
    basename = filename.replace("\\", "/").split("/")[-1]
    svg_add_text( "Generated from " + basename, (5, 15), "start")

    svg_add_line( (w*0.7, h_margin), (w, h_margin), svg_scale_line_style)
    svg_add_text( start_text, (w, h_margin + 15), "end")

    svg_add_line( (w*0.7, h-h_margin), (w, h-h_margin), svg_scale_line_style)
    svg_add_text( end_text, (w, h-h_margin + 15), "end")
[576]374
375
[562]376##################################################### main #####################################################
377
# Parsed command-line arguments; main() replaces this placeholder.
args = 0

# Canvas size and margins; the *_no_margs values are the drawable area.
h, w = 800, 600
h_margin, w_margin = 20, 10
h_no_margs = h - 2*h_margin
w_no_margs = w - 2*w_margin

# Extent of the layout, updated by prepos_children().
max_height = 0
max_width = 0
min_width = 9999999999

# Depth threshold for the skeleton; main() sets it from the command line.
min_skeleton_depth = 0

# Tree data shared by the loading, layout and drawing functions.
firstnode = ""   # ID of the root node
nodes = {}       # child ID -> parents of that child
inv_nodes = {}   # parent ID -> list of child IDs
positions = {}   # node ID -> [x, y] layout position
visited = {}     # node IDs already handled by the current traversal
depth = {}       # node ID -> height of the subtree below the node
time = {}        # node ID -> "Time" value from the input
kind = {}        # node ID -> "Kind" value from the input
[562]401
def main():
    """Parse the command line, load the data, lay out the tree and write the SVG."""
    global svg_file, min_skeleton_depth, args, \
        TIME, BALANCE, DOT_STYLE, COLORING, JITTER, \
        svg_mutation_line_style, svg_crossover_line_style

    def parse_bool(value):
        # argparse's type=bool treats every non-empty string (even "False")
        # as True. Recognize the usual "false" spellings explicitly; any
        # other value still enables the option, as it did before.
        return value.strip().lower() not in ("", "0", "false", "f", "no", "n")

    parser = argparse.ArgumentParser(description='Draws a genealogical tree (generates a SVG file) based on '
                                                 'parent-child relationship information.')
    parser.add_argument('-i', '--in', dest='input', required=True, help='input file with structured evolutionary data')
    parser.add_argument('-o', '--out', dest='output', required=True, help='output file for the evolutionary tree')
    draw_tree_parser = parser.add_mutually_exclusive_group(required=False)
    draw_tree_parser.add_argument('--draw-tree', dest='draw_tree', action='store_true', help='whether the full tree should be drawn')
    draw_tree_parser.add_argument('--no-draw-tree', dest='draw_tree', action='store_false')

    draw_skeleton_parser = parser.add_mutually_exclusive_group(required=False)
    draw_skeleton_parser.add_argument('--draw-skeleton', dest='draw_skeleton', action='store_true', help='whether the skeleton of the tree should be drawn')
    draw_skeleton_parser.add_argument('--no-draw-skeleton', dest='draw_skeleton', action='store_false')

    draw_spine_parser = parser.add_mutually_exclusive_group(required=False)
    draw_spine_parser.add_argument('--draw-spine', dest='draw_spine', action='store_true', help='whether the spine of the tree should be drawn')
    draw_spine_parser.add_argument('--no-draw-spine', dest='draw_spine', action='store_false')

    #TODO: better names for those parameters
    parser.add_argument('-t', '--time', default='GENERATIONAL', dest='time', help='values on vertical axis (BIRTHS/GENERATIONAL/REAL); '
                                                                      'BIRTHS: time measured as the number of births since the beginning; '
                                                                      'GENERATIONAL: time measured as number of ancestors; '
                                                                      'REAL: real time of the simulation')
    parser.add_argument('-b', '--balance', default='DENSITY', dest='balance', help='method of placing node in the tree (RANDOM/MIN/DENSITY)')
    parser.add_argument('-s', '--scale', default='NONE', dest='scale', help='type of timescale added to the tree (NONE/SIMPLE)')
    parser.add_argument('-c', '--coloring', default='IMPORTANCE', dest="coloring", help='method of coloring the tree (NONE/IMPORTANCE/TYPE)')
    parser.add_argument('-d', '--dots', default='TYPE', dest='dots', help='method of drawing dots (individuals) (NONE/NORMAL/TYPE)')
    parser.add_argument('-j', '--jitter', dest="jitter", action='store_true', help='draw horizontal positions of children from the normal distribution')

    parser.add_argument('--color-mut', default="#000000", dest="color_mut", help='color of clone/mutation lines in rgba (e.g. #FF60B240) for TYPE coloring')
    parser.add_argument('--color-cross', default="#660198", dest="color_cross", help='color of crossover lines in rgba (e.g. #FF60B240) for TYPE coloring')

    parser.add_argument('--min-skeleton-depth', type=int, default=2, dest='min_skeleton_depth', help='minimal distance from the leafs for the nodes in the skeleton')
    parser.add_argument('--seed', type=int, dest='seed', help='seed for the random number generator (-1 for random)')

    # Usable both as a bare flag and with an explicit value ("--simple-data True").
    parser.add_argument('--simple-data', type=parse_bool, nargs='?', const=True, default=False, dest='simple_data',
                        help='input data are given in a simple format (#child #parent)')

    parser.set_defaults(draw_tree=True)
    parser.set_defaults(draw_skeleton=False)
    parser.set_defaults(draw_spine=False)

    parser.set_defaults(seed=-1)

    args = parser.parse_args()

    TIME = args.time
    BALANCE = args.balance
    DOT_STYLE = args.dots
    COLORING = args.coloring
    JITTER = args.jitter

    svg_mutation_line_style += hex_to_style(args.color_mut)
    svg_crossover_line_style += hex_to_style(args.color_cross)

    dir = args.input
    min_skeleton_depth = args.min_skeleton_depth
    seed = args.seed
    if seed == -1:
        seed = random.randint(0, 10000)
    random.seed(seed)
    # printed so that a randomly seeded run can be reproduced
    print("seed:", seed)

    if args.simple_data:
        load_simple_data(dir)
    else:
        load_data(dir)

    compute_depth(firstnode)

    # svg_file is global (declared above) because the svg_add_* helpers write
    # to it; the with-block closes the file even if drawing fails.
    with open(args.output, "w") as svg_file:
        svg_file.write('<svg xmlns:svg="http://www.w3.org/2000/svg" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.0" '
                       'width="' + str(w) + '" height="' + str(h) + '">')

        prepos_children()

        if args.draw_tree:
            draw_children()
        if args.draw_skeleton:
            draw_skeleton()
        if args.draw_spine:
            draw_spine()

        draw_scale(dir, args.scale)

        svg_file.write("</svg>")


# Run only when executed as a script, so importing the module has no side effects.
if __name__ == "__main__":
    main()
492
Note: See TracBrowser for help on using the repository browser.