source: mds-and-trees/tree-genealogy.py @ 577

Last change on this file since 577 was 577, checked in by konrad, 8 years ago

Supporting coloring edges and nodes by type, scalable dots

File size: 17.8 KB
RevLine 
# Draws a genealogical tree (generates an SVG file) based on parent-child relationship information.
2
import argparse
import json
import math
import random
from xml.sax.saxutils import escape
7
# Run-time options. main() overwrites all of them from the command line.
TIME = ""  # vertical axis: BIRTHS / GENERATIONAL / REAL
BALANCE = ""  # horizontal placement of a child: RANDOM / MIN / DENSITY

DOT_STYLE = ""  # NONE / NORMAL / KIND

# Edge coloring: NONE / IMPORTANCE / TYPE.  Defined here so that the drawing
# functions can read it even when main() has not run.
COLORING = ""

JITTER = ""  # truthy: horizontal positions get Gaussian noise

# ------SVG---------
svg_file = 0  # replaced by the opened output file in main()

# Attribute strings inserted verbatim into the generated <line> tags.
svg_line_style = 'stroke="rgb(90%,10%,16%)" stroke-width="1" stroke-opacity="0.7"'
svg_mutation_line_style = 'stroke="black" stroke-width="1" stroke-opacity="0.7"'
svg_crossover_line_style = 'stroke="darkviolet" stroke-width="1" stroke-opacity="0.7"'
svg_spine_line_style = 'stroke="rgb(0%,90%,40%)" stroke-width="2" stroke-opacity="1"'
svg_scale_line_style = 'stroke="black" stroke-width="0.5" stroke-opacity="1" stroke-dasharray="5, 5"'

# Attribute strings inserted verbatim into the generated <circle> tags.
svg_dot_style = 'r="2" stroke="black" stroke-width="0.2" fill="red"'
svg_clear_dot_style = 'r="2" stroke="black" stroke-width="0.4" fill="none"'
svg_spine_dot_style = 'r="1" stroke="black" stroke-width="0.2" fill="rgb(50%,50%,100%)"'

# Attribute string inserted verbatim into the generated <text> tags.
svg_scale_text_style = 'style="font-family: Arial; font-size: 12; fill: #000000;"'
29
def svg_add_line(from_pos, to_pos, style=svg_line_style):
    """Write one <line> element to the open SVG file.

    from_pos, to_pos -- indexable (x, y) pairs; the values are written as-is.
    style -- attribute string placed verbatim inside the tag.
    """
    pieces = (
        '<line ', style,
        ' x1="', str(from_pos[0]),
        '" x2="', str(to_pos[0]),
        '" y1="', str(from_pos[1]),
        '" y2="', str(to_pos[1]),
        '" />',
    )
    svg_file.write(''.join(pieces))
32
def svg_add_text(text, pos, anchor, style=svg_scale_text_style):
    """Write one <text> element to the open SVG file.

    text -- the label; '&', '<' and '>' are escaped so that arbitrary strings
            (e.g. input file names) cannot break the XML.
    pos -- indexable (x, y) pair; the values are written as-is.
    anchor -- value of the text-anchor attribute (callers pass "start" / "end").
    style -- attribute string placed verbatim inside the tag.
    """
    svg_file.write('<text ' + style + ' text-anchor="' + anchor
                   + '" x="' + str(pos[0]) + '" y="' + str(pos[1]) + '" >'
                   + escape(text) + '</text>')
35
def svg_add_dot(pos, style=svg_dot_style):
    """Write one <circle> element centred at *pos* to the open SVG file.

    pos -- indexable (x, y) pair; the values are written as-is.
    style -- attribute string placed verbatim inside the tag (it carries the radius).
    """
    cx = str(pos[0])
    cy = str(pos[1])
    svg_file.write(''.join(('<circle ', style, ' cx="', cx, '" cy="', cy, '" />')))
38
def svg_generate_line_style(percent):
    """Build the attribute string for an edge from a 0..1 weight.

    Stroke width (1 -> 3) and opacity (0.2 -> 1.0) are interpolated linearly
    in *percent*.  The color is interpolated between the two palette end
    points using an eased weight 1 - (1 - percent)**20, which approaches the
    "to" color much faster than linearly.
    """
    # Palette end points in percent of R, G, B ("hotdog").
    # Other palettes that were tried:
    #   lava: [100, 80, 0]  -> [100, 0, 0]
    #   neon: [30, 200, 255] -> [240, 0, 220]
    start_rgb = (100, 70, 0)
    end_rgb = (60, 0, 0)

    def blend(lo, hi, t):
        return lo*(1-t) + hi*t

    opacity = blend(0.2, 1.0, percent)
    width = blend(1, 3, percent)

    eased = 1 - ((1-percent)**20)
    channels = [str(blend(lo, hi, eased)) + '%' for lo, hi in zip(start_rgb, end_rgb)]

    return ('stroke="rgb(' + ','.join(channels) + ')" stroke-width="' + str(width)
            + '" stroke-opacity="' + str(opacity) + '"')
[562]63
def svg_generate_dot_style(kind):
    """Build the attribute string for a dot colored by the individual's kind.

    kind -- integer index into the color list; values outside the list wrap
            around instead of raising IndexError.

    The radius shrinks as the number of loaded nodes grows (2500 / node count)
    and is capped at 10; with no nodes loaded the cap is used.
    """
    kinds = ["red", "lawngreen", "royalblue", "magenta", "yellow", "cyan", "white", "black"]

    r = min(2500/len(nodes), 10) if nodes else 10

    return 'fill="' + kinds[kind % len(kinds)] + '" r="' + str(r) + '" stroke="black" stroke-width="' + str(r/10) + '" fill-opacity="1.0" ' \
           'stroke-opacity="1.0"'
[564]71
[562]72# -------------------
73
def load_data(dir):
    """Load the genealogy from a structured log file into the module globals.

    Only lines of the form "[OFFSPRING] <json>" are used.  For every such
    record that has "FromIDs":
      * nodes[ID] becomes {parent_id: contribution}; the contribution is
        fixed at 1 for now (the record's "Inherited" field is not used yet),
      * firstnode is set to the first parent whenever that parent has no
        entry of its own in nodes.
    "Time" and "Kind" values, when present, are stored in time / kind.
    Afterwards inv_nodes (parent -> list of children) is rebuilt from nodes.

    dir -- path of the input file.
    A repeated ID prints a message and terminates the program via quit().
    """
    global firstnode, nodes, inv_nodes, time, kind

    # the context manager closes the file even if parsing fails
    with open(dir) as f:
        for line in f:
            sline = line.split(' ', 1)
            if len(sline) != 2 or sline[0] != "[OFFSPRING]":
                continue

            creature = json.loads(sline[1])

            if "FromIDs" in creature:
                if creature["ID"] in nodes:
                    print("Doubled entry for " + creature["ID"])
                    quit()

                # we assign to each parent its contribution to the genotype of the child
                nodes[creature["ID"]] = {parent: 1 for parent in creature["FromIDs"]}

                if creature["FromIDs"][0] not in nodes:
                    firstnode = creature["FromIDs"][0]

            if "Time" in creature:
                time[creature["ID"]] = creature["Time"]

            if "Kind" in creature:
                kind[creature["ID"]] = creature["Kind"]

    for child, parents in sorted(nodes.items()):
        for parent in sorted(parents):
            inv_nodes.setdefault(parent, []).append(child)
[562]107
108
def load_simple_data(dir):
    """Load the genealogy from a whitespace-separated "#child #parent" file.

    A line with a single token names the root (firstnode).  A line with two
    or more tokens adds a child; its parent number is clamped so that it is
    never lower than the root's.  Lines whose child equals the root, and
    blank lines, are skipped.

    Parents are stored as {parent_id: 1}, the same shape load_data() produces,
    because the layout and drawing code iterates nodes[child].items().

    dir -- path of the input file.
    Raises ValueError when a child line appears before the root line (the
    root is still the empty string then) or when an ID is not an integer.
    """
    global firstnode, nodes, inv_nodes

    # the context manager closes the file even if parsing fails
    with open(dir) as f:
        for line in f:
            sline = line.split()
            if not sline:
                continue
            if len(sline) == 1:
                firstnode = sline[0]
                continue
            if sline[0] == firstnode:
                continue
            parent = str(max(int(sline[1]), int(firstnode)))
            nodes[sline[0]] = {parent: 1}

    for child, parents in sorted(nodes.items()):
        for parent in sorted(parents):
            inv_nodes.setdefault(parent, []).append(child)
129
def compute_depth(node):
    """Store in depth[] and return the length of the longest child chain below *node*.

    A node without an entry in inv_nodes gets 0; otherwise the value is one
    more than the largest value among its children.  Every node reached by
    the recursion gets its depth[] entry written.
    """
    below = [compute_depth(child) + 1 for child in inv_nodes.get(node, [])]
    height = max([0] + below)
    depth[node] = height
    return height
137
138# ------------------------------------
139
def xmin_crowd(x1, x2, y):
    """Choose between two candidate x positions for a node placed at height *y*.

    The strategy is selected by the global BALANCE:
      RANDOM  -- either candidate with equal probability,
      MIN     -- the candidate whose nearest already-placed node on the same
                 row (same y) is farther away,
      DENSITY -- the candidate with the larger summed Euclidean distance to
                 the already-placed nodes whose y lies strictly within 10
                 units of *y*.
    Ties go to x2.  For any other BALANCE value the function returns None.
    """
    if BALANCE == "RANDOM":
        return (x1 if random.randrange(2) == 0 else x2)
    elif BALANCE == "MIN":
        x1_closest = 999999
        x2_closest = 999999
        for pos in positions.values():
            if pos[1] == y:
                x1_closest = min(x1_closest, abs(x1-pos[0]))
                x2_closest = min(x2_closest, abs(x2-pos[0]))
        return (x1 if x1_closest > x2_closest else x2)
    elif BALANCE == "DENSITY":
        x1_dist = 0
        x2_dist = 0
        for pos in positions.values():
            # both bounds must hold; with "or" the test was always true and
            # every placed node was counted regardless of its height
            if y-10 < pos[1] < y+10:
                dy = pos[1]-y
                dx1 = pos[0]-x1
                dx2 = pos[0]-x2

                x1_dist += math.sqrt(dy**2 + dx1**2)
                x2_dist += math.sqrt(dy**2 + dx2**2)
        return (x1 if x1_dist > x2_dist else x2)
165
166# ------------------------------------
167
def prepos_children_reccurent(node):
    """Assign layout positions to the descendants of *node*, depth first.

    A child is placed exactly once: when it has not been placed yet and all
    of its parents have been visited.  Its y coordinate depends on TIME:
      BIRTHS       -- the number in its ID (a leading "c" is dropped),
      GENERATIONAL -- y of *node* plus one,
      REAL         -- its entry in time[],
      anything else -- 0.
    A single-parent child is put to the left or right of *node* (side chosen
    by xmin_crowd; offset 1, or a Gaussian sample when JITTER is on).  A
    multi-parent child is put at the contribution-weighted mean x of its
    parents, plus small Gaussian noise when JITTER is on.
    """
    global visited
    for c in inv_nodes[node]:

        # Visit the node just once, after all of its parents.  Without the
        # "c in visited" test a child whose parents were all visited earlier
        # was placed again from every later parent.
        if c in visited or not all_parents_visited(c):
            continue
        visited[c] = True

        if TIME == "BIRTHS":
            cy = int(c[1:]) if c[0] == "c" else int(c)
        elif TIME == "GENERATIONAL":
            cy = positions[node][1]+1
        elif TIME == "REAL":
            cy = time[c]
        else:
            cy = 0

        if len(nodes[c]) == 1:
            dissimilarity = random.gauss(0, 1) if JITTER else 1
            positions[c] = [xmin_crowd(positions[node][0]-dissimilarity, positions[node][0]+dissimilarity, cy), cy]
        else:
            vsum = sum(nodes[c].values())
            cx = sum(positions[k][0]*v/vsum for k, v in nodes[c].items())

            if JITTER:
                positions[c] = [cx + random.gauss(0, 0.1), cy]
            else:
                positions[c] = [cx, cy]

        if c in inv_nodes:
            prepos_children_reccurent(c)

208
def prepos_children():
    """Lay out the whole tree and record the extent of the layout.

    The root (firstnode) is placed at [0, 0], the remaining nodes are placed
    by prepos_children_reccurent(), and max_height / max_width / min_width
    are then widened to cover every assigned position.

    If REAL time was requested but no time data was loaded, the global TIME
    is switched to BIRTHS (with a message) before the layout starts.
    """
    # TIME must be declared global, otherwise the fallback below only
    # creates a local variable and has no effect
    global max_height, max_width, min_width, visited, TIME

    if TIME == "REAL" and not time:
        print("REAL time requested, but no real time data provided. Assuming BIRTHS time instead.")
        TIME = "BIRTHS"

    positions[firstnode] = [0, 0]

    visited = {firstnode: True}
    prepos_children_reccurent(firstnode)

    for pos in positions.values():
        max_height = max(max_height, pos[1])
        max_width = max(max_width, pos[0])
        min_width = min(min_width, pos[0])
226
227# ------------------------------------
228
def all_parents_visited(node):
    """Return True when every parent listed in nodes[node] is a key of visited."""
    return all(parent in visited for parent, _ in sorted(nodes[node].items()))
236# ------------------------------------
237
def draw_children_recurrent(node, max_depth):
    """Draw the edges and dots of all descendants of *node*, depth first.

    A child is drawn exactly once: when it has not been drawn yet and all of
    its parents have been visited.  Its own subtree is drawn first, then one
    line to each of its parents, then (unless DOT_STYLE is "NONE") its dot.

    Edge style by COLORING:
      NONE -- the plain line style,
      TYPE -- mutation style for one parent, crossover style otherwise,
      anything else -- generated from depth[child] / max_depth.
    Dot style by DOT_STYLE: "KIND" colors by kind[] (0 when unknown), any
    other value uses the clear dot style.

    max_depth -- largest value in depth[], used to normalise the edge weight.
    """
    global visited

    def to_canvas(pos):
        # layout coordinates -> pixel coordinates inside the margins
        return (w_margin+w_no_margs*(pos[0]-min_width)/(max_width-min_width),
                h_margin+h_no_margs*pos[1]/max_height)

    for c in inv_nodes[node]:

        # Draw the node just once.  Without the "c in visited" test a child
        # whose parents were all visited earlier was drawn again from every
        # later parent, duplicating its lines, dot and whole subtree.
        if c in visited or not all_parents_visited(c):
            continue
        visited[c] = True

        if c in inv_nodes:
            draw_children_recurrent(c, max_depth)

        if COLORING == "NONE":
            line_style = svg_line_style
        elif COLORING == "TYPE":
            line_style = (svg_mutation_line_style if len(nodes[c]) == 1 else svg_crossover_line_style)
        else: # IMPORTANCE, default
            line_style = svg_generate_line_style(depth[c]/max_depth)

        child_xy = to_canvas(positions[c])
        for k, v in sorted(nodes[c].items()):
            svg_add_line(to_canvas(positions[k]), child_xy, line_style)

        if DOT_STYLE == "NONE":
            continue
        elif DOT_STYLE == "KIND":
            dot_style = svg_generate_dot_style(kind[c] if c in kind else 0)
        else: # NORMAL, default
            dot_style = svg_clear_dot_style
        svg_add_dot(child_xy, dot_style)

def draw_children():
    """Draw the full tree, then the dot of the root node.

    Resets visited to contain only the root, draws all descendants with
    draw_children_recurrent() and finally adds the root's dot unless
    DOT_STYLE is "NONE".
    """
    global visited
    visited = {firstnode: True}

    deepest = max([0] + list(depth.values()))
    draw_children_recurrent(firstnode, deepest)

    if DOT_STYLE == "NONE":
        return

    if DOT_STYLE == "KIND":
        root_style = svg_generate_dot_style(kind.get(firstnode, 0))
    else:  # NORMAL, default
        root_style = svg_clear_dot_style

    root = positions[firstnode]
    root_x = w_margin+w_no_margs*(root[0]-min_width)/(max_width-min_width)
    root_y = h_margin+h_no_margs*root[1]/max_height
    svg_add_dot((root_x, root_y), root_style)

[562]288
def draw_spine_recurrent(node):
    """Draw spine edges below *node*.

    An edge to a child is drawn only when the child's depth[] value is exactly
    one less than the node's, i.e. the child lies on a longest downward chain.
    The child's own spine is drawn before the edge leading to it.
    """
    wanted = depth[node] - 1
    for child in inv_nodes[node]:
        if depth[child] != wanted:
            continue

        if child in inv_nodes:
            draw_spine_recurrent(child)

        start = positions[node]
        end = positions[child]
        svg_add_line(
            (w_margin+w_no_margs*(start[0]-min_width)/(max_width-min_width), h_margin+h_no_margs*start[1]/max_height),
            (w_margin+w_no_margs*(end[0]-min_width)/(max_width-min_width), h_margin+h_no_margs*end[1]/max_height),
            svg_spine_line_style)

def draw_spine():
    """Draw the spine of the tree, starting from the root node."""
    root = firstnode
    draw_spine_recurrent(root)
302
def draw_skeleton_reccurent(node, max_depth):
    """Draw skeleton edges below *node*.

    An edge to a child is drawn when the child's depth[] value is at least
    min_skeleton_depth, or when it equals the largest depth[] value among the
    children of *node*.  The child's own skeleton is drawn before the edge
    leading to it.

    max_depth -- passed down unchanged; not used by the drawing itself.
    """
    children = inv_nodes[node]
    # the same for every child of this node, so compute it once, not per child
    deepest_child = max([depth[q] for q in children]) if children else 0

    for c in children:
        if depth[c] >= min_skeleton_depth or depth[c] == deepest_child:
            if c in inv_nodes:
                draw_skeleton_reccurent(c, max_depth)

            line_style = svg_spine_line_style
            svg_add_line( (w_margin+w_no_margs*(positions[node][0]-min_width)/(max_width-min_width), h_margin+h_no_margs*positions[node][1]/max_height),
                (w_margin+w_no_margs*(positions[c][0]-min_width)/(max_width-min_width), h_margin+h_no_margs*positions[c][1]/max_height), line_style)

def draw_skeleton():
    """Draw the skeleton of the tree, starting from the root node.

    The largest value found in depth[] (0 when it is empty) is handed to the
    recursive helper as max_depth.
    """
    deepest = max([0] + list(depth.values()))
    draw_skeleton_reccurent(firstnode, deepest)
322
[576]323# ------------------------------------
[562]324
def draw_scale(filename, type):
    """Add the "Generated from ..." caption and the two dashed scale lines with labels.

    The labels show the smallest and largest value of the quantity selected
    by the global TIME: birth numbers taken from the IDs in nodes (BIRTHS),
    values of time[] (REAL) or values of depth[] (GENERATIONAL).  When TIME
    has another value, or there is no data, the labels are empty.

    filename -- input path; only its last component is shown.  Both "\\" and
                "/" are treated as separators.
    type -- scale type given on the command line.
            NOTE(review): currently ignored, the scale is drawn even for
            "NONE" -- confirm whether "NONE" should suppress it.
    """
    def birth_number(node_id):
        # same ID convention as the layout code: an optional leading "c"
        return int(node_id[1:]) if node_id[0] == "c" else int(node_id)

    if TIME == "BIRTHS":
        label, values = "Birth #", [birth_number(k) for k in nodes]
    elif TIME == "REAL":
        label, values = "Time ", list(time.values())
    elif TIME == "GENERATIONAL":
        label, values = "Depth ", list(depth.values())
    else:
        label, values = "", []

    # min()/max() raise on an empty sequence, so fall back to empty labels
    start_text = label + str(min(values)) if values else ""
    end_text = label + str(max(values)) if values else ""

    shown_name = filename.replace("\\", "/").split("/")[-1]
    svg_add_text( "Generated from " + shown_name, (5, 15), "start")

    svg_add_line( (w*0.7, h_margin), (w, h_margin), svg_scale_line_style)
    svg_add_text( start_text, (w, h_margin + 15), "end")

    svg_add_line( (w*0.7, h-h_margin), (w, h-h_margin), svg_scale_line_style)
    svg_add_text( end_text, (w, h-h_margin + 15), "end")
[576]348
349
[562]350##################################################### main #####################################################
351
# Parsed command-line arguments; main() replaces the placeholder.
args = 0

# Canvas size in pixels and the margins kept free on each side.
w, h = 600, 800
w_margin, h_margin = 10, 20
# Size of the drawable area between the margins.
w_no_margs = w - 2 * w_margin
h_no_margs = h - 2 * h_margin

# Extent of the layout; widened by prepos_children().
max_width = 0
max_height = 0
min_width = 9999999999

min_skeleton_depth = 0

# Genealogy data and per-node state, all keyed by node ID.
firstnode = ""   # ID of the root node
nodes = {}       # child -> its parents
inv_nodes = {}   # parent -> list of its children
positions = {}   # node -> [x, y] in layout coordinates
visited = {}     # nodes already handled by the current traversal
depth = {}       # node -> value computed by compute_depth()
time = {}        # node -> "Time" value from the input
kind = {}        # node -> "Kind" value from the input
[562]375
def main():
    """Parse the command line, load the genealogy and write the SVG file.

    Steps: fill the option globals from the arguments, seed the random
    generator (the seed is printed so a run can be repeated), load the input
    in the structured or the simple format, compute depths, lay the tree out
    and draw the requested layers followed by the scale.
    """
    global svg_file, min_skeleton_depth, args, TIME, BALANCE, DOT_STYLE, COLORING, JITTER

    def str2bool(value):
        # argparse's type=bool is just bool(str): every non-empty string,
        # including "False" and "0", would switch the option on
        return value.lower() not in ('', '0', 'false', 'no', 'n', 'off')

    parser = argparse.ArgumentParser(description='Draws a genealogical tree (generates a SVG file) based on parent-child relationship information.')
    parser.add_argument('-i', '--in', dest='input', required=True, help='input file with stuctured evolutionary data')
    parser.add_argument('-o', '--out', dest='output', required=True, help='output file for the evolutionary tree')
    draw_tree_parser = parser.add_mutually_exclusive_group(required=False)
    draw_tree_parser.add_argument('--draw-tree', dest='draw_tree', action='store_true', help='whether the full tree should be drawn')
    draw_tree_parser.add_argument('--no-draw-tree', dest='draw_tree', action='store_false')

    draw_skeleton_parser = parser.add_mutually_exclusive_group(required=False)
    draw_skeleton_parser.add_argument('--draw-skeleton', dest='draw_skeleton', action='store_true', help='whether the skeleton of the tree should be drawn')
    draw_skeleton_parser.add_argument('--no-draw-skeleton', dest='draw_skeleton', action='store_false')

    draw_spine_parser = parser.add_mutually_exclusive_group(required=False)
    draw_spine_parser.add_argument('--draw-spine', dest='draw_spine', action='store_true', help='whether the spine of the tree should be drawn')
    draw_spine_parser.add_argument('--no-draw-spine', dest='draw_spine', action='store_false')

    #TODO: better names for those parameters
    parser.add_argument('-t', '--time', default='BIRTHS', dest='time', help='values on vertical axis (BIRTHS/GENERATIONAL/REAL); '
                                                                      'BIRTHS: time measured as the number of births since the beggining; '
                                                                      'GENERATIONAL: time measured as number of ancestors; '
                                                                      'REAL: real time of the simulation')
    parser.add_argument('-b', '--balance', default='MIN', dest='balance', help='method of placing node in the tree (RANDOM/MIN/DENSITY)')
    parser.add_argument('-s', '--scale', default='NONE', dest='scale', help='type of timescale added to the tree (NONE/SIMPLE)')
    parser.add_argument('-c', '--coloring', default='IMPORTANCE', dest="coloring", help='method of coloring the tree (NONE/IMPORTANCE/TYPE)')
    parser.add_argument('-d', '--dots', default='NORMAL', dest='dots', help='method of drawing dots (individuals) (NONE/NORMAL/KIND; TYPE is accepted as an alias of KIND)')
    parser.add_argument('-j', '--jitter', dest="jitter", action='store_true', help='draw horizontal positions of children from the normal distribution')

    parser.add_argument('--min-skeleton-depth', type=int, default=2, dest='min_skeleton_depth', help='minimal distance from the leafs for the nodes in the skeleton')
    parser.add_argument('--seed', type=int, dest='seed', help='seed for the random number generator (-1 for random)')

    # may be given bare ("--simple-data") or with an explicit value
    parser.add_argument('--simple-data', type=str2bool, nargs='?', const=True, default=False, dest='simple_data',
                        help='input data are given in a simple format (#child #parent)')

    parser.set_defaults(draw_tree=True)
    parser.set_defaults(draw_skeleton=False)
    parser.set_defaults(draw_spine=False)

    parser.set_defaults(seed=-1)

    args = parser.parse_args()

    # the drawing code compares against upper-case names
    TIME = args.time.upper()
    BALANCE = args.balance.upper()
    DOT_STYLE = args.dots.upper()
    if DOT_STYLE == "TYPE":
        # the drawing functions test for "KIND"; keep the documented name working
        DOT_STYLE = "KIND"
    COLORING = args.coloring.upper()
    JITTER = args.jitter

    dir = args.input
    min_skeleton_depth = args.min_skeleton_depth
    seed = args.seed
    if seed == -1:
        seed = random.randint(0, 10000)
    random.seed(seed)
    print("seed:", seed)

    if args.simple_data:
        load_simple_data(dir)
    else:
        load_data(dir)

    compute_depth(firstnode)

    # the context manager closes the output even when drawing fails
    with open(args.output, "w") as svg_file:
        svg_file.write('<svg xmlns:svg="http://www.w3.org/2000/svg" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.0" '
                       'width="' + str(w) + '" height="' + str(h) + '">')

        prepos_children()

        if args.draw_tree:
            draw_children()
        if args.draw_skeleton:
            draw_skeleton()
        if args.draw_spine:
            draw_spine()

        draw_scale(dir, args.scale)

        svg_file.write("</svg>")
456
# Run the command-line tool only when executed as a script, so that the
# module can be imported without parsing sys.argv.
if __name__ == "__main__":
    main()
458
Note: See TracBrowser for help on using the repository browser.