source: mds-and-trees/tree-genealogy.py @ 576

Last change on this file since 576 was 576, checked in by konrad, 8 years ago

Simple scale

File size: 17.9 KB
Line 
# Draws a genealogical tree (generates an SVG file) based on parent-child relationship information.
2
3import json
4import random
5import math
6import argparse
7
# Run-time options. The empty strings are placeholders: main() overwrites all
# four from the parsed command-line arguments before anything is drawn.
TIME = "" # BIRTHS / GENERATIONAL / REAL -- what the vertical axis shows
BALANCE = "" # RANDOM / MIN / DENSITY -- how xmin_crowd() picks the side for a child

DOT_STYLE = "" # NONE / NORMAL / CLEAR -- how nodes (individuals) are drawn

JITTER = "" # set to a bool by main(); True draws horizontal offsets from a normal distribution

# ------SVG---------
# Output file handle; main() replaces the 0 with the opened output file.
svg_file = 0

# Default line style (also used for every line when --mono-tree is given),
# default dot style, and the hollow dot used when DOT_STYLE == "CLEAR".
svg_line_style = 'stroke="rgb(90%,10%,16%)" stroke-width="1" stroke-opacity="0.8"'
svg_dot_style = 'r="2" stroke="black" stroke-width="0.2" fill="red"'
svg_clear_dot_style = 'r="2" stroke="black" stroke-width="0.4" fill="none"'

# Style of the spine/skeleton overlay lines. The spine dot style is only
# referenced from commented-out code in this file.
svg_spine_line_style = 'stroke="rgb(0%,0%,80%)" stroke-width="2" stroke-opacity="1"'
svg_spine_dot_style = 'r="1" stroke="black" stroke-width="0.2" fill="rgb(50%,50%,100%)"'

# Styles of the dashed scale lines and of the text labels written by draw_scale().
svg_scale_line_style = 'stroke="black" stroke-width="0.5" stroke-opacity="1" stroke-dasharray="5, 5"'
svg_scale_text_style = 'style="font-family: Arial; font-size: 12; fill: #000000;"'
27
def svg_add_line(from_pos, to_pos, style=svg_line_style):
    """Write one <line> element to svg_file.

    from_pos and to_pos are (x, y) pairs; style is a string of SVG
    attributes that is inserted verbatim into the tag.
    """
    x_start, y_start = str(from_pos[0]), str(from_pos[1])
    x_end, y_end = str(to_pos[0]), str(to_pos[1])
    svg_file.write("".join([
        '<line ', style,
        ' x1="', x_start,
        '" x2="', x_end,
        '" y1="', y_start,
        '" y2="', y_end,
        '" />',
    ]))
30
def svg_add_text(text, pos, anchor, style=svg_scale_text_style):
    """Write one <text> element to svg_file.

    text is the label, pos an (x, y) pair, anchor the value of the SVG
    text-anchor attribute, and style a string of SVG attributes inserted
    verbatim into the tag.
    """
    from xml.sax.saxutils import escape

    # The label is character data inside an XML document: a raw '&', '<' or
    # '>' (e.g. coming from an input file name) would make the SVG invalid.
    svg_file.write('<text ' + style + ' text-anchor="' + anchor + '" x="' + str(pos[0]) + '" y="' + str(pos[1]) + '" >' + escape(text) + '</text>')
33
def svg_add_dot(pos, style=svg_dot_style):
    """Write one <circle> element centred at pos (an (x, y) pair) to svg_file.

    style is a string of SVG attributes inserted verbatim into the tag.
    """
    centre_x = str(pos[0])
    centre_y = str(pos[1])
    svg_file.write("".join(['<circle ', style, ' cx="', centre_x, '" cy="', centre_y, '" />']))
36
def svg_generate_line_style(percent):
    """Return SVG stroke attributes for a line, graded by percent.

    Opacity (0.2 -> 1.0) and stroke width (1 -> 3) are interpolated linearly
    in percent. The colour runs from rgb(100%,70%,0%) to rgb(60%,0%,0%) (the
    "hotdog" palette) along the eased value 1 - (1 - percent)**20.
    """
    def mix(start, end, t):
        # linear interpolation between start (t == 0) and end (t == 1)
        return start*(1-t) + end*t

    start_rgb = (100, 70, 0)
    end_rgb = (60, 0, 0)

    opacity = mix(0.2, 1.0, percent)
    width = mix(1, 3, percent)

    # the colour is interpolated on the eased value, not on percent itself
    eased = 1 - ((1-percent)**20)
    channels = [str(mix(lo, hi, eased)) for lo, hi in zip(start_rgb, end_rgb)]

    return ('stroke="rgb(' + '%,'.join(channels) + '%)" stroke-width="' + str(width)
            + '" stroke-opacity="' + str(opacity) + '"')
61
def svg_generate_dot_style(percent):
    """Return SVG circle attributes for a dot, graded by percent.

    Fill and stroke opacity are interpolated linearly (0.2 -> 1.0) in
    percent. The fill colour runs from rgb(100%,70%,0%) to rgb(60%,0%,0%)
    along the eased value 1 - (1 - percent)**20. Radius and outline are fixed.
    """
    def mix(start, end, t):
        # linear interpolation between start (t == 0) and end (t == 1)
        return start*(1-t) + end*t

    start_rgb = (100, 70, 0)
    end_rgb = (60, 0, 0)

    opacity = str(mix(0.2, 1.0, percent))

    # the colour is interpolated on the eased value, not on percent itself
    eased = 1 - ((1-percent)**20)
    channels = [str(mix(lo, hi, eased)) for lo, hi in zip(start_rgb, end_rgb)]

    return ('fill="rgb(' + '%,'.join(channels)
            + '%)" r="1.5" stroke="black" stroke-width="0.2" fill-opacity="' + opacity
            + '" stroke-opacity="' + opacity + '"')
80
81# -------------------
82
def load_data(dir):
    """Load parent-child data from a log file into the module-level tables.

    Only lines of the form '[OFFSPRING] <json>' are used; everything else is
    ignored. For every record that has "FromIDs":
      * nodes[ID] maps each parent id to its contribution (currently always 1),
      * firstnode is set to the record's first parent whenever that parent is
        not (yet) known as a child.
    Records with "Time" fill time[ID]. Finally inv_nodes (parent id -> list
    of child ids) is rebuilt from nodes.

    dir is the path of the input file (the name is kept for compatibility
    although it shadows the builtin).

    Exits the program (SystemExit, status 1) when a child id appears twice.
    """
    global firstnode, nodes, inv_nodes, time
    # 'with' guarantees the file is closed, also when a line fails to parse
    with open(dir) as f:
        for line in f:
            sline = line.split(' ', 1)
            if len(sline) != 2 or sline[0] != "[OFFSPRING]":
                continue
            creature = json.loads(sline[1])

            if "FromIDs" in creature:
                if creature["ID"] in nodes:
                    print("Doubled entry for " + str(creature["ID"]))
                    # quit() is only injected by the 'site' module and exits
                    # with status 0; signal the failure explicitly instead
                    raise SystemExit(1)

                # we assign to each parent its contribution to the genotype of the child
                # (the "Inherited" field is not used for now, every parent counts as 1)
                nodes[creature["ID"]] = {parent: 1 for parent in creature["FromIDs"]}

                if creature["FromIDs"][0] not in nodes:
                    firstnode = creature["FromIDs"][0]

            if "Time" in creature:
                time[creature["ID"]] = creature["Time"]

    for child, parents in sorted(nodes.items()):
        for parent in sorted(parents):
            inv_nodes.setdefault(parent, []).append(child)
113
114
def load_simple_data(dir):
    """Load parent-child data given in the simple '#child #parent' format.

    A line with a single token names the root (firstnode). A line with two
    or more tokens is 'child parent ...'; the parent id is clamped from below
    to the root id, and a line whose child is the root itself is skipped.
    Blank lines are ignored.

    The tables are filled in the same shape load_data() produces, because
    the positioning and drawing code iterates nodes[child].items():
      * nodes[child] = {parent: 1}
      * inv_nodes[parent] = [child, ...]

    dir is the path of the input file (the name is kept for compatibility
    although it shadows the builtin).

    Raises ValueError when a child line appears before the root line or an
    id is not an integer.
    """
    global firstnode, nodes, inv_nodes
    # 'with' guarantees the file is closed, also when a line fails to parse
    with open(dir) as f:
        for line in f:
            sline = line.split()
            if not sline:
                # a blank (e.g. trailing) line carries no information
                continue
            if len(sline) == 1:
                firstnode = sline[0]
                continue
            if sline[0] == firstnode:
                continue
            if firstnode == "":
                raise ValueError("child line found before the root line: " + line.strip())
            parent = str(max(int(sline[1]), int(firstnode)))
            nodes[sline[0]] = {parent: 1}

    for child, parents in sorted(nodes.items()):
        for parent in sorted(parents):
            inv_nodes.setdefault(parent, []).append(child)
135
def compute_depth(node):
    """Store in depth[] and return the length of the longest path from node down to a leaf.

    A node without an entry in inv_nodes (no children) has depth 0.
    """
    longest = 0
    for child in (inv_nodes[node] if node in inv_nodes else ()):
        below = compute_depth(child) + 1
        if below > longest:
            longest = below
    depth[node] = longest
    return longest
143
144# ------------------------------------
145
def xmin_crowd(x1, x2, y):
    """Pick one of two candidate x positions for a node placed at height y.

    The strategy is selected by the global BALANCE:
      * "RANDOM":  either candidate, chosen with random.randrange.
      * "MIN":     the candidate whose nearest already-placed node at exactly
                   the same y is farther away.
      * "DENSITY": the candidate with the larger summed Euclidean distance to
                   the already-placed nodes whose y lies within 10 of y.
    Ties go to x2. Any other BALANCE value returns None.
    """
    if BALANCE == "RANDOM":
        return (x1 if random.randrange(2) == 0 else x2)
    elif BALANCE == "MIN":
        x1_closest = 999999
        x2_closest = 999999
        for pos in positions.values():
            if pos[1] == y:
                x1_closest = min(x1_closest, abs(x1-pos[0]))
                x2_closest = min(x2_closest, abs(x2-pos[0]))
        return (x1 if x1_closest > x2_closest else x2)
    elif BALANCE == "DENSITY":
        x1_dist = 0
        x2_dist = 0
        for pos in positions.values():
            # Only nodes in the vertical neighbourhood count. The previous
            # test joined the two bounds with 'or', which is true for every
            # node and made the window meaningless.
            if y-10 < pos[1] < y+10:
                dy = pos[1]-y
                dx1 = pos[0]-x1
                dx2 = pos[0]-x2

                x1_dist += math.sqrt(dy**2 + dx1**2)
                x2_dist += math.sqrt(dy**2 + dx2**2)
        return (x1 if x1_dist > x2_dist else x2)
171
172# ------------------------------------
173
def prepos_children_reccurent(node):
    """Recursively assign positions[child] = [x, y] to the descendants of node.

    The y coordinate depends on the global TIME:
      * "BIRTHS":       the number in the child id (an optional leading "c"
                        is stripped),
      * "GENERATIONAL": the y of node plus one,
      * "REAL":         time[child],
      * anything else:  0.
    A child with a single parent is put one step (or a Gaussian step when
    JITTER is set) to the left or right of node, the side being chosen by
    xmin_crowd(). A child with several parents is put at the
    contribution-weighted mean x of its parents (plus small Gaussian noise
    when JITTER is set).
    """
    global visited
    for c in inv_nodes[node]:

        # We want to visit the node just once, after all of its parents.
        # Without the 'c in visited' test a child with several parents was
        # placed again (and its whole subtree re-walked) from every parent
        # that reached it after the last one had been visited.
        if c in visited or not all_parents_visited(c):
            continue
        visited[c] = True

        cy = 0
        if TIME == "BIRTHS":
            cy = int(c[1:]) if c[0] == "c" else int(c)
        elif TIME == "GENERATIONAL":
            cy = positions[node][1]+1
        elif TIME == "REAL":
            cy = time[c]

        if len(nodes[c]) == 1:
            #TODO take the dissimilarity from proper fields
            dissimilarity = random.gauss(0, 1) if JITTER else 1
            parent_x = positions[node][0]
            positions[c] = [xmin_crowd(parent_x-dissimilarity, parent_x+dissimilarity, cy), cy]
        else:
            vsum = sum(nodes[c].values())
            cx = sum(positions[k][0]*v/vsum for k, v in nodes[c].items())

            if JITTER:
                positions[c] = [cx + random.gauss(0, 0.1), cy]
            else:
                positions[c] = [cx, cy]

        if c in inv_nodes:
            prepos_children_reccurent(c)
220
def prepos_children():
    """Compute positions[] for the whole tree and the extents of the layout.

    The root (firstnode) is put at [0, 0] and all descendants are placed by
    prepos_children_reccurent(). Afterwards the globals max_height,
    max_width and min_width are widened to cover every position.

    When REAL time was requested but the input carried no time data, the
    global TIME is switched to "BIRTHS".
    """
    # TIME has to be declared global: a plain assignment would only create a
    # local variable and the fallback below would silently have no effect.
    global max_height, max_width, min_width, visited, TIME

    # Only complain when real time is actually needed.
    if TIME == "REAL" and not time:
        print("REAL time requested, but no real time data provided. Assuming BIRTHS time instead.")
        TIME = "BIRTHS"

    positions[firstnode] = [0, 0]

    visited = {firstnode: True}
    prepos_children_reccurent(firstnode)

    for pos in positions.values():
        max_height = max(max_height, pos[1])
        max_width = max(max_width, pos[0])
        min_width = min(min_width, pos[0])
238
239# ------------------------------------
240
def all_parents_visited(node):
    """Return True when every parent listed in nodes[node] is a key of visited."""
    return all(parent in visited for parent, _ in nodes[node].items())
248# ------------------------------------
249
def draw_children_recurrent(node, max_depth):
    """Recursively draw the descendants of node: their parent lines and dots.

    Descendants are drawn before their ancestors (the recursion happens
    first). Unless --mono-tree was given, line and dot styles are graded by
    depth[c]/max_depth. No dots are drawn when DOT_STYLE is "NONE".
    """
    global visited

    def to_canvas(node_id):
        # map a layout position onto the drawing area inside the margins
        x = positions[node_id][0]
        y = positions[node_id][1]
        return (w_margin+w_no_margs*(x-min_width)/(max_width-min_width), h_margin+h_no_margs*y/max_height)

    for c in inv_nodes[node]:

        # We want to draw the node just once, after all of its parents.
        # Without the 'c in visited' test a child with several parents was
        # drawn again (with its whole subtree) from every parent that
        # reached it after the last one had been visited.
        if c in visited or not all_parents_visited(c):
            continue
        visited[c] = True

        if c in inv_nodes:
            draw_children_recurrent(c, max_depth)

        line_style = (svg_line_style if args.mono_tree else svg_generate_line_style(depth[c]/max_depth))
        for k, v in sorted(nodes[c].items()):
            svg_add_line(to_canvas(k), to_canvas(c), line_style)

        if DOT_STYLE == "NONE":
            continue
        elif DOT_STYLE == "CLEAR":
            dot_style = svg_clear_dot_style
        else: # NORMAL, default
            dot_style = svg_generate_dot_style(depth[c]/max_depth)
        svg_add_dot(to_canvas(c), dot_style)
def draw_children():
    """Draw the full tree: every descendant of firstnode, then the root dot.

    Resets the global visited table, draws all descendants with
    draw_children_recurrent() and finally, unless DOT_STYLE is "NONE", the
    dot of the root itself.
    """
    global visited
    visited = {firstnode: True}

    # largest depth in the tree, never below 0
    max_depth = max([0, *depth.values()])
    draw_children_recurrent(firstnode, max_depth)

    if DOT_STYLE == "NONE":
        return

    if DOT_STYLE == "CLEAR":
        dot_style = svg_clear_dot_style
    else: # NORMAL, default
        dot_style = svg_generate_dot_style(depth[firstnode]/max_depth)

    root_x = w_margin+w_no_margs*(positions[firstnode][0]-min_width)/(max_width-min_width)
    root_y = h_margin+h_no_margs*positions[firstnode][1]/max_height
    svg_add_dot((root_x, root_y), dot_style)
293
def draw_spine_recurrent(node):
    """Draw the spine below node.

    Only children whose depth is exactly one less than the depth of node are
    followed, i.e. those lying on a longest path down to a leaf. The lower
    part of the spine is drawn before the line from node to the child.
    """
    def to_canvas(node_id):
        # map a layout position onto the drawing area inside the margins
        x = positions[node_id][0]
        y = positions[node_id][1]
        return (w_margin+w_no_margs*(x-min_width)/(max_width-min_width), h_margin+h_no_margs*y/max_height)

    for child in inv_nodes[node]:
        if depth[child] != depth[node] - 1:
            continue

        if child in inv_nodes:
            draw_spine_recurrent(child)

        svg_add_line(to_canvas(node), to_canvas(child), svg_spine_line_style)
def draw_spine():
    """Draw the spine of the tree, starting from the root (firstnode)."""
    root = firstnode
    draw_spine_recurrent(root)
307
def draw_skeleton_reccurent(node, max_depth):
    """Draw the skeleton below node.

    A child is part of the skeleton when its depth is at least
    min_skeleton_depth, or when it is (one of) the deepest children of node.
    The lower part of the skeleton is drawn before the line from node to the
    child. max_depth is only passed through to the recursive calls.
    """
    def to_canvas(node_id):
        # map a layout position onto the drawing area inside the margins
        x = positions[node_id][0]
        y = positions[node_id][1]
        return (w_margin+w_no_margs*(x-min_width)/(max_width-min_width), h_margin+h_no_margs*y/max_height)

    children = inv_nodes[node]
    # The depth of the deepest child is the same for every sibling; compute
    # it once instead of once per child (that was quadratic in the number
    # of children).
    deepest = max((depth[q] for q in children), default=None)

    for c in children:
        if depth[c] >= min_skeleton_depth or depth[c] == deepest:
            if c in inv_nodes:
                draw_skeleton_reccurent(c, max_depth)

            svg_add_line(to_canvas(node), to_canvas(c), svg_spine_line_style)
def draw_skeleton():
    """Draw the skeleton of the tree, starting from the root (firstnode)."""
    # largest depth in the tree, never below 0
    deepest = max([0, *depth.values()])

    draw_skeleton_reccurent(firstnode, deepest)
327
328# ------------------------------------
329
def draw_scale(filename ,type):
    """Draw the caption and the two dashed scale lines with their labels.

    filename is the input path; only its last component is shown in the
    "Generated from ..." caption. The top line is labelled with the smallest
    and the bottom line with the largest value of the quantity selected by
    the global TIME (birth number, real time or depth).

    type is the requested kind of scale (--scale). NOTE(review): it is
    accepted but not consulted, so the scale is drawn even for "NONE" --
    confirm the intended behaviour before changing it.
    """
    def birth_number(node_id):
        # Ids are either plain numbers or a number behind a one-letter
        # prefix such as "c12"; slicing a plain number would drop a digit.
        if node_id[0].isdigit() or node_id[0] == "-":
            return int(node_id)
        return int(node_id[1:])

    # accept both Windows and POSIX path separators
    base_name = filename.replace("\\", "/").split("/")[-1]
    svg_add_text( "Generated from " + base_name, (5, 15), "start")

    if TIME == "BIRTHS":
        label, values = "Birth #", [birth_number(k) for k in nodes]
    elif TIME == "REAL":
        label, values = "Time ", list(time.values())
    elif TIME == "GENERATIONAL":
        label, values = "Depth ", list(depth.values())
    else:
        label, values = "", []

    # no data (or an unknown TIME) gives empty labels instead of a crash in min()/max()
    start_text = label + str(min(values)) if values else ""
    end_text = label + str(max(values)) if values else ""

    svg_add_line( (w*0.7, h_margin), (w, h_margin), svg_scale_line_style)
    svg_add_text( start_text, (w, h_margin + 15), "end")

    svg_add_line( (w*0.7, h-h_margin), (w, h-h_margin), svg_scale_line_style)
    svg_add_text( end_text, (w, h-h_margin - 5), "end")
353
354
355##################################################### main #####################################################
356
# Parsed command-line arguments; main() replaces the 0 with the argparse result.
args = 0

# Size of the SVG canvas and of the empty margins around the tree.
h = 800
w = 600
h_margin = 20
w_margin = 10
# Size of the drawing area that is left inside the margins.
h_no_margs = h - 2* h_margin
w_no_margs = w - 2* w_margin

# Extents of the computed layout; prepos_children() widens them to cover
# every entry of positions.
max_height = 0
max_width = 0
min_width = 9999999999

# Depth threshold used by draw_skeleton_reccurent(); set by main() from
# --min-skeleton-depth.
min_skeleton_depth = 0

firstnode = ""   # id of the root; set by the data loaders
nodes = {}       # child id -> its parent(s), filled by the data loaders
inv_nodes = {}   # parent id -> list of child ids, built by the data loaders
positions = {}   # node id -> [x, y] in layout coordinates
visited= {}      # node ids already handled during the current traversal
depth = {}       # node id -> longest distance down to a leaf (compute_depth)
time = {}        # node id -> value of the "Time" field, filled by load_data()
379
def _parse_bool_flag(value):
    """Interpret the optional value of a boolean command-line flag.

    The usual spellings of "false" give False; any other non-empty text
    gives True.
    """
    if value.strip().lower() in ("", "0", "false", "f", "no", "n", "off"):
        return False
    return True


def main():
    """Parse the command line, load the data and write the SVG file.

    Sets the module-level options (TIME, BALANCE, DOT_STYLE, JITTER,
    min_skeleton_depth, args), seeds the random number generator (printing
    the seed that was used), loads the input, computes depths and positions,
    and writes the requested drawings and the scale to the output file.
    """
    global svg_file, min_skeleton_depth, args, TIME, BALANCE, DOT_STYLE, JITTER

    parser = argparse.ArgumentParser(description='Draw a genealogical tree (as an SVG file) based on parent-child relationship information.')
    parser.add_argument('-i', '--in', dest='input', required=True, help='input file with stuctured evolutionary data')
    parser.add_argument('-o', '--out', dest='output', required=True, help='output file for the evolutionary tree')
    draw_tree_parser = parser.add_mutually_exclusive_group(required=False)
    draw_tree_parser.add_argument('--draw-tree', dest='draw_tree', action='store_true', help='whether the full tree should be drawn')
    draw_tree_parser.add_argument('--no-draw-tree', dest='draw_tree', action='store_false')

    draw_skeleton_parser = parser.add_mutually_exclusive_group(required=False)
    draw_skeleton_parser.add_argument('--draw-skeleton', dest='draw_skeleton', action='store_true', help='whether the skeleton of the tree should be drawn')
    draw_skeleton_parser.add_argument('--no-draw-skeleton', dest='draw_skeleton', action='store_false')

    draw_spine_parser = parser.add_mutually_exclusive_group(required=False)
    draw_spine_parser.add_argument('--draw-spine', dest='draw_spine', action='store_true', help='whether the spine of the tree should be drawn')
    draw_spine_parser.add_argument('--no-draw-spine', dest='draw_spine', action='store_false')

    #TODO: better names for those parameters
    parser.add_argument('-t', '--time', default='BIRTHS', dest='time', help='values on vertical axis (BIRTHS/GENERATIONAL/REAL); '
                                                                      'BIRTHS: time measured as the number of births since the beggining; '
                                                                      'GENERATIONAL: time measured as number of ancestors; '
                                                                      'REAL: real time of the simulation')
    parser.add_argument('-b', '--balance', default='MIN', dest='balance', help='method of placing node in the tree (RANDOM/MIN/DENSITY)')

    parser.add_argument('-s', '--scale', default='NONE', dest='scale', help='type of timescale added to the tree (NONE/SIMPLE/FULL)')

    parser.add_argument('-d', '--dots', default='NORMAL', dest='dots', help='method of drawing dots (individuals) (NONE/NORMAL/CLEAR)')

    parser.add_argument('-j', '--jitter', dest="jitter", action='store_true', help='draw horizontal positions of children from the normal distribution')

    mono_tree_parser = parser.add_mutually_exclusive_group(required=False)
    mono_tree_parser.add_argument('--mono-tree', dest='mono_tree', action='store_true', help='whether the tree should be drawn with a single color')
    mono_tree_parser.add_argument('--no-mono-tree', dest='mono_tree', action='store_false')

    parser.add_argument('--min-skeleton-depth', type=int, default=2, dest='min_skeleton_depth', help='minimal distance from the leafs for the nodes in the skeleton')
    parser.add_argument('--seed', type=int, dest='seed', help='seed for the random number generator (-1 for random)')

    # type=bool would turn every non-empty string, including "False", into
    # True. Accept both the bare flag and an explicit value.
    parser.add_argument('--simple-data', nargs='?', const=True, default=False, type=_parse_bool_flag, dest='simple_data',
                        help='input data are given in a simple format (#child #parent)')

    parser.set_defaults(mono_tree=False)
    parser.set_defaults(draw_tree=True)
    parser.set_defaults(draw_skeleton=False)
    parser.set_defaults(draw_spine=False)

    parser.set_defaults(seed=-1)

    args = parser.parse_args()

    TIME = args.time
    BALANCE = args.balance
    DOT_STYLE = args.dots
    JITTER = args.jitter

    dir = args.input
    min_skeleton_depth = args.min_skeleton_depth
    seed = args.seed
    if seed == -1:
        seed = random.randint(0, 10000)
    random.seed(seed)
    # printed so that a layout can be reproduced with --seed
    print("seed:", seed)

    if args.simple_data:
        load_simple_data(dir)
    else:
        load_data(dir)

    compute_depth(firstnode)

    svg_file = open(args.output, "w")
    try:
        svg_file.write('<svg xmlns:svg="http://www.w3.org/2000/svg" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.0" '
                       'width="' + str(w) + '" height="' + str(h) + '">')

        prepos_children()

        if args.draw_tree:
            draw_children()
        if args.draw_skeleton:
            draw_skeleton()
        if args.draw_spine:
            draw_spine()

        draw_scale(dir, args.scale)

        svg_file.write("</svg>")
    finally:
        # close the output also when positioning or drawing fails
        svg_file.close()


# run only when executed as a script, not when the module is imported
if __name__ == "__main__":
    main()
468
Note: See TracBrowser for help on using the repository browser.