1 | import numpy as np |
---|
2 | from pyemd import emd |
---|
3 | from ctypes import cdll |
---|
4 | from ctypes.util import find_library |
---|
5 | from alignmodel import align |
---|
6 | |
---|
class DensityDistribution:
    """Dissimilarity of Framsticks models based on how their voxels are distributed in space.

    The voxel clouds of two models are binned on a common regular grid. Each model is
    then described by a signature: per-bin centers of mass and voxel counts (density
    mode), or the magnitude of the FFT of the per-bin counts (frequency mode). The
    signatures are compared with the 'emd', 'l1' or 'l2' metric.
    """

    # C math library; used to toggle the floating-point divide-by-zero trap around pyemd calls.
    libm = cdll.LoadLibrary(find_library('m'))
    # Margin subtracted from the lowest grid edge so that voxels lying exactly on it
    # still fall into the first (left-open, right-closed) bin.
    EPSILON = 0.0001
    # Flag passed to fe*except() for the divide-by-zero exception.
    # NOTE(review): 0x04 matches FE_DIVBYZERO on x86 glibc - confirm for other platforms.
    _FE_DIVBYZERO = 0x04

    def __init__(self, FramsLib=None, density=10, steps=3, reduce=True, frequency=False, metric='emd', fixedZaxis=False, verbose=False):
        """Store the configuration of the measure.

        Args:
            FramsLib: Framsticks library object; its Model and ModelGeometry members are used. Required.
            density (int, optional): density of sampling for frams.ModelGeometry. Defaults to 10.
            steps (int, optional): number of grid edges per axis used for sampling the space of voxels.
                The higher the value, the more accurate the sampling and the longer the calculations. Defaults to 3.
            reduce (bool, optional): whether to remove samples that are blank in both models. Defaults to True.
            frequency (bool, optional): whether to use the frequency distribution. Defaults to False.
            metric (string, optional): the distance metric to use ('emd', 'l1', or 'l2'). Defaults to 'emd'.
            fixedZaxis (bool, optional): whether the z axis should be fixed during alignment. Defaults to False.
            verbose (bool, optional): turns on logging; affects only calculateDissimforGeno. Defaults to False.

        Raises:
            ValueError: if FramsLib is not provided.
        """
        if FramsLib is None:
            raise ValueError('Frams library not provided!')
        self.frams_lib = FramsLib

        self.density = density
        self.steps = steps
        self.verbose = verbose
        self.reduce = reduce
        self.frequency = frequency
        self.metric = metric
        self.fixedZaxis = fixedZaxis

    def calculateNeighberhood(self, array, mean_coords):
        """Return the weight of a sample and the point that represents it.

        Args:
            array (np.array of shape (k, 3)): voxels that belong to the sample.
            mean_coords ([float, float, float]): geometric middle of the sample,
                used as its representative point when the sample holds no voxels.

        Returns:
            weight (int): number of voxels in the sample.
            coordinates ([float, float, float]): center of mass of the sample
                (mean of every coordinate), or mean_coords for an empty sample.
        """
        weight = len(array)
        if weight == 0:
            return 0, mean_coords
        center = [np.mean(array[:, 0]), np.mean(array[:, 1]), np.mean(array[:, 2])]
        return weight, center

    def calculateDistPoints(self, point1, point2):
        """Return the distance between two samples.

        Args (distribution):
            point1 ([float, float, float]): coordinates of the first point.
            point2 ([float, float, float]): coordinates of the second point.
        Args (frequency):
            point1 (float): value of the first sample.
            point2 (float): value of the second sample.

        Returns:
            float: euclidean distance (absolute difference in frequency mode).
        """
        if self.frequency:
            return abs(point1 - point2)
        delta = np.asarray(point1, dtype=np.float64) - np.asarray(point2, dtype=np.float64)
        return np.sqrt(np.sum(np.square(delta)))

    def calculateDistanceMatrix(self, array1, array2):
        """Build the matrix of distances between every sample of array1 and every sample of array2.

        Args:
            array1: sequence of n samples representing the first model
                (points in density mode, scalars in frequency mode).
            array2: sequence of at least n samples representing the second model;
                only its first n samples are used.

        Returns:
            np.array of shape (n, n), dtype float: entry [i][j] is the distance
            between array1[i] and array2[j], as defined by calculateDistPoints.

        Raises:
            IndexError: if array2 holds fewer samples than array1.
        """
        n = len(array1)
        if n == 0:
            return np.zeros((0, 0))
        if len(array2) < n:
            raise IndexError("array2 has fewer samples (%d) than array1 (%d)" % (len(array2), n))

        first = np.asarray(array1, dtype=np.float64)
        second = np.asarray(array2, dtype=np.float64)[:n]

        if self.frequency:
            # scalar samples: distance is the absolute difference
            return np.abs(first[:, np.newaxis] - second[np.newaxis, :])

        # one row per sample; broadcasting yields all pairwise coordinate differences at once
        first = first.reshape(n, -1)
        second = second.reshape(n, -1)
        diff = first[:, np.newaxis, :] - second[np.newaxis, :, :]
        return np.sqrt(np.sum(np.square(diff), axis=-1))

    def reduceSignaturesFreq(self, s1, s2):
        """Remove samples from frequency signatures if the corresponding samples of both models are 0.

        Args:
            s1 (np.array(,dtype=np.float64)): values of samples of the first model.
            s2 (np.array(,dtype=np.float64)): values of samples of the second model.

        Returns:
            s1new (np.array(,dtype=np.float64)): values of samples after reduction.
            s2new (np.array(,dtype=np.float64)): values of samples after reduction.
        """
        first = np.asarray(s1)
        second = np.asarray(s2)
        keep = (first != 0) | (second != 0)
        return first[keep], second[keep]

    def reduceSignaturesDens(self, s1, s2):
        """Remove samples from density signatures if the corresponding samples of both models have weight 0.

        Args:
            s1 ([np.array(,dtype=np.float64), np.array(,dtype=np.float64)]): [coordinates of samples, weights]
            s2 ([np.array(,dtype=np.float64), np.array(,dtype=np.float64)]): [coordinates of samples, weights]

        Returns:
            s1new ([np.array(,dtype=np.float64), np.array(,dtype=np.float64)]): [coordinates of samples, weights] after reduction
            s2new ([np.array(,dtype=np.float64), np.array(,dtype=np.float64)]): [coordinates of samples, weights] after reduction
        """
        weights1 = np.asarray(s1[1])
        weights2 = np.asarray(s2[1])
        keep = (weights1 != 0) | (weights2 != 0)
        reduced1 = [np.asarray(s1[0])[keep], weights1[keep]]
        reduced2 = [np.asarray(s2[0])[keep], weights2[keep]]
        return reduced1, reduced2

    @staticmethod
    def _binMasks(values, edges):
        """Return, for each consecutive pair of edges, the mask of values lying in (lower, upper]."""
        return [(values > lower) & (values <= upper) for lower, upper in zip(edges[:-1], edges[1:])]

    def getSignatures(self, array, steps_all, step_all):
        """Generate the signature for an array of voxels representing a model.

        Bins are visited with x as the outermost and z as the innermost axis; each bin
        is the box (lower, upper] on every axis.

        Args:
            array (np.array(np.array(,dtype=float))): array with voxels representing the model.
            steps_all ([np.array(,dtype=float), np.array(,dtype=float), np.array(,dtype=float)]): edges of the bins for each axis, in order x, y, z.
            step_all ([float, float, float]): [size of step for x axis, size of step for y axis, size of step for z axis]

        Returns (distribution):
            signature [np.array(,dtype=np.float64), np.array(,dtype=np.float64)]: [np.array of points, np.array of weights]
        Returns (frequency):
            signature np.array(,dtype=np.float64): magnitudes of the FFT of the per-bin voxel counts.
        """
        array = np.asarray(array)
        x_steps, y_steps, z_steps = steps_all
        x_step, y_step, z_step = step_all
        half_x, half_y, half_z = x_step / 2, y_step / 2, z_step / 2

        # Per-axis membership is computed once per bin rather than once per grid cell.
        x_masks = self._binMasks(array[:, 0], x_steps)
        y_masks = self._binMasks(array[:, 1], y_steps)
        z_masks = self._binMasks(array[:, 2], z_steps)

        feature_array = []
        weight_array = []
        for x, in_x in enumerate(x_masks):
            for y, in_y in enumerate(y_masks):
                in_xy = in_x & in_y
                for z, in_z in enumerate(z_masks):
                    members = array[in_xy & in_z]
                    if self.frequency:
                        feature_array.append(len(members))
                    else:
                        bin_middle = [x_steps[x] + half_x, y_steps[y] + half_y, z_steps[z] + half_z]
                        weight, point = self.calculateNeighberhood(members, bin_middle)
                        feature_array.append(point)
                        weight_array.append(weight)

        if self.frequency:
            samples = np.array(feature_array, dtype=np.float64)
            return abs(np.fft.fft(samples))
        return [np.array(feature_array, dtype=np.float64), np.array(weight_array, dtype=np.float64)]

    def getSignaturesForPair(self, array1, array2):
        """Generate signatures for a pair of models represented by arrays of voxels.

        The common space of both models is found by taking the extremes for each axis and
        is divided using self.steps edges per axis. Each resulting sample gets new
        coordinates, which are the mean of all points it contains, and a weight equal
        to the number of these points.

        Args:
            array1 (np.array(np.array(,dtype=float))): array with voxels representing model1
            array2 (np.array(np.array(,dtype=float))): array with voxels representing model2

        Returns:
            s1: signature of model1, as returned by getSignatures.
            s2: signature of model2, as returned by getSignatures.

        Raises:
            ValueError: if any of the arrays is empty or is not a 2-D array with at least 3 columns.
        """
        array1 = np.asarray(array1)
        array2 = np.asarray(array2)
        for voxels in (array1, array2):
            if voxels.ndim != 2 or voxels.shape[0] == 0 or voxels.shape[1] < 3:
                raise ValueError("Voxel arrays must be non-empty and have shape (n, 3)")

        steps_all = []
        step_all = []
        for axis in range(3):
            lowest = np.minimum(np.min(array1[:, axis]), np.min(array2[:, axis]))
            highest = np.maximum(np.max(array1[:, axis]), np.max(array2[:, axis]))
            edges, step = np.linspace(lowest, highest, self.steps, retstep=True)
            # EPSILON subtracted to deal with boundary voxels (one-sided open intervals
            # and comparisons in getSignatures()).
            edges[0] -= self.EPSILON
            steps_all.append(edges)
            step_all.append(step)

        steps_all = tuple(steps_all)
        step_all = tuple(step_all)

        s1 = self.getSignatures(array1, steps_all, step_all)
        s2 = self.getSignatures(array2, steps_all, step_all)
        return s1, s2

    def getVoxels(self, geno):
        """Generate voxels for a genotype using frams.ModelGeometry.

        Args:
            geno (string): representation of a model in one of the formats handled by frams http://www.framsticks.com/a/al_genotype.html

        Returns:
            np.array([np.array(,dtype=float)]): list of voxels representing the model.
        """
        model = self.frams_lib.Model.newFromString(geno)
        align(model, self.fixedZaxis)
        model_geometry = self.frams_lib.ModelGeometry.forModel(model)

        model_geometry.geom_density = self.density
        voxels = np.array([np.array([p.x._value(), p.y._value(), p.z._value()]) for p in model_geometry.voxels()])
        return voxels

    def calculateDissimforVoxels(self, voxels1, voxels2):
        """Calculate the dissimilarity for a pair of voxel arrays representing models.

        Args:
            voxels1 (np.array([np.array(,dtype=float)])): list of voxels representing model1.
            voxels2 (np.array([np.array(,dtype=float)])): list of voxels representing model2.

        Returns:
            float: dissimilarity of the two voxel arrays according to self.metric.

        Raises:
            ValueError: if the density signatures do not account for every voxel,
                or if self.metric is not one of 'emd', 'l1', 'l2'.
        """
        numvox1 = len(voxels1)
        numvox2 = len(voxels2)

        s1, s2 = self.getSignaturesForPair(voxels1, voxels2)

        # Weights exist only in density signatures; a frequency signature is a flat
        # array of FFT magnitudes, so the voxel-count check does not apply to it.
        if not self.frequency:
            if numvox1 != s1[1].sum() or numvox2 != s2[1].sum():
                print("Bad signature!")
                print("Base voxels fig1: ", numvox1, " fig2: ", numvox2)
                print("After reduction voxels fig1: ", s1[1].sum(), " fig2: ", s2[1].sum())
                raise ValueError("BAd signature!")

        if self.reduce:
            reduce_fun = self.reduceSignaturesFreq if self.frequency else self.reduceSignaturesDens
            s1, s2 = reduce_fun(s1, s2)

            if not self.frequency:
                if numvox1 != s1[1].sum() or numvox2 != s2[1].sum():
                    print("Voxel reduction didnt work properly")
                    print("Base voxels fig1: ", numvox1, " fig2: ", numvox2)
                    print("After reduction voxels fig1: ", s1[1].sum(), " fig2: ", s2[1].sum())

        # The values compared by every metric: FFT magnitudes or per-sample weights.
        values1, values2 = (s1, s2) if self.frequency else (s1[1], s2[1])

        if self.metric == 'l1':
            out = np.linalg.norm(values1 - values2, ord=1)

        elif self.metric == 'l2':
            out = np.linalg.norm(values1 - values2)

        elif self.metric == 'emd':
            if self.frequency:
                # ground distance between frequency samples is the distance between their indices
                positions = range(len(s1))
                dist_matrix = self.calculateDistanceMatrix(positions, positions)
            else:
                dist_matrix = self.calculateDistanceMatrix(s1[0], s2[0])
            dist_matrix = np.ascontiguousarray(dist_matrix, dtype=np.float64)

            # allow division by 0 while pyemd runs because pyemd requires it
            self.libm.fedisableexcept(self._FE_DIVBYZERO)
            try:
                out = emd(values1, values2, dist_matrix)
            finally:
                # restore the divide-by-zero trap (even if emd failed) because
                # Framsticks does not like it being disabled
                self.libm.feclearexcept(self._FE_DIVBYZERO)
                self.libm.feenableexcept(self._FE_DIVBYZERO)

        else:
            raise ValueError("Wrong metric '%s'" % self.metric)

        return out

    def calculateDissimforGeno(self, geno1, geno2):
        """Calculate the dissimilarity for a pair of genotypes.

        Args:
            geno1 (string): representation of model1 in one of the formats handled by frams http://www.framsticks.com/a/al_genotype.html
            geno2 (string): representation of model2 in one of the formats handled by frams http://www.framsticks.com/a/al_genotype.html

        Returns:
            float: dissimilarity for the pair of strings representing models.
        """
        voxels1 = self.getVoxels(geno1)
        voxels2 = self.getVoxels(geno2)

        out = self.calculateDissimforVoxels(voxels1, voxels2)

        if self.verbose:
            print("Steps: ", self.steps)
            print("Geno1:\n", geno1)
            print("Geno2:\n", geno2)
            print("EMD:\n", out)

        return out

    def getDissimilarityMatrix(self, listOfGeno):
        """Calculate the dissimilarity of every ordered pair of genotypes from the list.

        Args:
            listOfGeno ([string]): list of strings representing genotypes in one of the formats handled by frams http://www.framsticks.com/a/al_genotype.html

        Returns:
            np.array(np.array(,dtype=float)): dissimilarity matrix for the given list of genotypes.
        """
        numOfGeno = len(listOfGeno)
        dissimMatrix = np.zeros(shape=[numOfGeno, numOfGeno])
        # every genotype is voxelized once, not once per pair
        listOfVoxels = [self.getVoxels(g) for g in listOfGeno]
        for i, voxels_i in enumerate(listOfVoxels):
            for j, voxels_j in enumerate(listOfVoxels):
                dissimMatrix[i, j] = self.calculateDissimforVoxels(voxels_i, voxels_j)
        return dissimMatrix