source: framspy/dissimilarity/density-distribution.py @ 1254

Last change on this file since 1254 was 1225, checked in by oriona, 19 months ago

New deafult params set, unnecessary print removed.

File size: 15.1 KB
Line 
1import numpy as np
2from pyemd import emd
3from ctypes import cdll
4from ctypes.util import find_library
5from alignmodel import align
6
7class DensityDistribution:
8    """Two dissimilarity measures based on the spatial distribution of two Models. The Model bounding box is divided into a grid of equally-sized cuboids, the number of which is the 'resolution' parameter cubed. Then the Model surface is covered with points; the density of the surface sampling is determined by the 'density' parameter. There are two versions of the measure. In the default version ('frequency'=False), a signature of each cuboid is the centroid and the number of samples. In the 'frequency'=True version, FFT is computed from the vector containing the number of samples in each cuboid. The final result of the dissimilarity measure is the distance between the signatures and it can be computed using EMD, L1, or L2 norms (the 'metric' parameter).
9    """
10   
11    libm = cdll.LoadLibrary(find_library('m')) # for disabling/enabling floating point exceptions (division by zero occurs in the EMD library)
12    EPSILON = 0.0001
13   
14    def __init__(self, frams_module=None, density = 10, resolution = 8, reduce_empty=True, frequency=False, metric = 'emd', fixedZaxis=False, verbose=False):
15        """ __init__
16        Args:
17            density (int, optional): density of samplings for frams.ModelGeometry. Defaults to 10.
18            resolution (int, optional): How many intervals are used in each dimension to partition surface samples of Models in the 3D space.
19                The higher the value, the more detailed the comparison and the longer the calculations. Defaults to 3.
20            reduce_empty (bool, optional): If we should use reduction to remove blank samples. Defaults to True.
21            frequency (bool, optional): If we should use frequency distribution. Defaults to False.
22            metric (string, optional): The distance metric that should be used ('emd', 'l1', or 'l2'). Defaults to 'emd'.
23            fixedZaxis (bool, optional): If the z axis should be fixed during alignment. Defaults to False.
24            verbose (bool, optional): Turning on logging, works only for calculateEMDforGeno. Defaults to False.           
25        """
26        if frams_module is None:
27            raise ValueError('Framsticks module not provided!')
28        self.frams = frams_module
29
30        self.density = density
31        self.resolution = resolution
32        self.verbose = verbose
33        self.reduce_empty = reduce_empty
34        self.frequency = frequency
35        self.metric = metric
36        self.fixedZaxis = fixedZaxis
37
38
39    def calculateNeighberhood(self,array,mean_coords):
40        """ Calculates number of elements for given sample and set ups the center of this sample
41        to the center of mass (calculated by mean of every coordinate)
42        Args:
43            array ([[float,float,float],...,[float,float,float]]): array of voxels that belong to given sample.
44            mean_coords ([float,float,float]): default coordinates that are the
45                middle of the sample (used when number of voxels in sample is equal to 0)
46
47        Returns:
48            weight [int]: number of voxels in a sample
49            coordinates [float,float,float]: center of mass for a sample
50        """
51        weight = len(array)
52        if weight > 0:
53            point = [np.mean(array[:,0]),np.mean(array[:,1]),np.mean(array[:,2])]
54            return weight, point
55        else:
56            return 0, mean_coords
57
58
59    def calculateDistPoints(self,point1, point2):
60        """ Returns euclidean distance between two points
61        Args (distribution):
62            point1 ([float,float,float]) - coordinates of first point
63            point2 ([float,float,float]) - coordinates of second point
64        Args (frequency):
65            point1 (float) - value of the first sample
66            point2 (float) - value of the second sample
67
68        Returns:
69            [float]: euclidean distance
70        """
71        if self.frequency:
72            return abs(point1-point2)
73        else:
74            return np.sqrt(np.sum(np.square(point1-point2)))
75
76
77    def calculateDistanceMatrix(self,array1, array2):
78        """
79        Args:
80            array1 ([type]): array of size n with points representing the first Model
81            array2 ([type]): array of size n with points representing the second Model
82
83        Returns:
84            np.array(np.array(,dtype=float)): distance matrix n*n
85        """
86        n = len(array1)
87        distMatrix = np.zeros((n,n))
88        for i in range(n):
89            for j in range(n):
90                distMatrix[i][j] = self.calculateDistPoints(array1[i], array2[j])
91        return np.array(distMatrix)
92
93
94    def reduceEmptySignatures_Frequency(self,s1,s2):
95        """Removes samples from signatures if corresponding samples for both models have weight 0.
96        Args:
97            s1 (np.array(,dtype=np.float64)): values of samples
98            s2 (np.array(,dtype=np.float64)): values of samples
99
100        Returns:
101            s1new (np.array(,dtype=np.float64)): coordinates of samples after reduction
102            s2new (np.array(,dtype=np.float64)): coordinates of samples after reduction
103        """
104        lens = len(s1)
105        indices = []
106        for i in range(lens):
107            if s1[i]==0 and s2[i]==0:
108                    indices.append(i)
109
110        return np.delete(s1, indices), np.delete(s2, indices)
111
112
113    def reduceEmptySignatures_Density(self,s1,s2):
114        """Removes samples from signatures if corresponding samples for both models have weight 0.
115        Args:
116            s1 ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights]
117            s2 ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights]
118
119        Returns:
120            s1new ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights] after reduction
121            s2new ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights] after reduction
122        """
123        lens = len(s1[0])
124        indices = []
125        for i in range(lens):
126            if s1[1][i]==0 and s2[1][i]==0:
127                indices.append(i)
128
129        s1 = [np.delete(s1[0], indices, axis=0), np.delete(s1[1], indices, axis=0)]
130        s2 = [np.delete(s2[0], indices, axis=0), np.delete(s2[1], indices, axis=0)]
131        return s1, s2
132
133
134    def getSignatures(self,array,edges3,steps3):
135        """Generates signature for array representing the Model. Signature is composed of list of points [x,y,z] (float) and list of weights (int).
136
137        Args:
138            array (np.array(np.array(,dtype=float))): array with voxels representing the Model
139            edges3 ([np.array(,dtype=float),np.array(,dtype=float),np.array(,dtype=float)]): lists with edges for each step for each axis in order x,y,z
140            steps3 ([float,float,float]): [size of interval for x axis, size of interval for y axis, size of interval for y axis]
141
142        Returns (distribution):
143           signature [np.array(,dtype=np.float64),np.array(,dtype=np.float64)]: returns signatuere [np.array of points, np.array of weights]
144        Returns (frequency):
145           signature np.array(,dtype=np.float64): returns signatuere np.array of coefficients
146        """
147        edges_x,edges_y,edges_z = edges3
148        step_x,step_y,step_z=steps3
149        feature_array = []
150        weight_array = []
151        step_x_half = step_x/2
152        step_y_half = step_y/2
153        step_z_half = step_z/2
154        for x in range(len(edges_x[:-1])):
155            for y in range(len(edges_y[:-1])) :
156                for z in range(len(edges_z[:-1])):
157                    rows=np.where((array[:,0]> edges_x[x]) &
158                                  (array[:,0]<= edges_x[x+1]) &
159                                  (array[:,1]> edges_y[y]) &
160                                  (array[:,1]<= edges_y[y+1]) &
161                                  (array[:,2]> edges_z[z]) &
162                                  (array[:,2]<= edges_z[z+1]))
163                    if self.frequency:
164                        feature_array.append(len(array[rows]))
165                    else:
166                        weight, point = self.calculateNeighberhood(array[rows],[edges_x[x]+step_x_half,edges_y[y]+step_y_half,edges_z[z]+step_z_half])
167                        feature_array.append(point)
168                        weight_array.append(weight)
169
170        if self.frequency:
171            samples = np.array(feature_array,dtype=np.float64)
172            return abs(np.fft.fft(samples))
173        else:
174            return [np.array(feature_array,dtype=np.float64), np.array(weight_array,dtype=np.float64)]
175
176
177    def getSignaturesForPair(self,array1,array2):
178        """Generates signatures for given pair of models represented by array of voxels.
179        We calculate space for given models by taking the extremas for each axis and dividing the space by the resolution.
180        This divided space generate us samples which contains points. Each sample will have new coordinates which are mean of all points from it and weight which equals to the number of points.
181       
182        Args:
183            array1 (np.array(np.array(,dtype=float))): array with voxels representing model1
184            array2 (np.array(np.array(,dtype=float))): array with voxels representing model2
185
186        Returns:
187            s1 ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights]
188            s2 ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights]
189        """
190
191        min_x = np.min([np.min(array1[:,0]),np.min(array2[:,0])])
192        max_x = np.max([np.max(array1[:,0]),np.max(array2[:,0])])
193        min_y = np.min([np.min(array1[:,1]),np.min(array2[:,1])])
194        max_y = np.max([np.max(array1[:,1]),np.max(array2[:,1])])
195        min_z = np.min([np.min(array1[:,2]),np.min(array2[:,2])])
196        max_z = np.max([np.max(array1[:,2]),np.max(array2[:,2])])
197
198        # We request self.resolution+1 samples since we need self.resolution intervals
199        edges_x,step_x = np.linspace(min_x,max_x,self.resolution+1,retstep=True)
200        edges_y,step_y = np.linspace(min_y,max_y,self.resolution+1,retstep=True)
201        edges_z,step_z = np.linspace(min_z,max_z,self.resolution+1,retstep=True)
202       
203        for edges in (edges_x, edges_y, edges_z):  # EPSILON subtracted to deal with boundary voxels (one-sided open intervals and comparisons in loops in function getSignatures())
204            edges[0] -= self.EPSILON
205
206        edges3 = (edges_x,edges_y,edges_z)
207        steps3 = (step_x,step_y,step_z)
208       
209        s1 = self.getSignatures(array1,edges3,steps3)
210        s2 = self.getSignatures(array2,edges3,steps3)   
211       
212        return s1,s2
213
214
215    def getVoxels(self,geno):
216        """Generates voxels for genotype using frams.ModelGeometry
217
218        Args:
219            geno (string): representation of Model in one of the formats supported by Framsticks, http://www.framsticks.com/a/al_genotype.html
220
221        Returns:
222            np.array([np.array(,dtype=float)]: list of voxels representing the Model.
223        """
224        model = self.frams.Model.newFromString(geno)
225        align(model, self.fixedZaxis)
226        model_geometry = self.frams.ModelGeometry.forModel(model)
227
228        model_geometry.geom_density = self.density
229        voxels = np.array([np.array([p.x._value(),p.y._value(),p.z._value()]) for p in model_geometry.voxels()])
230        return voxels
231
232
233    def calculateDissimforVoxels(self, voxels1, voxels2):
234        """Calculates EMD for pair of voxels representing models.
235        Args:
236            voxels1 np.array([np.array(,dtype=float)]: list of voxels representing model1.
237            voxels2 np.array([np.array(,dtype=float)]: list of voxels representing model2.
238
239        Returns:
240            float: dissim for pair of list of voxels
241        """
242        numvox1 = len(voxels1)
243        numvox2 = len(voxels2)   
244
245        s1, s2 = self.getSignaturesForPair(voxels1, voxels2)
246
247        reduce_fun = self.reduceEmptySignatures_Frequency if self.frequency else self.reduceEmptySignatures_Density
248        if self.reduce_empty:
249            s1, s2 = reduce_fun(s1,s2)
250
251            if not self.frequency:
252                if numvox1 != sum(s1[1]) or numvox2 != sum(s2[1]):
253                    print("Voxel reduction didn't work properly")
254                    print("Base voxels fig1: ", numvox1, " fig2: ", numvox2)
255                    print("After reduction voxels fig1: ", sum(s1[1]), " fig2: ", sum(s2[1]))
256                    raise RuntimeError("Voxel reduction error!")
257       
258        if self.metric == 'l1':
259            if self.frequency:
260                out = np.linalg.norm((s1-s2), ord=1)
261            else:
262                out = np.linalg.norm((s1[1]-s2[1]), ord=1)
263
264        elif self.metric == 'l2':
265            if self.frequency:
266                out = np.linalg.norm((s1-s2))
267            else:
268                out = np.linalg.norm((s1[1]-s2[1]))
269
270        elif self.metric == 'emd':
271            if self.frequency:
272                num_points = len(s1)
273                dist_matrix = self.calculateDistanceMatrix(range(num_points),range(num_points))
274            else:
275                dist_matrix = self.calculateDistanceMatrix(s1[0],s2[0])
276
277            self.libm.fedisableexcept(0x04)  # change default flag value - don't cause exceptions when dividing by 0 (pyemd does it)
278
279            if self.frequency:
280                out = emd(s1,s2,np.array(dist_matrix,dtype=np.float64))
281            else:
282                out = emd(s1[1],s2[1],dist_matrix)
283
284            self.libm.feclearexcept(0x04) # restoring default flag values...
285            self.libm.feenableexcept(0x04)
286
287        else:
288            raise ValueError("Wrong metric '%s'"%self.metric)
289
290        return out
291
292
293    def calculateDissimforGeno(self, geno1, geno2):
294        """Calculates EMD for a pair of genotypes.
295        Args:
296            geno1 (string): representation of model1 in one of the formats supported by Framsticks, http://www.framsticks.com/a/al_genotype.html
297            geno2 (string): representation of model2 in one of the formats supported by Framsticks, http://www.framsticks.com/a/al_genotype.html
298
299        Returns:
300            float: dissim for pair of strings representing models.
301        """     
302
303        voxels1 = self.getVoxels(geno1)
304        voxels2 = self.getVoxels(geno2)
305
306        out = self.calculateDissimforVoxels(voxels1, voxels2)
307
308        if self.verbose == True:
309            print("Intervals: ", self.resolution)
310            print("Geno1:\n",geno1)
311            print("Geno2:\n",geno2)
312            print("EMD:\n",out)
313
314        return out
315
316
317    def getDissimilarityMatrix(self,listOfGeno):
318        """
319        Args:
320            listOfGeno ([string]): list of strings representing genotypes in one of the formats supported by Framsticks, http://www.framsticks.com/a/al_genotype.html
321
322        Returns:
323            np.array(np.array(,dtype=float)): dissimilarity matrix of EMD for given list of genotypes
324        """
325        numOfGeno = len(listOfGeno)
326        dissimMatrix = np.zeros(shape=[numOfGeno,numOfGeno])
327        listOfVoxels = [self.getVoxels(g) for g in listOfGeno]
328        for i in range(numOfGeno):
329            for j in range(numOfGeno):
330                dissimMatrix[i,j] = self.calculateDissimforVoxels(listOfVoxels[i], listOfVoxels[j])
331        return dissimMatrix
Note: See TracBrowser for help on using the repository browser.