source: framspy/dissimilarity/density_distribution.py @ 1302

Last change on this file since 1302 was 1295, checked in by Maciej Komosinski, 9 months ago

Updated for Python 3.9+

File size: 15.9 KB
Line 
1import numpy as np
2from pyemd import emd
3from ctypes import cdll
4from ctypes.util import find_library
5from .alignmodel import align
6
7class DensityDistribution:
8    """Two dissimilarity measures based on the spatial distribution of two Models. The Model bounding box is divided into a grid of equally-sized cuboids, the number of which is the 'resolution' parameter cubed. Then the Model surface is covered with points; the density of the surface sampling is determined by the 'density' parameter. There are two versions of the measure. In the default version ('frequency'=False), a signature of each cuboid is the centroid and the number of samples. In the 'frequency'=True version, FFT is computed from the vector containing the number of samples in each cuboid. The final result of the dissimilarity measure is the distance between the signatures and it can be computed using EMD, L1, or L2 norms (the 'metric' parameter).
9    """
10
11    OLD_FPEX = False # update floating point exception handling/pyemd problems (if still needed) for new python versions
12    if OLD_FPEX:
13        libm = cdll.LoadLibrary(find_library('m')) # for disabling/enabling floating point exceptions (division by zero occurs in the EMD library)
14    EPSILON = 0.0001
15   
16    def __init__(self, frams_module=None, density = 10, resolution = 8, reduce_empty=True, frequency=False, metric = 'emd', fixedZaxis=False, verbose=False):
17        """ __init__
18        Args:
19            density (int, optional): density of samplings for frams.ModelGeometry. Defaults to 10.
20            resolution (int, optional): How many intervals are used in each dimension to partition surface samples of Models in the 3D space.
21                The higher the value, the more detailed the comparison and the longer the calculations. Defaults to 3.
22            reduce_empty (bool, optional): If we should use reduction to remove blank samples. Defaults to True.
23            frequency (bool, optional): If we should use frequency distribution. Defaults to False.
24            metric (string, optional): The distance metric that should be used ('emd', 'l1', or 'l2'). Defaults to 'emd'.
25            fixedZaxis (bool, optional): If the z axis should be fixed during alignment. Defaults to False.
26            verbose (bool, optional): Turning on logging, works only for calculateEMDforGeno. Defaults to False.           
27        """
28        if frams_module is None:
29            raise ValueError('Framsticks module not provided!')
30        self.frams = frams_module
31
32        self.density = density
33        self.resolution = resolution
34        self.verbose = verbose
35        self.reduce_empty = reduce_empty
36        self.frequency = frequency
37        self.metric = metric
38        self.fixedZaxis = fixedZaxis
39
40
41    def calculateNeighberhood(self,array,mean_coords):
42        """ Calculates number of elements for given sample and set ups the center of this sample
43        to the center of mass (calculated by mean of every coordinate)
44        Args:
45            array ([[float,float,float],...,[float,float,float]]): array of voxels that belong to given sample.
46            mean_coords ([float,float,float]): default coordinates that are the
47                middle of the sample (used when number of voxels in sample is equal to 0)
48
49        Returns:
50            weight [int]: number of voxels in a sample
51            coordinates [float,float,float]: center of mass for a sample
52        """
53        weight = len(array)
54        if weight > 0:
55            point = [np.mean(array[:,0]),np.mean(array[:,1]),np.mean(array[:,2])]
56            return weight, point
57        else:
58            return 0, mean_coords
59
60
61    def calculateDistPoints(self,point1, point2):
62        """ Returns euclidean distance between two points
63        Args (distribution):
64            point1 ([float,float,float]) - coordinates of first point
65            point2 ([float,float,float]) - coordinates of second point
66        Args (frequency):
67            point1 (float) - value of the first sample
68            point2 (float) - value of the second sample
69
70        Returns:
71            [float]: euclidean distance
72        """
73        if self.frequency:
74            return abs(point1-point2)
75        else:
76            return np.sqrt(np.sum(np.square(point1-point2)))
77
78
79    def calculateDistanceMatrix(self,array1, array2):
80        """
81        Args:
82            array1 ([type]): array of size n with points representing the first Model
83            array2 ([type]): array of size n with points representing the second Model
84
85        Returns:
86            np.array(np.array(,dtype=float)): distance matrix n*n
87        """
88        n = len(array1)
89        distMatrix = np.zeros((n,n))
90        for i in range(n):
91            for j in range(n):
92                distMatrix[i][j] = self.calculateDistPoints(array1[i], array2[j])
93        return np.array(distMatrix)
94
95
96    def reduceEmptySignatures_Frequency(self,s1,s2):
97        """Removes samples from signatures if corresponding samples for both models have weight 0.
98        Args:
99            s1 (np.array(,dtype=np.float64)): values of samples
100            s2 (np.array(,dtype=np.float64)): values of samples
101
102        Returns:
103            s1new (np.array(,dtype=np.float64)): coordinates of samples after reduction
104            s2new (np.array(,dtype=np.float64)): coordinates of samples after reduction
105        """
106        lens = len(s1)
107        indices = []
108        for i in range(lens):
109            if s1[i]==0 and s2[i]==0:
110                    indices.append(i)
111
112        return np.delete(s1, indices), np.delete(s2, indices)
113
114
115    def reduceEmptySignatures_Density(self,s1,s2):
116        """Removes samples from signatures if corresponding samples for both models have weight 0.
117        Args:
118            s1 ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights]
119            s2 ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights]
120
121        Returns:
122            s1new ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights] after reduction
123            s2new ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights] after reduction
124        """
125        lens = len(s1[0])
126        indices = []
127        for i in range(lens):
128            if s1[1][i]==0 and s2[1][i]==0:
129                indices.append(i)
130
131        s1 = [np.delete(s1[0], indices, axis=0), np.delete(s1[1], indices, axis=0)]
132        s2 = [np.delete(s2[0], indices, axis=0), np.delete(s2[1], indices, axis=0)]
133        return s1, s2
134
135
136    def getSignatures(self,array,edges3,steps3):
137        """Generates signature for array representing the Model. Signature is composed of list of points [x,y,z] (float) and list of weights (int).
138
139        Args:
140            array (np.array(np.array(,dtype=float))): array with voxels representing the Model
141            edges3 ([np.array(,dtype=float),np.array(,dtype=float),np.array(,dtype=float)]): lists with edges for each step for each axis in order x,y,z
142            steps3 ([float,float,float]): [size of interval for x axis, size of interval for y axis, size of interval for y axis]
143
144        Returns (distribution):
145           signature [np.array(,dtype=np.float64),np.array(,dtype=np.float64)]: returns signatuere [np.array of points, np.array of weights]
146        Returns (frequency):
147           signature np.array(,dtype=np.float64): returns signatuere np.array of coefficients
148        """
149        edges_x,edges_y,edges_z = edges3
150        step_x,step_y,step_z=steps3
151        feature_array = []
152        weight_array = []
153        step_x_half = step_x/2
154        step_y_half = step_y/2
155        step_z_half = step_z/2
156        for x in range(len(edges_x[:-1])):
157            for y in range(len(edges_y[:-1])) :
158                for z in range(len(edges_z[:-1])):
159                    rows=np.where((array[:,0]> edges_x[x]) &
160                                  (array[:,0]<= edges_x[x+1]) &
161                                  (array[:,1]> edges_y[y]) &
162                                  (array[:,1]<= edges_y[y+1]) &
163                                  (array[:,2]> edges_z[z]) &
164                                  (array[:,2]<= edges_z[z+1]))
165                    if self.frequency:
166                        feature_array.append(len(array[rows]))
167                    else:
168                        weight, point = self.calculateNeighberhood(array[rows],[edges_x[x]+step_x_half,edges_y[y]+step_y_half,edges_z[z]+step_z_half])
169                        feature_array.append(point)
170                        weight_array.append(weight)
171
172        if self.frequency:
173            samples = np.array(feature_array,dtype=np.float64)
174            return samples
175        else:
176            return [np.array(feature_array,dtype=np.float64), np.array(weight_array,dtype=np.float64)]
177
178
179    def getSignaturesForPair(self,array1,array2):
180        """Generates signatures for given pair of models represented by array of voxels.
181        We calculate space for given models by taking the extremas for each axis and dividing the space by the resolution.
182        This divided space generate us samples which contains points. Each sample will have new coordinates which are mean of all points from it and weight which equals to the number of points.
183       
184        Args:
185            array1 (np.array(np.array(,dtype=float))): array with voxels representing model1
186            array2 (np.array(np.array(,dtype=float))): array with voxels representing model2
187
188        Returns:
189            s1 ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights]
190            s2 ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights]
191        """
192
193        min_x = np.min([np.min(array1[:,0]),np.min(array2[:,0])])
194        max_x = np.max([np.max(array1[:,0]),np.max(array2[:,0])])
195        min_y = np.min([np.min(array1[:,1]),np.min(array2[:,1])])
196        max_y = np.max([np.max(array1[:,1]),np.max(array2[:,1])])
197        min_z = np.min([np.min(array1[:,2]),np.min(array2[:,2])])
198        max_z = np.max([np.max(array1[:,2]),np.max(array2[:,2])])
199
200        # We request self.resolution+1 samples since we need self.resolution intervals
201        edges_x,step_x = np.linspace(min_x,max_x,self.resolution+1,retstep=True)
202        edges_y,step_y = np.linspace(min_y,max_y,self.resolution+1,retstep=True)
203        edges_z,step_z = np.linspace(min_z,max_z,self.resolution+1,retstep=True)
204       
205        for edges in (edges_x, edges_y, edges_z):  # EPSILON subtracted to deal with boundary voxels (one-sided open intervals and comparisons in loops in function getSignatures())
206            edges[0] -= self.EPSILON
207
208        edges3 = (edges_x,edges_y,edges_z)
209        steps3 = (step_x,step_y,step_z)
210       
211        s1 = self.getSignatures(array1,edges3,steps3)
212        s2 = self.getSignatures(array2,edges3,steps3)   
213       
214        return s1,s2
215
216
217    def getVoxels(self,geno):
218        """Generates voxels for genotype using frams.ModelGeometry
219
220        Args:
221            geno (string): representation of Model in one of the formats supported by Framsticks, http://www.framsticks.com/a/al_genotype.html
222
223        Returns:
224            np.array([np.array(,dtype=float)]: list of voxels representing the Model.
225        """
226        model = self.frams.Model.newFromString(geno)
227        align(model, self.fixedZaxis)
228        model_geometry = self.frams.ModelGeometry.forModel(model)
229
230        model_geometry.geom_density = self.density
231        voxels = np.array([np.array([p.x._value(),p.y._value(),p.z._value()]) for p in model_geometry.voxels()])
232        return voxels
233
234
235    def normalize(self, signature):
236        """Normalizes the signature values by dividing each element by the sum of all elements.
237        Args:
238            signature np.array(,dtype=float): A one-dimensional array of signature values.
239
240        Returns:
241            np.array(,dtype=float): A one-dimensional array of normalized signature values.
242        """
243        total = np.sum(signature)
244        return np.divide(signature, total)
245
246
247    def calculateDissimforVoxels(self, voxels1, voxels2):
248        """Calculates EMD for pair of voxels representing models.
249        Args:
250            voxels1 np.array([np.array(,dtype=float)]: list of voxels representing model1.
251            voxels2 np.array([np.array(,dtype=float)]: list of voxels representing model2.
252
253        Returns:
254            float: dissim for pair of list of voxels
255        """
256        numvox1 = len(voxels1)
257        numvox2 = len(voxels2)   
258
259        s1, s2 = self.getSignaturesForPair(voxels1, voxels2)
260
261        reduce_fun = self.reduceEmptySignatures_Frequency if self.frequency else self.reduceEmptySignatures_Density
262        if self.reduce_empty:
263            s1, s2 = reduce_fun(s1,s2)
264
265            if not self.frequency:
266                if numvox1 != sum(s1[1]) or numvox2 != sum(s2[1]):
267                    print("Voxel reduction didn't work properly")
268                    print("Base voxels fig1: ", numvox1, " fig2: ", numvox2)
269                    print("After reduction voxels fig1: ", sum(s1[1]), " fig2: ", sum(s2[1]))
270                    raise RuntimeError("Voxel reduction error!")
271       
272        if self.frequency:
273           s1 = abs(np.fft.fft(s1))
274           s2 = abs(np.fft.fft(s2))
275       
276        if self.metric == 'l1':
277            if self.frequency:
278                out = np.linalg.norm((s1-s2), ord=1)
279            else:
280                out = np.linalg.norm((s1[1]-s2[1]), ord=1)
281
282        elif self.metric == 'l2':
283            if self.frequency:
284                out = np.linalg.norm((s1-s2))
285            else:
286                out = np.linalg.norm((s1[1]-s2[1]))
287
288        elif self.metric == 'emd':
289            if self.frequency:
290                num_points = np.linspace(0, 1, len(s1), True)
291                dist_matrix = self.calculateDistanceMatrix(num_points,num_points)
292            else:
293                dist_matrix = self.calculateDistanceMatrix(s1[0],s2[0])
294
295            if self.OLD_FPEX:
296                self.libm.fedisableexcept(0x04)  # change default flag value - don't cause exceptions when dividing by 0 (pyemd does it)
297
298            if self.frequency:
299                out = emd(self.normalize(s1),self.normalize(s2),np.array(dist_matrix,dtype=np.float64))
300            else:
301                out = emd(self.normalize(s1[1]),self.normalize(s2[1]),dist_matrix)
302
303            if self.OLD_FPEX:
304                self.libm.feclearexcept(0x04) # restoring default flag values...
305                self.libm.feenableexcept(0x04)
306
307        else:
308            raise ValueError("Wrong metric '%s'"%self.metric)
309
310        return out
311
312
313    def calculateDissimforGeno(self, geno1, geno2):
314        """Calculates EMD for a pair of genotypes.
315        Args:
316            geno1 (string): representation of model1 in one of the formats supported by Framsticks, http://www.framsticks.com/a/al_genotype.html
317            geno2 (string): representation of model2 in one of the formats supported by Framsticks, http://www.framsticks.com/a/al_genotype.html
318
319        Returns:
320            float: dissim for pair of strings representing models.
321        """     
322
323        voxels1 = self.getVoxels(geno1)
324        voxels2 = self.getVoxels(geno2)
325
326        out = self.calculateDissimforVoxels(voxels1, voxels2)
327
328        if self.verbose == True:
329            print("Intervals: ", self.resolution)
330            print("Geno1:\n",geno1)
331            print("Geno2:\n",geno2)
332            print("EMD:\n",out)
333
334        return out
335
336
337    def getDissimilarityMatrix(self,listOfGeno):
338        """
339        Args:
340            listOfGeno ([string]): list of strings representing genotypes in one of the formats supported by Framsticks, http://www.framsticks.com/a/al_genotype.html
341
342        Returns:
343            np.array(np.array(,dtype=float)): dissimilarity matrix of EMD for given list of genotypes
344        """
345        numOfGeno = len(listOfGeno)
346        dissimMatrix = np.zeros(shape=[numOfGeno,numOfGeno])
347        listOfVoxels = [self.getVoxels(g) for g in listOfGeno]
348        for i in range(numOfGeno):
349            for j in range(numOfGeno):
350                dissimMatrix[i,j] = self.calculateDissimforVoxels(listOfVoxels[i], listOfVoxels[j])
351        return dissimMatrix
Note: See TracBrowser for help on using the repository browser.