source: framspy/dissimilarity/density-distribution.py @ 1223

Last change on this file since 1223 was 1223, checked in by oriona, 2 years ago

Description of the measures added.

File size: 15.3 KB
Line 
1import numpy as np
2from pyemd import emd
3from ctypes import cdll
4from ctypes.util import find_library
5from alignmodel import align
6
7class DensityDistribution:
8    """ Dissimilarity measures based on the distribution. The structure's bounding box is divided into equal-sized cuboids, the number of which depends on the 'steps' parameter. Then the structure's surface is covered with points, the density of the surface's sampling depends on the 'density' parameter. There are two versions of the measure. In the default version ('frequency'=False) a signature is computed as centroids and a number of samples in each cuboid. In the 'frequency' version FFT is computed from the vector containing the number of samples in each cuboid. The distance between signatures can be computed using EMD, L1, or L2 norms.
9    """
10    libm = cdll.LoadLibrary(find_library('m'))
11    EPSILON = 0.0001
12    def __init__(self, frams_module=None, density = 10, steps = 3, reduce=True, frequency=False, metric = 'emd', fixedZaxis=False, verbose=False):
13        """ __init__
14        Args:
15            density (int, optional): density of samplings for frams.ModelGeometry . Defaults to 10.
16            steps (int, optional): How many steps are used for sampling the space of voxels,
17                The higher the value, the more accurate the sampling and the longer the calculations. Defaults to 3.
18            reduce (bool, optional): If we should use reduction to remove blank samples. Defaults to True.
19            frequency (bool, optional): If we should use frequency distribution. Defaults to False.
20            metric (string, optional): The distance metric that should be used ('emd', 'l1', or 'l2'). Defaults to 'emd'.
21            fixedZaxis (bool, optional): If the z axis should be fixed during alignment. Defaults to False.
22            verbose (bool, optional): Turning on logging, works only for calculateEMDforGeno. Defaults to False.           
23        """
24        if frams_module is None:
25            raise ValueError('Framsticks module not provided!')
26        self.frams = frams_module
27
28        self.density = density
29        self.steps = steps
30        self.verbose = verbose
31        self.reduce = reduce
32        self.frequency = frequency
33        self.metric = metric
34        self.fixedZaxis = fixedZaxis
35
36
37    def calculateNeighberhood(self,array,mean_coords):
38        """ Calculates number of elements for given sample and set ups the center of this sample
39        to the center of mass (calculated by mean of every coordinate)
40        Args:
41            array ([[float,float,float],...,[float,float,float]]): array of voxels that belong to given sample.
42            mean_coords ([float,float,float]): default coordinates that are the
43                middle of the sample (used when number of voxels in sample is equal to 0)
44
45        Returns:
46            weight [int]: number of voxels in a sample
47            coordinates [float,float,float]: center of mass for a sample
48        """
49        weight = len(array)
50        if weight > 0:
51            point = [np.mean(array[:,0]),np.mean(array[:,1]),np.mean(array[:,2])]
52            return weight, point
53        else:
54            return 0, mean_coords
55
56
57    def calculateDistPoints(self,point1, point2):
58        """ Returns euclidean distance between two points
59        Args (distribution):
60            point1 ([float,float,float]) - coordinates of first point
61            point2 ([float,float,float]) - coordinates of second point
62        Args (frequency):
63            point1 (float) - value of the first sample
64            point2 (float) - value of the second sample
65
66        Returns:
67            [float]: euclidean distance
68        """
69        if self.frequency:
70            return abs(point1-point2)
71        else:
72            return np.sqrt(np.sum(np.square(point1-point2)))
73
74
75    def calculateDistanceMatrix(self,array1, array2):
76        """
77        Args:
78            array1 ([type]): array of size n with points representing firsts model
79            array2 ([type]): array of size n with points representing second model
80
81        Returns:
82            np.array(np.array(,dtype=float)): distance matrix n x n
83        """
84        n = len(array1)
85        distMatrix = np.zeros((n,n))
86        for i in range(n):
87            for j in range(n):
88                distMatrix[i][j] = self.calculateDistPoints(array1[i], array2[j])
89        return np.array(distMatrix)
90
91
92    def reduceSignaturesFreq(self,s1,s2):
93        """Removes samples from signatures if corresponding samples for both models have weight 0.
94        Args:
95            s1 (np.array(,dtype=np.float64)): values of samples
96            s2 (np.array(,dtype=np.float64)): values of samples
97
98        Returns:
99            s1new (np.array(,dtype=np.float64)): coordinates of samples after reduction
100            s2new (np.array(,dtype=np.float64)): coordinates of samples after reduction
101        """
102        lens = len(s1)
103        indices = []
104        for i in range(lens):
105            if s1[i]==0 and s2[i]==0:
106                    indices.append(i)
107
108        return np.delete(s1, indices), np.delete(s2, indices)
109
110
111    def reduceSignaturesDens(self,s1,s2):
112        """Removes samples from signatures if corresponding samples for both models have weight 0.
113        Args:
114            s1 ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights]
115            s2 ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights]
116
117        Returns:
118            s1new ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights] after reduction
119            s2new ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights] after reduction
120        """
121        lens = len(s1[0])
122        indices = []
123        for i in range(lens):
124            if s1[1][i]==0 and s2[1][i]==0:
125                indices.append(i)
126
127        s1 = [np.delete(s1[0], indices, axis=0), np.delete(s1[1], indices, axis=0)]
128        s2 = [np.delete(s2[0], indices, axis=0), np.delete(s2[1], indices, axis=0)]
129        return s1, s2
130
131
132    def getSignatures(self,array,steps_all,step_all):
133        """Generates signature for array representing model. Signature is composed of list of points [x,y,z] (float) and list of weights (int).
134
135        Args:
136            array (np.array(np.array(,dtype=float))): array with voxels representing model
137            steps_all ([np.array(,dtype=float),np.array(,dtype=float),np.array(,dtype=float)]): lists with edges for each step for each axis in order x,y,z
138            step_all ([float,float,float]): [size of step for x axis, size of step for y axis, size of step for y axis]
139
140        Returns (distribution):
141           signature [np.array(,dtype=np.float64),np.array(,dtype=np.float64)]: returns signatuere [np.array of points, np.array of weights]
142        Returns (frequency):
143           signature np.array(,dtype=np.float64): returns signatuere np.array of coefficients
144        """
145        x_steps,y_steps,z_steps = steps_all
146        x_step,y_step,z_step=step_all
147        feature_array = []
148        weight_array = []
149        step_half_x = x_step/2
150        step_half_y = y_step/2
151        step_half_z = z_step/2
152        for x in range(len(x_steps[:-1])):
153            for y in range(len(y_steps[:-1])) :
154                for z in range(len(z_steps[:-1])):
155                    rows=np.where((array[:,0]> x_steps[x]) &
156                                  (array[:,0]<= x_steps[x+1]) &
157                                  (array[:,1]> y_steps[y]) &
158                                  (array[:,1]<= y_steps[y+1]) &
159                                  (array[:,2]> z_steps[z]) &
160                                  (array[:,2]<= z_steps[z+1]))
161                    if self.frequency:
162                        feature_array.append(len(array[rows]))
163                    else:
164                        weight, point = self.calculateNeighberhood(array[rows],[x_steps[x]+step_half_x,y_steps[y]+step_half_y,z_steps[z]+step_half_z])
165                        feature_array.append(point)
166                        weight_array.append(weight)
167
168        if self.frequency:
169            samples = np.array(feature_array,dtype=np.float64)
170            return abs(np.fft.fft(samples))
171        else:
172            return [np.array(feature_array,dtype=np.float64), np.array(weight_array,dtype=np.float64)]
173
174
175    def getSignaturesForPair(self,array1,array2):
176        """Generates signatures for given pair of models represented by array of voxels.
177        We calculate space for given models by taking the extremas for each axis and dividing the space by the number of steps.
178        This divided space generate us samples which contains points. Each sample will have new coordinates which are mean of all points from it and weight
179        which equals to the number of points.
180       
181        Args:
182            array1 (np.array(np.array(,dtype=float))): array with voxels representing model1
183            array2 (np.array(np.array(,dtype=float))): array with voxels representing model2
184            steps (int, optional): How many steps is used for sampling space of voxels. Defaults to self.steps (3).
185               
186        Returns:
187            s1 ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights]
188            s2 ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights]
189        """
190
191        min_x = np.min([np.min(array1[:,0]),np.min(array2[:,0])])
192        max_x = np.max([np.max(array1[:,0]),np.max(array2[:,0])])
193        min_y = np.min([np.min(array1[:,1]),np.min(array2[:,1])])
194        max_y = np.max([np.max(array1[:,1]),np.max(array2[:,1])])
195        min_z = np.min([np.min(array1[:,2]),np.min(array2[:,2])])
196        max_z = np.max([np.max(array1[:,2]),np.max(array2[:,2])])
197
198        # We request self.steps+1 samples since we need self.steps intervals
199        x_steps,x_step = np.linspace(min_x,max_x,self.steps+1,retstep=True)
200        y_steps,y_step = np.linspace(min_y,max_y,self.steps+1,retstep=True)
201        z_steps,z_step = np.linspace(min_z,max_z,self.steps+1,retstep=True)
202       
203        for intervals in (x_steps, y_steps, z_steps):  # EPSILON subtracted to deal with boundary voxels (one-sided open intervals and comparisons in loops in function getSignatures())
204            intervals[0] -= self.EPSILON
205
206        steps_all = (x_steps,y_steps,z_steps)
207        step_all = (x_step,y_step,z_step)
208       
209        s1 = self.getSignatures(array1,steps_all,step_all)
210        s2 = self.getSignatures(array2,steps_all,step_all)   
211       
212        return s1,s2
213
214
215    def getVoxels(self,geno):
216        """Generates voxels for genotype using frams.ModelGeometry
217
218        Args:
219            geno (string): representation of model in one of the formats handled by frams http://www.framsticks.com/a/al_genotype.html
220
221        Returns:
222            np.array([np.array(,dtype=float)]: list of voxels representing model.
223        """
224        model = self.frams.Model.newFromString(geno)
225        align(model, self.fixedZaxis)
226        model_geometry = self.frams.ModelGeometry.forModel(model)
227
228        model_geometry.geom_density = self.density
229        voxels = np.array([np.array([p.x._value(),p.y._value(),p.z._value()]) for p in model_geometry.voxels()])
230        return voxels
231
232
233    def calculateDissimforVoxels(self, voxels1, voxels2):
234        """Calculates EMD for pair of voxels representing models.
235        Args:
236            voxels1 np.array([np.array(,dtype=float)]: list of voxels representing model1.
237            voxels2 np.array([np.array(,dtype=float)]: list of voxels representing model2.
238            steps (int, optional): How many steps is used for sampling space of voxels. Defaults to self.steps (3).
239
240        Returns:
241            float: dissim for pair of list of voxels
242        """
243        numvox1 = len(voxels1)
244        numvox2 = len(voxels2)   
245
246        s1, s2 = self.getSignaturesForPair(voxels1, voxels2)
247
248        if numvox1 != sum(s1[1]) or numvox2 != sum(s2[1]):
249            print("Bad signature!")
250            print("Base voxels fig1: ", numvox1, " fig2: ", numvox2)
251            print("After reduction voxels fig1: ", sum(s1[1]), " fig2: ", sum(s2[1]))
252            raise ValueError("Bad signature!")
253
254        reduce_fun = self.reduceSignaturesFreq if self.frequency else self.reduceSignaturesDens
255        if self.reduce:
256            s1, s2 = reduce_fun(s1,s2)
257
258            if not self.frequency:
259                if numvox1 != sum(s1[1]) or numvox2 != sum(s2[1]):
260                    print("Voxel reduction didnt work properly")
261                    print("Base voxels fig1: ", numvox1, " fig2: ", numvox2)
262                    print("After reduction voxels fig1: ", sum(s1[1]), " fig2: ", sum(s2[1]))
263       
264        if self.metric == 'l1':
265            if self.frequency:
266                out = np.linalg.norm((s1-s2), ord=1)
267            else:
268                out = np.linalg.norm((s1[1]-s2[1]), ord=1)
269
270        elif self.metric == 'l2':
271            if self.frequency:
272                out = np.linalg.norm((s1-s2))
273            else:
274                out = np.linalg.norm((s1[1]-s2[1]))
275
276        elif self.metric == 'emd':
277            if self.frequency:
278                num_points = len(s1)
279                dist_matrix = self.calculateDistanceMatrix(range(num_points),range(num_points))
280            else:
281                dist_matrix = self.calculateDistanceMatrix(s1[0],s2[0])
282
283            self.libm.fedisableexcept(0x04)  # allowing for operation divide by 0 because pyemd requiers it.
284
285            if self.frequency:
286                out = emd(s1,s2,np.array(dist_matrix,dtype=np.float64))
287            else:
288                out = emd(s1[1],s2[1],dist_matrix)
289
290            self.libm.feclearexcept(0x04) # disabling operation divide by 0 because framsticks doesnt like it.
291            self.libm.feenableexcept(0x04)
292
293        else:
294            raise ValueError("Wrong metric '%s'"%self.metric)
295
296        return out
297
298
299    def calculateDissimforGeno(self, geno1, geno2):
300        """Calculates EMD for a pair of genotypes.
301        Args:
302            geno1 (string): representation of model1 in one of the formats handled by frams http://www.framsticks.com/a/al_genotype.html
303            geno2 (string): representation of model2 in one of the formats handled by frams http://www.framsticks.com/a/al_genotype.html
304            steps (int, optional): How many steps is used for sampling space of voxels. Defaults to self.steps (3).
305
306        Returns:
307            float: dissim for pair of strings representing models.
308        """     
309
310        voxels1 = self.getVoxels(geno1)
311        voxels2 = self.getVoxels(geno2)
312
313        out = self.calculateDissimforVoxels(voxels1, voxels2)
314
315        if self.verbose == True:
316            print("Steps: ", self.steps)
317            print("Geno1:\n",geno1)
318            print("Geno2:\n",geno2)
319            print("EMD:\n",out)
320
321        return out
322
323
324    def getDissimilarityMatrix(self,listOfGeno):
325        """
326        Args:
327            listOfGeno ([string]): list of strings representing genotypes in one of the formats handled by frams http://www.framsticks.com/a/al_genotype.html
328
329        Returns:
330            np.array(np.array(,dtype=float)): dissimilarity matrix of EMD for given list of genotypes
331        """
332        numOfGeno = len(listOfGeno)
333        dissimMatrix = np.zeros(shape=[numOfGeno,numOfGeno])
334        listOfVoxels = [self.getVoxels(g) for g in listOfGeno]
335        for i in range(numOfGeno):
336            for j in range(numOfGeno):
337                dissimMatrix[i,j] = self.calculateDissimforVoxels(listOfVoxels[i], listOfVoxels[j])
338        return dissimMatrix
Note: See TracBrowser for help on using the repository browser.