Context Navigation

density_distribution.py @ 1329

Last change on this file since 1329 was 1329, checked in by Maciej Komosinski, 3 days ago
Cosmetic
File size: 16.3 KB

Line
1	import numpy as np
2	from pyemd import emd
3	from ctypes import cdll
4	from ctypes.util import find_library
5	from .alignmodel import align
6
class DensityDistribution:
    """Two dissimilarity measures based on the spatial distribution of two Models.

    The common bounding box of the two Models is divided into a grid of equally-sized
    cuboids, the number of which is the 'resolution' parameter cubed. Then the Model
    surface is covered with points; the density of the surface sampling is determined
    by the 'density' parameter. There are two versions of the measure. In the default
    version ('frequency'=False), the signature of each cuboid is the centroid and the
    number of samples. In the 'frequency'=True version, FFT is computed from the vector
    containing the number of samples in each cuboid. The final result of the
    dissimilarity measure is the distance between the signatures and it can be computed
    using EMD, L1, or L2 norms (the 'metric' parameter).
    """

    # The "m" library is used only for disabling/enabling floating point exceptions
    # (division by zero occurs in the pyemd library).
    libm = find_library('m')
    if libm is not None:  # libm.so (the mathematical library) is a part of Linux ecosystem - always present
        libm = cdll.LoadLibrary(libm)
    else:
        # but on Windows, pyemd does not seem to cause floating point exceptions
        print('\nWarning: The "m" library not found - floating point exceptions in pyemd may occur...\n')

    # Margin subtracted from the lowest grid edge of every axis, so that samples lying
    # exactly on the lower boundary still fall into the first (left-open) interval.
    EPSILON = 0.0001

    # Flag passed to fedisableexcept()/feclearexcept()/feenableexcept() around pyemd calls.
    # NOTE(review): per the original comments it targets division by zero; the numeric
    # value is presumably the x86 one - confirm before relying on it on other architectures.
    _FPE_FLAG = 0x04

    def __init__(self, frams_module=None, density=10, resolution=8, reduce_empty=True, frequency=False, metric='emd', fixedZaxis=False, verbose=False):
        """Stores the Framsticks module and the parameters of the measure.

        Args:
            frams_module: the Framsticks module used to build Models and their geometry. Required.
            density (int, optional): density of samplings for frams.ModelGeometry. Defaults to 10.
            resolution (int, optional): How many intervals are used in each dimension to partition surface samples of Models in the 3D space.
                The higher the value, the more detailed the comparison and the longer the calculations. Defaults to 8.
            reduce_empty (bool, optional): If we should use reduction to remove blank samples. Defaults to True.
            frequency (bool, optional): If we should use frequency distribution. Defaults to False.
            metric (string, optional): The distance metric that should be used ('emd', 'l1', or 'l2'). Defaults to 'emd'.
                The value is validated only when a dissimilarity is actually calculated.
            fixedZaxis (bool, optional): If the z axis should be fixed during alignment. Defaults to False.
            verbose (bool, optional): Turning on logging, works only for calculateDissimforGeno. Defaults to False.

        Raises:
            ValueError: when frams_module is None.
        """
        if frams_module is None:
            raise ValueError('Framsticks module not provided!')
        self.frams = frams_module

        self.density = density
        self.resolution = resolution
        self.verbose = verbose
        self.reduce_empty = reduce_empty
        self.frequency = frequency
        self.metric = metric
        self.fixedZaxis = fixedZaxis

    def calculateNeighborhood(self, array, mean_coords):
        """Calculates the number of elements in a given sample and its center of mass.

        Args:
            array ([[float,float,float],...,[float,float,float]]): array of voxels that belong to given sample.
            mean_coords ([float,float,float]): default coordinates that are the
                middle of the sample (used when number of voxels in sample is equal to 0)

        Returns:
            weight [int]: number of voxels in a sample
            coordinates [float,float,float]: center of mass for a sample (mean of every coordinate),
                or mean_coords unchanged when the sample is empty
        """
        weight = len(array)
        if weight == 0:
            return 0, mean_coords
        # equivalent to [np.mean(array[:,0]),np.mean(array[:,1]),np.mean(array[:,2])]
        return weight, np.mean(array, axis=0)

    def calculateDistPoints(self, point1, point2):
        """Returns Euclidean distance between two samples (1D for frequency) or points (3D for distribution).

        Args (frequency):
            point1 (float) - value of the first sample
            point2 (float) - value of the second sample
        Args (distribution):
            point1 ([float,float,float]) - coordinates of the first point
            point2 ([float,float,float]) - coordinates of the second point

        Returns:
            [float]: Euclidean distance
        """
        if self.frequency:
            return abs(point1 - point2)
        return np.linalg.norm(point1 - point2, ord=2)

    def calculateDistanceMatrix(self, array1, array2):
        """Builds the matrix of pairwise distances between the points of two signatures.

        Args:
            array1 ([type]): array of size n with points representing the first Model
            array2 ([type]): array of size n with points representing the second Model

        Returns:
            np.array(np.array(,dtype=float)): distance matrix n*n, where n is len(array1)
        """
        n = len(array1)
        distMatrix = np.zeros((n, n))
        for i in range(n):
            for j in range(n):
                distMatrix[i][j] = self.calculateDistPoints(array1[i], array2[j])
        return distMatrix

    def reduceEmptySignatures_Frequency(self, s1, s2):
        """Removes samples from signatures if corresponding samples for both models have weight 0.

        Args:
            s1 (np.array(,dtype=np.float64)): values of samples
            s2 (np.array(,dtype=np.float64)): values of samples

        Returns:
            s1new (np.array(,dtype=np.float64)): values of samples after reduction
            s2new (np.array(,dtype=np.float64)): values of samples after reduction
        """
        s1 = np.asarray(s1)
        s2 = np.asarray(s2)
        keep = ~((s1 == 0) & (s2 == 0))  # drop only the positions that are empty in BOTH signatures
        return s1[keep], s2[keep]

    def reduceEmptySignatures_Density(self, s1, s2):
        """Removes samples from signatures if corresponding samples for both models have weight 0.

        Args:
            s1 ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights]
            s2 ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights]

        Returns:
            s1new ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights] after reduction
            s2new ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights] after reduction
        """
        coords1, weights1 = np.asarray(s1[0]), np.asarray(s1[1])
        coords2, weights2 = np.asarray(s2[0]), np.asarray(s2[1])
        keep = ~((weights1 == 0) & (weights2 == 0))  # drop only the samples that are empty in BOTH signatures
        return [coords1[keep], weights1[keep]], [coords2[keep], weights2[keep]]

    def getSignatures(self, array, edges3, steps3):
        """Generates signature for array representing the Model.

        The grid cells are visited in x, y, z nesting order (z changes fastest); a voxel belongs
        to a cell when, on every axis, it is greater than the lower edge and not greater than
        the upper edge of that cell.

        Args:
            array (np.array(np.array(,dtype=float))): array with voxels representing the Model
            edges3 ([np.array(,dtype=float),np.array(,dtype=float),np.array(,dtype=float)]): lists with edges for each step for each axis in order x,y,z
            steps3 ([float,float,float]): [size of interval for x axis, size of interval for y axis, size of interval for z axis]

        Returns (frequency):
            signature np.array(,dtype=np.float64): np.array with the number of voxels in each cell
        Returns (distribution):
            signature [np.array(,dtype=np.float64),np.array(,dtype=np.float64)]: [np.array of points, np.array of weights]
        """
        edges_x, edges_y, edges_z = edges3
        step_x, step_y, step_z = steps3
        half_x, half_y, half_z = step_x / 2, step_y / 2, step_z / 2
        xs, ys, zs = array[:, 0], array[:, 1], array[:, 2]

        feature_array = []
        weight_array = []
        for ix in range(len(edges_x) - 1):
            # the masks of the outer axes do not depend on the inner loops, so they are computed once
            in_x = (xs > edges_x[ix]) & (xs <= edges_x[ix + 1])
            for iy in range(len(edges_y) - 1):
                in_xy = in_x & (ys > edges_y[iy]) & (ys <= edges_y[iy + 1])
                for iz in range(len(edges_z) - 1):
                    in_cell = in_xy & (zs > edges_z[iz]) & (zs <= edges_z[iz + 1])
                    if self.frequency:
                        feature_array.append(np.count_nonzero(in_cell))
                    else:
                        # the geometric middle of the cell is the fallback position of an empty sample
                        cell_middle = [edges_x[ix] + half_x, edges_y[iy] + half_y, edges_z[iz] + half_z]
                        weight, point = self.calculateNeighborhood(array[in_cell], cell_middle)
                        feature_array.append(point)
                        weight_array.append(weight)

        if self.frequency:
            return np.array(feature_array, dtype=np.float64)
        return [np.array(feature_array, dtype=np.float64), np.array(weight_array, dtype=np.float64)]

    def getSignaturesForPair(self, array1, array2):
        """Generates signatures for a given pair of models represented by array of voxels.

        The space for the given models is determined by taking the extrema of both models for each
        axis; this space is divided into 'resolution' intervals per axis. Both signatures are then
        generated on this common grid (see getSignatures() for their format).

        Args:
            array1 (np.array(np.array(,dtype=float))): array with voxels representing model1
            array2 (np.array(np.array(,dtype=float))): array with voxels representing model2

        Returns:
            s1: signature of model1 - [coordinates of samples, weights], or the array of counts in the frequency version
            s2: signature of model2 - [coordinates of samples, weights], or the array of counts in the frequency version
        """
        lows = np.minimum(array1[:, :3].min(axis=0), array2[:, :3].min(axis=0))
        highs = np.maximum(array1[:, :3].max(axis=0), array2[:, :3].max(axis=0))

        edges3 = []
        steps3 = []
        for low, high in zip(lows, highs):
            # We request self.resolution+1 samples since we need self.resolution intervals
            edges, step = np.linspace(low, high, self.resolution + 1, retstep=True)
            # EPSILON subtracted to deal with boundary voxels (one-sided open intervals and comparisons in loops in function getSignatures())
            edges[0] -= self.EPSILON
            edges3.append(edges)
            steps3.append(step)
        edges3 = tuple(edges3)
        steps3 = tuple(steps3)

        s1 = self.getSignatures(array1, edges3, steps3)
        s2 = self.getSignatures(array2, edges3, steps3)
        return s1, s2

    def getVoxels(self, geno):
        """Generates voxels for genotype using frams.ModelGeometry.

        The Model is aligned (see align() and the 'fixedZaxis' parameter) before its geometry is sampled.

        Args:
            geno (string): representation of Model in one of the formats supported by Framsticks, http://www.framsticks.com/a/al_genotype.html

        Returns:
            np.array([np.array(,dtype=float)]: list of voxels representing the Model.
        """
        model = self.frams.Model.newFromString(geno)
        align(model, self.fixedZaxis)
        model_geometry = self.frams.ModelGeometry.forModel(model)

        model_geometry.geom_density = self.density
        voxels = np.array([np.array([p.x._value(), p.y._value(), p.z._value()]) for p in model_geometry.voxels()])
        return voxels

    def normalize(self, signature):
        """Normalizes the signature values by dividing each element by the sum of all elements.

        Args:
            signature np.array(,dtype=float): A one-dimensional array of signature values.

        Returns:
            np.array(,dtype=float): A one-dimensional array of normalized signature values.
        """
        total = np.sum(signature)
        return np.divide(signature, total)

    def _canToggleFpeTraps(self):
        """Tells whether the loaded "m" library offers the functions used to switch floating point traps.

        fedisableexcept()/feenableexcept() are GNU extensions, so a libm that loads successfully
        may still lack them; looking up a missing symbol through ctypes raises AttributeError,
        which hasattr() turns into False.
        """
        libm = self.libm
        if libm is None:
            return False
        return all(hasattr(libm, name) for name in ('fedisableexcept', 'feclearexcept', 'feenableexcept'))

    def calculateDissimforVoxels(self, voxels1, voxels2):
        """Calculates the dissimilarity (EMD, L1 or L2, depending on 'metric') for a pair of voxel lists representing models.

        Args:
            voxels1 np.array([np.array(,dtype=float)]: list of voxels representing model1.
            voxels2 np.array([np.array(,dtype=float)]: list of voxels representing model2.

        Returns:
            float: dissim for a pair of list of voxels

        Raises:
            RuntimeError: in the distribution version, when the signature weights do not sum up to the number of voxels.
            ValueError: when 'metric' is not one of 'emd', 'l1', 'l2'.
        """
        numvox1 = len(voxels1)
        numvox2 = len(voxels2)

        s1, s2 = self.getSignaturesForPair(voxels1, voxels2)

        if self.reduce_empty:
            if self.frequency:
                s1, s2 = self.reduceEmptySignatures_Frequency(s1, s2)
            else:
                s1, s2 = self.reduceEmptySignatures_Density(s1, s2)

        if not self.frequency:
            # sanity check: every voxel must have been assigned to exactly one sample
            if numvox1 != sum(s1[1]) or numvox2 != sum(s2[1]):
                print("Voxel reduction didn't work properly")
                print("Base voxels fig1: ", numvox1, " fig2: ", numvox2)
                print("After reduction voxels fig1: ", sum(s1[1]), " fig2: ", sum(s2[1]))
                raise RuntimeError("Voxel reduction error!")

        if self.frequency:
            s1 = abs(np.fft.fft(s1))
            s2 = abs(np.fft.fft(s2))

        # the vectors that are actually compared: FFT magnitudes (frequency) or sample weights (distribution)
        if self.frequency:
            vec1, vec2 = s1, s2
        else:
            vec1, vec2 = s1[1], s2[1]

        if self.metric == 'l1':
            out = np.linalg.norm(vec1 - vec2, ord=1)

        elif self.metric == 'l2':
            out = np.linalg.norm(vec1 - vec2)

        elif self.metric == 'emd':
            if self.frequency:
                # ground distance between coefficients: their positions spread evenly over [0,1]
                num_points = np.linspace(0, 1, len(s1), True)
                dist_matrix = np.array(self.calculateDistanceMatrix(num_points, num_points), dtype=np.float64)
            else:
                dist_matrix = self.calculateDistanceMatrix(s1[0], s2[0])

            toggle_traps = self._canToggleFpeTraps()
            if toggle_traps:
                # change default flag value - don't cause "Floating point exception" when dividing by 0
                # (pyemd does that, for example when comparing two identical histograms, i.e., two
                # identical signatures, for example from two identical phenotypes)
                self.libm.fedisableexcept(self._FPE_FLAG)
            try:
                out = emd(self.normalize(vec1), self.normalize(vec2), dist_matrix)
            finally:
                # restore the flags even when emd() raises, so a failure does not leave the trap switched off
                if toggle_traps:
                    self.libm.feclearexcept(self._FPE_FLAG)  # restoring default flag values...
                    self.libm.feenableexcept(self._FPE_FLAG)

        else:
            raise ValueError("Wrong metric '%s'" % self.metric)

        return out

    def calculateDissimforGeno(self, geno1, geno2):
        """Calculates the dissimilarity for a pair of genotypes.

        Args:
            geno1 (string): representation of model1 in one of the formats supported by Framsticks, http://www.framsticks.com/a/al_genotype.html
            geno2 (string): representation of model2 in one of the formats supported by Framsticks, http://www.framsticks.com/a/al_genotype.html

        Returns:
            float: dissim for a pair of strings representing models.
        """
        voxels1 = self.getVoxels(geno1)
        voxels2 = self.getVoxels(geno2)

        out = self.calculateDissimforVoxels(voxels1, voxels2)

        if self.verbose:
            print("Intervals: ", self.resolution)
            print("Geno1:\n", geno1)
            print("Geno2:\n", geno2)
            print("EMD:\n", out)

        return out

    def getDissimilarityMatrix(self, listOfGeno):
        """Calculates the dissimilarity for every ordered pair of the given genotypes.

        Voxels are generated once per genotype and reused for all pairs.

        Args:
            listOfGeno ([string]): list of strings representing genotypes in one of the formats supported by Framsticks, http://www.framsticks.com/a/al_genotype.html

        Returns:
            np.array(np.array(,dtype=float)): dissimilarity matrix for given list of genotypes
        """
        numOfGeno = len(listOfGeno)
        dissimMatrix = np.zeros(shape=[numOfGeno, numOfGeno])
        listOfVoxels = [self.getVoxels(g) for g in listOfGeno]
        for i in range(numOfGeno):
            for j in range(numOfGeno):  # could only calculate a triangle if the definition of similarity and its calculation guarantees symmetry
                dissimMatrix[i, j] = self.calculateDissimforVoxels(listOfVoxels[i], listOfVoxels[j])
        return dissimMatrix

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: framspy/dissimilarity/density_distribution.py @ 1329

Download in other formats: