Context Navigation

density-distribution.py @ 1280

Last change on this file since 1280 was 1269, checked in by oriona, 17 months ago
FFT is now calculated after the signature reduction, to increase efficiency.
File size: 15.7 KB

Line
1	import numpy as np
2	from pyemd import emd
3	from ctypes import cdll
4	from ctypes.util import find_library
5	from alignmodel import align
6
7	class DensityDistribution:
8	"""Two dissimilarity measures based on the spatial distribution of two Models. The Model bounding box is divided into a grid of equally-sized cuboids, the number of which is the 'resolution' parameter cubed. Then the Model surface is covered with points; the density of the surface sampling is determined by the 'density' parameter. There are two versions of the measure. In the default version ('frequency'=False), a signature of each cuboid is the centroid and the number of samples. In the 'frequency'=True version, FFT is computed from the vector containing the number of samples in each cuboid. The final result of the dissimilarity measure is the distance between the signatures and it can be computed using EMD, L1, or L2 norms (the 'metric' parameter).
9	"""
10
11	libm = cdll.LoadLibrary(find_library('m')) # for disabling/enabling floating point exceptions (division by zero occurs in the EMD library)
12	EPSILON = 0.0001
13
14	def __init__(self, frams_module=None, density = 10, resolution = 8, reduce_empty=True, frequency=False, metric = 'emd', fixedZaxis=False, verbose=False):
15	""" __init__
16	Args:
17	density (int, optional): density of samplings for frams.ModelGeometry. Defaults to 10.
18	resolution (int, optional): How many intervals are used in each dimension to partition surface samples of Models in the 3D space.
19	The higher the value, the more detailed the comparison and the longer the calculations. Defaults to 3.
20	reduce_empty (bool, optional): If we should use reduction to remove blank samples. Defaults to True.
21	frequency (bool, optional): If we should use frequency distribution. Defaults to False.
22	metric (string, optional): The distance metric that should be used ('emd', 'l1', or 'l2'). Defaults to 'emd'.
23	fixedZaxis (bool, optional): If the z axis should be fixed during alignment. Defaults to False.
24	verbose (bool, optional): Turning on logging, works only for calculateEMDforGeno. Defaults to False.
25	"""
26	if frams_module is None:
27	raise ValueError('Framsticks module not provided!')
28	self.frams = frams_module
29
30	self.density = density
31	self.resolution = resolution
32	self.verbose = verbose
33	self.reduce_empty = reduce_empty
34	self.frequency = frequency
35	self.metric = metric
36	self.fixedZaxis = fixedZaxis
37
38
39	def calculateNeighberhood(self,array,mean_coords):
40	""" Calculates number of elements for given sample and set ups the center of this sample
41	to the center of mass (calculated by mean of every coordinate)
42	Args:
43	array ([[float,float,float],...,[float,float,float]]): array of voxels that belong to given sample.
44	mean_coords ([float,float,float]): default coordinates that are the
45	middle of the sample (used when number of voxels in sample is equal to 0)
46
47	Returns:
48	weight [int]: number of voxels in a sample
49	coordinates [float,float,float]: center of mass for a sample
50	"""
51	weight = len(array)
52	if weight > 0:
53	point = [np.mean(array[:,0]),np.mean(array[:,1]),np.mean(array[:,2])]
54	return weight, point
55	else:
56	return 0, mean_coords
57
58
59	def calculateDistPoints(self,point1, point2):
60	""" Returns euclidean distance between two points
61	Args (distribution):
62	point1 ([float,float,float]) - coordinates of first point
63	point2 ([float,float,float]) - coordinates of second point
64	Args (frequency):
65	point1 (float) - value of the first sample
66	point2 (float) - value of the second sample
67
68	Returns:
69	[float]: euclidean distance
70	"""
71	if self.frequency:
72	return abs(point1-point2)
73	else:
74	return np.sqrt(np.sum(np.square(point1-point2)))
75
76
77	def calculateDistanceMatrix(self,array1, array2):
78	"""
79	Args:
80	array1 ([type]): array of size n with points representing the first Model
81	array2 ([type]): array of size n with points representing the second Model
82
83	Returns:
84	np.array(np.array(,dtype=float)): distance matrix n*n
85	"""
86	n = len(array1)
87	distMatrix = np.zeros((n,n))
88	for i in range(n):
89	for j in range(n):
90	distMatrix[i][j] = self.calculateDistPoints(array1[i], array2[j])
91	return np.array(distMatrix)
92
93
94	def reduceEmptySignatures_Frequency(self,s1,s2):
95	"""Removes samples from signatures if corresponding samples for both models have weight 0.
96	Args:
97	s1 (np.array(,dtype=np.float64)): values of samples
98	s2 (np.array(,dtype=np.float64)): values of samples
99
100	Returns:
101	s1new (np.array(,dtype=np.float64)): coordinates of samples after reduction
102	s2new (np.array(,dtype=np.float64)): coordinates of samples after reduction
103	"""
104	lens = len(s1)
105	indices = []
106	for i in range(lens):
107	if s1[i]==0 and s2[i]==0:
108	indices.append(i)
109
110	return np.delete(s1, indices), np.delete(s2, indices)
111
112
113	def reduceEmptySignatures_Density(self,s1,s2):
114	"""Removes samples from signatures if corresponding samples for both models have weight 0.
115	Args:
116	s1 ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights]
117	s2 ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights]
118
119	Returns:
120	s1new ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights] after reduction
121	s2new ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights] after reduction
122	"""
123	lens = len(s1[0])
124	indices = []
125	for i in range(lens):
126	if s1[1][i]==0 and s2[1][i]==0:
127	indices.append(i)
128
129	s1 = [np.delete(s1[0], indices, axis=0), np.delete(s1[1], indices, axis=0)]
130	s2 = [np.delete(s2[0], indices, axis=0), np.delete(s2[1], indices, axis=0)]
131	return s1, s2
132
133
134	def getSignatures(self,array,edges3,steps3):
135	"""Generates signature for array representing the Model. Signature is composed of list of points [x,y,z] (float) and list of weights (int).
136
137	Args:
138	array (np.array(np.array(,dtype=float))): array with voxels representing the Model
139	edges3 ([np.array(,dtype=float),np.array(,dtype=float),np.array(,dtype=float)]): lists with edges for each step for each axis in order x,y,z
140	steps3 ([float,float,float]): [size of interval for x axis, size of interval for y axis, size of interval for y axis]
141
142	Returns (distribution):
143	signature [np.array(,dtype=np.float64),np.array(,dtype=np.float64)]: returns signatuere [np.array of points, np.array of weights]
144	Returns (frequency):
145	signature np.array(,dtype=np.float64): returns signatuere np.array of coefficients
146	"""
147	edges_x,edges_y,edges_z = edges3
148	step_x,step_y,step_z=steps3
149	feature_array = []
150	weight_array = []
151	step_x_half = step_x/2
152	step_y_half = step_y/2
153	step_z_half = step_z/2
154	for x in range(len(edges_x[:-1])):
155	for y in range(len(edges_y[:-1])) :
156	for z in range(len(edges_z[:-1])):
157	rows=np.where((array[:,0]> edges_x[x]) &
158	(array[:,0]<= edges_x[x+1]) &
159	(array[:,1]> edges_y[y]) &
160	(array[:,1]<= edges_y[y+1]) &
161	(array[:,2]> edges_z[z]) &
162	(array[:,2]<= edges_z[z+1]))
163	if self.frequency:
164	feature_array.append(len(array[rows]))
165	else:
166	weight, point = self.calculateNeighberhood(array[rows],[edges_x[x]+step_x_half,edges_y[y]+step_y_half,edges_z[z]+step_z_half])
167	feature_array.append(point)
168	weight_array.append(weight)
169
170	if self.frequency:
171	samples = np.array(feature_array,dtype=np.float64)
172	return samples
173	else:
174	return [np.array(feature_array,dtype=np.float64), np.array(weight_array,dtype=np.float64)]
175
176
177	def getSignaturesForPair(self,array1,array2):
178	"""Generates signatures for given pair of models represented by array of voxels.
179	We calculate space for given models by taking the extremas for each axis and dividing the space by the resolution.
180	This divided space generate us samples which contains points. Each sample will have new coordinates which are mean of all points from it and weight which equals to the number of points.
181
182	Args:
183	array1 (np.array(np.array(,dtype=float))): array with voxels representing model1
184	array2 (np.array(np.array(,dtype=float))): array with voxels representing model2
185
186	Returns:
187	s1 ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights]
188	s2 ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights]
189	"""
190
191	min_x = np.min([np.min(array1[:,0]),np.min(array2[:,0])])
192	max_x = np.max([np.max(array1[:,0]),np.max(array2[:,0])])
193	min_y = np.min([np.min(array1[:,1]),np.min(array2[:,1])])
194	max_y = np.max([np.max(array1[:,1]),np.max(array2[:,1])])
195	min_z = np.min([np.min(array1[:,2]),np.min(array2[:,2])])
196	max_z = np.max([np.max(array1[:,2]),np.max(array2[:,2])])
197
198	# We request self.resolution+1 samples since we need self.resolution intervals
199	edges_x,step_x = np.linspace(min_x,max_x,self.resolution+1,retstep=True)
200	edges_y,step_y = np.linspace(min_y,max_y,self.resolution+1,retstep=True)
201	edges_z,step_z = np.linspace(min_z,max_z,self.resolution+1,retstep=True)
202
203	for edges in (edges_x, edges_y, edges_z): # EPSILON subtracted to deal with boundary voxels (one-sided open intervals and comparisons in loops in function getSignatures())
204	edges[0] -= self.EPSILON
205
206	edges3 = (edges_x,edges_y,edges_z)
207	steps3 = (step_x,step_y,step_z)
208
209	s1 = self.getSignatures(array1,edges3,steps3)
210	s2 = self.getSignatures(array2,edges3,steps3)
211
212	return s1,s2
213
214
215	def getVoxels(self,geno):
216	"""Generates voxels for genotype using frams.ModelGeometry
217
218	Args:
219	geno (string): representation of Model in one of the formats supported by Framsticks, http://www.framsticks.com/a/al_genotype.html
220
221	Returns:
222	np.array([np.array(,dtype=float)]: list of voxels representing the Model.
223	"""
224	model = self.frams.Model.newFromString(geno)
225	align(model, self.fixedZaxis)
226	model_geometry = self.frams.ModelGeometry.forModel(model)
227
228	model_geometry.geom_density = self.density
229	voxels = np.array([np.array([p.x._value(),p.y._value(),p.z._value()]) for p in model_geometry.voxels()])
230	return voxels
231
232
233	def normalize(self, signature):
234	"""Normalizes the signature values by dividing each element by the sum of all elements.
235	Args:
236	signature np.array(,dtype=float): A one-dimensional array of signature values.
237
238	Returns:
239	np.array(,dtype=float): A one-dimensional array of normalized signature values.
240	"""
241	total = np.sum(signature)
242	return np.divide(signature, total)
243
244
245	def calculateDissimforVoxels(self, voxels1, voxels2):
246	"""Calculates EMD for pair of voxels representing models.
247	Args:
248	voxels1 np.array([np.array(,dtype=float)]: list of voxels representing model1.
249	voxels2 np.array([np.array(,dtype=float)]: list of voxels representing model2.
250
251	Returns:
252	float: dissim for pair of list of voxels
253	"""
254	numvox1 = len(voxels1)
255	numvox2 = len(voxels2)
256
257	s1, s2 = self.getSignaturesForPair(voxels1, voxels2)
258
259	reduce_fun = self.reduceEmptySignatures_Frequency if self.frequency else self.reduceEmptySignatures_Density
260	if self.reduce_empty:
261	s1, s2 = reduce_fun(s1,s2)
262
263	if not self.frequency:
264	if numvox1 != sum(s1[1]) or numvox2 != sum(s2[1]):
265	print("Voxel reduction didn't work properly")
266	print("Base voxels fig1: ", numvox1, " fig2: ", numvox2)
267	print("After reduction voxels fig1: ", sum(s1[1]), " fig2: ", sum(s2[1]))
268	raise RuntimeError("Voxel reduction error!")
269
270	if self.frequency:
271	s1 = abs(np.fft.fft(s1))
272	s2 = abs(np.fft.fft(s2))
273
274	if self.metric == 'l1':
275	if self.frequency:
276	out = np.linalg.norm((s1-s2), ord=1)
277	else:
278	out = np.linalg.norm((s1[1]-s2[1]), ord=1)
279
280	elif self.metric == 'l2':
281	if self.frequency:
282	out = np.linalg.norm((s1-s2))
283	else:
284	out = np.linalg.norm((s1[1]-s2[1]))
285
286	elif self.metric == 'emd':
287	if self.frequency:
288	num_points = np.linspace(0, 1, len(s1), True)
289	dist_matrix = self.calculateDistanceMatrix(num_points,num_points)
290	else:
291	dist_matrix = self.calculateDistanceMatrix(s1[0],s2[0])
292
293	self.libm.fedisableexcept(0x04) # change default flag value - don't cause exceptions when dividing by 0 (pyemd does it)
294
295	if self.frequency:
296	out = emd(self.normalize(s1),self.normalize(s2),np.array(dist_matrix,dtype=np.float64))
297	else:
298	out = emd(self.normalize(s1[1]),self.normalize(s2[1]),dist_matrix)
299
300	self.libm.feclearexcept(0x04) # restoring default flag values...
301	self.libm.feenableexcept(0x04)
302
303	else:
304	raise ValueError("Wrong metric '%s'"%self.metric)
305
306	return out
307
308
309	def calculateDissimforGeno(self, geno1, geno2):
310	"""Calculates EMD for a pair of genotypes.
311	Args:
312	geno1 (string): representation of model1 in one of the formats supported by Framsticks, http://www.framsticks.com/a/al_genotype.html
313	geno2 (string): representation of model2 in one of the formats supported by Framsticks, http://www.framsticks.com/a/al_genotype.html
314
315	Returns:
316	float: dissim for pair of strings representing models.
317	"""
318
319	voxels1 = self.getVoxels(geno1)
320	voxels2 = self.getVoxels(geno2)
321
322	out = self.calculateDissimforVoxels(voxels1, voxels2)
323
324	if self.verbose == True:
325	print("Intervals: ", self.resolution)
326	print("Geno1:\n",geno1)
327	print("Geno2:\n",geno2)
328	print("EMD:\n",out)
329
330	return out
331
332
333	def getDissimilarityMatrix(self,listOfGeno):
334	"""
335	Args:
336	listOfGeno ([string]): list of strings representing genotypes in one of the formats supported by Framsticks, http://www.framsticks.com/a/al_genotype.html
337
338	Returns:
339	np.array(np.array(,dtype=float)): dissimilarity matrix of EMD for given list of genotypes
340	"""
341	numOfGeno = len(listOfGeno)
342	dissimMatrix = np.zeros(shape=[numOfGeno,numOfGeno])
343	listOfVoxels = [self.getVoxels(g) for g in listOfGeno]
344	for i in range(numOfGeno):
345	for j in range(numOfGeno):
346	dissimMatrix[i,j] = self.calculateDissimforVoxels(listOfVoxels[i], listOfVoxels[j])
347	return dissimMatrix

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: framspy/dissimilarity/density-distribution.py @ 1280

Download in other formats: