Context Navigation

source: framspy/dissimilarity/density_distribution.py @ 1322

Last change on this file since 1322 was 1322, checked in by Maciej Komosinski, 3 months ago
Use libm.so when available to disable exceptions in pyemd, and if not (e.g., on Windows), proceed without it
File size: 16.2 KB

Line
1	import numpy as np
2	from pyemd import emd
3	from ctypes import cdll
4	from ctypes.util import find_library
5	from .alignmodel import align
6
class DensityDistribution:
    """Two dissimilarity measures based on the spatial distribution of two Models.

    The Model bounding box is divided into a grid of equally-sized cuboids, the
    number of which is the 'resolution' parameter cubed. Then the Model surface
    is covered with points; the density of the surface sampling is determined
    by the 'density' parameter. There are two versions of the measure. In the
    default version ('frequency'=False), a signature of each cuboid is the
    centroid and the number of samples. In the 'frequency'=True version, FFT is
    computed from the vector containing the number of samples in each cuboid.
    The final result of the dissimilarity measure is the distance between the
    signatures and it can be computed using EMD, L1, or L2 norms (the 'metric'
    parameter).
    """

    libm = find_library('m') # for disabling/enabling floating point exceptions (division by zero occurs in the pyemd library)
    if libm is not None: # libm.so (the mathematical library) is a part of Linux ecosystem - always present
        libm = cdll.LoadLibrary(libm)
    else:
        print('\nWarning: The "m" library not found - floating point exceptions in pyemd may occur...\n') # but on Windows, pyemd does not seem to cause floating point exceptions
    EPSILON = 0.0001

    # Exception mask passed to the fe*except() functions of libm.
    # NOTE(review): 0x04 corresponds to FE_DIVBYZERO on x86 glibc - confirm for other architectures.
    _FE_DIVBYZERO = 0x04
    _FE_FUNCTIONS = ('fedisableexcept', 'feclearexcept', 'feenableexcept')
    _SUPPORTED_METRICS = ('emd', 'l1', 'l2')

    def __init__(self, frams_module=None, density = 10, resolution = 8, reduce_empty=True, frequency=False, metric = 'emd', fixedZaxis=False, verbose=False):
        """ __init__
        Args:
            frams_module: the Framsticks module object; it is used to access Model and ModelGeometry. Required.
            density (int, optional): density of samplings for frams.ModelGeometry. Defaults to 10.
            resolution (int, optional): How many intervals are used in each dimension to partition surface samples of Models in the 3D space.
                The higher the value, the more detailed the comparison and the longer the calculations. Defaults to 8.
            reduce_empty (bool, optional): If we should use reduction to remove blank samples. Defaults to True.
            frequency (bool, optional): If we should use frequency distribution. Defaults to False.
            metric (string, optional): The distance metric that should be used ('emd', 'l1', or 'l2'). Defaults to 'emd'.
            fixedZaxis (bool, optional): If the z axis should be fixed during alignment. Defaults to False.
            verbose (bool, optional): Turning on logging, works only for calculateDissimforGeno. Defaults to False.

        Raises:
            ValueError: when frams_module is None.
        """
        if frams_module is None:
            raise ValueError('Framsticks module not provided!')
        self.frams = frams_module

        self.density = density
        self.resolution = resolution
        self.verbose = verbose
        self.reduce_empty = reduce_empty
        self.frequency = frequency
        self.metric = metric
        self.fixedZaxis = fixedZaxis


    def calculateNeighberhood(self,array,mean_coords):
        """ Calculates number of elements for given sample and sets up the center of this sample
        to the center of mass (calculated by mean of every coordinate)
        Args:
            array ([[float,float,float],...,[float,float,float]]): array of voxels that belong to given sample.
            mean_coords ([float,float,float]): default coordinates that are the
                middle of the sample (used when number of voxels in sample is equal to 0)

        Returns:
            weight [int]: number of voxels in a sample
            coordinates [float,float,float]: center of mass for a sample
        """
        weight = len(array)
        if weight == 0:
            return 0, mean_coords
        # a plain list (not an ndarray) is returned, exactly as for the default coordinates
        centroid = [np.mean(array[:, axis]) for axis in range(3)]
        return weight, centroid


    def calculateDistPoints(self,point1, point2):
        """ Returns euclidean distance between two points
        Args (distribution):
            point1 ([float,float,float]) - coordinates of first point
            point2 ([float,float,float]) - coordinates of second point
        Args (frequency):
            point1 (float) - value of the first sample
            point2 (float) - value of the second sample

        Returns:
            [float]: euclidean distance
        """
        if self.frequency:
            return abs(point1-point2)
        return np.sqrt(np.sum(np.square(point1-point2)))


    def calculateDistanceMatrix(self,array1, array2):
        """Computes pairwise distances between the points of two signatures.

        Element [i][j] is the distance between array1[i] and array2[j], defined
        in the same way as in calculateDistPoints(), but all pairs are
        evaluated at once using broadcasting instead of a Python double loop.

        Args:
            array1 ([type]): array of size n with points representing the first Model
            array2 ([type]): array of size n with points representing the second Model

        Returns:
            np.array(np.array(,dtype=float)): distance matrix n*n

        Raises:
            IndexError: when array2 has fewer elements than array1.
        """
        first = np.asarray(array1, dtype=np.float64)
        n = len(first)
        if n == 0:
            return np.zeros((0, 0))
        second = np.asarray(array2, dtype=np.float64)
        if len(second) < n:
            raise IndexError("array2 has %d elements, but at least %d are required" % (len(second), n))
        second = second[:n] # the matrix is always n*n, where n is the length of array1

        diff = first[:, np.newaxis] - second[np.newaxis, :]
        if self.frequency:
            return np.abs(diff)
        # the reshape makes scalar "points" behave like one-dimensional vectors
        return np.sqrt(np.sum(np.square(diff.reshape(n, n, -1)), axis=-1))


    def reduceEmptySignatures_Frequency(self,s1,s2):
        """Removes samples from signatures if corresponding samples for both models have weight 0.
        Args:
            s1 (np.array(,dtype=np.float64)): values of samples
            s2 (np.array(,dtype=np.float64)): values of samples

        Returns:
            s1new (np.array(,dtype=np.float64)): values of samples after reduction
            s2new (np.array(,dtype=np.float64)): values of samples after reduction
        """
        values1 = np.asarray(s1)
        values2 = np.asarray(s2)
        keep = (values1 != 0) | (values2 != 0) # a sample survives when at least one model has something in it
        return values1[keep], values2[keep]


    def reduceEmptySignatures_Density(self,s1,s2):
        """Removes samples from signatures if corresponding samples for both models have weight 0.
        Args:
            s1 ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights]
            s2 ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights]

        Returns:
            s1new ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights] after reduction
            s2new ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights] after reduction
        """
        coords1, weights1 = np.asarray(s1[0]), np.asarray(s1[1])
        coords2, weights2 = np.asarray(s2[0]), np.asarray(s2[1])
        keep = (weights1 != 0) | (weights2 != 0) # a sample survives when at least one model has something in it
        return [coords1[keep], weights1[keep]], [coords2[keep], weights2[keep]]


    def getSignatures(self,array,edges3,steps3):
        """Generates signature for array representing the Model. Signature is composed of list of points [x,y,z] (float) and list of weights (int).

        A voxel belongs to the cell (x,y,z) when, for each axis, its coordinate
        is greater than the lower edge of the cell and not greater than the
        upper edge. Voxels that do not fall into any cell are ignored. Cells
        are enumerated with x as the slowest and z as the fastest changing index.

        Args:
            array (np.array(np.array(,dtype=float))): array with voxels representing the Model
            edges3 ([np.array(,dtype=float),np.array(,dtype=float),np.array(,dtype=float)]): lists with edges for each step for each axis in order x,y,z
            steps3 ([float,float,float]): [size of interval for x axis, size of interval for y axis, size of interval for z axis]

        Returns (distribution):
            signature [np.array(,dtype=np.float64),np.array(,dtype=np.float64)]: returns signature [np.array of points, np.array of weights]
        Returns (frequency):
            signature np.array(,dtype=np.float64): returns signature np.array of coefficients
        """
        edges_x,edges_y,edges_z = edges3
        step_x,step_y,step_z = steps3
        cells_per_axis = [max(len(edges) - 1, 0) for edges in (edges_x, edges_y, edges_z)]
        nx, ny, nz = cells_per_axis
        num_cells = nx * ny * nz
        if num_cells == 0:
            if self.frequency:
                return np.array([], dtype=np.float64)
            return [np.array([], dtype=np.float64), np.array([], dtype=np.float64)]

        points = np.asarray(array)
        inside = np.ones(len(points), dtype=bool)
        axis_cells = []
        for axis, (edges, cells) in enumerate(zip((edges_x, edges_y, edges_z), cells_per_axis)):
            # side='left' yields i such that edges[i-1] < value <= edges[i], so i-1 is
            # the index of the one-sided open interval (lower edge, upper edge] holding the value
            cell_index = np.searchsorted(edges, points[:, axis], side='left') - 1
            inside &= (cell_index >= 0) & (cell_index < cells)
            axis_cells.append(cell_index)

        flat = ((axis_cells[0] * ny + axis_cells[1]) * nz + axis_cells[2])[inside]
        counts = np.bincount(flat, minlength=num_cells)

        if self.frequency:
            return counts.astype(np.float64)

        # A stable sort keeps the original order of voxels inside each cell, so each
        # group is the same sequence of voxels that per-cell filtering would select.
        order = np.argsort(flat, kind='stable')
        groups = np.split(points[inside][order], np.cumsum(counts)[:-1])

        step_x_half = step_x/2
        step_y_half = step_y/2
        step_z_half = step_z/2
        feature_array = []
        weight_array = []
        for cell, (x, y, z) in enumerate(np.ndindex(nx, ny, nz)):
            cell_center = [edges_x[x]+step_x_half, edges_y[y]+step_y_half, edges_z[z]+step_z_half]
            weight, point = self.calculateNeighberhood(groups[cell], cell_center)
            feature_array.append(point)
            weight_array.append(weight)

        return [np.array(feature_array,dtype=np.float64), np.array(weight_array,dtype=np.float64)]


    def getSignaturesForPair(self,array1,array2):
        """Generates signatures for given pair of models represented by array of voxels.
        We calculate space for given models by taking the extrema for each axis and dividing the space by the resolution.
        This divided space gives us samples which contain points. Each sample will have new coordinates which are the mean of all points from it, and weight which equals to the number of points.

        Args:
            array1 (np.array(np.array(,dtype=float))): array with voxels representing model1
            array2 (np.array(np.array(,dtype=float))): array with voxels representing model2

        Returns (distribution):
            s1 ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights]
            s2 ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights]
        Returns (frequency):
            s1 (np.array(,dtype=np.float64)): number of voxels in each sample
            s2 (np.array(,dtype=np.float64)): number of voxels in each sample
        """
        edges3 = []
        steps3 = []
        for axis in range(3):
            lowest = np.minimum(np.min(array1[:, axis]), np.min(array2[:, axis]))
            highest = np.maximum(np.max(array1[:, axis]), np.max(array2[:, axis]))
            # We request self.resolution+1 samples since we need self.resolution intervals
            edges, step = np.linspace(lowest, highest, self.resolution+1, retstep=True)
            edges[0] -= self.EPSILON # EPSILON subtracted to deal with boundary voxels (one-sided open intervals used in function getSignatures())
            edges3.append(edges)
            steps3.append(step)
        edges3 = tuple(edges3)
        steps3 = tuple(steps3)

        s1 = self.getSignatures(array1,edges3,steps3)
        s2 = self.getSignatures(array2,edges3,steps3)

        return s1,s2


    def getVoxels(self,geno):
        """Generates voxels for genotype using frams.ModelGeometry

        The Model is aligned (see the 'fixedZaxis' parameter) before its geometry is sampled.

        Args:
            geno (string): representation of Model in one of the formats supported by Framsticks, http://www.framsticks.com/a/al_genotype.html

        Returns:
            np.array([np.array(,dtype=float)]: list of voxels representing the Model.
        """
        model = self.frams.Model.newFromString(geno)
        align(model, self.fixedZaxis)
        model_geometry = self.frams.ModelGeometry.forModel(model)

        model_geometry.geom_density = self.density
        voxels = np.array([np.array([p.x._value(),p.y._value(),p.z._value()]) for p in model_geometry.voxels()])
        return voxels


    def normalize(self, signature):
        """Normalizes the signature values by dividing each element by the sum of all elements.

        No special handling is applied when the sum equals zero.

        Args:
            signature np.array(,dtype=float): A one-dimensional array of signature values.

        Returns:
            np.array(,dtype=float): A one-dimensional array of normalized signature values.
        """
        total = np.sum(signature)
        return np.divide(signature, total)


    def _emdWithFpGuard(self, hist1, hist2, dist_matrix):
        """Runs pyemd's emd() with the floating point trap masked by _FE_DIVBYZERO switched off.

        pyemd divides by zero, for example when comparing two identical
        histograms, i.e., two identical signatures, for example from two
        identical phenotypes. The trap is switched back on in a 'finally'
        clause, so the flags are restored even if emd() raises. When the loaded
        "m" library does not provide the required functions, emd() is called
        without any guard.
        """
        libm = self.libm
        guard = libm is not None and all(hasattr(libm, name) for name in self._FE_FUNCTIONS)
        if guard:
            libm.fedisableexcept(self._FE_DIVBYZERO) # change default flag value - don't cause "Floating point exception" when dividing by 0
        try:
            return emd(hist1, hist2, dist_matrix)
        finally:
            if guard:
                libm.feclearexcept(self._FE_DIVBYZERO) # restoring default flag values...
                libm.feenableexcept(self._FE_DIVBYZERO)


    def calculateDissimforVoxels(self, voxels1, voxels2):
        """Calculates dissimilarity (using the configured 'metric') for pair of voxels representing models.
        Args:
            voxels1 np.array([np.array(,dtype=float)]: list of voxels representing model1.
            voxels2 np.array([np.array(,dtype=float)]: list of voxels representing model2.

        Returns:
            float: dissim for pair of list of voxels

        Raises:
            ValueError: when self.metric is not one of 'emd', 'l1', 'l2'.
            RuntimeError: when (in the 'frequency'=False version) the weights of a signature do not sum up to the number of voxels.
        """
        if self.metric not in self._SUPPORTED_METRICS: # fail before doing any expensive work
            raise ValueError("Wrong metric '%s'"%self.metric)

        numvox1 = len(voxels1)
        numvox2 = len(voxels2)

        s1, s2 = self.getSignaturesForPair(voxels1, voxels2)

        if self.reduce_empty:
            if self.frequency:
                s1, s2 = self.reduceEmptySignatures_Frequency(s1,s2)
            else:
                s1, s2 = self.reduceEmptySignatures_Density(s1,s2)

        if self.frequency:
            hist1 = abs(np.fft.fft(s1))
            hist2 = abs(np.fft.fft(s2))
        else:
            hist1, hist2 = s1[1], s2[1]
            # sanity check: every voxel must have been assigned to some sample
            total1, total2 = np.sum(hist1), np.sum(hist2)
            if numvox1 != total1 or numvox2 != total2:
                print("Voxel reduction didn't work properly")
                print("Base voxels fig1: ", numvox1, " fig2: ", numvox2)
                print("After reduction voxels fig1: ", total1, " fig2: ", total2)
                raise RuntimeError("Voxel reduction error!")

        if self.metric == 'l1':
            return np.linalg.norm((hist1-hist2), ord=1)
        if self.metric == 'l2':
            return np.linalg.norm((hist1-hist2))

        # 'emd'
        if self.frequency:
            # the ground distance between coefficients is the distance between their evenly spaced positions in [0,1]
            positions = np.linspace(0, 1, len(hist1), True)
            dist_matrix = self.calculateDistanceMatrix(positions,positions)
        else:
            dist_matrix = self.calculateDistanceMatrix(s1[0],s2[0])
        return self._emdWithFpGuard(self.normalize(hist1), self.normalize(hist2), np.array(dist_matrix,dtype=np.float64))


    def calculateDissimforGeno(self, geno1, geno2):
        """Calculates dissimilarity (using the configured 'metric') for a pair of genotypes.
        Args:
            geno1 (string): representation of model1 in one of the formats supported by Framsticks, http://www.framsticks.com/a/al_genotype.html
            geno2 (string): representation of model2 in one of the formats supported by Framsticks, http://www.framsticks.com/a/al_genotype.html

        Returns:
            float: dissim for pair of strings representing models.
        """
        voxels1 = self.getVoxels(geno1)
        voxels2 = self.getVoxels(geno2)

        out = self.calculateDissimforVoxels(voxels1, voxels2)

        if self.verbose:
            print("Intervals: ", self.resolution)
            print("Geno1:\n",geno1)
            print("Geno2:\n",geno2)
            print("EMD:\n",out)

        return out


    def getDissimilarityMatrix(self,listOfGeno):
        """Calculates dissimilarity for every ordered pair of genotypes (including each genotype with itself).

        Voxels are generated once per genotype and then reused for all pairs.

        Args:
            listOfGeno ([string]): list of strings representing genotypes in one of the formats supported by Framsticks, http://www.framsticks.com/a/al_genotype.html

        Returns:
            np.array(np.array(,dtype=float)): dissimilarity matrix for given list of genotypes
        """
        numOfGeno = len(listOfGeno)
        dissimMatrix = np.zeros(shape=[numOfGeno,numOfGeno])
        listOfVoxels = [self.getVoxels(g) for g in listOfGeno]
        for i, voxels_i in enumerate(listOfVoxels):
            for j, voxels_j in enumerate(listOfVoxels):
                dissimMatrix[i,j] = self.calculateDissimforVoxels(voxels_i, voxels_j)
        return dissimMatrix

Note: See TracBrowser for help on using the repository browser.

Download in other formats: