Context Navigation

source: framspy/dissimilarity/density-distribution.py @ 1211

Last change on this file since 1211 was 1210, checked in by Maciej Komosinski, 20 months ago
EPSILON used only where necessary
File size: 14.6 KB

Line
1	import numpy as np
2	from pyemd import emd
3	from ctypes import cdll
4	from ctypes.util import find_library
5	from alignmodel import align
6
7	class DensityDistribution:
8	libm = cdll.LoadLibrary(find_library('m'))
9	EPSILON = 0.0001
10	def __init__(self, FramsLib=None, density = 10, steps = 3, reduce=True, frequency=False, metric = 'emd', fixedZaxis=False, verbose=False):
11	""" __init__
12	Args:
13	density (int, optional): density of samplings for frams.ModelGeometry . Defaults to 10.
14	steps (int, optional): How many steps is used for sampling space of voxels,
15	The higher value the more accurate sampling and the longer calculations. Defaults to 3.
16	reduce (bool, optional): If we should use reduction to remove blank samples. Defaults to True.
17	frequency (bool, optional): If we should use frequency distribution. Defaults to False.
18	metric (string, optional): The distance metric that should be used ('emd', 'l1', or 'l2'). Defaults to 'emd'.
19	fixedZaxis (bool, optional): If the z axis should be fixed during alignment. Defaults to False.
20	verbose (bool, optional): Turning on logging, works only for calculateEMDforGeno. Defaults to False.
21	"""
22	if FramsLib == None:
23	raise ValueError('Frams library not provided!')
24	self.frams_lib = FramsLib
25
26	self.density = density
27	self.steps = steps
28	self.verbose = verbose
29	self.reduce = reduce
30	self.frequency = frequency
31	self.metric = metric
32	self.fixedZaxis = fixedZaxis
33
34
35	def calculateNeighberhood(self,array,mean_coords):
36	""" Calculates number of elements for given sample and set ups the center of this sample
37	to the center of mass (calculated by mean of every coordinate)
38	Args:
39	array ([[float,float,float],...,[float,float,float]]): array of voxels that belong to given sample.
40	mean_coords ([float,float,float]): default coordinates that are the
41	middle of the sample (used when number of voxels in sample is equal to 0)
42
43	Returns:
44	weight [int]: number of voxels in a sample
45	coordinates [float,float,float]: center of mass for a sample
46	"""
47	weight = len(array)
48	if weight > 0:
49	point = [np.mean(array[:,0]),np.mean(array[:,1]),np.mean(array[:,2])]
50	return weight, point
51	else:
52	return 0, mean_coords
53
54
55	def calculateDistPoints(self,point1, point2):
56	""" Returns euclidean distance between two points
57	Args (distribution):
58	point1 ([float,float,float]) - coordinates of first point
59	point2 ([float,float,float]) - coordinates of second point
60	Args (frequency):
61	point1 (float) - value of the first sample
62	point2 (float) - value of the second sample
63
64	Returns:
65	[float]: euclidean distance
66	"""
67	if self.frequency:
68	return abs(point1-point2)
69	else:
70	return np.sqrt(np.sum(np.square(point1-point2)))
71
72
73	def calculateDistanceMatrix(self,array1, array2):
74	"""
75
76	Args:
77	array1 ([type]): array of size n with points representing firsts model
78	array2 ([type]): array of size n with points representing second model
79
80	Returns:
81	np.array(np.array(,dtype=float)): distance matrix n x n
82	"""
83	n = len(array1)
84	distMatrix = np.zeros((n,n))
85	for i in range(n):
86	for j in range(n):
87	distMatrix[i][j] = self.calculateDistPoints(array1[i], array2[j])
88	return np.array(distMatrix)
89
90
91	def reduceSignaturesFreq(self,s1,s2):
92	"""Removes samples from signatures if corresponding samples for both models have weight 0.
93	Args:
94	s1 (np.array(,dtype=np.float64)): values of samples
95	s2 (np.array(,dtype=np.float64)): values of samples
96
97	Returns:
98	s1new (np.array(,dtype=np.float64)): coordinates of samples after reduction
99	s2new (np.array(,dtype=np.float64)): coordinates of samples after reduction
100	"""
101	lens = len(s1)
102	indices = []
103	for i in range(lens):
104	if s1[i]==0 and s2[i]==0:
105	indices.append(i)
106
107	return np.delete(s1, indices), np.delete(s2, indices)
108
109
110	def reduceSignaturesDens(self,s1,s2):
111	"""Removes samples from signatures if corresponding samples for both models have weight 0.
112	Args:
113	s1 ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights]
114	s2 ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights]
115
116	Returns:
117	s1new ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights] after reduction
118	s2new ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights] after reduction
119	"""
120	lens = len(s1[0])
121	indices = []
122	for i in range(lens):
123	if s1[1][i]==0 and s2[1][i]==0:
124	indices.append(i)
125
126	s1 = [np.delete(s1[0], indices, axis=0), np.delete(s1[1], indices, axis=0)]
127	s2 = [np.delete(s2[0], indices, axis=0), np.delete(s2[1], indices, axis=0)]
128	return s1, s2
129
130
131	def getSignatures(self,array,steps_all,step_all):
132	"""Generates signature for array representing model. Signature is composed of list of points [x,y,z] (float) and list of weights (int).
133
134	Args:
135	array (np.array(np.array(,dtype=float))): array with voxels representing model
136	steps_all ([np.array(,dtype=float),np.array(,dtype=float),np.array(,dtype=float)]): lists with edges for each step for each axis in order x,y,z
137	step_all ([float,float,float]): [size of step for x axis, size of step for y axis, size of step for y axis]
138
139	Returns (distribution):
140	signature [np.array(,dtype=np.float64),np.array(,dtype=np.float64)]: returns signatuere [np.array of points, np.array of weights]
141	Returns (frequency):
142	signature np.array(,dtype=np.float64): returns signatuere np.array of coefficients
143	"""
144	x_steps,y_steps,z_steps = steps_all
145	x_step,y_step,z_step=step_all
146	feature_array = []
147	weight_array = []
148	step_half_x = x_step/2
149	step_half_y = y_step/2
150	step_half_z = z_step/2
151	for x in range(len(x_steps[:-1])):
152	for y in range(len(y_steps[:-1])) :
153	for z in range(len(z_steps[:-1])):
154	rows=np.where((array[:,0]> x_steps[x]) &
155	(array[:,0]<= x_steps[x+1]) &
156	(array[:,1]> y_steps[y]) &
157	(array[:,1]<= y_steps[y+1]) &
158	(array[:,2]> z_steps[z]) &
159	(array[:,2]<= z_steps[z+1]))
160	if self.frequency:
161	feature_array.append(len(array[rows]))
162	else:
163	weight, point = self.calculateNeighberhood(array[rows],[x_steps[x]+step_half_x,y_steps[y]+step_half_y,z_steps[z]+step_half_z])
164	feature_array.append(point)
165	weight_array.append(weight)
166
167	if self.frequency:
168	samples = np.array(feature_array,dtype=np.float64)
169	return abs(np.fft.fft(samples))
170	else:
171	return [np.array(feature_array,dtype=np.float64), np.array(weight_array,dtype=np.float64)]
172
173
174	def getSignaturesForPair(self,array1,array2):
175	"""generates signatures for given pair of models represented by array of voxels.
176	We calculate space for given models by taking the extremas for each axis and dividing the space by the number of steps.
177	This divided space generate us samples which contains points. Each sample will have new coordinates which are mean of all points from it and weight
178	which equals to the number of points.
179
180	Args:
181	array1 (np.array(np.array(,dtype=float))): array with voxels representing model1
182	array2 (np.array(np.array(,dtype=float))): array with voxels representing model2
183	steps (int, optional): How many steps is used for sampling space of voxels. Defaults to self.steps (3).
184
185	Returns:
186	s1 ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights]
187	s2 ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights]
188	"""
189
190	min_x = np.min([np.min(array1[:,0]),np.min(array2[:,0])])
191	max_x = np.max([np.max(array1[:,0]),np.max(array2[:,0])])
192	min_y = np.min([np.min(array1[:,1]),np.min(array2[:,1])])
193	max_y = np.max([np.max(array1[:,1]),np.max(array2[:,1])])
194	min_z = np.min([np.min(array1[:,2]),np.min(array2[:,2])])
195	max_z = np.max([np.max(array1[:,2]),np.max(array2[:,2])])
196
197	x_steps,x_step = np.linspace(min_x,max_x,self.steps,retstep=True)
198	y_steps,y_step = np.linspace(min_y,max_y,self.steps,retstep=True)
199	z_steps,z_step = np.linspace(min_z,max_z,self.steps,retstep=True)
200
201	for intervals in (x_steps, y_steps, z_steps): # EPSILON subtracted to deal with boundary voxels (one-sided open intervals and comparisons in loops in function getSignatures())
202	intervals[0] -= self.EPSILON
203
204	steps_all = (x_steps,y_steps,z_steps)
205	step_all = (x_step,y_step,z_step)
206
207	s1 = self.getSignatures(array1,steps_all,step_all)
208	s2 = self.getSignatures(array2,steps_all,step_all)
209
210	return s1,s2
211
212
213	def getVoxels(self,geno):
214	""" Generates voxels for genotype using frams.ModelGeometry
215
216	Args:
217	geno (string): representation of model in one of the formats handled by frams http://www.framsticks.com/a/al_genotype.html
218
219	Returns:
220	np.array([np.array(,dtype=float)]: list of voxels representing model.
221	"""
222	model = self.frams_lib.Model.newFromString(geno)
223	align(model, self.fixedZaxis)
224	model_geometry = self.frams_lib.ModelGeometry.forModel(model)
225
226	model_geometry.geom_density = self.density
227	voxels = np.array([np.array([p.x._value(),p.y._value(),p.z._value()]) for p in model_geometry.voxels()])
228	return voxels
229
230
231	def calculateDissimforVoxels(self, voxels1, voxels2):
232	""" Calculate EMD for pair of voxels representing models.
233	Args:
234	voxels1 np.array([np.array(,dtype=float)]: list of voxels representing model1.
235	voxels2 np.array([np.array(,dtype=float)]: list of voxels representing model2.
236	steps (int, optional): How many steps is used for sampling space of voxels. Defaults to self.steps (3).
237
238	Returns:
239	float: dissim for pair of list of voxels
240	"""
241	numvox1 = len(voxels1)
242	numvox2 = len(voxels2)
243
244	s1, s2 = self.getSignaturesForPair(voxels1, voxels2)
245
246	if numvox1 != sum(s1[1]) or numvox2 != sum(s2[1]):
247	print("Bad signature!")
248	print("Base voxels fig1: ", numvox1, " fig2: ", numvox2)
249	print("After reduction voxels fig1: ", sum(s1[1]), " fig2: ", sum(s2[1]))
250	raise ValueError("BAd signature!")
251
252	reduce_fun = self.reduceSignaturesFreq if self.frequency else self.reduceSignaturesDens
253	if self.reduce:
254	s1, s2 = reduce_fun(s1,s2)
255
256	if not self.frequency:
257	if numvox1 != sum(s1[1]) or numvox2 != sum(s2[1]):
258	print("Voxel reduction didnt work properly")
259	print("Base voxels fig1: ", numvox1, " fig2: ", numvox2)
260	print("After reduction voxels fig1: ", sum(s1[1]), " fig2: ", sum(s2[1]))
261
262	if self.metric == 'l1':
263	if self.frequency:
264	out = np.linalg.norm((s1-s2), ord=1)
265	else:
266	out = np.linalg.norm((s1[1]-s2[1]), ord=1)
267
268	elif self.metric == 'l2':
269	if self.frequency:
270	out = np.linalg.norm((s1-s2))
271	else:
272	out = np.linalg.norm((s1[1]-s2[1]))
273
274	elif self.metric == 'emd':
275	if self.frequency:
276	num_points = len(s1)
277	dist_matrix = self.calculateDistanceMatrix(range(num_points),range(num_points))
278	else:
279	dist_matrix = self.calculateDistanceMatrix(s1[0],s2[0])
280
281	self.libm.fedisableexcept(0x04) # allowing for operation divide by 0 because pyemd requiers it.
282
283	if self.frequency:
284	out = emd(s1,s2,np.array(dist_matrix,dtype=np.float64))
285	else:
286	out = emd(s1[1],s2[1],dist_matrix)
287
288	self.libm.feclearexcept(0x04) # disabling operation divide by 0 because framsticks doesnt like it.
289	self.libm.feenableexcept(0x04)
290
291	else:
292	raise ValueError("Wrong metric '%s'"%self.metric)
293
294	return out
295
296
297	def calculateDissimforGeno(self, geno1, geno2):
298	""" Calculate EMD for pair of genos.
299	Args:
300	geno1 (string): representation of model1 in one of the formats handled by frams http://www.framsticks.com/a/al_genotype.html
301	geno2 (string): representation of model2 in one of the formats handled by frams http://www.framsticks.com/a/al_genotype.html
302	steps (int, optional): How many steps is used for sampling space of voxels. Defaults to self.steps (3).
303
304	Returns:
305	float: dissim for pair of strings representing models.
306	"""
307
308	voxels1 = self.getVoxels(geno1)
309	voxels2 = self.getVoxels(geno2)
310
311	out = self.calculateDissimforVoxels(voxels1, voxels2)
312
313	if self.verbose == True:
314	print("Steps: ", self.steps)
315	print("Geno1:\n",geno1)
316	print("Geno2:\n",geno2)
317	print("EMD:\n",out)
318
319	return out
320
321
322	def getDissimilarityMatrix(self,listOfGeno):
323	"""
324
325	Args:
326	listOfGeno ([string]): list of strings representing genotypes in one of the formats handled by frams http://www.framsticks.com/a/al_genotype.html
327
328	Returns:
329	np.array(np.array(,dtype=float)): dissimilarity matrix of EMD for given list of genotypes
330	"""
331	numOfGeno = len(listOfGeno)
332	dissimMatrix = np.zeros(shape=[numOfGeno,numOfGeno])
333	listOfVoxels = [self.getVoxels(g) for g in listOfGeno]
334	for i in range(numOfGeno):
335	for j in range(numOfGeno):
336	dissimMatrix[i,j] = self.calculateDissimforVoxels(listOfVoxels[i], listOfVoxels[j])
337	return dissimMatrix

Note: See TracBrowser for help on using the repository browser.

Download in other formats: