Context Navigation

source: framspy/dissimilarity/density_distribution.py @ 1320

Last change on this file since 1320 was 1295, checked in by Maciej Komosinski, 17 months ago
Updated for Python 3.9+
File size: 15.9 KB

Rev	Line
[1208]	1	import numpy as np
	2	from pyemd import emd
	3	from ctypes import cdll
	4	from ctypes.util import find_library
[1295]	5	from .alignmodel import align
[1208]	6
	7	class DensityDistribution:
[1224]	8	"""Two dissimilarity measures based on the spatial distribution of two Models. The Model bounding box is divided into a grid of equally-sized cuboids, the number of which is the 'resolution' parameter cubed. Then the Model surface is covered with points; the density of the surface sampling is determined by the 'density' parameter. There are two versions of the measure. In the default version ('frequency'=False), a signature of each cuboid is the centroid and the number of samples. In the 'frequency'=True version, FFT is computed from the vector containing the number of samples in each cuboid. The final result of the dissimilarity measure is the distance between the signatures and it can be computed using EMD, L1, or L2 norms (the 'metric' parameter).
[1223]	9	"""
[1295]	10
	11	OLD_FPEX = False # update floating point exception handling/pyemd problems (if still needed) for new python versions
	12	if OLD_FPEX:
	13	libm = cdll.LoadLibrary(find_library('m')) # for disabling/enabling floating point exceptions (division by zero occurs in the EMD library)
[1208]	14	EPSILON = 0.0001
[1224]	15
[1225]	16	def __init__(self, frams_module=None, density = 10, resolution = 8, reduce_empty=True, frequency=False, metric = 'emd', fixedZaxis=False, verbose=False):
[1208]	17	""" __init__
	18	Args:
[1224]	19	density (int, optional): density of samplings for frams.ModelGeometry. Defaults to 10.
	20	resolution (int, optional): How many intervals are used in each dimension to partition surface samples of Models in the 3D space.
	21	The higher the value, the more detailed the comparison and the longer the calculations. Defaults to 3.
	22	reduce_empty (bool, optional): If we should use reduction to remove blank samples. Defaults to True.
[1208]	23	frequency (bool, optional): If we should use frequency distribution. Defaults to False.
	24	metric (string, optional): The distance metric that should be used ('emd', 'l1', or 'l2'). Defaults to 'emd'.
	25	fixedZaxis (bool, optional): If the z axis should be fixed during alignment. Defaults to False.
	26	verbose (bool, optional): Turning on logging, works only for calculateEMDforGeno. Defaults to False.
	27	"""
[1219]	28	if frams_module is None:
	29	raise ValueError('Framsticks module not provided!')
	30	self.frams = frams_module
[1208]	31
	32	self.density = density
[1224]	33	self.resolution = resolution
[1208]	34	self.verbose = verbose
[1224]	35	self.reduce_empty = reduce_empty
[1208]	36	self.frequency = frequency
	37	self.metric = metric
	38	self.fixedZaxis = fixedZaxis
	39
	40
	41	def calculateNeighberhood(self,array,mean_coords):
	42	""" Calculates number of elements for given sample and set ups the center of this sample
	43	to the center of mass (calculated by mean of every coordinate)
	44	Args:
	45	array ([[float,float,float],...,[float,float,float]]): array of voxels that belong to given sample.
	46	mean_coords ([float,float,float]): default coordinates that are the
	47	middle of the sample (used when number of voxels in sample is equal to 0)
	48
	49	Returns:
	50	weight [int]: number of voxels in a sample
	51	coordinates [float,float,float]: center of mass for a sample
	52	"""
	53	weight = len(array)
	54	if weight > 0:
	55	point = [np.mean(array[:,0]),np.mean(array[:,1]),np.mean(array[:,2])]
	56	return weight, point
	57	else:
	58	return 0, mean_coords
	59
	60
	61	def calculateDistPoints(self,point1, point2):
	62	""" Returns euclidean distance between two points
	63	Args (distribution):
	64	point1 ([float,float,float]) - coordinates of first point
	65	point2 ([float,float,float]) - coordinates of second point
	66	Args (frequency):
	67	point1 (float) - value of the first sample
	68	point2 (float) - value of the second sample
	69
	70	Returns:
	71	[float]: euclidean distance
	72	"""
	73	if self.frequency:
	74	return abs(point1-point2)
	75	else:
	76	return np.sqrt(np.sum(np.square(point1-point2)))
	77
	78
	79	def calculateDistanceMatrix(self,array1, array2):
	80	"""
	81	Args:
[1224]	82	array1 ([type]): array of size n with points representing the first Model
	83	array2 ([type]): array of size n with points representing the second Model
[1208]	84
	85	Returns:
[1224]	86	np.array(np.array(,dtype=float)): distance matrix n*n
[1208]	87	"""
	88	n = len(array1)
	89	distMatrix = np.zeros((n,n))
	90	for i in range(n):
	91	for j in range(n):
	92	distMatrix[i][j] = self.calculateDistPoints(array1[i], array2[j])
	93	return np.array(distMatrix)
	94
	95
[1224]	96	def reduceEmptySignatures_Frequency(self,s1,s2):
[1208]	97	"""Removes samples from signatures if corresponding samples for both models have weight 0.
	98	Args:
	99	s1 (np.array(,dtype=np.float64)): values of samples
	100	s2 (np.array(,dtype=np.float64)): values of samples
	101
	102	Returns:
	103	s1new (np.array(,dtype=np.float64)): coordinates of samples after reduction
	104	s2new (np.array(,dtype=np.float64)): coordinates of samples after reduction
	105	"""
	106	lens = len(s1)
	107	indices = []
	108	for i in range(lens):
	109	if s1[i]==0 and s2[i]==0:
	110	indices.append(i)
	111
	112	return np.delete(s1, indices), np.delete(s2, indices)
	113
	114
[1224]	115	def reduceEmptySignatures_Density(self,s1,s2):
[1208]	116	"""Removes samples from signatures if corresponding samples for both models have weight 0.
	117	Args:
	118	s1 ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights]
	119	s2 ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights]
	120
	121	Returns:
	122	s1new ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights] after reduction
	123	s2new ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights] after reduction
	124	"""
	125	lens = len(s1[0])
	126	indices = []
	127	for i in range(lens):
	128	if s1[1][i]==0 and s2[1][i]==0:
	129	indices.append(i)
	130
	131	s1 = [np.delete(s1[0], indices, axis=0), np.delete(s1[1], indices, axis=0)]
	132	s2 = [np.delete(s2[0], indices, axis=0), np.delete(s2[1], indices, axis=0)]
	133	return s1, s2
	134
	135
[1224]	136	def getSignatures(self,array,edges3,steps3):
	137	"""Generates signature for array representing the Model. Signature is composed of list of points [x,y,z] (float) and list of weights (int).
[1208]	138
	139	Args:
[1224]	140	array (np.array(np.array(,dtype=float))): array with voxels representing the Model
	141	edges3 ([np.array(,dtype=float),np.array(,dtype=float),np.array(,dtype=float)]): lists with edges for each step for each axis in order x,y,z
	142	steps3 ([float,float,float]): [size of interval for x axis, size of interval for y axis, size of interval for y axis]
[1208]	143
	144	Returns (distribution):
	145	signature [np.array(,dtype=np.float64),np.array(,dtype=np.float64)]: returns signatuere [np.array of points, np.array of weights]
	146	Returns (frequency):
	147	signature np.array(,dtype=np.float64): returns signatuere np.array of coefficients
	148	"""
[1224]	149	edges_x,edges_y,edges_z = edges3
	150	step_x,step_y,step_z=steps3
[1208]	151	feature_array = []
	152	weight_array = []
[1224]	153	step_x_half = step_x/2
	154	step_y_half = step_y/2
	155	step_z_half = step_z/2
	156	for x in range(len(edges_x[:-1])):
	157	for y in range(len(edges_y[:-1])) :
	158	for z in range(len(edges_z[:-1])):
	159	rows=np.where((array[:,0]> edges_x[x]) &
	160	(array[:,0]<= edges_x[x+1]) &
	161	(array[:,1]> edges_y[y]) &
	162	(array[:,1]<= edges_y[y+1]) &
	163	(array[:,2]> edges_z[z]) &
	164	(array[:,2]<= edges_z[z+1]))
[1208]	165	if self.frequency:
	166	feature_array.append(len(array[rows]))
	167	else:
[1224]	168	weight, point = self.calculateNeighberhood(array[rows],[edges_x[x]+step_x_half,edges_y[y]+step_y_half,edges_z[z]+step_z_half])
[1208]	169	feature_array.append(point)
	170	weight_array.append(weight)
	171
	172	if self.frequency:
	173	samples = np.array(feature_array,dtype=np.float64)
[1269]	174	return samples
[1208]	175	else:
	176	return [np.array(feature_array,dtype=np.float64), np.array(weight_array,dtype=np.float64)]
	177
	178
	179	def getSignaturesForPair(self,array1,array2):
[1214]	180	"""Generates signatures for given pair of models represented by array of voxels.
[1224]	181	We calculate space for given models by taking the extremas for each axis and dividing the space by the resolution.
	182	This divided space generate us samples which contains points. Each sample will have new coordinates which are mean of all points from it and weight which equals to the number of points.
[1208]	183
	184	Args:
	185	array1 (np.array(np.array(,dtype=float))): array with voxels representing model1
	186	array2 (np.array(np.array(,dtype=float))): array with voxels representing model2
[1224]	187
[1208]	188	Returns:
	189	s1 ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights]
	190	s2 ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights]
	191	"""
	192
[1221]	193	min_x = np.min([np.min(array1[:,0]),np.min(array2[:,0])])
[1210]	194	max_x = np.max([np.max(array1[:,0]),np.max(array2[:,0])])
	195	min_y = np.min([np.min(array1[:,1]),np.min(array2[:,1])])
	196	max_y = np.max([np.max(array1[:,1]),np.max(array2[:,1])])
	197	min_z = np.min([np.min(array1[:,2]),np.min(array2[:,2])])
	198	max_z = np.max([np.max(array1[:,2]),np.max(array2[:,2])])
[1208]	199
[1224]	200	# We request self.resolution+1 samples since we need self.resolution intervals
	201	edges_x,step_x = np.linspace(min_x,max_x,self.resolution+1,retstep=True)
	202	edges_y,step_y = np.linspace(min_y,max_y,self.resolution+1,retstep=True)
	203	edges_z,step_z = np.linspace(min_z,max_z,self.resolution+1,retstep=True)
[1208]	204
[1224]	205	for edges in (edges_x, edges_y, edges_z): # EPSILON subtracted to deal with boundary voxels (one-sided open intervals and comparisons in loops in function getSignatures())
	206	edges[0] -= self.EPSILON
[1208]	207
[1224]	208	edges3 = (edges_x,edges_y,edges_z)
	209	steps3 = (step_x,step_y,step_z)
[1208]	210
[1224]	211	s1 = self.getSignatures(array1,edges3,steps3)
	212	s2 = self.getSignatures(array2,edges3,steps3)
[1208]	213
	214	return s1,s2
	215
	216
	217	def getVoxels(self,geno):
[1214]	218	"""Generates voxels for genotype using frams.ModelGeometry
[1208]	219
	220	Args:
[1224]	221	geno (string): representation of Model in one of the formats supported by Framsticks, http://www.framsticks.com/a/al_genotype.html
[1208]	222
	223	Returns:
[1224]	224	np.array([np.array(,dtype=float)]: list of voxels representing the Model.
[1208]	225	"""
[1219]	226	model = self.frams.Model.newFromString(geno)
[1208]	227	align(model, self.fixedZaxis)
[1219]	228	model_geometry = self.frams.ModelGeometry.forModel(model)
[1208]	229
	230	model_geometry.geom_density = self.density
	231	voxels = np.array([np.array([p.x._value(),p.y._value(),p.z._value()]) for p in model_geometry.voxels()])
	232	return voxels
	233
	234
[1266]	235	def normalize(self, signature):
[1267]	236	"""Normalizes the signature values by dividing each element by the sum of all elements.
[1266]	237	Args:
[1267]	238	signature np.array(,dtype=float): A one-dimensional array of signature values.
[1266]	239
	240	Returns:
[1267]	241	np.array(,dtype=float): A one-dimensional array of normalized signature values.
[1266]	242	"""
	243	total = np.sum(signature)
	244	return np.divide(signature, total)
	245
	246
[1208]	247	def calculateDissimforVoxels(self, voxels1, voxels2):
[1214]	248	"""Calculates EMD for pair of voxels representing models.
[1208]	249	Args:
	250	voxels1 np.array([np.array(,dtype=float)]: list of voxels representing model1.
	251	voxels2 np.array([np.array(,dtype=float)]: list of voxels representing model2.
	252
	253	Returns:
	254	float: dissim for pair of list of voxels
	255	"""
	256	numvox1 = len(voxels1)
	257	numvox2 = len(voxels2)
	258
	259	s1, s2 = self.getSignaturesForPair(voxels1, voxels2)
	260
[1224]	261	reduce_fun = self.reduceEmptySignatures_Frequency if self.frequency else self.reduceEmptySignatures_Density
	262	if self.reduce_empty:
[1208]	263	s1, s2 = reduce_fun(s1,s2)
	264
	265	if not self.frequency:
	266	if numvox1 != sum(s1[1]) or numvox2 != sum(s2[1]):
[1224]	267	print("Voxel reduction didn't work properly")
[1208]	268	print("Base voxels fig1: ", numvox1, " fig2: ", numvox2)
	269	print("After reduction voxels fig1: ", sum(s1[1]), " fig2: ", sum(s2[1]))
[1224]	270	raise RuntimeError("Voxel reduction error!")
[1208]	271
[1269]	272	if self.frequency:
	273	s1 = abs(np.fft.fft(s1))
	274	s2 = abs(np.fft.fft(s2))
	275
[1208]	276	if self.metric == 'l1':
	277	if self.frequency:
	278	out = np.linalg.norm((s1-s2), ord=1)
	279	else:
	280	out = np.linalg.norm((s1[1]-s2[1]), ord=1)
	281
	282	elif self.metric == 'l2':
	283	if self.frequency:
	284	out = np.linalg.norm((s1-s2))
	285	else:
	286	out = np.linalg.norm((s1[1]-s2[1]))
	287
	288	elif self.metric == 'emd':
	289	if self.frequency:
[1268]	290	num_points = np.linspace(0, 1, len(s1), True)
	291	dist_matrix = self.calculateDistanceMatrix(num_points,num_points)
[1208]	292	else:
	293	dist_matrix = self.calculateDistanceMatrix(s1[0],s2[0])
	294
[1295]	295	if self.OLD_FPEX:
	296	self.libm.fedisableexcept(0x04) # change default flag value - don't cause exceptions when dividing by 0 (pyemd does it)
[1208]	297
	298	if self.frequency:
[1266]	299	out = emd(self.normalize(s1),self.normalize(s2),np.array(dist_matrix,dtype=np.float64))
[1208]	300	else:
[1266]	301	out = emd(self.normalize(s1[1]),self.normalize(s2[1]),dist_matrix)
[1208]	302
[1295]	303	if self.OLD_FPEX:
	304	self.libm.feclearexcept(0x04) # restoring default flag values...
	305	self.libm.feenableexcept(0x04)
[1208]	306
	307	else:
	308	raise ValueError("Wrong metric '%s'"%self.metric)
	309
	310	return out
	311
	312
	313	def calculateDissimforGeno(self, geno1, geno2):
[1219]	314	"""Calculates EMD for a pair of genotypes.
[1208]	315	Args:
[1224]	316	geno1 (string): representation of model1 in one of the formats supported by Framsticks, http://www.framsticks.com/a/al_genotype.html
	317	geno2 (string): representation of model2 in one of the formats supported by Framsticks, http://www.framsticks.com/a/al_genotype.html
[1208]	318
	319	Returns:
	320	float: dissim for pair of strings representing models.
	321	"""
	322
	323	voxels1 = self.getVoxels(geno1)
	324	voxels2 = self.getVoxels(geno2)
	325
	326	out = self.calculateDissimforVoxels(voxels1, voxels2)
	327
	328	if self.verbose == True:
[1224]	329	print("Intervals: ", self.resolution)
[1208]	330	print("Geno1:\n",geno1)
	331	print("Geno2:\n",geno2)
	332	print("EMD:\n",out)
	333
	334	return out
	335
	336
	337	def getDissimilarityMatrix(self,listOfGeno):
	338	"""
	339	Args:
[1224]	340	listOfGeno ([string]): list of strings representing genotypes in one of the formats supported by Framsticks, http://www.framsticks.com/a/al_genotype.html
[1208]	341
	342	Returns:
	343	np.array(np.array(,dtype=float)): dissimilarity matrix of EMD for given list of genotypes
	344	"""
	345	numOfGeno = len(listOfGeno)
	346	dissimMatrix = np.zeros(shape=[numOfGeno,numOfGeno])
	347	listOfVoxels = [self.getVoxels(g) for g in listOfGeno]
	348	for i in range(numOfGeno):
	349	for j in range(numOfGeno):
	350	dissimMatrix[i,j] = self.calculateDissimforVoxels(listOfVoxels[i], listOfVoxels[j])
	351	return dissimMatrix

Note: See TracBrowser for help on using the repository browser.

Download in other formats: