Context Navigation

source: framspy/dissimilarity/density-distribution.py @ 1211

Last change on this file since 1211 was 1210, checked in by Maciej Komosinski, 20 months ago
EPSILON used only where necessary
File size: 14.6 KB

Rev	Line
[1208]	1	import numpy as np
	2	from pyemd import emd
	3	from ctypes import cdll
	4	from ctypes.util import find_library
	5	from alignmodel import align
	6
	7	class DensityDistribution:
	8	libm = cdll.LoadLibrary(find_library('m'))
	9	EPSILON = 0.0001
	10	def __init__(self, FramsLib=None, density = 10, steps = 3, reduce=True, frequency=False, metric = 'emd', fixedZaxis=False, verbose=False):
	11	""" __init__
	12	Args:
	13	density (int, optional): density of samplings for frams.ModelGeometry . Defaults to 10.
	14	steps (int, optional): How many steps is used for sampling space of voxels,
	15	The higher value the more accurate sampling and the longer calculations. Defaults to 3.
	16	reduce (bool, optional): If we should use reduction to remove blank samples. Defaults to True.
	17	frequency (bool, optional): If we should use frequency distribution. Defaults to False.
	18	metric (string, optional): The distance metric that should be used ('emd', 'l1', or 'l2'). Defaults to 'emd'.
	19	fixedZaxis (bool, optional): If the z axis should be fixed during alignment. Defaults to False.
	20	verbose (bool, optional): Turning on logging, works only for calculateEMDforGeno. Defaults to False.
	21	"""
	22	if FramsLib == None:
	23	raise ValueError('Frams library not provided!')
	24	self.frams_lib = FramsLib
	25
	26	self.density = density
	27	self.steps = steps
	28	self.verbose = verbose
	29	self.reduce = reduce
	30	self.frequency = frequency
	31	self.metric = metric
	32	self.fixedZaxis = fixedZaxis
	33
	34
	35	def calculateNeighberhood(self,array,mean_coords):
	36	""" Calculates number of elements for given sample and set ups the center of this sample
	37	to the center of mass (calculated by mean of every coordinate)
	38	Args:
	39	array ([[float,float,float],...,[float,float,float]]): array of voxels that belong to given sample.
	40	mean_coords ([float,float,float]): default coordinates that are the
	41	middle of the sample (used when number of voxels in sample is equal to 0)
	42
	43	Returns:
	44	weight [int]: number of voxels in a sample
	45	coordinates [float,float,float]: center of mass for a sample
	46	"""
	47	weight = len(array)
	48	if weight > 0:
	49	point = [np.mean(array[:,0]),np.mean(array[:,1]),np.mean(array[:,2])]
	50	return weight, point
	51	else:
	52	return 0, mean_coords
	53
	54
	55	def calculateDistPoints(self,point1, point2):
	56	""" Returns euclidean distance between two points
	57	Args (distribution):
	58	point1 ([float,float,float]) - coordinates of first point
	59	point2 ([float,float,float]) - coordinates of second point
	60	Args (frequency):
	61	point1 (float) - value of the first sample
	62	point2 (float) - value of the second sample
	63
	64	Returns:
	65	[float]: euclidean distance
	66	"""
	67	if self.frequency:
	68	return abs(point1-point2)
	69	else:
	70	return np.sqrt(np.sum(np.square(point1-point2)))
	71
	72
	73	def calculateDistanceMatrix(self,array1, array2):
	74	"""
	75
	76	Args:
	77	array1 ([type]): array of size n with points representing firsts model
	78	array2 ([type]): array of size n with points representing second model
	79
	80	Returns:
	81	np.array(np.array(,dtype=float)): distance matrix n x n
	82	"""
	83	n = len(array1)
	84	distMatrix = np.zeros((n,n))
	85	for i in range(n):
	86	for j in range(n):
	87	distMatrix[i][j] = self.calculateDistPoints(array1[i], array2[j])
	88	return np.array(distMatrix)
	89
	90
	91	def reduceSignaturesFreq(self,s1,s2):
	92	"""Removes samples from signatures if corresponding samples for both models have weight 0.
	93	Args:
	94	s1 (np.array(,dtype=np.float64)): values of samples
	95	s2 (np.array(,dtype=np.float64)): values of samples
	96
	97	Returns:
	98	s1new (np.array(,dtype=np.float64)): coordinates of samples after reduction
	99	s2new (np.array(,dtype=np.float64)): coordinates of samples after reduction
	100	"""
	101	lens = len(s1)
	102	indices = []
	103	for i in range(lens):
	104	if s1[i]==0 and s2[i]==0:
	105	indices.append(i)
	106
	107	return np.delete(s1, indices), np.delete(s2, indices)
	108
	109
	110	def reduceSignaturesDens(self,s1,s2):
	111	"""Removes samples from signatures if corresponding samples for both models have weight 0.
	112	Args:
	113	s1 ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights]
	114	s2 ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights]
	115
	116	Returns:
	117	s1new ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights] after reduction
	118	s2new ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights] after reduction
	119	"""
	120	lens = len(s1[0])
	121	indices = []
	122	for i in range(lens):
	123	if s1[1][i]==0 and s2[1][i]==0:
	124	indices.append(i)
	125
	126	s1 = [np.delete(s1[0], indices, axis=0), np.delete(s1[1], indices, axis=0)]
	127	s2 = [np.delete(s2[0], indices, axis=0), np.delete(s2[1], indices, axis=0)]
	128	return s1, s2
	129
	130
	131	def getSignatures(self,array,steps_all,step_all):
	132	"""Generates signature for array representing model. Signature is composed of list of points [x,y,z] (float) and list of weights (int).
	133
	134	Args:
	135	array (np.array(np.array(,dtype=float))): array with voxels representing model
	136	steps_all ([np.array(,dtype=float),np.array(,dtype=float),np.array(,dtype=float)]): lists with edges for each step for each axis in order x,y,z
	137	step_all ([float,float,float]): [size of step for x axis, size of step for y axis, size of step for y axis]
	138
	139	Returns (distribution):
	140	signature [np.array(,dtype=np.float64),np.array(,dtype=np.float64)]: returns signatuere [np.array of points, np.array of weights]
	141	Returns (frequency):
	142	signature np.array(,dtype=np.float64): returns signatuere np.array of coefficients
	143	"""
	144	x_steps,y_steps,z_steps = steps_all
	145	x_step,y_step,z_step=step_all
	146	feature_array = []
	147	weight_array = []
	148	step_half_x = x_step/2
	149	step_half_y = y_step/2
	150	step_half_z = z_step/2
	151	for x in range(len(x_steps[:-1])):
	152	for y in range(len(y_steps[:-1])) :
	153	for z in range(len(z_steps[:-1])):
	154	rows=np.where((array[:,0]> x_steps[x]) &
	155	(array[:,0]<= x_steps[x+1]) &
	156	(array[:,1]> y_steps[y]) &
	157	(array[:,1]<= y_steps[y+1]) &
	158	(array[:,2]> z_steps[z]) &
	159	(array[:,2]<= z_steps[z+1]))
	160	if self.frequency:
	161	feature_array.append(len(array[rows]))
	162	else:
	163	weight, point = self.calculateNeighberhood(array[rows],[x_steps[x]+step_half_x,y_steps[y]+step_half_y,z_steps[z]+step_half_z])
	164	feature_array.append(point)
	165	weight_array.append(weight)
	166
	167	if self.frequency:
	168	samples = np.array(feature_array,dtype=np.float64)
	169	return abs(np.fft.fft(samples))
	170	else:
	171	return [np.array(feature_array,dtype=np.float64), np.array(weight_array,dtype=np.float64)]
	172
	173
	174	def getSignaturesForPair(self,array1,array2):
	175	"""generates signatures for given pair of models represented by array of voxels.
	176	We calculate space for given models by taking the extremas for each axis and dividing the space by the number of steps.
	177	This divided space generate us samples which contains points. Each sample will have new coordinates which are mean of all points from it and weight
	178	which equals to the number of points.
	179
	180	Args:
	181	array1 (np.array(np.array(,dtype=float))): array with voxels representing model1
	182	array2 (np.array(np.array(,dtype=float))): array with voxels representing model2
	183	steps (int, optional): How many steps is used for sampling space of voxels. Defaults to self.steps (3).
	184
	185	Returns:
	186	s1 ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights]
	187	s2 ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights]
	188	"""
	189
[1210]	190	min_x = np.min([np.min(array1[:,0]),np.min(array2[:,0])])
	191	max_x = np.max([np.max(array1[:,0]),np.max(array2[:,0])])
	192	min_y = np.min([np.min(array1[:,1]),np.min(array2[:,1])])
	193	max_y = np.max([np.max(array1[:,1]),np.max(array2[:,1])])
	194	min_z = np.min([np.min(array1[:,2]),np.min(array2[:,2])])
	195	max_z = np.max([np.max(array1[:,2]),np.max(array2[:,2])])
[1208]	196
	197	x_steps,x_step = np.linspace(min_x,max_x,self.steps,retstep=True)
	198	y_steps,y_step = np.linspace(min_y,max_y,self.steps,retstep=True)
	199	z_steps,z_step = np.linspace(min_z,max_z,self.steps,retstep=True)
	200
[1210]	201	for intervals in (x_steps, y_steps, z_steps): # EPSILON subtracted to deal with boundary voxels (one-sided open intervals and comparisons in loops in function getSignatures())
	202	intervals[0] -= self.EPSILON
[1208]	203
	204	steps_all = (x_steps,y_steps,z_steps)
	205	step_all = (x_step,y_step,z_step)
	206
	207	s1 = self.getSignatures(array1,steps_all,step_all)
	208	s2 = self.getSignatures(array2,steps_all,step_all)
	209
	210	return s1,s2
	211
	212
	213	def getVoxels(self,geno):
	214	""" Generates voxels for genotype using frams.ModelGeometry
	215
	216	Args:
	217	geno (string): representation of model in one of the formats handled by frams http://www.framsticks.com/a/al_genotype.html
	218
	219	Returns:
	220	np.array([np.array(,dtype=float)]: list of voxels representing model.
	221	"""
	222	model = self.frams_lib.Model.newFromString(geno)
	223	align(model, self.fixedZaxis)
	224	model_geometry = self.frams_lib.ModelGeometry.forModel(model)
	225
	226	model_geometry.geom_density = self.density
	227	voxels = np.array([np.array([p.x._value(),p.y._value(),p.z._value()]) for p in model_geometry.voxels()])
	228	return voxels
	229
	230
	231	def calculateDissimforVoxels(self, voxels1, voxels2):
	232	""" Calculate EMD for pair of voxels representing models.
	233	Args:
	234	voxels1 np.array([np.array(,dtype=float)]: list of voxels representing model1.
	235	voxels2 np.array([np.array(,dtype=float)]: list of voxels representing model2.
	236	steps (int, optional): How many steps is used for sampling space of voxels. Defaults to self.steps (3).
	237
	238	Returns:
	239	float: dissim for pair of list of voxels
	240	"""
	241	numvox1 = len(voxels1)
	242	numvox2 = len(voxels2)
	243
	244	s1, s2 = self.getSignaturesForPair(voxels1, voxels2)
	245
	246	if numvox1 != sum(s1[1]) or numvox2 != sum(s2[1]):
	247	print("Bad signature!")
	248	print("Base voxels fig1: ", numvox1, " fig2: ", numvox2)
	249	print("After reduction voxels fig1: ", sum(s1[1]), " fig2: ", sum(s2[1]))
	250	raise ValueError("BAd signature!")
	251
	252	reduce_fun = self.reduceSignaturesFreq if self.frequency else self.reduceSignaturesDens
	253	if self.reduce:
	254	s1, s2 = reduce_fun(s1,s2)
	255
	256	if not self.frequency:
	257	if numvox1 != sum(s1[1]) or numvox2 != sum(s2[1]):
	258	print("Voxel reduction didnt work properly")
	259	print("Base voxels fig1: ", numvox1, " fig2: ", numvox2)
	260	print("After reduction voxels fig1: ", sum(s1[1]), " fig2: ", sum(s2[1]))
	261
	262	if self.metric == 'l1':
	263	if self.frequency:
	264	out = np.linalg.norm((s1-s2), ord=1)
	265	else:
	266	out = np.linalg.norm((s1[1]-s2[1]), ord=1)
	267
	268	elif self.metric == 'l2':
	269	if self.frequency:
	270	out = np.linalg.norm((s1-s2))
	271	else:
	272	out = np.linalg.norm((s1[1]-s2[1]))
	273
	274	elif self.metric == 'emd':
	275	if self.frequency:
	276	num_points = len(s1)
	277	dist_matrix = self.calculateDistanceMatrix(range(num_points),range(num_points))
	278	else:
	279	dist_matrix = self.calculateDistanceMatrix(s1[0],s2[0])
	280
	281	self.libm.fedisableexcept(0x04) # allowing for operation divide by 0 because pyemd requiers it.
	282
	283	if self.frequency:
	284	out = emd(s1,s2,np.array(dist_matrix,dtype=np.float64))
	285	else:
	286	out = emd(s1[1],s2[1],dist_matrix)
	287
	288	self.libm.feclearexcept(0x04) # disabling operation divide by 0 because framsticks doesnt like it.
	289	self.libm.feenableexcept(0x04)
	290
	291	else:
	292	raise ValueError("Wrong metric '%s'"%self.metric)
	293
	294	return out
	295
	296
	297	def calculateDissimforGeno(self, geno1, geno2):
	298	""" Calculate EMD for pair of genos.
	299	Args:
	300	geno1 (string): representation of model1 in one of the formats handled by frams http://www.framsticks.com/a/al_genotype.html
	301	geno2 (string): representation of model2 in one of the formats handled by frams http://www.framsticks.com/a/al_genotype.html
	302	steps (int, optional): How many steps is used for sampling space of voxels. Defaults to self.steps (3).
	303
	304	Returns:
	305	float: dissim for pair of strings representing models.
	306	"""
	307
	308	voxels1 = self.getVoxels(geno1)
	309	voxels2 = self.getVoxels(geno2)
	310
	311	out = self.calculateDissimforVoxels(voxels1, voxels2)
	312
	313	if self.verbose == True:
	314	print("Steps: ", self.steps)
	315	print("Geno1:\n",geno1)
	316	print("Geno2:\n",geno2)
	317	print("EMD:\n",out)
	318
	319	return out
	320
	321
	322	def getDissimilarityMatrix(self,listOfGeno):
	323	"""
	324
	325	Args:
	326	listOfGeno ([string]): list of strings representing genotypes in one of the formats handled by frams http://www.framsticks.com/a/al_genotype.html
	327
	328	Returns:
	329	np.array(np.array(,dtype=float)): dissimilarity matrix of EMD for given list of genotypes
	330	"""
	331	numOfGeno = len(listOfGeno)
	332	dissimMatrix = np.zeros(shape=[numOfGeno,numOfGeno])
	333	listOfVoxels = [self.getVoxels(g) for g in listOfGeno]
	334	for i in range(numOfGeno):
	335	for j in range(numOfGeno):
	336	dissimMatrix[i,j] = self.calculateDissimforVoxels(listOfVoxels[i], listOfVoxels[j])
	337	return dissimMatrix

Note: See TracBrowser for help on using the repository browser.

Download in other formats: