[1208] | 1 | import numpy as np |
---|
| 2 | from pyemd import emd |
---|
| 3 | from ctypes import cdll |
---|
| 4 | from ctypes.util import find_library |
---|
| 5 | from alignmodel import align |
---|
| 6 | |
---|
class DensityDistribution:
    """Dissimilarity of Framsticks models based on the spatial distribution of their voxels.

    Every model is turned into a cloud of voxels. For a pair of models the common
    bounding box of both clouds is split into a regular grid of cells, a "signature"
    is computed for each model from the voxels found in each cell, and the two
    signatures are compared with the configured metric ('emd', 'l1' or 'l2').
    """

    # Handle to the C math library; its floating-point-environment functions are called
    # around pyemd's emd() in calculateDissimforVoxels().
    libm = cdll.LoadLibrary(find_library('m'))
    # Margin by which the lowest grid edge of every axis is moved down, so that voxels lying
    # exactly on the lower boundary still fall into the first (left-open) interval.
    EPSILON = 0.0001
    # Mask passed to fedisableexcept()/feclearexcept()/feenableexcept().
    # NOTE(review): 0x04 is presumably FE_DIVBYZERO (its value on x86 glibc) -- confirm on other platforms.
    _FE_DIVBYZERO = 0x04

    def __init__(self, FramsLib=None, density=10, steps=3, reduce=True, frequency=False,
                 metric='emd', fixedZaxis=False, verbose=False):
        """ __init__
        Args:
            FramsLib: Framsticks library object; its Model and ModelGeometry members are used in getVoxels(). Required.
            density (int, optional): density of samplings for frams.ModelGeometry . Defaults to 10.
            steps (int, optional): Number of grid edges generated per axis when sampling the space of voxels
                (this yields steps-1 intervals per axis). The higher value the more accurate sampling
                and the longer calculations. Defaults to 3.
            reduce (bool, optional): If we should use reduction to remove blank samples. Defaults to True.
            frequency (bool, optional): If we should use frequency distribution. Defaults to False.
            metric (string, optional): The distance metric that should be used ('emd', 'l1', or 'l2'). Defaults to 'emd'.
            fixedZaxis (bool, optional): If the z axis should be fixed during alignment. Defaults to False.
            verbose (bool, optional): Turning on logging, works only for calculateDissimforGeno. Defaults to False.

        Raises:
            ValueError: if FramsLib is not provided.
        """
        if FramsLib is None:
            raise ValueError('Frams library not provided!')
        self.frams_lib = FramsLib

        self.density = density
        self.steps = steps
        self.verbose = verbose
        self.reduce = reduce
        self.frequency = frequency
        self.metric = metric
        self.fixedZaxis = fixedZaxis

    def calculateNeighberhood(self, array, mean_coords):
        """ Calculates number of elements for given sample and sets the center of this sample
        to the center of mass (calculated by mean of every coordinate)
        Args:
            array ([[float,float,float],...,[float,float,float]]): array of voxels that belong to given sample.
            mean_coords ([float,float,float]): default coordinates that are the
                middle of the sample (used when number of voxels in sample is equal to 0)

        Returns:
            weight [int]: number of voxels in a sample
            coordinates [float,float,float]: center of mass for a sample
        """
        weight = len(array)
        if weight == 0:
            # Empty sample: fall back to the geometric middle of the cell.
            return 0, mean_coords
        point = [np.mean(array[:, 0]), np.mean(array[:, 1]), np.mean(array[:, 2])]
        return weight, point

    def calculateDistPoints(self, point1, point2):
        """ Returns euclidean distance between two points
        Args (distribution):
            point1 ([float,float,float]) - coordinates of first point
            point2 ([float,float,float]) - coordinates of second point
        Args (frequency):
            point1 (float) - value of the first sample
            point2 (float) - value of the second sample

        Returns:
            [float]: euclidean distance
        """
        if self.frequency:
            return abs(point1 - point2)
        # asarray lets plain lists/tuples be used as points as well as numpy arrays.
        difference = np.asarray(point1) - np.asarray(point2)
        return np.sqrt(np.sum(np.square(difference)))

    def calculateDistanceMatrix(self, array1, array2):
        """ Builds the matrix of pairwise distances between the points of two models.

        Args:
            array1 ([type]): array of size n with points representing first model
            array2 ([type]): array of size n with points representing second model

        Returns:
            np.array(np.array(,dtype=float)): distance matrix n x n, where element [i][j]
                is the distance between array1[i] and array2[j]
        """
        n = len(array1)
        distMatrix = np.zeros((n, n))
        for i in range(n):
            for j in range(n):
                distMatrix[i][j] = self.calculateDistPoints(array1[i], array2[j])
        return distMatrix

    def reduceSignaturesFreq(self, s1, s2):
        """Removes samples from signatures if corresponding samples for both models have weight 0.
        Args:
            s1 (np.array(,dtype=np.float64)): values of samples
            s2 (np.array(,dtype=np.float64)): values of samples

        Returns:
            s1new (np.array(,dtype=np.float64)): values of samples after reduction
            s2new (np.array(,dtype=np.float64)): values of samples after reduction
        """
        # Positions where both signatures are exactly zero carry no information.
        blank = np.flatnonzero((np.asarray(s1) == 0) & (np.asarray(s2) == 0))
        return np.delete(s1, blank), np.delete(s2, blank)

    def reduceSignaturesDens(self, s1, s2):
        """Removes samples from signatures if corresponding samples for both models have weight 0.
        Args:
            s1 ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights]
            s2 ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights]

        Returns:
            s1new ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights] after reduction
            s2new ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights] after reduction
        """
        # A sample is dropped only when its weight is zero in BOTH signatures, so the
        # two reduced signatures keep the same length and ordering.
        blank = np.flatnonzero((np.asarray(s1[1]) == 0) & (np.asarray(s2[1]) == 0))
        s1 = [np.delete(s1[0], blank, axis=0), np.delete(s1[1], blank, axis=0)]
        s2 = [np.delete(s2[0], blank, axis=0), np.delete(s2[1], blank, axis=0)]
        return s1, s2

    def getSignatures(self, array, steps_all, step_all):
        """Generates signature for array representing model. Signature is composed of list of points [x,y,z] (float) and list of weights (int).

        Cells are the left-open, right-closed intervals (edge[k], edge[k+1]] on every axis and are
        visited with x as the outermost and z as the innermost index.

        Args:
            array (np.array(np.array(,dtype=float))): array with voxels representing model
            steps_all ([np.array(,dtype=float),np.array(,dtype=float),np.array(,dtype=float)]): lists with edges for each step for each axis in order x,y,z
            step_all ([float,float,float]): [size of step for x axis, size of step for y axis, size of step for z axis]

        Returns (distribution):
            signature [np.array(,dtype=np.float64),np.array(,dtype=np.float64)]: returns signature [np.array of points, np.array of weights]
        Returns (frequency):
            signature np.array(,dtype=np.float64): returns signature np.array of coefficients
                (magnitudes of the FFT of the per-cell voxel counts)
        """
        x_steps, y_steps, z_steps = steps_all
        x_step, y_step, z_step = step_all
        feature_array = []
        weight_array = []
        step_half_x = x_step / 2
        step_half_y = y_step / 2
        step_half_z = z_step / 2

        xs = array[:, 0]
        ys = array[:, 1]
        zs = array[:, 2]
        for x in range(len(x_steps) - 1):
            # Per-axis membership masks are computed once per outer iteration
            # instead of once per cell.
            in_x = (xs > x_steps[x]) & (xs <= x_steps[x + 1])
            for y in range(len(y_steps) - 1):
                in_xy = in_x & (ys > y_steps[y]) & (ys <= y_steps[y + 1])
                for z in range(len(z_steps) - 1):
                    in_cell = in_xy & (zs > z_steps[z]) & (zs <= z_steps[z + 1])
                    cell_voxels = array[in_cell]
                    if self.frequency:
                        feature_array.append(len(cell_voxels))
                    else:
                        cell_middle = [x_steps[x] + step_half_x,
                                       y_steps[y] + step_half_y,
                                       z_steps[z] + step_half_z]
                        weight, point = self.calculateNeighberhood(cell_voxels, cell_middle)
                        feature_array.append(point)
                        weight_array.append(weight)

        if self.frequency:
            samples = np.array(feature_array, dtype=np.float64)
            return abs(np.fft.fft(samples))
        return [np.array(feature_array, dtype=np.float64), np.array(weight_array, dtype=np.float64)]

    def getSignaturesForPair(self, array1, array2):
        """generates signatures for given pair of models represented by array of voxels.
        We calculate space for given models by taking the extremas for each axis and dividing the space using self.steps edges per axis.
        This divided space generates samples which contain points. Each sample will have new coordinates which are mean of all points from it and weight
        which equals to the number of points.

        Args:
            array1 (np.array(np.array(,dtype=float))): array with voxels representing model1
            array2 (np.array(np.array(,dtype=float))): array with voxels representing model2

        Returns:
            s1 ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights]
            s2 ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights]
            (in frequency mode each signature is a single np.array of coefficients, see getSignatures())
        """
        edges_per_axis = []
        width_per_axis = []
        for axis in range(3):
            lowest = np.min([np.min(array1[:, axis]), np.min(array2[:, axis])])
            highest = np.max([np.max(array1[:, axis]), np.max(array2[:, axis])])
            edges, width = np.linspace(lowest, highest, self.steps, retstep=True)
            # EPSILON subtracted to deal with boundary voxels (one-sided open intervals
            # and comparisons in loops in function getSignatures())
            edges[0] -= self.EPSILON
            edges_per_axis.append(edges)
            width_per_axis.append(width)

        steps_all = tuple(edges_per_axis)
        step_all = tuple(width_per_axis)

        s1 = self.getSignatures(array1, steps_all, step_all)
        s2 = self.getSignatures(array2, steps_all, step_all)

        return s1, s2

    def getVoxels(self, geno):
        """ Generates voxels for genotype using frams.ModelGeometry

        Args:
            geno (string): representation of model in one of the formats handled by frams http://www.framsticks.com/a/al_genotype.html

        Returns:
            np.array([np.array(,dtype=float)]: list of voxels representing model.
        """
        model = self.frams_lib.Model.newFromString(geno)
        align(model, self.fixedZaxis)
        model_geometry = self.frams_lib.ModelGeometry.forModel(model)

        model_geometry.geom_density = self.density
        voxels = np.array([np.array([p.x._value(), p.y._value(), p.z._value()])
                           for p in model_geometry.voxels()])
        return voxels

    def calculateDissimforVoxels(self, voxels1, voxels2):
        """ Calculate dissimilarity (using the configured metric) for pair of voxels representing models.
        Args:
            voxels1 np.array([np.array(,dtype=float)]: list of voxels representing model1.
            voxels2 np.array([np.array(,dtype=float)]: list of voxels representing model2.

        Returns:
            float: dissim for pair of list of voxels

        Raises:
            ValueError: if the density signature does not account for every voxel,
                or if self.metric is not one of 'emd', 'l1', 'l2'.
        """
        numvox1 = len(voxels1)
        numvox2 = len(voxels2)

        s1, s2 = self.getSignaturesForPair(voxels1, voxels2)

        # The voxel-count sanity check applies to density signatures only: a frequency
        # signature is a flat array of FFT magnitudes, so s1[1] would be a scalar there
        # and sum() over it would raise TypeError.
        if not self.frequency:
            if numvox1 != sum(s1[1]) or numvox2 != sum(s2[1]):
                print("Bad signature!")
                print("Base voxels fig1: ", numvox1, " fig2: ", numvox2)
                print("After reduction voxels fig1: ", sum(s1[1]), " fig2: ", sum(s2[1]))
                raise ValueError("BAd signature!")

        if self.reduce:
            if self.frequency:
                s1, s2 = self.reduceSignaturesFreq(s1, s2)
            else:
                s1, s2 = self.reduceSignaturesDens(s1, s2)
                # Reduction removes only zero-weight samples, so totals must not change.
                if numvox1 != sum(s1[1]) or numvox2 != sum(s2[1]):
                    print("Voxel reduction didnt work properly")
                    print("Base voxels fig1: ", numvox1, " fig2: ", numvox2)
                    print("After reduction voxels fig1: ", sum(s1[1]), " fig2: ", sum(s2[1]))

        # The values compared by every metric: FFT magnitudes or per-sample weights.
        if self.frequency:
            values1, values2 = s1, s2
        else:
            values1, values2 = s1[1], s2[1]

        if self.metric == 'l1':
            return np.linalg.norm((values1 - values2), ord=1)

        if self.metric == 'l2':
            return np.linalg.norm((values1 - values2))

        if self.metric == 'emd':
            if self.frequency:
                # Ground distance between coefficients is the difference of their indices.
                num_points = len(s1)
                dist_matrix = self.calculateDistanceMatrix(range(num_points), range(num_points))
            else:
                dist_matrix = self.calculateDistanceMatrix(s1[0], s2[0])

            # allowing for operation divide by 0 because pyemd requires it.
            self.libm.fedisableexcept(self._FE_DIVBYZERO)
            try:
                return emd(values1, values2, dist_matrix)
            finally:
                # disabling operation divide by 0 because framsticks doesnt like it;
                # done in `finally` so the trap is restored even when emd() raises.
                self.libm.feclearexcept(self._FE_DIVBYZERO)
                self.libm.feenableexcept(self._FE_DIVBYZERO)

        raise ValueError("Wrong metric '%s'" % self.metric)

    def calculateDissimforGeno(self, geno1, geno2):
        """ Calculate dissimilarity (using the configured metric) for pair of genos.
        Args:
            geno1 (string): representation of model1 in one of the formats handled by frams http://www.framsticks.com/a/al_genotype.html
            geno2 (string): representation of model2 in one of the formats handled by frams http://www.framsticks.com/a/al_genotype.html

        Returns:
            float: dissim for pair of strings representing models.
        """
        voxels1 = self.getVoxels(geno1)
        voxels2 = self.getVoxels(geno2)

        out = self.calculateDissimforVoxels(voxels1, voxels2)

        if self.verbose:
            print("Steps: ", self.steps)
            print("Geno1:\n", geno1)
            print("Geno2:\n", geno2)
            print("EMD:\n", out)

        return out

    def getDissimilarityMatrix(self, listOfGeno):
        """ Calculates dissimilarity for every ordered pair of the given genotypes.

        Args:
            listOfGeno ([string]): list of strings representing genotypes in one of the formats handled by frams http://www.framsticks.com/a/al_genotype.html

        Returns:
            np.array(np.array(,dtype=float)): dissimilarity matrix for given list of genotypes
        """
        numOfGeno = len(listOfGeno)
        dissimMatrix = np.zeros(shape=[numOfGeno, numOfGeno])
        # Voxels are generated once per genotype and reused for every pair.
        listOfVoxels = [self.getVoxels(g) for g in listOfGeno]
        for i, voxels_i in enumerate(listOfVoxels):
            for j, voxels_j in enumerate(listOfVoxels):
                dissimMatrix[i, j] = self.calculateDissimforVoxels(voxels_i, voxels_j)
        return dissimMatrix