Changeset 598 for mds-and-trees/mds_plot.py
- Timestamp:
- 08/25/16 17:44:00 (8 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
mds-and-trees/mds_plot.py
r597 r598 16 16 17 17 18 18 #http://www.nervouscomputer.com/hfs/cmdscale-in-python/ 19 def cmdscale(D): 20 """ 21 Classical multidimensional scaling (MDS) 22 23 Parameters 24 ---------- 25 D : (n, n) array 26 Symmetric distance matrix. 27 28 Returns 29 ------- 30 Y : (n, p) array 31 Configuration matrix. Each column represents a dimension. Only the 32 p dimensions corresponding to positive eigenvalues of B are returned. 33 Note that each dimension is only determined up to an overall sign, 34 corresponding to a reflection. 35 36 e : (n,) array 37 Eigenvalues of B. 38 39 """ 40 # Number of points 41 n = len(D) 42 43 # Centering matrix 44 H = np.eye(n) - np.ones((n, n))/n 45 46 # YY^T 47 B = -H.dot(D**2).dot(H)/2 48 49 # Diagonalize 50 evals, evecs = np.linalg.eigh(B) 51 52 # Sort by eigenvalue in descending order 53 idx = np.argsort(evals)[::-1] 54 evals = evals[idx] 55 evecs = evecs[:,idx] 56 57 # Compute the coordinates using positive-eigenvalued components only 58 w, = np.where(evals > 0) 59 L = np.diag(np.sqrt(evals[w])) 60 V = evecs[:,w] 61 Y = V.dot(L) 62 63 return Y, evals 19 64 20 65 def rand_jitter(arr): … … 31 76 32 77 def compute_mds(distance_matrix, dim): 33 seed = np.random.RandomState(seed=3) 34 mds = manifold.MDS(n_components=int(dim), metric=True, max_iter=3000, eps=1e-9, random_state=seed, dissimilarity="precomputed") 35 embed = mds.fit(distance_matrix).embedding_ 78 embed, evals = cmdscale(distance_matrix) 79 variances = compute_variances(embed) 80 embed = np.asarray([embed[:,i] for i in range(dim)]).T 81 82 percent_variances = [sum(variances[:i+1])/sum(variances) for i in range(dim)] 83 for i,pv in enumerate(percent_variances): 84 print(i+1,"dimension:",pv) 85 36 86 return embed 37 87 … … 41 91 for i in range(len(embed[0])): 42 92 variances.append(np.var(embed[:,i])) 43 percent_variances = [sum(variances[:i+1])/sum(variances) for i in range(len(variances))] 44 return percent_variances 93 return variances 45 94 46 95 … … 69 118 else: 70 119 plt.savefig(outname+".pdf") 120 np.savetxt(outname+".csv", coordinates, delimiter=";") 71 121 72 122 73 123 def main(filename, dimensions=3, outname="", jitter=0, separator='\t'): 124 dimensions = int(dimensions) 74 125 distances = read_file(filename, separator) 75 126 embed = compute_mds(distances, dimensions) 76 127 77 variances_perc = compute_variances(embed)78 for i,vc in enumerate(variances_perc):79 print(i+1,"dimension:",vc)80 81 dimensions = int(dimensions)82 128 if dimensions == 1: 83 129 embed = np.array([np.insert(e, 0, 0, axis=0) for e in embed])
Note: See TracChangeset
for help on using the changeset viewer.