■高斯混合模型(GMM):理念、数学、EM算法和python实现( 二 )
本文插图
将此代码放在for循环中 , 并将其放在类对象中 。
class GMM1D: ''''''Apply GMM to 1D Data'''''' def __init__(self, X, max_iterations): ''''''Initialize data and max_iterations'''''' self.X = X self.max_iterations = max_iterations def run(self): ''''''Initialize parameters mu, var, pi'''''' self.pi = np.array([1/3, 1/3, 1/3]) self.mu = np.array([5,8,1]) self.var = np.array([5,3,1]) r = np.zeros((len(self.X), 3)) for itr in range(self.max_iterations): gauss1 = norm(loc=self.mu[0], scale=self.var[0]) gauss2 = norm(loc=self.mu[1], scale=self.var[1]) gauss3 = norm(loc=self.mu[2], scale=self.var[2]) # E-Step for c,g,p in zip(range(3), [gauss1, gauss2, gauss3], self.pi): r[:,c] = p*g.pdf(xt[:]) for i in range(len(r)): r[i,:] /= np.sum(r[i,:]) fig = plt.figure(figsize=(10,10)) ax0 = fig.add_subplot(111) for i in range(len(r)): ax0.scatter(xt[i],0,c=r[i,:],s=100) for g,c in zip([gauss1.pdf(np.linspace(-15,15)),gauss2.pdf(np.linspace(-15,15)),gauss3.pdf(np.linspace(-15,15))],['r','g','b']): ax0.plot(np.linspace(-15,15),g,c=c,zorder=0) plt.show() # M-Step mc = np.sum(r, axis=0) self.pi = mc/len(self.X) self.mu = np.sum(r*np.vstack((self.X, self.X, self.X)).T, axis=0)/mc self.var = [] for c in range(len(self.pi)): self.var.append(np.sum(np.dot(r[:,c]*(self.X[i] - self.mu[c]).T, r[:,c]*(self.X[i] - self.mu[c])))/mc[c]) gmm = GMM1D(xt, 10) gmm.run()
本文插图
我们已经建立并运行了一个一维数据模型 。 同样的原理也适用于更高维度(≥2D) 。 唯一的区别是我们将使用多元高斯分布 。 让我们为2D模型编写Python代码 。
让我们生成一些数据并编写我们的模型
from sklearn.datasets.samples_generator import make_blobs from scipy.stats import multivariate_normal X,Y = make_blobs(cluster_std=1.5,random_state=20,n_samples=500,centers=3) X = np.dot(X, np.random.RandomState(0).randn(2,2)) plt.figure(figsize=(8,8)) plt.scatter(X[:, 0], X[:, 1]) plt.show()
【■高斯混合模型(GMM):理念、数学、EM算法和python实现】
本文插图
class GMM2D: ''''''Apply GMM to 2D data'''''' def __init__(self, num_clusters, max_iterations): ''''''Initialize num_clusters(K) and max_iterations for the model'''''' self.num_clusters = num_clusters self.max_iterations = max_iterations def run(self, X): ''''''Initialize parameters and run E and M step storing log-likelihood value after every iteration'''''' self.pi = np.ones(self.num_clusters)/self.num_clusters self.mu = np.random.randint(min(X[:, 0]), max(X[:, 0]), size=(self.num_clusters, len(X[0]))) self.cov = np.zeros((self.num_clusters, len(X[0]), len(X[0]))) for n in range(len(self.cov)): np.fill_diagonal(self.cov[n], 5) # reg_cov is used for numerical stability i.e. to check singularity issues in covariance matrix self.reg_cov = 1e-6*np.identity(len(X[0])) x,y = np.meshgrid(np.sort(X[:,0]), np.sort(X[:,1])) self.XY = np.array([x.flatten(), y.flatten()]).T # Plot the data and the initial model fig0 = plt.figure(figsize=(10,10)) ax0 = fig0.add_subplot(111) ax0.scatter(X[:, 0], X[:, 1]) ax0.set_title(''Initial State'') for m, c in zip(self.mu, self.cov): c += self.reg_cov multi_normal = multivariate_normal(mean=m, cov=c) ax0.contour(np.sort(X[:, 0]), np.sort(X[:, 1]), multi_normal.pdf(self.XY).reshape(len(X), len(X)), colors = 'black', alpha = 0.3) ax0.scatter(m[0], m[1], c='grey', zorder=10, s=100) fig0.savefig('GMM2D Initial State.png') plt.show() self.log_likelihoods = [] for iters in range(self.max_iterations): # E-Step self.ric = np.zeros((len(X), len(self.mu))) for pic, muc, covc, r in zip(self.pi, self.mu, self.cov, range(len(self.ric[0]))): covc += self.reg_cov mn = multivariate_normal(mean=muc, cov=covc) self.ric[:, r] = pic*mn.pdf(X) for r in range(len(self.ric)): self.ric[r, :] = self.ric[r, :] / np.sum(self.ric[r, :]) # M-step self.mc = np.sum(self.ric, axis=0) self.pi = self.mc/np.sum(self.mc) self.mu = np.dot(self.ric.T, X) / self.mc.reshape(self.num_clusters,1) self.cov = [] for r in range(len(self.pi)): covc = 1/self.mc[r] * (np.dot( (self.ric[:, r].reshape(len(X), 1)*(X-self.mu[r]) ).T, X - self.mu[r]) + self.reg_cov) self.cov.append(covc) self.cov = np.asarray(self.cov) self.log_likelihoods.append(np.log(np.sum([self.pi[r]*multivariate_normal(self.mu[r], self.cov[r] + self.reg_cov).pdf(X) for r in range(len(self.pi))]))) fig1 = plt.figure(figsize=(10,10)) ax1 = fig1.add_subplot(111) ax1.scatter(X[:, 0], X[:, 1]) ax1.set_title(''Iteration '' + str(iters)) for m, c in zip(self.mu, self.cov): c += self.reg_cov multi_normal = multivariate_normal(mean=m, cov=c) ax1.contour(np.sort(X[:, 0]), np.sort(X[:, 1]), multi_normal.pdf(self.XY).reshape(len(X), len(X)), colors = 'black', alpha = 0.3) ax1.scatter(m[0], m[1], c='grey', zorder=10, s=100) fig1.savefig(''GMM2D Iter '' + str(iters) + ''.png'') plt.show() fig2 = plt.figure(figsize=(10,10)) ax2 = fig2.add_subplot(111) ax2.plot(range(0, iters+1, 1), self.log_likelihoods) ax2.set_title('Log Likelihood Values') fig2.savefig('GMM2D Log Likelihood.png') plt.show() def predict(self, Y): ''''''Predicting cluster for new samples in array Y'''''' predictions = [] for pic, m, c in zip(self.pi, self.mu, self.cov): prob = pic*multivariate_normal(mean=m, cov=c).pdf(Y) predictions.append([prob]) predictions = np.asarray(predictions).reshape(len(Y), self.num_clusters) predictions = np.argmax(predictions, axis=1) fig2 = plt.figure(figsize=(10,10)) ax2 = fig2.add_subplot(111) ax2.scatter(X[:, 0], X[:, 1], c='c') ax2.scatter(Y[:, 0], Y[:, 1], marker='*', c='k', s=150, label = 'New Data') ax2.set_title(''Predictions on New Data'') colors = ['r', 'b', 'g'] for m, c, col, i in zip(self.mu, self.cov, colors, range(len(colors))): #c += reg_cov multi_normal = multivariate_normal(mean=m, cov=c) ax2.contour(np.sort(X[:, 0]), np.sort(X[:, 1]), multi_normal.pdf(self.XY).reshape(len(X), len(X)), colors = 'black', alpha = 0.3) ax2.scatter(m[0], m[1], marker='o', c=col, zorder=10, s=150, label = 'Centroid ' + str(i+1)) for i in range(len(Y)): ax2.scatter(Y[i, 0], Y[i, 1], marker='*', c=colors[predictions[i]], s=150) ax2.set_xlabel('X-axis') ax2.set_ylabel('Y-axis') ax2.legend() fig2.savefig('GMM2D Predictions.png') plt.show() return predictions让我们对此模型进行一些预测
推荐阅读
- 初中数学@初中数学三角形倒角模型专题“8字模型”(含经典练习附答案)
- 体坛夏洛克称双方有代沟,没比赛“嘴仗”却不断!纳达尔回绝克耶高斯聊天邀请
- 智东西OpenAI追踪AI模型效率:每16个月翻一番!超越摩尔定律
- 科学家@人类寿命的极限是多少?科学家建立模型,这个数字你能接受吗?
- 量子位AI就能让它动起来!再也不怕3D动画拖更了,只要做出角色3D模型
- 购车小助理将搭载e-Power混合动力系统,日产发布新劲客预告图
- #王者荣耀#王者荣耀:马可波罗“高达”皮肤火了!模型霸气,有点像千手观音
- 大哥说事我们是认真的!导弹是模型,官方:涉及机密不能透露,搞笑
- 混合动力:混动车电池坏了,可以当燃油车来开吗?比亚迪和丰田的差距有点大
- 快科技Intel酷睿i5-L15G7处理器现身:10nm 3D封装、5核x86混合架构
