import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Read the first example dataset and keep it as a plain NumPy array so the
# columns can be sliced positionally below.
df = pd.read_csv('data/kmeans_01.csv')
X = df.to_numpy()
print(X.shape)

# Unlabelled view of the data: column 0 on the x-axis, column 1 on the y-axis.
plt.scatter(x=X[:, 0], y=X[:, 1], edgecolors='k')
plt.show()
from sklearn.cluster import KMeans

# Fit a 3-cluster model on X.
# n_init is pinned to 10 (the historical default): scikit-learn 1.2/1.3 emit a
# FutureWarning when it is left implicit, and from 1.4 the default became
# 'auto', which would silently reduce the number of restarts.
# No random_state is set, so label numbering and centers may vary between runs.
kmeans_3 = KMeans(n_clusters=3, n_init=10)
kmeans_3.fit(X)
print(kmeans_3.labels_)

# Same scatter as before, now coloured by the assigned cluster label.
plt.scatter(X[:, 0], X[:, 1], edgecolors='k', c=kmeans_3.labels_)
plt.show()

# One row per cluster: the fitted centroid coordinates.
print(kmeans_3.cluster_centers_)

# Classify a single new point; predict() expects a 2-D (n_samples, n_features)
# input, hence the nested list.
pred = kmeans_3.predict([[40, 35]])
print(pred)
# Refit the same data with K=5 to compare against the 3-cluster result.
# The model gets its own name so it is not mistaken for the 3-cluster fit.
# n_init is pinned to 10 (the historical default): scikit-learn 1.2/1.3 emit a
# FutureWarning when it is left implicit, and from 1.4 the default is 'auto'.
kmeans_5 = KMeans(n_clusters=5, n_init=10)
kmeans_5.fit(X)
print(kmeans_5.labels_)

# Scatter of the first two columns, coloured by the 5-cluster labels.
plt.scatter(X[:, 0], X[:, 1], edgecolors='k', c=kmeans_5.labels_)
plt.show()

# The original script rebound `kmeans_3` to this 5-cluster model; keep that
# binding so any later code reading `kmeans_3` sees the same object as before.
kmeans_3 = kmeans_5
# Read the second example dataset; `df` is rebound to it and the raw values
# are exposed as `pts` for the snippet-based walkthrough that follows.
df = pd.read_csv('data/kmeans_02.csv')
pts = df.to_numpy()
print(pts.shape)

# Unlabelled view: column 0 on the x-axis, column 1 on the y-axis.
plt.scatter(x=pts[:, 0], y=pts[:, 1], edgecolors='k')
plt.show()
The cell below loads the following functions:
distance(P,Q)
assign(pts, ctrs)
newCenters(pts, clAssign, K)
icDist(pts, ctrs, clAssign)
kMeans(pts, K)
%run -i "Snippets/snippet20.py"
The next cell initializes a few variables used in this example.
%run -i "Snippets/snippet21.py"
Repeatedly executing the cell below will illustrate the k-means algorithm step by step.
%run -i "Snippets/snippet22.py"
def _draw(points, labels=None, ctrs=None):
    """Scatter the first two columns of ``points`` and show the figure.

    Points are coloured by ``labels`` when given; ``ctrs``, when given, are
    overlaid in red on the same axes before the figure is shown.
    """
    plt.scatter(points[:, 0], points[:, 1], edgecolors='k', c=labels)
    if ctrs is not None:
        plt.scatter(ctrs[:, 0], ctrs[:, 1], edgecolors='k', c='r')
    plt.show()


# Switch the walkthrough back to the first dataset; `pts` and `X` now refer to
# the same array.
pts = X
_draw(X)

# Initialisation: pick K distinct rows of X (sampling without replacement) as
# the starting centers.
K = 3
sel = np.random.choice(range(X.shape[0]), K, replace=False)
centers = X[sel, :]
_draw(X, ctrs=centers)

# Step 1a: assign points to the initial centers and show the assignment.
clusters = assign(X, centers)
_draw(X, clusters, centers)

# Step 1b: report the distance for this assignment, then recompute the centers
# and redraw with the same colouring.
curDist = icDist(X, centers, clusters)
print("New Distance is " + str(curDist))
centers = newCenters(pts, clusters, K)
_draw(X, clusters, centers)

# Step 2: reassign against the updated centers, report the distance, recompute
# the centers once more and draw the result.
clusters = assign(X, centers)
curDist = icDist(X, centers, clusters)
print("New Distance is " + str(curDist))
centers = newCenters(pts, clusters, K)
_draw(X, clusters, centers)