Commit eb44de35 authored by Céline Meillier's avatar Céline Meillier

ajout de la fonction illustrationKmeans necessaire dans le cours sur la...

ajout de la fonction illustrationKmeans, nécessaire dans le cours sur la classification pour visualiser étape par étape
parent 41d6995f
from copy import deepcopy
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
def dist(a, b, axe=1):
    """Return the Euclidean (2-norm) distance between ``a`` and ``b``.

    Both arguments are converted with ``np.asarray`` so that plain Python
    sequences are accepted in addition to NumPy arrays; they are subtracted
    with the usual NumPy broadcasting rules.

    Parameters
    ----------
    a, b : array_like
        Operands whose difference is measured.
    axe : int, tuple of int or None, optional
        Forwarded as the ``axis`` argument of ``np.linalg.norm``. The
        default ``1`` yields one distance per row of a 2-D difference;
        ``None`` reduces the whole difference to a single scalar.

    Returns
    -------
    float or numpy.ndarray
        The norm of ``a - b`` along ``axe``.
    """
    difference = np.asarray(a) - np.asarray(b)
    return np.linalg.norm(difference, axis=axe)
def illustrationKmeans(X, k, xlabel='x', ylabel='y'):
    """Run k-means on 2-D points and draw one figure per iteration.

    Initial centroids are drawn uniformly at random inside the bounding box
    of the data. Each iteration assigns every point to its nearest centroid,
    moves each centroid to the mean of its points, and shows a scatter plot
    of the coloured clusters with the updated centroids as black stars.
    Iteration stops as soon as an update leaves every centroid unchanged.

    Parameters
    ----------
    X : array_like, shape (n_samples, 2)
        Points to cluster. Column 0 is plotted on the horizontal axis and
        column 1 on the vertical axis.
    k : int
        Number of clusters (at least 1).
    xlabel, ylabel : str, optional
        Axis labels of every figure.

    Returns
    -------
    None
        The function is used for its plotting side effect only.

    Raises
    ------
    ValueError
        If ``X`` is empty or not of shape (n_samples, 2), or if ``k < 1``.
    """
    colors = ['r', 'g', 'b', 'y', 'c', 'm']

    X = np.asarray(X, dtype=float)
    if X.ndim != 2 or X.shape[1] != 2 or len(X) == 0:
        raise ValueError("X must be a non-empty array of shape (n_samples, 2)")
    if k < 1:
        raise ValueError("k must be a positive integer")

    # Random initial centroids: x coordinates first, then y coordinates, so
    # a seeded NumPy generator reproduces the same starting positions.
    C_x = np.random.uniform(np.min(X[:, 0]), np.max(X[:, 0]), size=k)
    C_y = np.random.uniform(np.min(X[:, 1]), np.max(X[:, 1]), size=k)
    # Centroids are kept in float64 so the means are not truncated.
    C = np.column_stack((C_x, C_y))

    # The body always runs at least once; comparing the initial centroids
    # against an all-zero placeholder could skip the loop entirely.
    while True:
        # Assignment step: (n_samples, k) distance table, nearest centroid
        # wins (ties go to the lowest cluster index).
        distances = dist(X[:, np.newaxis, :], C[np.newaxis, :, :], 2)
        clusters = np.argmin(distances, axis=1)

        # Remember the centroids before the update to detect convergence.
        C_old = C.copy()

        # Update step, drawn on a fresh figure for this iteration.
        _, ax = plt.subplots()
        for i in range(k):
            points = X[clusters == i]
            if len(points) == 0:
                # An empty cluster has no mean: keep its previous centroid
                # instead of producing NaN, and draw nothing for it.
                continue
            C[i] = points.mean(axis=0)
            # Cycle through the palette so that k > len(colors) still works.
            ax.scatter(points[:, 0], points[:, 1], s=100,
                       c=colors[i % len(colors)])
        ax.set_xlabel(xlabel)
        ax.set_ylabel(ylabel)
        ax.scatter(C[:, 0], C[:, 1], marker='*', s=200, c='#050505')
        plt.show()

        # Converged once the update moved no centroid at all.
        if dist(C, C_old, None) == 0:
            break
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment