% Cluster the Fisher iris measurements with k-means, Ward hierarchical
% clustering and DBSCAN, then compare the results using silhouette,
% Davies-Bouldin and Rand index scores. Figures and label tables are
% written to the current folder.

% --- Load and standardize the data ---------------------------------------
load fisheriris;  % provides meas (measurements) and species (true labels)
disp('First few rows of the dataset:');
disp(meas(1:5,:));
meas_scaled = zscore(meas);  % every feature gets zero mean and unit variance
disp('Scaled Data:');
disp(meas_scaled(1:5,:));

% --- K-means --------------------------------------------------------------
k = 3;
% kmeans chooses its initial centroids randomly; fixing the seed makes the
% figures and every metric below reproducible from run to run.
rng(42);
[idx_kmeans, C] = kmeans(meas_scaled, k);
figure;
gscatter(meas_scaled(:,1), meas_scaled(:,2), idx_kmeans, 'rgb', 'xo*');
hold on;
plot(C(:,1), C(:,2), 'kx', 'MarkerSize', 15, 'LineWidth', 3);
title('K-Means Clustering (Scaled Data)');
xlabel('Feature 1 (Standardized)');
ylabel('Feature 2 (Standardized)');
legend('Cluster 1', 'Cluster 2', 'Cluster 3', 'Centroids');
hold off;
saveas(gcf, 'KMeans_Clustering_Figure.png');

% --- Hierarchical clustering (Ward linkage) -------------------------------
Z = linkage(meas_scaled, 'ward');
% Cut the tree built above into k clusters once, instead of re-running the
% whole pipeline through clusterdata when the comparison figure is drawn.
idx_hier = cluster(Z, 'maxclust', k);
figure;
dendrogram(Z);
title('Hierarchical Clustering Dendrogram');
xlabel('Sample Index');
ylabel('Distance');
saveas(gcf, 'Hierarchical_Clustering_Figure.png');

% --- DBSCAN ---------------------------------------------------------------
epsilon = 0.5;
minPts = 5;
% corepts flags the core points; it is not used further in this script.
[idx_dbscan, corepts] = dbscan(meas_scaled, epsilon, minPts);
figure;
gscatter(meas_scaled(:,1), meas_scaled(:,2), idx_dbscan, 'rgb', 'xo*');
title('DBSCAN Clustering');
xlabel('Feature 1 (Standardized)');
ylabel('Feature 2 (Standardized)');
saveas(gcf, 'DBSCAN_Clustering_Figure.png');

% --- Internal validity: silhouette and Davies-Bouldin ---------------------
% dbscan labels noise points -1. Noise is not a cluster, so it is removed
% before computing the internal indices; otherwise it would be scored as
% if it were one more (very diffuse) cluster.
is_clustered = idx_dbscan ~= -1;
dbscan_points = meas_scaled(is_clustered, :);
dbscan_labels = idx_dbscan(is_clustered);

silhouette_score_kmeans = silhouette(meas_scaled, idx_kmeans);
fprintf('Average Silhouette Score for K-Means: %.2f\n', mean(silhouette_score_kmeans));
silhouette_score_dbscan = silhouette(dbscan_points, dbscan_labels);
fprintf('Average Silhouette Score for DBSCAN: %.2f\n', mean(silhouette_score_dbscan));
db_index_kmeans = daviesbouldin(meas_scaled, idx_kmeans);
fprintf('Davies-Bouldin Index for K-Means: %.2f\n', db_index_kmeans);
db_index_dbscan = daviesbouldin(dbscan_points, dbscan_labels);
fprintf('Davies-Bouldin Index for DBSCAN: %.2f\n', db_index_dbscan);

% --- External validity: Rand index against the species labels -------------
% rand_index returns the plain (unadjusted) Rand index, so the output is
% labelled accordingly. All points are scored; DBSCAN noise counts as its
% own group here.
true_labels = grp2idx(species);
ari_kmeans = rand_index(true_labels, idx_kmeans);
fprintf('Rand Index for K-Means: %.2f\n', ari_kmeans);
ari_dbscan = rand_index(true_labels, idx_dbscan);
fprintf('Rand Index for DBSCAN: %.2f\n', ari_dbscan);

% --- Side-by-side comparison figure ---------------------------------------
figure;
subplot(2,2,1);
gscatter(meas_scaled(:,1), meas_scaled(:,2), idx_kmeans, 'rgb', 'xo*');
title('K-Means Clustering');
subplot(2,2,2);
gscatter(meas_scaled(:,1), meas_scaled(:,2), idx_hier, 'rgb', 'xo*');
title('Hierarchical Clustering');
subplot(2,2,3);
gscatter(meas_scaled(:,1), meas_scaled(:,2), idx_dbscan, 'rgb', 'xo*');
title('DBSCAN Clustering');
saveas(gcf, 'Clustering_Comparison_Figure.png');

% --- Export the cluster assignments ---------------------------------------
writetable(table(meas_scaled, idx_kmeans), 'KMeans_Clustering_Results.csv');
writetable(table(meas_scaled, idx_dbscan), 'DBSCAN_Clustering_Results.csv');
function RI = rand_index(trueLabels, predictedLabels)
%RAND_INDEX Plain (unadjusted) Rand index between two labelings.
%   RI = RAND_INDEX(trueLabels, predictedLabels) returns the fraction of
%   element pairs on which the two labelings agree: a pair agrees when it
%   is grouped together in both labelings or separated in both.
%   Both inputs are flattened to column vectors; the number of elements
%   compared is taken from trueLabels.
    trueLabels = trueLabels(:);
    predictedLabels = predictedLabels(:);
    n = length(trueLabels);

    agreeingPairs = 0;
    for first = 1:n-1
        others = first+1:n;
        % Is each later element grouped with element "first"?
        togetherInTruth = trueLabels(others) == trueLabels(first);
        togetherInPrediction = predictedLabels(others) == predictedLabels(first);
        % A pair agrees when both labelings give the same together/apart answer.
        agreeingPairs = agreeingPairs + sum(togetherInTruth == togetherInPrediction);
    end

    RI = agreeingPairs / (n * (n - 1) / 2);
end
function db_index = daviesbouldin(X, idx)
%DAVIESBOULDIN Davies-Bouldin index of a clustering (lower is better).
%   db_index = DAVIESBOULDIN(X, idx) scores the partition of the rows of X
%   given by the label vector idx. For every cluster the scatter is the
%   mean Euclidean distance of its members to the cluster centroid; the
%   index is the average, over clusters, of the largest
%   (scatter_i + scatter_j) / distance(centroid_i, centroid_j) ratio.
%
%   Clusters are taken from the distinct values actually present in idx,
%   so labels need not be the consecutive integers 1..K (for example 0/1
%   labels, gaps, or a -1 group are each treated as one cluster). Callers
%   that do not want a group scored must remove those rows beforehand.
%
%   A single cluster yields 0; an empty idx yields NaN.
    idx = idx(:);
    labels = unique(idx);
    numClusters = numel(labels);

    % Centroid and scatter are computed once per cluster rather than
    % repeatedly inside the pairwise loop.
    centroids = zeros(numClusters, size(X, 2));
    spread = zeros(numClusters, 1);
    for c = 1:numClusters
        members = X(idx == labels(c), :);
        centroids(c, :) = mean(members, 1);
        spread(c) = mean(pdist2(members, centroids(c, :)));
    end

    DB = 0;
    for i = 1:numClusters
        max_similarity = 0;
        for j = 1:numClusters
            if i ~= j
                Dij = pdist2(centroids(i, :), centroids(j, :));
                similarity = (spread(i) + spread(j)) / Dij;
                max_similarity = max(max_similarity, similarity);
            end
        end
        DB = DB + max_similarity;
    end
    db_index = DB / numClusters;
end
First few rows of the dataset:
5.1000 3.5000 1.4000 0.2000
4.9000 3.0000 1.4000 0.2000
4.7000 3.2000 1.3000 0.2000
4.6000 3.1000 1.5000 0.2000
5.0000 3.6000 1.4000 0.2000
Scaled Data:
-0.8977 1.0156 -1.3358 -1.3111
-1.1392 -0.1315 -1.3358 -1.3111
-1.3807 0.3273 -1.3924 -1.3111
-1.5015 0.0979 -1.2791 -1.3111
-1.0184 1.2450 -1.3358 -1.3111
Average Silhouette Score for K-Means: 0.65
Average Silhouette Score for DBSCAN: 0.47
Davies-Bouldin Index for K-Means: 0.83
Davies-Bouldin Index for DBSCAN: 0.33
Adjusted Rand Index for K-Means: 0.82
Adjusted Rand Index for DBSCAN: 0.75