77 lines
2.3 KiB
Python
77 lines
2.3 KiB
Python
|
# 导入必要的库
|
||
|
import numpy as np
|
||
|
from sklearn.datasets import load_iris
|
||
|
from sklearn.cluster import KMeans
|
||
|
from sklearn_extra.cluster import KMedoids
|
||
|
from sklearn.metrics import silhouette_score
|
||
|
import matplotlib.pyplot as plt
|
||
|
import matplotlib
|
||
|
import os
|
||
|
import psutil
|
||
|
|
||
|
# Set LOKY_MAX_CPU_COUNT to silence the Joblib warning.
# psutil.cpu_count(logical=False) returns None when the number of physical
# cores cannot be determined; fall back to the logical count (and finally to
# 1) so the variable is never set to the non-numeric string "None".
_physical_cores = psutil.cpu_count(logical=False) or os.cpu_count() or 1
os.environ['LOKY_MAX_CPU_COUNT'] = str(_physical_cores)

# Configure Matplotlib with a font that supports Chinese text.
matplotlib.rcParams['font.sans-serif'] = ['SimHei']  # use the SimHei typeface
matplotlib.rcParams['axes.unicode_minus'] = False  # fix minus-sign rendering

# Load the Iris dataset.
iris = load_iris()
X = iris.data  # feature data
y = iris.target  # target labels
|
||
|
|
||
|
# K均值聚类算法
|
||
|
def kmeans_clustering(X, n_clusters=3):
    """
    Cluster the data with the K-Means algorithm.

    :param X: input data to cluster
    :param n_clusters: number of clusters to form
    :return: the cluster labels of the fitted model
    """
    # random_state is pinned to 42, as in every run of this script.
    model = KMeans(n_clusters=n_clusters, random_state=42).fit(X)
    return model.labels_
|
||
|
|
||
|
# K中心点聚类算法
|
||
|
def kmedoids_clustering(X, n_clusters=3):
    """
    Cluster the data with the K-Medoids algorithm.

    :param X: input data to cluster
    :param n_clusters: number of clusters to form
    :return: the cluster labels of the fitted model
    """
    # random_state is pinned to 42, as in every run of this script.
    model = KMedoids(n_clusters=n_clusters, random_state=42).fit(X)
    return model.labels_
|
||
|
|
||
|
# 分析聚类结果
|
||
|
def analyze_clustering(X, labels, algorithm_name):
    """
    Report the silhouette score of a clustering and visualize it.

    Prints the silhouette score, then shows a scatter plot of the first two
    columns of X with each point colored by its cluster label.

    :param X: input data
    :param labels: cluster labels for X
    :param algorithm_name: algorithm name used in the printed line and the
        plot title
    """
    silhouette_avg = silhouette_score(X, labels)
    print(f"{algorithm_name} 轮廓系数: {silhouette_avg}")

    # Only the first two columns are plotted.
    first_column = X[:, 0]
    second_column = X[:, 1]
    plt.scatter(first_column, second_column, c=labels, cmap='viridis')
    plt.title(f"{algorithm_name} 聚类结果")
    plt.show()
|
||
|
|
||
|
# 主函数
|
||
|
def main():
    """Cluster the module-level X with K-Means, then K-Medoids, analyzing each result."""
    # (display name, clustering function) pairs, run in this order.
    runs = (
        ("K均值", kmeans_clustering),
        ("K中心点", kmedoids_clustering),
    )
    for algorithm_name, cluster in runs:
        cluster_labels = cluster(X)
        analyze_clustering(X, cluster_labels, algorithm_name)
|
||
|
|
||
|
# Run the comparison only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|