1、层次聚类方法,相较于划分聚类方法,其优点是什么?
2、理解并掌握层次聚类方法结果的解读策略。
3、了解sklearn.cluster.AgglomerativeClustering类的参数含义及编程使用方法。
https://scikit-learn.org/stable/modules/generated/sklearn.cluster.AgglomerativeClustering.html#sklearn.cluster.AgglomerativeClustering
一、层次聚类方法的调用
# sklearn库
from sklearn.cluster import AgglomerativeClustering
"""
linkage取值:
single:最小距离
complete:最大距离
ward:最小方差
average:平均距离
"""
# 创建模型(定义层次聚类)
model=AgglomerativeClustering(n_clusters=聚类中心(质心)个数,affinity=距离计算公式,linkage=距离计算方法)
# 训练模型
# 输入:一个二维数组表示的样本矩阵
# 输出:每个样本最终的结果
model.fit(输入)
print("每个样本所属的簇(类别):",model.labels_)
二、应用案例
from sklearn import datasets, cluster
# Iris dataset: load the full feature matrix, then keep only the first 10 samples.
iris_features = datasets.load_iris().data
X = iris_features[:10]
# Agglomerative clustering into 3 clusters using ward linkage.
clust = cluster.AgglomerativeClustering(
    linkage='ward',
    n_clusters=3,
)
# Fit the model on X and collect the cluster label assigned to each sample.
labels = clust.fit_predict(X)
from scipy.cluster.hierarchy import dendrogram, ward, single
from sklearn import datasets
import matplotlib.pyplot as plt  # fixed: module name was misspelled as "matlplotlib"
# Iris dataset: use only the first 10 samples so the tree stays readable.
X = datasets.load_iris().data[:10]
# Build the hierarchical-clustering linkage matrix with ward linkage.
linkage_matrix = ward(X)
# Draw the dendrogram (fixed: the call was misspelled as "dendogram").
dendrogram(linkage_matrix)
# plt.show must be *called*; the bare attribute reference displayed nothing.
plt.show()
层次聚类三种linkage(链接方式)参数的效果对比:ward、complete、average
from sklearn import datasets
from sklearn.cluster import AgglomerativeClustering
from sklearn.metrics import adjusted_rand_score

# Load Iris once; `data` is clustered and `target` is used as the reference labelling.
iris = datasets.load_iris()
features = iris.data
true_labels = iris.target

# One 3-cluster estimator per linkage strategy. The first one passes no
# `linkage` argument and therefore uses the estimator's default linkage.
ward = AgglomerativeClustering(n_clusters=3)
complete = AgglomerativeClustering(n_clusters=3, linkage="complete")
avg = AgglomerativeClustering(n_clusters=3, linkage="average")

# Cluster the same data with each estimator.
ward_pred = ward.fit_predict(features)
complete_pred = complete.fit_predict(features)
avg_pred = avg.fit_predict(features)

# Compare every clustering against the reference labels with the adjusted Rand index.
ward_ar_score = adjusted_rand_score(true_labels, ward_pred)
complete_ar_score = adjusted_rand_score(true_labels, complete_pred)
avg_ar_score = adjusted_rand_score(true_labels, avg_pred)
Scores:
Ward: 0.731198556771
Complete: 0.642251251836
Average: 0.759198707107
# Normalize the data before clustering.
# NOTE(review): preprocessing.normalize rescales each sample (row) to unit norm,
# which is not the same as per-feature standardization — confirm this is intended.
from sklearn import preprocessing
normalized_X = preprocessing.normalize(iris.data)

# Rebuild the three 3-cluster estimators (default linkage, complete, average).
ward = AgglomerativeClustering(n_clusters=3)
complete = AgglomerativeClustering(n_clusters=3, linkage="complete")
avg = AgglomerativeClustering(n_clusters=3, linkage="average")

# Cluster the normalized samples with each estimator.
ward_pred = ward.fit_predict(normalized_X)
complete_pred = complete.fit_predict(normalized_X)
avg_pred = avg.fit_predict(normalized_X)

# Score each clustering against the reference labels (adjusted Rand index).
reference = iris.target
ward_ar_score = adjusted_rand_score(reference, ward_pred)
complete_ar_score = adjusted_rand_score(reference, complete_pred)
avg_ar_score = adjusted_rand_score(reference, avg_pred)
Scores:
Ward: 0.885697031028
Complete: 0.644447235392
Average: 0.558371443754
转载自:2 聚类 - 层次聚类 - 简书 (jianshu.com)
https://www.jianshu.com/p/6b61e09f0bcc?utm_campaign=maleskine&utm_content=note&utm_medium=seo_notes&utm_source=recommendation