import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split #数据切分
from sklearn.metrics import classification_report,confusion_matrix #查准率、查全率、混淆矩阵
import random
# Load the iris data set.
iris = datasets.load_iris()

# The iris samples are stored grouped by class, so they must be shuffled
# before being split. The manual shuffle-and-slice below does the same job as
#     x_train, x_test, y_train, y_test = train_test_split(
#         iris.data, iris.target, test_size=0.2)
# An earlier version of this script also made that call, but passed the whole
# `iris` container instead of the feature matrix `iris.data`, which
# scikit-learn rejects; its results were overwritten below anyway, so the
# call has been dropped.
data_size = iris.data.shape[0]
index = list(range(data_size))
random.shuffle(index)
# Apply the same permutation to features and labels so they stay aligned.
iris.data = iris.data[index]
iris.target = iris.target[index]

# Split the shuffled data: the first 20% becomes the test set, the remainder
# the training set.
test_size = int(data_size * 0.2)
x_train = iris.data[test_size:]
x_test = iris.data[:test_size]
y_train = iris.target[test_size:]
y_test = iris.target[:test_size]

# Classify every test sample by calling knn(x_test, x_data, y_data, k), k = 5.
# NOTE(review): knn is not defined in the visible part of this file; it must
# be defined or imported before this point.
prediction = [knn(sample, x_train, y_train, 5) for sample in x_test]

# Report precision/recall per class and the confusion matrix.
print(classification_report(y_test, prediction))
print(confusion_matrix(y_test, prediction))