分类算法——基于heart数据集实现

embedded/2024/11/24 15:33:55/

1 heart数据集——描述性统计分析

import matplotlib.pyplot as plt
import pandas as pd# Load the dataset
# Load the dataset.
heart = pd.read_csv(r"heart.csv", sep=',')

# Check the columns in the DataFrame.
print(heart.columns)

# Count the samples of each class once and reuse the result below.
a = heart.loc[:, 'y'].value_counts()
print(a)

# Order the counts by class label (0 first, then 1) so that bar position i
# always shows class i, regardless of which class is more frequent.
class_counts = a.reindex([0, 1], fill_value=0)

# Draw a single bar chart with one colour per class.
plt.bar([0, 1], class_counts, color=['#FF0000', '#00FF00'])

# Label the classes: 0 = no heart disease, 1 = heart disease.
# Tick labels are kept horizontal (rotation=0).
plt.xticks([0, 1], ['No heart disease', 'Yes heart disease'], rotation=0)

# Write the count above each bar.
for x, y in enumerate(class_counts):
    plt.text(x, y, '%s' % y, ha='center', va='bottom')

plt.title('Heart disease distribution')
plt.show()
Index(['sbp', 'tobacco', 'ldl', 'adiposity', 'age', 'y'], dtype='object')
y
0    302
1    160
Name: count, dtype: int64

在这里插入图片描述

2 10折交叉验证(CV),选择最优的k值进行KNN分类

# Use 10-fold cross-validation to choose the number of neighbours k for KNN.
from sklearn.model_selection import cross_val_score
from sklearn.neighbors import KNeighborsClassifier

# The first five columns are the features; column 'y' is the class label.
X = heart.iloc[:, 0:5]
y = heart.loc[:, 'y']

# Mean cross-validated accuracy for every candidate k.
k_range = range(1, 31)
k_scores = []
for k in k_range:
    knn = KNeighborsClassifier(n_neighbors=k)
    scores = cross_val_score(knn, X, y, cv=10, scoring='accuracy')
    k_scores.append(scores.mean())

# Pick the k with the highest mean accuracy (the smallest such k on ties).
# Looking it up in k_range keeps this correct even if the range is changed.
best_score = max(k_scores)
k = k_range[k_scores.index(best_score)]
print('Optimal k: %d' % k)

# Plot accuracy against k once, and mark and annotate the optimum.
plt.plot(k_range, k_scores)
plt.xlabel('Value of K for KNN')
plt.ylabel('Cross-Validated Accuracy')
plt.scatter(k, best_score, color='red')
plt.text(k, best_score, '(%d, %.2f)' % (k, best_score), ha='center', va='bottom')
plt.show()
Optimal k: 22

在这里插入图片描述

KNN分类

#使用最优k值建立KNN进行分类
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score# Split the dataset into training and testing sets
# Hold out 30% of the samples for testing; random_state=0 fixes the split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0
)

# Build the KNN classifier with the k chosen above and fit it on the
# training part.
knn = KNeighborsClassifier(n_neighbors=k)
knn.fit(X_train, y_train)

# Predict the held-out samples and report the accuracy.
y_pred = knn.predict(X_test)
print('Accuracy: %.2f' % accuracy_score(y_test, y_pred))

# Next: plot the decision regions.
from matplotlib.colors import ListedColormap
import numpy as np
from sklearn.decomposition import PCA


def plot_decision_regions(X, y, classifier, test_idx=None, resolution=0.02):
    """Plot a classifier's decision regions in a 2-D PCA projection of X.

    X is projected onto its first two principal components. A regular grid
    is laid over that plane, every grid point is mapped back to the original
    feature space with ``pca.inverse_transform`` and classified, and the
    predictions are drawn as filled contours. The samples are then scattered
    on top, one marker/colour per class.

    Parameters:
        X: 2-D array-like of samples in the original feature space.
        y: 1-D array-like of class labels, one per row of X. At most five
            distinct labels are supported (five markers/colours are defined).
        classifier: object exposing ``predict``; it is only asked to predict
            here, so it must already be fitted on the original features.
        test_idx: optional row indices of X to highlight as the test set.
        resolution: step of the grid in PCA units. The grid has roughly
            (x1 range / resolution) * (x2 range / resolution) points, so a
            coarser step is much faster for widely spread data.

    Returns:
        None. Everything is drawn on the current matplotlib axes.
    """
    y = np.asarray(y)

    # Reduce dimensionality to 2D using PCA
    pca = PCA(n_components=2)
    X_pca = pca.fit_transform(X)

    # setup marker generator and color map
    markers = ('s', 'x', 'o', '^', 'v')
    colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan')
    classes = np.unique(y)
    cmap = ListedColormap(colors[:len(classes)])

    # plot the decision surface
    x1_min, x1_max = X_pca[:, 0].min() - 1, X_pca[:, 0].max() + 1
    x2_min, x2_max = X_pca[:, 1].min() - 1, X_pca[:, 1].max() + 1
    xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution),
                           np.arange(x2_min, x2_max, resolution))
    grid = np.array([xx1.ravel(), xx2.ravel()]).T
    # The classifier works on the original features, so map the grid back.
    Z = classifier.predict(pca.inverse_transform(grid))
    Z = Z.reshape(xx1.shape)
    plt.contourf(xx1, xx2, Z, alpha=0.4, cmap=cmap)
    plt.xlim(xx1.min(), xx1.max())
    plt.ylim(xx2.min(), xx2.max())

    for idx, cl in enumerate(classes):
        plt.scatter(x=X_pca[y == cl, 0], y=X_pca[y == cl, 1],
                    alpha=0.8, c=[cmap(idx)],
                    marker=markers[idx], label=cl)

    # highlight test samples
    # (explicit None/length check: `if test_idx:` fails on NumPy arrays)
    if test_idx is not None and len(test_idx) > 0:
        test_points = X_pca[test_idx, :2]
        plt.scatter(test_points[:, 0], test_points[:, 1],
                    alpha=1.0, linewidth=1, marker='o',
                    s=55, label='test set')


# Plot decision regions using PCA-transformed features
# Stack training rows first and test rows second, so the test samples
# occupy the trailing index range of the combined arrays.
X_combined = np.concatenate((X_train, X_test), axis=0)
y_combined = np.concatenate((y_train, y_test))

plot_decision_regions(
    X=X_combined,
    y=y_combined,
    classifier=knn,
    test_idx=range(len(y_train), len(y_combined)),
)
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.legend(loc='upper left')
plt.show()
Accuracy: 0.69

在这里插入图片描述

朴素贝叶斯分类

#朴素贝叶斯分类
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.naive_bayes import GaussianNB
from matplotlib.colors import ListedColormap# Load the dataset
# Load the dataset.
heart = pd.read_csv(r"heart.csv", sep=',')

# Features are the first five columns; the target is column 'y'.
X = heart.iloc[:, :5]
y = heart.loc[:, 'y']

# Hold out 30% of the samples for testing; random_state=0 fixes the split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0
)

# Fit a Gaussian Naive Bayes classifier on the training part.
gnb = GaussianNB()
gnb.fit(X_train, y_train)

# Predict the held-out samples and report the accuracy.
y_pred = gnb.predict(X_test)
print('Accuracy: %.2f' % accuracy_score(y_test, y_pred))

# Define the function to plot decision regions
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn.decomposition import PCA


def plot_decision_regions(X, y, classifier, test_idx=None, resolution=0.02):
    """Plot a classifier's decision regions in a 2-D PCA projection of X.

    X is projected onto its first two principal components. A regular grid
    is laid over that plane, every grid point is mapped back to the original
    feature space with ``pca.inverse_transform`` and classified, and the
    predictions are drawn as filled contours. The samples are then scattered
    on top, one marker/colour per class.

    Parameters:
        X: 2-D array-like of samples in the original feature space.
        y: 1-D array-like of class labels, one per row of X. At most five
            distinct labels are supported (five markers/colours are defined).
        classifier: object exposing ``predict``; it is only asked to predict
            here, so it must already be fitted on the original features.
        test_idx: accepted so existing callers keep working, but ignored:
            this version does not highlight the test samples.
        resolution: step of the grid in PCA units. The grid has roughly
            (x1 range / resolution) * (x2 range / resolution) points, so a
            coarser step is much faster for widely spread data.

    Returns:
        None. Everything is drawn on the current matplotlib axes.
    """
    y = np.asarray(y)

    # Reduce dimensionality to 2D using PCA
    pca = PCA(n_components=2)
    X_pca = pca.fit_transform(X)

    # setup marker generator and color map
    markers = ('s', 'x', 'o', '^', 'v')
    colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan')
    classes = np.unique(y)
    cmap = ListedColormap(colors[:len(classes)])

    # plot the decision surface
    x1_min, x1_max = X_pca[:, 0].min() - 1, X_pca[:, 0].max() + 1
    x2_min, x2_max = X_pca[:, 1].min() - 1, X_pca[:, 1].max() + 1
    xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution),
                           np.arange(x2_min, x2_max, resolution))
    grid = np.array([xx1.ravel(), xx2.ravel()]).T
    # The classifier works on the original features, so map the grid back.
    Z = classifier.predict(pca.inverse_transform(grid))
    Z = Z.reshape(xx1.shape)
    plt.contourf(xx1, xx2, Z, alpha=0.4, cmap=cmap)
    plt.xlim(xx1.min(), xx1.max())
    plt.ylim(xx2.min(), xx2.max())

    for idx, cl in enumerate(classes):
        plt.scatter(x=X_pca[y == cl, 0], y=X_pca[y == cl, 1],
                    alpha=0.8, c=[cmap(idx)],
                    marker=markers[idx], label=cl)


# Plot decision regions using PCA-transformed features
# Stack training rows first and test rows second.
X_combined = np.concatenate((X_train, X_test), axis=0)
y_combined = np.concatenate((y_train, y_test))

# Index range of the test rows inside the combined arrays.
plot_decision_regions(
    X=X_combined,
    y=y_combined,
    classifier=gnb,
    test_idx=range(len(y_train), len(y_combined)),
)
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.legend(loc='upper left')
plt.show()
Accuracy: 0.70

在这里插入图片描述

SVM分类

#使用SVM进行分类
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC

# Load the dataset
heart = pd.read_csv(r"heart.csv", sep=',')

# Select features (first five columns) and target (column 'y')
X = heart.iloc[:, 0:5]
y = heart.loc[:, 'y']

# Split the dataset into training and testing sets (30% held out)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

# Initialize and fit the SVM classifier (linear kernel, C=1.0)
svm = SVC(kernel='linear', C=1.0, random_state=0)
svm.fit(X_train, y_train)

# Predict and print accuracy
y_pred = svm.predict(X_test)
print('Accuracy: %.2f' % accuracy_score(y_test, y_pred))
Accuracy: 0.66

# Draw the SVM decision regions in the PCA plane.
# Training rows are stacked first and test rows second.
X_combined = np.concatenate((X_train, X_test), axis=0)
y_combined = np.concatenate((y_train, y_test))

plot_decision_regions(
    X=X_combined,
    y=y_combined,
    classifier=svm,
    test_idx=range(len(y_train), len(y_combined)),
)
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.legend(loc='upper left')
plt.show()

在这里插入图片描述

决策树分类(代码使用的是 DecisionTreeClassifier,而非随机森林)

# Import necessary libraries
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import export_graphviz
import pydotplus
from IPython.display import Image# Load the dataset
# Load the dataset
heart = pd.read_csv(r"heart.csv", sep=',')

# Select features (first five columns) and target (column 'y')
X = heart.iloc[:, 0:5]
y = heart.loc[:, 'y']

# Split the dataset into training and testing sets (30% held out)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

# Initialize and fit the Decision Tree classifier (depth limited to 3)
tree = DecisionTreeClassifier(max_depth=3, random_state=0)
tree.fit(X_train, y_train)

# Predict and print accuracy
y_pred = tree.predict(X_test)
print('Accuracy: %.2f' % accuracy_score(y_test, y_pred))

# Export the decision tree to a file.
# Pass the feature names as a plain list: some scikit-learn releases reject
# a pandas Index for this parameter.
export_graphviz(tree, out_file='tree.dot', feature_names=list(X.columns))

# Convert the dot file to a png and show it.
# A bare `Image(...)` expression in the middle of a script/cell is discarded,
# so the image is rendered explicitly with display().
from IPython.display import display
graph = pydotplus.graph_from_dot_file('tree.dot')
display(Image(graph.create_png()))

# Plot decision regions using PCA-transformed features
X_combined = np.vstack((X_train, X_test))
y_combined = np.hstack((y_train, y_test))
plot_decision_regions(X=X_combined, y=y_combined, classifier=tree, test_idx=range(len(y_train), len(y_train) + len(y_test)))
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.legend(loc='upper left')
plt.show()
Accuracy: 0.68

在这里插入图片描述

决策树分类


#绘制出决策树
from sklearn.tree import plot_tree
# Draw the fitted decision tree on a wide figure so the nodes stay readable.
plt.figure(figsize=(20, 10))
plot_tree(
    tree,
    filled=True,
    # Plain list instead of a pandas Index: some scikit-learn releases only
    # accept a list for feature_names.
    feature_names=list(X.columns),
    class_names=['0', '1'],
)
plt.show()

在这里插入图片描述


http://www.ppmy.cn/embedded/140150.html

相关文章

【C++】二叉搜索树详解:插入、删除、查找的最佳实践与优化策略

个人主页: 起名字真南的CSDN博客 个人专栏: 【数据结构初阶】 📘 基础数据结构【C语言】 💻 C语言编程技巧【C】 🚀 进阶C【OJ题解】 📝 题解精讲 目录 📌 前言📌 1 二叉搜索树的概念📌 2 二叉…

禁止Chrome的自动升级

一、需求分析 因为用Chromeselenium做了网页自动化填写任务,如果Google Chrome浏览器自动升级,就会导致chromedriver加载失败,自动化任务失效,因此需要禁止Chrome浏览器的自动升级。 二、当前环境 三、实际配置 运行注册表编辑…

企业OA管理系统:Spring Boot技术实践与案例分析

3系统分析 3.1可行性分析 通过对本企业OA管理系统实行的目的初步调查和分析,提出可行性方案并对其一一进行论证。我们在这里主要从技术可行性、经济可行性、操作可行性等方面进行分析。 3.1.1技术可行性 本企业OA管理系统采用SSM框架,JAVA作为开发语言&a…

web——sqliabs靶场——第十五关——post时间盲注

还是post传参 搞了个高级的脚本,看看 #!/usr/bin/python3 # -*- coding: utf-8 -*-# 修改payload,data # 添加了time.sleep(0.05) # default # 修改时要注意间隔 import requests from optparse import OptionParser import time import threading# 存…

【Spark】【大数据技术基础】课程 实验七 Spark基础编程实验

实验七:Spark初级编程实践 一、实验目的 掌握使用 Spark 访问本地文件和 HDFS 文件的方法 掌握 Spark 应用程序的编写、编译和运行方法 二、实验平台 操作系统:Ubuntu16.04 Spark版本:2.1.0 scala版本:2.11.8 Hadoop版本&…

Python 使用 Token 认证方案连接 Kubernetes (k8s) 的详细过程

在 Kubernetes 中,使用 Token 认证是一种常见的客户端身份验证方式,尤其适用于 ServiceAccount。以下是详细的步骤,包括如何查看 Token、获取 API 服务地址、配置远程连接,以及如何在 Python 中连接 k8s。 1. 获取 Token 首先&a…

速度革命:esbuild如何改变前端构建游戏 (1)

什么是 esbuild? esbuild 是一款基于 Go 语言开发的 JavaScript 构建打包工具,以其卓越的性能著称。相比传统的构建工具(如 Webpack),esbuild 在打包速度上有着显著的优势,能够将打包速度提升 10 到 100 倍…

「Qt Widget中文示例指南」如何为窗口实现流程布局?(一)

Qt 是目前最先进、最完整的跨平台C开发工具。它不仅完全实现了一次编写,所有平台无差别运行,更提供了几乎所有开发过程中需要用到的工具。如今,Qt已被运用于超过70个行业、数千家企业,支持数百万设备及应用。 本文将展示如何为不…