SVM支持向量机分类——基于Python实现

SVM支持向量机分类

1.描述统计

python">
# The star imports are kept as-is: the code below calls read_csv unqualified,
# so it relies on pandas' names living in the global namespace.
from numpy import *
from scipy import *
from pandas import *
# These two imports were fused onto a single line
# ("... as pltimport seaborn as sns"), which is a syntax error.
import matplotlib.pyplot as plt
import seaborn as sns

# Load the glass data set from a comma-separated file.
glass = read_csv('../data/第5章数据/Glass.csv', sep=',')
glass.head()
# Number of rows per value of the 'Type' column.
glass['Type'].value_counts()

Type
2    76
1    70
7    29
3    17
5    13
6     9
Name: count, dtype: int64

划分训练与测试集

python">
import random

# Fix the seed so the same split is drawn on every run.
random.seed(1234)

# Sample 70% of the row labels for training; every remaining label goes to
# the test set.
all_index = list(glass.index)
train_index = random.sample(all_index, int(0.7 * len(all_index)))
test_index = list(set(all_index) - set(train_index))

# train_index / test_index hold index *labels*, so rows are selected with the
# label-based .loc. The positional .iloc only gives the same rows while the
# index happens to be the default 0..n-1 range.
train_data = glass.loc[train_index, :]
test_data = glass.loc[test_index, :]

# Class counts of the training set, to check that every class is represented.
train_data['Type'].value_counts()

Type
1    55
2    50
7    21
3    10
5     7
6     6
Name: count, dtype: int64

# Class counts of the test set, to check that every class is represented.
test_data['Type'].value_counts()

Type
2    26
1    15
7     8
3     7
5     6
6     3
Name: count, dtype: int64

2.建立SVM模型

python">
from sklearn import svm

# Linear-kernel support vector classifier.
# NOTE(review): per the scikit-learn SVC docs, gamma only applies to the
# rbf/poly/sigmoid kernels — confirm it has no effect here.
clf = svm.SVC(
    C=4,
    tol=1e-6,
    kernel='linear',
    gamma=0.1,
    decision_function_shape='ovr',
)
# Features are the first nine columns; the label is the 'Type' column.
clf.fit(train_data.iloc[:, 0:9], train_data['Type'])

# Predict the test rows, picking the feature columns by the names seen in fit.
value = clf.predict(test_data[clf.feature_names_in_])

# Work on a copy so test_data itself never gains the prediction column.
test_datac = test_data.copy()
test_datac.loc[:, 'SVM_pred'] = value
test_datac.head()

# Count test rows per (predicted, actual) pair, then spread the actual types
# across the columns; pairs that never occur are filled with 0.
group_keys = [test_datac['SVM_pred'], test_datac['Type']]
pair_counts = test_datac.iloc[:, 0].groupby(group_keys).count()
result = pair_counts.unstack().fillna(0)
result

Type	1	2	3	5	6	7
SVM_pred
1	9.0	7.0	5.0	0.0	0.0	1.0
2	6.0	18.0	2.0	0.0	2.0	3.0
5	0.0	1.0	0.0	5.0	0.0	0.0
6	0.0	0.0	0.0	0.0	1.0	0.0
7	0.0	0.0	0.0	1.0	0.0	4.0

SVM with RBF kernel

python">
# RBF-kernel support vector classifier with the same C, tol and gamma as the
# other kernels in this file.
rbf_settings = dict(
    C=4,
    tol=1e-6,
    kernel='rbf',
    gamma=0.1,
    decision_function_shape='ovr',
)
clf = svm.SVC(**rbf_settings)

# Features are the first nine columns; the label is the 'Type' column.
features, labels = train_data.iloc[:, 0:9], train_data['Type']
clf.fit(features, labels)

# Predict the test rows, picking the feature columns by the names seen in fit.
value = clf.predict(test_data[clf.feature_names_in_])

# Attach the predictions to a copy, leaving test_data untouched.
test_datac = test_data.copy()
test_datac['SVM_pred'] = value
test_datac.head()

# Rows: predicted type; columns: actual type; cells: number of test rows.
# Pairs that never occur are filled with 0.
result = (
    test_datac.iloc[:, 0]
    .groupby([test_datac['SVM_pred'], test_datac['Type']])
    .count()
    .unstack()
    .fillna(0)
)
result

Type	1	2	3	5	6	7
SVM_pred
1	12.0	6.0	6.0	0.0	0.0	1.0
2	3.0	19.0	1.0	1.0	2.0	2.0
5	0.0	1.0	0.0	5.0	0.0	0.0
6	0.0	0.0	0.0	0.0	1.0	1.0
7	0.0	0.0	0.0	0.0	0.0	4.0

SVM with polynomial kernel

python">
# Degree-4 polynomial-kernel support vector classifier.
clf = svm.SVC(
    C=4, tol=1e-6,
    kernel='poly', degree=4, gamma=0.1,
    decision_function_shape='ovr',
)

# Train on the first nine columns against the 'Type' label, then predict the
# same nine columns of the test set.
train_features = train_data.iloc[:, 0:9]
train_labels = train_data['Type']
clf.fit(train_features, train_labels)
value = clf.predict(test_data.iloc[:, 0:9])

# Keep test_data unchanged; the prediction column lives on a copy.
test_datac = test_data.copy()
test_datac['SVM_pred'] = value
test_datac.head()

# Count rows for every (predicted, actual) pair, pivot the actual type into
# columns, and replace the missing pairs with 0.
predicted, actual = test_datac['SVM_pred'], test_datac['Type']
counts_by_pair = test_datac.iloc[:, 0].groupby([predicted, actual]).count()
result = counts_by_pair.unstack().fillna(0)
result

Type	1	2	3	5	6	7
SVM_pred
1	14.0	7.0	6.0	0.0	0.0	1.0
2	1.0	18.0	0.0	0.0	1.0	2.0
3	0.0	0.0	1.0	0.0	0.0	0.0
5	0.0	1.0	0.0	5.0	0.0	0.0
6	0.0	0.0	0.0	0.0	2.0	1.0
7	0.0	0.0	0.0	1.0	0.0	4.0

SVM——libsvm 3.21 用法示例

# Example 5.2
import re
import numpy as np
from sklearn.datasets import dump_svmlight_file


def _read_rows(path):
    """Return the whitespace-separated fields of every line in *path*.

    Each line is stripped and then split on runs of whitespace, giving one
    list of strings per line of the file.
    """
    with open(path, 'r') as file:
        return [re.split(r'\s+', line.strip()) for line in file]


# dump_svmlight_file is used to write the data out as svmlight files.
# UCI HAR Dataset: training split.
# NOTE(review): X_train.txt is read from the chapter data directory while the
# other three inputs come from the "UCI HAR Dataset" tree — confirm intended.
X_list = _read_rows('../data/第5章数据/X_train.txt')
y_list = _read_rows('../UCI HAR Dataset/UCI HAR Dataset/train/y_train.txt')
X = np.array(X_list).astype(float)
# Flatten the label rows into a 1-D integer vector.
y = np.array(y_list).reshape(-1).astype(int)
dump_svmlight_file(X, y, '../data/第5章数据/train.txt')

# UCI HAR Dataset: test split, converted the same way.
X_list = _read_rows('../UCI HAR Dataset/UCI HAR Dataset/test/X_test.txt')
y_list = _read_rows('../UCI HAR Dataset/UCI HAR Dataset/test/y_test.txt')
X = np.array(X_list).astype(float)
y = np.array(y_list).reshape(-1).astype(int)
dump_svmlight_file(X, y, '../data/第5章数据/test.txt')

# Kept at this point in the script, as in the original, so the libsvm helpers
# are in scope for the code that follows.
from libsvm.svmutil import *
# Read the svmlight files back in the label/feature form libsvm expects.
y, x = svm_read_problem('../data/第5章数据/train.txt')
y1, x1 = svm_read_problem('../data/第5章数据/test.txt')

# Train one model per kernel option string, in this order. Per the libsvm
# options, '-t 0' selects the linear kernel and '-t 1' the polynomial kernel.
m1, m2 = (svm_train(y, x, kernel_option) for kernel_option in ('-t 0', '-t 1'))

# Evaluate both models on the test set. The same three names are reused, so
# after the loop they hold the results of m2.
for model in (m1, m2):
    p_labs, p_acc, p_vals = svm_predict(y1, x1, model)

Accuracy = 96.4031% (2841/2947) (classification)
Accuracy = 90.7703% (2675/2947) (classification)

SVM决策边界

import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm


# Function to plot decision boundary
def plot_decision_boundary(clf, X, y, title, h=.02):
    """Draw a classifier's predicted regions over its first two features.

    The function draws through the ``plt`` module-level functions, so the
    caller chooses the target by calling e.g. ``plt.subplot`` beforehand.

    Args:
        clf: Fitted classifier whose ``predict`` accepts a two-column array.
        X: 2-D array; column 0 and column 1 are used as the plot axes.
        y: Values used to colour the scattered points of ``X``.
        title: Title text for the plot.
        h: Step size of the mesh on which predictions are evaluated.

    Returns:
        None.
    """
    # Create a mesh to plot the decision boundary, padded by 1 on every side
    # of the data range.
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))

    # Plot the decision boundary by assigning a color to each point in the
    # mesh: predict every mesh point, then restore the grid shape.
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    plt.contourf(xx, yy, Z, cmap=plt.cm.coolwarm, alpha=0.8)

    # Plot the training points
    plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.coolwarm, edgecolors='k')
    plt.xlabel('Feature 1')
    plt.ylabel('Feature 2')
    plt.title(title)


# Example data (replace with your actual data)
# Use only the first two features for visualization
X = train_data.iloc[:, 0:2].values
y = train_data['Type'].values

# One classifier per kernel; all three share C, gamma and the 'ovr' decision
# function shape.
clf_linear = svm.SVC(C=4, kernel='linear', gamma=0.1, decision_function_shape='ovr')
clf_rbf = svm.SVC(C=4, kernel='rbf', gamma=0.1, decision_function_shape='ovr')
clf_poly = svm.SVC(C=4, kernel='poly', degree=4, gamma=0.1, decision_function_shape='ovr')

# Left-to-right panel order: linear, RBF, polynomial.
panels = [
    (clf_linear, 'SVM with Linear Kernel'),
    (clf_rbf, 'SVM with RBF Kernel'),
    (clf_poly, 'SVM with Polynomial Kernel'),
]

# Create a figure with subplots: one row, three columns.
plt.figure(figsize=(15, 5))
for position, (model, panel_title) in enumerate(panels, start=1):
    plt.subplot(1, 3, position)
    model.fit(X, y)
    plot_decision_boundary(model, X, y, panel_title)

# Show the plots
plt.tight_layout()
plt.show()