# 1. 数据预处理 (Data preprocessing)
import pandas as pd
import glob
import os
# Directory containing the CSV exports; every "*.csv" file directly inside it
# is loaded and stacked into a single DataFrame.
path = 'path_to_your_csv_files/'

# glob returns matches in arbitrary, OS-dependent order. Sorting keeps the row
# order of the combined frame identical from run to run.
all_files = sorted(glob.glob(os.path.join(path, "*.csv")))
if not all_files:
    # pd.concat([]) would otherwise fail with an opaque
    # "No objects to concatenate" message; keep the ValueError type but say
    # what is actually wrong.
    raise ValueError(f"No CSV files found in {path!r}")

df_list = [pd.read_csv(file) for file in all_files]

# ignore_index=True discards the per-file row numbers and renumbers 0..n-1.
data = pd.concat(df_list, ignore_index=True)

# Parse the '保存时间' (save time) column into datetime values.
data['保存时间'] = pd.to_datetime(data['保存时间'])

# Drop every row that has at least one missing value in any column.
data = data.dropna()
print(data.dtypes)
# 2. 数据探索性分析 (Exploratory data analysis, EDA)
import plotly. io as pio
import plotly. graph_objs as go
from sklearn. preprocessing import MinMaxScaler
import matplotlib. pyplot as plt
# Use the SimHei font so the Chinese titles and labels render in matplotlib
# figures, and disable the Unicode minus glyph for tick labels.
plt.rcParams['font.family'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

# Summary statistics of the cleaned data.
print(data.describe())

# Columns drawn as time series on one interactive plotly figure.
variables_to_analyze = [
    '主轴电机实时转速',
    '主轴电机实时电流',
    '腔体压力',
    '喂料电机实时转速',
    '喂料电机实时电流',
    '循环泵口压力',
    '出料压力',
]

fig = go.Figure()
for variable in variables_to_analyze:
    # One line trace per variable, all sharing the '保存时间' time axis.
    fig.add_trace(
        go.Scatter(
            x=data['保存时间'],
            y=data[variable],
            mode='lines',
            name=variable,
        )
    )
fig.update_layout(
    title='设备参数在一个月内的曲线变化图',
    xaxis_title='时间',
    yaxis_title='参数值',
    hovermode='x unified',
)
pio.show(fig)

# Correlate numeric columns only. `data` also holds the datetime column
# '保存时间' (and possibly other non-numeric columns from the CSVs); recent
# pandas versions no longer drop such columns silently in DataFrame.corr().
corr_matrix = data.select_dtypes(include='number').corr()
print(corr_matrix)
# 3. 数据可视化 (Data visualization)
import seaborn as sns
# Heatmap of the correlation matrix, each cell annotated with its value
# formatted to two decimals.
_, heatmap_ax = plt.subplots(figsize=(12, 10))
sns.heatmap(
    corr_matrix,
    annot=True,
    cmap='coolwarm',
    fmt='.2f',
    ax=heatmap_ax,
)
heatmap_ax.set_title('参数相关性热力图')
plt.show()

# Scatter plot of the two spindle-motor columns against each other.
speed_col = '主轴电机实时转速'
current_col = '主轴电机实时电流'
_, scatter_ax = plt.subplots(figsize=(8, 6))
sns.scatterplot(data=data, x=speed_col, y=current_col, ax=scatter_ax)
scatter_ax.set_xlabel(speed_col)
scatter_ax.set_ylabel(current_col)
scatter_ax.set_title('主轴电机实时转速与主轴电机实时电流的关系')
plt.show()
# 4. 特征工程 (Feature engineering)
# Derived feature: ratio of spindle-motor speed to spindle-motor current.
# A current of 0 makes the division yield +/-inf (or NaN for 0/0). Those values
# are stored as NaN so the affected rows can be excluded below;
# LinearRegression.fit rejects inputs containing inf or NaN.
speed_current_ratio = data['主轴电机实时转速'] / data['主轴电机实时电流']
data['转速电流比'] = speed_current_ratio.replace(
    [float('inf'), float('-inf')], float('nan')
)

# Model inputs and the column to predict.
features = [
    '主轴电机设定转速',
    '主轴电机实时转速',
    '主轴电机实时电流',
    '转速电流比',
    '腔体压力',
]
target = '出料压力'

# Keep only rows where every feature and the target are present. `data` itself
# is left unfiltered; only X and y exclude the unusable rows.
model_rows = data.dropna(subset=features + [target])
X = model_rows[features]
y = model_rows[target]
# 5. 建模与预测 (Modeling and prediction)
from sklearn. model_selection import train_test_split
from sklearn. linear_model import LinearRegression
from sklearn. metrics import mean_squared_error, r2_score
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split repeatable for the same input rows.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit an ordinary least-squares linear model on the training portion.
model = LinearRegression()
model.fit(X_train, y_train)

# Score the model on the held-out rows.
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Output text (including the trailing space) is kept exactly as in the original.
print(f'Mean Squared Error: {mse} ')
print(f'R^2 Score: {r2} ')