付费后可以查看加密内容,获取下载链接,若链接失效,联系微信yeeyea
import os
import pandas as pd
df = pd.read_csv("CYKEZR.csv",encoding='gb18030' )
df.head()
relative compactness surface area wall area roof area overall height orientation glazing area glazing area distribution heating load cooling load 0 0.98 514.5 294.0 110.25 7.0 2 0.0 0 15.55 21.33 1 0.98 514.5 294.0 110.25 7.0 3 0.0 0 15.55 21.33 2 0.98 514.5 294.0 110.25 7.0 4 0.0 0 15.55 21.33 3 0.98 514.5 294.0 110.25 7.0 5 0.0 0 15.55 21.33 4 0.90 563.5 318.5 122.50 7.0 2 0.0 0 20.84 28.28
df.isnull().any()
relative compactness False
surface area False
wall area False
roof area False
overall height False
orientation False
glazing area False
glazing area distribution False
heating load False
cooling load False
dtype: bool
X = df.iloc[:,:-2]
y_heating = df['heating load']
y_cooling = df['cooling load']
# 解决中文显示问题
# from yellowbrick.style.rcmod import set_aesthetic
# set_aesthetic(font = 'SimHei')
import matplotlib
matplotlib.rcParams['font.sans-serif']=['SimHei']
matplotlib.rcParams['axes.unicode_minus']=False # 用来正常显示负号
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Lasso
from yellowbrick.regressor import PredictionError
X_train, X_test, y_train, y_test = train_test_split(X, y_heating, test_size=0.2, random_state=42)
# 可视化及验证
model = Lasso()
visualizer = PredictionError(model) # line_color='r'
visualizer.fit(X_train, y_train)
print(visualizer.score(X_test, y_test))
visualizer.poof()
0.7940649566807682
X_train, X_test, y_train, y_test = train_test_split(X, y_cooling, test_size=0.2, random_state=42)
# 可视化及验证
model = Lasso()
visualizer = PredictionError(model) # line_color='r'
visualizer.fit(X_train, y_train)
print(visualizer.score(X_test, y_test))
visualizer.poof()
0.7722075069396912
from yellowbrick.model_selection import LearningCurve
model = Ridge()
visualizer = LearningCurve(model, cv=5)
visualizer.fit(X, y_heating)
visualizer.poof()
model = Ridge()
visualizer = LearningCurve(model, cv=5)
visualizer.fit(X, y_cooling)
visualizer.poof()
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge
from yellowbrick.regressor import ResidualsPlot
# 创建训练集和测试集
X_train, X_test, y_train, y_test = train_test_split(X, y_heating, test_size=0.2, random_state=42)
# 可视化及验证
model = Ridge()
visualizer = ResidualsPlot(model)
visualizer.fit(X_train, y_train)
visualizer.score(X_test, y_test)
visualizer.poof()
# 创建训练集和测试集
X_train, X_test, y_train, y_test = train_test_split(X, y_cooling, test_size=0.2, random_state=42)
# 可视化及验证
model = Ridge()
visualizer = ResidualsPlot(model)
visualizer.fit(X_train, y_train)
visualizer.score(X_test, y_test)
visualizer.poof()