房价预测（线性回归） - 数学建模社区-数学中国

from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Load the dataset: feature matrix X and regression target y.
housing = fetch_california_housing()
X, y = housing.data, housing.target

# Split into training and test sets (20% held out; fixed seed so the
# split is reproducible between runs).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Preprocessing: standardization.
# The scaler is fitted on the training split only and then applied to the
# test split, so no statistics from the test data leak into training.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

复制代码

复制代码

from sklearn.feature_selection import SelectKBest, f_regression

# Feature selection: keep the k=5 features ranked best by f_regression.
# The selector is fitted on the training split only, then reused on the
# test split so both end up with the same columns.
selector = SelectKBest(score_func=f_regression, k=5)
X_train_selected = selector.fit_transform(X_train_scaled, y_train)
X_test_selected = selector.transform(X_test_scaled)

# Retrain the model using the selected features.
# `model` is not defined anywhere in the visible script, so the estimator
# is created here (LinearRegression is imported at the top of the script).
model = LinearRegression()
model.fit(X_train_selected, y_train)
y_pred_selected = model.predict(X_test_selected)

# Evaluation: mean squared error on the held-out test split.
mse_selected = mean_squared_error(y_test, y_pred_selected)
print(f"Mean Squared Error with selected features: {mse_selected}")

复制代码