# Report whether CUDA is available; training falls back to CPU otherwise.
# FIX: train_on_gpu was used without a visible definition and
# torch.cuda.is_available() was queried twice — compute the flag once.
train_on_gpu = torch.cuda.is_available()
if not train_on_gpu:
    print('CUDA is not available. Training on CPU ...')
else:
    print('CUDA is available! Training on GPU ...')

# Single device handle used by the rest of the script.
device = torch.device("cuda:0" if train_on_gpu else 'cpu')
1 + U* t8 g: R! U' m `* B( P2 1 ]: X, Q5 B3 s8 q5 }- C' s- O' Y3( D2 }+ ~3 w+ k1 X* Q
4$ H% Y& I/ z, I
54 s0 f' q% q/ M% d1 B9 D
6 % A. X& P5 `% w. X7 ; N0 v6 D9 e- X1 r8. u a: j; g, w' h) I) O
9% D3 U! a2 b1 W+ ^
CUDA is not available. Training on CPU ...' B1 \5 p6 s; ?6 y5 w; o' h
12 w/ F4 T4 ~5 s
def set_parameter_requires_grad(model, feature_extracting):
    """Freeze every parameter of *model* when feature extracting.

    With ``feature_extracting=True`` all existing parameters stop receiving
    gradient updates, so only layers replaced afterwards (e.g. a new FC
    head) will be trained. Otherwise the model is left untouched.
    """
    if not feature_extracting:
        return
    for p in model.parameters():
        p.requires_grad = False
3 + V$ _& O9 `+ {3 A) e. E4 " h% Q# l3 A7 ]* n5 & j8 `$ ~+ m0 H' `2 R# 打印模型架构告知是怎么一步一步去完成的- U% T; v' K& _3 X% v5 x, N0 k& n
# 主要是为我们提取特征的* I' T( L, m$ s4 Z" O* w" }
# Instantiate a ResNet-152 (random weights) purely to inspect its
# architecture; the printout below shows it ends in fc: Linear(2048, 1000).
model_ft = models.resnet152()
model_ft
3 % L; B7 Y4 i3 ?% V* `5 z40 y* y. ~/ N4 S7 M. @' C9 e3 u
5 . x B. ^, t( t$ t) `ResNet( ! K- P; `) Q* T( \7 G (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)! v Z7 \+ ]- E# C8 l
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) " `! h. e( K& Y& t; }+ N: B, k (relu): ReLU(inplace=True) " U8 @8 J) J2 |" F* M& u; M7 M: t (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False) 5 b2 F7 L8 ?9 W+ G Z- z (layer1): Sequential( " v$ I/ G- ]& W" h- N& P3 b (0): Bottleneck(! t6 ]/ v3 o j& h Z! K1 O
(conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)+ s9 A- N" ^" C( t" M( R
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ( q1 G; |6 a- h1 L3 V8 L' g0 c. J (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)$ b+ A' @9 I2 M% p; c
(bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 5 k$ f) Q! a7 I0 K) y1 K6 Y/ f3 H (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)2 S; v7 U, C+ E: G( d$ H% A J
(bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - ^3 R: u- b1 l3 j (relu): ReLU(inplace=True) , p9 `* o' t _: B4 I (downsample): Sequential(1 P* A, l4 K- k2 ?
(0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)) k) v% {1 d' `/ u4 J8 j
(1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) $ R! I4 i C9 \" x) E ) ( ^7 @% V, e" C: h; N9 t; I1 } )0 @3 q4 V/ z2 {4 w' D0 {
中间还有很多输出结果,我们着重看模型架构的两个层级就完了,缩略。。。# B- x/ o) i. E- C
(2): Bottleneck(% h6 f5 }" F4 H
(conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False) / R' v* R5 B1 Y( [ (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 2 M0 o7 Q1 T: }8 { E (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) A5 D3 m& g4 u8 x: G0 s v
(bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)5 h# `& i. a/ n$ O- u- @6 f$ r
(conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False) , J7 U9 F1 Z9 l$ P" w. Y4 j (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) # O, L9 S7 l. u7 ] (relu): ReLU(inplace=True) ; k; S7 q+ C# w )6 Y7 w0 a9 }& h8 T5 `- J
)" W3 T1 h( u: y9 e" w
(avgpool): AdaptiveAvgPool2d(output_size=(1, 1))0 Y2 y) O( s3 @. Z% |/ \' s7 s
(fc): Linear(in_features=2048, out_features=1000, bias=True)) \ F+ C6 }& P
) - Q+ V) I7 \% j% s! L 8 c- A) H, @$ y9 z1 u: G& O/ y16 l& P. z5 h- t Y( c6 a
2* m# q9 L _% H* S+ u6 k) E( Q/ S
3* u6 N- O! r4 B6 S2 e9 d% T
4 1 M) [3 o9 W' d1 n5# V; P' Y* g. f9 ^! A/ i; ]7 j
6' ~0 J! {1 y. ]
7 & i% [, d3 \4 |8 ' E9 K6 X" ?5 _4 h2 [98 X% H( x9 C7 ?$ N3 Y" m
10$ [6 [$ g& l, [' a x
117 o1 S/ d" q" C% T _
12 $ Q- n# v M( R13. S6 R6 Q# L7 V; l, _& \
14 ( D$ r+ X9 r C8 Y3 L; F15: ?% V/ Q# d/ v8 B5 g
16 5 T* u# D5 P; x2 Z17 / s8 D( ]1 f W% m* i2 \18 ' H& n0 L3 U6 a$ w19* W' |. K f# }2 U/ Y5 e7 I
20 + a7 A2 D8 O/ L$ t7 h4 [; h' d21 J5 v: t$ J* y5 p, l3 p; [7 ^" p
22) A) d; R) m8 S& b+ U1 j
23 3 k5 W! W1 p4 c+ k0 `( w+ Z2 J241 y- b0 e9 Q: [$ r. I. m
25 5 O, v2 q+ l: ?) S& z d26 : F, e$ c& T6 O3 D9 r* s- `# D6 H27 7 p8 L$ n) l2 V8 {( j# `28, O/ O, M* b2 q* v$ {# L5 i/ O9 E
最后的全连接层(fc)输入为 2048 维,输出 1000 个分类。
而我们需要针对自己的任务进行调整:把 1000 分类的输出改为 102 分类。
" C9 P% u3 v0 d' R
6. 初始化模型架构
步骤如下:
1) 将训练好的模型拿过来,并设 pretrained=True,得到他人训练好的权重参数;
2) 可以自己指定要不要把某些层冻住(将其 requires_grad 设为 False,不参与梯度更新);
3) 无论是分类任务还是回归任务,都要将最后的 FC 层改为相应的输出维度。
官方文档链接:https://pytorch.org/vision/stable/models.html
# Load a pretrained torchvision backbone and adapt its classifier head.
def initialize_model(model_name, num_classes, feature_extract, use_pretrained = True):
    """Build a torchvision model with its classifier resized to *num_classes*.

    Args:
        model_name: one of "resnet", "alexnet", "vgg", "squeezenet",
            "densenet", "inception".
        num_classes: number of output classes for the replacement head.
        feature_extract: if True, freeze the pretrained feature layers so
            only the new classifier head is trained.
        use_pretrained: load ImageNet-pretrained weights.

    Returns:
        (model_ft, input_size): the adapted model and the expected square
        input resolution in pixels (224, or 299 for inception).

    Raises:
        ValueError: if *model_name* is not one of the supported names.
    """
    model_ft = None
    input_size = 0

    if model_name == "resnet":
        # ResNet-152.
        # 1. Load the pretrained network.
        model_ft = models.resnet152(pretrained = use_pretrained)
        # 2. Optionally freeze the feature extractor; only the FC head trains.
        set_parameter_requires_grad(model_ft, feature_extract)
        # 3. Input width of the original fully-connected layer.
        num_frts = model_ft.fc.in_features
        # 4. Replace the head. BUG FIX: the output size was hard-coded to
        #    102; it now honors num_classes like every other branch.
        model_ft.fc = nn.Sequential(nn.Linear(num_frts, num_classes),
                                    nn.LogSoftmax(dim = 1))  # dim=1: normalize per row across classes
        input_size = 224

    elif model_name == "alexnet":
        # AlexNet: the final Linear of the head is classifier[6].
        model_ft = models.alexnet(pretrained = use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_frts = model_ft.classifier[6].in_features
        model_ft.classifier[6] = nn.Linear(num_frts, num_classes)
        input_size = 224

    elif model_name == "vgg":
        # NOTE(review): loads VGG-16 although the original comment said
        # "VGG11_bn" — kept as vgg16 to preserve behavior.
        model_ft = models.vgg16(pretrained = use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_frts = model_ft.classifier[6].in_features
        model_ft.classifier[6] = nn.Linear(num_frts, num_classes)
        input_size = 224

    elif model_name == "squeezenet":
        # SqueezeNet classifies with a 1x1 conv, not a Linear layer.
        model_ft = models.squeezenet1_0(pretrained = use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        model_ft.classifier[1] = nn.Conv2d(512, num_classes, kernel_size = (1, 1), stride = (1, 1))
        model_ft.num_classes = num_classes
        input_size = 224

    elif model_name == "densenet":
        # BUG FIX: was models.desenet121 (typo) -> AttributeError at runtime.
        model_ft = models.densenet121(pretrained = use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_frts = model_ft.classifier.in_features
        model_ft.classifier = nn.Linear(num_frts, num_classes)
        input_size = 224

    elif model_name == "inception":
        # Inception V3. BUG FIX: was models.inception_V (no such attribute).
        model_ft = models.inception_v3(pretrained = use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)

        # Inception v3 has an auxiliary head; resize both classifiers.
        num_frts = model_ft.AuxLogits.fc.in_features
        model_ft.AuxLogits.fc = nn.Linear(num_frts, num_classes)

        num_frts = model_ft.fc.in_features
        model_ft.fc = nn.Linear(num_frts, num_classes)
        input_size = 299  # inception expects 299x299 inputs

    else:
        # FIX: was print(...) + exit(); raise so callers get a real error.
        raise ValueError("Invalid model name: {!r}".format(model_name))

    return model_ft, input_size
19 ^$ h& @6 D" h! S
2 4 ]) z% {0 j4 ~1 \3# Z" T; D5 @; Z y$ X+ `, @
4 : }: T3 x$ {, I& U0 @) Q53 l3 D5 m$ Q! ?, N" K6 G
6 % N3 ^1 O3 u8 ]0 V9 X+ x7# S( O, R3 H* m) o1 o8 F
8! P0 p D; c/ @8 X# t/ w6 h$ O0 ^4 m
9 & `6 {: k1 o' u107 \# K, n. d( b& a0 Z6 _9 n
11" V% C# |- B" g2 x4 W0 w- W! m1 d$ I
120 [ m, y" B1 _( Q [
13% ~7 E6 o0 a+ b3 O
14: b" ^3 o$ b% n3 c; p2 e2 ?8 y
15 : z6 P2 l' I) O2 p" {. X16 % D& p% w. E x$ t17$ g5 K7 c( I. q( ^
18 * Q* @( c7 i7 k$ G; A7 [. G5 ]: \195 @0 g6 a* l0 n
20 * o5 c" J! S9 n213 `: ~& G$ A+ v4 }! \; t q1 k
22 7 B; {$ T C0 z* m- g6 Q23' O2 o! S1 M, R! M0 Y
24 ! S+ Y& A' F; Q% c25 3 p1 V- N6 w/ K. }# U5 Q26* ~: K3 F% d% Y+ m' O ^
27 - V1 H# V- X( P( G9 m; J28 ! t$ g. I: m4 x& M. u; |% g' M29. Y3 a, G! S; f. ^ u. ~- F
30 * O% I. s- f3 N, O, l8 }( [% B313 o( N) M( x! h* ] k2 k
32 ; [9 M3 I. I _3 c, R33' e; G) v6 i$ M- `* ]# p
349 I: c7 P* |9 b6 p
35 / H- N+ W# B1 H$ v, p+ E, q36 7 X# E; q6 Y$ H0 X37 ! E9 J: P# I0 r# ]* q1 _38 2 E! v% ~5 n+ N0 L0 P39$ n6 N$ q2 Z' l
40" Z& `" b8 L8 h) o* L
411 L3 }! y/ k0 @. ~" j- v0 L' R
424 [+ z& t0 }, R% E8 z/ S
435 z0 X& z+ U% `- ` [. W
444 J5 m' p: R; z& f4 W
45 # N" G( _2 H7 l% B' ]2 T2 }46. I$ S* {3 g3 S
478 x; W4 R6 p, e+ V% ]
48 2 n" p1 t3 M2 ~5 S49 . W6 o1 q: L- {" s7 r- {505 w. D* y) q* J: U' V: @
51 8 Z) r( e$ B4 F" a. l. H. I52 1 R6 r- {7 q: F% A4 T7 s/ X, ~. E53, s* g- e, G- }. X6 L
54 , ~; Z& s" S8 Z7 h55 3 Z- {" X# Q* P1 }0 j56 , i' |: Q7 W/ U57' r& n4 A f' A* H; u/ Y# u$ [" I( K
587 R+ l9 x* m0 }# S& H1 n
59 ( j% I5 e! f2 _; b60 ; w! A" D; A5 b/ A61 : U' U1 y* S b$ r+ j4 |62 ( \* V+ y& T K- N& c9 |63 7 G9 ~ `" ^; w. _64 # u6 j `: c" B5 t: V8 I6 r65 $ t( s, m( E/ c& ]3 k66 ' q! J# b" U( V- h2 n2 U67# |! l' B# U0 Q8 P2 o3 k
68" ?6 P3 l" T5 R8 h* e) Y
69 N! G' R% k8 L70 0 u$ F \! g9 [- K# l; C1 O( s71# B$ L0 o& ^' W4 X0 y3 ?6 T1 ?" j4 W+ D
72' A) L0 {# }: V# D4 p h
73+ C% }. ?* l: X. P+ z6 `" o; e+ b% @
74' `6 C6 J6 K& E5 E7 n5 F* ]
75% C) a7 Z3 p9 @/ U1 x
76+ q( _3 Q" p C5 M: X3 e6 d2 @
77 2 H1 D- Q( t9 d! E782 v, W8 b: P* t* r, h
79 ' J9 c3 C# L1 ?80' m( [$ E9 K; L* K
816 I3 Y$ |+ ^8 ?8 y
# 7. Choose the parameters that will actually be trained.
# Build the model: chosen architecture, 102 output classes.
model_ft, input_size = initialize_model(model_name, 102, feature_extract, use_pretrained = True)

# Move the model to the GPU/CPU device.
model_ft = model_ft.to(device)

# Checkpoint file: the best trained weights are saved here for later reuse.
filename = 'checkpoint.pth'

# By default every parameter would be optimized.
params_to_update = model_ft.parameters()

# Print the layers that will be trained.
print("Params to learn:")
trainable = [(n, p) for n, p in model_ft.named_parameters() if p.requires_grad]
if feature_extract:
    # Feature extraction: optimize only the unfrozen parameters (new head).
    params_to_update = [p for _, p in trainable]
for n, _ in trainable:
    print("\t", n)
( J7 ]3 o; k: ~' N/ S1 a- f1. w3 @- l0 h Z
2! L6 D C: g5 m+ i' l1 B3 r
3( ~) B& t7 x% m
43 s* [7 R, U5 d. V+ \# J
50 p: S. ]& |/ z- Q' b @1 Z. r- t
6 0 u# p. Q( T' {( t6 f5 ~7* K- I$ I' _% @5 n5 v2 i# `
8 : G5 P2 X; U0 g' p- k, v9) ]" `3 k" P2 g, j6 R
10 2 z1 h9 A {. F* t11 $ e' a+ Q7 `/ {, x) U. d12/ Z% b4 p/ L, ]4 n! |9 d# H
13 3 s5 a7 m5 S) Z3 J: h14. {5 I4 A% Y& Y5 ~7 O
15. _6 J6 e- Q& O. w# e( l, T: K4 T
16 3 h3 g' g0 k. l$ j17 8 l- K' Z- I! _5 o18 ; }6 w' Z2 K" T5 S7 z9 w0 T2 a" r9 `19 # |8 @" |+ Y2 q: k20 & X+ _1 `9 X6 Q$ m& V E7 G7 \21 & R8 J: `8 F4 W/ \, {223 n( I: E8 q- k( A1 o
23 3 K V( r# ~( j, M' nParams to learn:6 m. o. t, v) @* P: S+ D
fc.0.weight % c, _) \# ]. q fc.0.bias 7 q7 n3 @2 g/ M v1 m& @; Y5 s* E' l! z& H
2' _6 ?7 _- T; W3 i! w* }& q, @
31 y1 [% h" Z1 v( h1 W
# 7. Training & prediction — 7.1 optimizer setup
optimizer_ft = optim.Adam(params_to_update, lr = 1e-2)

# LR decay policy: every 7 epochs the learning rate drops to 1/10.
scheduler = optim.lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

# The model's last layer is LogSoftmax(), so nn.CrossEntropyLoss() (which
# applies log-softmax itself) cannot be used; NLLLoss pairs with LogSoftmax.
criterion = nn.NLLLoss()
2 ) W8 K' H% I& N1 t2 _3 {8 G3 + ^- U. @8 n% }44 E( b/ R' Q" T0 H
5% O6 y% d/ x2 r7 x* ^
6 / h' J* P5 x9 d) p; b g72 K ?! [% o% B# p+ O
# Training/validation loop.
# is_inception: set True for Inception v3, which returns auxiliary outputs.
def train_model(model, dataloaders, criterion, optimizer, num_epochs=10, is_inception=False, filename=filename):
    """Train the model, tracking metrics and keeping the best validation weights.

    Args:
        model: network to train (moved to the module-level ``device``).
        dataloaders: dict with 'train' and 'valid' DataLoader objects.
        criterion: loss function (NLLLoss here; the model ends in LogSoftmax).
        optimizer: optimizer over the trainable parameters.
        num_epochs: number of epochs to run.
        is_inception: if True, add the auxiliary-head loss weighted by 0.4.
        filename: checkpoint path; the best model so far is saved there.

    Returns:
        (model, val_acc_history, train_acc_history, valid_losses,
         train_losses, LRs) — model reloaded with the best weights.

    NOTE(review): relies on the module-level ``scheduler`` and ``device``.
    """
    since = time.time()
    best_acc = 0  # best validation accuracy seen so far

    # To resume from a previously saved checkpoint, uncomment:
    # checkpoint = torch.load(filename)
    # best_acc = checkpoint['best_acc']
    # model.load_state_dict(checkpoint['state_dict'])
    # optimizer.load_state_dict(checkpoint['optimizer'])
    # model.class_to_idx = checkpoint['mapping']

    # Run on GPU or CPU.
    model.to(device)

    # Histories for later plotting/inspection.
    val_acc_history = []
    train_acc_history = []
    train_losses = []
    valid_losses = []
    LRs = [optimizer.param_groups[0]['lr']]

    # Snapshot of the best weights so far.
    best_model_wts = copy.deepcopy(model.state_dict())

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # One training pass and one validation pass per epoch.
        for phase in ['train', 'valid']:
            if phase == 'train':
                model.train()   # training mode
            else:
                model.eval()    # evaluation mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over every batch of this phase.
            for inputs, labels in dataloaders[phase]:
                # Move the batch to the compute device.
                inputs = inputs.to(device)
                labels = labels.to(device)

                # Reset accumulated gradients.
                optimizer.zero_grad()
                # Compute/track gradients only during training.
                with torch.set_grad_enabled(phase == 'train'):
                    if is_inception and phase == 'train':
                        # Inception trains with an auxiliary classifier.
                        outputs, aux_outputs = model(inputs)
                        loss1 = criterion(outputs, labels)
                        loss2 = criterion(aux_outputs, labels)
                        loss = loss1 + 0.4 * loss2
                    else:  # resnet (and the other models) take this path
                        outputs = model(inputs)
                        loss = criterion(outputs, labels)

                    # Predicted class = argmax over the class dimension.
                    _, preds = torch.max(outputs, 1)

                    # Update weights only in the training phase.
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # Accumulate loss/accuracy weighted by batch size.
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corrects.double() / len(dataloaders[phase].dataset)

            time_elapsed = time.time() - since
            print('Time elapsed {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            # Keep the best validation model and checkpoint it to disk.
            if phase == 'valid' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
                state = {
                    # state_dict holds the learned weights and biases
                    'state_dict': model.state_dict(),
                    'best_acc': best_acc,
                    'optimizer' : optimizer.state_dict(),
                }
                torch.save(state, filename)
            if phase == 'valid':
                val_acc_history.append(epoch_acc)
                valid_losses.append(epoch_loss)
                # BUG FIX: was scheduler.step(epoch_loss) — StepLR.step()
                # takes an epoch index, not a metric; passing the loss
                # corrupted the decay schedule.
                scheduler.step()
            if phase == 'train':
                train_acc_history.append(epoch_acc)
                train_losses.append(epoch_loss)

        print('Optimizer learning rate : {:.7f}'.format(optimizer.param_groups[0]['lr']))
        LRs.append(optimizer.param_groups[0]['lr'])
        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # Reload the best validation weights before returning the model.
    model.load_state_dict(best_model_wts)
    return model, val_acc_history, train_acc_history, valid_losses, train_losses, LRs
/ C% C* H7 u. \, \5 } * u% T' P# S; x) T1 3 \/ e, V' d X5 r2 3 S* C8 X# V% x# b' O) Z3 5 Y5 n% l' Z, X2 Z8 c) | K43 p7 C1 Q* N2 i: q
56 X, C+ y0 L) C* n+ q- `; s
6 . ~6 S$ e( r* X7 o; G5 E7 W! k9 _' N5 o
8 . a1 Y, G5 x [. t& ?. b9 _5 ~9 4 S8 [" i# x- l! Y10; U4 C' B" |; r$ S* }. g
11. Y- U: p) ]" p7 P. B5 Y7 O4 J3 {
127 B7 F( m" ]) h( J7 A# x: g
13 5 |: q( t. A0 D% v% F14 3 x: q( v9 t5 x0 l) j152 ]% q% R. p4 n K2 [. j/ l
163 H+ ?! I" G4 L
17 i2 W# G2 Y2 _4 A18: h/ z0 h, k! I2 ?9 d% H
19 7 H) N3 S5 z; ]5 S! B# K R0 H20- A. h8 z- { l8 H; A
21. F1 F6 e, o" o+ }2 V6 L$ n- i1 y
22 . n/ R3 a! ~+ _7 U4 i- P23# ]* i. s* j4 n
24* k: G) L; \. @) s6 c
25 i' {* Y5 [0 D, r0 {26 9 ?% i: ?5 T' }27 $ M2 U* B0 D R28- J W4 K: s* h7 S3 G1 o
29+ j; j7 {5 k% ?7 _
30 , l7 z- T5 o9 Y31 7 e" S7 o: k# {4 h32( B8 o! p6 L# O% t
33# m2 k O8 l0 m- k3 F- W! c6 ?
34, q: v/ _4 ^+ m% Y* {8 I$ F4 }
35; J: _+ r* h, M. Q2 \" a0 I% D1 L. d
361 E& o( P, |) ~
37/ g$ Z5 |. V7 p
385 o3 W8 C5 r0 ?: j7 K
39$ n6 o- ~1 l& f! I3 t7 \3 @7 `
40 ( L) F! q/ m8 n; g% O411 L7 r) G0 @' q" m
429 p& J& g. W& S9 @; Z2 u) t' [3 [+ E
43 ! ?7 O9 C/ C+ H) a) X44( x0 R7 P$ A/ K5 G2 ~ a6 q
45 # j4 e$ X) @; q+ @" k46 2 }; Y4 a4 l. ~( c9 n471 C; l7 j/ v) U& E) e5 o) z
48 9 U0 @7 g# W, N49 @) E# o5 @+ a& @" [+ \50 w% u6 G6 R7 f
51' z d- e# S" d4 Y* n
52 8 o1 y' F& ~) Y- `- y( ~53 1 ^: u" q7 q% _1 J! ]54 . r+ k7 N/ t5 n) g: |- W5 A. c55 4 X+ \2 ]- A* d3 q( t7 v56! Z3 W* U. x. k6 W1 x7 N
57 9 ?4 s& d4 m/ A6 W58 % v/ N; U! z$ _* m59, Q9 G6 A I, U* n, B% Y
60; D- E/ J! ~7 x1 m
61$ r9 v9 P- _" ?6 L: h
62% F1 d9 A4 s1 R7 U# Z! j* u
63, B9 x6 q$ O& X, |! W2 L# h
64: G- a; a6 B$ N' j
654 \9 s) k& [( D; n1 Y2 x7 t' A
66 - J- I6 N* b( M3 d67 1 p, Y2 P# b' ~! ]/ p% L68( v5 Z% C' N' A `* W
69* Z2 @8 _+ x; U, l; p- y
70 : m; ~+ T! w& k8 i0 M71 & J6 G6 l5 Z, ~, h* s4 j; N72 e# A( Z! z% K1 h5 \, t7 X73! s4 m, B% a/ q0 a$ |, Z
74 . X. e* Q4 T3 [0 k75 2 j6 e" g2 {6 E; W) a76 & J% U' N; ^1 j77% C6 g- }- z( v+ m n
78 , g" O+ \9 c( u$ |9 X79 ! a% Q$ k9 q4 x1 M80 # Y. e, T( M0 U) J$ O6 b817 Y% m. h3 M( I( ?& X& b
82 * g6 X9 p7 Q+ t83/ V, {: i. u) c# x% v1 n' ~
84 2 t1 G$ E, f6 ^85 9 t# {" c& r1 m6 S, L3 M% p. G5 f: [869 A' N2 x$ s' i9 D0 J
87 / ]7 p- n2 k6 H- Y* e880 y7 Z K9 A+ f0 R
89 4 y& J2 v. ]. W/ u7 P! F90( x5 J2 Q. `5 d1 b
91 6 E: ]$ ?6 [) E$ Q" e4 \1 |4 v92) z0 w3 ~" F5 D7 X. ?6 t: w( W
935 D2 X; P: L$ w* X$ R
94 7 L7 G+ f' o8 o# c- C951 l Y5 \4 [. f& x; m' b8 b) r9 i
96 $ T( b! L$ D3 Y) D/ H/ s% E97 ! _0 ?1 ~! g$ F& K9 {. S2 Y988 }( ^6 d9 Z4 q% I" W9 i' A
99$ W0 O# p, Z* k! Y; N
100 ) {4 d3 I3 F6 m, M7 n) v* }( _. \101) C+ c `1 k b t- Y" \- n
1020 `; E+ \1 {/ n) V4 Z$ w2 o
103 + {! ~- j& b3 s2 {0 f% R- Y1041 i5 H% h/ ^ n, x! O
105 1 X# M5 _/ F6 O% B1 [% s106 . h# j8 x4 O6 s, J$ m107 Z# ? A/ |4 h: E
1088 ]" S8 {7 q% p" Y
109* e/ D- C) `0 e( ~2 D
110' Z m6 F# g `
1113 k! ^$ t. O7 `; [( t$ |& h
7.2 开始训练模型
我这里只训练了 4 轮(因为训练时间真的太长了),大家自己尝试时可以调大训练轮次。