# Decide which parameters will be trained (all layers vs. feature-extract head only).
params_to_update = model_ft.parameters()
# Print out the layers that will actually be trained.
print("Params to learn:")
if feature_extract:
    # Feature extraction: collect only the parameters left unfrozen.
    params_to_update = []
    for name, param in model_ft.named_parameters():
        if param.requires_grad:  # idiomatic truthiness instead of `== True`
            params_to_update.append(param)
            print("\t", name)
else:
    # Fine-tuning all layers: the generator above already covers every
    # parameter; just report the ones that will receive gradients.
    for name, param in model_ft.named_parameters():
        if param.requires_grad:
            print("\t", name)
1 0 ^5 `. t5 {$ Z$ r0 ]# V2" U" y0 n j' v. D
3 ' r" @; P% S ]49 A, j' o+ E c9 Z3 j' B; g5 c p
5 ; P' ?6 j' r6 K$ F6/ j' d# ]6 D0 B7 @
76 M; A- c j) m) R z
87 \$ Q$ r( n, t" H
9- Q3 Q3 g3 s l7 F& V+ E! M
10- ^* ` f+ Y1 T. o% q
11 3 r) C( m Q- R- E8 \2 G9 Z) s5 W1 ^12 5 X# Q' O' n& ?, j9 P0 d! m; I1 f( E; X13 3 t# v/ y7 G# R14% @4 P# i7 R$ \* ?4 ~3 H3 w
15 8 E# Z$ A$ C p* o4 q7 B* k d16: B$ u: j B5 @6 b
17 7 U% x4 p: u7 c( ~) }189 k/ I* U8 P# Y& ~& I% m& a
19 I; r( d) N* O C- b X% P( ~
202 H- w, t6 F6 h ^: q
Params to learn:
	 fc.0.weight
	 fc.0.bias
1 L* U. h0 x6 m+ J
2 ! R7 P: V* _* O" ^8 P+ u t3" h7 j; F. |: L
7. 训练与预测
# 7.1 Optimizer setup
# Optimize only the parameters selected above.
optimizer_ft = optim.Adam(params_to_update, lr=1e-2)
# LR schedule: decay the learning rate to 1/10 of its value every 7 epochs.
scheduler = optim.lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)
# The model's final layer is LogSoftmax(), so nn.CrossEntropyLoss() cannot be
# used here (it applies LogSoftmax internally); NLLLoss pairs with LogSoftmax.
criterion = nn.NLLLoss()
3 0 Z2 a# A& F4 s5 e6 E4: O$ v, j; F* E: g0 a
5 c" t$ m+ v+ ^63 ~% F) @: q) d$ X$ s4 Z* h
7. W. Q% Y! X; r K9 C
# Training function.
# is_inception: whether the model is Inception v3 (has an auxiliary head).
def train_model(model, dataloaders, criterion, optimizer, num_epochs=10,
                is_inception=False, filename=filename):
    """Run the train/valid loop and return the model with the best weights.

    Args:
        model: network to train (moved onto the module-level ``device``).
        dataloaders: dict with ``'train'`` and ``'valid'`` DataLoaders.
        criterion: loss function (NLLLoss here, since the head ends in LogSoftmax).
        optimizer: optimizer over the trainable parameters.
        num_epochs: number of epochs to run.
        is_inception: if True, combine main and auxiliary losses (Inception v3).
        filename: checkpoint path; the best-so-far state is saved there.

    Returns:
        (model, val_acc_history, train_acc_history, valid_losses,
         train_losses, LRs)
    """
    since = time.time()
    best_acc = 0  # best validation accuracy seen so far
    # To resume from a previous checkpoint, uncomment:
    # checkpoint = torch.load(filename)
    # best_acc = checkpoint['best_acc']
    # model.load_state_dict(checkpoint['state_dict'])
    # optimizer.load_state_dict(checkpoint['optimizer'])
    # model.class_to_idx = checkpoint['mapping']

    # Run on GPU or CPU. NOTE: relies on the module-level `device`.
    model.to(device)

    # Histories kept for plotting/inspection after training.
    val_acc_history = []
    train_acc_history = []
    train_losses = []
    valid_losses = []
    LRs = [optimizer.param_groups[0]['lr']]
    # Keep a copy of the best weights seen so far.
    best_model_wts = copy.deepcopy(model.state_dict())

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training phase followed by a validation phase.
        for phase in ['train', 'valid']:
            if phase == 'train':
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            running_corrects = 0

            # Iterate over the whole dataset for this phase.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # Reset gradients from the previous step.
                optimizer.zero_grad()
                # Compute/update gradients only during training.
                with torch.set_grad_enabled(phase == 'train'):
                    if is_inception and phase == 'train':
                        # Inception v3 returns (main, aux) outputs; the aux
                        # loss is weighted by 0.4.
                        outputs, aux_outputs = model(inputs)
                        loss1 = criterion(outputs, labels)
                        loss2 = criterion(aux_outputs, labels)
                        loss = loss1 + 0.4 * loss2
                    else:  # resnet path
                        outputs = model(inputs)
                        loss = criterion(outputs, labels)

                    # Predicted class = argmax over the outputs.
                    _, preds = torch.max(outputs, 1)

                    # Update weights only in the training phase.
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # Accumulate loss/accuracy weighted by batch size.
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corrects.double() / len(dataloaders[phase].dataset)

            time_elapsed = time.time() - since
            print('Time elapsed {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            # Track the best validation accuracy and checkpoint it.
            if phase == 'valid' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
                state = {
                    # state_dict holds the learned weights and biases.
                    'state_dict': model.state_dict(),
                    'best_acc': best_acc,
                    'optimizer': optimizer.state_dict(),
                }
                torch.save(state, filename)
            if phase == 'valid':
                val_acc_history.append(epoch_acc)
                valid_losses.append(epoch_loss)
                # BUG FIX: StepLR.step() takes no metric. The original passed
                # epoch_loss, which StepLR interpreted as an *epoch index*,
                # making the LR jump erratically (visible in the logged LRs:
                # 0.001 -> 0.01 -> 0.0001 -> 0.01). Step once per epoch.
                # NOTE: relies on the module-level `scheduler`.
                scheduler.step()
            if phase == 'train':
                train_acc_history.append(epoch_acc)
                train_losses.append(epoch_loss)

        print('Optimizer learning rate : {:.7f}'.format(optimizer.param_groups[0]['lr']))
        LRs.append(optimizer.param_groups[0]['lr'])
        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # Use the best validation-time weights as the final model.
    model.load_state_dict(best_model_wts)
    return model, val_acc_history, train_acc_history, valid_losses, train_losses, LRs
; @ E; h/ f: x: ?7 {) e
3 y$ O, O* O2 b% C- L2 R
1 $ ^- l) z0 z# v2 6 w2 R* b9 k( O+ I1 b v9 ]3 ) B% D0 T$ n o4 - [, S: ^" t! D# t5# E+ v$ w' E. P1 c# x7 Z
6 * e# \1 v- _ S/ v, F71 ` o* D! O- \
8$ o/ ~6 y6 x+ w' a/ H( }2 R, F
9 2 B- D3 u+ E* v' w% v10 ' G/ j! }- D' Y" J5 f" F11 5 r+ D2 d9 b3 q' S* O# T9 R1 c3 {12. P9 }4 v. s# o+ z8 ]. Q5 L
13 % l' Q7 U+ m7 C$ P) e4 _# T: J: [) v. e14 ; c4 ~# t% Y, P# L- ~) G& [15 0 r8 V( c' q1 _# K* n! j9 z16 3 n( p7 h8 l( x$ ] J173 x: D/ _$ u+ A3 \: ?- @
18 " }4 R! Y% k; J j E. r19 ( D% G- Q! _# l# y# R9 d20+ X2 r( h- K0 ]
21; P% {4 t- ]" w
22 ' Q: o4 o8 T W23- f c9 i' @6 ^$ f9 d" p, r9 j
24 ~$ Z: i. E7 ?0 @0 g/ E255 e$ `. X6 {0 j U
26 6 l* }% X2 X9 y4 i7 @1 k2 M/ [27( H% `' ^# A$ }; _' s* g9 ? Q% C
28 6 w2 W2 ^0 ^) C29 _2 y+ E" o/ G/ X9 Y6 ~6 G
30( M9 O& k1 U- `/ n" R( y9 ?
316 p7 w% ?& U. E4 o
327 n6 `7 a* t8 ?* k) u- s( r2 @) `3 ~
33 ; I$ ^4 J" y8 n f345 x7 }0 u+ H9 u2 }
35# R$ g6 u( D+ |* K' O# J$ ^
36 ; Q& t5 z8 l( `9 n% |37 0 F% s8 Q4 y3 H: K, ^/ [38) Y" l4 `! S5 P9 v. s1 _
39 3 [+ s, Z% t5 S. Y7 D4 B' Q- d40. J' T) \ H. K+ M# M% l, I1 j) c
41 & j6 n4 p' V/ C0 e/ a0 f' k W: L42- d! {* \; |, |3 e2 ]# K
43* {+ E$ C1 Q/ X
44! d n* x0 y6 S6 P7 ]$ |
45 3 T) e' O3 H: ?4 C, M46 & z1 ?9 }3 N% A; N47" t! V9 C) [8 Q4 O" H5 K
48 8 W3 w; v2 i; _+ m& f8 U49 + I5 ?6 G# M2 h- K% Z50& `; R. ~( ]" Z& i) w
51 ! f' y' a' q2 |; I0 C52 3 E' ~/ n. o( a4 C9 a" j53 . q" @7 E6 a Q) A( k% D54 : o" t' k6 j" \- t' \6 }55" ^% u) t: M# D1 \* z( D
56 # V6 m; ~ W1 N" I57 3 U9 ]) f; ?; m- ]' U( F- T58 , v; D2 w( \2 D2 `1 ^: _59 * `+ `% I* R) S3 u. v" ]60 ; b* R( ~0 K& v" @" G61! _# x5 Y' \0 I- {
62+ j K( h2 N; F) |' u
63$ M- y. `' |9 A' X/ W& I- e1 G: o) U
64& R* i1 B4 i+ m# \' y, [- i
65 : v* w8 |# l9 h6 U( ~) o/ N66 , }9 }# U7 P( U3 |% W T67 / c2 z; K# x3 i" v9 x) i68 ' \# z% \& W+ P+ e9 G69 + T \% J0 s k% o4 U0 ?6 k& y70 0 C8 h4 h. ^( ]( _0 `/ r71 . l# V+ W1 E" I, x, }. A5 X: H72 # l: l; J' D: |" \73) W2 t+ u# R, U- j
74$ Q) \! u$ L% p
75 c- \- b/ V9 q; a. \+ E5 m
76 ( `/ p8 ]. N+ K2 H77 8 p! V$ I, f" d C) B! Q0 B78 ' x1 }- `+ i5 q) h$ A+ R& S* H79 1 s( {- k4 _6 n% B80 \6 g- N6 C' h8 T) _1 C1 v4 ?81* F8 V8 l4 p) x; s. N4 G
829 x1 X4 R; u) r1 r$ [4 w
83 ) j* {% j) Z* T84 8 Z! F- C( F X" W85) f: q, H/ ^4 s
86. n: i; M" q2 \; e8 t. h. H
87. |+ c" o0 N# N* D; n: K1 y
88& ~4 l8 f. |$ c1 }! }, v
89 + a0 G+ u2 `9 J90 ! H" i% l/ g: _) f* ?. i, e91 # w! L" F" v& U- i92) A9 g/ p* J9 }5 X! p/ Z$ J% c8 a
93) m3 j, s7 |- p/ d
94# ~( j7 Y. l" G' ~4 I/ H% c
95$ F+ ^& b$ t& V" D# J
96 $ B4 P- ~) l8 }9 P7 I' f: L6 n97 8 d0 K/ f4 T2 H5 v% h" C1 i98: u5 r- z" l+ @( y
99# Y# Z$ o0 ]7 M9 q z
100 , y" K7 r- H( r) J& f* V101 , Y2 K( m8 d& ?2 e9 R: E1027 B8 C! J8 y& g( q
103. ?3 L2 k) C( e9 f+ t: G
104 ' a" L* c# A0 ?. f105; l+ X% ^7 A" o* e' U4 u
1069 t3 b. }* I# w' {
107 ; V6 W9 V, _# r! ~/ O+ Y. P108 # p; L, j; d% j1090 S" Y5 h9 C; k( d5 N
110% l S8 ]; M$ L- p% ~2 w! l$ B2 R
1112 L% d9 z( l& e, h
112/ S4 ` D( K# I7 y" }
7.2 开始训练模型
我这里只训练了5轮(因为训练真的太长了),大家自己玩的时候可以调大训练轮次
( ]; L7 U: V7 ]0 B
# If training is too slow, lower the epoch count; around 50 iterations may
# give better results when you have the time.
# While training, check that the loss decreases and accuracy rises, and
# compare train vs. validation metrics: a large gap indicates overfitting.
(model_ft, val_acc_history, train_acc_history,
 valid_losses, train_losses, LRs) = train_model(
    model_ft,
    dataloaders,
    criterion,
    optimizer_ft,
    num_epochs=5,
    is_inception=(model_name == "inception"),
)
2! X% O" X+ J7 |) f' y1 p& j
36 u/ @ W3 v# F; C
44 R' Y2 s" o: `0 B( x- T
Epoch 0/4
----------
Time elapsed 29m 41s
train Loss: 10.4774 Acc: 0.3147
Time elapsed 32m 54s
valid Loss: 8.2902 Acc: 0.4719
Optimizer learning rate : 0.0010000

Epoch 1/4
----------
Time elapsed 60m 11s
train Loss: 2.3126 Acc: 0.7053
Time elapsed 63m 16s
valid Loss: 3.2325 Acc: 0.6626
Optimizer learning rate : 0.0100000

Epoch 2/4
----------
Time elapsed 90m 58s
train Loss: 9.9720 Acc: 0.4734
Time elapsed 94m 4s
valid Loss: 14.0426 Acc: 0.4413
Optimizer learning rate : 0.0001000

Epoch 3/4
----------
Time elapsed 132m 49s
train Loss: 5.4290 Acc: 0.6548
Time elapsed 138m 49s
valid Loss: 6.4208 Acc: 0.6027
Optimizer learning rate : 0.0100000

Epoch 4/4
----------
Time elapsed 195m 56s
train Loss: 8.8911 Acc: 0.5519
Time elapsed 199m 16s
valid Loss: 13.2221 Acc: 0.4914
Optimizer learning rate : 0.0010000

Training complete in 199m 16s
Best val Acc: 0.662592
! [/ y; ^7 s$ v$ u$ {6 z/ t
1 p9 D6 P z: V
2 3 W/ }1 H* u' L# S3) X Z/ t+ m+ W5 @/ d3 s0 D
4 - J6 W! y2 D1 ]4 ^3 o4 z( p50 j) f3 A: d% ]: b2 K
6 D0 m3 ?% g; @, k% i( l
7 - B2 t/ d5 p5 R* W2 \$ d& [8 . r! o' O/ ?; a0 j9. f; B S4 ]2 N
10 g5 P0 h/ G/ e11 . M. V* ?% s9 w, J4 |6 G12# t# H# B" ]& ]- r4 ]
13 . H5 a6 S* S) O3 q0 o14 4 O8 M6 P# L( I% }( @% _15 6 h) d/ I2 a& S# y, E1 i( Z0 s$ w16 8 z6 ~/ s) x- B; l, @$ X. @$ K17: v9 c6 x, n& n% F; J0 R4 ^0 x
18 . @$ b' I( q8 {190 d/ \4 }8 i, l) `% E0 P
20 8 R( D% r+ {3 J5 f6 {# g ?21 . J0 h6 T0 }1 Q& O- V8 f22 ( q6 V1 M7 P0 p. R6 m4 ^, B23 " b4 L. W0 s0 ]" P! V( I3 ?! e24. c& I$ C8 t# b9 C7 f$ I+ ?0 t
25 # Q1 y1 N1 l5 J' k3 O. z26 * M& x7 a- ~6 a) H6 k3 m27 % W) m. S" K; H28 ' q7 I. Q$ ]0 Z; K$ K29" R* a# q0 o6 c4 W% q
30 5 J0 f, a3 l& {& {: L0 B31 ! F! ~' u& w2 T% n1 Y t% Z+ r32 1 h1 h7 F2 q& q8 A. ]33 * Q7 G! k; J7 p* \- W2 f* H" f34 : M J; N0 P) q35: U. r; @ |+ T
36 ) q4 M" H8 I7 I2 X) x+ V0 x8 R4 h3 M37 g- Z" M: m1 ?7 |8 o2 y% L/ r$ }38) T! t2 B% \; e6 m8 |3 o
39 ) q. B! g \. y, G, X4 m0 c40" N! l& L1 K3 w( f! C
41 : S, m1 K! ]2 w( I, ?42# O( A+ q; t. \" {7 r) B3 c
7.3 训练所有层
# Unfreeze every layer so the whole network can be fine-tuned.
for param in model_ft.parameters():
    param.requires_grad = True

# Continue training with a smaller learning rate.
# FIXME(review): `params_to_update` still holds only the classifier-head
# parameters collected during the feature-extract stage, so the freshly
# unfrozen backbone is NOT optimized here. The intent ("train all layers")
# suggests optim.Adam(model_ft.parameters(), lr=1e-4) — but the checkpoint's
# saved optimizer state (loaded below) matches the head-only optimizer, so
# both would need to change together.
optimizer = optim.Adam(params_to_update, lr=1e-4)
# BUG FIX: the original passed `optimizer_ft` (the previous stage's optimizer)
# to StepLR, so the NEW optimizer's learning rate was never scheduled.
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

# Loss: the model's head ends in LogSoftmax, so pair it with NLLLoss.
criterion = nn.NLLLoss()
12 Y; [7 G P4 n7 W b4 T8 {5 k3 _
2. _2 p9 P! \0 c
3 % ~9 t) q6 c) h1 ~# K4 ' c: y O5 g( H. i$ F2 L) x51 t# D8 B2 t9 X( X
6 ; {% X5 Z: m. |& ~2 b78 z2 o. J0 f9 R* R. ]/ v! _
8 # U/ a$ u5 I* A K4 u* F0 Q) n9' z8 O$ G! H* i5 G
10& @2 R/ T4 R/ Q8 A. F" d: L
# Reload the best checkpoint saved during the previous (head-only) training
# stage and continue training from those weights.
checkpoint = torch.load(filename)
best_acc = checkpoint['best_acc']
model_ft.load_state_dict(checkpoint['state_dict'])
# NOTE(review): this also restores the saved optimizer hyperparameters
# (including its learning rate), overwriting the lr set just above — confirm
# that is intended.
optimizer.load_state_dict(checkpoint['optimizer'])
2" T' N$ K" v: _+ ^2 h; ]$ w7 H
3: E+ m, o( o% g+ N
42 B8 C7 F& ]' }, m( u& }7 \* f7 H' u. P
56 B! b! p; ?0 r5 s. s/ e) }: R
6( {6 P& d9 c$ q$ K; u3 X8 C$ ^
开始训练
注:这里训练时长会变得特别慢;我的显卡是1660ti,仅供各位参考
9 z& d& O+ T1 q- U: B' \
model_ft, val_acc_history, train_acc_history, valid_losses, train_losses, LRs = train_model(model_ft, dataloaders, criterion, optimizer, num_epochs=2, is_inception=(model_name=="inception")), S% H; Q: ]* c4 ]+ T1 g
14 |6 g- F/ ^7 V
Epoch 0/1
----------
Time elapsed 35m 22s
train Loss: 1.7636 Acc: 0.7346
Time elapsed 38m 42s
valid Loss: 3.6377 Acc: 0.6455
Optimizer learning rate : 0.0010000