初始化模型架构，步骤如下：

1. 将训练好的模型拿过来，设置 pretrained=True，得到他人训练好的权重参数。
2. 可以自己指定要不要把某些层冻住：要冻住的层将其梯度更新（requires_grad）改为 False。
3. 无论是分类任务还是回归任务，都要将最后的 FC 层改为相应的输出参数。

官方文档链接：https://pytorch.org/vision/stable/models.html

# 将他人的模型加载进来
def initialize_model(model_name, num_classes, feature_extract, use_pretrained=True):
    """Build a torchvision model for transfer learning.

    Loads a pretrained backbone, optionally freezes the feature-extraction
    layers, and swaps the final classifier for one with ``num_classes`` outputs.

    Args:
        model_name: one of "resnet", "alexnet", "vgg", "squeezenet",
            "densenet", "inception".
        num_classes: number of output classes for the new classifier head.
        feature_extract: if True, freeze the pretrained weights so only the
            newly created layers receive gradient updates.
        use_pretrained: load pretrained (ImageNet) weights when True.

    Returns:
        (model_ft, input_size): the model and the expected square input
        resolution (224 for most backbones, 299 for Inception v3).
    """
    model_ft = None
    input_size = 0

    if model_name == "resnet":
        """ Resnet152 """
        # 1. Load the pretrained network.
        model_ft = models.resnet152(pretrained=use_pretrained)
        # 2. Optionally freeze the feature extractor so only the FC head trains.
        set_parameter_requires_grad(model_ft, feature_extract)
        # 3. Get the FC layer's input feature count.
        num_frts = model_ft.fc.in_features
        # 4. Rebuild the head. BUG FIX: the original hard-coded 102 outputs
        # here; use num_classes so this branch honors the parameter like
        # every other branch does.
        model_ft.fc = nn.Sequential(nn.Linear(num_frts, num_classes),
                                    # dim=1: softmax across classes (per row),
                                    # so each row sums to 1.
                                    nn.LogSoftmax(dim=1))
        input_size = 224

    elif model_name == "alexnet":
        """ Alexnet """
        model_ft = models.alexnet(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        # Replace classifier[6], the final FC layer of AlexNet's head.
        num_frts = model_ft.classifier[6].in_features
        model_ft.classifier[6] = nn.Linear(num_frts, num_classes)
        input_size = 224

    elif model_name == "vgg":
        """ VGG16 """
        model_ft = models.vgg16(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_frts = model_ft.classifier[6].in_features
        model_ft.classifier[6] = nn.Linear(num_frts, num_classes)
        input_size = 224

    elif model_name == "squeezenet":
        """ Squeezenet """
        model_ft = models.squeezenet1_0(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        # SqueezeNet classifies with a 1x1 conv instead of an FC layer.
        model_ft.classifier[1] = nn.Conv2d(512, num_classes, kernel_size=(1, 1), stride=(1, 1))
        model_ft.num_classes = num_classes
        input_size = 224

    elif model_name == "densenet":
        """ Densenet """
        # BUG FIX: the original called models.desenet121 (typo), which raised
        # AttributeError; the torchvision constructor is densenet121.
        model_ft = models.densenet121(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_frts = model_ft.classifier.in_features
        model_ft.classifier = nn.Linear(num_frts, num_classes)
        input_size = 224

    elif model_name == "inception":
        """ Inception V3 """
        # BUG FIX: the original called models.inception_V (typo), which raised
        # AttributeError; the torchvision constructor is inception_v3.
        model_ft = models.inception_v3(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        # Inception v3 has an auxiliary classifier; replace its head too.
        num_frts = model_ft.AuxLogits.fc.in_features
        model_ft.AuxLogits.fc = nn.Linear(num_frts, num_classes)
        # Replace the primary classifier head.
        num_frts = model_ft.fc.in_features
        model_ft.fc = nn.Linear(num_frts, num_classes)
        input_size = 299

    else:
        print("Invalid model name, exiting...")
        exit()

    return model_ft, input_size
6 ) n/ q- f" m2 Q6 h7 C7 & y8 s2 l, D g$ R8 ( D9 s7 o! ], X; P D+ v: ~9& t! ~# A I& y ]$ x4 V
10 / T m7 H$ ?' M11% p" a8 I8 h c' `6 q& v
125 F h: S# b+ z1 G" `+ e4 o
13& R, s6 J6 `$ e. l
140 ^# r( U4 Q: k7 f3 m1 ~! t
151 l; P5 h" p% j5 o2 M
160 p. n3 G7 g' M* h
17. K8 K, Z' H: ]8 c
18 6 ]1 c7 N8 l- R; X' u19 " e+ _* f6 Y9 q8 n209 O) L5 h$ e/ ?+ V) ^) \/ ^$ l+ U5 @
21 s. h7 L/ S6 C' F9 Z
22 $ k7 B2 Y! P7 e) E23# h# t2 [! a7 O. K) O8 j& w2 A6 g7 j8 x
24: Y* j+ Z4 P, A
250 B# k/ O; E0 w% P3 K- W
26. N% Q1 L, m+ M Y& x7 i
27( a, _0 c$ @9 O1 n4 H, n% b
287 `7 z' U0 p7 I k, N
29. H: M+ Y/ Q$ B& \
309 o2 H3 o$ U& W- E8 U
31 9 ^( m9 s1 E6 }: [9 G8 E+ K32 $ @, v" {+ Q$ J3 T: {33- V' B: P7 o9 u ~& U. e
34 ( T6 m2 C; x" i$ L/ k35& M: o4 p# Z, G V6 A$ e
36 s" J3 f. l9 ^9 D# U
377 h& R9 H. `2 l* V" S
38 ) q3 _( ]& e0 M; S T39 # }1 T! o2 X/ g8 R403 U- X6 P$ e f$ I& Z9 J8 ^! x$ u
41 1 Z' C* ^" w/ f6 D42 7 ^ t1 u! o z# \. W43 ( J0 v1 W) e S! J- p( [44# L7 t6 y; n6 W& t2 v, q
45 & a; @; M" k; K/ E/ I; [& o" Z46 0 D( d# y: ?2 U1 V n* {/ M47 ( W" }) U2 ]9 I0 z+ W( r7 r/ O480 \) H5 u$ ?% y3 S. ?% f
49 9 @8 H9 o. L9 d+ z503 v5 p& L, j) }8 n# Q. x o9 c# g
51 $ V& {) X/ G: K52: a" z; I1 h! ^; W( O3 r
536 |% A- g+ _) _6 O
54: b( U/ A1 }7 a/ ~- K, u7 \) G
55, ?, s1 }- Q( ^; ?
56 0 f5 }0 b; z* }$ E57 . H* H; ^. k% ~- K- V58- w. z# a) H1 @/ a9 z) e- T6 g
59 4 k8 h# L( X6 Z2 m60 . u' o5 \8 g7 z5 C8 U61 # h _8 O7 z8 i62$ J3 t+ I$ S7 j' B6 J4 ?
63. e9 t: |+ d+ A* }
64# M9 Z: w1 L" q d( F2 X
65! j/ q) x6 v( x" H) U2 P
66. b4 y" A) w* d: M" f; Z8 @, d
67* h m& ]4 E/ d
688 l% g/ E# z- C; q7 c
69& m* F$ E0 O6 Q/ @
704 v6 z- |* D! z
71$ A% K0 k2 K" B! C, a1 u
722 M |+ l1 r; A( l& n
730 L: d0 ]5 _$ `: e& e/ K
74: f% z* h" w/ ~" V
75 6 @ g2 r; ?8 H H$ R$ J76 7 [! r; Y5 J& |' y# s% Z! [; v1 V77! [# E( i3 J8 E$ E; k6 o: j3 Q0 V* L
78% c. V8 B; C- R% W
79 ) A4 N$ @" R7 b80, d9 `4 X6 D, w9 o4 z9 J* t% A' Q
812 o n0 E6 y* B8 ?5 h
82 8 F: m# x- N# X0 {# J83# W( w" T; w, G9 W
7. 设置需要训练的参数
# Build the model: architecture name and number of output classes (102).
model_ft, input_size = initialize_model(model_name, 102, feature_extract, use_pretrained=True)

# Move the model onto the compute device (GPU when available).
model_ft = model_ft.to(device)

# Checkpoint path: the best weights seen during training are saved here
# so they can be reloaded later without retraining.
filename = 'checkpoint.pth'

# Decide which parameters will be optimized.
params_to_update = model_ft.parameters()
print("Params to learn:")
if feature_extract:
    # Feature-extraction mode: collect only the parameters left unfrozen
    # (the freshly created classifier head).
    params_to_update = []
    for name, param in model_ft.named_parameters():
        if param.requires_grad == True:
            params_to_update.append(param)
            print("\t", name)
else:
    # Full fine-tuning: every parameter trains; just list them.
    for name, param in model_ft.named_parameters():
        if param.requires_grad == True:
            print("\t", name)
. O( f6 z6 W% `1 M# S
14 Z- e& Y1 h5 G
2 % J! ]2 ]& k0 R0 [8 y( q3: Z4 e1 g" w: P8 p& @4 A: w
4 9 X* u7 ?8 J8 w6 H8 t. q, Q5 0 R+ @1 m2 k7 |6 & m" @% M1 [: J2 H, A/ U3 ?; @9 a; C7& a* Z7 B. N! j1 G M+ d4 ?
8 6 b& Y" V' V* w, ?+ ~9 # |. ~2 I7 M; P+ w$ _: c10/ s. u4 D A( f, l
11 \; u1 b: F l3 I1 V! t# T( @127 c4 _! v0 ]4 C8 N, k# P$ Q
13 ) `6 B, h' w) L$ y, [) s9 e14/ ]0 Z, m/ k6 U& r7 o" e
15 U2 {- v' p, l/ q Z
165 W3 v7 S/ j8 u% Y% T
17 " w5 e6 d6 d5 S9 r& v4 V8 `( c18 ) m: T! P/ V- ~: w3 k19 R, g) c0 ], ^2 @7 j O
20 $ P$ L( b4 X0 ]/ @( A/ ^0 s21 & C1 }) |0 f" q5 k1 K22 8 l p0 \4 W7 _" q4 Y23 " ~& O3 O. Y6 U# aParams to learn: ) k. y4 M" M1 P$ x# h" F' K fc.0.weight% |# A! S7 w& c; B) R0 T
fc.0.bias, W7 _6 I) R. L- J' {/ C
1' J, Q# v) ~1 @6 |* @
27 E5 a+ u# Y! r3 R+ N8 S. X
3( R" t% H" F& H& _
7. 训练与预测
# 7.1 Optimizer setup
optimizer_ft = optim.Adam(params_to_update, lr=1e-2)

# Learning-rate decay policy: every 7 epochs the LR is multiplied by 0.1.
scheduler = optim.lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

# The classifier head ends in LogSoftmax(), so nn.CrossEntropyLoss() cannot
# be used (it applies LogSoftmax internally); pair LogSoftmax with NLLLoss.
criterion = nn.NLLLoss()
1 . R- `; d- e( ~* `21 Y O2 j2 ~5 D( ^
3% e/ ~- L' @6 b1 b0 P; G2 } ?4 T
4 9 O; ~$ I5 e! I% j1 R5 ]' C) Y" n51 n, z/ L9 {% Q7 D/ e6 ^) J
# Training loop.
# is_inception: set True for Inception v3, which returns an auxiliary output
# in training mode that needs its own loss term.
def train_model(model, dataloaders, criterion, optimizer, num_epochs=10, is_inception=False, filename=filename):
    """Train `model`, checkpoint the best validation weights, return histories.

    Args:
        model: network to train; moved to the module-level `device`.
        dataloaders: dict with 'train' and 'valid' DataLoaders.
        criterion: loss function (NLLLoss here, since the head ends in LogSoftmax).
        optimizer: optimizer over the trainable parameters.
        num_epochs: number of epochs to run.
        is_inception: add the 0.4-weighted auxiliary loss used by Inception v3.
        filename: path where the best checkpoint (state_dict/best_acc/optimizer)
            is saved.

    Returns:
        (model, val_acc_history, train_acc_history, valid_losses,
         train_losses, LRs) — model carries the best validation weights.

    NOTE(review): reads module-level globals `device` and `scheduler`;
    consider passing them in explicitly.
    """
    since = time.time()
    best_acc = 0  # best validation accuracy seen so far

    # Select GPU or CPU.
    model.to(device)

    # Histories kept for later plotting/diagnostics.
    val_acc_history = []
    train_acc_history = []
    train_losses = []
    valid_losses = []
    LRs = [optimizer.param_groups[0]['lr']]

    # Snapshot of the best weights so far.
    best_model_wts = copy.deepcopy(model.state_dict())

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch runs a training pass then a validation pass.
        for phase in ['train', 'valid']:
            if phase == 'train':
                model.train()  # training mode (dropout/batchnorm active)
            else:
                model.eval()   # evaluation mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over the whole dataset for this phase.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # Reset accumulated gradients.
                optimizer.zero_grad()
                # Compute gradients only during the training phase.
                with torch.set_grad_enabled(phase == 'train'):
                    if is_inception and phase == 'train':
                        # Inception v3 returns (main, aux) outputs while
                        # training; combine losses with the canonical 0.4 weight.
                        outputs, aux_outputs = model(inputs)
                        loss1 = criterion(outputs, labels)
                        loss2 = criterion(aux_outputs, labels)
                        loss = loss1 + 0.4 * loss2
                    else:
                        # ResNet and the other backbones take this path.
                        outputs = model(inputs)
                        loss = criterion(outputs, labels)

                    # Predicted class = index of the largest output.
                    _, preds = torch.max(outputs, 1)

                    # Update weights only in the training phase.
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # Accumulate loss (weighted by batch size) and correct counts.
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corrects.double() / len(dataloaders[phase].dataset)

            time_elapsed = time.time() - since
            print('Time elapsed {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            # Keep the best-so-far model (by validation accuracy) and save it.
            if phase == 'valid' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
                state = {
                    # state_dict holds the learned weights and biases.
                    'state_dict': model.state_dict(),
                    'best_acc': best_acc,
                    'optimizer': optimizer.state_dict(),
                }
                torch.save(state, filename)
            if phase == 'valid':
                val_acc_history.append(epoch_acc)
                valid_losses.append(epoch_loss)
                # BUG FIX: the original called scheduler.step(epoch_loss).
                # StepLR.step() takes an (deprecated) epoch index, not a
                # metric — passing the loss silently corrupted the decay
                # schedule (visible as erratic LR values in the run logs).
                scheduler.step()
            if phase == 'train':
                train_acc_history.append(epoch_acc)
                train_losses.append(epoch_loss)

        print('Optimizer learning rate : {:.7f}'.format(optimizer.param_groups[0]['lr']))
        LRs.append(optimizer.param_groups[0]['lr'])
        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # Load the best weights back before returning the model.
    model.load_state_dict(best_model_wts)
    return model, val_acc_history, train_acc_history, valid_losses, train_losses, LRs
6 m4 H( G( `! ?8 G1% }' ~! W9 v% f# ]- I# _. E' r
2 : ?3 b; u& {8 X* r% b3 X3! T+ U' k: K! j, v7 K5 s
4 + o: a9 `! }1 _% g: e5 ! {, ?( G! A2 t e! G6 5 U' g" @( ?1 _0 V* H7 g. p/ {1 p7& S0 M/ \5 o7 y, p5 ~
8 * _- u' K% G9 Z) ^* u2 p, o& r9 8 H( g/ t6 q; @- K0 z- V10* w2 L. n' j, x8 Q' L7 Z" S$ e
11 8 g5 ~7 ?9 V! u* L7 W$ z7 [12 # q5 u& K' {( D }* r13 $ _7 ?2 g# w1 U/ L14 . d! n2 y8 _0 v& A }7 m) O, D9 B7 M! m15/ j* f& V5 h1 n3 @/ g3 m
16 3 ^9 h5 x! f2 U% y/ D7 { r1 }5 V17; A) w' {% H$ p7 D1 u* Q% x8 n
18 % n" b1 e+ d5 v1 e/ u19: b* m% [/ X$ B1 E8 h
20 4 X& e1 D- P9 k# k216 D/ q7 e5 N$ |. z& Q. N* g3 h" f7 W
221 s2 ]. }# V! ]: l; @ F
23% R- x1 K5 @4 _: b8 Z
244 M& K* g- R! x2 g! a7 @) ?
252 t* k4 y/ }2 G& ~
26 , P. C+ V, ?. A0 U5 n4 P6 T27 p8 A9 s$ C" M28 ) L! Z3 [* W# X; [$ P292 l* `5 c z3 j
30 6 @+ Z& p5 E- f0 z& \/ y5 @1 L$ B$ R31, s1 p2 M- n) I3 @$ ?
32 6 p' A, X1 B0 p0 Y9 A! q33$ r+ C8 N* P$ m4 F
34: |7 E( H9 `" e: |1 S
35 & Z/ C+ L& s6 {5 X3 e+ R3 L! {4 q36 6 R% \+ J! J/ O+ ^3 y( \4 l37- A& f4 _' |: N
38 & C$ C$ T) J* c- |396 i Z( e5 v) V; g) r1 p S! \
40 - s' u3 o F& K: y: k! @. |9 J( D9 r41 ]7 X2 y& B( c7 o: B( A# f
42, R! t. C4 {; _$ E8 i) Z& E
43' u" y4 N5 j( c& W9 f
44% |1 }# w8 f! D6 u$ d5 l8 S) y) h& C
45 1 ]/ s; y" i, X. u5 |46 # r6 l% d6 g: ^& ?# r477 `: ~3 V# G5 w, T. r
48 + x8 r6 I4 t4 Z2 a+ D( O: k49 9 q6 b# a }) w+ I& Y; M. k50 6 j" @9 ^- O! x# N$ R2 D! F# ~# X51, e, u7 U1 D4 H- H
52 $ P2 s# `/ L& Q+ A8 M; ^/ |4 d53 : @' ^. n& S& U3 C& [541 [1 O: q& ~ T+ u9 r7 S
55# W% w+ O1 o6 a# `2 Y0 V, K. s
56 ( X; B! ^2 u" N/ X+ X' N- G579 K. D, y! z% J, L/ _
58 , a! b0 `4 s7 n4 M( [593 r; F+ y3 ~# }( ?$ p
60: z1 k% ~2 ?; k/ G6 b
61 3 c8 c" g8 [( g k& f' u1 F62 $ |/ F; } _, o7 }( j) h# B63. z( [: C u P" x
644 B4 c' }" I- k5 Z+ G4 J
65 6 X( C( u; P/ z66' T6 {& L+ ~# Z' d$ D( [
67$ J: e; M1 Z; b' R9 z/ S
68 & ]) g. \7 X5 Q9 v2 L69 $ Q$ @* w b+ S% z' |- A701 h0 l, N: s7 a2 x' |3 z8 B
716 Y j7 K o/ l( y
72 ( h& J2 h) y$ q( U; _73 , ?' l# D' O' @2 q5 s! o1 _9 m74 c( L- C3 o; W
75) \* H2 @# n' f3 o/ }/ w
767 Z4 d5 x) k2 L. Y" L3 R0 F2 ^
77 - G7 T, T# m- e/ R5 ~ F78 % q) ~0 T3 G8 Z2 W79 6 A& |) ~' Y, z1 m/ }2 L- u* c0 r) U80 + O4 W+ S. }! n8 ?$ W, [3 L. J+ L81/ [- E2 G5 V( m6 D d* v8 l
823 t, ^! E; H/ Z
83" _, f6 h C9 o
84 _0 |3 D- G+ G0 r% R6 F6 N5 q1 M$ {85 ) b! _, U/ E' e860 ^) U& C, P, K
87 + x& c$ X$ p6 q- J' S# a; Q2 V2 I88/ C6 g' H3 J' y- b( p, W
892 R- h' D" i8 b+ H7 }& x
90 . [# ]* Q' |, S$ }. G8 ?91 6 b$ x ^5 @) r. l92, r7 d$ Y( t2 P6 o
93# V5 f( t0 b3 [' m( B5 K& s
941 F# B% i' m C1 n' Y. n
95 7 o6 u$ @ j! {9 O/ m960 @# w+ C6 ~9 i" o$ @* u- s. M
97 ( f4 o/ I n; f: J/ U98 7 W( K& F/ d | r4 D99 ; J/ H; P2 R1 s& e2 q100: a& h$ z( x! y- a0 D) Y
1010 s/ W( J0 H* D/ {
102/ U5 f& W3 v% |! J0 O3 m+ N! Z' W
1034 x8 i: x1 R% _6 g- {0 N T
104 2 r/ L+ j& L# S/ C2 U105 : j2 r0 G/ b$ K" ^0 U1064 W, B" z5 S# L0 P* K: q
107 $ k/ [- T# u% k4 p: \! H2 j1 u1 P8 k108 : r: ~+ e E% S- F$ ~# s3 O109 1 U0 m6 E& V) ~9 t" N G* V- J3 v5 [1105 [0 W# }6 o3 o) E0 V
111 / d$ C+ ]! F, m1 y8 a1 R, S* a112$ i8 i% D6 W* r! J! S2 E
7.2 开始训练模型

我这里只训练了 5 轮（因为训练真的太长了），大家自己玩的时候可以调大训练轮次。

# 若太慢，把 epoch 调低；迭代 50 次可能好些
# 训练时观察：损失是否下降、准确率是否上升；验证与训练差距大吗？若差距大，就是过拟合
# Train only the unfrozen classifier head first.
model_ft, val_acc_history, train_acc_history, valid_losses, train_losses, LRs = train_model(
    model_ft, dataloaders, criterion, optimizer_ft,
    num_epochs=5, is_inception=(model_name == "inception"))
* z( ?4 v+ [. B/ J) {( e& N% C2 k
1 " A1 L. t' P: i" E- L6 R2 ' X/ m; z, P$ |4 e) G/ v) Q; n7 E3 2 c# J9 T6 g# a5 k4' `. J6 m& d5 k3 A' S3 d
Epoch 0/4) l# I2 Q9 C% t/ C, x; K/ o
---------- 9 D+ ^# P% q; ?2 ?, |. C3 o1 V9 [Time elapsed 29m 41s 0 Z. ^2 }( N- B7 e3 ]# ftrain Loss: 10.4774 Acc: 0.3147, V: n5 Z1 o2 i: r
Time elapsed 32m 54s ; h) E4 l8 R/ l& t4 \5 U: ^valid Loss: 8.2902 Acc: 0.4719* C! D" J( l: M) k h% U+ q& z
Optimizer learning rate : 0.00100006 @- m' A0 r( c, ~5 C, z* E; c, z
8 ~0 l9 X/ i/ A- c& T
Epoch 1/4 p5 b. e. X& c5 W+ v$ x- D
----------+ ]: [, F6 P' w+ ]# P
Time elapsed 60m 11s ; V- i c9 W. d4 T1 ftrain Loss: 2.3126 Acc: 0.70535 G4 {$ h" s" `/ j- _2 E' ]( b
Time elapsed 63m 16s+ d; ~8 D: R2 Q" v1 S; b) G7 N
valid Loss: 3.2325 Acc: 0.6626 / Q, Q7 V2 n7 h; r* `, U& L3 HOptimizer learning rate : 0.01000002 Z$ z* \/ N- s. R
( o; R5 J% `5 A4 p) f: z
Epoch 2/4- r+ z( s G; B! N+ z
---------- ( b4 Y8 X3 u+ M5 v; yTime elapsed 90m 58s : P; v; n# L! l; E* ytrain Loss: 9.9720 Acc: 0.4734 ; |3 r% g G& n% J1 aTime elapsed 94m 4s 7 U+ Q! D. g. U- A, U& r' gvalid Loss: 14.0426 Acc: 0.4413 + ^/ c$ U* Q2 t" c; P2 O" f/ HOptimizer learning rate : 0.0001000 - L1 G1 P8 Z. ]; j / C$ K( Z5 R" BEpoch 3/4) f2 @/ d- n' q( N+ R2 N
---------- $ R" [) N6 e# N( R% g, LTime elapsed 132m 49s 1 m& a8 O! N% D6 v( y }- ztrain Loss: 5.4290 Acc: 0.65481 Q0 G1 `- }* D4 W+ R
Time elapsed 138m 49s , N4 J: O1 {# W6 G& nvalid Loss: 6.4208 Acc: 0.6027+ u- ]! g: y: e& r- |+ E
Optimizer learning rate : 0.0100000$ f: w" t) u, Z3 m0 o, T% Y
# U, `) L" j9 b
Epoch 4/4 + x$ A6 S6 ?* N, ]( w/ h/ x' i----------& |4 I6 X; Q3 a
Time elapsed 195m 56s : }: t9 n$ n4 T0 Z/ R8 htrain Loss: 8.8911 Acc: 0.5519; P0 T9 z4 c Q) F/ K
Time elapsed 199m 16s/ z' [$ \+ i& u
valid Loss: 13.2221 Acc: 0.4914 9 b, T" ]- U2 V( P0 E# B5 s5 GOptimizer learning rate : 0.00100002 u2 J0 _/ k8 _3 q. q
! K- Z3 B5 Q0 k5 F6 M- V' JTraining complete in 199m 16s( ^9 H; r9 Z; m: |" a
Best val Acc: 0.662592$ E+ i w9 o7 Y
" W' H1 Y" J @8 N9 z0 |
1 4 X v" ]2 t) f K2& g* U- l: `4 D; F! J! p2 ~, x
3( Q! u3 ^1 i- S/ ^
4 6 q9 E% u' c( _1 q, r5 1 @) x% i9 a% N$ \6 ) N0 K$ V1 m( ^1 ]$ @; W1 F7 / _- r% X( p: g; K' K8 $ a; G) e, P% j' O- l2 F8 P% Q, h1 V9 ( ^) t v, O. t+ b/ X' v; E6 T10( O" q: S2 c* a* d) {5 ]# y
115 G8 q: z! {% `* Q
12 1 J# }6 Y3 q( B( f& i; I$ I1 ^136 V9 {" g" X2 {7 {: N
14 , q; O, K4 u$ i* U% B+ `4 E150 z% s$ m" M* P& f, V2 N
16 / j( h) Y+ V6 S! f- j# q. ]17 }6 v2 d m% a/ w
18# n$ B8 M. p) Z! X4 B" A1 I
196 J) s: z( `! Q2 d
202 z9 }* [% i- y% i( a$ r
219 l3 g- C3 L& r
22 " V/ J6 z+ B( I% _* C23 2 g7 I7 b! C$ A- K6 k3 \" f- T24/ c9 H/ K$ S) q/ m1 R; Y Y
25' D/ y* D" {7 v9 m
26: C3 N8 C& K2 S) T+ _. ~1 J9 q5 h2 U
27 " B6 C8 r/ s4 Z: n6 N% k6 X28 ' A+ F& T% h1 I, m0 c29 + [5 U- S! e y& @30( G6 M: z# `- m6 r/ T$ C
31; X# C0 {# F- M, L, [) j
32 ( O0 n1 _9 ~ T4 J331 z6 S# _! P" U
34 6 W% }+ G8 e: f/ z355 A; D8 i6 {7 i% A. o
36 . Q5 x; M8 W3 O* _% N! N37 # `* j6 m, e. U' B3 p- T38 . w4 r9 V3 J8 g39 ( T5 a1 w6 ^/ O7 X* u: h40 ( \- j* i! A, n# W0 M. W41 2 Z! j2 a) s3 K" D m1 u; |) e' I42# F4 D& ?- i: W# v$ e2 L
7.3 训练所有层
# Unfreeze the entire network for full fine-tuning.
for param in model_ft.parameters():
    param.requires_grad = True

# Continue training all parameters with a smaller learning rate.
# BUG FIX: the original passed the stale `params_to_update`, which (in
# feature-extract mode) contained only the classifier-head parameters —
# so "training all layers" never actually updated the backbone.
optimizer = optim.Adam(model_ft.parameters(), lr=1e-4)
# BUG FIX: the original wrapped the OLD `optimizer_ft`; the scheduler must
# wrap the new `optimizer`, otherwise decay never touches the LR in use.
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

# Loss function: the head still ends in LogSoftmax, so keep NLLLoss.
criterion = nn.NLLLoss()
2 9 n* F( M8 f. X4 c; P" S( `3- Z8 D' {: Z/ t
4( O9 _4 _, D. v; E) X0 k* c5 q
59 C+ ~4 J! h$ K; E2 N: ~; {
6 6 E6 M- \7 ^; t; v H6 k+ x% n7 8 C% S; G. A8 _8 % c# F; g. ?4 A; D w/ ?: E9) Y7 a! V7 h/ O1 z. [
10+ W- F2 A# E8 H' T- `/ k M+ F
# Load the saved parameters and continue training from the best checkpoint
# produced by the previous (head-only) training run.
# NOTE(review): the saved optimizer state came from an optimizer over a
# different parameter set — confirm load_state_dict accepts it here.
checkpoint = torch.load(filename)
best_acc = checkpoint['best_acc']
model_ft.load_state_dict(checkpoint['state_dict'])
optimizer.load_state_dict(checkpoint['optimizer'])
3 6 J/ A3 L0 f: y; D. G/ G41 ~( F1 p' g, O
5 0 ]3 u6 i7 p0 R68 z- I9 x$ G) D$ S% N
7# b& [5 O5 O7 D. S+ { Z
开始训练

注：这里训练时长会变得特别慢。我的显卡是 1660ti，仅供各位参考。
# Fine-tune the whole (unfrozen) network for a couple more epochs.
model_ft, val_acc_history, train_acc_history, valid_losses, train_losses, LRs = train_model(
    model_ft, dataloaders, criterion, optimizer,
    num_epochs=2, is_inception=(model_name == "inception"))
---------- l! q i1 j: T0 W3 T% W
Time elapsed 35m 22s ; f( |5 `+ v# a( ~( Vtrain Loss: 1.7636 Acc: 0.7346 ! X+ W: @- a/ v: w% s8 mTime elapsed 38m 42s 9 A! X/ Z3 J7 i2 D9 avalid Loss: 3.6377 Acc: 0.6455 " a( X. n& ]' T' s9 [Optimizer learning rate : 0.0010000 - r! l: S" s, j) K* T" C0 c) U8 p. u: M' O+ }! W N$ C
Epoch 1/1 4 p: y2 w) K/ y6 Z6 c+ Q2 r U, O---------- # q, }1 I. } A6 ^& J* k pTime elapsed 82m 59s9 }: s1 u! a9 I
train Loss: 1.7543 Acc: 0.7340$ w0 ?$ m. c* e% X8 x, F0 S
Time elapsed 86m 11s) O# i. ?' |6 t( e! c0 L9 P
valid Loss: 3.8275 Acc: 0.6137 x7 w( _1 m& F) F1 `Optimizer learning rate : 0.0010000 * T! r# [( b. {" _* d w# }5 ~2 e/ A9 e8 A% v& n" ]Training complete in 86m 11s + \4 c9 }5 P" t PBest val Acc: 0.645477' d @4 p7 ~$ z, `2 i& Z! ^( Q' A
8 L' l( C, J8 I/ @2 r0 S
11 p* V, k1 J* |% R/ P
2 9 v6 ?5 A- z: w! K3+ P+ m2 J& g, q# \) U& {; f2 T Z
4 ! D6 J0 t$ k' j4 s* A5' o/ Y, ]8 C8 P7 {
6* }3 ~/ S9 Q0 n+ o
7 2 K( X B; Q& B4 c5 U- E8 % O% O7 N O5 D+ o94 W9 m' k+ d7 i, w
10/ `& c; N8 i4 N" z+ P* ~$ n3 d
11- F6 G5 E! o) v2 J5 ]
12 " J% W& C' X( L# A7 h9 G13 ) i6 t( q, O& P) L5 { |4 E+ m6 [14 3 Z: S) v6 n/ h, C, m& s15 " m {# u7 L* o) U16 6 t7 h; e: I6 p* \8 M; K( c6 @2 S! G2 J17 I9 D. y( |) K5 O: C: E
18% C, y- V& A z" d4 z
8. 加载已经训练的模型

相当于只做一次简单的前向传播（推理），不用更新参数。
# Rebuild the architecture so the trained checkpoint can be loaded for inference.
model_ft, input_size = initialize_model(model_name, 102, feature_extract, use_pretrained=True)

# GPU mode: move the model onto the compute device.
model_ft = model_ft.to(device)