将别人训练好的模型拿过来,并设置 pretrained=True,得到他人训练好的权重参数。
可以自己指定是否把某些层冻住;要冻住的层将其参数的 requires_grad 设为 False(不再更新梯度)。无论是分类任务还是回归任务,都需要把最后的 FC 层改为与任务相应的输出维度。
官方文档链接:https://pytorch.org/vision/stable/models.html
2 }3 V1 n6 P6 P, _+ i1 i
# 将他人的模型加载进来& U, _" n) V/ T4 Z9 P/ @
def initialize_model(model_name, num_classes, feature_extract, use_pretrained=True):
    """Build a torchvision backbone and resize its classification head.

    Args:
        model_name: One of "resnet", "alexnet", "vgg", "squeezenet",
            "densenet" or "inception".
        num_classes: Number of outputs of the replacement head.
        feature_extract: Forwarded unchanged to
            ``set_parameter_requires_grad`` (defined elsewhere in this
            file); presumably True freezes the backbone so that only the
            new head is trained -- confirm against that helper.
        use_pretrained: Passed as ``pretrained=`` to the torchvision
            constructor.

    Returns:
        A ``(model_ft, input_size)`` tuple, where ``input_size`` is the
        input resolution recorded for the architecture (299 for
        Inception v3, 224 for every other supported model).

    Raises:
        SystemExit: If ``model_name`` is not one of the supported names
            (after printing a message), matching the original behaviour.
    """
    # NOTE(review): only the resnet head ends in LogSoftmax.  The other
    # heads emit raw scores, while the training code in this file uses
    # nn.NLLLoss -- confirm the loss matches the head before training
    # a non-resnet model.
    model_ft = None
    input_size = 0

    if model_name == "resnet":
        # ResNet-152.
        model_ft = models.resnet152(pretrained=use_pretrained)
        # Adjust requires_grad BEFORE replacing the head, so the freshly
        # created head keeps its default requires_grad=True.
        set_parameter_requires_grad(model_ft, feature_extract)
        num_frts = model_ft.fc.in_features
        # Fix: honour num_classes instead of a hard-coded 102.
        # dim=1 applies log-softmax across the class scores of each sample.
        model_ft.fc = nn.Sequential(
            nn.Linear(num_frts, num_classes),
            nn.LogSoftmax(dim=1),
        )
        input_size = 224

    elif model_name == "alexnet":
        # AlexNet: the final Linear layer is classifier[6].
        model_ft = models.alexnet(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_frts = model_ft.classifier[6].in_features
        model_ft.classifier[6] = nn.Linear(num_frts, num_classes)
        input_size = 224

    elif model_name == "vgg":
        # VGG16 (the original note said VGG11_bn, but vgg16 is what is built).
        model_ft = models.vgg16(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_frts = model_ft.classifier[6].in_features
        model_ft.classifier[6] = nn.Linear(num_frts, num_classes)
        input_size = 224

    elif model_name == "squeezenet":
        # SqueezeNet 1.0: the classifier is a 1x1 convolution, not a Linear.
        model_ft = models.squeezenet1_0(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        model_ft.classifier[1] = nn.Conv2d(
            512, num_classes, kernel_size=(1, 1), stride=(1, 1)
        )
        model_ft.num_classes = num_classes
        input_size = 224

    elif model_name == "densenet":
        # DenseNet-121.  Fix: the constructor is densenet121, not desenet121.
        model_ft = models.densenet121(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_frts = model_ft.classifier.in_features
        model_ft.classifier = nn.Linear(num_frts, num_classes)
        input_size = 224

    elif model_name == "inception":
        # Inception v3.  Fix: the constructor is inception_v3, not inception_V.
        model_ft = models.inception_v3(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        # Both the auxiliary head and the main head must be resized.
        num_frts = model_ft.AuxLogits.fc.in_features
        model_ft.AuxLogits.fc = nn.Linear(num_frts, num_classes)
        num_frts = model_ft.fc.in_features
        model_ft.fc = nn.Linear(num_frts, num_classes)
        input_size = 299

    else:
        print("Invalid model name, exiting...")
        # Same effect as the original exit(), without depending on the
        # interactive helper installed by the site module.
        raise SystemExit

    return model_ft, input_size
4 ; Q- l1 ^7 C$ U6 @1 I$ W8 H5 * Z+ O- W& D& t7 m+ E) G7 f9 W6 7 h x0 N7 D" o8 w- H3 o/ P7 / {) x4 E# e2 V2 {8' X2 |% U! n# ]! }& q4 R W' Q
9 ! t6 l. Z- ?6 Q. I) U9 K* ?10 8 ^! e8 t* S! K; g11 z' q7 [* V& o/ v$ O
12 w6 `- h% l h% m/ \' D13 6 B/ u( K+ X& F" U, ]% B' a14 : M4 |0 f! z: D15% x' M9 T- g( l
16 ! F8 h+ B% B5 L9 E/ U0 b. u4 k17 % T, D8 j s: r: L) ^+ a- W$ z18 + e* B" |- S7 z4 I) b8 b1 n19 6 ?9 {9 o: Q/ u" V' M9 |20 / d3 C+ ?% S) h) b! i% ?) }21 4 f. I5 M$ | x, k4 c22 G1 ~) o5 b& p9 V
238 E" j c" w# h, X8 ^2 a" x' a" ^
24$ J( k% P$ k9 |) t y5 ]: Q0 ]. N
25, ]9 t+ }" ^; d: }! r+ L9 E
26) r% J W, \7 y+ T6 K
27 : E3 n' ?# M3 B; f0 c* K28 - i2 h7 L1 L& E, P! Z/ Q29- ^6 h3 ]0 z$ Y5 \* j# I
304 }9 J8 Q2 ]: B- r* n
31+ B' C4 f* ~4 b5 S% @
32 . H/ W7 _: S x/ _+ a33 9 ~; y6 q% ^2 J, x1 C O* r34+ ~- |$ Y6 d" M- Z+ ^. L2 ^) G
35 # d+ p+ Z% P+ `; R369 M# |6 U9 A6 Y
37 % O* y- A( T- E4 M38) R. i. l5 a; h
399 Q+ ] w2 q) j
40, d/ N4 Z' c6 n$ p" l7 a
41: a6 Z: X0 d6 ^; h/ _5 ]
42 ' m& A" O+ p/ [+ I43 0 `/ |' \# J3 k* K44 . T! P2 ?+ A6 [7 \" Z' s45 & d1 q1 {& {) D3 Y46" N" V: b: x! g9 u
47 0 L6 a# g6 s' @- C' |/ [48 5 e5 T: a% t( e7 x2 w, r49 / F5 ?7 C9 h: f+ d502 N+ |% A+ u+ |; p
51 8 k s3 S" v' g2 V" K+ V52$ ^6 X1 ?+ Q% u9 M( ]9 t' Q, A+ s
53 % i- B, _5 m3 l" `& ] C54 8 r6 h1 _$ c+ i554 x, A$ b' F7 U. b* l' A
56 4 f' l1 Q9 s! S3 q5 K _: c57 2 I4 l' M. L0 e4 e" d& s5 K. _& d1 O+ o58 + {/ @# M0 T: n3 ^: R0 e59& }2 o: n: B8 z% X8 j$ i% g; Z
60 ! a% `" v) \( C* {+ j/ V: f2 Q b& b61( a% ^2 a$ v4 s; B+ j. q
62* ^. X, m. s0 O7 q. D& v0 z
63 % x( I% y' M6 ~8 H4 a; M4 u2 `64 ; l% n- s* t" w. o/ ?# k# r; E65 + J( y; I: w+ W* D. _66 Z7 k1 d" m$ F8 ?; N67& D: D& t& u' d5 ]) N
68 F- `0 `4 y( \6 S* F3 O
69* |! j5 _+ E( C8 w) M' v
70: ^8 _) _7 c: D* h4 ^0 o8 c+ }! q
71 9 L. X4 Z T( }) Z) _$ ?/ \72 ]/ \7 _6 X" A% |6 T# \73 % F+ l& r3 X& \% ~; W74- p* M& p5 j3 S* r! }- f
75 2 ?3 l1 [! j5 j% C& g- `76 , ^3 E; ?$ l' N/ [& P) |77 8 C) o- c' I7 v78 A; u; W) A1 u2 y79 2 T- k2 M L7 F* D8 D80* p' y7 S8 P% I- d
81( M k; r! j% w6 A
829 h8 D" l' c& E4 S: P u
# 7. Select the parameters to train
# Build the network for the chosen architecture with 102 output classes.
model_ft, input_size = initialize_model(model_name, 102, feature_extract, use_pretrained=True)

# Move the model to the compute device configured earlier in the file.
model_ft = model_ft.to(device)

# Checkpoint path: the best model is written here so that it can be
# reloaded later without retraining.
filename = 'checkpoint.pth'

# Report which parameters will be learned.
print("Params to learn:")
trainable = [
    (name, param)
    for name, param in model_ft.named_parameters()
    if param.requires_grad
]
if feature_extract:
    # Only hand the optimizer the parameters that still require gradients.
    params_to_update = [param for _, param in trainable]
else:
    # Training everything: pass every parameter of the model.
    params_to_update = model_ft.parameters()
for name, _ in trainable:
    print("\t", name)
26 ?" J, n; V: i }, M
3 ~" T1 c, N. A. i4 1 Q- g( O% O* q2 q- r/ k5 & R8 E5 j/ q. K/ c f5 f. U4 \+ N6; O9 }3 }9 }, l# X
7/ A. D; m2 i/ l
83 Y% z6 w4 F! u" O0 I6 b- a
9* ]2 c& n& [" O) h2 S# n
10 0 @4 J' f2 n) a6 w7 E11 ! q9 i% K& q7 u& W$ ~+ J12 5 Q- M0 e2 }* R0 V6 @& j6 L& c s13; }- q! S5 N5 K; M
14 3 d5 l2 O" H, a15" B; o: ]3 Z6 H4 ?, O2 d- A
16- V6 _$ _5 i; Y* a
17 ( V5 X! H8 v. l7 d, S18 9 N( g, o1 L: u7 G5 F+ X: _4 R19 6 I, h( K! C, p1 S9 j: Z8 q20% i& ]6 I! h- H7 t
21+ m' s& b# D( `) h
22 . J$ F& I s/ v1 R! V' C23 4 j H7 x2 G! d; ?8 ?8 O WParams to learn: 9 ?, W& Z! f5 y5 b" Q4 L1 ]4 i' | fc.0.weight# t" t7 `# I7 L K; B# S
fc.0.bias- x* ?) V& ^. E3 O
7. 训练与预测
7.1 优化器设置
# Optimizer: Adam over the parameters selected for training.
optimizer_ft = optim.Adam(params_to_update, lr=1e-2)
# Learning-rate decay: multiply the rate by 0.1 every 7 scheduler steps
# (one step per epoch).
scheduler = optim.lr_scheduler.StepLR(optimizer_ft, gamma=0.1, step_size=7)
# The resnet head already ends in LogSoftmax, so use NLLLoss rather than
# nn.CrossEntropyLoss (which would apply log-softmax a second time).
criterion = nn.NLLLoss()
1 % X H! o& @, b2 g ]! i4 \% f2 , u! J# ^; o6 c7 ^4 L8 F3 : f) D9 |, A& V5 y$ \9 E1 i. e$ b4 7 y( o: E3 z' F; e+ E" ~51 B1 ^5 ^% T9 T$ X6 ?# K6 J
6 " I3 R/ M$ I% V9 K+ c7- V- x9 V1 I4 @9 ^# X; T! i
# 定义训练函数
# is_inception:模型是否为 Inception v3(训练阶段它会额外返回辅助分类器的输出,需要一并计算损失)
def train_model(model, dataloaders, criterion, optimizer, num_epochs=10,
                is_inception=False, filename=filename, lr_scheduler=None):
    """Train and validate ``model``, checkpointing the best validation epoch.

    Args:
        model: Network to train; moved to the module-level ``device``.
        dataloaders: Mapping with ``'train'`` and ``'valid'`` entries; each
            is iterated for ``(inputs, labels)`` batches and must expose
            ``.dataset`` (its length normalises loss and accuracy).
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer that updates the model parameters.
        num_epochs: Number of train + valid passes to run.
        is_inception: When True, the training-phase forward pass is expected
            to return ``(outputs, aux_outputs)`` and the auxiliary loss is
            added with weight 0.4.
        filename: Path the best checkpoint is saved to.
        lr_scheduler: Learning-rate scheduler stepped once per epoch after
            validation.  Defaults to the module-level ``scheduler`` so that
            existing calls keep working.

    Returns:
        ``(model, val_acc_history, train_acc_history, valid_losses,
        train_losses, LRs)``; ``model`` carries the weights of the best
        validation epoch, and ``LRs`` holds the initial learning rate
        followed by the rate after each epoch.
    """
    if lr_scheduler is None:
        # Backward compatible: the original always used the global scheduler.
        lr_scheduler = scheduler

    since = time.time()
    # Best validation accuracy seen so far.
    best_acc = 0
    # To resume an earlier run instead of starting fresh, torch.load(filename)
    # and restore 'state_dict', 'optimizer' and 'best_acc' here.

    model.to(device)

    # Histories kept for later plotting.
    val_acc_history = []
    train_acc_history = []
    train_losses = []
    valid_losses = []
    LRs = [optimizer.param_groups[0]['lr']]
    # Snapshot of the best weights; restored into the model at the end.
    best_model_wts = copy.deepcopy(model.state_dict())

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training pass followed by a validation pass.
        for phase in ['train', 'valid']:
            if phase == 'train':
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            running_corrects = 0

            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                optimizer.zero_grad()
                # Track gradients only while training.
                with torch.set_grad_enabled(phase == 'train'):
                    if is_inception and phase == 'train':
                        # Inception returns main and auxiliary outputs
                        # during training; combine both losses.
                        outputs, aux_outputs = model(inputs)
                        loss1 = criterion(outputs, labels)
                        loss2 = criterion(aux_outputs, labels)
                        loss = loss1 + 0.4*loss2
                    else:
                        outputs = model(inputs)
                        loss = criterion(outputs, labels)

                    # Predicted class = index of the largest score.
                    _, preds = torch.max(outputs, 1)

                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # loss.item() is a batch mean; weight it by the batch size.
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corrects.double() / len(dataloaders[phase].dataset)

            time_elapsed = time.time() - since
            print('Time elapsed {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            # Keep (and persist) the weights of the best validation epoch.
            if phase == 'valid' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
                state = {
                    'state_dict': model.state_dict(),
                    'best_acc': best_acc,
                    'optimizer': optimizer.state_dict(),
                }
                torch.save(state, filename)
            if phase == 'valid':
                val_acc_history.append(epoch_acc)
                valid_losses.append(epoch_loss)
                # Fix: the original called step(epoch_loss) on a StepLR,
                # which treats the argument as the epoch index and made the
                # learning rate jump with the loss.  Only
                # ReduceLROnPlateau takes a metric.
                if isinstance(lr_scheduler, optim.lr_scheduler.ReduceLROnPlateau):
                    lr_scheduler.step(epoch_loss)
                else:
                    lr_scheduler.step()
            if phase == 'train':
                train_acc_history.append(epoch_acc)
                train_losses.append(epoch_loss)

        print('Optimizer learning rate : {:.7f}'.format(optimizer.param_groups[0]['lr']))
        LRs.append(optimizer.param_groups[0]['lr'])
        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # Hand back the model with the best validation weights loaded.
    model.load_state_dict(best_model_wts)
    return model, val_acc_history, train_acc_history, valid_losses, train_losses, LRs
% r- `. [" u. C1, E+ z5 w8 _* ~# b% M; s
2& [2 u+ q5 |! @" E& E7 B
3* m( f+ O5 A% Q! @7 f
4 ( Z' u& F& y$ e ~5 , ^: S! j, m' t% K9 Q# g, s6# Y$ h- W! j) P1 v( u
7 2 ]3 C! y1 L5 `* t1 E3 r. Y$ t3 R8 # U& J8 `, m5 ^6 T& ~! Y9 0 A0 b+ u: _' l$ ]8 d. c106 p7 i& X ^0 W
114 e8 t: v4 X" h5 @9 ]
12# b3 T! r. K: a
13, F3 x' D7 r4 C9 U4 |
149 N- Z$ e/ J) V( o8 X
15% y5 m7 _, V+ P' M+ V( _# E7 H
163 w& J* S" |8 p2 F4 n
17 ; M2 W2 r2 \) M2 Y# y; y `7 k18 . V. k+ Y# W- H9 d: e7 J19 4 Z. j( r3 J6 X+ \2 t4 G3 q20 # H; @5 m7 F6 P$ |3 Y210 q0 v' u+ n/ D2 ^6 B2 L& y' @
227 F* v/ }% S8 }9 @: D+ Q, F) u
23( [4 q! }- n( ^! h6 h7 I
24( K5 e. x- N, @; k$ B% C8 p
25; Y0 T2 N. F+ n" P
26+ V( r/ F+ K" V
27 $ y i) _' }( O- V3 x; b28+ c$ @- r! B3 e9 t. e
29 4 V! \# w/ O* p9 ~4 Q% a& G, @30 ; T$ J, j) o8 x s0 z) e31 ; F" k% a, f, P; Q& O! n6 I32 . s }* M5 N7 ~7 L+ i33) q; v, C, F8 h' v2 ^/ F& l, }
34 ; Z4 l! w6 S+ w8 @35) ?# J, T, q; [
366 }4 ]) q& }7 h0 P8 {) L
37 - }6 J- V9 m, S( ~38 + N; N5 |6 h( M. l! C7 P39 ! t' b& u& H3 N( R* `+ V: m40 / I9 b( }5 f; V& C, D' G! g41 ; N* z) L$ g2 X" Z42 0 q1 I' S! C% _1 Y5 T43 ( l1 D& m/ x& ]6 w6 v# E' _0 W% j440 H7 Z- |9 n0 m* `
45: e a: X" P/ ~. E! K
46 ; l$ D L! y2 o" m6 k* `9 Q47. v# G0 y" f$ J, A4 I
48 : ~2 R; E- ]1 W1 ^9 @* U491 @4 R. C, \ @6 R+ _7 W
50 ' b8 R, h( t* Y) a+ \4 M4 \: z51 - I l) \3 A: v8 P* N1 r521 `0 s# F; ]4 N$ U8 n; }
53 3 P* R5 ~- A- ~' p7 r; q; _! h54( p: x W$ [1 u, w# k
55 6 ~" }" o- Z0 _9 G0 Y7 ]& a% E565 @1 p; v" \, q8 L- z$ d
57 % j- \3 Q3 m3 l6 b- k4 L" q58 6 d! z6 x* g1 S6 v8 O6 O& S59# w* C+ P* e; d; l
60 : g- G& |# R4 h# E8 A% A5 d/ t) c61+ t( V3 f" N7 U. q0 _+ R. Y
62 $ r8 E3 T8 B' |' g63 ) M1 }9 ]" p. g+ ^8 p: u64 * G8 }; N8 N- w+ f65 ' U, o) k" }- l, B+ o: x: o0 y66 % s' r# V1 M; [7 E+ v- Q67. f7 o4 z% o1 i4 M' y+ g
684 }2 @# p; y7 W K, `
69 . _, H( M* n* l6 O/ {# v4 ^' o70 1 h" B' ^/ v( u: C( U; W715 q: J8 D: {( {! i8 R; }
724 S- I: I! J1 }
73 * s6 f5 b" \7 f! U7 O; O/ m ^74 : G# i( R$ }: u& n4 Y8 v5 `75 7 `7 [- k2 n' ~9 g76- ]/ Z/ }2 k2 Q C' N2 K
77 5 k/ K* o6 r( t6 D4 g2 I7 p& w3 |787 Q1 g" T$ h8 q( a1 A" E0 X% _
79/ X) b0 g* {6 I+ Z. e1 f
80/ { f$ u8 r: Q- p7 ^1 G
819 A/ r# I6 @/ ~7 @
829 v# T8 j* ?* \5 b, H, o! C9 R
835 u; D% d; u& a/ a4 x8 p9 V$ n
84 , E o D9 ~$ R851 |8 B! D& j# N S9 o% t3 G
867 }- h9 w# p8 K* b
874 h) D8 D1 G& j3 ~' [
88 1 E D4 f& q" R" {9 R89 + U8 \; N+ P, m, h) y90 ; V* o( \7 ]' h* Q: Z910 ` F8 |( B. X+ i, p2 v& d
92 0 E. q- d; x! c; l93 . G+ r3 ]) O. N; e! i- z- d* k8 s944 X8 |4 m% _5 w/ M$ A1 l$ J
95 b' p3 K- {; q h. s7 r0 I* w3 r
96 % u/ o, ]4 h$ T97/ [ q4 M1 [/ g) K6 n2 l2 ]
98" @4 Q% ?$ I. ~* A& h0 ^
99 ) I [. p! Q* h' y0 u" ?100 h4 g } A% g6 w Z
101) [0 S: c' O! F& X, ^% Z6 G+ Q
102. ^2 [9 {8 E* x: U# u4 }" ^! y1 M
103 9 g$ A7 l, O& |# n6 K. R! w" n2 J104 " }7 f- C) v" H( d105 9 R1 X6 Z$ l0 Y: c% l( Y106( X) q) h' R+ _4 W' |8 M1 D/ o+ _
107+ D2 m1 m! ^9 d ]1 M4 y
1087 s/ J4 M2 K. }, K3 l' y, V
109& B6 A2 X' y( |# }: Y7 W* l8 }
1102 T9 h, G% ~6 {1 h. V) W8 ^3 P
111! W7 X0 l3 s5 \' s
7.2 开始训练模型
我这里只训练了 5 轮(因为训练时间真的太长了),大家自己玩的时候可以调大训练轮次。
# If this is too slow, lower the epoch count; around 50 iterations may work better.
# While training, watch whether the loss falls and the accuracy rises; a
# large gap between training and validation indicates overfitting.
model_ft, val_acc_history, train_acc_history, valid_losses, train_losses, LRs = train_model(
    model_ft,
    dataloaders,
    criterion,
    optimizer_ft,
    num_epochs=5,
    is_inception=(model_name == "inception"),
)
Epoch 0/4 & I3 L s! ]9 f* E---------- - s2 @2 G' j) `0 g; kTime elapsed 29m 41s . W% Y& P& _* r" g4 U3 ltrain Loss: 10.4774 Acc: 0.31471 A! T8 |" _; N+ w& k
Time elapsed 32m 54s + d H3 ], G: j) g( n2 m4 M9 f" Hvalid Loss: 8.2902 Acc: 0.4719# l/ W& |! e9 b3 \2 ?) ?7 x9 I
Optimizer learning rate : 0.0010000 4 W. j C% T: E* d9 y. D1 t2 B( g" C! y X) i
Epoch 1/45 Z% a. A, Z( ~ h8 z
---------- 7 |; e } m, STime elapsed 60m 11s - r6 o* V" h% v) ztrain Loss: 2.3126 Acc: 0.70539 K- u1 S/ _" z' |6 n; M5 s* j) y
Time elapsed 63m 16s6 V+ l) B+ m. c: @
valid Loss: 3.2325 Acc: 0.6626& ?' t" V9 \) M6 S$ \+ U: j
Optimizer learning rate : 0.0100000% ?$ O9 A, l) y& I
) d# k0 C Q7 Z, V( m
Epoch 2/4 3 I9 Y/ [' N1 x----------% Y; p. I% O" ]/ B0 M. `' Y
Time elapsed 90m 58s. _, A! K$ K" h' l
train Loss: 9.9720 Acc: 0.4734: T) Q8 c5 Z5 f6 N. x
Time elapsed 94m 4s # P4 ?$ u; s3 Q0 r) w: avalid Loss: 14.0426 Acc: 0.4413 ' @/ b1 V+ Z" z9 e8 I+ {Optimizer learning rate : 0.0001000 . n; A, Z t9 V- o) ~. U4 B* U: t G5 Q9 A8 W5 h9 ], E1 j
Epoch 3/4 b* U) h, x4 N% l$ m: S
----------! l$ F: h T0 d- Y& x% T" `% x+ U
Time elapsed 132m 49s 0 o" ^3 D# C2 L/ k5 E: l; ~8 ltrain Loss: 5.4290 Acc: 0.6548/ u) W+ F0 D3 u/ H U
Time elapsed 138m 49s, F6 n0 _8 ~2 i0 I( y! n
valid Loss: 6.4208 Acc: 0.6027 & k" }+ Z3 r$ @* E: x+ `Optimizer learning rate : 0.01000006 v# B# X6 L7 i) }9 z; P4 G1 ~* S
8 Z' i1 J8 @# \4 n8 I# K1 [( C* V; z
Epoch 4/4" K2 i" p4 i& ~4 I
---------- 6 x: ]; O. [' r Q8 \( l5 m0 T7 VTime elapsed 195m 56s* n# v9 Y6 J4 F( y( `, s% @/ c
train Loss: 8.8911 Acc: 0.5519 2 O% w0 m3 l2 c e- BTime elapsed 199m 16s( r% H, ?6 p1 e
valid Loss: 13.2221 Acc: 0.4914 8 E' `7 B6 I, f% d$ Q. KOptimizer learning rate : 0.00100000 Q! |$ z% t' F. w& m
7 {" j0 \: r9 R: V5 Z$ s- e/ ]
Training complete in 199m 16s ; z- @9 F& _9 i) o" JBest val Acc: 0.662592 / ?! E! n1 a6 q% N) \6 A' |$ Q8 |: x) M G( i- ]0 A
1 3 n Z. P+ f* L* J2 N5 K! G2( a ~8 A* E; D; _
3 3 c1 X3 [+ R& e B6 u$ V3 B$ I4* D- l7 u7 T" T
5 2 \* q) {6 ~9 n4 r3 K% t3 `68 `3 S, ?* R% a8 m) i
7# i3 U/ P v5 g L- w( p( m
80 ]8 ~/ u1 Y$ @* |8 H
9 2 w5 U0 K% N3 r3 t# \$ j7 T10 8 m0 Q/ J: t4 x11 6 Z$ z7 N8 m$ a1 J12 - d( d; W& K0 Z# ~' J13 ) F6 r/ i8 C+ L14+ `! i2 J$ }* _- Y
15 0 Y+ d7 u+ f& A9 O, X: j16 8 u5 v+ O" Y5 e3 E: L17 8 V% p4 ]$ ^( _; o& a# J- _' f18 3 e& W( J4 Z* F% G19 0 m7 ]; t5 o6 t3 _/ w20 ; ]9 i$ k9 B- Z& N21# j, J: v& |3 ?
22, {. J- K5 K7 d9 m
23, G- E. T3 Z: Y* y4 H; }+ K) I
248 O9 k# d8 b) r( e7 w
25) r# I* V+ t# e4 H& e
26 ' k- `. O3 ~: c/ J1 A/ s27 - Z' ]. w# N+ Y' D28' k7 t# y+ h6 _- [# H$ Q" X
29" z$ f) Z, F7 g
30' k& ^6 V) b. n9 \8 a" R& O
31; G1 y+ c6 U* s7 K2 C9 i0 n
32 3 y( |; M7 z3 S# T- H' P# {# P33$ u! ?2 `3 G6 }* G
34 ' g3 o' p- m, {! Q35% O$ f( m( X1 d. h6 g9 `
36; z& p& X t& u! g+ g- @$ b
373 J/ g+ M4 c' |1 |
38" t- e! | H' R5 A
39 . {7 D; L, j# T1 U40+ O+ \( Z8 o0 |% q% R) H3 y1 ?$ e
41 $ Q$ j \. [- A( q& B- u$ ?5 @42. B; d. t% f. S. ~3 d( c$ V
# 7.3 Train all layers
# Unfreeze the whole network so that every layer is fine-tuned.
for param in model_ft.parameters():
    param.requires_grad = True

# Continue with ALL parameters at a smaller learning rate.
# Fix: the original reused `params_to_update`, which was collected before
# unfreezing (head only when feature_extract is set), so the unfrozen
# layers were never handed to the optimizer.
optimizer = optim.Adam(model_ft.parameters(), lr=1e-4)
# Fix: the scheduler must wrap the optimizer that is actually used in this
# stage, not the stage-one `optimizer_ft`.
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

# Loss function
criterion = nn.NLLLoss()

# Reload the best checkpoint saved by the previous stage and continue
# training from those weights.
checkpoint = torch.load(filename)
best_acc = checkpoint['best_acc']
model_ft.load_state_dict(checkpoint['state_dict'])
# The stage-one optimizer state is deliberately NOT restored: its parameter
# groups describe a different parameter set, and loading it would also
# overwrite the 1e-4 learning rate chosen above.
4 ' V2 O0 b1 m* G. @9 R5 : s( l" [: s2 T1 S6: w' W1 R$ m! z
7/ y' ?5 {0 r6 ^, n) M% @* y, y0 J
开始训练
注:这里训练会变得特别慢;我的显卡是 1660 Ti,仅供各位参考。
9 `2 g5 T* f, _* \7 I0 `
model_ft, val_acc_history, train_acc_history, valid_losses, train_losses, LRs = train_model(
    model_ft,
    dataloaders,
    criterion,
    optimizer,
    num_epochs=2,
    is_inception=(model_name == "inception"),
)
# Console output of this run follows; its first line is: Epoch 0/1
----------' s2 N* X! p1 j6 `
Time elapsed 35m 22s9 G7 l' d/ D3 _0 o" t/ C& E, ]; ?9 N
train Loss: 1.7636 Acc: 0.7346 # `" a# v/ Z& e8 ?/ gTime elapsed 38m 42s & V2 a7 R! N; I6 v* J6 U% avalid Loss: 3.6377 Acc: 0.6455/ o7 \' F( [: e3 F: G1 g$ s
Optimizer learning rate : 0.0010000) i* S9 A# V2 K6 I0 @
}1 r7 G( w) Y5 t% REpoch 1/1 / I: e7 E( y" `---------- / {+ T2 B3 V" JTime elapsed 82m 59s ' E* h+ f8 K" M4 w3 Jtrain Loss: 1.7543 Acc: 0.73400 o+ w1 D6 N5 y0 Y+ w* r* N
Time elapsed 86m 11s2 |: b1 `. W3 ]! a" t; u
valid Loss: 3.8275 Acc: 0.61377 W! O* k9 e# s }+ U( V( P
Optimizer learning rate : 0.0010000. l$ P6 k/ ]: j2 `: d9 L" R
$ |3 b1 ~& }; T
Training complete in 86m 11s - O/ U* D. E. k- F/ v/ L' r7 |Best val Acc: 0.645477 , z" n6 K( N: }4 _ ( F L( H5 g% r1, g7 w9 [4 P2 |) b2 i& ] k
21 v7 X3 j3 ~9 S9 h& K3 L( R! b7 U
3% x5 `& i; f* \/ |2 c+ v
4, k" K) e. v0 R1 f( {$ r
5 4 z, w2 k9 p: }" ^' D/ i3 N1 n6 R6/ ]7 u9 w7 M) } l. A) d
7 / C% G* ~" }& H* o4 x5 W. n* d# j& h8* @0 L0 V! y( Q: C+ [
9 6 o2 O+ o ?; ?! `+ f t10 . c4 L2 p0 u, R# E115 M; J& G" ^8 j
12$ T V+ E3 l7 |7 B$ N6 r/ v
13+ E$ j/ _$ i3 M9 p' _/ Q6 l
14 0 q" x y: @$ Z15' `: |8 z0 }) r' ^ F& h& o
16! ^5 ?6 ~6 W) T! E4 x3 I/ |6 q
17- W4 ?0 w3 a5 y) u, I) ?3 L' p
18: d" S# U" Y" ~) n9 @ h
8. 加载已经训练的模型
相当于做一次简单的前向传播(推理),不用更新参数。