' [ T% r: `* e) G. ^" c 4 T. P( \* X) A: a Y! `& ?# M8 z2 e) B8 Q& d9 K
下面是images图片中的一个。
# Y9 b, W) e) ^# H, o' Q" `
下面是对应的xml文件。
j8 l6 v& C! |, |' R; H
<annotation>
    <folder>train</folder>
    <filename>apple_30.jpg</filename>
    <path>C:\tensorflow1\models\research\object_detection\images\train\apple_30.jpg</path>
    <source>
        <database>Unknown</database>
    </source>
    <size>
        <width>800</width>
        <height>800</height>
        <depth>3</depth>
    </size>
    <segmented>0</segmented>
    <object>
        <name>apple</name>
        <pose>Unspecified</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>254</xmin>
            <ymin>163</ymin>
            <xmax>582</xmax>
            <ymax>487</ymax>
        </bndbox>
    </object>
    <object>
        <name>apple</name>
        <pose>Unspecified</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>217</xmin>
            <ymin>448</ymin>
            <xmax>535</xmax>
            <ymax>713</ymax>
        </bndbox>
    </object>
    <object>
        <name>apple</name>
        <pose>Unspecified</pose>
        <truncated>1</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>603</xmin>
            <ymin>470</ymin>
            <xmax>800</xmax>
            <ymax>716</ymax>
        </bndbox>
    </object>
    <object>
        <name>apple</name>
        <pose>Unspecified</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>468</xmin>
            <ymin>179</ymin>
            <xmax>727</xmax>
            <ymax>467</ymax>
        </bndbox>
    </object>
    <object>
        <name>apple</name>
        <pose>Unspecified</pose>
        <truncated>1</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>1</xmin>
            <ymin>63</ymin>
            <xmax>308</xmax>
            <ymax>414</ymax>
        </bndbox>
    </object>
</annotation>
22 x2 Z* E/ B9 _/ D: W
3 , K3 u2 R! o- V N$ M$ [4 " t0 M+ P9 S! D x5$ z" m4 Q: J% y- d: b: [1 s9 ~
63 p% U* Y* L$ G
7" X) x& }1 M' H0 s; @
8 & e: a# L t* _% t" \5 @9 - [& Q: N# I1 v) o6 D7 N" w10( F W$ b! x0 C; \( _
11; F6 R/ t s ^5 R0 A
12 m) `8 o$ O5 E& F% k- A) E
13 , V* |0 J8 \7 M6 u7 b. a& y- Q- T8 w14* Q V8 P# f1 u1 b! ]9 R9 T8 z
15" K9 t! I1 R3 R; E. I8 D
167 N6 G/ H! d" t& E/ C: G
174 G6 C3 \# D) f8 ^( q! u
18 4 q) l4 |4 y8 [$ p19; R: o# s* P% s& _6 X. K
20 ) [1 z \* J- ]/ _; B- H$ r219 \- r7 E, ~; }( @9 N: H
22; a. g {+ s0 s
23 # t- S: f: h0 U4 e: b- k( g24 0 x- P+ F0 O0 I: J$ f; g25 , q5 l& x2 K$ R7 B0 h" i6 q/ H26+ T; k2 ^5 s6 T" J! I2 ~; T0 }
27 ) G X( E7 R" h7 y282 v. M) j5 m3 p
29 - D; a. \. M, j- g8 Y! |2 o305 [: L* O2 f! o
31 |2 n3 B4 n5 C4 r+ u
32 : p' ^& W. ~/ _0 C, {336 E/ ]- V. i' S V" l+ Q
34 ( ~2 w" p, a- h% w+ O35# b, y. a" _. T, U. x& a" M
360 C4 c8 i. ]9 o! z3 U' i: v
37 0 O1 d# {5 U" H, E385 W3 @; Z' ^1 b$ y; m8 t8 Z+ t
39 - C1 d9 I; x9 B1 b6 @40! r3 x* Z" \ ^9 J& y
41' q; \4 {( |0 W. L- r. w
42+ \) C! p! ]9 \4 k }7 T
437 v+ l% g! k8 ^: |4 U, Q
44 2 {. G4 ^9 o6 U+ u/ E: d& b) X45 & c' \' j! J2 S! n46& p* e& z, C2 l. u
47 / a3 }2 W7 i# a+ D6 R48: L1 w/ M2 h3 P; W
49 ( a# g# M: l; X/ ?50) G- ]- e" k- q" ` l
518 o3 o& y) s0 N: g7 v, G
528 M+ c1 B9 I: }9 e j" w
53 ! K" N3 |4 g# i! t) f54, |+ I( ~* N+ J3 B+ G
55 1 \* q! }* Q. G5 `' ]1 Q1 P% ^56: Z9 N6 ]+ E) Q# T: j0 G
57# q8 l6 ^# H" P( T
58/ u ^; N) s6 d; E" {
59 ; q$ _0 p7 A9 N2 r4 y7 X60 8 G) Y0 w) j- ~% n7 C$ i61 3 _ g$ U3 m2 x" q _6 m62 ) V# O' i; [. {* R: U: M7 a63 a a. W' K4 Z% c64 7 u- ?6 l: i) W, e65 ! j9 ^ W7 z& g; J0 \66) n9 V# o' B& D. R
67 9 s0 r0 j% B, o6 D D9 ?! H4 D68 $ a+ D& s, n* _' v# o: U69 ! h) Y' W9 C3 M& d" b70 ( v$ I0 p4 S& _! a71' _8 h, ]0 S' x; ]5 k
72 : u9 F; \, k3 B6 u73 " K& z8 w. p. J7 h6 L+ ?74' l4 y5 b1 [/ O& R6 i9 i% a
将xml文件提取图像信息，主要使用xml和opencv，基于torch提取，代码比较凌乱。
import os ! _) S) k: F" Pimport numpy as np% C+ C# j& I1 ^% o
import cv2 ) x8 L( p4 e' ^( c5 W' U2 Pimport torch ; y1 ?% K- K4 b4 N/ Limport matplotlib.patches as patches ( p6 i) y6 I/ P7 ximport albumentations as A 9 `8 x% r, C6 @4 x" y5 s% wfrom albumentations.pytorch.transforms import ToTensorV2 / d3 U! S& _$ Kfrom matplotlib import pyplot as plt% a* S3 I [# e1 }
from torch.utils.data import Dataset9 D, V+ s! A. K. U6 _
from xml.etree import ElementTree as et* L/ g ~5 b; M
from torchvision import transforms as torchtrans $ ?+ o, I. J( L6 J& g+ l . n6 {* B( }" _+ h+ _1 h5 H( c% S( a) o4 p" W X
# defining the files directory and testing directory
train_image_dir = 'train/train/image'
train_xml_dir = 'train/train/xml'
# test directories, currently unused:
# test_image_dir = 'test/test/image'
# test_xml_dir = 'test/test/xml'
class FruitImagesDataset(Dataset):
    """Object-detection dataset pairing JPEG images with Pascal-VOC XML files.

    ``__getitem__`` returns ``(image, target)`` where ``image`` is the
    resized, RGB, [0, 1]-scaled float32 array (or a tensor, if ``transforms``
    is an albumentations pipeline that includes ``ToTensorV2``) and
    ``target`` is the dict (boxes / labels / area / iscrowd / image_id)
    expected by torchvision detection models.

    Args:
        image_dir: directory containing the ``.jpg`` images.
        xml_dir: directory containing the matching ``.xml`` annotations
            (same filename stem as the image).
        width, height: size the image (and its boxes) are rescaled to.
        transforms: optional albumentations pipeline called with
            ``image=``, ``bboxes=``, ``labels=`` keyword arguments.
    """

    def __init__(self, image_dir, xml_dir, width, height, transforms=None):
        self.transforms = transforms
        self.image_dir = image_dir
        self.xml_dir = xml_dir
        self.height = height
        self.width = width

        # Sort the filenames for a deterministic idx -> image mapping.
        # (The original comment promised sorting but never applied it,
        # making image_id depend on os.listdir order.)
        self.imgs = sorted(f for f in os.listdir(self.image_dir)
                           if f.endswith('.jpg'))
        self.xmls = sorted(f for f in os.listdir(self.xml_dir)
                           if f.endswith('.xml'))

        # NOTE(review): the original comment claims index 0 is reserved for
        # background, yet 'apple' maps to 0 here. Torchvision detectors
        # treat label 0 as background — confirm the intended label scheme
        # before training.
        self.classes = ['apple', 'banana', 'orange']

    def __getitem__(self, idx):
        img_name = self.imgs[idx]
        image_path = os.path.join(self.image_dir, img_name)

        # Read the image, convert BGR -> RGB, resize, and scale to [0, 1].
        img = cv2.imread(image_path)
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype(np.float32)
        # BUG FIX: the third positional argument of cv2.resize is `dst`,
        # not the interpolation mode — it must be passed by keyword.
        img_res = cv2.resize(img_rgb, (self.width, self.height),
                             interpolation=cv2.INTER_AREA)
        img_res /= 255.0

        # Matching annotation file: same stem, .xml extension.
        annot_filename = img_name[:-4] + '.xml'
        annot_file_path = os.path.join(self.xml_dir, annot_filename)

        boxes = []
        labels = []
        tree = et.parse(annot_file_path)
        root = tree.getroot()

        # cv2 images are (height, width, channels).
        wt = img.shape[1]
        ht = img.shape[0]

        # Extract each box and rescale it from original-image coordinates
        # to the resized (self.width x self.height) frame.
        for member in root.findall('object'):
            labels.append(self.classes.index(member.find('name').text))

            bndbox = member.find('bndbox')
            xmin = int(bndbox.find('xmin').text)
            xmax = int(bndbox.find('xmax').text)
            ymin = int(bndbox.find('ymin').text)
            ymax = int(bndbox.find('ymax').text)

            xmin_corr = (xmin / wt) * self.width
            xmax_corr = (xmax / wt) * self.width
            ymin_corr = (ymin / ht) * self.height
            ymax_corr = (ymax / ht) * self.height
            boxes.append([xmin_corr, ymin_corr, xmax_corr, ymax_corr])

        boxes = torch.as_tensor(boxes, dtype=torch.float32)

        # Box areas (used by COCO-style evaluation utilities).
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])

        # Assume no instance is annotated as a crowd region.
        iscrowd = torch.zeros((boxes.shape[0],), dtype=torch.int64)

        labels = torch.as_tensor(labels, dtype=torch.int64)

        target = {
            "boxes": boxes,
            "labels": labels,
            "area": area,
            "iscrowd": iscrowd,
            "image_id": torch.tensor([idx]),
        }

        if self.transforms:
            sample = self.transforms(image=img_res,
                                     bboxes=target['boxes'],
                                     labels=labels)
            img_res = sample['image']
            target['boxes'] = torch.Tensor(sample['bboxes'])

        return img_res, target

    def __len__(self):
        return len(self.imgs)
- J; R2 ~7 A. ~* ^; c4 ?% H* t7 j! _5 m; N J6 v
# Helper to turn a torch tensor back into a PIL image for visualization.
def torch_to_pil(img):
    """Convert a (C, H, W) torch tensor to an RGB PIL image."""
    to_pil = torchtrans.ToPILImage()
    return to_pil(img).convert('RGB')
$ W/ r/ M5 k. }' j% f& k
' b) H4 j5 D( z5 k. f , Z* b; b' ]8 {! d* ~. V: m7 L* [ d4 ^( s6 S" R( Ndef plot_img_bbox(img, target):- `9 P$ N- T5 [' n. h. j- {
# plot the image and bboxes ! F& u' \. {7 A# X( z1 S. s$ o, Y fig, a = plt.subplots(1, 1)8 \ u- ^9 Y& t5 ]7 D4 G
fig.set_size_inches(5, 5) ( b1 n4 Y6 N" g$ Y a.imshow(img); C. D- m! P0 J+ f3 |
for box in (target['boxes']): 0 l( S" V* V* C$ B. H) a x, y, width, height = box[0], box[1], box[2] - box[0], box[3] - box[1] 3 D: U: t: \) H2 C" D, L* Z' N rect = patches.Rectangle((x, y),: L4 B' ?$ q# J% B
width, height,4 `% [" D) c& D
linewidth=2, % e) L+ Z1 Y. e) p edgecolor='r',+ D: c( b2 d/ b2 T
facecolor='none') , T+ n3 L6 ?* k' W' D 3 @" S- y8 I2 D4 Z* o% }% k" o" f5 h) N* h) K
# Draw the bounding box on top of the image3 g+ C3 U* s9 j! s: k3 S- B# K L
a.add_patch(rect)8 \2 X$ ]! t- J7 z" G
plt.show() @# S- @& C! C6 e0 D( o3 q, j
0 |. }$ ?5 T" r- D % G; f" y/ H9 {+ \- ^( k ! @1 ]$ P# v0 H, n$ T P & q1 Y4 ]$ D0 ^! L+ n; w0 Hdef get_transform(train): ' ~: X/ S3 t+ |, f, [: @ if train: ; R$ \1 O; s/ a3 o. L- ` U return A.Compose([7 |8 r- S" l" E& c8 A9 c
A.HorizontalFlip(0.5), 2 Y+ e4 x9 S0 O # ToTensorV2 converts image to pytorch tensor without div by 2558 H) q( S# \- B! Q1 f
ToTensorV2(p=1.0) ! Y; {/ L3 B+ N6 ^) V. f6 E ], bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']}) ! v: ^; e# m8 M% R; f% b# T6 S else:* w& j" C; O4 O/ t' q* S' f
return A.Compose([& L! m4 S% t& S# \
ToTensorV2(p=1.0) ! Y% J/ F* Y8 C! a6 y. N3 Y ], bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']}) 8 g+ j/ n2 |9 ?. F6 Y% q" _+ p: o4 y9 d, ^& ~1 M
, ^) D4 M9 o5 ~( x9 W5 s; q8 G$ A
1 R _0 d+ U, \2 s4 ?# _# Y3 n6 \ 5 k7 d5 `) c! w) Y: k; j* ` V$ j* Y8 f& W