下面是images图片中的一个。
1 F# K+ j x- c: n
下面是对应的xml文件。

<annotation>
    <folder>train</folder>
    <filename>apple_30.jpg</filename>
    <path>C:\tensorflow1\models\research\object_detection\images\train\apple_30.jpg</path>
    <source>
        <database>Unknown</database>
    </source>
    <size>
        <width>800</width>
        <height>800</height>
        <depth>3</depth>
    </size>
    <segmented>0</segmented>
    <object>
        <name>apple</name>
        <pose>Unspecified</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>254</xmin>
            <ymin>163</ymin>
            <xmax>582</xmax>
            <ymax>487</ymax>
        </bndbox>
    </object>
    <object>
        <name>apple</name>
        <pose>Unspecified</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>217</xmin>
            <ymin>448</ymin>
            <xmax>535</xmax>
            <ymax>713</ymax>
        </bndbox>
    </object>
    <object>
        <name>apple</name>
        <pose>Unspecified</pose>
        <truncated>1</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>603</xmin>
            <ymin>470</ymin>
            <xmax>800</xmax>
            <ymax>716</ymax>
        </bndbox>
    </object>
    <object>
        <name>apple</name>
        <pose>Unspecified</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>468</xmin>
            <ymin>179</ymin>
            <xmax>727</xmax>
            <ymax>467</ymax>
        </bndbox>
    </object>
    <object>
        <name>apple</name>
        <pose>Unspecified</pose>
        <truncated>1</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>1</xmin>
            <ymin>63</ymin>
            <xmax>308</xmax>
            <ymax>414</ymax>
        </bndbox>
    </object>
</annotation>
3 - H9 n7 R# W, n9 N% t1 i/ P4 & [' X; ~0 Q8 |4 }! C5$ b0 c- n0 q! y& _* p' J
6 / G( S J3 m3 U# `8 [( a7 p, I! o; K5 t1 P/ X$ Z8( _ h7 b! w8 T5 s* g
9 4 g" L, ^% w# V$ h- @10; |% N! g" `+ f+ X) o
11 I- t0 {0 w u- v$ e
12! L! Z" I/ e$ [ G2 ^
13: D: [" F- O$ S; P
14 W: k' J6 j$ R" m
156 v/ h9 W/ K3 N1 s9 p
16 9 K! t, c5 G* E" x. k6 H17 & K1 P) `' K2 l$ t8 i1 N: z3 B18 7 w) j9 y: Z( N$ y1 v19: G9 d! R; {1 S" K, ^# k8 i: M9 k
206 { V1 R8 d% I- S k
21 ! i$ {% @* R4 M* C22. }8 H9 p! T! Y' u% S4 l u% c y/ F
23% Y+ _' I+ s/ z: T/ M- u/ |
24# _2 ~+ d8 `3 p1 q& E
25. x: x# h, v+ j. m7 k5 v2 i
26$ I6 v7 H' [+ \9 D2 ?
27 4 {' x" s t0 F8 C. q% p, o; ~28( |7 U4 z0 g/ q' p, d! Z; J
29 & Z( _0 W8 M( x% C9 J/ L* Z301 j( Y6 i* \5 ~+ r
31 ' n: o* k; s l$ s4 ~' J S% i32* ~1 I/ e. m, b0 @
33- g/ \5 ]2 G. V: U
34" i6 J; {& @# U% r: ] d" C$ W5 [
35 0 \$ J/ X. L- r; L8 |, e365 m" r$ U, o# `% W# ?- }' `
37# X4 i* e5 i t1 Q/ X3 J1 L
38 # a6 C' J% p# J39 " E3 A: w6 b! x& h+ h- l5 t40 / T8 b W9 d- {# Z: K41 ; |8 a6 \$ k1 o42& H) B8 f' ^- i2 C% h. n
43 % M, E6 a3 y( R/ Y1 X44: t" A+ ?' m6 H$ d' E
45 0 \: ]5 a# A; C% Q( J46 . P* a: p2 A7 Y) B5 L; d1 g7 l47! P |( |7 g6 ] d( ^: f
48: }! E8 j& f5 p# z1 Y
49 6 u( {. t- j. y/ R1 Y3 P- T50; s1 w8 P) {/ F: B7 L9 Q1 [0 O4 l
51 0 [1 s9 L- }9 J0 F52 8 u% D8 ~' y0 N3 S53 5 y" B$ u' ?) i! x% p |54 & o2 d' \3 S$ w, D! q4 k- B- Z55 ( l9 T0 q y; S/ l% D3 q. L( j563 g8 D j, Y1 q0 \5 l. ?, m3 R; v1 B
57 , e3 c' `8 m; o: c58( g2 m, t3 a& N9 y9 G3 k! _
59' M5 X, u- T) z
60 7 Q: N; E: ^$ `3 i8 _- u J61/ I9 |/ ]4 \( k2 m. q2 J
62( e2 g0 Z1 M+ v( G% m5 T, { F6 `
63 1 S% A" {5 b% d: \6 C64% k; v/ D; L1 }4 W4 d- o7 d
65 / ?3 z" Q1 Y- m0 w( T66; A9 P* S. h- U% i/ N+ k
67 7 g- s: J: ~ D) }/ K6 ^, Z68 o# F/ C6 I8 W# M- b69! O3 U8 S! m% }& i
70% ~( X& J5 \ H4 R- s) J3 z; b$ t/ W
71 & G0 P! E. y! Y2 v- [ k, T6 ?72 4 y& }( y, b. c n0 j5 I73 6 O( V* P, D0 W$ a: `& y74. n8 @2 t0 O6 Y& G) C6 N1 w
将xml文件提取图像信息,主要使用xml和opencv,基于torch提取,代码比较凌乱。

import os
import numpy as np
import cv2
import torch
import matplotlib.patches as patches
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2
from matplotlib import pyplot as plt
from torch.utils.data import Dataset
from xml.etree import ElementTree as et
from torchvision import transforms as torchtrans
8 y6 M; K- u* M* [' N
# Defining the files directory and testing directory.
# Training images and their matching Pascal-VOC XML annotation files.
train_image_dir = 'train/train/image'
train_xml_dir = 'train/train/xml'
# Test-set counterparts, currently unused.
# test_image_dir = 'test/test/image'
# test_xml_dir = 'test/test/xml'
class FruitImagesDataset(Dataset):
    """Detection dataset pairing fruit images with Pascal-VOC XML annotations.

    Images are read from ``image_dir`` and each ``name.jpg`` is matched with
    a same-named ``name.xml`` annotation file located in ``xml_dir``.
    """

    def __init__(self, image_dir, xml_dir, width, height, transforms=None):
        """
        Args:
            image_dir: directory containing the ``.jpg`` images.
            xml_dir: directory containing the Pascal-VOC ``.xml`` files.
            width: target width each image is resized to.
            height: target height each image is resized to.
            transforms: optional transform pipeline applied per sample.
        """
        self.transforms = transforms
        self.image_dir = image_dir
        self.xml_dir = xml_dir
        self.height = height
        self.width = width

        # Sort the directory listings so dataset order is deterministic:
        # os.listdir returns entries in arbitrary order, and the original
        # code claimed to sort "for consistency" but never actually did.
        # Only files with the expected extensions are kept.
        self.imgs = sorted(
            image for image in os.listdir(self.image_dir)
            if image.endswith('.jpg')
        )
        self.xmls = sorted(
            xml for xml in os.listdir(self.xml_dir)
            if xml.endswith('.xml')
        )

        # Class names; the list index is used as the integer label id.
        # NOTE(review): the original comment said "0 index is reserved for
        # background", yet 'apple' sits at index 0. torchvision detection
        # models treat label 0 as background, so this mapping looks suspect;
        # confirm against the training code before changing it (changing it
        # here would silently shift every label value).
        self.classes = ['apple', 'banana', 'orange']
6 E/ b' g9 |- D! v& J' x* B
def __getitem__(self, idx):6 v- a+ m' ]7 G' L$ U- P" m( n: S5 @
' O) P O6 V0 f) @
3 h- s' t* p9 _% R$ Z$ J
img_name = self.imgs[idx] ( ~; Z$ p0 L# ?7 y image_path = os.path.join(self.image_dir, img_name) : ?2 W; r% A1 h) n: d* C* X% C2 T3 w" Q' K
5 N7 g1 N9 f" `% B6 a; Q( |2 R2 _
# reading the images and converting them to correct size and color5 i& P# p! W6 c; M' P
img = cv2.imread(image_path) - G: E3 O7 F( \0 U5 j- H6 H img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype(np.float32)4 Q- \/ ^8 A: a$ R# O) t
img_res = cv2.resize(img_rgb, (self.width, self.height), cv2.INTER_AREA)' `9 S, k) _$ C; b! [. m
# diving by 255 / h% L, T4 L6 L3 { img_res /= 255.0 , z, o5 c7 V1 ^! e$ p G& v- n! k: S# K8 {( g4 O C! C) z8 z& | G
# annotation file. x- P+ e0 D0 f }" u8 g
annot_filename = img_name[:-4] + '.xml', Z+ m6 {8 I3 w
annot_file_path = os.path.join(self.xml_dir, annot_filename)6 u7 q: Y+ w" ?8 e
7 u6 L! y" N1 m
/ X9 F8 `. q9 R3 f boxes = []2 F1 Y; g: ]: I6 v" j
labels = []7 b3 w7 E/ M9 v% A# P5 @2 ?
tree = et.parse(annot_file_path)8 t0 M. h6 `- C# Q: D
root = tree.getroot() 7 ~" e& S c) D7 b/ _/ ] # l; ?. V& T$ o5 f + q5 R; s1 }. N; U# }6 }9 ? # cv2 image gives size as height x width 9 v: g9 X5 Q0 f4 r/ {" D" ~- c( x wt = img.shape[1]! O+ |' ^+ N M( [4 {# M$ \
ht = img.shape[0]+ Y+ ?6 {. E6 f/ F. A
/ N S; L1 c$ S4 w. R
" M& ^& y( S {/ G # box coordinates for xml files are extracted and corrected for image size given1 Y$ {5 p" [. s n$ Y9 H% D
for member in root.findall('object'): % e4 m t2 G: ~ labels.append(self.classes.index(member.find('name').text))0 K" s% h" }. l+ o! w% o
6 B9 T# [% G: L/ Y& i \
# p x" V- T; B% J6 o0 b. F # bounding box- C7 x4 f& j: }
xmin = int(member.find('bndbox').find('xmin').text)5 e5 W% _. D. ~- `$ ^. j3 ?, o
xmax = int(member.find('bndbox').find('xmax').text) 0 q# Z% X( X6 K3 z5 @; p7 {8 |* m! f1 s9 e: S
% \9 A9 o, h& U% J7 U* m; L
ymin = int(member.find('bndbox').find('ymin').text) 1 d ^7 a- [ L ymax = int(member.find('bndbox').find('ymax').text) ) C5 [! ?4 Z8 k- ^ + Z2 I6 v# i9 ~- \" {% h6 P+ ^) O* k8 V+ T6 E0 v
xmin_corr = (xmin / wt) * self.width+ @& _( ^# R- M/ ~7 Y7 \4 @8 n( P
xmax_corr = (xmax / wt) * self.width ( `1 \$ M0 _1 N* Y' ` ymin_corr = (ymin / ht) * self.height4 }6 ?" K- `$ J+ Z/ E+ p% _$ l, s
ymax_corr = (ymax / ht) * self.height ; q* f! J! P; R3 o' i' x boxes.append([xmin_corr, ymin_corr, xmax_corr, ymax_corr]) ! t, y1 n9 X) U! a' V& `5 O* `0 y3 H" Y, z
% R$ X' v% t+ c0 r& Z g
# convert boxes into a torch.Tensor ' T- Y6 f! Q: z% ]# { boxes = torch.as_tensor(boxes, dtype=torch.float32) 5 c! ^1 U/ q( Z7 z; h0 k& m* O7 G: T( B% A8 L7 |
( W2 x s( z* U k/ ?
# getting the areas of the boxes# f8 L/ P6 i- s) n Q1 Z
area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0]) % }" A) h7 h+ Q* G* ~, M 6 ]- k, L, C' z+ R: J+ j . `" J9 {, d9 P' f/ {9 l1 q2 a # suppose all instances are not crowd 9 }) N/ S7 C, \, Y' H+ | iscrowd = torch.zeros((boxes.shape[0],), dtype=torch.int64) ; [4 r- @- d' Z$ V+ Y 0 q \% [4 I) ]8 p+ Y, Y ?! D# W" v: \" L9 Y
labels = torch.as_tensor(labels, dtype=torch.int64)* C- v$ Y: D) U3 o8 l
# S. G- g/ P+ f 6 u/ s; o7 h: F0 o$ ]+ L4 z6 @+ k6 Z target = {}/ S1 M7 L- g2 Q( ]5 W$ Y3 M. g
target["boxes"] = boxes & t) o6 |5 g4 U target["labels"] = labels. V6 w/ o9 t+ L+ M" I
target["area"] = area 7 u, [ f9 \6 h target["iscrowd"] = iscrowd' U( v' N4 x. K0 \( t0 e
# image_id9 R% ?, a3 W# V5 ?# J) C
image_id = torch.tensor([idx])/ Y) j5 F; H D+ c4 ^& D
target["image_id"] = image_id % o: O' H, f$ n: ?+ O: J ! K5 u: U! K' E3 a* f1 b* T" r9 V9 E( B9 e$ O9 A$ H9 G& X9 g: M$ n
if self.transforms:0 `0 {2 ~- \. U$ n! X5 j
sample = self.transforms(image=img_res, 3 q$ S1 U5 j2 X* | bboxes=target['boxes'],& S% d1 ~$ a3 y' F ^8 h3 O% ?
labels=labels)- `- g7 S0 c) N) A( x0 ^