深度学习和目标检测系列教程 8-300：目标检测常见的标注工具LabelImg和将xml文件提...

杨利霞 发表于 2021-7-14 15:18

深度学习和目标检测系列教程 8-300：目标检测常见的标注工具LabelImg和将xml文件提取图像信息
图像标注主要用于创建数据集进行图片的标注。本篇博客将推荐一款非常实用的图片标注工具LabelImg，重点介绍其安装使用过程。如果想简单点，请直接下载打包版（下载地址见结尾），无需编译，直接打开即可！

感谢原作者对Github的贡献，博主发现软件已经更新，可以关注最新版本。这个工具是一个用 Python 和 Qt 编写的完整的图形界面。最有意思的是，它的标注信息可以直接转换成XML文件，这和PASCAL VOC和ImageNet使用的XML是一样的。

附注。作者在5月份更新了代码，现在最新版本号是1.3.0，博主亲测，源码在Windows 10和Ubuntu 16.04上正常运行。

具体的安装步骤请查看Github教程：https://github.com/tzutalin/labelImg#installation

在原作者的Github下载源码：https://github.com/tzutalin/labelImg 。解压后得到名为labelImg-master的文件夹，在该目录下打开命令行窗口，输入如下语句即可打开软件。

python labelImg.py

具体使用
修改默认的XML文件保存位置，使用快捷键“Ctrl+R”，更改为自定义位置，这里的路径一定不能包含中文，否则不会保存。

使用notepad++打开源文件夹中的data/predefined_classes.txt，修改默认分类，如person、car、motorcycle这三个分类。

点击“打开目录”打开图片文件夹，选择第一张图片开始标注：用“创建矩形框”或“Ctrl+N”开始画框，点击结束画框，再双击选择类别。标注完一张图片后点击“保存”，XML文件就保存到本地了。单击“下一张图片”转到下一张图片。

标注过程中可以随时返回修改，重新保存的文件会覆盖之前保存的文件。

完成标注后，打开XML文件，可以发现其格式和PASCAL VOC格式一样。

将xml文件提取图像信息
下面举例说明如何从xml文件中提取图像信息：图片保存在image文件夹，标注内容保存在xml文件夹，图片和对应标注文件的文件名相同（仅扩展名不同）。

下面是images图片中的一个。

下面是对应的xml文件。

<annotation>
<folder>train</folder>
<filename>apple_30.jpg</filename>
<path>C:\tensorflow1\models\research\object_detection\images\train\apple_30.jpg</path>
<source>
<database>Unknown</database>
</source>
<size>
<width>800</width>
<height>800</height>
<depth>3</depth>
</size>
<segmented>0</segmented>
<object>
<name>apple</name>
<pose>Unspecified</pose>
<truncated>0</truncated>
<difficult>0</difficult>
<bndbox>
<xmin>254</xmin>
<ymin>163</ymin>
<xmax>582</xmax>
<ymax>487</ymax>
</bndbox>
</object>
<object>
<name>apple</name>
<pose>Unspecified</pose>
<truncated>0</truncated>
<difficult>0</difficult>
<bndbox>
<xmin>217</xmin>
<ymin>448</ymin>
<xmax>535</xmax>
<ymax>713</ymax>
</bndbox>
</object>
<object>
<name>apple</name>
<pose>Unspecified</pose>
<truncated>1</truncated>
<difficult>0</difficult>
<bndbox>
<xmin>603</xmin>
<ymin>470</ymin>
<xmax>800</xmax>
<ymax>716</ymax>
</bndbox>
</object>
<object>
<name>apple</name>
<pose>Unspecified</pose>
<truncated>0</truncated>
<difficult>0</difficult>
<bndbox>
<xmin>468</xmin>
<ymin>179</ymin>
<xmax>727</xmax>
<ymax>467</ymax>
</bndbox>
</object>
<object>
<name>apple</name>
<pose>Unspecified</pose>
<truncated>1</truncated>
<difficult>0</difficult>
<bndbox>
<xmin>1</xmin>
<ymin>63</ymin>
<xmax>308</xmax>
<ymax>414</ymax>
</bndbox>
</object>
</annotation>
从xml文件中提取图像信息，主要使用xml和opencv，并基于torch的Dataset进行封装，代码比较凌乱。

import os
import numpy as np
import cv2
import torch
import matplotlib.patches as patches
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2
from matplotlib import pyplot as plt
from torch.utils.data import Dataset
from xml.etree import ElementTree as et
from torchvision import transforms as torchtrans

# Relative paths to the training images and to their annotation XML files.
train_image_dir = 'train/train/image'
train_xml_dir = 'train/train/xml'
# The matching test-set directories are left disabled:
# test_image_dir = 'test/test/image'
# test_xml_dir = 'test/test/xml'

class FruitImagesDataset(Dataset):
    """Object-detection dataset pairing ``.jpg`` images with ``.xml`` annotations.

    Each image ``<name>.jpg`` in ``image_dir`` is expected to have an
    annotation file ``<name>.xml`` in ``xml_dir`` whose ``<object>`` elements
    carry a ``<name>`` and a ``<bndbox>`` with ``xmin``/``ymin``/``xmax``/
    ``ymax`` children.

    Args:
        image_dir: Directory containing the ``.jpg`` images.
        xml_dir: Directory containing the ``.xml`` annotation files.
        width: Width every image is resized to.
        height: Height every image is resized to.
        transforms: Optional callable invoked as
            ``transforms(image=..., bboxes=..., labels=...)`` that returns a
            mapping with ``'image'`` and ``'bboxes'`` entries.
    """

    def __init__(self, image_dir, xml_dir, width, height, transforms=None):
        self.transforms = transforms
        self.image_dir = image_dir
        self.xml_dir = xml_dir
        self.height = height
        self.width = width

        # Sort the listings so that a given index always maps to the same
        # file; only names ending in the expected extension are kept.
        self.imgs = [image for image in sorted(os.listdir(image_dir))
                     if image[-4:] == '.jpg']
        self.xmls = [xml for xml in sorted(os.listdir(xml_dir))
                     if xml[-4:] == '.xml']

        # Index 0 is reserved for the background class, so real classes
        # start at label 1.
        self.classes = ['__background__', 'apple', 'banana', 'orange']

    def __getitem__(self, idx):
        """Return ``(image, target)`` for the image at position ``idx``.

        ``image`` is the RGB image resized to ``(height, width)`` and scaled
        to ``[0, 1]`` (or whatever ``transforms`` turns it into). ``target``
        is a dict with ``boxes`` (N x 4 float tensor of
        ``[xmin, ymin, xmax, ymax]`` in resized-image pixels), ``labels``,
        ``area``, ``iscrowd`` and ``image_id``.

        Raises:
            FileNotFoundError: If the image cannot be read.
            ValueError: If an object name is not listed in ``self.classes``.
        """
        img_name = self.imgs[idx]
        image_path = os.path.join(self.image_dir, img_name)

        # cv2.imread signals failure by returning None instead of raising.
        img = cv2.imread(image_path)
        if img is None:
            raise FileNotFoundError(f"cannot read image: {image_path}")

        # OpenCV loads BGR; convert to RGB floats, resize, scale to [0, 1].
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype(np.float32)
        # The interpolation flag must be passed by keyword: the third
        # positional parameter of cv2.resize is ``dst``.
        img_res = cv2.resize(img_rgb, (self.width, self.height),
                             interpolation=cv2.INTER_AREA)
        img_res /= 255.0

        # The annotation file shares the image's base name.
        annot_filename = os.path.splitext(img_name)[0] + '.xml'
        annot_file_path = os.path.join(self.xml_dir, annot_filename)
        root = et.parse(annot_file_path).getroot()

        # OpenCV image shape is (height, width, channels).
        ht, wt = img.shape[:2]

        boxes = []
        labels = []
        for member in root.findall('object'):
            labels.append(self.classes.index(member.find('name').text))

            bndbox = member.find('bndbox')
            xmin = int(bndbox.find('xmin').text)
            xmax = int(bndbox.find('xmax').text)
            ymin = int(bndbox.find('ymin').text)
            ymax = int(bndbox.find('ymax').text)

            # Rescale from original-image pixels to resized-image pixels.
            xmin_corr = (xmin / wt) * self.width
            xmax_corr = (xmax / wt) * self.width
            ymin_corr = (ymin / ht) * self.height
            ymax_corr = (ymax / ht) * self.height
            boxes.append([xmin_corr, ymin_corr, xmax_corr, ymax_corr])

        # reshape keeps the tensor N x 4 even when there are no objects.
        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
        labels = torch.as_tensor(labels, dtype=torch.int64)

        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["area"] = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        # All instances are assumed not to be crowd regions.
        target["iscrowd"] = torch.zeros((boxes.shape[0],), dtype=torch.int64)
        target["image_id"] = torch.tensor([idx])

        if self.transforms:
            # Plain Python lists are handed over so the transform does not
            # have to deal with torch tensors.
            sample = self.transforms(image=img_res,
                                     bboxes=boxes.tolist(),
                                     labels=labels.tolist())

            img_res = sample['image']
            target['boxes'] = torch.as_tensor(
                sample['bboxes'], dtype=torch.float32).reshape(-1, 4)

        return img_res, target

    def __len__(self):
        """Return the number of ``.jpg`` images found in ``image_dir``."""
        return len(self.imgs)

# Helper turning an image tensor back into a PIL image.
def torch_to_pil(img):
    """Convert ``img`` with torchvision's ``ToPILImage`` and return it in RGB mode."""
    to_pil = torchtrans.ToPILImage()
    pil_image = to_pil(img)
    return pil_image.convert('RGB')

def plot_img_bbox(img, target):
    """Show ``img`` with every box in ``target['boxes']`` outlined in red.

    Args:
        img: Image accepted by ``Axes.imshow``.
        target: Mapping whose ``'boxes'`` entry iterates over boxes given as
            ``[xmin, ymin, xmax, ymax]``.
    """
    fig, a = plt.subplots(1, 1)
    fig.set_size_inches(5, 5)
    a.imshow(img)
    for box in target['boxes']:
        # Convert to plain floats so matplotlib never receives tensor scalars.
        xmin, ymin, xmax, ymax = (float(value) for value in box)
        # Rectangle takes its anchor corner plus width and height, not two corners.
        rect = patches.Rectangle((xmin, ymin),
                                 xmax - xmin, ymax - ymin,
                                 linewidth=2,
                                 edgecolor='r',
                                 facecolor='none')

        # Draw the bounding box on top of the image.
        a.add_patch(rect)
    plt.show()

def get_transform(train):
    """Build the albumentations pipeline for training or evaluation.

    Args:
        train: When truthy, a random horizontal flip (probability 0.5) is
            applied before the tensor conversion.

    Returns:
        An ``A.Compose`` pipeline taking ``image``, ``bboxes`` and ``labels``,
        with boxes in ``pascal_voc`` format.
    """
    steps = []
    if train:
        # The probability must be passed as ``p=``: in older albumentations
        # releases the first positional parameter is ``always_apply``, so
        # ``HorizontalFlip(0.5)`` flipped every image.
        steps.append(A.HorizontalFlip(p=0.5))
    # ToTensorV2 converts the image to a pytorch tensor without dividing by 255.
    steps.append(ToTensorV2(p=1.0))

    bbox_params = A.BboxParams(format='pascal_voc', label_fields=['labels'])
    return A.Compose(steps, bbox_params=bbox_params)

# Build the training dataset; every image is resized to 480 x 480.
dataset = FruitImagesDataset(train_image_dir, train_xml_dir, 480, 480,
                             transforms=get_transform(train=True))

print(len(dataset))
# Fetch the image and target for one test index. Feel free to change the index.
img, target = dataset[0]
print(img.shape, '\n', target)
plot_img_bbox(torch_to_pil(img), target)
输出如下：

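torch.Size([3, 480, 480])
{'boxes': tensor([[130.8000,  97.8000, 327.6000, 292.2000],
      [159.0000, 268.8000, 349.8000, 427.8000],
      [  0.0000, 282.0000, 118.2000, 429.6000],
      [ 43.8000, 107.4000, 199.2000, 280.2000],
      [295.2000,  37.8000, 479.4000, 248.4000]]), 'labels': tensor([...]), 'area': tensor([...]), 'iscrowd': tensor([0, 0, 0, 0, 0]), 'image_id': tensor([...])}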
LabelImg打包版下载地址
链接：https://pan.baidu.com/s/1QZDgeYTHyAlD2xhtJqZ-Yw
提取码：srjn
————————————————
版权声明：本文为CSDN博主「刘润森！」的原创文章，遵循CC 4.0 BY-SA版权协议，转载请附上原文出处链接及本声明。
原文链接：https://blog.csdn.net/weixin_44510615/article/details/118496273

页: [1]

数学建模社区-数学中国's Archiver

深度学习和目标检测系列教程 8-300：目标检测常见的标注工具LabelImg和将xml文件提...