你需要注意的是:

此文章仅为 ChatGPT 的回答,是本人为理解 Paddle 相应模块而复制粘贴(CV)过来的,并不一定具有实际效益。

一种可行的Predict.py

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import argparse
import os
import cv2
import numpy as np
import paddle
from paddle.vision.transforms import Compose, Normalize
from paddledet.utils.logger import setup_logger
from paddledet.models.detectors import YOLOv3
from paddledet.datasets.builder import build_dataset
from paddledet.config import Config
from paddledet.core.workspace import load_config, create
from paddledet.utils.visualizer import draw_boxes

def parse_args(argv=None):
    """Parse command-line arguments for PaddleDetection inference.

    Args:
        argv: Optional list of argument strings; defaults to sys.argv[1:]
            (standard argparse behavior), so existing callers are unaffected.

    Returns:
        argparse.Namespace with model_dir, image_path, score_thresh,
        output_dir and use_gpu attributes.
    """

    def _str2bool(value):
        # argparse's type=bool is broken for flags: bool('False') is True
        # because any non-empty string is truthy.  Parse the text explicitly.
        if isinstance(value, bool):
            return value
        if value.lower() in ('yes', 'true', 't', '1'):
            return True
        if value.lower() in ('no', 'false', 'f', '0'):
            return False
        raise argparse.ArgumentTypeError('Boolean value expected, got %r' % value)

    parser = argparse.ArgumentParser(description='PaddleDetection model inference')
    parser.add_argument('--model_dir', type=str, default='./output/yolov3_darknet53_270e_coco', help='Model directory path')
    parser.add_argument('--image_path', type=str, default='./test.jpg', help='Path of the input image')
    parser.add_argument('--score_thresh', type=float, default=0.5, help='Threshold of the score')
    parser.add_argument('--output_dir', type=str, default='./output', help='Output directory path')
    parser.add_argument('--use_gpu', type=_str2bool, default=True, help='Use GPU or not')
    args = parser.parse_args(argv)
    return args

def main():
    """Load a YOLOv3 model, run inference on one image and save the result.

    Reads paths and thresholds from the command line (see parse_args), draws
    the detected boxes on the input image and writes <output_dir>/out.jpg.
    """
    args = parse_args()

    # Load the detection config shipped alongside the weights.
    # NOTE(review): assumes the model dir contains 'pp-yolo.yaml' -- confirm
    # against the actual export layout.
    cfg_file = os.path.join(args.model_dir, 'pp-yolo.yaml')
    cfg = load_config(cfg_file)

    # The dataset is only needed here for its class names (count + labels).
    dataset = build_dataset(cfg.data.test)

    # Build the detector from the config sections.
    model = YOLOv3(
        num_classes=len(dataset.class_names),
        backbone=cfg.model.backbone,
        neck=cfg.model.neck,
        head=cfg.model.head,
        train_cfg=cfg.train_cfg,
        test_cfg=cfg.test_cfg)

    # Select the device BEFORE loading the weights so tensors land on the
    # right place.  paddle.set_device is sufficient; a paddle Layer has no
    # .cuda() method (that is a PyTorch idiom) -- the original call to
    # model.cuda() would raise AttributeError.
    paddle.set_device('gpu' if args.use_gpu else 'cpu')

    # Load trained weights.
    model_state_dict = paddle.load(os.path.join(args.model_dir, 'model.pdparams'))
    model.set_state_dict(model_state_dict)

    # Preprocess: normalize and add a batch dimension.
    transforms = Compose([
        Normalize(mean=cfg.img_mean, std=cfg.img_std, to_rgb=True),
    ])
    img = cv2.imread(args.image_path)
    if img is None:
        # cv2.imread returns None instead of raising on a missing/bad file.
        raise FileNotFoundError('cannot read image: %s' % args.image_path)
    inputs = transforms(img)
    inputs = np.expand_dims(inputs, axis=0)
    # The model expects a paddle Tensor, not a raw numpy array.
    inputs = paddle.to_tensor(inputs)

    # Inference
    model.eval()
    with paddle.no_grad():
        outputs = model(inputs)

    # Post-process with multiclass NMS.
    # NOTE(review): paddle.fluid is deprecated since Paddle 2.x; prefer the
    # NMS op exported by the detection library when available.
    bboxes = paddle.fluid.layers.multiclass_nms(
        outputs[0],
        outputs[1],
        score_threshold=args.score_thresh,
        nms_top_k=400,
        keep_top_k=100,
        nms_threshold=0.45,
        background_label=-1)

    # Visualize and save; create the output dir if it does not exist yet.
    os.makedirs(args.output_dir, exist_ok=True)
    out_img = draw_boxes(img, bboxes, dataset.class_names)
    cv2.imwrite(os.path.join(args.output_dir, 'out.jpg'), out_img)


if __name__ == '__main__':
    main()

下方为解析


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
import argparse
import os
import cv2
import numpy as np
import paddle
from paddle.vision.transforms import Compose, Normalize
from paddledet.utils.logger import setup_logger
from paddledet.models.detectors import YOLOv3
from paddledet.datasets.builder import build_dataset
from paddledet.config import Config
from paddledet.core.workspace import load_config, create
from paddledet.utils.visualizer import draw_boxes

# !!!!
# 从 PaddleDetection 2.2 开始,paddledet.utils.logger 模块已经被移除
# 另注:PaddleDetection 官方仓库实际的 Python 包名是 ppdet,
# 示例中的 paddledet 模块在官方代码库中并不存在,此代码仅供理解结构之用
  • argparse:Python标准库,用于解析命令行参数
  • os:Python标准库,提供了访问操作系统功能的接口
  • cv2:OpenCV库,提供了图像处理和计算机视觉的函数
  • numpy:Python第三方库,提供了支持多维数组和矩阵运算的函数
  • paddle:PaddlePaddle深度学习框架
  • Compose、Normalize:PaddlePaddle的图像预处理函数(组合多个变换、归一化)
  • setup_logger:PaddleDetection的日志函数
  • YOLOv3:PaddleDetection中的YOLOv3目标检测器
  • build_dataset:PaddleDetection中的数据集构建函数
  • Config:PaddleDetection的配置类
  • load_config:PaddleDetection的配置加载函数
  • create:PaddleDetection的模型创建函数
  • draw_boxes:PaddleDetection的可视化函数

1
2
3
4
5
6
7
8
9
# (Quoted excerpt, explained line by line below.)
# Builds the command-line parser for the inference script.
def parse_args():
parser = argparse.ArgumentParser(description='PaddleDetection model inference')
parser.add_argument('--model_dir', type=str, default='./output/yolov3_darknet53_270e_coco', help='Model directory path')
parser.add_argument('--image_path', type=str, default='./test.jpg', help='Path of the input image')
parser.add_argument('--score_thresh', type=float, default=0.5, help='Threshold of the score')
parser.add_argument('--output_dir', type=str, default='./output', help='Output directory path')
# NOTE(review): argparse type=bool treats any non-empty string as True,
# so passing '--use_gpu False' still yields True.
parser.add_argument('--use_gpu', type=bool, default=True, help='Use GPU or not')
args = parser.parse_args()
return args
  • parse_args():解析命令行参数的函数
  • parser:命令行参数解析器
  • parser.add_argument():添加命令行参数
  • --model_dir:模型目录路径参数
  • --image_path:输入图像路径参数
  • --score_thresh:分数阈值参数
  • --output_dir:输出目录路径参数
  • --use_gpu:是否使用GPU参数
  • args = parser.parse_args():解析命令行参数并返回参数对象

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
def main():
args = parse_args()

# 加载配置文件
cfg_file = os.path.join(args.model_dir, 'pp-yolo.yaml')
cfg = load_config(cfg_file)

# 创建数据集
dataset = build_dataset(cfg.data.test)

# 创建模型
model = YOLOv3(
num_classes=len(dataset.class_names),
backbone=cfg.model.backbone,
neck=cfg.model.neck,
head=cfg.model.head,
train_cfg=cfg.train_cfg,
test_cfg
  • num_classes=len(dataset.class_names):目标检测任务的类别数,等于数据集类别数
  • backbone=cfg.model.backbone:模型骨干网络
  • neck=cfg.model.neck:模型neck部分
  • head=cfg.model.head:模型头部
  • train_cfg=cfg.train_cfg:训练配置
  • test_cfg=cfg.test_cfg:测试配置
  • model.prepare():准备模型

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# 加载模型参数
model_path = os.path.join(args.model_dir, 'yolov3_darknet53_270e_coco.pdparams')
assert os.path.exists(model_path), "model file {} does not exist".format(model_path)
model.load(model_path)

# 设置计算设备
device = 'gpu' if args.use_gpu else 'cpu'
paddle.set_device(device)

# 图像预处理
transform = Compose([Normalize(mean=cfg.img_norm_cfg.mean, std=cfg.img_norm_cfg.std, to_rgb=True)])

# 读取图像
image_path = args.image_path
assert os.path.exists(image_path), "Image file {} does not exist".format(image_path)
image = cv2.imread(image_path)

# 图像预处理
data = transform(image)
data = np.expand_dims(data, axis=0)

# 将数据转为Tensor
inputs = paddle.to_tensor(data)

# 进行模型预测
outputs = model(inputs)

# 解析预测结果
bboxes = paddle.split(outputs['bbox'], 2, axis=-1)
scores = outputs['score']
labels = outputs['cid']

# 可视化预测结果
im = draw_boxes(image, bboxes, scores, labels, dataset.class_names, score_thresh=args.score_thresh)
cv2.imwrite(os.path.join(args.output_dir, 'output.jpg'), im)

print("Predict success!")
  • model_path = os.path.join(args.model_dir, 'yolov3_darknet53_270e_coco.pdparams'):模型参数文件路径

  • assert os.path.exists(model_path), "model file {} does not exist".format(model_path):判断模型文件是否存在

  • model.load(model_path):加载模型参数

  • device = 'gpu' if args.use_gpu else 'cpu':设置计算设备

  • paddle.set_device(device):设置PaddlePaddle计算设备

  • transform = Compose([Normalize(mean=cfg.img_norm_cfg.mean, std=cfg.img_norm_cfg.std, to_rgb=True)]):图像预处理,包括归一化

  • image = cv2.imread(image_path):读取输入图像

  • data = transform(image):对图像进行预处理

  • data = np.expand_dims(data, axis=0):将数据维度扩展为四维,与模型输入要求一致

  • inputs = paddle.to_tensor(data):将数据转为Tensor

  • outputs = model(inputs):进行模型预测,得到输出结果

  • bboxes = paddle.split(outputs['bbox'], 2, axis=-1):解析预测结果中的边界框

  • scores = outputs['score']:解析预测结果中的目标得分

  • labels = outputs['cid']:解析预测结果中的类别标签

  • im = draw_boxes(image, bboxes, scores, labels, dataset.class_names, score_thresh=args.score_thresh):根据解析得到的预测结果,绘制目标框并可视化预测结果

  • cv2.imwrite(os.path.join(args.output_dir, 'output.jpg'), im):将可视化结果保存到输出目录

  • print("Predict success!"):输出预测成功的提示信息


另一种可行的Predict.py

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
import codecs
import os
import time
import sys
sys.path.append('PaddleDetection')
import json
import yaml
from functools import reduce
import multiprocessing

from PIL import Image
import cv2
import numpy as np
import paddle
# import paddleseg.transforms as T
from paddle.inference import Config
from paddle.inference import create_predictor
from multiprocessing.dummy import Pool as ThreadPool
from functools import partial
from deploy.python.preprocess import preprocess,Resize, NormalizeImage, Permute, PadStride
from deploy.python.utils import argsparser, Timer, get_current_memory_mb

# Mapping from class-id strings (as emitted by the detector, column 0 of
# its 'boxes' output) to the human-readable label written into the result
# JSON by predict_image.
LABEL_MAP = {
"0": "bump",
"1": "granary",
"2": "CrossWalk",
"3": "cone",
"4": "bridge",
"5": "pig",
"6": "tractor",
"7": "corn",
}

class PredictConfig():
    """Deploy-time configuration parsed from <model_dir>/infer_cfg.yml.

    Attributes:
        arch: model architecture name.
        preprocess_infos: list of preprocess-op dicts ('type' + params).
        min_subgraph_size: minimum subgraph size for the inference engine.
        labels: label list exported with the model.
    """

    def __init__(self, model_dir):
        # Parse the YAML config exported together with the inference model.
        deploy_file = os.path.join(model_dir, 'infer_cfg.yml')
        # Explicit encoding: the exported YAML may contain non-ASCII labels
        # and the platform default encoding is not guaranteed to be UTF-8.
        with open(deploy_file, encoding='utf-8') as f:
            yml_conf = yaml.safe_load(f)
        self.arch = yml_conf['arch']
        self.preprocess_infos = yml_conf['Preprocess']
        self.min_subgraph_size = yml_conf['min_subgraph_size']
        self.labels = yml_conf['label_list']


def get_test_images(infer_file):
    """Read image paths (one per line) from *infer_file*.

    Backslashes are normalized to forward slashes; blank lines are skipped
    so that trailing newlines in the file do not produce empty paths.

    Raises:
        AssertionError: if the file contains no image paths.
    """
    with open(infer_file, 'r') as f:
        # The original used eval(repr(line)), which is an identity for str
        # (and an eval smell); strip + replace does the same job safely and
        # also handles Windows '\r\n' line endings.
        images = [line.strip().replace('\\', '/') for line in f if line.strip()]
    assert len(images) > 0, "no image found in {}".format(infer_file)
    return images

def load_predictor(model_dir, use_gpu=True, gpu_mem=3000, device_id=0):
    """Create a paddle.inference predictor for an exported model.

    Args:
        model_dir: directory containing model.pdmodel / model.pdiparams.
        use_gpu: run on GPU when True (default keeps the old, hard-coded
            behavior; pass False for CPU-only machines).
        gpu_mem: initial GPU memory pool in MB (was hard-coded to 3000).
        device_id: GPU device id (was hard-coded to 0).

    Returns:
        (predictor, config) tuple.
    """
    config = Config(
        os.path.join(model_dir, 'model.pdmodel'),
        os.path.join(model_dir, 'model.pdiparams'))
    if use_gpu:
        # initial GPU memory (MB), device ID
        config.enable_use_gpu(gpu_mem, device_id)
    # optimize graph and fuse ops
    config.switch_ir_optim(True)
    # disable print log when predicting
    config.disable_glog_info()
    # reuse memory between ops
    config.enable_memory_optim()
    # disable feed/fetch OPs, needed by zero_copy_run
    config.switch_use_feed_fetch_ops(False)
    predictor = create_predictor(config)
    return predictor, config



def create_inputs(imgs, im_info):
    """Pack preprocessed images and their meta info into the feed dict.

    Args:
        imgs: list of CHW float arrays produced by the preprocess ops.
        im_info: list of dicts holding 'im_shape' and 'scale_factor'.

    Returns:
        dict with batched 'image', 'im_shape' and 'scale_factor' arrays.
    """
    feed = {}

    # Fast path: a batch of one needs no padding.
    if len(imgs) == 1:
        feed['image'] = np.array((imgs[0], )).astype('float32')
        feed['im_shape'] = np.array((im_info[0]['im_shape'], )).astype('float32')
        feed['scale_factor'] = np.array((im_info[0]['scale_factor'], )).astype('float32')
        return feed

    # Batch the per-image meta info.
    feed['im_shape'] = np.concatenate(
        [np.array((info['im_shape'], )).astype('float32') for info in im_info],
        axis=0)
    feed['scale_factor'] = np.concatenate(
        [np.array((info['scale_factor'], )).astype('float32') for info in im_info],
        axis=0)

    # Zero-pad every image to the largest H and W in the batch so they can
    # be stacked into one dense tensor.
    max_h = max(im.shape[1] for im in imgs)
    max_w = max(im.shape[2] for im in imgs)
    padded = []
    for im in imgs:
        channels, height, width = im.shape[:]
        canvas = np.zeros((channels, max_h, max_w), dtype=np.float32)
        canvas[:, :height, :width] = im
        padded.append(canvas)
    feed['image'] = np.stack(padded, axis=0)
    return feed


class Detector(object):
    """Thin wrapper around a paddle.inference predictor.

    Builds the preprocess pipeline declared in infer_cfg.yml and runs the
    exported detection model on prepared feed dicts.
    """

    def __init__(self,
                 pred_config,
                 model_dir):
        self.pred_config = pred_config
        self.predictor, self.config = load_predictor(model_dir)
        self.preprocess_ops = self.get_ops()

    def get_ops(self):
        """Instantiate the preprocess ops listed in the deploy config.

        Uses an explicit dispatch table instead of the original eval():
        eval on config-supplied strings is an injection risk and turns a
        typo in the YAML into an arbitrary-name lookup instead of a clear
        KeyError.
        """
        op_classes = {
            'Resize': Resize,
            'NormalizeImage': NormalizeImage,
            'Permute': Permute,
            'PadStride': PadStride,
        }
        preprocess_ops = []
        for op_info in self.pred_config.preprocess_infos:
            new_op_info = op_info.copy()
            op_type = new_op_info.pop('type')
            preprocess_ops.append(op_classes[op_type](**new_op_info))
        return preprocess_ops

    def predict(self, inputs):
        """Run one forward pass.

        Args:
            inputs: dict mapping predictor input names to numpy arrays
                (see create_inputs).

        Returns:
            dict with 'boxes' (rows of [class_id, score, x1, y1, x2, y2],
            per the slicing in predict_image) and 'boxes_num'.
        """
        # Copy every feed array into its predictor input tensor.
        for name in self.predictor.get_input_names():
            input_tensor = self.predictor.get_input_handle(name)
            input_tensor.copy_from_cpu(inputs[name])

        # model prediction
        self.predictor.run()
        output_names = self.predictor.get_output_names()
        boxes_tensor = self.predictor.get_output_handle(output_names[0])
        np_boxes = boxes_tensor.copy_to_cpu()
        boxes_num = self.predictor.get_output_handle(output_names[1])
        np_boxes_num = boxes_num.copy_to_cpu()

        # Fewer than 6 values cannot hold a single box row: treat as empty.
        if reduce(lambda x, y: x * y, np_boxes.shape) < 6:
            results = {'boxes': np.zeros([]), 'boxes_num': [0]}
        else:
            results = {'boxes': np_boxes, 'boxes_num': np_boxes_num}
        return results

# Adapter that folds preprocess()'s two arguments into a single tuple so the
# function can be mapped over a thread pool (Pool.map passes one item per call).
def my_preprocess(para):
    """Unpack (image_path, preprocess_ops) and run the preprocess pipeline."""
    image_path, ops = para
    return preprocess(image_path, ops)

def predict_image(detector, image_list, result_path):
    """Run detection over *image_list* and dump the results as JSON.

    Images are preprocessed in chunks on a thread pool, predicted one by
    one, and every box whose score passes its per-class threshold is
    appended to the JSON written at *result_path*.
    """
    c_results = {"result": []}
    # Per-class score thresholds, indexed by class id (see LABEL_MAP).
    multiclass_thres = [0.49, 0.49, 0.49, 0.49, 0.49, 0.49, 0.49, 0.49]
    num_worker = 4
    # Threads (not processes): preprocessing is dominated by cv2/numpy work
    # that releases the GIL, so a thread pool speeds it up without the
    # pickling cost of a process pool.
    pool = ThreadPool(processes=num_worker)
    try:
        # Number of images preprocessed per pool.map call; tune on your data.
        img_iter_filter = 10
        img_length = len(image_list)
        # BUGFIX: the original iterated range(img_length // img_iter_filter),
        # which is empty when img_length < img_iter_filter, so small inputs
        # produced no predictions at all.  Walking the list in explicit
        # chunks covers every image for any length.
        for start in range(0, img_length, img_iter_filter):
            im_paths = image_list[start:start + img_iter_filter]
            # Image id = numeric file stem, e.g. "imgs/123.jpg" -> 123.
            image_ids = [int(os.path.basename(p).split('.')[0]) for p in im_paths]
            para = [[p, detector.preprocess_ops] for p in im_paths]
            imandinfos = pool.map(my_preprocess, para)
            for img_idx, (im, im_info) in enumerate(imandinfos):
                image_id = image_ids[img_idx]
                inputs = create_inputs([im], [im_info])

                # Run the detector on this single image.
                det_results = detector.predict(inputs)
                im_bboxes_num = det_results['boxes_num'][0]
                if im_bboxes_num <= 0:
                    continue
                # boxes rows: [class_id, score, x1, y1, x2, y2]
                bbox_results = det_results['boxes'][0:im_bboxes_num, 2:]
                id_results = det_results['boxes'][0:im_bboxes_num, 0]
                score_results = det_results['boxes'][0:im_bboxes_num, 1]
                # Distinct loop variable: the original reused 'idx' here,
                # shadowing the outer enumeration index.
                for box_idx in range(im_bboxes_num):
                    cls_id = int(id_results[box_idx])
                    if float(score_results[box_idx]) < multiclass_thres[cls_id]:
                        continue
                    x1 = float(bbox_results[box_idx][0])
                    y1 = float(bbox_results[box_idx][1])
                    x2 = float(bbox_results[box_idx][2])
                    y2 = float(bbox_results[box_idx][3])
                    c_results["result"].append({"image_id": image_id,
                                                "type": LABEL_MAP[str(cls_id)],
                                                "x": x1,
                                                "y": y1,
                                                "width": x2 - x1,
                                                "height": y2 - y1,
                                                "segmentation": []})
    finally:
        # Release the worker threads even if prediction raises.
        pool.close()
        pool.join()

    # Persist the accumulated detections.
    with open(result_path, 'w') as ft:
        json.dump(c_results, ft)

def main(infer_txt, result_path, det_model_path):
    """Build a detector from *det_model_path*, predict every image listed
    in *infer_txt*, and write the JSON results to *result_path*."""
    config = PredictConfig(det_model_path)
    det = Detector(config, det_model_path)

    # Collect the image paths, then run batch prediction.
    images = get_test_images(infer_txt)
    predict_image(det, images, result_path)


if __name__ == '__main__':
    print('start…')
    t0 = time.time()
    model_dir = "model/"

    # Exported inference models run as static graphs.
    paddle.enable_static()
    # argv[1]: txt listing image paths; argv[2]: output JSON path.
    main(sys.argv[1], sys.argv[2], model_dir)
    print('total time:', time.time() - t0)

加载所需的Python库:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
import codecs
import os
import time
import sys
sys.path.append('PaddleDetection')
import json
import yaml
from functools import reduce
import multiprocessing
from PIL import Image
import cv2
import numpy as np
import paddle
from paddle.inference import Config
from paddle.inference import create_predictor
from multiprocessing.dummy import Pool as ThreadPool
from functools import partial
from deploy.python.preprocess import preprocess,Resize, NormalizeImage, Permute, PadStride
from deploy.python.utils import argsparser, Timer, get_current_memory_mb

定义一个包含模型预测的配置类:

1
2
3
4
5
6
7
8
9
10
# (Quoted excerpt, explained below.)  Parses <model_dir>/infer_cfg.yml, the
# deploy-time config exported together with the inference model.
class PredictConfig():
def __init__(self, model_dir):
# parsing Yaml config for Preprocess
deploy_file = os.path.join(model_dir, 'infer_cfg.yml')
with open(deploy_file) as f:
yml_conf = yaml.safe_load(f)
# Architecture name, preprocess-op list, minimum subgraph size and
# label list, all read from the exported deploy config.
self.arch = yml_conf['arch']
self.preprocess_infos = yml_conf['Preprocess']
self.min_subgraph_size = yml_conf['min_subgraph_size']
self.labels = yml_conf['label_list']

这个类的主要功能是解析用于预处理的配置信息,包括模型架构、预处理信息、最小子图大小和标签列表。其中,infer_cfg.yml文件包含了所有的预处理信息,例如图像大小、均值和方差等信息。yaml.safe_load()用于解析infer_cfg.yml文件。self.arch保存了模型的架构,self.preprocess_infos包含了图像预处理信息,self.min_subgraph_size表示模型的最小子图大小,self.labels包含了标签列表。

定义了一个函数get_test_images(),用于获取测试集中的所有图片:

1
2
3
4
5
6
7
8
# (Quoted excerpt, explained below.)  Reads one image path per line.
def get_test_images(infer_file):
with open(infer_file, 'r') as f:
dirs = f.readlines()
images = []
for dir in dirs:
# NOTE(review): eval(repr(s)) is an identity for str; a plain
# strip/replace would do the same without the eval smell.  Also,
# 'dir' shadows the builtin of the same name.
images.append(eval(repr(dir.replace('\n',''))).replace('\\', '/'))
assert len(images) > 0, "no image found in {}".format(infer_file)
return images

该函数从一个文件中读取所有的测试集图片路径,保存在一个列表中并返回。

定义了一个函数load_predictor(),用于加载预测模型和配置信息:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
# (Quoted excerpt, explained below.)  Builds a paddle.inference predictor
# from the exported model files; note the GPU memory pool (3000 MB) and
# device id (0) are hard-coded here.
def load_predictor(model_dir):
config = Config(
os.path.join(model_dir, 'model.pdmodel'),
os.path.join(model_dir, 'model.pdiparams'))
# initial GPU memory(M), device ID
config.enable_use_gpu(3000, 0)
# optimize graph and fuse op
config.switch_ir_optim(True)
# disable print log when predict
config.disable_glog_info()
# enable shared memory
config.enable_memory_optim()
# disable feed, fetch OP, needed by zero_copy_run
config.switch_use_feed_fetch_ops(False)
predictor = create_predictor(config)
return predictor, config

》》》未完待续