forked from Wasim37/answer_card_detection
-
Notifications
You must be signed in to change notification settings - Fork 0
/
detection_choice_question.py
402 lines (319 loc) · 14.8 KB
/
detection_choice_question.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
'''
Author: wangxin
Date: 2021-05-25 10:31:01
LastEditTime: 2021-07-01 14:11:58
LastEditors: Please set LastEditors
Description: 选择题自动识别与批改
'''
# coding=utf-8
import cv2
import numpy as np
import pytesseract
from pytesseract import Output
from settings import ANSWER_CARD_MIN_WIDTH, ANSWER_CARD_SIZE
from utils import sort_contours, save_img_by_cnts
def detection_choice_question(images_path, ocr):
""" 选择题自动识别与批改
Args:
images_path (list): 图片地址列表
Returns:
[list]: 每张图片的识别结果
"""
sub_answer_cnt_szie = 0
question_answers = []
for img_path in images_path:
image = cv2.imread(img_path)
if not is_choice_question(image):
continue
# 获取图片中填充的全部答案轮廓
answer_option_cnts = get_answer_option_cnts(image)
if len(answer_option_cnts) > 0:
save_img_by_cnts('out/answer_cnt_' + str(sub_answer_cnt_szie) + '.png', image.shape[:2], answer_option_cnts)
# 所有被填充的选择项的中心的x坐标
answer_options_center_x = get_cnt_center_x(answer_option_cnts)
# 所有未被填充的选择项的中心的x坐标
choice_options_center_x = get_choice_option_center_x(img_path)
# 所有选择项的中心的x坐标
all_options_center_x = answer_options_center_x + choice_options_center_x
# 获取所有选择项的轮廓及其题序轮廓
all_choice_option_cnts, question_number_cnts = get_choice_option_cnts(image, all_options_center_x)
if len(all_choice_option_cnts) > 0:
save_img_by_cnts('out/choice_cnt_' + str(sub_answer_cnt_szie) + '.png', image.shape[:2], all_choice_option_cnts)
save_img_by_cnts('out/ques_num_' + str(sub_answer_cnt_szie) + '.png', image.shape[:2], question_number_cnts)
sub_answer_cnt_szie = sub_answer_cnt_szie + 1
# 选择题自动批改
if len(all_choice_option_cnts) > 0:
question_answer_dict = get_choice_question_answer_index(image, all_choice_option_cnts, answer_option_cnts, question_number_cnts, ocr)
question_answers.append(question_answer_dict)
return question_answers
def get_answer_option_cnts(img):
""" 识别图片中的填充的全部答案轮廓
Args:
img_path (String): 图片
Returns:
[list]: 候选项轮廓
"""
# 转灰度
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# OTSU二值化(黑底白字)
thresh_img = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
# 腐蚀
kernel = np.ones((5, 5), np.uint8)
erode_img = cv2.erode(thresh_img, kernel, iterations=1)
# 膨胀
kernel = np.ones((9, 9), np.uint8)
dilate_img = cv2.dilate(erode_img, kernel, iterations=1)
# 提取答案的轮廓
answer_cnts, _ = cv2.findContours(dilate_img.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
# 减少答案轮廓的边数
answer_option_cnts = []
for cnt in answer_cnts:
peri = cv2.arcLength(cnt, True)
approx = cv2.approxPolyDP(cnt, 0.06 * peri, True)
answer_option_cnts.append(approx)
# self.assertTrue(choiceAnswerCnts % 4 == 0, "候选框提取异常, 提取的数量不是4的整数")
return answer_option_cnts
def get_choice_option_cnts(img, all_options_center_x):
"""识别图片中的所有的选择项轮廓与题序轮廓
Args:
img ([type]): [description]
all_option_center_x ([type]): [description]
Returns:
[type]: [description]
"""
# 灰度
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# 二值化(黑底白字)
thresh_img = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
# 对高亮部分膨胀
# 因为候选区域由三部分组成(左括号、右括号、大写的英文字母),通过膨胀将三个区域连成一片
kernel = np.ones((11, 11), np.uint8)
dilate_img = cv2.dilate(thresh_img, kernel, iterations=1)
# 提取膨胀后的轮廓
option_cnts, _ = cv2.findContours(dilate_img.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
# 所有候选框的轮廓
choice_option_cnts = []
# 每道选择题的题序
question_number_cnts = []
for c in option_cnts:
peri = cv2.arcLength(c, True)
area = cv2.contourArea(c)
approx = cv2.approxPolyDP(c, 0.1 * peri, True)
(x, y, w, h) = cv2.boundingRect(approx)
ar = h / float(w)
# 筛选轮廓为四边形的目前轮廓
# if y >= 60 and w >= 20 and w <= 60 and ar >= 1 and ar <= 2 and area > 700:
if y >= 60 and ar > 0.5 and ar < 2:
if is_choice_option(x, w, all_options_center_x) and area > 400:
choice_option_cnts.append(c)
elif not is_choice_option(x, w, all_options_center_x) and area > 100:
question_number_cnts.append(c)
return choice_option_cnts, question_number_cnts
def is_choice_option(x, w, all_option_center_x):
for center_x in all_option_center_x:
if center_x > x and center_x < x + w:
return True
return False
def get_cnt_center_x(cnts):
"""返回轮廓中心的x轴坐标
Args:
cnts (list): 轮廓列表
Returns:
[list]: 中心x轴坐标
"""
center_x = []
for cnt in cnts:
(x, y, w, h) = cv2.boundingRect(cnt)
center_x.append((2 * x + w) / 2)
return center_x
def get_choice_option_center_x(img):
""" 识别所有未被填充的选择项的中心的x坐标
Args:
img ([type]): [description]
Returns:
[type]: [description]
"""
img = cv2.imread(img)
ocr_reslut = pytesseract.image_to_data(img, output_type=Output.DICT, lang='chi_sim')
choice_option_center_x = []
for i in range(len(ocr_reslut['text'])):
text_i = ocr_reslut['text'][i]
(x, y, w, _) = (ocr_reslut['left'][i], ocr_reslut['top'][i], ocr_reslut['width'][i], ocr_reslut['height'][i])
if y > 60 and ('A' in text_i or 'B' in text_i or 'C' in text_i or 'D' in text_i):
choice_option_center_x.append((2 * x + w) / 2)
return choice_option_center_x
def get_answer_card_cnts(img):
""" 获得答题卡的左右答题区域
# findContours 函数详解:https://blog.csdn.net/laobai1015/article/details/76400725
# approxPolyDP 多边形近似 https://blog.csdn.net/kakiebu/article/details/79824856
Args:
img ([type]): 图片
Returns:
[type]: 答题卡的左右答题区域轮廓
"""
# 检测图片中的最外围轮廓
cnts, _ = cv2.findContours(img.copy(), cv2.RETR_EXTERNAL,
cv2.CHAIN_APPROX_SIMPLE)
# print("原始图片检测的轮廓总数:", len(cnts))
if len(cnts) == 0:
return None
# 提取的轮廓总数
contour_size = 0
# 检测到的左右答题区域轮廓
answer_cnts = []
# 将轮廓按大小, 降序排序
cnts = sorted(cnts, key=cv2.contourArea, reverse=True)
for c in cnts:
# arcLength 计算周长
peri = cv2.arcLength(c, True)
# print("轮廓周长:", peri)
# 之前寻找到的轮廓可能是多边形,现在通过寻找近似轮廓,得到期望的四边形
approx = cv2.approxPolyDP(c, 0.02 * peri, True)
# print('原始轮廓的边数:', len(c), ', 近似轮廓的边数:', len(approx))
# 当近似轮廓为4时,代表是需要提取的矩形区域
if len(approx) == 4:
contour_size = contour_size + 1
answer_cnts.append(approx)
# 只提取答题卡中的最大两个轮廓
if contour_size == ANSWER_CARD_SIZE:
break
answer_cnts = sort_contours(answer_cnts, method="left-to-right")[0]
return answer_cnts
def get_sub_answer_card_cnts(img_path):
""" 获得答题卡的子区域
# findContours 函数详解:https://blog.csdn.net/laobai1015/article/details/76400725
# approxPolyDP 多边形近似 https://blog.csdn.net/kakiebu/article/details/79824856
Args:
img ([type]): 图片
Returns:
[type]: 答题卡的左右答题区域轮廓
"""
image = cv2.imread(img_path)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# warped_answer_image_1 = four_point_transform(gray, answer_contour_1.reshape(4, 2))
# 二值化
thresh = cv2.threshold(gray, 0, 255,
cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
# 在二值图像中查找轮廓,包括内围、外围轮廓,但是检测到的轮廓不建立等级关系,彼此之间独立
thresh_cnts, _ = cv2.findContours(thresh.copy(), cv2.RETR_LIST,
cv2.CHAIN_APPROX_SIMPLE)
cnt_size = 0
sub_answer_cnts = []
if len(thresh_cnts) > 0:
# 将轮廓按大小, 降序排序
thresh_cnts = sorted(thresh_cnts, key=cv2.contourArea, reverse=True)
for c in thresh_cnts:
cnt_size = cnt_size + 1
# arcLength 计算周长
peri = cv2.arcLength(c, True)
# 计算轮廓的边界框
(x, y, w, h) = cv2.boundingRect(c)
# 之前寻找到的轮廓可能是多边形,现在通过寻找近似轮廓,得到期望的四边形
approx = cv2.approxPolyDP(c, 0.02 * peri, True)
# 只提取近似轮廓为四边形的区域, 且轮廓长度大于指定长度
# if len(approx) == 4 and w > ANSWER_CARD_MIN_WIDTH:
# print("轮廓周长:", peri, '宽:', w)
# print('原始轮廓的边数:', len(c), ', 近似轮廓的边数:', len(approx))
if w > ANSWER_CARD_MIN_WIDTH:
sub_answer_cnts.append(approx)
# 只处理前20个最大轮廓
if cnt_size >= 20:
break
# 从上到下,将轮廓排序
sub_answer_cnts = sort_contours(sub_answer_cnts, method="top-to-bottom")[0]
return sub_answer_cnts
def get_question_num_dict(image, question_number_cnts, ocr):
"""获取图片中所有的选择题的题序
Args:
image ([type]): 图片
question_number_cnts ([type]): 图片中的所有的选择题的题序轮廓
ocr ([type]): ocr识别工具
Returns:
[dict]: key: 题序, value: 题序轮廓的坐标
"""
question_num_dict = {}
for question_number_cnt in question_number_cnts:
peri = cv2.arcLength(question_number_cnt, True)
approx = cv2.approxPolyDP(question_number_cnt, 0.1 * peri, True)
(x, y, w, h) = cv2.boundingRect(approx)
# ocr识别题型轮廓区域的文本
text = ocr.ocr_for_single_line(image[y:y + h, x:x + w])
question_num = ''.join(text)
question_num = question_num.replace('.', '')
# 文本是否为数字
if question_num.isdigit():
(x, y, w, h) = cv2.boundingRect(question_number_cnt)
question_num_dict[int(question_num)] = (x, y, w, h)
# 按照题序从小到大排序
question_num_list = sorted(question_num_dict.items(), key=lambda item: item[0])
return dict(question_num_list)
def get_choice_question_answer_index(image, choice_option_cnts, answer_option_cnts, question_number_cnts, ocr):
"""自动批改, 返回每道试题对应的答案索引. \
注意:(1)用户可能没有填充答案 (2)选择题的答案数量可能大于1
Args:
choice_option_cnts (list): 试题的选择项轮廓
answer_option_cnts (list): 用户填充的答案轮廓
question_number_cnts (list): 试题的题序轮廓
Returns:
[dict]: key 题序, value 答案索引列表
"""
# 获取所有选择题的题序
question_num_dict = get_question_num_dict(image, question_number_cnts, ocr)
question_answer_dict = {}
for key in question_num_dict.keys():
(num_x, num_y, num_w, num_h) = question_num_dict[key]
num_center_x = (2 * num_x + num_w) / 2
num_center_y = (2 * num_y + num_h) / 2
# 获取同一行中,本题序右侧第一个题序的中心x坐标
min_num_center_x = float("inf") # 无穷大
for question_number_cnt in question_number_cnts:
(x, y, w, h) = cv2.boundingRect(question_number_cnt)
right_num_center_x = (2 * x + w) / 2
if num_center_y > y and num_center_y < y + h and right_num_center_x > num_center_x and right_num_center_x < min_num_center_x:
min_num_center_x = right_num_center_x
# print(min_num_center_x)
# 获取本题的全部答案轮廓的中心x坐标列表
# 一道选择题题可能有多个答案, 所以answers_center_x为列表
answers_center_x = []
for answer_option_cnt in answer_option_cnts:
(x, y, w, h) = cv2.boundingRect(answer_option_cnt)
answer_cnt_center_x = (2 * x + w) / 2
if num_center_y > y and num_center_y < y + h and answer_cnt_center_x > num_center_x and answer_cnt_center_x < min_num_center_x:
answers_center_x.append(answer_cnt_center_x)
# print('answers_center_x', answers_center_x)
# 获取本题的全部选择项轮廓
question_choice_option_cnts = []
for choice_option_cnt in choice_option_cnts:
# print(len(question_choice_option_cnts))
(x, y, w, h) = cv2.boundingRect(choice_option_cnt)
choice_option_center_x = (2 * x + w) / 2
if num_center_y > y and num_center_y < y + h and choice_option_center_x > num_center_x and choice_option_center_x < min_num_center_x:
question_choice_option_cnts.append(choice_option_cnt)
question_choice_option_cnts, _ = sort_contours(question_choice_option_cnts, 'left-to-right')
# print('question_choice_option_cnts', len(question_choice_option_cnts))
# 答案列表
answer_indexes = []
# 答案索引
answer_index = 0
for choice_option_cnt in question_choice_option_cnts:
answer_index = answer_index + 1
(x, y, w, h) = cv2.boundingRect(choice_option_cnt)
# print((x, y, w, h), answers_center_x)
for answer_center_x in answers_center_x:
if answer_center_x > x and answer_center_x < x + w:
answer_indexes.append(answer_index)
break
question_answer_dict[key] = answer_indexes
# 返回每道试题对应的答案索引
question_answer_dict = sorted(question_answer_dict.items(), key=lambda item: item[0])
return dict(question_answer_dict)
def is_choice_question(img):
"""判断当前图片是否属于选择题
Args:
image_path ([type]): 图片
Returns:
[boolean]: false 不是 true 是
"""
ocr_result = pytesseract.image_to_data(img, output_type=Output.DICT, lang='chi_sim')
ocr_text = ocr_result['text']
return '[A]' in ocr_text or '[B]' in ocr_text or '[C]' in ocr_text or '[D]' in ocr_text