-
Notifications
You must be signed in to change notification settings - Fork 8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix the bug where Python scripts fail to execute PDF text recognition… #11994
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -559,8 +559,9 @@ def check_img(img, alpha_color=(255, 255, 255)): | |
file format: jpg, png and other image formats that opencv can decode, as well as gif and pdf formats | ||
storage type: binary image, net image file, local image file | ||
alpha_color: Background color in images in RGBA format | ||
return: numpy.array (h, w, 3) | ||
return: numpy.array (h, w, 3) or list (p, h, w, 3) (p: page of pdf), boolean, boolean | ||
""" | ||
flag_gif, flag_pdf = False, False | ||
if isinstance(img, bytes): | ||
img = img_decode(img) | ||
if isinstance(img, str): | ||
|
@@ -589,17 +590,17 @@ def check_img(img, alpha_color=(255, 255, 255)): | |
img = cv2.imdecode(img_array, cv2.IMREAD_COLOR) | ||
except: | ||
logger.error("error in loading image:{}".format(image_file)) | ||
return None | ||
return None, flag_gif, flag_pdf | ||
if img is None: | ||
logger.error("error in loading image:{}".format(image_file)) | ||
return None | ||
return None, flag_gif, flag_pdf | ||
# single channel image array.shape:h,w | ||
if isinstance(img, np.ndarray) and len(img.shape) == 2: | ||
img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) | ||
# four channel image array.shape:h,w,c | ||
if isinstance(img, np.ndarray) and len(img.shape) == 3 and img.shape[2] == 4: | ||
img = alpha_to_color(img, alpha_color) | ||
return img | ||
return img, flag_gif, flag_pdf | ||
|
||
|
||
class PaddleOCR(predict_system.TextSystem): | ||
|
@@ -700,9 +701,9 @@ def ocr( | |
"Since the angle classifier is not initialized, it will not be used during the forward process" | ||
) | ||
|
||
img = check_img(img, alpha_color) | ||
img, flag_gif, flag_pdf = check_img(img, alpha_color) | ||
# for infer pdf file | ||
if isinstance(img, list): | ||
if isinstance(img, list) and flag_pdf: | ||
if self.page_num > len(img) or self.page_num == 0: | ||
imgs = img | ||
else: | ||
|
@@ -837,7 +838,16 @@ def __call__( | |
img_idx=0, | ||
alpha_color=(255, 255, 255), | ||
): | ||
img = check_img(img, alpha_color) | ||
img, flag_gif, flag_pdf = check_img(img, alpha_color) | ||
if isinstance(img, list) and flag_pdf: | ||
res_list = [] | ||
for index, pdf_img in enumerate(img): | ||
logger.info("processing {}/{} page:".format(index + 1, len(img))) | ||
res, _ = super().__call__( | ||
pdf_img, return_ocr_result_in_table, img_idx=index | ||
) | ||
res_list.append(res) | ||
return res_list | ||
res, _ = super().__call__(img, return_ocr_result_in_table, img_idx=img_idx) | ||
return res | ||
Comment on lines +850 to 852
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 这里返回类型发生了改变,会不会对用户使用造成困扰?建议参考ocr部分处理一下。 There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 关于返回类型发生了改变,会不会对用户使用造成困扰。参考ocr部分可知,它对返回类型的处理为,img如果是list,则不做改变,如果不是list,则把它放入一个list里返回,即都处理成一个list。然而,对于PPStructure类,它的定义和ocr不同,似乎是设计为返回单个页面的结果,main函数验证了我的猜想,目前命令行的方式里调用PPStructure是让它返回单个值的,如果按照OCR的部分处理的话,势必要改变main函数,我觉得还是暂时不动比较好。因为您那边可能对后续如何编写有其它设计,我尽量不改变已有的操作方式。 |
||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
这样改,对处理 gif 会不会有影响?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
返回 flag_gif 和 flag_pdf 是不是很有必要?这里判断它是不是 list,应该也是可以达到目标的。
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.