PaddlePaddle · GreatV · Apr 25, 2024 · Apr 24, 2024 · Apr 25, 2024 · Apr 25, 2024
diff --git a/paddleocr.py b/paddleocr.py
@@ -559,8 +559,9 @@ def check_img(img, alpha_color=(255, 255, 255)):
             file format: jpg, png and other image formats that opencv can decode, as well as gif and pdf formats
             storage type: binary image, net image file, local image file
         alpha_color: Background color in images in RGBA format
-        return: numpy.array (h, w, 3)
+        return: tuple (img, flag_gif, flag_pdf): img is a numpy.array (h, w, 3), a list of p such arrays (p: number of pdf pages), or None if loading failed; flag_gif and flag_pdf are booleans
     """
+    flag_gif, flag_pdf = False, False
     if isinstance(img, bytes):
         img = img_decode(img)
     if isinstance(img, str):
@@ -589,17 +590,17 @@ def check_img(img, alpha_color=(255, 255, 255)):
                     img = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
                 except:
                     logger.error("error in loading image:{}".format(image_file))
-                    return None
+                    return None, flag_gif, flag_pdf
         if img is None:
             logger.error("error in loading image:{}".format(image_file))
-            return None
+            return None, flag_gif, flag_pdf
     # single channel image array.shape:h,w
     if isinstance(img, np.ndarray) and len(img.shape) == 2:
         img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
     # four channel image array.shape:h,w,c
     if isinstance(img, np.ndarray) and len(img.shape) == 3 and img.shape[2] == 4:
         img = alpha_to_color(img, alpha_color)
-    return img
+    return img, flag_gif, flag_pdf
 
 
 class PaddleOCR(predict_system.TextSystem):
@@ -700,9 +701,9 @@ def ocr(
                 "Since the angle classifier is not initialized, it will not be used during the forward process"
             )
 
-        img = check_img(img, alpha_color)
+        img, flag_gif, flag_pdf = check_img(img, alpha_color)
         # for infer pdf file
-        if isinstance(img, list):
+        if isinstance(img, list) and flag_pdf:
             if self.page_num > len(img) or self.page_num == 0:
                 imgs = img
             else:
@@ -837,7 +838,16 @@ def __call__(
         img_idx=0,
         alpha_color=(255, 255, 255),
     ):
-        img = check_img(img, alpha_color)
+        img, flag_gif, flag_pdf = check_img(img, alpha_color)
+        if isinstance(img, list) and flag_pdf:
+            res_list = []
+            for index, pdf_img in enumerate(img):
+                logger.info("processing {}/{} page:".format(index + 1, len(img)))
+                res, _ = super().__call__(
+                    pdf_img, return_ocr_result_in_table, img_idx=index
+                )
+                res_list.append(res)
+            return res_list
         res, _ = super().__call__(img, return_ocr_result_in_table, img_idx=img_idx)
         return res
 

diff --git a/ppstructure/docs/quickstart.md b/ppstructure/docs/quickstart.md
@@ -209,6 +209,62 @@ for line in result:
     print(line)
 ```
 
+```python
+import os
+import cv2
+from paddleocr import PPStructure,save_structure_res
+
+ocr_engine = PPStructure(table=False, ocr=True, show_log=True)
+
+save_folder = './output'
+img_path = 'ppstructure/recovery/UnrealText.pdf'
+result = ocr_engine(img_path)
+for index, res in enumerate(result):
+    save_structure_res(res, save_folder, os.path.basename(img_path).split('.')[0], index)
+
+for res in result:
+    for line in res:
+        line.pop('img')
+        print(line)
+```
+
+```python
+import os
+import cv2
+import numpy as np
+from paddleocr import PPStructure,save_structure_res
+from paddle.utils import try_import
+from PIL import Image
+
+ocr_engine = PPStructure(table=False, ocr=True, show_log=True)
+
+save_folder = './output'
+img_path = 'ppstructure/recovery/UnrealText.pdf'
+
+fitz = try_import("fitz")
+imgs = []
+with fitz.open(img_path) as pdf:
+    for pg in range(0, pdf.page_count):
+        page = pdf[pg]
+        mat = fitz.Matrix(2, 2)
+        pm = page.get_pixmap(matrix=mat, alpha=False)
+
+        # if width or height > 2000 pixels, don't enlarge the image
+        if pm.width > 2000 or pm.height > 2000:
+            pm = page.get_pixmap(matrix=fitz.Matrix(1, 1), alpha=False)
+
+        img = Image.frombytes("RGB", [pm.width, pm.height], pm.samples)
+        img = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
+        imgs.append(img)
+
+for index, img in enumerate(imgs):
+    result = ocr_engine(img)
+    save_structure_res(result, save_folder, os.path.basename(img_path).split('.')[0], index)
+    for line in result:
+        line.pop('img')
+        print(line)
+```
+
 <a name="224"></a>
 
 #### 2.2.4 表格识别

diff --git a/ppstructure/docs/quickstart_en.md b/ppstructure/docs/quickstart_en.md
@@ -192,6 +192,62 @@ for line in result:
     print(line)
 ```
 
+```python
+import os
+import cv2
+from paddleocr import PPStructure,save_structure_res
+
+ocr_engine = PPStructure(table=False, ocr=True, show_log=True)
+
+save_folder = './output'
+img_path = 'ppstructure/recovery/UnrealText.pdf'
+result = ocr_engine(img_path)
+for index, res in enumerate(result):
+    save_structure_res(res, save_folder, os.path.basename(img_path).split('.')[0], index)
+
+for res in result:
+    for line in res:
+        line.pop('img')
+        print(line)
+```
+
+```python
+import os
+import cv2
+import numpy as np
+from paddleocr import PPStructure,save_structure_res
+from paddle.utils import try_import
+from PIL import Image
+
+ocr_engine = PPStructure(table=False, ocr=True, show_log=True)
+
+save_folder = './output'
+img_path = 'ppstructure/recovery/UnrealText.pdf'
+
+fitz = try_import("fitz")
+imgs = []
+with fitz.open(img_path) as pdf:
+    for pg in range(0, pdf.page_count):
+        page = pdf[pg]
+        mat = fitz.Matrix(2, 2)
+        pm = page.get_pixmap(matrix=mat, alpha=False)
+
+        # if width or height > 2000 pixels, don't enlarge the image
+        if pm.width > 2000 or pm.height > 2000:
+            pm = page.get_pixmap(matrix=fitz.Matrix(1, 1), alpha=False)
+
+        img = Image.frombytes("RGB", [pm.width, pm.height], pm.samples)
+        img = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
+        imgs.append(img)
+
+for index, img in enumerate(imgs):
+    result = ocr_engine(img)
+    save_structure_res(result, save_folder, os.path.basename(img_path).split('.')[0], index)
+    for line in result:
+        line.pop('img')
+        print(line)
+```
+
 <a name="224"></a>
 #### 2.2.4 table recognition