From cbf2ca4601860bef37e83c0573632ef654b63ef6 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Thu, 24 Mar 2022 00:42:29 +0100 Subject: [PATCH 1/2] build with proper pix2pixhd pkg --- Makefile | 22 +++++----------------- ocrd_anybaseocr/pix2pixhd | 2 +- requirements.txt | 1 + 3 files changed, 7 insertions(+), 18 deletions(-) diff --git a/Makefile b/Makefile index 8606420..c05bbd3 100755 --- a/Makefile +++ b/Makefile @@ -26,7 +26,7 @@ help: @echo "" @echo " deps Install python deps via pip" @echo " install Install" - @echo " patch-pix2pixhd Patch pix2pixhd to trick it into thinking it was part of this mess" + @echo " ocrd_anybaseocr/pix2pixhd Checkout pix2pixhd submodule" @echo " repo/assets Clone OCR-D/assets to ./repo/assets" @echo " assets-clean Remove assets" @echo " assets Setup test assets" @@ -53,23 +53,11 @@ deps: $(PIP_INSTALL) -r requirements.txt # Install -install: patch-pix2pixhd +install: ocrd_anybaseocr/pix2pixhd $(PIP_INSTALL) . -.PHONY: patch-pix2pixhd - -# Patch pix2pixhd to trick it into thinking it was part of this mess -PIX2PIX_FILES = ocrd_anybaseocr/pix2pixhd/*/*.py ocrd_anybaseocr/pix2pixhd/*.py -patch-pix2pixhd: pix2pixhd - touch ocrd_anybaseocr/pix2pixhd/__init__.py - sed -i 's,^from util,from ..util,' $(PIX2PIX_FILES) - sed -i 's,^import util,import ..util,' $(PIX2PIX_FILES) - sed -i 's,^\(\s*\)from data,\1from .data,' ocrd_anybaseocr/pix2pixhd/*.py - sed -i 's,^\(\s*\)from data,\1from ..data,' ocrd_anybaseocr/pix2pixhd/*/*.py - # string exceptions, srsly y - sed -i "s,raise('\([^']*\)',raise(Exception('\1')," $(PIX2PIX_FILES) - -pix2pixhd: - git submodule update --init + +ocrd_anybaseocr/pix2pixhd: + git submodule update --init $@ # # Assets diff --git a/ocrd_anybaseocr/pix2pixhd b/ocrd_anybaseocr/pix2pixhd index e524de2..6b75232 160000 --- a/ocrd_anybaseocr/pix2pixhd +++ b/ocrd_anybaseocr/pix2pixhd @@ -1 +1 @@ -Subproject commit e524de235b251adddee6ca2bcbd31115a834077c +Subproject commit 6b752323d83537f2554c96de3036e9b5b904aa70 diff --git a/requirements.txt b/requirements.txt index 2dc3a92..2e2ce4a 100755 --- a/requirements.txt +++ b/requirements.txt @@ -13,3 +13,4 @@ shapely tensorflow torch>=1.1.0 torchvision >= 0.6.1 +pix2pixhd # @ ./ocrd_anybaseocr/pix2pixhd From 8e0128a5273dd8c8c59af2b98e0e6543cbc12113 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Thu, 24 Mar 2022 23:41:24 +0100 Subject: [PATCH 2/2] dewarp: use segment ID for image file ID --- ocrd_anybaseocr/cli/ocrd_anybaseocr_dewarp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocrd_anybaseocr/cli/ocrd_anybaseocr_dewarp.py b/ocrd_anybaseocr/cli/ocrd_anybaseocr_dewarp.py index f693858..668ed28 100755 --- a/ocrd_anybaseocr/cli/ocrd_anybaseocr_dewarp.py +++ b/ocrd_anybaseocr/cli/ocrd_anybaseocr_dewarp.py @@ -195,7 +195,7 @@ def _process_segment(self, dataset, segment, coords, orig_img_size, input_file): dewarped = np.mean(dewarped, axis=2) > ocrolib.midrange(dewarped) dewarped = Image.fromarray(dewarped) coords['features'] += ',dewarped' - file_id = make_file_id(input_file, self.output_file_grp) + '.IMG-DEW' + file_id = make_file_id(input_file, self.output_file_grp) + '_' + segment.id + '.IMG-DEW' file_path = self.workspace.save_image_file(dewarped, file_id, page_id=input_file.pageId,