diff --git a/Dockerfile b/Dockerfile
index 91a6904..f361844 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -5,12 +5,12 @@ FROM python:3.12-slim
 LABEL org.opencontainers.image.source="https://github.com/AI-SIP/MVP_CV"
 
 # Install the required system packages first
-RUN apt-get update && apt-get install -y git \
+RUN apt-get update && apt-get install -y \
     libgl1-mesa-glx \
     libglib2.0-0 \
     gcc \
     python3-dev && \
-    rm -rf /var/lib/apt/lists/* \
+    rm -rf /var/lib/apt/lists/*
 
 # Set the working directory
 WORKDIR /test
diff --git a/app/AIProcessor.py b/app/AIProcessor.py
index f578f5b..3c91781 100644
--- a/app/AIProcessor.py
+++ b/app/AIProcessor.py
@@ -4,7 +4,7 @@
 import cv2
 from ultralytics import YOLO
 from ultralytics import SAM
-from segment_anything import sam_model_registry, SamPredictor
+# from segment_anything import sam_model_registry, SamPredictor
 import io
 import logging
@@ -50,9 +50,10 @@ def remove_alpha(self, image):
         return image_rgb
 
     def load_sam_model(self, model_type="vit_h"):  # load SAM
-        self.sam_model = sam_model_registry[model_type](checkpoint=self.sam_path)
-        self.sam_model.to(self.device)
-        self.predictor = SamPredictor(self.sam_model)
+        # self.sam_model = sam_model_registry[model_type](checkpoint=self.sam_path)
+        # self.sam_model.to(self.device)
+        # self.predictor = SamPredictor(self.sam_model)
+        logging.info("SAM model cannot be loaded")
 
     def object_detection(self, img):
         results = self.yolo_model.predict(source=img, imgsz=640, device=self.device,
@@ -76,9 +77,7 @@ def segment_from_yolo(self, image, bbox, save_path=None):
             # kernel = np.ones((15, 15), np.uint8)
             # mask_np = cv2.morphologyEx(mask_np, cv2.MORPH_GRADIENT, kernel)'''
             masks_np = cv2.bitwise_or(masks_np, mask_np)
-            # cv2.imwrite(f'mask_box{i}.jpg', masks_np)
-            # cv2.imwrite(save_path, masks_np)
 
         logging.info(f'Pass 1 masking - segmented {len(mask_boxes)} bounding boxes.')
         return masks_np
@@ -96,49 +95,50 @@ def segment_from_user(self, image, user_inputs, bbox, save_path=None):
             mask_np = mask_np.squeeze()
             masks_np = cv2.bitwise_or(masks_np, mask_np)
-            # cv2.imwrite(save_path, mask_points_uint8)
 
         logging.info(f'Pass 2 masking - segmented {len(mask_points)} regions from user input.')
         return masks_np
 
-    def inpainting(self, image, mask_total, bbox=None):
-
-        mask_total = np.zeros(image.shape[:2], dtype=np.uint8)
+    def inpaint_from_yolo(self, image, bbox=None):
+        inpainted_image = image.copy()
+        masks_np = np.zeros(image.shape[:2], dtype=np.uint8)
         text_np = np.zeros(image.shape[:2], dtype=np.uint8)  # stores the combined roi_mask
 
         for b in bbox:
             minx, miny, maxx, maxy = map(int, b)
-            mask_total[miny:maxy, minx:maxx] = 255  # fill the box region with 255
-            roi = image[miny:maxy, minx:maxx]  # crop just that region
-            roi_mask = cv2.inRange(roi, (0, 0, 0), (45, 45, 45))  # printed text inside the ROI becomes 255
+            masks_np[miny:maxy, minx:maxx] = 255  # fill the box region with 255
+
+            roi = image[miny:maxy, minx:maxx]  # copy the box region and extract only the text
+            roi_mask = cv2.inRange(roi, (0, 0, 0), (40, 40, 45))  # printed text inside the ROI becomes 255
             text_np[miny:maxy, minx:maxx] = roi_mask
 
+            if maxy < image.shape[0] - 2:  # sample the BG color near the box
+                sample_color = image[maxy + 1, (minx + maxx) // 2].tolist()  # pixel just below the bottom-center of the box
+            else:
+                sample_color = [255, 255, 255]  # default to white when the box touches the border
+            inpainted_image[miny:maxy, minx:maxx] = sample_color  # inpaint
+            logging.info(f'Pass 1 inpainting - YOLO box sampled color RGB: {sample_color}')
+
+        # restore the printed text
         text_np = cv2.dilate(text_np, np.ones((4, 4), np.uint8), iterations=1)
         text_np = cv2.erode(text_np, np.ones((2, 2), np.uint8), iterations=2)
-        # cv2.imwrite('text_np.jpg', text_np.astype(np.uint8))
-        inpainted_image = image.copy()
-        inpainted_image[mask_total == 255] = [255, 255, 255]
         inpainted_image[text_np == 255] = [30, 30, 30]
-        final_image = cv2.convertScaleAbs(inpainted_image, alpha=1.5, beta=15)
-
-        # inpainted_image = cv2.inpaint(image.copy(), mask_total, 15, cv2.INPAINT_TELEA)
-        # cv2.imwrite('test_images/inpainted_init.png', inpainted_image)
-        # cv2.imwrite('test_images/inpainted_final.png', final_image)
-        logging.info('YOLO result inpainting and post-correction complete.')
+        # inpainted_image = cv2.inpaint(inpainted_image, mask_total, 10, cv2.INPAINT_TELEA)
 
-        return final_image
+        return text_np, masks_np, inpainted_image
 
     def inpaint_from_user(self, image, user_boxes, save_path=None):
-        masks_np = np.zeros(image.shape[:2], dtype=np.uint8)
-        for b in user_boxes:  # fill the whole box interior (masking)
+        inpainted_image = image.copy()
+        masks_np = np.zeros(image.shape[:2], dtype=np.uint8)  # mask the box locations
+        for b in user_boxes:
             minx, miny, maxx, maxy = map(int, b)
             masks_np[miny:maxy, minx:maxx] = 255  # fill the box region with 255
+            if minx > 2:  # sample the BG color near the box
+                sample_color = image[(miny + maxy) // 2, minx - 2].tolist()  # pixel just outside the box's left edge, at mid-height
+            else:
+                sample_color = [255, 255, 255]  # default to white when the box touches the border
+            inpainted_image[miny:maxy, minx:maxx] = sample_color  # inpaint
+            logging.info(f'Pass 2 inpainting - user box sampled color RGB: {sample_color}')
 
-        # cv2.imwrite(save_path, mask_points_uint8)
-        inpainted_image = image.copy()
-        inpainted_image[masks_np == 255] = [255, 255, 255]
-        # final_image = cv2.convertScaleAbs(inpainted_image, alpha=1.5, beta=10)
-
-        logging.info(f'Pass 2 masking & inpainting - inpainted {len(user_boxes)} user-input regions.')
         return masks_np, inpainted_image
 
     def combine_alpha(self, image_rgb):
@@ -169,32 +169,34 @@ def process(self, img_bytes, user_inputs, extension='jpg'):
         bbox = self.object_detection(image)
         if len(bbox) > 0:
             logging.info("***** Pass 1: object detection segmentation started ******")
-            masks_by_yolo = self.segment_from_yolo(image, bbox, save_path=None)  # 'test_images/seg_box.png'
+            # masks_by_yolo = self.segment_from_yolo(image, bbox, save_path=None)  # 'test_images/seg_box.png'
+            recovery, masks_by_yolo, image_output = self.inpaint_from_yolo(image_output, bbox)
             masks_total = cv2.bitwise_or(masks_total, masks_by_yolo)
+            logging.info('***** Pass 1: object detection inpainting complete ******')
+            # cv2.imwrite('inpainted_yolo_복원.jpg', recovery.astype(np.uint8))  # the detected and restored text
+            # cv2.imwrite('inpainted_yolo_결과.jpg', image_output)  # pass-1 masking and inpainting result
         else:
             logging.info("***** Pass 1: object detection segmentation skipped ******")
             masks_by_yolo = None
 
-        ### Pass 2: Segment by User Prompt
         if len(user_inputs) > 0:
             logging.info("***** Pass 2: user input segmentation started ******")
-            masks_by_user = self.segment_from_user(image, user_inputs, bbox, save_path=None)  # save_path='test_images/seg_points.png'
+            # masks_by_user = self.segment_from_user(image, user_inputs, bbox, save_path=None)  # save_path='test_images/seg_points.png'
+            masks_by_user, image_output = self.inpaint_from_user(image_output, user_inputs)
             masks_total = cv2.bitwise_or(masks_total, masks_by_user)
-            masks_by_user, image_output = self.inpaint_from_user(image, user_inputs, save_path=None)
-            _, mask_bytes = cv2.imencode("." + extension, image_output)
+            logging.info('***** Pass 2: user input inpainting complete ******')
+            # cv2.imwrite('inpainted_yolo&user_결과.jpg', image_output)
         else:
             logging.info("***** Pass 2: user input segmentation skipped ******")
             masks_by_user = None
-            mask_bytes = None
 
-        if isinstance(masks_total, np.ndarray) and len(bbox) > 0:
-            image_output = self.inpainting(image_output, masks_total, bbox)
-            logging.info('***** Inpainting complete ******')
+        image_output = cv2.convertScaleAbs(image_output, alpha=1.5, beta=10)
+        logging.info("***** Post-correction of the handwriting-removal result complete ******")
 
+        # generate the five byte streams
         _, input_bytes = cv2.imencode("." + extension, image)
         _, result_bytes = cv2.imencode("." + extension, image_output)
-
         if masks_by_yolo is not None:
             mask_by_yolo_img = image.copy()
             mask_by_yolo_img[masks_by_yolo == 255] = [0, 0, 0]
@@ -207,10 +209,13 @@ def process(self, img_bytes, user_inputs, extension='jpg'):
             _, mask_by_user_bytes = cv2.imencode("." + extension, mask_by_user_img)
         else:
             mask_by_user_bytes = None
-
         mask_total_img = image.copy()
         mask_total_img[masks_total == 255] = [0, 0, 0]
         _, mask_bytes = cv2.imencode("." + extension, mask_total_img)
 
+        # cv2.imwrite('input.jpg', image)
+        # cv2.imwrite('mask_total.jpg', mask_total_img)
+        # cv2.imwrite('output.jpg', image_output)
+
         return (io.BytesIO(input_bytes), io.BytesIO(mask_bytes), io.BytesIO(result_bytes),
                 io.BytesIO(mask_by_yolo_bytes), io.BytesIO(mask_by_user_bytes))
diff --git a/requirements.txt b/requirements.txt
index 230ff78..c14d20f 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -49,4 +49,3 @@ torch==2.5.1
 torchvision==0.20.1
 matplotlib==3.9.1.post1
 ultralytics==8.3.31
-git+https://github.com/facebookresearch/segment-anything.git@dca509fe793f601edb92606367a655c15ac00fdf
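
Note: the heart of this patch is replacing the old blanket white-out with a constant-color fill sampled from just outside each box, plus a dilate/erode pass that re-draws the printed text. Below is a minimal standalone sketch of that technique for trying it outside the class; fill_boxes_with_sampled_bg is an illustrative name, not part of the patch, and the thresholds mirror the values used in inpaint_from_yolo.

import cv2
import numpy as np

def fill_boxes_with_sampled_bg(image, boxes):
    """Fill each box with a background color sampled just below it,
    then re-draw the printed (dark) text in a neutral dark gray."""
    inpainted = image.copy()
    text_mask = np.zeros(image.shape[:2], dtype=np.uint8)

    for box in boxes:
        minx, miny, maxx, maxy = map(int, box)
        roi = image[miny:maxy, minx:maxx]
        # Dark pixels (printed text) inside the box become 255 in the mask.
        text_mask[miny:maxy, minx:maxx] = cv2.inRange(roi, (0, 0, 0), (40, 40, 45))

        if maxy < image.shape[0] - 2:
            # Sample one pixel below the bottom-center of the box.
            sample_color = image[maxy + 1, (minx + maxx) // 2].tolist()
        else:
            sample_color = [255, 255, 255]  # box touches the border: fall back to white
        inpainted[miny:maxy, minx:maxx] = sample_color

    # Thicken, then thin, the recovered strokes before re-drawing them.
    text_mask = cv2.dilate(text_mask, np.ones((4, 4), np.uint8), iterations=1)
    text_mask = cv2.erode(text_mask, np.ones((2, 2), np.uint8), iterations=2)
    inpainted[text_mask == 255] = [30, 30, 30]

    # Same brightness/contrast post-correction that process() now applies.
    return cv2.convertScaleAbs(inpainted, alpha=1.5, beta=10)

The constant fill assumes the paper behind each box is locally uniform; a single sampled pixel is cheap but noise-sensitive, so averaging a small strip below the box would be a safer variant.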
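
Note: both inpaint_from_yolo and the deleted inpainting() keep a cv2.inpaint call with cv2.INPAINT_TELEA commented out. If the constant fill shows seams on textured or gradient backgrounds, that path could be restored; here is a sketch under the same box convention (inpaint_boxes_telea is an illustrative name, not part of the patch).

import cv2
import numpy as np

def inpaint_boxes_telea(image, boxes, radius=10):
    """Reconstruct box interiors from their surroundings with INPAINT_TELEA."""
    mask = np.zeros(image.shape[:2], dtype=np.uint8)
    for box in boxes:
        minx, miny, maxx, maxy = map(int, box)
        mask[miny:maxy, minx:maxx] = 255  # 255 marks the pixels to reconstruct
    # Slower than a constant fill, but blends gradients and paper texture.
    return cv2.inpaint(image, mask, radius, cv2.INPAINT_TELEA)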