Merge pull request #16 from bioimage-io/slim-docker-image
Slim docker image
nilsmechtel authored Jan 21, 2025
2 parents ca9090d + 9b62e9b commit 3435284
Showing 14 changed files with 126 additions and 149 deletions.
11 changes: 4 additions & 7 deletions Dockerfile
@@ -23,19 +23,16 @@ RUN echo "bioimageio_colab ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers
 # Upgrade pip
 RUN pip install --upgrade pip
 
-# Copy the requirements file for SAM to the docker environment
+# Copy the requirements files to the docker environment
 COPY ./requirements.txt /app/requirements.txt
 COPY ./requirements-sam.txt /app/requirements-sam.txt
 
-# Install the required packages for SAM
-RUN pip install -r /app/requirements-sam.txt
+# Install the required packages to register the service
+RUN pip install -r /app/requirements.txt
 
 # Copy the python module and data to the docker environment
 COPY ./bioimageio_colab /app/bioimageio_colab
-COPY ./data /app/data
-
-# Create cache directory for models
-RUN mkdir -p /app/.model_cache
+COPY ./data/example_image.tif /app/data/example_image.tif
 
 # Change ownership of the application directory to the non-root user
 RUN chown -R bioimageio_colab:bioimageio_colab /app/
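The image itself now installs only requirements.txt; the SAM stack pinned in requirements-sam.txt (torch, torchvision, segment_anything, aiohttp) stays out of the image and is installed by Ray at runtime instead (see register_sam_service.py below). A minimal sanity-check sketch for the slim container, assuming the import names match the pinned packages:

import importlib
import importlib.util

# Base requirements (requirements.txt) must import inside the image.
for name in ["hypha_rpc", "ray", "numpy", "dotenv", "tifffile"]:
    importlib.import_module(name)

# SAM extras (requirements-sam.txt) should be absent until Ray installs
# them into the worker runtime_env.
for name in ["torch", "torchvision", "segment_anything", "aiohttp"]:
    assert importlib.util.find_spec(name) is None, f"{name} should not be baked in"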
7 changes: 4 additions & 3 deletions bioimageio_colab/models/sam_deployment.py
@@ -1,6 +1,3 @@
-import os
-
-import aiohttp
 import numpy as np
 from ray import serve
 
@@ -33,6 +30,8 @@ def __init__(self, cache_dir: str):
         self.models = SAM_MODELS
 
     async def _download_model(self, model_path: str, model_url: str) -> None:
+        import aiohttp
+
         async with aiohttp.ClientSession() as session:
             async with session.get(model_url) as response:
                 if response.status != 200:
@@ -43,6 +42,8 @@ async def _download_model(self, model_path: str, model_url: str) -> None:
 
     @serve.multiplexed(max_num_models_per_replica=2)
     async def get_model(self, model_id: str):
+        import os
+
         model_path = os.path.join(self.cache_dir, f"{model_id}.pt")
 
         if not os.path.exists(model_path):
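Moving the aiohttp (and even os) imports into the method bodies keeps the module importable, and shippable to Ray via py_modules, in an environment that only carries the base requirements; the heavy dependencies are resolved inside the replica, where the runtime_env has installed them. A minimal sketch of the same pattern with a hypothetical deployment, not part of this repo:

from ray import serve


@serve.deployment
class LazyDeployment:
    async def __call__(self, url: str) -> int:
        # Deferred import: resolved in the replica's runtime_env,
        # not at module import time on the driver.
        import aiohttp

        async with aiohttp.ClientSession() as session:
            async with session.get(url) as response:
                return response.status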
15 changes: 10 additions & 5 deletions bioimageio_colab/models/sam_image_encoder.py
@@ -1,15 +1,16 @@
 from typing import Literal
 
 import numpy as np
-import torch
-from segment_anything import sam_model_registry
-from segment_anything.utils.transforms import ResizeLongestSide
 
 
 class SamImageEncoder:
     def __init__(
         self, model_path: str, model_architecture: Literal["vit_b", "vit_l", "vit_h"]
     ):
+        import torch
+        from segment_anything import sam_model_registry
+        from segment_anything.utils.transforms import ResizeLongestSide
+
         # Extract image encoder from checkpoint
         device = "cuda" if torch.cuda.is_available() else "cpu"
         build_sam = sam_model_registry[model_architecture]
@@ -53,7 +54,9 @@ def _to_image_format(self, array: np.ndarray) -> np.ndarray:
 
         return array
 
-    def _preprocess(self, array: np.array) -> torch.Tensor:
+    def _preprocess(self, array: np.array):
+        import torch
+
         # Validate image shape and dtype
         original_image = self._to_image_format(array)
 
@@ -77,7 +80,7 @@ def _preprocess(self, array: np.array) -> torch.Tensor:
 
         return input_image_torch
 
-    def encode(self, array: np.ndarray):
+    def encode(self, array: np.ndarray) -> dict:
         """
         Encode an image using the SAM image encoder.
 
@@ -89,6 +92,8 @@ def encode(self, array: np.ndarray):
         - "features" (np.ndarray): The extracted features from the image
         - "input_size" (tuple): The size of the input image (H, W)
         """
+        import torch
+
         # Preprocess the input image
         input_image = self._preprocess(array)
         input_size = tuple(input_image.shape[-2:])
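With the annotation added, encode now advertises the dict it returns. A usage sketch; the checkpoint path is a placeholder for a file the deployment downloads, and the 2-D grayscale input assumes the shape validator accepts it, as the old stress test suggests:

import numpy as np

from bioimageio_colab.models.sam_image_encoder import SamImageEncoder

encoder = SamImageEncoder(
    model_path="/tmp/ray/.model_cache/sam_vit_b.pt",  # placeholder path
    model_architecture="vit_b",
)

# Encode an image and inspect the returned dict.
image = np.random.randint(0, 256, (256, 256), dtype=np.uint8)
result = encoder.encode(image)
print(result["features"].shape, result["input_size"])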
46 changes: 17 additions & 29 deletions bioimageio_colab/register_sam_service.py
@@ -5,11 +5,10 @@
 
 import numpy as np
 import ray
-import torch
 from dotenv import find_dotenv, load_dotenv
 from hypha_rpc import connect_to_server
 from hypha_rpc.rpc import RemoteService
-from kaibu_utils import mask_to_features
+# from kaibu_utils import mask_to_features
 from ray.serve.config import AutoscalingConfig
 from tifffile import imread
 
@@ -50,20 +49,14 @@ def parse_requirements(file_path) -> list:
 
 
 def connect_to_ray(address: str = None) -> None:
-    if address:
-        # Create runtime environment
-        base_requirements = parse_requirements(
-            os.path.join(BASE_DIR, "requirements.txt")
-        )
-        sam_requirements = parse_requirements(
-            os.path.join(BASE_DIR, "requirements-sam.txt")
-        )
-        runtime_env = {
-            "pip": base_requirements + sam_requirements,
-            "py_modules": [os.path.join(BASE_DIR, "bioimageio_colab")],
-        }
-    else:
-        runtime_env = None
+    # Create runtime environment
+    sam_requirements = parse_requirements(
+        os.path.join(BASE_DIR, "requirements-sam.txt")
+    )
+    runtime_env = {
+        "pip": sam_requirements,
+        "py_modules": [os.path.join(BASE_DIR, "bioimageio_colab")],
+    }
 
     # Check if Ray is already initialized
     if ray.is_initialized():
@@ -138,7 +131,7 @@ def ping(context: dict = None) -> str:
 async def compute_image_embedding(
     app_name: str,
     image: np.ndarray,
-    model_name: str,
+    model_id: str,
     require_login: bool = False,
     context: dict = None,
 ) -> dict:
@@ -152,13 +145,13 @@ async def compute_image_embedding(
 
         user_id = user["id"]
         logger.info(
-            f"User '{user_id}' - Computing embedding (model: '{model_name}')..."
+            f"User '{user_id}' - Computing embedding (model: '{model_id}')..."
         )
 
         # Compute the embedding
         # Returns: {"features": embedding, "input_size": input_size}
         handle = ray.serve.get_app_handle(name=app_name)
-        result = await handle.remote(model_id=model_name, array=image)
+        result = await handle.remote(model_id=model_id, array=image)
 
         logger.info(f"User '{user_id}' - Embedding computed successfully.")
 
@@ -170,7 +163,7 @@ async def compute_image_embedding(
 
 # def compute_mask(
 #     cache_dir: str,
-#     model_name: str,
+#     model_id: str,
 #     embedding: np.ndarray,
 #     image_size: tuple,
 #     point_coords: np.ndarray,
@@ -184,14 +177,14 @@ async def compute_image_embedding(
 #     """
 #     try:
 #         user_id = context["user"].get("id") if context else "anonymous"
-#         logger.info(f"User '{user_id}' - Segmenting image (model: '{model_name}')...")
+#         logger.info(f"User '{user_id}' - Segmenting image (model: '{model_id}')...")
 
 #         if not format in ["mask", "kaibu"]:
 #             raise ValueError("Invalid format. Please choose either 'mask' or 'kaibu'.")
 
 #         # Load the model
 #         sam_predictor = load_model_from_ckpt(
-#             model_name=model_name,
+#             model_id=model_id,
 #             cache_dir=cache_dir,
 #         )
 
@@ -322,11 +315,6 @@ async def register_service(args: dict) -> None:
     """
     logger.info("Registering the SAM annotation service...")
    logger.info(f"Available CPU cores: {os.cpu_count()}")
-    if torch.cuda.is_available():
-        logger.info(f"Available GPUs: {torch.cuda.device_count()}")
-        logger.info(f"Available GPU devices: {torch.cuda.get_device_name()}")
-    else:
-        logger.info("No GPU devices available.")
 
     workspace_token = args.token or os.environ.get("WORKSPACE_TOKEN")
     if not workspace_token:
@@ -406,7 +394,7 @@ async def register_service(args: dict) -> None:
     )
 
     # This will register probes service where you can accessed via hypha or the HTTP proxy
-    print(f"Probes registered at workspace: {workspace}")
-    print(
+    logger.info(f"Probes registered at workspace: {workspace}")
+    logger.info(
         f"Test the liveness probe here: {args.server_url}/{workspace}/services/probes/liveness"
     )
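connect_to_ray now always builds a runtime_env from requirements-sam.txt, which is why the Docker image no longer needs the SAM packages: the driver environment stays slim and Ray installs torch and friends on the workers. A condensed sketch of the mechanism; the parse_requirements body is assumed, since this diff does not show it:

import os

import ray

BASE_DIR = os.path.dirname(os.path.abspath(__file__))


def parse_requirements(file_path) -> list:
    # Assumed behavior: one pinned requirement per line; comments and
    # "-r" includes are skipped.
    with open(file_path) as f:
        lines = [line.strip() for line in f]
    return [l for l in lines if l and not l.startswith(("#", "-r"))]


runtime_env = {
    "pip": parse_requirements(os.path.join(BASE_DIR, "requirements-sam.txt")),
    "py_modules": [os.path.join(BASE_DIR, "bioimageio_colab")],
}

# "auto" assumes an already running Ray cluster; workers then pip-install
# the SAM stack into their runtime_env on demand.
ray.init(address="auto", runtime_env=runtime_env)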
2 changes: 1 addition & 1 deletion docker-compose.yml
@@ -12,7 +12,7 @@ services:
         "--server_url=https://hypha.aicell.io",
         "--workspace_name=bioimageio-colab",
         "--service_id=microsam",
-        "--model_names=sam_vit_b",
+        "--cache_dir=/tmp/ray/.model_cache"
       ]
     deploy:
       resources:
11 changes: 6 additions & 5 deletions pyproject.toml
@@ -3,15 +3,16 @@ requires = ["setuptools", "wheel"]
 
 [project]
 name = "bioimageio-colab"
-version = "0.1.8"
+version = "0.1.9"
 readme = "README.md"
 description = "Collaborative image annotation and model training with human in the loop."
 dependencies = [
-    "hypha-rpc>=0.20.43",
-    "requests",
-    "numpy",
+    "requests",
+    "hypha-rpc",
     "kaibu-utils",
+    "numpy",
+    "python-dotenv",
+    "ray[serve]",
+    "tifffile",
 ]
 
 [tool.setuptools]
1 change: 0 additions & 1 deletion requirements-cellpose.txt
@@ -1,4 +1,3 @@
-tifffile==2024.7.24
 matplotlib==3.9.0
 opencv-python-headless==3.4.18.65
 cellpose==3.0.11
6 changes: 3 additions & 3 deletions requirements-sam.txt
@@ -1,6 +1,6 @@
--r "requirements.txt"
-ray[client]==2.33.0
-ray[serve]==2.33.0
+aiohttp==3.11.11
+numpy==2.2.2
+ray[client,serve]==2.33.0
 segment_anything==1.0
 torch==2.3.1
 torchvision==0.18.1
File renamed without changes.
9 changes: 5 additions & 4 deletions requirements.txt
@@ -1,5 +1,6 @@
-hypha-rpc==0.20.44
-kaibu-utils==0.1.14
-numpy==1.26.4
+hypha-rpc==0.20.47
+# kaibu-utils==0.1.14
+numpy==2.2.2
 python-dotenv==1.0.1
-requests==2.31.0
+ray[serve]==2.33.0
+tifffile==2025.1.10
36 changes: 21 additions & 15 deletions test/stress_test.py
@@ -1,39 +1,45 @@
-from hypha_rpc import connect_to_server
-import numpy as np
-import asyncio
 import argparse
+import asyncio
+
+import numpy as np
+from hypha_rpc import connect_to_server
+from tifffile import imread
 
 SERVER_URL = "https://hypha.aicell.io"
 WORKSPACE_NAME = "bioimageio-colab"
 SERVICE_ID = "microsam"
-SID = f"{WORKSPACE_NAME}/{SERVICE_ID}"
+MODEL_IDS = ["sam_vit_b", "sam_vit_b_lm", "sam_vit_b_em_organelles"]
+IMG_PATH = "./data/example_image.tif"
 
 
-async def run_client(client_id: int, image: np.ndarray):
+async def run_client(
+    client_id: int, image: np.ndarray, model_id: str, method_timeout: int = 30
+):
     print(f"Client {client_id} started", flush=True)
-    client = await connect_to_server({"server_url": SERVER_URL, "method_timeout": 30})
-    segment_svc = await client.get_service(SID, {"mode": "random"})
-    await segment_svc.segment(model_name="vit_b", image=image, point_coordinates=[[128, 128]], point_labels=[1])
-    await asyncio.sleep(1)
-    await segment_svc.segment(model_name="vit_b", image=image, point_coordinates=[[20, 50]], point_labels=[1])
-    await asyncio.sleep(1)
-    await segment_svc.segment(model_name="vit_b", image=image, point_coordinates=[[180, 10]], point_labels=[1])
+    client = await connect_to_server(
+        {"server_url": SERVER_URL, "method_timeout": method_timeout}
+    )
+    service = await client.get_service(
+        f"{WORKSPACE_NAME}/{SERVICE_ID}", {"mode": "random"}
+    )
+    await service.compute_embedding(model_id=model_id, image=image)
     print(f"Client {client_id} finished", flush=True)
 
 
 async def stress_test(num_clients: int):
-    image=np.random.rand(256, 256)
+    image = imread(IMG_PATH)
     tasks = []
     for client_id in range(num_clients):
         await asyncio.sleep(0.1)
-        tasks.append(run_client(client_id, image))
+        model_id = MODEL_IDS[np.random.randint(0, len(MODEL_IDS))]
+        tasks.append(run_client(client_id=client_id, image=image, model_id=model_id))
     await asyncio.gather(*tasks)
     print("All clients finished")
 
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument("--num_clients", type=int, default=50)
     args = parser.parse_args()
 
-    asyncio.run(stress_test(args.num_clients))
+    asyncio.run(stress_test(args.num_clients))
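The rewritten test exercises compute_embedding across the three published model IDs instead of hammering segment with a fixed model. Run it from the repository root (IMG_PATH is relative) with python test/stress_test.py --num_clients 50, or reuse the coroutine directly; the import below assumes test/ is on sys.path:

import asyncio

from stress_test import stress_test

# Smaller smoke run against the deployed service.
asyncio.run(stress_test(num_clients=5))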
5 changes: 2 additions & 3 deletions test/test_image_provider.py
@@ -2,17 +2,16 @@
 import sys
 
 import numpy as np
+# from kaibu_utils import mask_to_features
 
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from kaibu_utils import mask_to_features
-
 from docs.data_providing_service import get_random_image, save_annotation
 
 
 def test_load_image():
     supported_file_types = ("tif", "tiff")
     image, image_name = get_random_image(
-        image_folder="./bioimageio_colab/",
+        image_folder="./data/",
         supported_file_types=supported_file_types,
     )
     assert image is not None