Mirroring d64b6c5

IGS · Dec 13, 2024 · 7a03d86 · 7a03d86
1 parent 2c3360b
commit 7a03d86
Show file tree

Hide file tree

Showing 4 changed files with 65 additions and 44 deletions.
diff --git a/services/projectr/Dockerfile b/services/projectr/Dockerfile
@@ -4,7 +4,7 @@
 FROM python:3.10
 
 # Allow statements and log messages to immediately appear in the Knative logs
-ENV PYTHONUNBUFFERED True
+ENV PYTHONUNBUFFERED=True
 
 RUN apt-get -qq update \
     && DEBIAN_FRONTEND="noninteractive" apt -qq install -y --no-install-recommends \
@@ -39,7 +39,7 @@ RUN pip install --no-cache-dir -r /tmp/requirements.txt
 ENV LD_LIBRARY_PATH="/usr/local/lib/R/lib:$LD_LIBRARY_PATH"
 
 # Copy local code to the container image.
-ENV APP_HOME /app
+ENV APP_HOME=/app
 WORKDIR $APP_HOME
 COPY . ./
 

diff --git a/services/projectr/main.py b/services/projectr/main.py
@@ -58,6 +58,10 @@ def do_pca_projection(target_df, loading_df):
     tp_target_df = target_df.transpose()
     return tp_target_df.dot(loading_df)
 
+@app.route("/status", methods=["GET"])
+def status():
+    return "OK"
+
 @app.route("/", methods=["POST"])
 def index():
     req_json = request.get_json()

diff --git a/services/projectr/rfuncs.py b/services/projectr/rfuncs.py
@@ -28,7 +28,7 @@ def __init__(self, message="") -> None:
 
 def convert_r_df_to_r_matrix(df):
     """
-    Convert pandas dataframe to R-style matrix
+    Convert R-style dataframe to R-style matrix
     """
 
     r_matrix = ro.r["as.matrix"]
@@ -53,50 +53,53 @@ def run_projectR_cmd(target_df, loading_df, algorithm):
     with openrlib.rlock:
 
         # Convert from pandas dataframe to R data.frame
-        with localconverter(ro.default_converter + pandas2ri.converter):
+        # Seems any R call needs to be in a "conversion" context block
+        # source -> https://stackoverflow.com/a/76532346, https://github.com/rpy2/rpy2/issues/1081, https://github.com/rpy2/rpy2/issues/975
+        local_rules = localconverter(ro.default_converter + pandas2ri.converter)
+        with local_rules:
             target_r_df = ro.conversion.py2rpy(target_df)
             loading_r_df = ro.conversion.py2rpy(loading_df)
 
-        # data.frame to matrix (projectR has no data.frame signature)
-        target_r_matrix = convert_r_df_to_r_matrix(target_r_df)
-        loading_r_matrix = convert_r_df_to_r_matrix(loading_r_df)
-
-        # Assign Rownames to each matrix
-        # I don't know why but using ro.StrVector makes rpy2py fail where the output df is an incompatible class
-        # Guessing that there are some non-strings mixed into the indexes
-        target_r_matrix.rownames = StrVector(target_df.index)
-        loading_r_matrix.rownames = StrVector(loading_df.index)
-
-        # The NMF projectR method signature is based on the LinearEmbeddedMatrix class,
-        # Which has a featureLoadings property. That matrix is loaded and the default
-        # projectR signature is returned and used. So we can just pass the matrix as-is.
-        # https://rdrr.io/bioc/SingleCellExperiment/man/LinearEmbeddingMatrix.html
-
-        # Run project R command.  Get projectionPatterns matrix
-        try:
-            if algorithm == "nmf":
-                projectR = importr('projectR')
-                projection_patterns_r_matrix = projectR.projectR(data=target_r_matrix, loadings=loading_r_matrix, full=False)
-            elif algorithm == "fixednmf":
-                sjd = importr('SJD')
-                loading_list = ro.ListVector({"genesig": loading_r_matrix})
-
-                projection = sjd.projectNMF(proj_dataset=target_r_matrix, proj_group=True, list_component=loading_list)
-                projection_patterns_r_matrix = projection.rx2("proj_score_list").rx2("genesig")
-            else:
-                raise ValueError("Algorithm {} is not supported".format(algorithm))
-        except Exception as e:
-            # print stacktrace with line numbers
-            traceback.print_exc(file=sys.stderr)
-            raise RError("Error: Could not run projectR command.\tReason: {}".format(str(e)))
-
-        # matrix back to data.frame
-        projection_patterns_r_df = convert_r_matrix_to_r_df(projection_patterns_r_matrix)
-
-        # Convert from R data.frame to pandas dataframe
-        with localconverter(ro.default_converter + pandas2ri.converter):
+            target_r_index = ro.conversion.py2rpy(target_df.index)
+            loading_r_index = ro.conversion.py2rpy(loading_df.index)
+        # Need a ruleset without pandas, which auto-converts the R matrix to a numpy array
+        with localconverter(ro.default_converter):
+            try:
+                # data.frame to data.matrix (projectR has no data.frame signature)
+                target_r_matrix = convert_r_df_to_r_matrix(target_r_df)
+                loading_r_matrix = convert_r_df_to_r_matrix(loading_r_df)
+                # Assign Rownames to each matrix
+                target_r_matrix.rownames = target_r_index
+                loading_r_matrix.rownames = loading_r_index
+            except Exception as e:
+                # print stacktrace with line numbers
+                traceback.print_exc(file=sys.stderr)
+                raise RError("Error: Could not assign rownames to matrix.\tReason: {}".format(str(e)))
+            # The NMF projectR method signature is based on the LinearEmbeddingMatrix class,
+            # which has a featureLoadings property. That matrix is loaded and the default
+            # projectR signature is returned and used. So we can just pass the matrix as-is.
+            # https://rdrr.io/bioc/SingleCellExperiment/man/LinearEmbeddingMatrix.html
+            # Run project R command.  Get projectionPatterns matrix
+            try:
+                if algorithm == "nmf":
+                    projectR = importr('projectR')
+                    projection_patterns_r_matrix = projectR.projectR(data=target_r_matrix, loadings=loading_r_matrix, full=False)
+                elif algorithm == "fixednmf":
+                    sjd = importr('SJD')
+                    loading_list = ro.ListVector({"genesig": loading_r_matrix})
+                    projection = sjd.projectNMF(proj_dataset=target_r_matrix, proj_group=True, list_component=loading_list)
+                    projection_patterns_r_matrix = projection.rx2("proj_score_list").rx2("genesig")
+                else:
+                    raise ValueError("Algorithm {} is not supported".format(algorithm))
+            except Exception as e:
+                # print stacktrace with line numbers
+                traceback.print_exc(file=sys.stderr)
+                raise RError("Error: Could not run projectR command.\tReason: {}".format(str(e)))
+            # matrix back to data.frame
+            projection_patterns_r_df = convert_r_matrix_to_r_df(projection_patterns_r_matrix)
+        with local_rules:
             projection_patterns_df = ro.conversion.rpy2py(projection_patterns_r_df)
 
-        return projection_patterns_df
+            return projection_patterns_df
 
 
diff --git a/www/api/resources/common.py b/www/api/resources/common.py
@@ -3,6 +3,8 @@
 """
 
 import os, sys
+import tempfile
+import shutil
 import anndata
 import pandas as pd
 from pandas.api.types import is_integer_dtype
@@ -82,11 +84,23 @@ def create_projection_adata(dataset_adata, dataset_id, projection_id):
     # For some reason the gene_symbol is not taken in by the constructor
     projection_adata.var["gene_symbol"] = projection_adata.var_names
 
+    # Write to projection_adata_path. This ensures that the file is created and up to date with the latest projection results
+    projection_adata.write(projection_adata_path)
+    # This should resolve (https://github.com/IGS/gEAR/issues/951)
+    # Create a temporary file
+    with tempfile.NamedTemporaryFile(delete=True) as temp_file:
+        temp_file_path = temp_file.name
+    # Copy the contents of the original file to the temporary file
+    shutil.copyfile(projection_adata_path, temp_file_path)
+    # Associate with the temporary filename to ensure AnnData is read in "backed" mode
+    # This creates the h5ad file if it does not exist
+    projection_adata.filename = temp_file_path
+
     # Associate with a filename to ensure AnnData is read in "backed" mode
     # This creates the h5ad file if it does not exist
     # TODO: If too many processes read from this file, it can throw a BlockingIOError. Eventually we should
     #       handle this by creating a copy of the file for each process, like a tempfile.
-    projection_adata.filename = projection_adata_path
+    #projection_adata.filename = projection_adata_path
 
     return projection_adata