Skip to content

Commit

Permalink
Mirroring d64b6c5
Browse files Browse the repository at this point in the history
  • Loading branch information
adkinsrs committed Dec 13, 2024
1 parent 2c3360b commit 7a03d86
Show file tree
Hide file tree
Showing 4 changed files with 65 additions and 44 deletions.
4 changes: 2 additions & 2 deletions services/projectr/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
FROM python:3.10

# Allow statements and log messages to immediately appear in the Knative logs
ENV PYTHONUNBUFFERED True
ENV PYTHONUNBUFFERED=True

RUN apt-get -qq update \
&& DEBIAN_FRONTEND="noninteractive" apt -qq install -y --no-install-recommends \
Expand Down Expand Up @@ -39,7 +39,7 @@ RUN pip install --no-cache-dir -r /tmp/requirements.txt
ENV LD_LIBRARY_PATH="/usr/local/lib/R/lib:$LD_LIBRARY_PATH"

# Copy local code to the container image.
ENV APP_HOME /app
ENV APP_HOME=/app
WORKDIR $APP_HOME
COPY . ./

Expand Down
4 changes: 4 additions & 0 deletions services/projectr/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,10 @@ def do_pca_projection(target_df, loading_df):
tp_target_df = target_df.transpose()
return tp_target_df.dot(loading_df)

@app.route("/status", methods=["GET"])
def status():
    """Respond to GET /status with the plain-text body "OK".

    Takes no input and performs no work.
    NOTE(review): presumably a liveness/health-check endpoint for the
    service -- confirm against the deployment configuration.
    """
    return "OK"

@app.route("/", methods=["POST"])
def index():
req_json = request.get_json()
Expand Down
85 changes: 44 additions & 41 deletions services/projectr/rfuncs.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def __init__(self, message="") -> None:

def convert_r_df_to_r_matrix(df):
"""
Convert pandas dataframe to R-style matrix
Convert R-style dataframe to R-style matrix
"""

r_matrix = ro.r["as.matrix"]
Expand All @@ -53,50 +53,53 @@ def run_projectR_cmd(target_df, loading_df, algorithm):
with openrlib.rlock:

# Convert from pandas dataframe to R data.frame
with localconverter(ro.default_converter + pandas2ri.converter):
# Seems any R call needs to be in a "conversion" context block
# source -> https://stackoverflow.com/a/76532346, https://github.com/rpy2/rpy2/issues/1081, https://github.com/rpy2/rpy2/issues/975
local_rules = localconverter(ro.default_converter + pandas2ri.converter)
with local_rules:
target_r_df = ro.conversion.py2rpy(target_df)
loading_r_df = ro.conversion.py2rpy(loading_df)

# data.frame to matrix (projectR has no data.frame signature)
target_r_matrix = convert_r_df_to_r_matrix(target_r_df)
loading_r_matrix = convert_r_df_to_r_matrix(loading_r_df)

# Assign Rownames to each matrix
# I don't know why but using ro.StrVector makes rpy2py fail where the output df is an incompatible class
# Guessing that there are some non-strings mixed into the indexes
target_r_matrix.rownames = StrVector(target_df.index)
loading_r_matrix.rownames = StrVector(loading_df.index)

# The NMF projectR method signature is based on the LinearEmbeddingMatrix class,
# which has a featureLoadings property. That matrix is loaded and the default
# projectR signature is returned and used. So we can just pass the matrix as-is.
# https://rdrr.io/bioc/SingleCellExperiment/man/LinearEmbeddingMatrix.html

# Run project R command. Get projectionPatterns matrix
try:
if algorithm == "nmf":
projectR = importr('projectR')
projection_patterns_r_matrix = projectR.projectR(data=target_r_matrix, loadings=loading_r_matrix, full=False)
elif algorithm == "fixednmf":
sjd = importr('SJD')
loading_list = ro.ListVector({"genesig": loading_r_matrix})

projection = sjd.projectNMF(proj_dataset=target_r_matrix, proj_group=True, list_component=loading_list)
projection_patterns_r_matrix = projection.rx2("proj_score_list").rx2("genesig")
else:
raise ValueError("Algorithm {} is not supported".format(algorithm))
except Exception as e:
# print stacktrace with line numbers
traceback.print_exc(file=sys.stderr)
raise RError("Error: Could not run projectR command.\tReason: {}".format(str(e)))

# matrix back to data.frame
projection_patterns_r_df = convert_r_matrix_to_r_df(projection_patterns_r_matrix)

# Convert from R data.frame to pandas dataframe
with localconverter(ro.default_converter + pandas2ri.converter):
target_r_index = ro.conversion.py2rpy(target_df.index)
loading_r_index = ro.conversion.py2rpy(loading_df.index)
# Need a ruleset without the pandas converter, which auto-converts the R matrix to a numpy array
with localconverter(ro.default_converter):
try:
# data.frame to data.matrix (projectR has no data.frame signature)
target_r_matrix = convert_r_df_to_r_matrix(target_r_df)
loading_r_matrix = convert_r_df_to_r_matrix(loading_r_df)
# Assign Rownames to each matrix
target_r_matrix.rownames = target_r_index
loading_r_matrix.rownames = loading_r_index
except Exception as e:
# print stacktrace with line numbers
traceback.print_exc(file=sys.stderr)
raise RError("Error: Could not assign rownames to matrix.\tReason: {}".format(str(e)))
# The NMF projectR method signature is based on the LinearEmbeddingMatrix class,
# which has a featureLoadings property. That matrix is loaded and the default
# projectR signature is returned and used. So we can just pass the matrix as-is.
# https://rdrr.io/bioc/SingleCellExperiment/man/LinearEmbeddingMatrix.html
# Run project R command. Get projectionPatterns matrix
try:
if algorithm == "nmf":
projectR = importr('projectR')
projection_patterns_r_matrix = projectR.projectR(data=target_r_matrix, loadings=loading_r_matrix, full=False)
elif algorithm == "fixednmf":
sjd = importr('SJD')
loading_list = ro.ListVector({"genesig": loading_r_matrix})
projection = sjd.projectNMF(proj_dataset=target_r_matrix, proj_group=True, list_component=loading_list)
projection_patterns_r_matrix = projection.rx2("proj_score_list").rx2("genesig")
else:
raise ValueError("Algorithm {} is not supported".format(algorithm))
except Exception as e:
# print stacktrace with line numbers
traceback.print_exc(file=sys.stderr)
raise RError("Error: Could not run projectR command.\tReason: {}".format(str(e)))
# matrix back to data.frame
projection_patterns_r_df = convert_r_matrix_to_r_df(projection_patterns_r_matrix)
with local_rules:
projection_patterns_df = ro.conversion.rpy2py(projection_patterns_r_df)

return projection_patterns_df
return projection_patterns_df


16 changes: 15 additions & 1 deletion www/api/resources/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
"""

import os, sys
import tempfile
import shutil
import anndata
import pandas as pd
from pandas.api.types import is_integer_dtype
Expand Down Expand Up @@ -82,11 +84,23 @@ def create_projection_adata(dataset_adata, dataset_id, projection_id):
# For some reason the gene_symbol is not taken in by the constructor
projection_adata.var["gene_symbol"] = projection_adata.var_names

# write to projection_adata_path. This ensures that the file is created and up to date with latest projection results
projection_adata.write(projection_adata_path)
# This should resolve (https://github.com/IGS/gEAR/issues/951)
# Create a temporary file
with tempfile.NamedTemporaryFile(delete=True) as temp_file:
temp_file_path = temp_file.name
# Copy the contents of the original file to the temporary file
shutil.copyfile(projection_adata_path, temp_file_path)
# Associate with the temporary filename to ensure AnnData is read in "backed" mode
# This creates the h5ad file if it does not exist
projection_adata.filename = temp_file_path

# Associate with a filename to ensure AnnData is read in "backed" mode
# This creates the h5ad file if it does not exist
# TODO: If too many processes read from this file, it can throw a BlockingIOError. Eventually we should
# handle this by creating a copy of the file for each process, like a tempfile.
projection_adata.filename = projection_adata_path
#projection_adata.filename = projection_adata_path

return projection_adata

Expand Down

0 comments on commit 7a03d86

Please sign in to comment.