Commit

Merge pull request #393 from OHDSI/develop
Develop
jreps authored Jun 2, 2023
2 parents 66d3cf1 + 1e89ed9 commit f542d0c
Showing 343 changed files with 459 additions and 65,476 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/R_CMD_check_Hades.yaml
@@ -78,7 +78,7 @@ jobs:
do
eval sudo $cmd
done < <(Rscript -e 'writeLines(remotes::system_requirements("ubuntu", "20.04"))')
- uses: r-lib/actions/setup-r-dependencies@v2
with:
extra-packages: any::rcmdcheck
46 changes: 46 additions & 0 deletions .github/workflows/pkgdown.yaml
@@ -0,0 +1,46 @@
# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
on:
push:
branches: [main, develop]
release:
types: [published]
workflow_dispatch:

name: pkgdown

jobs:
pkgdown:
runs-on: ubuntu-latest
# Only restrict concurrency for non-PR jobs
concurrency:
group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }}
env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
steps:
- uses: actions/checkout@v2

- uses: r-lib/actions/setup-pandoc@v2

- uses: r-lib/actions/setup-r@v2
with:
use-public-rspm: true

- uses: r-lib/actions/setup-r-dependencies@v2
with:
extra-packages: any::pkgdown, ohdsi/OhdsiRTools
needs: website

- name: Build site
run: Rscript -e 'pkgdown::build_site_github_pages(new_process = FALSE, install = TRUE)'

- name: Fix Hades Logo
run: Rscript -e 'OhdsiRTools::fixHadesLogo()'

- name: Deploy to GitHub pages 🚀
if: github.event_name != 'pull_request'
uses: JamesIves/[email protected]
with:
clean: false
branch: gh-pages
folder: docs
7 changes: 4 additions & 3 deletions DESCRIPTION
@@ -2,8 +2,8 @@ Package: PatientLevelPrediction
Type: Package
Title: Developing patient level prediction using data in the OMOP Common Data
Model
Version: 6.3.1
Date: 2023-02-28
Version: 6.3.2
Date: 2023-05-15
Authors@R: c(
person("Jenna", "Reps", email = "[email protected]", role = c("aut", "cre")),
person("Martijn", "Schuemie", role = c("aut")),
@@ -23,7 +23,7 @@ URL: https://ohdsi.github.io/PatientLevelPrediction, https://github.com/OHDSI/Pa
BugReports: https://github.com/OHDSI/PatientLevelPrediction/issues
VignetteBuilder: knitr
Depends:
R (>= 3.3.0)
R (>= 4.0.0)
Imports:
Andromeda,
Cyclops (>= 3.0.0),
@@ -75,6 +75,7 @@ Remotes:
ohdsi/Eunomia,
ohdsi/FeatureExtraction,
ohdsi/IterativeHardThresholding,
ohdsi/ParallelLogger,
ohdsi/ShinyAppBuilder,
ohdsi/ResultModelManager
RoxygenNote: 7.2.3
7 changes: 7 additions & 0 deletions NEWS.md
@@ -1,3 +1,10 @@
PatientLevelPrediction 6.3.2
======================
- fixed bug with database insert if result is incomplete
- updated/fixed documentation (Egill)
- added model path to models (Henrik)
- updated hyper-parameter saving to data.frame and made consistent

PatientLevelPrediction 6.3.1
======================
- fixed bug with multiple covariate settings in diagnose plp
14 changes: 12 additions & 2 deletions R/CyclopsModels.R
@@ -156,7 +156,7 @@ fitCyclopsModel <- function(
prediction$evaluationType <- 'Train'

# get cv AUC if exists
cvPerFold <- c()
cvPerFold <- data.frame()
if(!is.null(modelTrained$cv)){
cvPrediction <- do.call(rbind, lapply(modelTrained$cv, function(x){x$predCV}))
cvPrediction$evaluationType <- 'CV'
@@ -167,7 +167,17 @@

cvPerFold <- unlist(lapply(modelTrained$cv, function(x){x$out_sample_auc}))
if(length(cvPerFold)>0){
names(cvPerFold) <- paste0('fold_auc', 1:length(cvPerFold))
cvPerFold <- data.frame(
metric = 'AUC',
fold = 1:length(cvPerFold),
value = cvPerFold,
startingVariance = ifelse(is.null(param$priorParams$variance), 'NULL', param$priorParams$variance),
lowerLimit = ifelse(is.null(param$lowerLimit), 'NULL', param$lowerLimit),
upperLimit = ifelse(is.null(param$upperLimit), 'NULL', param$upperLimit),
tolerance = ifelse(is.null(settings$tolerance), 'NULL', settings$tolerance)
)
} else{
cvPerFold <- data.frame()
}

# remove the cv from the model:
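The hunk above replaces the named-vector `cvPerFold` with a data.frame holding one row per fold, so the prior settings can travel alongside the per-fold AUCs. A minimal base-R sketch of the resulting shape (all values here are illustrative, not taken from a real run):

```r
# Illustrative per-fold CV summary in the new long format: one row per fold,
# with prior settings carried as extra columns ('NULL' strings when unset).
cvAucs <- c(0.71, 0.74, 0.70)  # stand-in out-of-sample AUCs per fold
cvPerFold <- data.frame(
  metric = 'AUC',
  fold = 1:length(cvAucs),
  value = cvAucs,
  startingVariance = 0.01,
  lowerLimit = 'NULL',
  upperLimit = 'NULL',
  tolerance = 2e-07
)
```

Because every column other than `value` is length 1, R recycles it across the three rows, which is what makes this format easy to `rbind` across hyper-parameter settings.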
18 changes: 18 additions & 0 deletions R/DatabaseMigration.R
@@ -1,3 +1,21 @@
# @file DatabaseMigration.R
#
# Copyright 2023 Observational Health Data Sciences and Informatics
#
# This file is part of PatientLevelPrediction
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
#' Migrate Data model
#' @description
#' Migrate data from current state to next state
7 changes: 5 additions & 2 deletions R/Fit.R
@@ -37,6 +37,7 @@
#' }
#' @param search The search strategy for the hyper-parameter selection (currently not used)
#' @param analysisId The id of the analysis
#' @param analysisPath The path of the analysis
#' @return
#' An object of class \code{plpModel} containing:
#'
@@ -53,7 +54,8 @@ fitPlp <- function(
trainData,
modelSettings,
search = "grid",
analysisId
analysisId,
analysisPath
)
{

@@ -76,7 +78,8 @@
trainData = trainData,
modelSettings, # old: param = modelSettings$param, # make this model settings?
search = search,
analysisId = analysisId
analysisId = analysisId,
analysisPath = analysisPath
)
plpModel <- do.call(fun, args)
ParallelLogger::logTrace('Returned from classifier function')
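This change threads a new `analysisPath` argument from `fitPlp()` through to whichever classifier-specific fit function `modelSettings` names, via `do.call()`. A toy sketch of that dispatch pattern (the function and argument names below are stand-ins, not the package's real settings objects):

```r
# Stand-in for a classifier fit function such as fitCyclopsModel or fitKNN;
# the trailing arguments now include analysisPath.
fitDummy <- function(trainData, modelSettings, search, analysisId, analysisPath) {
  list(id = analysisId, path = analysisPath)
}

args <- list(
  trainData = list(),                              # stand-in training data
  modelSettings = list(fitFunction = 'fitDummy'),  # names the fit function
  search = 'grid',
  analysisId = 'Analysis_1',
  analysisPath = file.path(tempdir(), 'Analysis_1')
)

# do.call() looks the function up by name and forwards the shared arguments
plpModel <- do.call(args$modelSettings$fitFunction, args)
```

The companion KNN.R change adds `...` to `fitKNN()` so fit functions that do not use `analysisPath` can still accept the extra argument without erroring.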
4 changes: 2 additions & 2 deletions R/KNN.R
@@ -64,7 +64,7 @@ setKNN <- function(k=1000, indexFolder=file.path(getwd(),'knn'), threads = 1 ){
return(result)
}

fitKNN <- function(trainData, modelSettings, search = 'none', analysisId ){
fitKNN <- function(trainData, modelSettings, search = 'none', analysisId, ...){

param <- modelSettings$param

@@ -150,7 +150,7 @@ fitKNN <- function(trainData, modelSettings, search = 'none', analysisId ){
trainingTime = paste(as.character(abs(comp)), attr(comp,'units')),
trainingDate = Sys.Date(),
modelName = 'KNN',
hyperParamSearch =c(),
hyperParamSearch = data.frame(),
finalModelParameters = list(
k = k,
threads = param$threads
1 change: 1 addition & 0 deletions R/PatientLevelPrediction.R
@@ -22,6 +22,7 @@
#'
#' @docType package
#' @name PatientLevelPrediction
#' @keywords internal
#' @importFrom dplyr %>%
#' @importFrom rlang .data
NULL
2 changes: 1 addition & 1 deletion R/PreprocessingData.R
@@ -15,7 +15,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

#' Create the settings for preprocessing the trainData using \code{ }.
#' Create the settings for preprocessing the trainData.
#'
#' @details
#' Returns an object of class \code{preprocessingSettings} that specifies how to preprocess the training data
4 changes: 2 additions & 2 deletions R/RClassifier.R
@@ -3,8 +3,8 @@ fitRclassifier <- function(
trainData,
modelSettings,
search = 'grid',
analysisId
){
analysisId,
...){

param <- modelSettings$param

3 changes: 2 additions & 1 deletion R/Recalibration.R
@@ -90,7 +90,8 @@ recalibratePlpRefit <- function(
fitPlp(
trainData = newData,
modelSettings = setLassoRefit,
analysisId = 'recalibrationRefit'
analysisId = 'recalibrationRefit',
analysisPath = NULL
)
},
error = function(e){ParallelLogger::logInfo(e); return(NULL)}
8 changes: 3 additions & 5 deletions R/RunMultiplePlp.R
@@ -132,11 +132,10 @@ runMultiplePlp <- function(
dataExists <- length(dir(file.path(saveDirectory, settings$dataLocation)))>0

if(dataExists){
plpData <- PatientLevelPrediction::loadPlpData(file.path(saveDirectory, settings$dataLocation))

analysisExists <- file.exists(file.path(saveDirectory, settings$analysisId,'diagnosePlp.rds'))

if(!analysisExists){
plpData <- PatientLevelPrediction::loadPlpData(file.path(saveDirectory, settings$dataLocation))
diagnosePlpSettings <- list(
plpData = plpData,
outcomeId = modelDesign$outcomeId,
@@ -171,11 +170,10 @@ runMultiplePlp <- function(
dataExists <- length(dir(file.path(saveDirectory, settings$dataLocation)))>0

if(dataExists){
plpData <- PatientLevelPrediction::loadPlpData(file.path(saveDirectory, settings$dataLocation))

analysisExists <- file.exists(file.path(saveDirectory, settings$analysisId,'plpResult', 'runPlp.rds'))

if(!analysisExists){

plpData <- PatientLevelPrediction::loadPlpData(file.path(saveDirectory, settings$dataLocation))
runPlpSettings <- list(
plpData = plpData,
outcomeId = modelDesign$outcomeId,
3 changes: 2 additions & 1 deletion R/RunPlp.R
@@ -366,7 +366,8 @@ runPlp <- function(
settings <- list(
trainData = data$Train,
modelSettings = modelSettings,
analysisId = analysisId
analysisId = analysisId,
analysisPath = analysisPath
)

ParallelLogger::logInfo(sprintf('Training %s model',settings$modelSettings$name))
28 changes: 22 additions & 6 deletions R/SklearnClassifier.R
@@ -416,13 +416,29 @@ computeGridPerformance <- function(prediction, param, performanceFunct = 'comput
}
}

hyperSummary <- c(performanceFunct, performance, performanceFold, unlist(paramString))
names(hyperSummary) <- c(
'Metric',
'cvPerformance',
paste0('cvPerformanceFold',1:length(performanceFold)),
names(param)
#hyperSummary <- c(performanceFunct, performance, performanceFold, unlist(paramString))
#names(hyperSummary) <- c(
# 'Metric',
# 'cvPerformance',
# paste0('cvPerformanceFold',1:length(performanceFold)),
# names(param)
#)
paramValues <- unlist(paramString)
names(paramValues) <- names(param)

hyperSummary <- as.data.frame(
c(
data.frame(
metric = performanceFunct,
fold = c("CV",as.character(1:length(performanceFold))),
value = c(performance,performanceFold)
),
paramValues
)
)




return(
list(
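The rewritten `computeGridPerformance()` builds `hyperSummary` as a data.frame in the same long format as the Cyclops change: a pooled `'CV'` row plus one row per fold, with the hyper-parameter values recycled onto every row. A base-R sketch with made-up numbers:

```r
performance <- 0.73                           # pooled CV performance
performanceFold <- c(0.72, 0.74)              # per-fold performance
paramValues <- c(ntrees = 100, maxDepth = 4)  # stand-in hyper-parameters

# c() flattens the data.frame into a list of columns and appends the
# length-1 parameter values, which as.data.frame() recycles to all rows
hyperSummary <- as.data.frame(c(
  data.frame(
    metric = 'computeAuc',
    fold = c('CV', as.character(1:length(performanceFold))),
    value = c(performance, performanceFold)
  ),
  paramValues
))
```

The result is a three-row data.frame with columns `metric`, `fold`, `value`, `ntrees`, `maxDepth`, consistent with the `cvPerFold` format in CyclopsModels.R.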
19 changes: 12 additions & 7 deletions R/uploadToDatabase.R
@@ -329,13 +329,18 @@ addMultipleRunPlpToDatabase <- function(
ParallelLogger::logInfo('result loaded')

# Add runPlp to the database
addRunPlpToDatabase(
runPlp = runPlp,
connectionDetails = connectionDetails,
databaseSchemaSettings = databaseSchemaSettings,
cohortDefinitions = cohortDefinitions,
databaseList = databaseList,
modelSaveLocation = modelSaveLocation
tryCatch(
{addRunPlpToDatabase(
runPlp = runPlp,
connectionDetails = connectionDetails,
databaseSchemaSettings = databaseSchemaSettings,
cohortDefinitions = cohortDefinitions,
databaseList = databaseList,
modelSaveLocation = modelSaveLocation
)}, error = function(e){
ParallelLogger::logInfo('result upload failed: ');
ParallelLogger::logInfo(e)
}
)

} #model not null
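Wrapping `addRunPlpToDatabase()` in `tryCatch` means a single failing upload is logged and the surrounding loop moves on to the next result instead of aborting the whole multi-run insert. A self-contained sketch of the guard, with `message()` standing in for `ParallelLogger::logInfo()` and a dummy function in place of the real database call:

```r
runPlp <- list(analysisId = 'Analysis_1')            # stand-in result object
uploadResult <- function(x) stop('connection lost')  # stand-in for addRunPlpToDatabase

tryCatch(
  { uploadResult(runPlp) },
  error = function(e) {
    # log and continue; the caller proceeds to the next runPlp result
    message('result upload failed: ', conditionMessage(e))
  }
)
```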
7 changes: 3 additions & 4 deletions README.md
@@ -31,7 +31,7 @@ Features
- Extracts the necessary data from a database in OMOP Common Data Model format for multiple covariate settings.
- Uses a large set of covariates including for example all drugs, diagnoses, procedures, as well as age, comorbidity indexes, and custom covariates.
- Allows you to add custom covariates or cohort covariates.
- Includes a large number of state-of-the-art machine learning algorithms that can be used to develop predictive models, including Regularized logistic regression, Random forest, Gradient boosting machines, Decision tree, Naive Bayes, K-nearest neighbours, Neural network and AdaBoost, SVM.
- Includes a large number of state-of-the-art machine learning algorithms that can be used to develop predictive models, including Regularized logistic regression, Random forest, Gradient boosting machines, Decision tree, Naive Bayes, K-nearest neighbours, Neural network, AdaBoost and Support vector machines.
- Allows you to add custom algorithms.
- Allows you to add custom feature engineering
- Allows you to add custom under/over sampling (or any other sampling) [note: based on existing research this is not recommended]
@@ -71,11 +71,11 @@ Demo of the Shiny Apps can be found here:

Technology
==========
PatientLevelPrediction is an R package, with some functions implemented in python.
PatientLevelPrediction is an R package, with some functions using python through reticulate.

System Requirements
===================
Requires R (version 3.3.0 or higher). Installation on Windows requires [RTools](http://cran.r-project.org/bin/windows/Rtools/). Libraries used in PatientLevelPrediction require Java and Python.
Requires R (version 4.0 or higher). Installation on Windows requires [RTools](http://cran.r-project.org/bin/windows/Rtools/). Libraries used in PatientLevelPrediction require Java and Python.

The python installation is required for some of the machine learning algorithms. We advise to
install Python 3.7 using Anaconda (https://www.continuum.io/downloads).
@@ -129,7 +129,6 @@ Development
===========
PatientLevelPrediction is being developed in R Studio.

Beta

# Acknowledgements
