Add files via upload

OHDSI · Mar 5, 2021 · a94bf4e · a94bf4e
1 parent ee72c48
commit a94bf4e
Show file tree

Hide file tree

Showing 30 changed files with 11,857 additions and 0 deletions.
diff --git a/ohdsiBCv5/DESCRIPTION b/ohdsiBCv5/DESCRIPTION
@@ -0,0 +1,20 @@
+Package: ohdsiBCnew
+Type: Package
+Title: A Package Skeleton for Cohort Diagnostics
+Version: 0.0.1
+Author: Martijn Schuemie [aut, cre]
+Maintainer: Martijn J. Schuemie <[email protected]>
+Description: A skeleton package, to be used as a starting point when you just want to run cohort diagnostics.
+Depends:
+    DatabaseConnector (>= 2.2.0)
+Imports:
+    CohortDiagnostics,
+    ParallelLogger,
+    readr,
+    SqlRender
+Suggests:
+    knitr,
+    rmarkdown,
+    ROhdsiWebApi,
+    OhdsiRTools
+License: Apache License 2.0
+VignetteBuilder: knitr
+LazyData: TRUE
+RoxygenNote: 7.1.1
diff --git a/ohdsiBCv5/NAMESPACE b/ohdsiBCv5/NAMESPACE
@@ -0,0 +1,3 @@
+# Generated by roxygen2: do not edit by hand
+
+export(runCohortDiagnostics)
diff --git a/ohdsiBCv5/R/CohortDiagnostics.R b/ohdsiBCv5/R/CohortDiagnostics.R
@@ -0,0 +1,117 @@
+# Copyright 2020 Observational Health Data Sciences and Informatics
+#
+# This file is part of ohdsiBCnew
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#' Execute the cohort diagnostics
+#'
+#' @details
+#' This function optionally instantiates the study cohorts and then runs
+#' \code{CohortDiagnostics::runCohortDiagnostics} on them. A log file (\code{log.txt}) and an
+#' error report (\code{errorReportR.txt}) are written to \code{outputFolder}; the diagnostics are
+#' exported to its \code{diagnosticsExport} subfolder.
+#'
+#' @param connectionDetails    An object of type \code{connectionDetails} as created using the
+#'                             \code{\link[DatabaseConnector]{createConnectionDetails}} function in the
+#'                             DatabaseConnector package.
+#' @param cdmDatabaseSchema    Schema name where your patient-level data in OMOP CDM format resides.
+#'                             Note that for SQL Server, this should include both the database and
+#'                             schema name, for example 'cdm_data.dbo'.
+#' @param cohortDatabaseSchema Schema name where intermediate data can be stored. You will need to have
+#'                             write privileges in this schema. Note that for SQL Server, this should
+#'                             include both the database and schema name, for example 'cdm_data.dbo'.
+#' @param cohortTable          The name of the table that will be created in the work database schema.
+#'                             This table will hold the exposure and outcome cohorts used in this
+#'                             study.
+#' @param oracleTempSchema     Should be used in Oracle to specify a schema where the user has write
+#'                             privileges for storing temporary tables.
+#' @param outputFolder         Name of local folder to place results; make sure to use forward slashes
+#'                             (/). Do not use a folder on a network drive since this greatly impacts
+#'                             performance. The folder is created if it does not exist.
+#' @param databaseId           A short string for identifying the database (e.g.
+#'                             'Synpuf').
+#' @param databaseName         The full name of the database (e.g. 'Medicare Claims
+#'                             Synthetic Public Use Files (SynPUFs)').
+#' @param databaseDescription  A short description (several sentences) of the database.
+#' @param createCohorts        Create the cohortTable table with the exposure and outcome cohorts?
+#' @param runInclusionStatistics      Generate and export statistics on the cohort inclusion rules?
+#' @param runIncludedSourceConcepts   Generate and export the source concepts included in the cohorts?
+#' @param runOrphanConcepts           Generate and export potential orphan concepts?
+#' @param runTimeDistributions        Generate and export cohort time distributions?
+#' @param runBreakdownIndexEvents     Generate and export the breakdown of index events?
+#' @param runIncidenceRates           Generate and export the cohort incidence rates?
+#' @param runCohortOverlap            Generate and export the cohort overlap?
+#' @param runCohortCharacterization   Generate and export the cohort characterization?
+#' @param minCellCount         The minimum number of subjects contributing to a count before it can be included
+#'                             in packaged results.
+#'
+#' @return
+#' Whatever \code{CohortDiagnostics::runCohortDiagnostics} returns; the function is called for its
+#' side effects (files written to \code{outputFolder}).
+#'
+#' @export
+runCohortDiagnostics <- function(connectionDetails,
+                                 cdmDatabaseSchema,
+                                 cohortDatabaseSchema = cdmDatabaseSchema,
+                                 cohortTable = "cohort",
+                                 oracleTempSchema = cohortDatabaseSchema,
+                                 outputFolder,
+                                 databaseId = "Unknown",
+                                 databaseName = "Unknown",
+                                 databaseDescription = "Unknown",
+                                 createCohorts = TRUE,
+                                 runInclusionStatistics = TRUE,
+                                 runIncludedSourceConcepts = TRUE,
+                                 runOrphanConcepts = TRUE,
+                                 runTimeDistributions = TRUE,
+                                 runBreakdownIndexEvents = TRUE,
+                                 runIncidenceRates = TRUE,
+                                 runCohortOverlap = TRUE,
+                                 runCohortCharacterization = TRUE,
+                                 minCellCount = 5) {
+  if (!file.exists(outputFolder)) {
+    dir.create(outputFolder, recursive = TRUE)
+  }
+
+  # Register the file loggers for the duration of this call only; add = TRUE keeps every
+  # exit handler instead of letting a later on.exit() replace an earlier one.
+  ParallelLogger::addDefaultFileLogger(file.path(outputFolder, "log.txt"))
+  ParallelLogger::addDefaultErrorReportLogger(file.path(outputFolder, "errorReportR.txt"))
+  on.exit(ParallelLogger::unregisterLogger("DEFAULT_FILE_LOGGER", silent = TRUE), add = TRUE)
+  on.exit(ParallelLogger::unregisterLogger("DEFAULT_ERRORREPORT_LOGGER", silent = TRUE), add = TRUE)
+
+  if (createCohorts) {
+    ParallelLogger::logInfo("Creating cohorts")
+    connection <- DatabaseConnector::connect(connectionDetails)
+    # Close the connection even when cohort creation fails, and do so before the
+    # diagnostics start, so the connection is not held open longer than needed.
+    tryCatch(
+      .createCohorts(connection = connection,
+                     cdmDatabaseSchema = cdmDatabaseSchema,
+                     cohortDatabaseSchema = cohortDatabaseSchema,
+                     cohortTable = cohortTable,
+                     oracleTempSchema = oracleTempSchema,
+                     outputFolder = outputFolder),
+      finally = DatabaseConnector::disconnect(connection)
+    )
+  }
+
+  ParallelLogger::logInfo("Running study diagnostics")
+  # Note: this wrapper's argument is 'runIncidenceRates' (plural), while it is passed on
+  # to CohortDiagnostics as 'runIncidenceRate'.
+  CohortDiagnostics::runCohortDiagnostics(packageName = "ohdsiBCnew",
+                                          connectionDetails = connectionDetails,
+                                          cdmDatabaseSchema = cdmDatabaseSchema,
+                                          oracleTempSchema = oracleTempSchema,
+                                          cohortDatabaseSchema = cohortDatabaseSchema,
+                                          cohortTable = cohortTable,
+                                          inclusionStatisticsFolder = outputFolder,
+                                          exportFolder = file.path(outputFolder, "diagnosticsExport"),
+                                          databaseId = databaseId,
+                                          databaseName = databaseName,
+                                          databaseDescription = databaseDescription,
+                                          runInclusionStatistics = runInclusionStatistics,
+                                          runIncludedSourceConcepts = runIncludedSourceConcepts,
+                                          runOrphanConcepts = runOrphanConcepts,
+                                          runTimeDistributions = runTimeDistributions,
+                                          runBreakdownIndexEvents = runBreakdownIndexEvents,
+                                          runIncidenceRate = runIncidenceRates,
+                                          runCohortOverlap = runCohortOverlap,
+                                          runCohortCharacterization = runCohortCharacterization,
+                                          minCellCount = minCellCount)
+}
diff --git a/ohdsiBCv5/R/CreateCohorts.R b/ohdsiBCv5/R/CreateCohorts.R
@@ -0,0 +1,111 @@
+# Copyright 2020 Observational Health Data Sciences and Informatics
+#
+# This file is part of ohdsiBCnew
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Instantiate the study cohorts and export their counts and inclusion-rule statistics.
+#
+# Steps, in order:
+#   1. Run CreateCohortTable.sql from this package against the cohort schema.
+#   2. Load cohorts/InclusionRules.csv into the #cohort_inclusion temp table.
+#   3. Run one SQL file per row of settings/CohortsToCreate.csv (file name = <name>.sql).
+#   4. Write per-cohort row counts to <outputFolder>/CohortCounts.csv.
+#   5. Dump the four inclusion-statistics temp tables to CSV in outputFolder and drop them.
+#
+# Arguments:
+#   connection                An open DatabaseConnector connection; it is not closed here.
+#   cdmDatabaseSchema         Schema holding the OMOP CDM data.
+#   vocabularyDatabaseSchema  Schema holding the vocabulary tables (defaults to the CDM schema).
+#   cohortDatabaseSchema      Schema in which the cohort table is created.
+#   cohortTable               Name of the cohort table.
+#   oracleTempSchema          Schema used to emulate temp tables on Oracle.
+#   outputFolder              Existing local folder that receives the CSV files.
+.createCohorts <- function(connection,
+                           cdmDatabaseSchema,
+                           vocabularyDatabaseSchema = cdmDatabaseSchema,
+                           cohortDatabaseSchema,
+                           cohortTable,
+                           oracleTempSchema,
+                           outputFolder) {
+
+  # Create study cohort table structure:
+  sql <- SqlRender::loadRenderTranslateSql(sqlFilename = "CreateCohortTable.sql",
+                                           packageName = "ohdsiBCnew",
+                                           dbms = attr(connection, "dbms"),
+                                           oracleTempSchema = oracleTempSchema,
+                                           cohort_database_schema = cohortDatabaseSchema,
+                                           cohort_table = cohortTable)
+  DatabaseConnector::executeSql(connection, sql, progressBar = FALSE, reportOverallTime = FALSE)
+
+
+  # Insert rule names in cohort_inclusion table:
+  # createTable = FALSE means #cohort_inclusion must already exist at this point
+  # (presumably created by CreateCohortTable.sql above -- verify against that file).
+  pathToCsv <- system.file("cohorts", "InclusionRules.csv", package = "ohdsiBCnew")
+  inclusionRules <- readr::read_csv(pathToCsv, col_types = readr::cols())
+  inclusionRules <- data.frame(cohort_definition_id = inclusionRules$cohortId,
+                               rule_sequence = inclusionRules$ruleSequence,
+                               name = inclusionRules$ruleName)
+  DatabaseConnector::insertTable(connection = connection,
+                                 tableName = "#cohort_inclusion",
+                                 data = inclusionRules,
+                                 dropTableIfExists = FALSE,
+                                 createTable = FALSE,
+                                 tempTable = TRUE,
+                                 oracleTempSchema = oracleTempSchema)
+
+
+  # Instantiate cohorts:
+  pathToCsv <- system.file("settings", "CohortsToCreate.csv", package = "ohdsiBCnew")
+  cohortsToCreate <- readr::read_csv(pathToCsv, col_types = readr::cols())
+  # seq_len() yields an empty sequence for a zero-row settings file, whereas 1:nrow()
+  # would iterate over c(1, 0) and try to load a non-existent "NA.sql".
+  for (i in seq_len(nrow(cohortsToCreate))) {
+    writeLines(paste("Creating cohort:", cohortsToCreate$name[i]))
+    sql <- SqlRender::loadRenderTranslateSql(sqlFilename = paste0(cohortsToCreate$name[i], ".sql"),
+                                             packageName = "ohdsiBCnew",
+                                             dbms = attr(connection, "dbms"),
+                                             oracleTempSchema = oracleTempSchema,
+                                             cdm_database_schema = cdmDatabaseSchema,
+                                             vocabulary_database_schema = vocabularyDatabaseSchema,
+
+                                             # Redirect the inclusion-statistics tables referenced by the
+                                             # cohort SQL to session temp tables.
+                                             results_database_schema.cohort_inclusion = "#cohort_inclusion",
+                                             results_database_schema.cohort_inclusion_result = "#cohort_inc_result",
+                                             results_database_schema.cohort_inclusion_stats = "#cohort_inc_stats",
+                                             results_database_schema.cohort_summary_stats = "#cohort_summary_stats",
+
+                                             target_database_schema = cohortDatabaseSchema,
+                                             target_cohort_table = cohortTable,
+                                             target_cohort_id = cohortsToCreate$cohortId[i])
+    DatabaseConnector::executeSql(connection, sql)
+  }
+
+  # Fetch cohort counts:
+  sql <- "SELECT cohort_definition_id, COUNT(*) AS count FROM @cohort_database_schema.@cohort_table GROUP BY cohort_definition_id"
+  sql <- SqlRender::render(sql,
+                           cohort_database_schema = cohortDatabaseSchema,
+                           cohort_table = cohortTable)
+  sql <- SqlRender::translate(sql, targetDialect = attr(connection, "dbms"))
+  counts <- DatabaseConnector::querySql(connection, sql)
+  names(counts) <- SqlRender::snakeCaseToCamelCase(names(counts))
+  # Attach the cohort names; merge() joins on the shared cohortDefinitionId column.
+  counts <- merge(counts, data.frame(cohortDefinitionId = cohortsToCreate$cohortId,
+                                     cohortName  = cohortsToCreate$name))
+  write.csv(counts, file.path(outputFolder, "CohortCounts.csv"))
+
+
+  # Fetch inclusion rule stats and drop tables:
+  # For one temp table: save its contents to <outputFolder>/<camelCase table name>.csv,
+  # then truncate and drop the table.
+  fetchStats <- function(tableName) {
+    sql <- "SELECT * FROM #@table_name"
+    sql <- SqlRender::render(sql, table_name = tableName)
+    sql <- SqlRender::translate(sql = sql,
+                                targetDialect = attr(connection, "dbms"),
+                                oracleTempSchema = oracleTempSchema)
+    stats <- DatabaseConnector::querySql(connection, sql)
+    names(stats) <- SqlRender::snakeCaseToCamelCase(names(stats))
+    fileName <- file.path(outputFolder, paste0(SqlRender::snakeCaseToCamelCase(tableName), ".csv"))
+    write.csv(stats, fileName, row.names = FALSE)
+
+    sql <- "TRUNCATE TABLE #@table_name; DROP TABLE #@table_name;"
+    sql <- SqlRender::render(sql, table_name = tableName)
+    sql <- SqlRender::translate(sql = sql,
+                                targetDialect = attr(connection, "dbms"),
+                                oracleTempSchema = oracleTempSchema)
+    DatabaseConnector::executeSql(connection, sql)
+  }
+  fetchStats("cohort_inclusion")
+  fetchStats("cohort_inc_result")
+  fetchStats("cohort_inc_stats")
+  fetchStats("cohort_summary_stats")
+
+}
+
diff --git a/ohdsiBCv5/extras/CodeToRun.R b/ohdsiBCv5/extras/CodeToRun.R
@@ -0,0 +1,111 @@
+# Make sure to install all dependencies (not needed if already done):
+install.packages(c("SqlRender",
+                   "DatabaseConnector",
+                   "ggplot2",
+                   "ParallelLogger",
+                   "readr",
+                   "tibble",
+                   "dplyr",
+                   "RJSONIO",
+                   "devtools"))
+# install_github() requires the "owner/repo" form; these packages are hosted under the
+# OHDSI organisation on GitHub.
+devtools::install_github("OHDSI/FeatureExtraction")
+devtools::install_github("OHDSI/ROhdsiWebApi")
+devtools::install_github("OHDSI/CohortDiagnostics")
+
+
+# Attach the study package so runCohortDiagnostics() is available:
+library(ohdsiBCnew)
+
+# Optional: folder in which Andromeda creates its temporary files:
+options(andromedaTempFolder = "s:/andromedaTemp")
+
+# Upper bound on the number of cores to use:
+maxCores <- parallel::detectCores()
+
+
+# How to reach the database server (server and port come from environment variables):
+connectionDetails <- DatabaseConnector::createConnectionDetails(
+  dbms = "pdw",
+  server = Sys.getenv("PDW_SERVER"),
+  user = NULL,
+  password = NULL,
+  port = Sys.getenv("PDW_PORT")
+)
+
+# Oracle only: schema used to emulate temp tables (leave NULL elsewhere):
+oracleTempSchema <- NULL
+
+# Site-specific settings -- adjust these for your own database:
+outputFolder <- paste0(getwd(),"/results")
+cdmDatabaseSchema <- "cdm_ibm_mdcd_v1023.dbo"
+cohortDatabaseSchema <- "scratch.dbo"
+cohortTable <- "mschuemi_skeleton_mdcd"
+databaseId <- "MDCD"
+databaseName <- "Truven Health MarketScan® Multi-State Medicaid Database"
+databaseDescription <- "Truven Health MarketScan® Multi-State Medicaid Database (MDCD) adjudicated US health insurance claims for Medicaid enrollees from multiple states and includes hospital discharge diagnoses, outpatient diagnoses and procedures, and outpatient pharmacy claims as well as ethnicity and Medicare eligibility. Members maintain their same identifier even if they leave the system for a brief period however the dataset lacks lab data. [For further information link to RWE site for Truven MDCD."
+
+# Run the cohort diagnostics. Results end up in the diagnosticsExport subfolder of
+# outputFolder, which can be shared between sites.
+runCohortDiagnostics(
+  connectionDetails = connectionDetails,
+  cdmDatabaseSchema = cdmDatabaseSchema,
+  cohortDatabaseSchema = cohortDatabaseSchema,
+  cohortTable = cohortTable,
+  oracleTempSchema = oracleTempSchema,
+  outputFolder = outputFolder,
+  databaseId = databaseId,
+  databaseName = databaseName,
+  databaseDescription = databaseDescription,
+  createCohorts = TRUE,
+  runInclusionStatistics = TRUE,
+  runIncludedSourceConcepts = TRUE,
+  runOrphanConcepts = TRUE,
+  runTimeDistributions = TRUE,
+  runBreakdownIndexEvents = TRUE,
+  runIncidenceRates = TRUE,
+  runCohortOverlap = TRUE,
+  runCohortCharacterization = TRUE,
+  minCellCount = 5
+)
+
+# Viewing the results.
+# Optional: when the folder holds result zip files from several sites, merging them
+# first speeds up starting the viewer:
+CohortDiagnostics::preMergeDiagnosticsFiles(file.path(outputFolder, "diagnosticsExport"))
+
+# Launch the viewer. Several zip files may sit in the same folder; pre-merged files
+# are detected automatically:
+CohortDiagnostics::launchDiagnosticsExplorer(file.path(outputFolder, "diagnosticsExport"))
+
+
+# Inspect a single cohort in the local database, including patient profiles.
+# Replace 123 with the ID of the cohort you wish to inspect.
+CohortDiagnostics::launchCohortExplorer(
+  connectionDetails = connectionDetails,
+  cdmDatabaseSchema = cdmDatabaseSchema,
+  cohortDatabaseSchema = cohortDatabaseSchema,
+  cohortTable = cohortTable,
+  cohortId = 123
+)
+
+
+###########BC Outcomes #####################
+# Settings for the regimen analysis (presumably read by extras/regimen_stats.R --
+# verify against that script):
+regimenIngredientsTable <- "hms_cancer_regimen_ingredients"
+deathTable <- "death"
+count_mask <- 10
+
+library(tidyverse)
+#install.packages("lubridate")
+library(lubridate)
+#install.packages("toOrdinal")
+#library(toOrdinal)
+#install.packages("RColorBrewer")
+library(RColorBrewer)
+#install.packages("survival")
+library(survival)
+
+#### Run
+# The objects written below are not defined in this script; they are expected to be
+# created by the sourced file. The path is relative to the working directory.
+source("extras/regimen_stats.R")
+outputFolder <- paste0(getwd(),"/results/Additional")
+# write.csv() does not create missing directories, so make sure the target exists.
+if (!dir.exists(outputFolder)) {
+  dir.create(outputFolder, recursive = TRUE)
+}
+
+write.csv(population_summary, file.path(outputFolder, "population_summary.csv"))
+write.csv(stats_by_line, file.path(outputFolder, "lines_of_treatment.csv"))
+write.csv(regimens_by_treatment_line, file.path(outputFolder, "regimens_by_line.csv"))
+write.csv(yearly_regimens_by_treatment_line, file.path(outputFolder, "yearly_regimens_by_line.csv"))
+write.csv(km_outputs$OS, file.path(outputFolder, "os_km.csv"))
+write.csv(km_outputs$TTNT, file.path(outputFolder, "ttnt_km.csv"))
+write.csv(km_outputs$TTD, file.path(outputFolder, "ttd.csv"))
+
+write.csv(km_outputs$TFI, file.path(outputFolder, "tfi_km.csv"))
+write.csv(ages, file.path(outputFolder, "age.csv"))
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		# Generated by roxygen2: do not edit by hand

		export(runCohortDiagnostics)