-
Notifications
You must be signed in to change notification settings - Fork 24
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
30 changed files
with
11,857 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
Package: ohdsiBCnew | ||
Type: Package | ||
Title: A Package Skeleton for Cohort Diagnostics | ||
Version: 0.0.1 | ||
Author: Martijn Schuemie [aut, cre] | ||
Maintainer: Martijn J. Schuemie <[email protected]> | ||
Description: A skeleton package, to be used as a starting point when you just want to run cohort diagnostics. | ||
Depends: | ||
DatabaseConnector (>= 2.2.0) | ||
Imports: | ||
CohortDiagnostics | ||
Suggests: | ||
knitr, | ||
rmarkdown, | ||
ROhdsiWebApi, | ||
OhdsiRTools | ||
License: Apache License 2.0 | ||
VignetteBuilder: knitr | ||
LazyData: TRUE | ||
RoxygenNote: 7.1.1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
# Generated by roxygen2: do not edit by hand | ||
|
||
export(runCohortDiagnostics) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,117 @@ | ||
# Copyright 2020 Observational Health Data Sciences and Informatics | ||
# | ||
# This file is part of ohdsiBCnew | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
#' Execute the cohort diagnostics
#'
#' @details
#' This function executes the cohort diagnostics. It creates \code{outputFolder} when it does not
#' exist yet, logs to \code{log.txt} and \code{errorReportR.txt} in that folder, optionally
#' instantiates the cohorts, and then calls
#' \code{\link[CohortDiagnostics]{runCohortDiagnostics}} with the \code{diagnosticsExport}
#' subfolder of \code{outputFolder} as export folder.
#'
#' @param connectionDetails    An object of type \code{connectionDetails} as created using the
#'                             \code{\link[DatabaseConnector]{createConnectionDetails}} function in the
#'                             DatabaseConnector package.
#' @param cdmDatabaseSchema    Schema name where your patient-level data in OMOP CDM format resides.
#'                             Note that for SQL Server, this should include both the database and
#'                             schema name, for example 'cdm_data.dbo'.
#' @param cohortDatabaseSchema Schema name where intermediate data can be stored. You will need to have
#'                             write privileges in this schema. Note that for SQL Server, this should
#'                             include both the database and schema name, for example 'cdm_data.dbo'.
#' @param cohortTable          The name of the table that will be created in the work database schema.
#'                             This table will hold the exposure and outcome cohorts used in this
#'                             study.
#' @param oracleTempSchema     Should be used in Oracle to specify a schema where the user has write
#'                             privileges for storing temporary tables.
#' @param outputFolder         Name of local folder to place results; make sure to use forward slashes
#'                             (/). Do not use a folder on a network drive since this greatly impacts
#'                             performance.
#' @param databaseId           A short string for identifying the database (e.g.
#'                             'Synpuf').
#' @param databaseName         The full name of the database (e.g. 'Medicare Claims
#'                             Synthetic Public Use Files (SynPUFs)').
#' @param databaseDescription  A short description (several sentences) of the database.
#' @param createCohorts        Create the cohortTable table with the exposure and outcome cohorts?
#' @param runInclusionStatistics      Generate and export statistics on the cohort inclusion rules?
#' @param runIncludedSourceConcepts   Generate and export the source concepts included in the cohorts?
#' @param runOrphanConcepts           Generate and export potential orphan concepts?
#' @param runTimeDistributions        Generate and export cohort time distributions?
#' @param runBreakdownIndexEvents     Generate and export the breakdown of index events?
#' @param runIncidenceRates           Generate and export the cohort incidence rates?
#' @param runCohortOverlap            Generate and export the cohort overlap?
#' @param runCohortCharacterization   Generate and export the cohort characterization?
#' @param minCellCount         The minimum number of subjects contributing to a count before it can be included
#'                             in packaged results.
#'
#' @return
#' The value of the call to \code{CohortDiagnostics::runCohortDiagnostics}.
#'
#' @export
runCohortDiagnostics <- function(connectionDetails,
                                 cdmDatabaseSchema,
                                 cohortDatabaseSchema = cdmDatabaseSchema,
                                 cohortTable = "cohort",
                                 oracleTempSchema = cohortDatabaseSchema,
                                 outputFolder,
                                 databaseId = "Unknown",
                                 databaseName = "Unknown",
                                 databaseDescription = "Unknown",
                                 createCohorts = TRUE,
                                 runInclusionStatistics = TRUE,
                                 runIncludedSourceConcepts = TRUE,
                                 runOrphanConcepts = TRUE,
                                 runTimeDistributions = TRUE,
                                 runBreakdownIndexEvents = TRUE,
                                 runIncidenceRates = TRUE,
                                 runCohortOverlap = TRUE,
                                 runCohortCharacterization = TRUE,
                                 minCellCount = 5) {
  if (!dir.exists(outputFolder)) {
    dir.create(outputFolder, recursive = TRUE)
  }

  # Register each logger's cleanup right after adding it, so a failure while
  # adding the second logger still unregisters the first one.
  ParallelLogger::addDefaultFileLogger(file.path(outputFolder, "log.txt"))
  on.exit(ParallelLogger::unregisterLogger("DEFAULT_FILE_LOGGER", silent = TRUE), add = TRUE)
  ParallelLogger::addDefaultErrorReportLogger(file.path(outputFolder, "errorReportR.txt"))
  on.exit(ParallelLogger::unregisterLogger("DEFAULT_ERRORREPORT_LOGGER", silent = TRUE), add = TRUE)

  if (createCohorts) {
    ParallelLogger::logInfo("Creating cohorts")
    connection <- DatabaseConnector::connect(connectionDetails)
    # `finally` closes the connection whether or not cohort creation fails, and
    # does so before the diagnostics run below starts. The error itself is not
    # caught and still propagates to the caller.
    tryCatch(
      .createCohorts(connection = connection,
                     cdmDatabaseSchema = cdmDatabaseSchema,
                     cohortDatabaseSchema = cohortDatabaseSchema,
                     cohortTable = cohortTable,
                     oracleTempSchema = oracleTempSchema,
                     outputFolder = outputFolder),
      finally = DatabaseConnector::disconnect(connection)
    )
  }

  ParallelLogger::logInfo("Running study diagnostics")
  # Note: the CohortDiagnostics argument is singular (`runIncidenceRate`), while
  # this wrapper exposes it as `runIncidenceRates`.
  CohortDiagnostics::runCohortDiagnostics(packageName = "ohdsiBCnew",
                                          connectionDetails = connectionDetails,
                                          cdmDatabaseSchema = cdmDatabaseSchema,
                                          oracleTempSchema = oracleTempSchema,
                                          cohortDatabaseSchema = cohortDatabaseSchema,
                                          cohortTable = cohortTable,
                                          inclusionStatisticsFolder = outputFolder,
                                          exportFolder = file.path(outputFolder, "diagnosticsExport"),
                                          databaseId = databaseId,
                                          databaseName = databaseName,
                                          databaseDescription = databaseDescription,
                                          runInclusionStatistics = runInclusionStatistics,
                                          runIncludedSourceConcepts = runIncludedSourceConcepts,
                                          runOrphanConcepts = runOrphanConcepts,
                                          runTimeDistributions = runTimeDistributions,
                                          runBreakdownIndexEvents = runBreakdownIndexEvents,
                                          runIncidenceRate = runIncidenceRates,
                                          runCohortOverlap = runCohortOverlap,
                                          runCohortCharacterization = runCohortCharacterization,
                                          minCellCount = minCellCount)
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,111 @@ | ||
# Copyright 2020 Observational Health Data Sciences and Informatics | ||
# | ||
# This file is part of ohdsiBCnew | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
# Instantiate the study cohorts and export their counts and inclusion-rule statistics.
#
# Steps, in order:
#   1. Run the packaged CreateCohortTable.sql against the cohort schema/table.
#   2. Insert the rule names from cohorts/InclusionRules.csv into #cohort_inclusion.
#   3. Run one packaged SQL file per row of settings/CohortsToCreate.csv.
#   4. Write the per-cohort row counts to CohortCounts.csv in outputFolder.
#   5. Dump the four inclusion-statistics temp tables to CSV in outputFolder,
#      then truncate and drop them.
#
# Arguments:
#   connection                An open DatabaseConnector connection; it is not closed here.
#   cdmDatabaseSchema         Schema substituted for @cdm_database_schema in the cohort SQL.
#   vocabularyDatabaseSchema  Schema substituted for @vocabulary_database_schema;
#                             defaults to cdmDatabaseSchema.
#   cohortDatabaseSchema      Schema holding the cohort table.
#   cohortTable               Name of the cohort table.
#   oracleTempSchema          Passed to SqlRender / DatabaseConnector for temp-table emulation.
#   outputFolder              Folder receiving the CSV files; it must already exist.
.createCohorts <- function(connection,
                           cdmDatabaseSchema,
                           vocabularyDatabaseSchema = cdmDatabaseSchema,
                           cohortDatabaseSchema,
                           cohortTable,
                           oracleTempSchema,
                           outputFolder) {

  # Create study cohort table structure:
  sql <- SqlRender::loadRenderTranslateSql(sqlFilename = "CreateCohortTable.sql",
                                           packageName = "ohdsiBCnew",
                                           dbms = attr(connection, "dbms"),
                                           oracleTempSchema = oracleTempSchema,
                                           cohort_database_schema = cohortDatabaseSchema,
                                           cohort_table = cohortTable)
  DatabaseConnector::executeSql(connection, sql, progressBar = FALSE, reportOverallTime = FALSE)

  # Insert rule names in cohort_inclusion table.
  # NOTE(review): createTable = FALSE, so #cohort_inclusion is presumably created
  # by CreateCohortTable.sql above -- confirm against that SQL file.
  pathToCsv <- system.file("cohorts", "InclusionRules.csv", package = "ohdsiBCnew")
  inclusionRules <- readr::read_csv(pathToCsv, col_types = readr::cols())
  inclusionRules <- data.frame(cohort_definition_id = inclusionRules$cohortId,
                               rule_sequence = inclusionRules$ruleSequence,
                               name = inclusionRules$ruleName)
  DatabaseConnector::insertTable(connection = connection,
                                 tableName = "#cohort_inclusion",
                                 data = inclusionRules,
                                 dropTableIfExists = FALSE,
                                 createTable = FALSE,
                                 tempTable = TRUE,
                                 oracleTempSchema = oracleTempSchema)

  # Instantiate cohorts:
  pathToCsv <- system.file("settings", "CohortsToCreate.csv", package = "ohdsiBCnew")
  cohortsToCreate <- readr::read_csv(pathToCsv, col_types = readr::cols())
  # seq_len() yields an empty sequence for a settings file without rows, whereas
  # 1:nrow() would iterate over c(1, 0) and try to load a non-existent SQL file.
  for (i in seq_len(nrow(cohortsToCreate))) {
    writeLines(paste("Creating cohort:", cohortsToCreate$name[i]))
    sql <- SqlRender::loadRenderTranslateSql(sqlFilename = paste0(cohortsToCreate$name[i], ".sql"),
                                             packageName = "ohdsiBCnew",
                                             dbms = attr(connection, "dbms"),
                                             oracleTempSchema = oracleTempSchema,
                                             cdm_database_schema = cdmDatabaseSchema,
                                             vocabulary_database_schema = vocabularyDatabaseSchema,

                                             # Redirect the inclusion-statistics tables to temp tables:
                                             results_database_schema.cohort_inclusion = "#cohort_inclusion",
                                             results_database_schema.cohort_inclusion_result = "#cohort_inc_result",
                                             results_database_schema.cohort_inclusion_stats = "#cohort_inc_stats",
                                             results_database_schema.cohort_summary_stats = "#cohort_summary_stats",

                                             target_database_schema = cohortDatabaseSchema,
                                             target_cohort_table = cohortTable,
                                             target_cohort_id = cohortsToCreate$cohortId[i])
    DatabaseConnector::executeSql(connection, sql)
  }

  # Fetch cohort counts:
  sql <- "SELECT cohort_definition_id, COUNT(*) AS count FROM @cohort_database_schema.@cohort_table GROUP BY cohort_definition_id"
  sql <- SqlRender::render(sql,
                           cohort_database_schema = cohortDatabaseSchema,
                           cohort_table = cohortTable)
  sql <- SqlRender::translate(sql, targetDialect = attr(connection, "dbms"))
  counts <- DatabaseConnector::querySql(connection, sql)
  names(counts) <- SqlRender::snakeCaseToCamelCase(names(counts))
  # merge() joins on the shared column cohortDefinitionId to attach cohort names.
  counts <- merge(counts, data.frame(cohortDefinitionId = cohortsToCreate$cohortId,
                                     cohortName = cohortsToCreate$name))
  write.csv(counts, file.path(outputFolder, "CohortCounts.csv"))

  # Fetch inclusion rule stats and drop tables.
  # Writes temp table #<tableName> to <camelCase(tableName)>.csv in outputFolder,
  # then truncates and drops the temp table.
  fetchStats <- function(tableName) {
    sql <- "SELECT * FROM #@table_name"
    sql <- SqlRender::render(sql, table_name = tableName)
    sql <- SqlRender::translate(sql = sql,
                                targetDialect = attr(connection, "dbms"),
                                oracleTempSchema = oracleTempSchema)
    stats <- DatabaseConnector::querySql(connection, sql)
    names(stats) <- SqlRender::snakeCaseToCamelCase(names(stats))
    fileName <- file.path(outputFolder, paste0(SqlRender::snakeCaseToCamelCase(tableName), ".csv"))
    write.csv(stats, fileName, row.names = FALSE)

    sql <- "TRUNCATE TABLE #@table_name; DROP TABLE #@table_name;"
    sql <- SqlRender::render(sql, table_name = tableName)
    sql <- SqlRender::translate(sql = sql,
                                targetDialect = attr(connection, "dbms"),
                                oracleTempSchema = oracleTempSchema)
    DatabaseConnector::executeSql(connection, sql)
  }
  fetchStats("cohort_inclusion")
  fetchStats("cohort_inc_result")
  fetchStats("cohort_inc_stats")
  fetchStats("cohort_summary_stats")

}
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,111 @@ | ||
# Make sure to install all dependencies (not needed if already done):
install.packages(c("SqlRender",
                   "DatabaseConnector",
                   "ggplot2",
                   "ParallelLogger",
                   "readr",
                   "tibble",
                   "dplyr",
                   "RJSONIO",
                   "devtools"))

# install_github() expects the "owner/repo" form; a bare repository name is
# rejected as an invalid repo specification. These packages live under the
# OHDSI organisation on GitHub.
devtools::install_github("OHDSI/FeatureExtraction")
devtools::install_github("OHDSI/ROhdsiWebApi")
devtools::install_github("OHDSI/CohortDiagnostics")
|
||
|
||
# Attach the study package so runCohortDiagnostics() is available.
library(ohdsiBCnew)

# Optional: folder in which Andromeda creates its temporary files.
options(andromedaTempFolder = "s:/andromedaTemp")

# Upper bound on the number of cores to use.
maxCores <- parallel::detectCores()

# Server connection; server and port are read from environment variables.
connectionDetails <- DatabaseConnector::createConnectionDetails(
  dbms = "pdw",
  server = Sys.getenv("PDW_SERVER"),
  port = Sys.getenv("PDW_PORT"),
  user = NULL,
  password = NULL
)

# Oracle only: schema used to emulate temp tables.
oracleTempSchema <- NULL

# Database-specific settings.
databaseId <- "MDCD"
databaseName <- "Truven Health MarketScan® Multi-State Medicaid Database"
databaseDescription <- "Truven Health MarketScan® Multi-State Medicaid Database (MDCD) adjudicated US health insurance claims for Medicaid enrollees from multiple states and includes hospital discharge diagnoses, outpatient diagnoses and procedures, and outpatient pharmacy claims as well as ethnicity and Medicare eligibility. Members maintain their same identifier even if they leave the system for a brief period however the dataset lacks lab data. [For further information link to RWE site for Truven MDCD."
cdmDatabaseSchema <- "cdm_ibm_mdcd_v1023.dbo"
cohortDatabaseSchema <- "scratch.dbo"
cohortTable <- "mschuemi_skeleton_mdcd"
outputFolder <- paste0(getwd(), "/results")

# Run the cohort diagnostics. Results are written to the diagnosticsExport
# subfolder of outputFolder, which can be shared between sites.
runCohortDiagnostics(
  connectionDetails = connectionDetails,
  cdmDatabaseSchema = cdmDatabaseSchema,
  cohortDatabaseSchema = cohortDatabaseSchema,
  cohortTable = cohortTable,
  oracleTempSchema = oracleTempSchema,
  outputFolder = outputFolder,
  databaseId = databaseId,
  databaseName = databaseName,
  databaseDescription = databaseDescription,
  minCellCount = 5,
  createCohorts = TRUE,
  runInclusionStatistics = TRUE,
  runIncludedSourceConcepts = TRUE,
  runOrphanConcepts = TRUE,
  runTimeDistributions = TRUE,
  runBreakdownIndexEvents = TRUE,
  runIncidenceRates = TRUE,
  runCohortOverlap = TRUE,
  runCohortCharacterization = TRUE
)

# Viewing the results.
# Optional: when the folder holds result zip files from several sites, merge
# them first so the viewer starts faster.
CohortDiagnostics::preMergeDiagnosticsFiles(file.path(outputFolder, "diagnosticsExport"))

# Launch the viewer. Several zip files may sit in the same folder; pre-merged
# files are detected automatically.
CohortDiagnostics::launchDiagnosticsExplorer(file.path(outputFolder, "diagnosticsExport"))

# Explore one cohort in the local database, including patient profiles.
# Replace 123 with the ID of the cohort you wish to inspect.
CohortDiagnostics::launchCohortExplorer(
  connectionDetails = connectionDetails,
  cdmDatabaseSchema = cdmDatabaseSchema,
  cohortDatabaseSchema = cohortDatabaseSchema,
  cohortTable = cohortTable,
  cohortId = 123
)
|
||
|
||
########### BC Outcomes #####################
# Settings for the additional outcome analyses.
# NOTE(review): these variables are not used in this script; presumably they are
# read by extras/regimen_stats.R -- confirm.
regimenIngredientsTable <- "hms_cancer_regimen_ingredients"
deathTable <- "death"
count_mask <- 10

library(tidyverse)
# install.packages("lubridate")
library(lubridate)
# install.packages("toOrdinal")
# library(toOrdinal)
# install.packages("RColorBrewer")
library(RColorBrewer)
# install.packages("survival")
library(survival)

#### Run
# The objects written out below (population_summary, stats_by_line, km_outputs,
# ages, ...) are not defined in this script; they are expected to be created by
# the sourced file.
source("extras/regimen_stats.R")

# Only redirect outputFolder after sourcing, in case the sourced script reads
# the earlier value.
outputFolder <- paste0(getwd(), "/results/Additional")
# write.csv() does not create missing directories, so make sure the target
# folder exists before exporting.
if (!dir.exists(outputFolder)) {
  dir.create(outputFolder, recursive = TRUE)
}

write.csv(population_summary, file.path(outputFolder, "population_summary.csv"))
write.csv(stats_by_line, file.path(outputFolder, "lines_of_treatment.csv"))
write.csv(regimens_by_treatment_line, file.path(outputFolder, "regimens_by_line.csv"))
write.csv(yearly_regimens_by_treatment_line, file.path(outputFolder, "yearly_regimens_by_line.csv"))
write.csv(km_outputs$OS, file.path(outputFolder, "os_km.csv"))
write.csv(km_outputs$TTNT, file.path(outputFolder, "ttnt_km.csv"))
write.csv(km_outputs$TTD, file.path(outputFolder, "ttd.csv"))

write.csv(km_outputs$TFI, file.path(outputFolder, "tfi_km.csv"))
write.csv(ages, file.path(outputFolder, "age.csv"))
Oops, something went wrong.