Skip to content

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
  • Loading branch information
sratwani authored Mar 5, 2021
1 parent ee72c48 commit a94bf4e
Show file tree
Hide file tree
Showing 30 changed files with 11,857 additions and 0 deletions.
20 changes: 20 additions & 0 deletions ohdsiBCv5/DESCRIPTION
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
Package: ohdsiBCnew
Type: Package
Title: A Package Skeleton for Cohort Diagnostics
Version: 0.0.1
Author: Martijn Schuemie [aut, cre]
Maintainer: Martijn J. Schuemie <[email protected]>
Description: A skeleton package, to be used as a starting point when you just want to run cohort diagnostics.
Depends:
DatabaseConnector (>= 2.2.0)
Imports:
CohortDiagnostics
Suggests:
knitr,
rmarkdown,
ROhdsiWebApi,
OhdsiRTools
License: Apache License 2.0
VignetteBuilder: knitr
LazyData: TRUE
RoxygenNote: 7.1.1
3 changes: 3 additions & 0 deletions ohdsiBCv5/NAMESPACE
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Generated by roxygen2: do not edit by hand

export(runCohortDiagnostics)
117 changes: 117 additions & 0 deletions ohdsiBCv5/R/CohortDiagnostics.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
# Copyright 2020 Observational Health Data Sciences and Informatics
#
# This file is part of ohdsiBCnew
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

#' Execute the cohort diagnostics
#'
#' @details
#' This function optionally instantiates the study cohorts and then runs the
#' CohortDiagnostics package on them, writing all results to \code{outputFolder}.
#'
#' @param connectionDetails          An object of type \code{connectionDetails} as created using the
#'                                   \code{\link[DatabaseConnector]{createConnectionDetails}} function in the
#'                                   DatabaseConnector package.
#' @param cdmDatabaseSchema          Schema name where your patient-level data in OMOP CDM format resides.
#'                                   Note that for SQL Server, this should include both the database and
#'                                   schema name, for example 'cdm_data.dbo'.
#' @param cohortDatabaseSchema       Schema name where intermediate data can be stored. You will need to have
#'                                   write privileges in this schema. Note that for SQL Server, this should
#'                                   include both the database and schema name, for example 'cdm_data.dbo'.
#' @param cohortTable                The name of the table that will be created in the work database schema.
#'                                   This table will hold the exposure and outcome cohorts used in this
#'                                   study.
#' @param oracleTempSchema           Should be used in Oracle to specify a schema where the user has write
#'                                   privileges for storing temporary tables.
#' @param outputFolder               Name of local folder to place results; make sure to use forward slashes
#'                                   (/). Do not use a folder on a network drive since this greatly impacts
#'                                   performance.
#' @param databaseId                 A short string for identifying the database (e.g. 'Synpuf').
#' @param databaseName               The full name of the database (e.g. 'Medicare Claims
#'                                   Synthetic Public Use Files (SynPUFs)').
#' @param databaseDescription        A short description (several sentences) of the database.
#' @param createCohorts              Create the cohortTable table with the exposure and outcome cohorts?
#' @param runInclusionStatistics     Generate and export statistics on the cohort inclusion rules?
#' @param runIncludedSourceConcepts  Generate and export the source concepts included in the cohorts?
#' @param runOrphanConcepts          Generate and export potential orphan concepts?
#' @param runTimeDistributions       Generate and export cohort time distributions?
#' @param runBreakdownIndexEvents    Generate and export the breakdown of index events?
#' @param runIncidenceRates          Generate and export the cohort incidence rates?
#' @param runCohortOverlap           Generate and export the cohort overlap?
#' @param runCohortCharacterization  Generate and export the cohort characterization?
#' @param minCellCount               The minimum number of subjects contributing to a count before it can be
#'                                   included in packaged results.
#'
#' @return
#' Invisibly returns \code{NULL}; called for its side effects (cohort creation,
#' diagnostics export, and log files written to \code{outputFolder}).
#'
#' @export
runCohortDiagnostics <- function(connectionDetails,
                                 cdmDatabaseSchema,
                                 cohortDatabaseSchema = cdmDatabaseSchema,
                                 cohortTable = "cohort",
                                 oracleTempSchema = cohortDatabaseSchema,
                                 outputFolder,
                                 databaseId = "Unknown",
                                 databaseName = "Unknown",
                                 databaseDescription = "Unknown",
                                 createCohorts = TRUE,
                                 runInclusionStatistics = TRUE,
                                 runIncludedSourceConcepts = TRUE,
                                 runOrphanConcepts = TRUE,
                                 runTimeDistributions = TRUE,
                                 runBreakdownIndexEvents = TRUE,
                                 runIncidenceRates = TRUE,
                                 runCohortOverlap = TRUE,
                                 runCohortCharacterization = TRUE,
                                 minCellCount = 5) {
  # dir.exists (not file.exists) is the correct test for a directory;
  # recursive = TRUE also creates any missing parent folders.
  if (!dir.exists(outputFolder)) {
    dir.create(outputFolder, recursive = TRUE)
  }

  # Log to file and capture full error reports. Unregister both loggers on
  # exit so repeated calls in one session do not stack duplicate loggers.
  ParallelLogger::addDefaultFileLogger(file.path(outputFolder, "log.txt"))
  ParallelLogger::addDefaultErrorReportLogger(file.path(outputFolder, "errorReportR.txt"))
  on.exit(ParallelLogger::unregisterLogger("DEFAULT_FILE_LOGGER", silent = TRUE))
  on.exit(ParallelLogger::unregisterLogger("DEFAULT_ERRORREPORT_LOGGER", silent = TRUE), add = TRUE)

  if (createCohorts) {
    ParallelLogger::logInfo("Creating cohorts")
    connection <- DatabaseConnector::connect(connectionDetails)
    .createCohorts(connection = connection,
                   cdmDatabaseSchema = cdmDatabaseSchema,
                   cohortDatabaseSchema = cohortDatabaseSchema,
                   cohortTable = cohortTable,
                   oracleTempSchema = oracleTempSchema,
                   outputFolder = outputFolder)
    DatabaseConnector::disconnect(connection)
  }

  ParallelLogger::logInfo("Running study diagnostics")
  # NOTE(review): the CohortDiagnostics argument below is the singular
  # 'runIncidenceRate' while this wrapper exposes plural 'runIncidenceRates';
  # confirm against the installed CohortDiagnostics version's signature.
  CohortDiagnostics::runCohortDiagnostics(packageName = "ohdsiBCnew",
                                          connectionDetails = connectionDetails,
                                          cdmDatabaseSchema = cdmDatabaseSchema,
                                          oracleTempSchema = oracleTempSchema,
                                          cohortDatabaseSchema = cohortDatabaseSchema,
                                          cohortTable = cohortTable,
                                          inclusionStatisticsFolder = outputFolder,
                                          exportFolder = file.path(outputFolder, "diagnosticsExport"),
                                          databaseId = databaseId,
                                          databaseName = databaseName,
                                          databaseDescription = databaseDescription,
                                          runInclusionStatistics = runInclusionStatistics,
                                          runIncludedSourceConcepts = runIncludedSourceConcepts,
                                          runOrphanConcepts = runOrphanConcepts,
                                          runTimeDistributions = runTimeDistributions,
                                          runBreakdownIndexEvents = runBreakdownIndexEvents,
                                          runIncidenceRate = runIncidenceRates,
                                          runCohortOverlap = runCohortOverlap,
                                          runCohortCharacterization = runCohortCharacterization,
                                          minCellCount = minCellCount)
}
111 changes: 111 additions & 0 deletions ohdsiBCv5/R/CreateCohorts.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
# Copyright 2020 Observational Health Data Sciences and Informatics
#
# This file is part of ohdsiBCnew
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Instantiate the study cohorts in the cohort table and export counts and
# inclusion-rule statistics to CSV files in outputFolder.
#
# Args:
#   connection:               A live DatabaseConnector connection.
#   cdmDatabaseSchema:        Schema holding the OMOP CDM data.
#   vocabularyDatabaseSchema: Schema holding the vocabulary tables
#                             (defaults to the CDM schema).
#   cohortDatabaseSchema:     Writable schema for the cohort table.
#   cohortTable:              Name of the cohort table to create/populate.
#   oracleTempSchema:         Schema for emulating temp tables on Oracle.
#   outputFolder:             Local folder receiving the CSV output.
.createCohorts <- function(connection,
                           cdmDatabaseSchema,
                           vocabularyDatabaseSchema = cdmDatabaseSchema,
                           cohortDatabaseSchema,
                           cohortTable,
                           oracleTempSchema,
                           outputFolder) {

  # Create study cohort table structure:
  sql <- SqlRender::loadRenderTranslateSql(sqlFilename = "CreateCohortTable.sql",
                                           packageName = "ohdsiBCnew",
                                           dbms = attr(connection, "dbms"),
                                           oracleTempSchema = oracleTempSchema,
                                           cohort_database_schema = cohortDatabaseSchema,
                                           cohort_table = cohortTable)
  DatabaseConnector::executeSql(connection, sql, progressBar = FALSE, reportOverallTime = FALSE)

  # Insert rule names in cohort_inclusion table (created by the SQL above,
  # hence dropTableIfExists = FALSE / createTable = FALSE):
  pathToCsv <- system.file("cohorts", "InclusionRules.csv", package = "ohdsiBCnew")
  inclusionRules <- readr::read_csv(pathToCsv, col_types = readr::cols())
  inclusionRules <- data.frame(cohort_definition_id = inclusionRules$cohortId,
                               rule_sequence = inclusionRules$ruleSequence,
                               name = inclusionRules$ruleName)
  DatabaseConnector::insertTable(connection = connection,
                                 tableName = "#cohort_inclusion",
                                 data = inclusionRules,
                                 dropTableIfExists = FALSE,
                                 createTable = FALSE,
                                 tempTable = TRUE,
                                 oracleTempSchema = oracleTempSchema)

  # Instantiate cohorts. seq_len handles an empty CohortsToCreate.csv safely
  # (1:nrow(...) would iterate over c(1, 0) when there are no rows).
  pathToCsv <- system.file("settings", "CohortsToCreate.csv", package = "ohdsiBCnew")
  cohortsToCreate <- readr::read_csv(pathToCsv, col_types = readr::cols())
  for (i in seq_len(nrow(cohortsToCreate))) {
    writeLines(paste("Creating cohort:", cohortsToCreate$name[i]))
    sql <- SqlRender::loadRenderTranslateSql(sqlFilename = paste0(cohortsToCreate$name[i], ".sql"),
                                             packageName = "ohdsiBCnew",
                                             dbms = attr(connection, "dbms"),
                                             oracleTempSchema = oracleTempSchema,
                                             cdm_database_schema = cdmDatabaseSchema,
                                             vocabulary_database_schema = vocabularyDatabaseSchema,
                                             results_database_schema.cohort_inclusion = "#cohort_inclusion",
                                             results_database_schema.cohort_inclusion_result = "#cohort_inc_result",
                                             results_database_schema.cohort_inclusion_stats = "#cohort_inc_stats",
                                             results_database_schema.cohort_summary_stats = "#cohort_summary_stats",
                                             target_database_schema = cohortDatabaseSchema,
                                             target_cohort_table = cohortTable,
                                             target_cohort_id = cohortsToCreate$cohortId[i])
    DatabaseConnector::executeSql(connection, sql)
  }

  # Fetch cohort counts and attach the human-readable cohort names:
  sql <- "SELECT cohort_definition_id, COUNT(*) AS count FROM @cohort_database_schema.@cohort_table GROUP BY cohort_definition_id"
  sql <- SqlRender::render(sql,
                           cohort_database_schema = cohortDatabaseSchema,
                           cohort_table = cohortTable)
  sql <- SqlRender::translate(sql, targetDialect = attr(connection, "dbms"))
  counts <- DatabaseConnector::querySql(connection, sql)
  names(counts) <- SqlRender::snakeCaseToCamelCase(names(counts))
  counts <- merge(counts, data.frame(cohortDefinitionId = cohortsToCreate$cohortId,
                                     cohortName = cohortsToCreate$name))
  # row.names = FALSE for consistency with the fetchStats() exports below
  # (avoids a spurious unnamed index column in the CSV).
  write.csv(counts, file.path(outputFolder, "CohortCounts.csv"), row.names = FALSE)

  # Fetch inclusion rule stats, export each to CSV, then drop the temp table:
  fetchStats <- function(tableName) {
    sql <- "SELECT * FROM #@table_name"
    sql <- SqlRender::render(sql, table_name = tableName)
    sql <- SqlRender::translate(sql = sql,
                                targetDialect = attr(connection, "dbms"),
                                oracleTempSchema = oracleTempSchema)
    stats <- DatabaseConnector::querySql(connection, sql)
    names(stats) <- SqlRender::snakeCaseToCamelCase(names(stats))
    fileName <- file.path(outputFolder, paste0(SqlRender::snakeCaseToCamelCase(tableName), ".csv"))
    write.csv(stats, fileName, row.names = FALSE)

    sql <- "TRUNCATE TABLE #@table_name; DROP TABLE #@table_name;"
    sql <- SqlRender::render(sql, table_name = tableName)
    sql <- SqlRender::translate(sql = sql,
                                targetDialect = attr(connection, "dbms"),
                                oracleTempSchema = oracleTempSchema)
    DatabaseConnector::executeSql(connection, sql)
  }
  fetchStats("cohort_inclusion")
  fetchStats("cohort_inc_result")
  fetchStats("cohort_inc_stats")
  fetchStats("cohort_summary_stats")
}

111 changes: 111 additions & 0 deletions ohdsiBCv5/extras/CodeToRun.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
# Make sure to install all dependencies (not needed if already done):
install.packages("SqlRender")
install.packages("DatabaseConnector")
install.packages("ggplot2")
install.packages("ParallelLogger")
install.packages("readr")
install.packages("tibble")
install.packages("dplyr")
install.packages("RJSONIO")
install.packages("devtools")
# install_github() requires the "owner/repo" form; these OHDSI packages are
# not on CRAN. A bare repo name (e.g. "FeatureExtraction") would fail.
devtools::install_github("OHDSI/FeatureExtraction")
devtools::install_github("OHDSI/ROhdsiWebApi")
devtools::install_github("OHDSI/CohortDiagnostics")


# Load the package
library(ohdsiBCnew)

# Optional: specify where the temporary files will be created:
options(andromedaTempFolder = "s:/andromedaTemp")

# Maximum number of cores to be used:
maxCores <- parallel::detectCores()


# Details for connecting to the server:
connectionDetails <- DatabaseConnector::createConnectionDetails(dbms = "pdw",
                                                                server = Sys.getenv("PDW_SERVER"),
                                                                user = NULL,
                                                                password = NULL,
                                                                port = Sys.getenv("PDW_PORT"))

# For Oracle: define a schema that can be used to emulate temp tables:
oracleTempSchema <- NULL

# Details specific to the database:
outputFolder <- paste0(getwd(), "/results")
cdmDatabaseSchema <- "cdm_ibm_mdcd_v1023.dbo"
cohortDatabaseSchema <- "scratch.dbo"
cohortTable <- "mschuemi_skeleton_mdcd"
databaseId <- "MDCD"
databaseName <- "Truven Health MarketScan® Multi-State Medicaid Database"
databaseDescription <- "Truven Health MarketScan® Multi-State Medicaid Database (MDCD) adjudicated US health insurance claims for Medicaid enrollees from multiple states and includes hospital discharge diagnoses, outpatient diagnoses and procedures, and outpatient pharmacy claims as well as ethnicity and Medicare eligibility. Members maintain their same identifier even if they leave the system for a brief period however the dataset lacks lab data. [For further information link to RWE site for Truven MDCD."

# Use this to run the cohort diagnostics. The results will be stored in the
# diagnosticsExport subfolder of the outputFolder. This can be shared between sites.
runCohortDiagnostics(connectionDetails = connectionDetails,
                     cdmDatabaseSchema = cdmDatabaseSchema,
                     cohortDatabaseSchema = cohortDatabaseSchema,
                     cohortTable = cohortTable,
                     oracleTempSchema = oracleTempSchema,
                     outputFolder = outputFolder,
                     databaseId = databaseId,
                     databaseName = databaseName,
                     databaseDescription = databaseDescription,
                     createCohorts = TRUE,
                     runInclusionStatistics = TRUE,
                     runIncludedSourceConcepts = TRUE,
                     runOrphanConcepts = TRUE,
                     runTimeDistributions = TRUE,
                     runBreakdownIndexEvents = TRUE,
                     runIncidenceRates = TRUE,
                     runCohortOverlap = TRUE,
                     runCohortCharacterization = TRUE,
                     minCellCount = 5)

# To view the results:
# Optional: if there are results zip files from multiple sites in a folder, this
# merges them, which will speed up starting the viewer:
CohortDiagnostics::preMergeDiagnosticsFiles(file.path(outputFolder, "diagnosticsExport"))

# Use this to view the results. Multiple zip files can be in the same folder.
# If the files were pre-merged, this is automatically detected:
CohortDiagnostics::launchDiagnosticsExplorer(file.path(outputFolder, "diagnosticsExport"))


# To explore a specific cohort in the local database, viewing patient profiles:
CohortDiagnostics::launchCohortExplorer(connectionDetails = connectionDetails,
                                        cdmDatabaseSchema = cdmDatabaseSchema,
                                        cohortDatabaseSchema = cohortDatabaseSchema,
                                        cohortTable = cohortTable,
                                        cohortId = 123)
# Where 123 is the ID of the cohort you wish to inspect.


########### BC Outcomes #####################
regimenIngredientsTable <- "hms_cancer_regimen_ingredients"
deathTable <- "death"
count_mask <- 10

library(tidyverse)
# install.packages("lubridate")
library(lubridate)
# install.packages("toOrdinal")
# library(toOrdinal)
# install.packages("RColorBrewer")
library(RColorBrewer)
# install.packages("survival")
library(survival)

#### Run
# regimen_stats.R is expected to define the objects written out below
# (population_summary, stats_by_line, regimens_by_treatment_line,
# yearly_regimens_by_treatment_line, km_outputs, ages).
source("extras/regimen_stats.R")
outputFolder <- paste0(getwd(), "/results/Additional")

write.csv(population_summary, file.path(outputFolder, "population_summary.csv"))
write.csv(stats_by_line, file.path(outputFolder, "lines_of_treatment.csv"))
write.csv(regimens_by_treatment_line, file.path(outputFolder, "regimens_by_line.csv"))
write.csv(yearly_regimens_by_treatment_line, file.path(outputFolder, "yearly_regimens_by_line.csv"))
write.csv(km_outputs$OS, file.path(outputFolder, "os_km.csv"))
write.csv(km_outputs$TTNT, file.path(outputFolder, "ttnt_km.csv"))
write.csv(km_outputs$TTD, file.path(outputFolder, "ttd.csv"))

write.csv(km_outputs$TFI, file.path(outputFolder, "tfi_km.csv"))
write.csv(ages, file.path(outputFolder, "age.csv"))
Loading

0 comments on commit a94bf4e

Please sign in to comment.