diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 6b0cd802..d030c248 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -46,13 +46,13 @@ jobs: arch: "arm64" # - os: "windows-latest" # julia-version: '1' - # shell: "pwsh" + # shell: "msys2" # compiler: "g++" # arch: "x64" defaults: run: - shell: ${{ matrix.shell }} + shell: ${{ matrix.shell }} {0} steps: - name: Checkout repository @@ -65,28 +65,53 @@ jobs: brew install ffmpeg - name: Install MSYS2 on Windows - if: matrix.os.name == 'windows-latest' + if: matrix.os == 'windows-latest' uses: msys2/setup-msys2@v2 with: update: true install: mingw-w64-x86_64-binutils mingw-w64-x86_64-gcc mingw-w64-x86_64-headers-git mingw-w64-x86_64-gcc-libs mingw-w64-x86_64-libwinpthread-git mingw-w64-x86_64-lapack mingw-w64-x86_64-openblas mingw-w64-x86_64-libxml2 mingw-w64-x86_64-bzip2 mingw-w64-x86_64-python mingw-w64-x86_64-python-zstandard mingw-w64-x86_64-python-cffi make bison flex mingw-w64-x86_64-ca-certificates mingw-w64-x86_64-diffutils + - uses: julia-actions/setup-julia@v2 with: version: ${{ matrix.julia-version }} arch: ${{ matrix.arch }} + + - name: Add Julia to PATH on Windows + if: matrix.os == 'windows-latest' + run: echo "C:\\hostedtoolcache\\windows\\julia\\${{ matrix.julia-version }}\\${{ matrix.arch }}\\bin" >> $GITHUB_PATH + - name: Set environment variable PHYSICELL_CPP run: echo "PHYSICELL_CPP=${{ matrix.compiler }}" >> $GITHUB_ENV + - uses: julia-actions/cache@v2 - - name: Add PCVCTRegistry + + - name: Add PCVCTRegistry (Windows) + if: matrix.os == 'windows-latest' + shell: pwsh run: julia -e 'import Pkg; Pkg.Registry.add("General"); Pkg.Registry.add(Pkg.RegistrySpec(url="https://github.com/drbergman/PCVCTRegistry.git"))' + + - name: Add PCVCTRegistry (non-Windows) + if: matrix.os != 'windows-latest' + run: julia -e 'import Pkg; Pkg.Registry.add("General"); Pkg.Registry.add(Pkg.RegistrySpec(url="https://github.com/drbergman/PCVCTRegistry.git"))' + - uses: julia-actions/julia-buildpkg@v1 + + - name: Install libRoadRunner dependencies on Ubuntu + if: matrix.os == 'ubuntu-latest' + run: | + sudo apt-get update + wget http://security.ubuntu.com/ubuntu/pool/universe/n/ncurses/libtinfo5_6.3-2ubuntu0.1_amd64.deb + sudo apt-get install -y ./libtinfo5_6.3-2ubuntu0.1_amd64.deb + - name: Run all tests uses: julia-actions/julia-runtest@v1 env: PCVCT_NUM_PARALLEL_SIMS: 8 PHYSICELL_CPP: ${{ matrix.compiler }} # maybe necessary for windows?? 
PCVCT_PUBLIC_REPO_AUTH: ${{ secrets.PUBLIC_REPO_AUTH }} + - uses: julia-actions/julia-processcoverage@v1 + - uses: codecov/codecov-action@v5 with: files: lcov.info @@ -102,23 +127,30 @@ jobs: statuses: write steps: - uses: actions/checkout@v4 + - uses: julia-actions/setup-julia@v2 with: version: '1' + - uses: julia-actions/cache@v2 + - name: Add PCVCTRegistry run: julia -e 'import Pkg; Pkg.Registry.add("General"); Pkg.Registry.add(Pkg.RegistrySpec(url="https://github.com/drbergman/PCVCTRegistry.git"))' + - name: Configure doc environment shell: julia --project=docs --color=yes {0} run: | using Pkg Pkg.develop(PackageSpec(path=pwd())) Pkg.instantiate() + - uses: julia-actions/julia-buildpkg@v1 + - uses: julia-actions/julia-docdeploy@v1 env: GITHUB_TOKEN: ${{ secrets.GH_TOKEN }} DOCUMENTER_KEY: ${{ secrets.DOCUMENTER_KEY }} + - name: Run doctests shell: julia --project=docs --color=yes {0} run: | diff --git a/Project.toml b/Project.toml index 519f3e08..f4d18031 100644 --- a/Project.toml +++ b/Project.toml @@ -1,9 +1,10 @@ name = "pcvct" uuid = "3c374bc7-7384-4f83-8ca0-87b8c727e6ff" authors = ["Daniel Bergman and contributors"] -version = "0.0.15" +version = "0.0.16" [deps] +AutoHashEquals = "15f4f7f2-30c1-5605-9d31-71845cf9641f" CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" @@ -28,9 +29,11 @@ RecipesBase = "3cdcf5f2-1ef4-517c-9805-6587b60abb01" SQLite = "0aa819cd-b072-5ff4-a722-6bc24af294d9" Sobol = "ed01d8cd-4d21-5b2a-85b4-cc3bdc58bad4" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" +TOML = "fa267f1f-6049-4f14-aa54-33bafae1ed76" Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" [compat] +AutoHashEquals = "2.2.0" CSV = "0.10" DataFrames = "1" Distributions = "0.25" @@ -53,6 +56,7 @@ RecipesBase = "1.3.4" SQLite = "1" Sobol = "1" Statistics = "1" +TOML = "1.0.3" Tables = "1" julia = "1.6.7" diff --git a/docs/make.jl b/docs/make.jl index d745bd3e..b1967add 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -18,9 +18,11 @@ makedocs(; "Getting started" => "man/getting_started.md", "CoVariations" => "man/covariations.md", "Data directory" => "man/data_directory.md", + "Intracellular inputs" => "man/intracellular_inputs.md", "Known limitations" => "man/known_limitations.md", "PhysiCell Studio" => "man/physicell_studio.md", "Sensitivity analysis" => "man/sensitivity_analysis.md", + "Developer guide" => "man/developer_guide.md", ], "Documentation" => map( s -> "lib/$(s)", diff --git a/docs/src/lib/VCTComponents.md b/docs/src/lib/VCTComponents.md new file mode 100644 index 00000000..2f14f3c9 --- /dev/null +++ b/docs/src/lib/VCTComponents.md @@ -0,0 +1,14 @@ +```@meta +CollapsedDocStrings = true +``` + +# VCTComponents + +Allows for combining PhysiCell input components into whole inputs. + +Currently, only supports this for intracellular ODE (libRoadRunner) models. + +```@autodocs +Modules = [pcvct] +Pages = ["VCTComponents.jl"] +``` \ No newline at end of file diff --git a/docs/src/lib/VCTConfiguration.md b/docs/src/lib/VCTConfiguration.md index 5b88647c..0ba1c1a6 100644 --- a/docs/src/lib/VCTConfiguration.md +++ b/docs/src/lib/VCTConfiguration.md @@ -6,7 +6,7 @@ CollapsedDocStrings = true Interface with the configuration file necessary for PhysiCell simulations. -Provide functionality for accessing and modifying elements in any XML, including the PhysiCell configuration file, XML rules file, and XML IC cell file. 
+Provide functionality for accessing and modifying elements in any XML, including the PhysiCell configuration file, XML rules file, combined intracellular XML file, XML IC cell file, and XML IC ECM file. ```@autodocs Modules = [pcvct] diff --git a/docs/src/man/data_directory.md b/docs/src/man/data_directory.md index 334f3e44..3abc24f8 100644 --- a/docs/src/man/data_directory.md +++ b/docs/src/man/data_directory.md @@ -13,6 +13,7 @@ project-dir/ │ │ ├── dcs/ │ │ ├── ecms/ │ │ └── substrates/ +│ ├── intracellulars/ │ ├── rulesets_collections/ ... ``` @@ -31,6 +32,23 @@ Add within `data/inputs/custom_codes/default/` the following, each exactly as is - `Makefile` - `custom_modules/` +## Rulesets collections + +Add a single file within `data/inputs/rulesets_collections/default/` called `base_rulesets.csv` with the base ruleset collection for your PhysiCell project. +If your project does not use rules, you can skip this step. + +You may also place an XML file here. Use [PhysiCellXMLRules.jl](https://github.com/drbergman/PhysiCellXMLRules.jl) to create one from a standard CSV version of the rules. + +**Important**: In either case, the variations you define *must* be on the XML version. +After calling `initializeModelManager()`, any folder with `base_rulesets.csv` will now be populated with a `base_rulesets.xml` file that can be referenced to set the XML paths. + +## Intracellulars + +Add a single XML file within `data/inputs/intracellulars/default/` called `intracellular.xml` in which the root has two child elements: `cell_definitions` and `intracellulars`. +This currently only supports libRoadRunner, i.e., ODEs. +See `sample_projects_intracellular/combined/template-combined` for an example. +See [Intracellular inputs](@ref) for much more information. + ## ICs These folders are optional as not every model includes initial conditions as separate files. @@ -47,7 +65,14 @@ cells/ Proceed similarly for `dcs/`, `ecms/`, and `substrates/`, renaming those files to `dcs.csv`, `ecm.csv`, and `substrates.csv`, respectively. -## Rulesets collections +### IC cells -Add a single file within `data/inputs/rulesets_collections/default/` called `base_rulesets.csv` with the base ruleset collection for your PhysiCell project. -If your project does not use rules, you can skip this step. \ No newline at end of file +pcvct uses [PhysiCellCellCreator.jl](https://github.com/drbergman/PhysiCellCellCreator.jl) to allow for creation of `cells.csv` files based on geometries defined in a `cells.xml` file. +To use this, first create such an XML document (see [PhysiCellCellCreator.jl](https://github.com/drbergman/PhysiCellCellCreator.jl) for details) and place it in place of the `cells.csv` file. +You may make variations on this in the same way as for `config` and `rulesets_collection`. + +### IC ecm + +pcvct uses [PhysiCellECMCreator.jl](https://github.com/drbergman/PhysiCellECMCreator.jl) to allow for creation of `ecm.csv` files based on the structure defined in an `ecm.xml` file. +To use this, first create such an XML document (see [PhysiCellECMCreator.jl](https://github.com/drbergman/PhysiCellECMCreator.jl) for details) and place it in place of the `ecm.csv` file. +You may make variations on this in the same way as for `config` and `rulesets_collection`. 
\ No newline at end of file diff --git a/docs/src/man/developer_guide.md b/docs/src/man/developer_guide.md new file mode 100644 index 00000000..63def33e --- /dev/null +++ b/docs/src/man/developer_guide.md @@ -0,0 +1,6 @@ +# Developer guide + +## Style guide +- Use `#!` for comments that are informative + - This helps find code lines commented out in development. + - Using the regexp `^(\s+)?# .+\n` seems to work well for finding commented out code lines. \ No newline at end of file diff --git a/docs/src/man/intracellular_inputs.md b/docs/src/man/intracellular_inputs.md new file mode 100644 index 00000000..3335f250 --- /dev/null +++ b/docs/src/man/intracellular_inputs.md @@ -0,0 +1,31 @@ +# Intracellular inputs + +pcvct currently only supports ODE intracellular models using libRoadRunner. +It uses a specialized format to achieve this, creating the SBML files needed by libRoadRunner at PhysiCell runtime. +Briefly, the `intracellular.xml` file defines a mapping between cell definitions and intracellular models. +See the template provided [here](https://github.com/drbergman/PhysiCell/blob/my-physicell/sample_projects_intracellular/combined/template-combined/config/sample_combined_sbmls.xml). + +To facilitate creation of such files, and to make it easy to mix-and-match intracellular models, users can place the SBML files that define the ODEs into `data/components/roadrunner` and then simply reference those to construct the specialized XMLs needed. +For example, place the `Toy_Metabolic_Model.xml` from [sample_projects_intracellular/ode/ode_energy/config/](https://github.com/drbergman/PhysiCell/blob/my-physicell/sample_projects_intracellular/ode/ode_energy/config) into `data/components/roadrunner` and assemble the XML as follows + +```julia +cell_type = "default" # name of the cell type using this intracellular model +component = PhysiCellComponent("roadrunner", "Toy_Metabolic_Model.xml") # pass in the type of the component and the name of the file to use +cell_type_to_component = Dict{String, PhysiCellComponent}(cell_type => component) # add other entries to this Dict for other cell types using an intracellular model +intracellular_folder = assembleIntracellular!(cell_type_to_component; name="toy_metabolic") # will return "toy_metabolic" or "toy_metabolic_n" +``` + +This creates a folder at `data/inputs/intracellulars/` with the name stored in `intracellular_folder`. +Also, the `!` in `assembleIntracellular!` references how the components in the `cell_type_to_component` `Dict` are updated to match those in `data/inputs/intracellulars/$(intracellular_folder)/intracellular.xml`. +Use these IDs to make variations on the components by using + +```julia +xml_path = ["intracellulars", "intracellular:ID:$(component.id)", ...] +``` + +where the `...` is the path starting with the root of the XML file (`sbml` for SBML files). + +Finally, pass this folder into `InputFolders` to use this input in simulation runs: +```julia +inputs = InputFolders(...; ..., intracellular=intracellular_folder, ...) +``` \ No newline at end of file diff --git a/docs/src/man/known_limitations.md b/docs/src/man/known_limitations.md index e1eb7854..b9bae5a8 100644 --- a/docs/src/man/known_limitations.md +++ b/docs/src/man/known_limitations.md @@ -7,4 +7,8 @@ If you do need an upper bound on the number of simulations in such a grouping, s It is assumed that most, if not all use cases, will benefit from more simulations. ## Initial conditions not loaded when launching PhysiCell Studio for a simulation. 
-When launching PhysiCell Studio from pcvct, the initial conditions (cells and substrates) are not loaded. \ No newline at end of file +When launching PhysiCell Studio from pcvct, the initial conditions (cells and substrates) are not loaded. + +## Limited intracellular models +Currently only supports ODE intracellular models (using libRoadRunner). +Does not support MaBoSS or dFBA. \ No newline at end of file diff --git a/src/VCTAnalysis/motility.jl b/src/VCTAnalysis/motility.jl index a84c3e28..d1caa0d0 100644 --- a/src/VCTAnalysis/motility.jl +++ b/src/VCTAnalysis/motility.jl @@ -34,14 +34,14 @@ function meanSpeed(p; direction=:any)::NTuple{3,Dict{String,Float64}} distance_dict = Dict{String, Float64}(zip(cell_type_names, zeros(Float64, length(cell_type_names)))) time_dict = Dict{String, Float64}(zip(cell_type_names, zeros(Float64, length(cell_type_names)))) while start_ind <= length(type_change) - I = findfirst(type_change[start_ind:end]) # from s to I, cell_type_name is constant. at I+1 it changes - I = isnothing(I) ? length(type_change)+2-start_ind : I # if the cell_type_name is constant till the end, set I to be at the end - # If start_ind = 1 (at start of sim) and I = 2 (so cell_type_name[3] != cell_type_name[2], meaning that for steps [1,2] cell_type_name is constnat), only use dx in stepping from 1->2 since somewhere in 2->3 the type changes. That is, use dx[1] - distance_dict[cell_type_name[start_ind]] += sum(dist_fn.(dx[start_ind:I-1], dy[start_ind:I-1], dz[start_ind:I-1])) # only count distance travelled while remaining in the initial cell_type_name - time_dict[cell_type_name[start_ind]] += p.time[start_ind+I-1] - p.time[start_ind] # record time spent in this cell_type_name (note p.time is not diffs like dx and dy are, hence the difference in indices) - start_ind += I # advance the start to the first instance of a new cell_type_name + I = findfirst(type_change[start_ind:end]) #! from s to I, cell_type_name is constant. at I+1 it changes + I = isnothing(I) ? length(type_change)+2-start_ind : I #! if the cell_type_name is constant till the end, set I to be at the end + #! If start_ind = 1 (at start of sim) and I = 2 (so cell_type_name[3] != cell_type_name[2], meaning that for steps [1,2] cell_type_name is constnat), only use dx in stepping from 1->2 since somewhere in 2->3 the type changes. That is, use dx[1] + distance_dict[cell_type_name[start_ind]] += sum(dist_fn.(dx[start_ind:I-1], dy[start_ind:I-1], dz[start_ind:I-1])) #! only count distance travelled while remaining in the initial cell_type_name + time_dict[cell_type_name[start_ind]] += p.time[start_ind+I-1] - p.time[start_ind] #! record time spent in this cell_type_name (note p.time is not diffs like dx and dy are, hence the difference in indices) + start_ind += I #! advance the start to the first instance of a new cell_type_name end - speed_dict = [k => distance_dict[k] / time_dict[k] for k in cell_type_names] |> Dict{String,Float64} # convert to speed + speed_dict = [k => distance_dict[k] / time_dict[k] for k in cell_type_names] |> Dict{String,Float64} #! 
convert to speed return speed_dict, distance_dict, time_dict end diff --git a/src/VCTAnalysis/population.jl b/src/VCTAnalysis/population.jl index 8fc0d991..e24a9f52 100644 --- a/src/VCTAnalysis/population.jl +++ b/src/VCTAnalysis/population.jl @@ -163,7 +163,7 @@ function MonadPopulationTimeSeries(monad::Monad; include_dead::Bool=false) @assert time == spts.time "Simulations $(simulation_ids[1]) and $(simulation_id) in monad $(monad.id) have different times in their time series." end for (name, cell_count) in pairs(spts.cell_count) - if !(name in keys(cell_count_arrays)) + if !haskey(cell_count_arrays, name) cell_count_arrays[name] = zeros(Int, length(time), monad_length) end cell_count_arrays[name][:,i] = cell_count @@ -195,18 +195,18 @@ function populationTimeSeries(M::AbstractMonad; include_dead::Bool=false) end end -# plot recipes +#! plot recipes getMeanCounts(s::SimulationPopulationTimeSeries) = s.cell_count getMeanCounts(m::MonadPopulationTimeSeries) = m.cell_count_means @recipe function f(M::AbstractMonad; include_dead=false, include_cell_types=:all, exclude_cell_types=String[]) pts = populationTimeSeries(M; include_dead=include_dead) - # allow for single string input for either of these + #! allow for single string input for either of these include_cell_types = include_cell_types == :all ? :all : (include_cell_types isa String ? [include_cell_types] : include_cell_types) exclude_cell_types = exclude_cell_types isa String ? [exclude_cell_types] : exclude_cell_types for (name, counts) in pairs(getMeanCounts(pts)) - skip = include_cell_types != :all && !(name in include_cell_types) # skip this cell type as only a subset was requested and this was not in it - skip = skip || name in exclude_cell_types # skip this cell type as it was requested to be excluded + skip = include_cell_types != :all && !(name in include_cell_types) #! skip this cell type as only a subset was requested and this was not in it + skip = skip || name in exclude_cell_types #! skip this cell type as it was requested to be excluded if skip continue end @@ -232,15 +232,15 @@ end sim_id = monad.simulation_ids[1] row_ind = findfirst(df[!, :SimID] .== sim_id) row = df[row_ind, :] - title_tuple = [row[name] for name in names(row) if !(name in ["SimID", "ConfigVarID", "RulesVarID"])] + title_tuple = [row[name] for name in names(row) if !(name in ["SimID"; shortLocationVariationID.(String, project_locations.varied)])] push!(title_tuples, title_tuple) end - + order = sortperm(title_tuples) title_tuples = title_tuples[order] monads = monads[order] - layout --> (length(monads), 1) # easy room for improvement here + layout --> (length(monads), 1) #! 
easy room for improvement here for (i, (monad, title_tuple)) in enumerate(zip(monads, title_tuples)) @series begin @@ -306,7 +306,7 @@ end end if cell_types == :all for (name, cell_count) in pairs(spts.cell_count) - if !(name in keys(cell_count_arrays)) + if !haskey(cell_count_arrays, name) cell_count_arrays[name] = zeros(Int, length(time), monad_length) end cell_count_arrays[name][:,i] = cell_count @@ -319,7 +319,7 @@ end if cell_type isa String cell_type = [cell_type] end - if !(cell_type in keys(cell_count_arrays)) + if !haskey(cell_count_arrays, cell_type) cell_count_arrays[cell_type] = zeros(Int, length(time), monad_length) end cell_count_arrays[cell_type][:,i] = sum([spts.cell_count[ct] for ct in cell_type]) @@ -336,7 +336,7 @@ end monad_summary[monad.id] = (time=time, cell_count_means=cell_count_means, cell_count_stds=cell_count_stds) end - layout --> (length(all_cell_types), 1) # easy room for improvement here + layout --> (length(all_cell_types), 1) #! easy room for improvement here for (i, cell_type) in enumerate(all_cell_types) @series begin diff --git a/src/VCTAnalysis/substrate.jl b/src/VCTAnalysis/substrate.jl index 1866caf5..45d55d36 100644 --- a/src/VCTAnalysis/substrate.jl +++ b/src/VCTAnalysis/substrate.jl @@ -73,7 +73,7 @@ function averageExtracellularSubstrate(snapshot::PhysiCellSnapshot; cell_type_to substrates = snapshot.substrates mesh = snapshot.mesh - aes = Dict{String, Dict{String, Real}}() # aes[cell_type_name][substrate_name] = average + aes = Dict{String, Dict{String, Real}}() cells_to_keep = include_dead ? deepcopy(cells) : cells[.!cells.dead, :] for cell_type_name in values(cell_type_to_name_dict) cell_type_cells = cells_to_keep[cells_to_keep.cell_type_name .== cell_type_name, :] @@ -123,7 +123,7 @@ function ExtracellularSubstrateTimeSeries(sequence::PhysiCellSequence; include_d for (i, snapshot) in enumerate(sequence.snapshots) snapshot_data = averageExtracellularSubstrate(snapshot; cell_type_to_name_dict=cell_type_to_name_dict, substrate_names=substrate_names, include_dead=include_dead) for cell_type_name in keys(snapshot_data) - if !(cell_type_name in keys(data)) + if !haskey(data, cell_type_name) data[cell_type_name] = Dict{String, Vector{Real}}() end for substrate_name in keys(snapshot_data[cell_type_name]) @@ -154,7 +154,7 @@ function ExtracellularSubstrateTimeSeries(simulation_id::Integer; include_dead:: continue end cell_type_name, substrate_name = split(name, " AND ") - if !(cell_type_name in keys(data)) + if !haskey(data, cell_type_name) data[cell_type_name] = Dict{String, Vector{Real}}() end data[cell_type_name][substrate_name] = df[!, name] diff --git a/src/VCTClasses.jl b/src/VCTClasses.jl index 9c4e3066..2af09fd3 100644 --- a/src/VCTClasses.jl +++ b/src/VCTClasses.jl @@ -10,9 +10,59 @@ Base.length(T::AbstractTrial) = getSimulationIDs(T) |> length ############ InputFolders ############ ########################################## +""" + InputFolder + +Hold the information for a single input folder. + +# Fields +- `location::Symbol`: The location of the input folder, e.g. `:config`, `:custom_code`, etc. Options are defined in `data/inputs.toml`. +- `id::Int`: The ID of the input folder in the database. +- `folder::String`: The name of the input folder. It will be in `data/inputs/`. +- `basename::Union{String,Missing}`: The basename of the input file. This can be used to determine if the input file is varied. +- `required::Bool`: Whether the input folder is required. This is defined in `data/inputs.toml`. 
+- `varied::Bool`: Whether the input folder is varied. This is determined by the presence of a varied basename in the input folder. +- `path_from_inputs::String`: The path from the `data/inputs` directory to the input folder. This is defined in `data/inputs.toml`. +""" struct InputFolder + location::Symbol id::Int folder::String + basename::Union{String,Missing} + required::Bool + varied::Bool + path_from_inputs::String + + function InputFolder(location::Symbol, id::Int, folder::String) + location_dict = inputs_dict[location] + required = location_dict["required"] + if isempty(folder) + if required + error("Folder for $location must be provided") + end + return new(location, id, folder, missing, required, false, "") + end + path_from_inputs = joinpath(location_dict["path_from_inputs"], folder) + basename = location_dict["basename"] + varied = folderIsVaried(location, folder) + if basename isa Vector + possible_files = [joinpath(locationPath(location, folder), x) for x in basename] + basename_index = possible_files .|> isfile |> findfirst + if isnothing(basename_index) + error("Neither of $possible_files exist") + end + basename = basename[basename_index] + end + return new(location, id, folder, basename, required, varied, path_from_inputs) + end + function InputFolder(location::Symbol, id::Int) + folder = inputFolderName(location, id) + return InputFolder(location, id, folder) + end + function InputFolder(location::Symbol, folder::String) + id = retrieveID(location, folder) + return InputFolder(location, id, folder) + end end """ @@ -20,87 +70,89 @@ end Consolidate the folder information for a simulation/monad/sampling. -Pass the folder names within the `inputs/` directory to create an `InputFolders` object. -Pass them in the order of `config`, `custom_code`, `rulesets_collection`, `ic_cell`, `ic_substrate`, `ic_ecm`, `ic_dc`. -Or use the keyword-based constructors: +Pass the folder names within the `inputs/` directory to create an `InputFolders` object. +The `path_from_inputs` is defined in the `data/inputs.toml` file for each. +It is possible to acces the [`InputFolder`](@ref) values using index notation, e.g. `input_folders[:config]`. +Several constructors exist: +1. All folders passed as keyword arguments. Omitted folders are assumed to be \"\", i.e. those inputs are unused. ```julia -InputFolders(config, custom_code; rulesets_collection="", ic_cell="", ic_substrate="", ic_ecm="", ic_dc="") +InputFolders(; config="default", custom_codes="default", rulesets_collection="default") ``` +2. Pass in the required inputs as arguments and the optional inputs as keyword arguments. The required folders must be passed in alphabetical order. +Refer to the names defined in `data/inputs.toml` to see this order. Omitted optional folders are assumed to be \"\", i.e. those inputs are unused. ```julia -InputFolders(; config="", custom_code="", rulesets_collection="", ic_cell="", ic_substrate="", ic_ecm="", ic_dc="") +config_folder = "default" +custom_code_folder = "default" +ic_cell_folder = "cells_in_disc" +InputFolders(config_folder, custom_code_folder; ic_cell=ic_cell_folder) ``` # Fields -- `config::InputFolder`: id and folder name for the base configuration folder. -- `custom_code::InputFolder`: id and folder name for the custom code folder. -- `rulesets_collection::InputFolder`: id and folder name for the rulesets collection folder. -- `ic_cell::InputFolder`: id and folder name for the initial condition (IC) cells folder. 
-- `ic_substrate::InputFolder`: id and folder name for the initial condition (IC) substrate folder. -- `ic_ecm::InputFolder`: id and folder name for the initial condition (IC) extracellular matrix (ECM) folder. -- `ic_dc::InputFolder`: id and folder name for the initial condition (IC) dirichlet conditions (DC) folder. +- `input_folders::NamedTuple`: The input locations defined in `data/inputs.toml` define the keys. The values are [`InputFolder`](@ref)s. """ struct InputFolders - config::InputFolder # id and folder name for the base configuration folder - custom_code::InputFolder # id and folder name for the custom code folder - rulesets_collection::InputFolder # id and folder name for the rulesets collection folder - ic_cell::InputFolder # id and folder name for the initial condition (IC) cells folder - ic_substrate::InputFolder # id and folder name for the initial condition (IC) substrate folder - ic_ecm::InputFolder # id and folder name for the initial condition (IC) extracellular matrix (ECM) folder - ic_dc::InputFolder # id and folder name for the initial condition (IC) dirichlet conditions (DC) folder - - function InputFolders(config_folder::String, custom_code_folder::String, rulesets_collection_folder::String, ic_cell_folder::String, ic_substrate_folder::String, ic_ecm_folder::String, ic_dc_folder::String) - @assert config_folder != "" "config_folder must be provided" - @assert custom_code_folder != "" "custom_code_folder must be provided" - config = InputFolder(retrieveID("configs", config_folder), config_folder) - custom_code = InputFolder(retrieveID("custom_codes", custom_code_folder), custom_code_folder) - rulesets_collection = InputFolder(retrieveID("rulesets_collections", rulesets_collection_folder), rulesets_collection_folder) - ic_cell = InputFolder(retrieveID("ic_cells", ic_cell_folder), ic_cell_folder) - ic_substrate = InputFolder(retrieveID("ic_substrates", ic_substrate_folder), ic_substrate_folder) - ic_ecm = InputFolder(retrieveID("ic_ecms", ic_ecm_folder), ic_ecm_folder) - ic_dc = InputFolder(retrieveID("ic_dcs", ic_dc_folder), ic_dc_folder) - return new(config, custom_code, rulesets_collection, ic_cell, ic_substrate, ic_ecm, ic_dc) + input_folders::NamedTuple + + function InputFolders(location_pairs::Vector{<:Pair{Symbol,<:Union{String,Int}}}) + locs_already_here = first.(location_pairs) + invalid_locations = setdiff(locs_already_here, project_locations.all) + @assert isempty(invalid_locations) "Invalid locations: $invalid_locations.\nPossible locations are: $(project_locations.all)" + for loc in setdiff(project_locations.all, locs_already_here) + push!(location_pairs, loc => "") + end + return new([loc => InputFolder(loc, val) for (loc, val) in location_pairs] |> NamedTuple) end - function InputFolders(config_id::Int, custom_code_id::Int, rulesets_collection_id::Int, ic_cell_id::Int, ic_substrate_id::Int, ic_ecm_id::Int, ic_dc_id::Int) - @assert config_id > 0 "config_id must be positive" - @assert custom_code_id > 0 "custom_code_id must be positive" - config = InputFolder(config_id, configFolder(config_id)) - custom_code = InputFolder(custom_code_id, customCodesFolder(custom_code_id)) - rulesets_collection = InputFolder(rulesets_collection_id, rulesetsCollectionFolder(rulesets_collection_id)) - ic_cell = InputFolder(ic_cell_id, icCellFolder(ic_cell_id)) - ic_substrate = InputFolder(ic_substrate_id, icSubstrateFolder(ic_substrate_id)) - ic_ecm = InputFolder(ic_ecm_id, icECMFolder(ic_ecm_id)) - ic_dc = InputFolder(ic_dc_id, icDCFolder(ic_dc_id)) - return new(config, 
custom_code, rulesets_collection, ic_cell, ic_substrate, ic_ecm, ic_dc) + + function InputFolders(; kwargs...) + return InputFolders([loc => val for (loc, val) in kwargs]) + end end -function InputFolders(config::String, custom_code::String; rulesets_collection::String="", ic_cell::String="", ic_substrate::String="", ic_ecm::String="", ic_dc::String="") - return InputFolders(config, custom_code, rulesets_collection, ic_cell, ic_substrate, ic_ecm, ic_dc) +function createSimpleInputFolders() + fn_args = join(["$(location)::String" for location in project_locations.required], ", ") + fn_kwargs = join(["$(location)::String=\"\"" for location in setdiff(project_locations.all, project_locations.required)], ", ") + ret_val = "[$(join([":$(location) => $(location)" for location in project_locations.all], ", "))] |> InputFolders" + """ + function InputFolders($(fn_args); $(fn_kwargs)) + return $(ret_val) + end + """ |> Meta.parse |> eval + return end -function InputFolders(; config::String="", custom_code::String="", rulesets_collection::String="", ic_cell::String="", ic_substrate::String="", ic_ecm::String="", ic_dc::String="") - return InputFolders(config, custom_code, rulesets_collection, ic_cell, ic_substrate, ic_ecm, ic_dc) -end +Base.getindex(input_folders::InputFolders, loc::Symbol) = input_folders.input_folders[loc] ########################################## ############ Variation IDs ############ ########################################## -struct VariationIDs - config::Int # integer identifying which variation on the base config file to use (config_variations.db) - rulesets_collection::Int # integer identifying which variation on the ruleset file to use (rulesets_collection_variations.db) - ic_cell::Int # integer identifying which variation on the ic cell file to use (ic_cell_variations.db) (only used if cells.xml, not used for cells.csv) - ic_ecm::Int # integer identifying which variation on the ic ecm file to use (ic_ecm_variations.db) (only used if ecm.xml, not used for ecm.csv) -end +""" + VariationID -function VariationIDs(inputs::InputFolders) - fns = fieldnames(VariationIDs) - base_variation_ids = [(getfield(inputs, fn).id==-1 ? -1 : 0) for fn in fns] - return VariationIDs(base_variation_ids...) +The variation IDs for any of the possibly varying inputs. + +For each input type that can be varied, this records the current variation ID for that input type. +By convention, a value of `-1` indicates that the input is not being used (hence this is disallowed for a `required` input type). +A value of `0` indicates that the base file is being used, unvaried. +Hence, if the input type is sometimes varied (such as `ic_cell` with a `cells.csv` file), this value must be `0` in such conditions. +""" +struct VariationID + ids::NamedTuple + + function VariationID(inputs::InputFolders) + return new((loc => inputs[loc].id == -1 ? -1 : 0 for loc in project_locations.varied) |> NamedTuple) + end + + function VariationID(x::Vector{Pair{Symbol,Int}}) + #! this is slightly dangerous since no checks are made that the locations are valid. + #! but it is called often enough internally that it is worth it to have this constructor without checks + #! 
if this is added to the public API, then checks should be added + return new(x |> NamedTuple) + end end -variationIDNames() = (fieldnames(VariationIDs) .|> string) .* "_variation_id" +Base.getindex(variation_id::VariationID, loc::Symbol) = variation_id.ids[loc] ########################################## ############# Simulation ############# @@ -128,59 +180,52 @@ simulation = Simulation(simulation_id) # Fields - `id::Int`: integer uniquely identifying this simulation. Matches with the folder in `data/outputs/simulations/` - `inputs::InputFolders`: contains the folder info for this simulation. -- `variation_ids::VariationIDs`: contains the variation IDs for this simulation. +- `variation_id::VariationID`: contains the variation IDs for this simulation. """ struct Simulation <: AbstractMonad - id::Int # integer uniquely identifying this simulation + id::Int #! integer uniquely identifying this simulation inputs::InputFolders - variation_ids::VariationIDs + variation_id::VariationID - function Simulation(id::Int, inputs::InputFolders, variation_ids::VariationIDs) + function Simulation(id::Int, inputs::InputFolders, variation_id::VariationID) @assert id > 0 "id must be positive" - @assert variation_ids.config >= 0 "config variation id must be non-negative" - @assert variation_ids.rulesets_collection >= -1 "rulesets_collection variation id must be non-negative or -1 (indicating no rules)" - @assert variation_ids.ic_cell >= -1 "ic_cell variation id must be non-negative or -1 (indicating no ic cells)" - @assert variation_ids.ic_ecm >= -1 "ic_ecm variation id must be non-negative or -1 (indicating no ic ecm)" - if variation_ids.rulesets_collection != -1 - @assert inputs.rulesets_collection.folder != "" "rulesets_collection folder must be provided if rulesets_collection variation id is not -1 (indicating that the rules are in use)" - end - if variation_ids.ic_cell == -1 - @assert inputs.ic_cell.folder == "" "ic_cell variation_id must be >=0 if ic_cell folder is provided" - else - @assert inputs.ic_cell.folder != "" "ic_cell folder must be provided if ic_cell variation_id is not -1 (indicating that the cells are in use)" - @assert variation_ids.ic_cell == 0 || isfile(joinpath(data_dir, "inputs", "ics", "cells", inputs.ic_cell.folder, "cells.xml")) "cells.xml must be provided if ic_cell variation_id is >1 (indicating that the cell ic parameters are being varied)" - end - if variation_ids.ic_ecm == -1 - @assert inputs.ic_ecm.folder == "" "ic_ecm variation_id must be >=0 if ic_ecm folder is provided" - else - @assert inputs.ic_ecm.folder != "" "ic_ecm folder must be provided if ic_ecm variation_id is not -1 (indicating that the ecm is in use)" - @assert variation_ids.ic_ecm == 0 || isfile(joinpath(data_dir, "inputs", "ics", "ecms", inputs.ic_ecm.folder, "ecm.xml")) "ecm.xml must be provided if ic_ecm variation_id is >1 (indicating that the ecm ic parameters are being varied)" + for location in project_locations.varied + if inputs[location].required + @assert variation_id[location] >= 0 "$(location) variation id must be non-negative" + elseif inputs[location].id == -1 + @assert variation_id[location] == -1 "$(location) variation id must be -1 because there is no associated folder, indicating $(location) is not in use." + #! now we know this location is not required and it is in use + elseif !inputs[location].varied + #! this particular folder is not being varied, so make sure its variation id is 0, i.e. 
the base file in this folder + @assert variation_id[location] == 0 "$(inputs[location].folder) in $(location) is not varying but the variation id is not 0." + #! now we know that the folder is being varied, so just make sure the variation id is >=0 + else + @assert variation_id[location] >= 0 "$(location) variation id must be non-negative as the folder $(inputs[location].folder) is varying." + end end - return new(id, inputs, variation_ids) + return new(id, inputs, variation_id) end end -function Simulation(inputs::InputFolders, variation_ids::VariationIDs=VariationIDs(inputs)) +function Simulation(inputs::InputFolders, variation_id::VariationID=VariationID(inputs)) simulation_id = DBInterface.execute(db, """ - INSERT INTO simulations (physicell_version_id,\ - config_id,rulesets_collection_id,\ - ic_cell_id,ic_substrate_id,ic_ecm_id,ic_dc_id,\ - custom_code_id,\ - $(join(variationIDNames(), ",")),\ - status_code_id) \ + INSERT INTO simulations (\ + physicell_version_id,\ + $(join(locationIDNames(), ",")),\ + $(join(locationVariationIDNames(), ",")),\ + status_code_id\ + ) \ VALUES(\ - $(physicellVersionDBEntry()),\ - $(inputs.config.id),$(inputs.rulesets_collection.id),\ - $(inputs.ic_cell.id),$(inputs.ic_substrate.id),\ - $(inputs.ic_ecm.id),$(inputs.ic_dc.id),$(inputs.custom_code.id),\ - $(join([string(getfield(variation_ids, field)) for field in fieldnames(VariationIDs)],",")),\ - $(getStatusCodeID("Not Started")) + $(physicellVersionDBEntry()),\ + $(join([inputs[loc].id for loc in project_locations.all], ",")),\ + $(join([variation_id[loc] for loc in project_locations.varied],",")),\ + $(getStatusCodeID("Not Started")) ) RETURNING simulation_id; """ ) |> DataFrame |> x -> x.simulation_id[1] - return Simulation(simulation_id, inputs, variation_ids) + return Simulation(simulation_id, inputs, variation_id) end function getSimulation(simulation_id::Int) @@ -188,9 +233,10 @@ function getSimulation(simulation_id::Int) if isempty(df) error("Simulation $(simulation_id) not in the database.") end - inputs = InputFolders(df.config_id[1], df.custom_code_id[1], df.rulesets_collection_id[1], df.ic_cell_id[1], df.ic_substrate_id[1], df.ic_ecm_id[1], df.ic_dc_id[1]) - variation_ids = VariationIDs(df.config_variation_id[1], df.rulesets_collection_variation_id[1], df.ic_cell_variation_id[1], df.ic_ecm_variation_id[1]) - return Simulation(simulation_id, inputs, variation_ids) + inputs = [loc => df[1, locationIDName(loc)] for loc in project_locations.all] |> InputFolders + variation_id = [loc => df[1, locationVarIDName(loc)] for loc in project_locations.varied] |> VariationID + + return Simulation(simulation_id, inputs, variation_id) end Simulation(simulation_id::Int) = getSimulation(simulation_id) @@ -231,88 +277,81 @@ monad = Monad(monad_id; n_replicates=5) # ensures at least 5 simulations in the - `n_replicates::Int`: minimum number of simulations to ensure are part of this monad when running this monad. - `simulation_ids::Vector{Int}`: array of simulation IDs belonging to this monad. This need not have length equal to `n_replicates`. - `inputs::InputFolders`: contains the folder info for this monad. -- `variation_ids::VariationIDs`: contains the variation IDs for this monad. +- `variation_id::VariationID`: contains the variation IDs for this monad. """ struct Monad <: AbstractMonad - # a monad is a group of simulation replicates, i.e. 
identical up to randomness - id::Int # integer uniquely identifying this monad - n_replicates::Int # (minimum) number of simulations belonging to this monad - simulation_ids::Vector{Int} # simulation ids belonging to this monad + #! a monad is a group of simulation replicates, i.e. identical up to randomness + id::Int #! integer uniquely identifying this monad + n_replicates::Int #! (minimum) number of simulations belonging to this monad + simulation_ids::Vector{Int} #! simulation ids belonging to this monad - inputs::InputFolders # contains the folder names for the simulations in this monad + inputs::InputFolders #! contains the folder names for the simulations in this monad - variation_ids::VariationIDs + variation_id::VariationID - function Monad(n_replicates::Int, inputs::InputFolders, variation_ids::VariationIDs, use_previous::Bool) + function Monad(n_replicates::Int, inputs::InputFolders, variation_id::VariationID, use_previous::Bool) monad_id = DBInterface.execute(db, - """ - INSERT OR IGNORE INTO monads (physicell_version_id,\ - config_id,custom_code_id,\ - rulesets_collection_id,\ - ic_cell_id,ic_substrate_id,ic_ecm_id,ic_dc_id,\ - $(join(variationIDNames(), ","))\ - ) \ - VALUES(\ - $(physicellVersionDBEntry()),\ - $(inputs.config.id),$(inputs.custom_code.id),\ - $(inputs.rulesets_collection.id),\ - $(inputs.ic_cell.id),$(inputs.ic_substrate.id),\ - $(inputs.ic_ecm.id),$(inputs.ic_dc.id),\ - $(join([string(getfield(variation_ids, field)) for field in fieldnames(VariationIDs)],",")) - ) \ - RETURNING monad_id; - """ - ) |> DataFrame |> x -> x.monad_id + """ + INSERT OR IGNORE INTO monads (\ + physicell_version_id,\ + $(join(locationIDNames(), ",")),\ + $(join(locationVariationIDNames(), ","))\ + ) \ + VALUES(\ + $(physicellVersionDBEntry()),\ + $(join([inputs[loc].id for loc in project_locations.all], ",")),\ + $(join([variation_id[loc] for loc in project_locations.varied],","))\ + ) \ + RETURNING monad_id; + """ + ) |> DataFrame |> x -> x.monad_id if isempty(monad_id) monad_id = constructSelectQuery( - "monads", - """ - WHERE (physicell_version_id,config_id,custom_code_id,\ - rulesets_collection_id,\ - ic_cell_id,ic_substrate_id,\ - ic_ecm_id,ic_dc_id,\ - $(join(variationIDNames(), ",")))=\ - (\ - $(physicellVersionDBEntry()),\ - $(inputs.config.id),$(inputs.custom_code.id),\ - $(inputs.rulesets_collection.id),\ - $(inputs.ic_cell.id),$(inputs.ic_substrate.id),\ - $(inputs.ic_ecm.id),$(inputs.ic_dc.id),\ - $(join([string(getfield(variation_ids, field)) for field in fieldnames(VariationIDs)],",")) - );\ - """, - selection="monad_id" - ) |> queryToDataFrame |> x -> x.monad_id[1] # get the monad_id + "monads", + """ + WHERE (\ + physicell_version_id,\ + $(join(locationIDNames(), ",")),\ + $(join(locationVariationIDNames(), ","))\ + )=\ + (\ + $(physicellVersionDBEntry()),\ + $(join([inputs[loc].id for loc in project_locations.all], ",")),\ + $(join([variation_id[loc] for loc in project_locations.varied],","))\ + );\ + """, + selection="monad_id" + ) |> queryToDataFrame |> x -> x.monad_id[1] #! get the monad_id else - monad_id = monad_id[1] # get the monad_id + monad_id = monad_id[1] #! 
get the monad_id end - return Monad(monad_id, n_replicates, inputs, variation_ids, use_previous) + return Monad(monad_id, n_replicates, inputs, variation_id, use_previous) end - function Monad(id::Int, n_replicates::Int, inputs::InputFolders, variation_ids::VariationIDs, use_previous::Bool) + function Monad(id::Int, n_replicates::Int, inputs::InputFolders, variation_id::VariationID, use_previous::Bool) simulation_ids = use_previous ? readMonadSimulationIDs(id) : Int[] num_sims_to_add = n_replicates - length(simulation_ids) if num_sims_to_add > 0 for _ = 1:num_sims_to_add - simulation = Simulation(inputs, variation_ids) # create a new simulation - push!(simulation_ids, simulation.id) # add the simulation id to the monad + simulation = Simulation(inputs, variation_id) #! create a new simulation + push!(simulation_ids, simulation.id) #! add the simulation id to the monad end end @assert id > 0 "id must be positive" @assert n_replicates >= 0 "n_replicates must be non-negative" - # this could be done when adding new simulation ids to save some fie I/O - # doing it here just to make sure it is always up to date (and for consistency across classes) - recordSimulationIDs(id, simulation_ids) # record the simulation ids in a .csv file + #! this could be done when adding new simulation ids to save some fie I/O + #! doing it here just to make sure it is always up to date (and for consistency across classes) + recordSimulationIDs(id, simulation_ids) #! record the simulation ids in a .csv file - return new(id, n_replicates, simulation_ids, inputs, variation_ids) + return new(id, n_replicates, simulation_ids, inputs, variation_id) end end -function Monad(inputs::InputFolders, variation_ids::VariationIDs; use_previous::Bool=true) - n_replicates = 0 # not making a monad to run if not supplying the n_replicates info - Monad(n_replicates, inputs, variation_ids, use_previous) +function Monad(inputs::InputFolders, variation_id::VariationID; use_previous::Bool=true) + n_replicates = 0 #! not making a monad to run if not supplying the n_replicates info + Monad(n_replicates, inputs, variation_id, use_previous) end function getMonad(monad_id::Int, n_replicates::Int) @@ -320,22 +359,22 @@ function getMonad(monad_id::Int, n_replicates::Int) if isempty(df) error("Monad $(monad_id) not in the database.") end - inputs = InputFolders(df.config_id[1], df.custom_code_id[1], df.rulesets_collection_id[1], df.ic_cell_id[1], df.ic_substrate_id[1], df.ic_ecm_id[1], df.ic_dc_id[1]) - variation_ids = VariationIDs(df.config_variation_id[1], df.rulesets_collection_variation_id[1], df.ic_cell_variation_id[1], df.ic_ecm_variation_id[1]) + inputs = [loc => df[1, locationIDName(loc)] for loc in project_locations.all] |> InputFolders + variation_id = [loc => df[1, locationVarIDName(loc)] for loc in project_locations.varied] |> VariationID use_previous = true - return Monad(monad_id, n_replicates, inputs, variation_ids, use_previous) + return Monad(monad_id, n_replicates, inputs, variation_id, use_previous) end Monad(monad_id::Integer; n_replicates::Integer=0) = getMonad(monad_id, n_replicates) function Simulation(monad::Monad) - return Simulation(monad.inputs, monad.variation_ids) + return Simulation(monad.inputs, monad.variation_id) end function Monad(simulation::Simulation) - n_replicates = 0 # do not impose a min length on this monad + n_replicates = 0 #! 
do not impose a min length on this monad use_previous = true - monad = Monad(n_replicates, simulation.inputs, simulation.variation_ids, use_previous) + monad = Monad(n_replicates, simulation.inputs, simulation.variation_id, use_previous) addSimulationID!(monad, simulation.id) return monad end @@ -349,6 +388,8 @@ function addSimulationID!(monad::Monad, simulation_id::Int) return end +getVariationIDs(M::AbstractMonad) = [M.variation_id] + ########################################## ############## Sampling ############## ########################################## @@ -377,19 +418,19 @@ sampling = Sampling(sampling_id; n_replicates=5) # ensures at least 5 simulation - `n_replicates::Int`: minimum number of simulations to ensure are part of each monad when running this sampling. - `monad_ids::Vector{Int}`: array of monad IDs belonging to this sampling. - `inputs::InputFolders`: contains the folder info for this sampling. -- `variation_ids::Vector{VariationIDs}`: contains the variation IDs for each monad. +- `variation_ids::Vector{VariationID}`: contains the variation IDs for each monad. """ struct Sampling <: AbstractSampling - # sampling is a group of monads with config parameters varied - id::Int # integer uniquely identifying this sampling - n_replicates::Int # minimum length of each monad belonging to this sampling - monad_ids::Vector{Int} # array of monad indices belonging to this sampling + #! sampling is a group of monads with config parameters varied + id::Int #! integer uniquely identifying this sampling + n_replicates::Int #! minimum length of each monad belonging to this sampling + monad_ids::Vector{Int} #! array of monad indices belonging to this sampling - inputs::InputFolders # contains the folder names for this sampling + inputs::InputFolders #! contains the folder names for this sampling - variation_ids::Vector{VariationIDs} # variation_ids associated with each monad + variation_ids::Vector{VariationID} #! variation_ids associated with each monad - function Sampling(id, n_replicates, monad_ids, inputs, variation_ids) + function Sampling(id::Int, n_replicates::Int, monad_ids::AbstractVector{<:Integer}, inputs::InputFolders, variation_ids::AbstractVector{VariationID}) @assert id > 0 "id must be positive" n_monads = length(monad_ids) n_variations = length(variation_ids) @@ -401,86 +442,84 @@ struct Sampling <: AbstractSampling """ throw(ArgumentError(error_message)) end - recordMonadIDs(id, monad_ids) # record the monad ids in a .csv file + recordMonadIDs(id, monad_ids) #! 
record the monad ids in a .csv file return new(id, n_replicates, monad_ids, inputs, variation_ids) end end -function Sampling(n_replicates::Int, monad_ids::AbstractVector{<:Integer}, inputs::InputFolders, variation_ids::Vector{VariationIDs}) +function Sampling(n_replicates::Int, monad_ids::AbstractVector{<:Integer}, inputs::InputFolders, variation_ids::Vector{VariationID}) id = -1 sampling_ids = constructSelectQuery( "samplings", """ - WHERE (physicell_version_id,\ - config_id,custom_code_id,\ - rulesets_collection_id,\ - ic_cell_id,ic_substrate_id,ic_ecm_id,ic_dc_id)=\ + WHERE (\ + physicell_version_id,\ + $(join(locationIDNames(), ","))\ + )=\ (\ - $(physicellVersionDBEntry()),\ - $(inputs.config.id),$(inputs.custom_code.id),\ - $(inputs.rulesets_collection.id),\ - $(inputs.ic_cell.id),$(inputs.ic_substrate.id),$(inputs.ic_ecm.id),$(inputs.ic_dc.id)\ + $(physicellVersionDBEntry()),\ + $(join([inputs[loc].id for loc in project_locations.all], ","))\ );\ """; selection="sampling_id" ) |> queryToDataFrame |> x -> x.sampling_id - if !isempty(sampling_ids) # if there are previous samplings with the same parameters - for sampling_id in sampling_ids # check if the monad_ids are the same with any previous monad_ids - monad_ids_in_db = readSamplingMonadIDs(sampling_id) # get the monad_ids belonging to this sampling - if symdiff(monad_ids_in_db, monad_ids) |> isempty # if the monad_ids are the same - id = sampling_id # use the existing sampling_id + if !isempty(sampling_ids) #! if there are previous samplings with the same parameters + for sampling_id in sampling_ids #! check if the monad_ids are the same with any previous monad_ids + monad_ids_in_db = readSamplingMonadIDs(sampling_id) #! get the monad_ids belonging to this sampling + if symdiff(monad_ids_in_db, monad_ids) |> isempty #! if the monad_ids are the same + id = sampling_id #! use the existing sampling_id break end end end - - if id==-1 # if no previous sampling was found matching these parameters + + if id==-1 #! if no previous sampling was found matching these parameters id = DBInterface.execute(db, - """ - INSERT INTO samplings \ - (physicell_version_id,\ - config_id,custom_code_id,\ - rulesets_collection_id,\ - ic_cell_id,ic_substrate_id,ic_ecm_id,ic_dc_id) \ - VALUES($(physicellVersionDBEntry()),\ - $(inputs.config.id),$(inputs.custom_code.id),\ - $(inputs.rulesets_collection.id),\ - $(inputs.ic_cell.id),$(inputs.ic_substrate.id),\ - $(inputs.ic_ecm.id),$(inputs.ic_dc.id)) RETURNING sampling_id; - """ - ) |> DataFrame |> x -> x.sampling_id[1] # get the sampling_id + """ + INSERT INTO samplings \ + (\ + physicell_version_id,\ + $(join(locationIDNames(), ","))\ + ) \ + VALUES(\ + $(physicellVersionDBEntry()),\ + $(join([inputs[loc].id for loc in project_locations.all], ","))\ + ) RETURNING sampling_id; + """ + ) |> DataFrame |> x -> x.sampling_id[1] #! 
get the sampling_id end return Sampling(id, n_replicates, monad_ids, inputs, variation_ids) end -function Sampling(n_replicates::Int, inputs::InputFolders, variation_ids::AbstractArray{VariationIDs}; use_previous::Bool=true) +function Sampling(n_replicates::Int, inputs::InputFolders, variation_ids::AbstractArray{VariationID}; use_previous::Bool=true) monad_ids = createMonadIDs(n_replicates, inputs, variation_ids; use_previous=use_previous) return Sampling(n_replicates, monad_ids, inputs, variation_ids) end function Sampling(inputs::InputFolders; n_replicates::Integer=0, - config_variation_ids::Union{Int,AbstractArray{<:Integer}}=Int[], - rulesets_collection_variation_ids::Union{Int,AbstractArray{<:Integer}}=fill(inputs.rulesets_collection.folder=="" ? -1 : 0, size(config_variation_ids)), - ic_cell_variation_ids::Union{Int,AbstractArray{<:Integer}}=fill(inputs.ic_cell.folder=="" ? -1 : 0, size(config_variation_ids)), - ic_ecm_variation_ids::Union{Int,AbstractArray{<:Integer}}=fill(inputs.ic_ecm.folder=="" ? -1 : 0, size(config_variation_ids)), - use_previous::Bool=true) - # allow for passing in a single config_variation_id and/or rulesets_collection_variation_id - # later, can support passing in (for example) a 3x6 config_variation_ids and a 3x1 rulesets_collection_variation_ids and expanding the rulesets_collection_variation_ids to 3x6, but that can get tricky fast - if all(x->x isa Integer, [config_variation_ids, rulesets_collection_variation_ids, ic_cell_variation_ids, ic_ecm_variation_ids]) - config_variation_ids = [config_variation_ids] - rulesets_collection_variation_ids = [rulesets_collection_variation_ids] - ic_cell_variation_ids = [ic_cell_variation_ids] - ic_ecm_variation_ids = [ic_ecm_variation_ids] + location_variation_ids::Dict{Symbol,<:Union{Integer,AbstractArray{<:Integer}}}, + use_previous::Bool=true) + #! allow for passing in a single config_variation_id and/or rulesets_collection_variation_id + #! later, can support passing in (for example) a 3x6 config_variation_ids and a 3x1 rulesets_collection_variation_ids and expanding the rulesets_collection_variation_ids to 3x6, but that can get tricky fast + if all(x->x isa Integer, values(location_variation_ids)) + for (loc, loc_var_ids) in pairs(location_variation_ids) + location_variation_ids[loc] = [loc_var_ids] + end else - ns = [length(x) for x in [config_variation_ids, rulesets_collection_variation_ids, ic_cell_variation_ids, ic_ecm_variation_ids] if !(x isa Integer)] - @assert all(x->x==ns[1], ns) "config_variation_ids, rulesets_collection_variation_ids, ic_cell_variation_ids, ic_ecm_variation_ids must have the same length if they are not integers" - config_variation_ids = config_variation_ids isa Integer ? fill(config_variation_ids, ns[1]) : config_variation_ids - rulesets_collection_variation_ids = rulesets_collection_variation_ids isa Integer ? fill(rulesets_collection_variation_ids, ns[1]) : rulesets_collection_variation_ids - ic_cell_variation_ids = ic_cell_variation_ids isa Integer ? fill(ic_cell_variation_ids, ns[1]) : ic_cell_variation_ids - ic_ecm_variation_ids = ic_ecm_variation_ids isa Integer ? 
fill(ic_ecm_variation_ids, ns[1]) : ic_ecm_variation_ids + ns = [length(x) for x in values(location_variation_ids) if !(x isa Integer)] + @assert all(x->x==ns[1], ns) "location variation ids must have the same length if they are not integers" + for (loc, loc_var_ids) in pairs(location_variation_ids) + if loc_var_ids isa Integer + location_variation_ids[loc] = fill(loc_var_ids, ns[1]) + end + end + end + n = location_variation_ids |> values |> first |> length + for loc in setdiff(project_locations.varied, keys(location_variation_ids)) + location_variation_ids[loc] = fill(inputs[loc].id==-1 ? -1 : 0, n) end - variation_ids = [VariationIDs(config_variation_ids[i], rulesets_collection_variation_ids[i], ic_cell_variation_ids[i], ic_ecm_variation_ids[i]) for i in 1:length(config_variation_ids)] + variation_ids = [([loc => loc_var_ids[i] for (loc, loc_var_ids) in pairs(location_variation_ids)] |> VariationID) for i in 1:n] return Sampling(n_replicates, inputs, variation_ids; use_previous=use_previous) end @@ -489,18 +528,18 @@ function Sampling(n_replicates::Int, monads::AbstractArray{<:AbstractMonad}) for monad in monads if monad.inputs != inputs error("All monads must have the same inputs") - # could choose to make a trial from these here... + #! could choose to make a trial from these here... end end - variation_ids = [monad.variation_ids for monad in monads] + variation_ids = [monad.variation_id for monad in monads] monad_ids = [monad.id for monad in monads] return Sampling(n_replicates, monad_ids, inputs, variation_ids) end -function createMonadIDs(n_replicates::Int, inputs::InputFolders, variation_ids::AbstractArray{VariationIDs}; use_previous::Bool=true) +function createMonadIDs(n_replicates::Int, inputs::InputFolders, variation_ids::AbstractArray{VariationID}; use_previous::Bool=true) _size = length(variation_ids) monad_ids = -ones(Int, _size) - + for (i, vid) in enumerate(variation_ids) monad = Monad(n_replicates, inputs, vid, use_previous) monad_ids[i] = monad.id @@ -514,9 +553,9 @@ function getSampling(sampling_id::Int, n_replicates::Int) error("Sampling $(sampling_id) not in the database.") end monad_ids = readSamplingMonadIDs(sampling_id) - inputs = InputFolders(df.config_id[1], df.custom_code_id[1], df.rulesets_collection_id[1], df.ic_cell_id[1], df.ic_substrate_id[1], df.ic_ecm_id[1], df.ic_dc_id[1]) + inputs = [loc => df[1, locationIDName(loc)] for loc in project_locations.all] |> InputFolders monad_df = constructSelectQuery("monads", "WHERE monad_id IN ($(join(monad_ids,",")))") |> queryToDataFrame - variation_ids = [VariationIDs(monad_df.config_variation_id[i], monad_df.rulesets_collection_variation_id[i], monad_df.ic_cell_variation_id[i], monad_df.ic_ecm_variation_id[i]) for i in 1:length(monad_ids)] + variation_ids = [([loc => monad_df[i, locationVarIDName(loc)] for loc in project_locations.varied] |> VariationID) for i in 1:length(monad_ids)] return Sampling(sampling_id, n_replicates, monad_ids, inputs, variation_ids) end @@ -531,6 +570,8 @@ function Sampling(monads::Vector{Monad}) return Sampling(n_replicates, monads) end +getVariationIDs(sampling::Sampling) = sampling.variation_ids + ########################################## ############### Trial ################ ########################################## @@ -561,33 +602,33 @@ trial = Trial(trial_id; n_replicates=5) # ensures at least 5 simulations in each - `n_replicates::Int`: minimum number of simulations to ensure are part of each monad in each sampling in this trial. 
- `sampling_ids::Vector{Int}`: array of sampling IDs belonging to this trial. - `inputs::Vector{InputFolders}`: contains the folder info for each sampling in this trial. -- `variation_ids::Vector{Vector{VariationIDs}}`: contains the variation IDs for each monad in each sampling in this trial. +- `variation_ids::Vector{Vector{VariationID}}`: contains the variation IDs for each monad in each sampling in this trial. """ struct Trial <: AbstractTrial - # trial is a group of samplings with different ICs, custom codes, and rulesets - id::Int # integer uniquely identifying this trial - n_replicates::Int # minimum length of each monad belonging to the samplings in this trial - sampling_ids::Vector{Int} # array of sampling indices belonging to this trial + #! trial is a group of samplings with different ICs, custom codes, rulesets, and/or intracellulars + id::Int #! integer uniquely identifying this trial + n_replicates::Int #! minimum length of each monad belonging to the samplings in this trial + sampling_ids::Vector{Int} #! array of sampling indices belonging to this trial - inputs::Vector{InputFolders} # contains the folder names for the samplings in this trial - variation_ids::Vector{Vector{VariationIDs}} # variation_ids associated with each monad for each sampling + inputs::Vector{InputFolders} #! contains the folder names for the samplings in this trial + variation_ids::Vector{Vector{VariationID}} #! variation_ids associated with each monad for each sampling - function Trial(id::Int, n_replicates::Int, sampling_ids::Vector{Int}, inputs::Vector{InputFolders}, variation_ids::Vector{Vector{VariationIDs}}) + function Trial(id::Int, n_replicates::Int, sampling_ids::Vector{Int}, inputs::Vector{InputFolders}, variation_ids::Vector{Vector{VariationID}}) @assert id > 0 "id must be positive" n_samplings = length(sampling_ids) n_inputs = length(inputs) n_variations = length(variation_ids) - if n_samplings != n_inputs || n_samplings != n_variations # the negation of this is n_samplings == n_inputs && n_samplings == n_folder_names && n_samplings == n_variations, which obviously means they're all the same + if n_samplings != n_inputs || n_samplings != n_variations #! the negation of this is n_samplings == n_inputs && n_samplings == n_folder_names && n_samplings == n_variations, which obviously means they're all the same throw(ArgumentError("Number of samplings, inputs, and variations must be the same")) end - recordSamplingIDs(id, sampling_ids) # record the sampling ids in a .csv file + recordSamplingIDs(id, sampling_ids) #! 
record the sampling ids in a .csv file return new(id, n_replicates, sampling_ids, inputs, variation_ids) end end -function Trial(n_replicates::Int, sampling_ids::Vector{Int}, inputs::Vector{InputFolders}, variation_ids::Vector{Vector{VariationIDs}}; use_previous::Bool=true) +function Trial(n_replicates::Int, sampling_ids::Vector{Int}, inputs::Vector{InputFolders}, variation_ids::Vector{Vector{VariationID}}; use_previous::Bool=true) id = getTrialID(sampling_ids) return Trial(id, n_replicates, sampling_ids, inputs, variation_ids) end @@ -595,18 +636,18 @@ end function getTrialID(sampling_ids::Vector{Int}) id = -1 trial_ids = constructSelectQuery("trials"; selection="trial_id") |> queryToDataFrame |> x -> x.trial_id - if !isempty(trial_ids) # if there are previous trials - for trial_id in trial_ids # check if the sampling_ids are the same with any previous sampling_ids - sampling_ids_in_db = readTrialSamplingIDs(trial_id) # get the sampling_ids belonging to this trial - if symdiff(sampling_ids_in_db, sampling_ids) |> isempty # if the sampling_ids are the same - id = trial_id # use the existing trial_id + if !isempty(trial_ids) #! if there are previous trials + for trial_id in trial_ids #! check if the sampling_ids are the same with any previous sampling_ids + sampling_ids_in_db = readTrialSamplingIDs(trial_id) #! get the sampling_ids belonging to this trial + if symdiff(sampling_ids_in_db, sampling_ids) |> isempty #! if the sampling_ids are the same + id = trial_id #! use the existing trial_id break end end end - - if id==-1 # if no previous trial was found matching these parameters - id = DBInterface.execute(db, "INSERT INTO trials (datetime) VALUES($(Dates.format(now(),"yymmddHHMM"))) RETURNING trial_id;") |> DataFrame |> x -> x.trial_id[1] # get the trial_id + + if id==-1 #! if no previous trial was found matching these parameters + id = DBInterface.execute(db, "INSERT INTO trials (datetime) VALUES($(Dates.format(now(),"yymmddHHMM"))) RETURNING trial_id;") |> DataFrame |> x -> x.trial_id[1] #! get the trial_id end return id @@ -631,7 +672,7 @@ end Trial(trial_id::Integer; n_replicates::Integer=0) = getTrial(trial_id, n_replicates) -function Sampling(id::Int, n_replicates::Int, inputs::InputFolders, variation_ids::Vector{VariationIDs}; use_previous::Bool=true) +function Sampling(id::Int, n_replicates::Int, inputs::InputFolders, variation_ids::Vector{VariationID}; use_previous::Bool=true) monad_ids = createMonadIDs(n_replicates, inputs, variation_ids; use_previous=use_previous) return Sampling(id, n_replicates, monad_ids, inputs, variation_ids) end diff --git a/src/VCTCompilation.jl b/src/VCTCompilation.jl index 84f1cf1e..27c0b5c6 100644 --- a/src/VCTCompilation.jl +++ b/src/VCTCompilation.jl @@ -1,3 +1,6 @@ +#! This file will likely end up being part of PhysiCellModelManager.jl + +using LightXML """ loadCustomCode(S::AbstractSampling[; force_recompile::Bool=false]) @@ -11,52 +14,56 @@ Move the compiled executable into the `custom_codes` folder and the temporary Ph """ function loadCustomCode(S::AbstractSampling; force_recompile::Bool=false) cflags, recompile, clean = compilerFlags(S) - recompile = writePhysiCellCommitHash(S) || recompile # no matter what, write the PhysiCell version; if it is different, make sure to set recompile to true + recompile = writePhysiCellCommitHash(S) || recompile #! 
no matter what, write the PhysiCell version; if it is different, make sure to set recompile to true - recompile |= force_recompile # if force_recompile is true, then recompile no matter what + recompile |= force_recompile #! if force_recompile is true, then recompile no matter what if !recompile return true end - if clean - cd(()->run(pipeline(`make clean`; stdout=devnull)), physicell_dir) - end - - rand_suffix = randstring(10) # just to ensure that no two nodes try to compile at the same place at the same time + rand_suffix = randstring(10) #! just to ensure that no two nodes try to compile at the same place at the same time temp_physicell_dir = joinpath(outputFolder(S), "temp_physicell_$(rand_suffix)") - # copy the entire PhysiCell directory to a temporary directory to avoid conflicts with concurrent compilation + #! copy the entire PhysiCell directory to a temporary directory to avoid conflicts with concurrent compilation cp(physicell_dir, temp_physicell_dir; force=true) temp_custom_modules_dir = joinpath(temp_physicell_dir, "custom_modules") if isdir(temp_custom_modules_dir) rm(temp_custom_modules_dir; force=true, recursive=true) end - path_to_input_custom_codes = joinpath(data_dir, "inputs", "custom_codes", S.inputs.custom_code.folder) + path_to_input_custom_codes = locationPath(:custom_code, S) cp(joinpath(path_to_input_custom_codes, "custom_modules"), temp_custom_modules_dir; force=true) cp(joinpath(path_to_input_custom_codes, "main.cpp"), joinpath(temp_physicell_dir, "main.cpp"), force=true) cp(joinpath(path_to_input_custom_codes, "Makefile"), joinpath(temp_physicell_dir, "Makefile"), force=true) - executable_name = baseToExecutable("project_ccid_$(S.inputs.custom_code.id)") - cmd = `make -j 8 CC=$(PHYSICELL_CPP) PROGRAM_NAME=$(executable_name) CFLAGS=$(cflags)` + if clean + cd(()->run(pipeline(`make clean`; stdout=devnull)), temp_physicell_dir) + end + + executable_name = baseToExecutable("project_ccid_$(S.inputs[:custom_code].id)") + cmd = Cmd(`make -j 8 CC=$(PHYSICELL_CPP) PROGRAM_NAME=$(executable_name) CFLAGS=$(cflags)`; env=ENV, dir=temp_physicell_dir) #! compile the custom code in the PhysiCell directory and return to the original directory - println("Compiling custom code for $(S.inputs.custom_code.folder) with flags: $cflags") + println("Compiling custom code for $(S.inputs[:custom_code].folder) using:\n\n$(Cmd(cmd; env=nothing, dir=""))\n") #! print the command to be run, but not all the environment variables and directory info try - cd(() -> run(pipeline(cmd; stdout=joinpath(path_to_input_custom_codes, "compilation.log"), stderr=joinpath(path_to_input_custom_codes, "compilation.err"))), temp_physicell_dir) # compile the custom code in the PhysiCell directory and return to the original directory + run(pipeline(cmd; stdout=joinpath(path_to_input_custom_codes, "compilation.log"), stderr=joinpath(path_to_input_custom_codes, "compilation.err"))) catch e println(""" Compilation failed. Error: $e Check $(joinpath(path_to_input_custom_codes, "compilation.err")) for more information. + Here is the compilation.log: + $(read(joinpath(path_to_input_custom_codes, "compilation.log"), String)) + Here is the compilation.err: + $(read(joinpath(path_to_input_custom_codes, "compilation.err"), String)) """ ) rm(temp_physicell_dir; force=true, recursive=true) return false end - - # check if the error file is empty, if it is, delete it + + #! 
check if the error file is empty, if it is, delete it if filesize(joinpath(path_to_input_custom_codes, "compilation.err")) == 0 rm(joinpath(path_to_input_custom_codes, "compilation.err"); force=true) else @@ -83,8 +90,8 @@ If the required macros differ from a previous compilation (as stored in macros.t - `clean::Bool`: A boolean indicating whether cleaning is needed. """ function compilerFlags(S::AbstractSampling) - recompile = false # only recompile if need is found - clean = false # only clean if need is found + recompile = false #! only recompile if need is found + clean = false #! only clean if need is found cflags = "-march=$(march_flag) -O3 -fomit-frame-pointer -fopenmp -m64 -std=c++11" if Sys.isapple() if strip(read(`uname -s`, String)) == "Darwin" @@ -99,9 +106,9 @@ function compilerFlags(S::AbstractSampling) cflags *= " -mfpmath=both" end - current_macros = readMacrosFile(S) # this will get all macros already in the macros file + current_macros = readMacrosFile(S) #! this will get all macros already in the macros file addMacrosIfNeeded(S) - updated_macros = readMacrosFile(S) # this will get all macros already in the macros file + updated_macros = readMacrosFile(S) #! this will get all macros already in the macros file if length(updated_macros) != length(current_macros) recompile = true @@ -112,13 +119,22 @@ cflags *= " -D $(macro_flag)" end - recompile = recompile || !executableExists(S.inputs.custom_code.folder) # last chance to recompile: do so if the executable does not exist + if "ADDON_ROADRUNNER" in updated_macros + librr_dir = joinpath(physicell_dir, "addons", "libRoadrunner", "roadrunner") + cflags *= " -I $(joinpath(librr_dir, "include", "rr", "C"))" + cflags *= " -L $(joinpath(librr_dir, "lib"))" + cflags *= " -l roadrunner_c_api" + + prepareLibRoadRunner(physicell_dir) + end + + recompile = recompile || !executableExists(S.inputs[:custom_code].folder) #! last chance to recompile: do so if the executable does not exist return cflags, recompile, clean end function writePhysiCellCommitHash(S::AbstractSampling) - path_to_commit_hash = joinpath(data_dir, "inputs", "custom_codes", S.inputs.custom_code.folder, "physicell_commit_hash.txt") + path_to_commit_hash = joinpath(locationPath(:custom_code, S), "physicell_commit_hash.txt") physicell_commit_hash = physiCellCommitHash() current_commit_hash = "" if isfile(path_to_commit_hash) @@ -139,17 +155,18 @@ return recompile end -executableExists(custom_code_folder::String) = isfile(joinpath(data_dir, "inputs", "custom_codes", custom_code_folder, baseToExecutable("project"))) +executableExists(custom_code_folder::String) = isfile(joinpath(locationPath(:custom_code, custom_code_folder), baseToExecutable("project"))) function addMacrosIfNeeded(S::AbstractSampling) - # else get the macros neeeded + #! else get the macros needed addPhysiECMIfNeeded(S) + addRoadRunnerIfNeeded(S) - # check others... + #! check others... end function addMacro(S::AbstractSampling, macro_name::String) - path_to_macros = joinpath(data_dir, "inputs", "custom_codes", S.inputs.custom_code.folder, "macros.txt") + path_to_macros = joinpath(locationPath(:custom_code, S), "macros.txt") open(path_to_macros, "a") do f println(f, macro_name) end @@ -157,37 +174,52 @@ end function addPhysiECMIfNeeded(S::AbstractSampling) if "ADDON_PHYSIECM" in readMacrosFile(S) - # if the custom codes folder for the sampling already has the macro, then we don't need to do anything + #!
if the custom codes folder for the sampling already has the macro, then we don't need to do anything return end - if S.inputs.ic_ecm.id != -1 - # if this sampling is providing an ic file for ecm, then we need to add the macro + if S.inputs[:ic_ecm].id != -1 + #! if this sampling is providing an ic file for ecm, then we need to add the macro addMacro(S, "ADDON_PHYSIECM") return end - # check if ecm_setup element has enabled="true" in config files - loadConfiguration(S) + #! check if ecm_setup element has enabled="true" in config files + prepareVariedInputFolder(:config, S) if isPhysiECMInConfig(S) - # if the base config file says that the ecm is enabled, then we need to add the macro + #! if the base config file says that the ecm is enabled, then we need to add the macro addMacro(M, "ADDON_PHYSIECM") end end +function addRoadRunnerIfNeeded(S::AbstractSampling) + if "ADDON_ROADRUNNER" in readMacrosFile(S) + #! if the custom codes folder for the sampling already has the macro, then we don't need to do anything + return + end + + need_to_add = false + prepareVariedInputFolder(:config, S) + need_to_add = isRoadRunnerInInputs(S) || isRoadRunnerInConfig(S) + if need_to_add + addMacro(S, "ADDON_ROADRUNNER") + end + return +end + function isPhysiECMInConfig(M::AbstractMonad) - path_to_xml = joinpath(data_dir, "inputs", "configs", M.inputs.config.folder, "config_variations", "config_variation_$(M.variation_ids.config).xml") + path_to_xml = joinpath(locationPath(:config, M), "config_variations", "config_variation_$(M.variation_id[:config]).xml") xml_doc = openXML(path_to_xml) xml_path = ["microenvironment_setup", "ecm_setup"] ecm_setup_element = retrieveElement(xml_doc, xml_path; required=false) - physi_ecm_in_config = !isnothing(ecm_setup_element) && attribute(ecm_setup_element, "enabled") == "true" # note: attribute returns nothing if the attribute does not exist + physi_ecm_in_config = !isnothing(ecm_setup_element) && attribute(ecm_setup_element, "enabled") == "true" #! note: attribute returns nothing if the attribute does not exist closeXML(xml_doc) return physi_ecm_in_config end function isPhysiECMInConfig(sampling::Sampling) - # otherwise, no previous sampling saying to use the macro, no ic file for ecm, and the base config file does not have ecm enabled, - # now just check that the variation is not enabling the ecm + #! otherwise, no previous sampling saying to use the macro, no ic file for ecm, and the base config file does not have ecm enabled, + #! now just check that the variation is not enabling the ecm for index in eachindex(sampling.variation_ids) - monad = Monad(sampling, index) # instantiate a monad with the variation_id and the simulation ids already found + monad = Monad(sampling, index) #! 
instantiate a monad with the variation_id and the simulation ids already found if isPhysiECMInConfig(monad) return true end @@ -195,8 +227,85 @@ function isPhysiECMInConfig(sampling::Sampling) return false end +function isRoadRunnerInConfig(S::AbstractSampling) + path_to_xml = joinpath(locationPath(:config, S), "PhysiCell_settings.xml") + xml_doc = openXML(path_to_xml) + cell_definitions_element = retrieveElement(xml_doc, ["cell_definitions"]) + ret_val = false + for child in child_elements(cell_definitions_element) + phenotype_element = find_element(child, "phenotype") + intracellular_element = find_element(phenotype_element, "intracellular") + if isnothing(intracellular_element) + continue + end + if attribute(intracellular_element, "type") == "roadrunner" + ret_val = true + break + end + end + closeXML(xml_doc) + return ret_val +end + +function isRoadRunnerInInputs(S::AbstractSampling) + if S.inputs[:intracellular].id == -1 + return false + end + path_to_xml = joinpath(locationPath(:intracellular, S), S.inputs[:intracellular].basename) + xml_doc = openXML(path_to_xml) + is_nothing = retrieveElement(xml_doc, ["intracellulars"; "intracellular:type:roadrunner"]) |> isnothing + closeXML(xml_doc) + return !is_nothing +end + +function prepareLibRoadRunner(physicell_dir::String) + #! this is how PhysiCell handles downloading libRoadrunner + librr_file = joinpath(physicell_dir, "addons", "libRoadrunner", "roadrunner", "include", "rr", "C", "rrc_api.h") + if !isfile(librr_file) + python = Sys.iswindows() ? "python" : "python3" + cd(() -> run(pipeline(`$(python) ./beta/setup_libroadrunner.py`; stdout=devnull, stderr=devnull)), physicell_dir) + @assert isfile(librr_file) "libRoadrunner was not downloaded properly." + + #! remove the downloaded binary (I would think the script would handle this, but it does not) + files = readdir(joinpath(physicell_dir, "addons", "libRoadrunner"); join=true, sort=false) + for path_to_file in files + if isfile(path_to_file) && + ( + endswith(path_to_file, "roadrunner_macos_arm64.tar.gz") || + endswith(path_to_file, "roadrunner-osx-10.9-cp36m.tar.gz") || + endswith(path_to_file, "roadrunner-win64-vs14-cp35m.zip") || + endswith(path_to_file, "cpplibroadrunner-1.3.0-linux_x86_64.tar.gz") + ) + #! remove the downloaded binary + rm(path_to_file; force=true) + end + end + end + + if Sys.iswindows() + return + end + + env_var = Sys.isapple() ? "DYLD_LIBRARY_PATH" : "LD_LIBRARY_PATH" + env_file = (haskey(ENV, "SHELL") && contains(ENV["SHELL"], "zsh")) ? ".zshenv" : ".bashrc" + path_to_env_file = "~/$(env_file)" + path_to_add = joinpath(physicell_dir, "addons", "libRoadrunner", "roadrunner", "lib") + + if !haskey(ENV, env_var) || !contains(ENV[env_var], path_to_add) + println(""" + Warning: Shell environment variable $(env_var) either not found or does not include the path to an installation of libRoadrunner. + For now, we will add this path to your ENV variable in this Julia session. 
+ Run this command in your terminal to add it to your $(env_file) as a relative path and this should be resolved permanently: + + echo "export $env_var=$env_var:./addons/libRoadrunner/roadrunner/lib" > $(path_to_env_file) + + """) + ENV[env_var] = ":./addons/libRoadrunner/roadrunner/lib" + end +end + function readMacrosFile(S::AbstractSampling) - path_to_macros = joinpath(data_dir, "inputs", "custom_codes", S.inputs.custom_code.folder, "macros.txt") + path_to_macros = joinpath(locationPath(:custom_code, S), "macros.txt") if !isfile(path_to_macros) return [] end diff --git a/src/VCTComponents.jl b/src/VCTComponents.jl new file mode 100644 index 00000000..5fcf5fb9 --- /dev/null +++ b/src/VCTComponents.jl @@ -0,0 +1,237 @@ +using LightXML, TOML, AutoHashEquals + +export assembleIntracellular!, PhysiCellComponent + +""" + PhysiCellComponent + +A struct to hold the information about a component that is used to assemble an input of PhysiCell. + +The `type` and `name` are the only fields that are compared for equality. +The `type` represents the type of component that it is. +Currently, only "roadrunner" is supported. +The `name` is the name of the file inside the `components/type/` directory. +The `path_from_components` is the path from the components directory to the file. +The `id` is the id of the component, which will be -1 to indicate it is not yet set. +The `id` is used to link which cell definition(s) use which component(s). +""" +@auto_hash_equals fields = (type, name) struct PhysiCellComponent #! only compare the name and type for equality + type::String #! type of the file (currently going to be "roadrunner", "dfba", or "maboss") + name::String #! name of the file + path_from_components::String #! path from the components directory to the file + id::Int #! id of the component (will be -1 to indicate it is not yet known) + + function PhysiCellComponent(type::String, name::String) + return new(type, name, joinpath(type, name), -1) + end + + function PhysiCellComponent(name::String, type::String, path_from_components::String) + new(type, name, path_from_components, -1) + end + + function PhysiCellComponent(component::PhysiCellComponent, id::Int) + new(component.type, component.name, component.path_from_components, id) + end +end + +function assembleIntracellular!(cell_to_components_dict::Dict{String,<:Union{PhysiCellComponent,Vector{PhysiCellComponent}}}; kwargs...) + cell_to_vec_components_dict = Dict{String,Vector{PhysiCellComponent}}() + for (cell_type, components) in cell_to_components_dict + if components isa PhysiCellComponent + cell_to_vec_components_dict[cell_type] = [components] + else + cell_to_vec_components_dict[cell_type] = components + end + end + return assembleIntracellular!(cell_to_vec_components_dict; kwargs...) +end + +function assembleIntracellular!(cell_to_components_dict::Dict{String,Vector{PhysiCellComponent}}; name::String="assembled") + #! get all components to assign IDs + unique_components = PhysiCellComponent[] + for components in values(cell_to_components_dict) + for component in components + if component in unique_components + continue + end + push!(unique_components, component) + end + end + temp_ids = Dict{PhysiCellComponent,Int}() + for (i, c) in enumerate(unique_components) + temp_ids[c] = i + end + + #! 
create the assembly record first to compare and then to save (if assembling now) + assembly_manifest = Dict{String,Dict{String,Any}}() + assembly_manifest["cell_definitions"] = Dict{String,Any}() + assembly_manifest["intracellulars"] = Dict{String,Any}() + for (cell_type, components) in cell_to_components_dict + if isempty(components) + continue + end + assembly_manifest["cell_definitions"][cell_type] = Int[] + for component in components + id_str = string(temp_ids[component]) + push!(assembly_manifest["cell_definitions"][cell_type], temp_ids[component]) + assembly_manifest["intracellulars"][id_str] = Dict{String,Any}() + assembly_manifest["intracellulars"][id_str]["type"] = component.type + assembly_manifest["intracellulars"][id_str]["name"] = component.name + end + end + + #! compare against previously-assembled intracellulars + path_to_folder = getIntracellularFolder(assembly_manifest) + if !isnothing(path_to_folder) + updateIntracellularComponentIDs!(cell_to_components_dict, path_to_folder) + return splitpath(path_to_folder)[end] + end + + #! pick a folder name, adding a number if it already exists + path_to_folders = locationPath(:intracellular) + folder = name + n = 0 + while isdir(joinpath(path_to_folders, folder)) + n += 1 + folder = "$(name)_$(n)" + end + path_to_folder = joinpath(path_to_folders, folder) + mkdir(path_to_folder) + + #! since we're here and creating a new folder, it is possible that previously defined IDs could conflict, so let's not rely on them. + for (cell_type, components) in cell_to_components_dict + updated_components = PhysiCellComponent[] + for component in components + push!(updated_components, PhysiCellComponent(component, temp_ids[component])) + end + cell_to_components_dict[cell_type] = updated_components + end + + xml_doc = XMLDocument() + xml_root = create_root(xml_doc, "PhysiCell_intracellular_mappings") + + #! create cell definitions element + e_cell_definitions = new_child(xml_root, "cell_definitions") + for (cell_type, components) in cell_to_components_dict + e_cell_definition = new_child(e_cell_definitions, "cell_definition") + set_attribute(e_cell_definition, "name", cell_type) + e_intracellular_ids = new_child(e_cell_definition, "intracellular_ids") + for component in components + e_intracellular_id = new_child(e_intracellular_ids, "ID") + set_content(e_intracellular_id, string(component.id)) + end + end + + #! create intracellulars element + e_intracellulars = new_child(xml_root, "intracellulars") + for (component, i) in temp_ids + e_intracellular = new_child(e_intracellulars, "intracellular") + set_attribute(e_intracellular, "ID", string(i)) + set_attribute(e_intracellular, "type", component.type) + + path_to_component_xml = joinpath(data_dir, "components", component.path_from_components) + component_xml_doc = openXML(path_to_component_xml) + component_xml_root = root(component_xml_doc) + add_child(e_intracellular, component_xml_root) + closeXML(component_xml_doc) + end + + save_file(xml_doc, joinpath(path_to_folder, "intracellular.xml")) + closeXML(xml_doc) + + #! record the assembly of the document + open(joinpath(path_to_folder, "assembly.toml"), "w") do io + TOML.print(io, assembly_manifest) + end + + #! make sure the database is updated, variations.db initialized + if initialized + insertFolder(:intracellular, splitpath(folder)[end]) + end + + #!
return just the folder name + return folder +end + +function getIntracellularFolder(assembly_manifest::Dict) + path_to_location_folders = locationPath(:intracellular) + + for folder in readdir(path_to_location_folders; join=true, sort=false) + #! only look in folders that have an assembly.toml file + if !isdir(folder) || !isfile(joinpath(folder, "assembly.toml")) + continue + end + previous_assembly_manifest = TOML.parsefile(joinpath(folder, "assembly.toml")) + if intracellularAssemblyManifestsEquivalent(previous_assembly_manifest, assembly_manifest) + return folder + end + end + return +end + +function intracellularAssemblyManifestsEquivalent(A::Dict, B::Dict) + + function _get_cell_to_components_dict(d::Dict) + cell_to_components_dict = Dict{String,Vector{PhysiCellComponent}}() + for (cell_type, ids) in d["cell_definitions"] + cell_to_components_dict[cell_type] = PhysiCellComponent[] + for id in string.(ids) + component = PhysiCellComponent(d["intracellulars"][id]["type"], d["intracellulars"][id]["name"]) #! all will have PhysiCellComponent id -1 + @assert !(component in cell_to_components_dict[cell_type]) "Duplicate component in cell type $cell_type: $component" + push!(cell_to_components_dict[cell_type], component) + end + end + return cell_to_components_dict + end + + dict_assembly_A = _get_cell_to_components_dict(A) + dict_assembly_B = _get_cell_to_components_dict(B) + all_cell_types = Set(union(keys(dict_assembly_A), keys(dict_assembly_B))) + + for cell_type in all_cell_types + #! first check if the cell type actually has components in A or B + has_intracellular_A = haskey(dict_assembly_A, cell_type) && !isempty(dict_assembly_A[cell_type]) + has_intracellular_B = haskey(dict_assembly_B, cell_type) && !isempty(dict_assembly_B[cell_type]) + if !has_intracellular_A && !has_intracellular_B + continue + end + + #! if one has the cell type and the other does not, then these are not the same (⊻ = XOR) + if (has_intracellular_A ⊻ has_intracellular_B) + return false + end + + #! 
otherwise, both have it, check that these are the same + if Set{PhysiCellComponent}(dict_assembly_A[cell_type]) != Set{PhysiCellComponent}(dict_assembly_B[cell_type]) + return false + end + end + return true +end + +function updateIntracellularComponentIDs!(cell_to_components_dict::Dict{String,Vector{PhysiCellComponent}}, path_to_folder::String) + path_to_file = joinpath(path_to_folder, "assembly.toml") + @assert isfile(path_to_file) "Assembly file does not exist: $path_to_file" + assembly_manifest = TOML.parsefile(path_to_file) + + for (cell_type, components) in cell_to_components_dict + if isempty(components) + continue + end + new_components = PhysiCellComponent[] + for component in components + component_id = findComponentID(assembly_manifest, component) + push!(new_components, PhysiCellComponent(component, parse(Int, component_id))) + end + cell_to_components_dict[cell_type] = new_components + end +end + +function findComponentID(assembly_manifest::Dict, component::PhysiCellComponent) + for (id, component_dict) in assembly_manifest["intracellulars"] + if component_dict["name"] == component.name && component_dict["type"] == component.type + return id + end + end + @assert false "Component not found in assembly manifest: $component" +end \ No newline at end of file diff --git a/src/VCTConfiguration.jl b/src/VCTConfiguration.jl index d7ba385c..c5bd348f 100644 --- a/src/VCTConfiguration.jl +++ b/src/VCTConfiguration.jl @@ -29,7 +29,7 @@ function retrieveElement(xml_doc::XMLDocument, xml_path::Vector{<:AbstractString end continue end - # Deal with checking attributes + #! Deal with checking attributes current_element = getChildByAttribute(current_element, split(path_element, ":")) if isnothing(current_element) required ? retrieveElementError(xml_path, path_element) : return nothing @@ -44,7 +44,7 @@ function retrieveElementError(xml_path::Vector{<:AbstractString}, path_element:: throw(ArgumentError(error_msg)) end -function getField(xml_doc::XMLDocument, xml_path::Vector{<:AbstractString}; required::Bool=true) +function getContent(xml_doc::XMLDocument, xml_path::Vector{<:AbstractString}; required::Bool=true) return retrieveElement(xml_doc, xml_path; required=required) |> content end @@ -83,7 +83,7 @@ function makeXMLPath(xml_doc::XMLDocument, xml_path::Vector{<:AbstractString}) end continue end - # Deal with checking attributes + #! Deal with checking attributes path_element_split = split(path_element, ":") child_element = getChildByAttribute(current_element, path_element_split) if isnothing(child_element) @@ -98,190 +98,130 @@ end ################## Configuration Functions ################## -function loadConfiguration(M::AbstractMonad) - path_to_xml = joinpath(data_dir, "inputs", "configs", M.inputs.config.folder, "config_variations", "config_variation_$(M.variation_ids.config).xml") +""" + createXMLFile(location::Symbol, M::AbstractMonad) + +Create XML file for the given location and variation_id in the given monad. + +The file is placed in `\$(location)_variations` and can be accessed from there to run the simulation(s). 
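+
+# Example
+
+A hypothetical sketch (here `monad` stands for any `AbstractMonad` whose config folder is named "default" and whose config variation id is 2; the exact path prefix depends on the data directory):
+
+```julia
+createXMLFile(:config, monad)
+# creates .../inputs/configs/default/config_variations/config_variation_2.xml, unless it already exists
+```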
+""" +function createXMLFile(location::Symbol, M::AbstractMonad) + path_to_folder = locationPath(location, M) + path_to_xml = joinpath(path_to_folder, variationsTableName(location), "$(location)_variation_$(M.variation_id[location]).xml") if isfile(path_to_xml) - return + return path_to_xml end mkpath(dirname(path_to_xml)) - path_to_xml_src = joinpath(data_dir, "inputs", "configs", M.inputs.config.folder, "PhysiCell_settings.xml") - cp(path_to_xml_src, path_to_xml, force=true) - - xml_doc = openXML(path_to_xml) - query = constructSelectQuery("config_variations", "WHERE config_variation_id=$(M.variation_ids.config);") - variation_row = queryToDataFrame(query; db=configDB(M.inputs.config.folder), is_row=true) - for column_name in names(variation_row) - if column_name == "config_variation_id" - continue - end - xml_path = columnNameToXMLPath(column_name) - updateField(xml_doc, xml_path, variation_row[1, column_name]) - end - save_file(xml_doc, path_to_xml) - closeXML(xml_doc) - return -end -function loadConfiguration(sampling::Sampling) - for index in eachindex(sampling.variation_ids) - monad = Monad(sampling, index) # instantiate a monad with the variation_id and the simulation ids already found - loadConfiguration(monad) - end -end + path_to_base_xml = prepareBaseFile(M.inputs[location]) + @assert endswith(path_to_base_xml, ".xml") "Base XML file for $(location) must end with .xml. Got $(path_to_base_xml)" + @assert isfile(path_to_base_xml) "Base XML file not found: $(path_to_base_xml)" -function loadRulesets(M::AbstractMonad) - if M.variation_ids.rulesets_collection == -1 # no rules being used - return - end - path_to_rulesets_collections_folder = joinpath(data_dir, "inputs", "rulesets_collections", M.inputs.rulesets_collection.folder) - path_to_rulesets_xml = joinpath(path_to_rulesets_collections_folder, "rulesets_collections_variations", "rulesets_variation_$(M.variation_ids.rulesets_collection).xml") - if isfile(path_to_rulesets_xml) # already have the rulesets_collection variation created - return - end - mkpath(dirname(path_to_rulesets_xml)) # ensure the directory exists - - # create xml file using LightXML - path_to_base_xml = joinpath(path_to_rulesets_collections_folder, "base_rulesets.xml") - if !isfile(path_to_base_xml) - # this could happen if the rules are not being varied (so no call to addRulesetsVariationsColumns) and then a sim runs without the base_rulesets.xml being created yet - writeRules(path_to_base_xml, joinpath(path_to_rulesets_collections_folder, "base_rulesets.csv")) - end - - xml_doc = parse_file(path_to_base_xml) - if M.variation_ids.rulesets_collection != 0 # only update if not using the base variation for the ruleset - query = constructSelectQuery("rulesets_collection_variations", "WHERE rulesets_collection_variation_id=$(M.variation_ids.rulesets_collection);") - variation_row = queryToDataFrame(query; db=rulesetsCollectionDB(M), is_row=true) + xml_doc = openXML(path_to_base_xml) + if M.variation_id[location] != 0 #! 
only update if not using the base variation for the location + query = constructSelectQuery(variationsTableName(location), "WHERE $(locationVarIDName(location))=$(M.variation_id[location])") + variation_row = queryToDataFrame(query; db=variationsDatabase(location, M), is_row=true) for column_name in names(variation_row) - if column_name == "rulesets_collection_variation_id" + if column_name == locationVarIDName(location) continue end xml_path = columnNameToXMLPath(column_name) updateField(xml_doc, xml_path, variation_row[1, column_name]) end end - save_file(xml_doc, path_to_rulesets_xml) + save_file(xml_doc, path_to_xml) closeXML(xml_doc) return end -function loadICCells(M::AbstractMonad) - if M.inputs.ic_cell.id == -1 # no ic cells being used - return +function prepareBaseFile(input_folder::InputFolder) + if input_folder.location == :rulesets_collection + return prepareBaseRulesetsCollectionFile(input_folder) end - path_to_ic_cells_folder = joinpath(data_dir, "inputs", "ics", "cells", M.inputs.ic_cell.folder) - if isfile(joinpath(path_to_ic_cells_folder, "cells.csv")) # ic already given by cells.csv - return + return joinpath(locationPath(input_folder), input_folder.basename) +end + +function prepareBaseRulesetsCollectionFile(input_folder::InputFolder) + path_to_rulesets_collection_folder = locationPath(:rulesets_collection, input_folder.folder) + path_to_base_xml = joinpath(path_to_rulesets_collection_folder, "base_rulesets.xml") + if !isfile(path_to_base_xml) + #! this could happen if the rules are not being varied (so no call to addRulesetsVariationsColumns) and then a sim runs without the base_rulesets.xml being created yet + writeRules(path_to_base_xml, joinpath(path_to_rulesets_collection_folder, "base_rulesets.csv")) end - path_to_ic_cells_xml = joinpath(path_to_ic_cells_folder, "ic_cell_variations", "ic_cell_variation_$(M.variation_ids.ic_cell).xml") - if isfile(path_to_ic_cells_xml) # already have the ic cell variation created + return path_to_base_xml +end + +function prepareVariedInputFolder(location::Symbol, M::AbstractMonad) + if !M.inputs[location].varied #! this input is not being varied (either unused or static) return end - mkpath(dirname(path_to_ic_cells_xml)) + createXMLFile(location, M) +end - path_to_base_xml = joinpath(path_to_ic_cells_folder, "cells.xml") - xml_doc = parse_file(path_to_base_xml) - if M.variation_ids.ic_cell != 0 # only update if not using the base variation for the ic cells - query = constructSelectQuery("ic_cell_variations", "WHERE ic_cell_variation_id=$(M.variation_ids.ic_cell);") - variation_row = queryToDataFrame(query; db=icCellDB(M.inputs.ic_cell.folder), is_row=true) - for column_name in names(variation_row) - if column_name == "ic_cell_variation_id" - continue - end - xml_path = columnNameToXMLPath(column_name) - updateField(xml_doc, xml_path, variation_row[1, column_name]) - end +function prepareVariedInputFolder(location::Symbol, sampling::Sampling) + if !sampling.inputs[location].varied #! this input is not being varied (either unused or static) + return + end + for index in eachindex(sampling.variation_ids) + monad = Monad(sampling, index) #! 
instantiate a monad with the variation_id and the simulation ids already found + prepareVariedInputFolder(location, monad) end - save_file(xml_doc, path_to_ic_cells_xml) - closeXML(xml_doc) - return end function pathToICCell(simulation::Simulation) - @assert simulation.inputs.ic_cell.id != -1 "No IC cell variation being used" # we should have already checked this before calling this function - path_to_ic_cell_folder = joinpath(data_dir, "inputs", "ics", "cells", simulation.inputs.ic_cell.folder) - if isfile(joinpath(path_to_ic_cell_folder, "cells.csv")) # ic already given by cells.csv + @assert simulation.inputs[:ic_cell].id != -1 "No IC cell variation being used" #! we should have already checked this before calling this function + path_to_ic_cell_folder = locationPath(:ic_cell, simulation) + if isfile(joinpath(path_to_ic_cell_folder, "cells.csv")) #! ic already given by cells.csv return joinpath(path_to_ic_cell_folder, "cells.csv") end - path_to_config_xml = joinpath(data_dir, "inputs", "configs", simulation.inputs.config.folder, "config_variations", "config_variation_$(simulation.variation_ids.config).xml") + path_to_config_xml = joinpath(locationPath(:config, simulation), "config_variations", "config_variation_$(simulation.variation_id[:config]).xml") xml_doc = openXML(path_to_config_xml) domain_dict = Dict{String,Float64}() for d in ["x", "y", "z"] for side in ["min", "max"] key = "$(d)_$(side)" xml_path = ["domain"; key] - domain_dict[key] = getField(xml_doc, xml_path) |> x -> parse(Float64, x) + domain_dict[key] = getContent(xml_doc, xml_path) |> x -> parse(Float64, x) end end closeXML(xml_doc) path_to_ic_cell_variations = joinpath(path_to_ic_cell_folder, "ic_cell_variations") - path_to_ic_cell_xml = joinpath(path_to_ic_cell_variations, "ic_cell_variation_$(simulation.variation_ids.ic_cell).xml") - path_to_ic_cell_file = joinpath(path_to_ic_cell_variations, "ic_cell_variation_$(simulation.variation_ids.ic_cell)_s$(simulation.id).csv") + path_to_ic_cell_xml = joinpath(path_to_ic_cell_variations, "ic_cell_variation_$(simulation.variation_id[:ic_cell]).xml") + path_to_ic_cell_file = joinpath(path_to_ic_cell_variations, "ic_cell_variation_$(simulation.variation_id[:ic_cell])_s$(simulation.id).csv") generateICCell(path_to_ic_cell_xml, path_to_ic_cell_file, domain_dict) return path_to_ic_cell_file end -function loadICECM(M::AbstractMonad) - if M.inputs.ic_ecm.id == -1 # no ic ecm being used - return - end - path_to_ic_ecm_folder = joinpath(data_dir, "inputs", "ics", "ecms", M.inputs.ic_ecm.folder) - if isfile(joinpath(path_to_ic_ecm_folder, "ecm.csv")) # ic already given by ecm.csv - return - end - path_to_ic_ecm_xml = joinpath(path_to_ic_ecm_folder, "ic_ecm_variations", "ic_ecm_variation_$(M.variation_ids.ic_ecm).xml") - if isfile(path_to_ic_ecm_xml) # already have the ic ecm variation created - return - end - mkpath(dirname(path_to_ic_ecm_xml)) - - path_to_base_xml = joinpath(path_to_ic_ecm_folder, "ecm.xml") - xml_doc = parse_file(path_to_base_xml) - if M.variation_ids.ic_ecm != 0 # only update if not using the base variation for the ic ecm - query = constructSelectQuery("ic_ecm_variations", "WHERE ic_ecm_variation_id=$(M.variation_ids.ic_ecm);") - variation_row = queryToDataFrame(query; db=icECMDB(M.inputs.ic_ecm.folder), is_row=true) - for column_name in names(variation_row) - if column_name == "ic_ecm_variation_id" - continue - end - xml_path = columnNameToXMLPath(column_name) - updateField(xml_doc, xml_path, variation_row[1, column_name]) - end - end - save_file(xml_doc, 
path_to_ic_ecm_xml) - closeXML(xml_doc) - return -end - function pathToICECM(simulation::Simulation) - @assert simulation.inputs.ic_ecm.id != -1 "No IC ecm variation being used" # we should have already checked this before calling this function - path_to_ic_ecm_folder = joinpath(data_dir, "inputs", "ics", "ecms", simulation.inputs.ic_ecm.folder) - if isfile(joinpath(path_to_ic_ecm_folder, "ecm.csv")) # ic already given by ecm.csv + @assert simulation.inputs[:ic_ecm].id != -1 "No IC ecm variation being used" #! we should have already checked this before calling this function + path_to_ic_ecm_folder = locationPath(:ic_ecm, simulation) + if isfile(joinpath(path_to_ic_ecm_folder, "ecm.csv")) #! ic already given by ecm.csv return joinpath(path_to_ic_ecm_folder, "ecm.csv") end - path_to_config_xml = joinpath(data_dir, "inputs", "configs", simulation.inputs.config.folder, "config_variations", "config_variation_$(simulation.variation_ids.config).xml") + path_to_config_xml = joinpath(locationPath(:config, simulation), "config_variations", "config_variation_$(simulation.variation_id[:config]).xml") xml_doc = openXML(path_to_config_xml) config_dict = Dict{String,Float64}() - for d in ["x", "y"] # does not (yet?) support 3D + for d in ["x", "y"] #! does not (yet?) support 3D for side in ["min", "max"] key = "$(d)_$(side)" xml_path = ["domain"; key] - config_dict[key] = getField(xml_doc, xml_path) |> x -> parse(Float64, x) + config_dict[key] = getContent(xml_doc, xml_path) |> x -> parse(Float64, x) end - key = "d$(d)" # d$(d) looks funny but it's just dx and dy + key = "d$(d)" #! d$(d) looks funny but it's just dx and dy xml_path = ["domain"; key] - config_dict[key] = getField(xml_doc, xml_path) |> x -> parse(Float64, x) + config_dict[key] = getContent(xml_doc, xml_path) |> x -> parse(Float64, x) end closeXML(xml_doc) path_to_ic_ecm_variations = joinpath(path_to_ic_ecm_folder, "ic_ecm_variations") - path_to_ic_ecm_xml = joinpath(path_to_ic_ecm_variations, "ic_ecm_variation_$(simulation.variation_ids.ic_ecm).xml") - path_to_ic_ecm_file = joinpath(path_to_ic_ecm_variations, "ic_ecm_variation_$(simulation.variation_ids.ic_ecm)_s$(simulation.id).csv") + path_to_ic_ecm_xml = joinpath(path_to_ic_ecm_variations, "ic_ecm_variation_$(simulation.variation_id[:ic_ecm]).xml") + path_to_ic_ecm_file = joinpath(path_to_ic_ecm_variations, "ic_ecm_variation_$(simulation.variation_id[:ic_ecm])_s$(simulation.id).csv") generateICECM(path_to_ic_ecm_xml, path_to_ic_ecm_file, config_dict) return path_to_ic_ecm_file end ################## XML Path Helper Functions ################## -# can I define my own macro that takes all these functions and adds methods for FN(cell_def, node::String) and FN(cell_def, path_suffix::Vector{String})?? +#! can I define my own macro that takes all these functions and adds methods for FN(cell_def, node::String) and FN(cell_def, path_suffix::Vector{String})?? 
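+#! For example (an illustrative sketch; the tag names are not tied to any particular config file), the vector returned by
+#! `cellDefinitionPath` can be extended with further tag names and `tag:attribute:value` tokens:
+#!     xml_path = [cellDefinitionPath("default"); "phenotype"; "cycle"; "phase_durations"; "duration:index:0"]
+#! and the resulting `xml_path` can be passed to `retrieveElement`/`updateField` to read or modify that element.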
function cellDefinitionPath(cell_definition::String)::Vector{String} return ["cell_definitions", "cell_definition:name:$(cell_definition)"] end @@ -320,9 +260,47 @@ end ################## Simplify Name Functions ################## -function simpleConfigVariationNames(name::String) +function shortLocationVariationID(fieldname::Symbol) + if fieldname == :config + return :ConfigVarID + elseif fieldname == :rulesets_collection + return :RulesVarID + elseif fieldname == :intracellular + return :IntraVarID + elseif fieldname == :ic_cell + return :ICCellVarID + elseif fieldname == :ic_ecm + return :ICECMVarID + else + throw(ArgumentError("Got fieldname $(fieldname). However, it must be 'config', 'rulesets_collection', 'intracellular', 'ic_cell', or 'ic_ecm'.")) + end +end + +shortLocationVariationID(fieldname::String) = shortLocationVariationID(Symbol(fieldname)) + +function shortLocationVariationID(type::Type, fieldname::Union{String, Symbol}) + return type(shortLocationVariationID(fieldname)) +end + +function shortVariationName(location::Symbol, name::String) + if location == :config + return shortConfigVariationName(name) + elseif location == :rulesets_collection + return shortRulesetsVariationName(name) + elseif location == :intracellular + return shortIntracellularVariationName(name) + elseif location == :ic_cell + return shortICCellVariationName(name) + elseif location == :ic_ecm + return shortICECMVariationName(name) + else + throw(ArgumentError("location must be 'config', 'rulesets_collection', 'intracellular', 'ic_cell', or 'ic_ecm'.")) + end +end + +function shortConfigVariationName(name::String) if name == "config_variation_id" - return "ConfigVarID" + return shortLocationVariationID(String, "config") elseif name == "overall/max_time" return "Max Time" elseif name == "save/full_data/interval" @@ -336,9 +314,9 @@ function simpleConfigVariationNames(name::String) end end -function simpleRulesetsVariationNames(name::String) +function shortRulesetsVariationName(name::String) if name == "rulesets_collection_variation_id" - return "RulesVarID" + return shortLocationVariationID(String, "rulesets_collection") elseif startswith(name, "hypothesis_ruleset") return getRuleParameterName(name) else @@ -346,9 +324,17 @@ function simpleRulesetsVariationNames(name::String) end end -function simpleICCellVariationNames(name::String) +function shortIntracellularVariationName(name::String) + if name == "intracellular_variation_id" + return shortLocationVariationID(String, "intracellular") + else + return name + end +end + +function shortICCellVariationName(name::String) if name == "ic_cell_variation_id" - return "ICCellVarID" + return shortLocationVariationID(String, "ic_cell") elseif startswith(name, "cell_patches") return getICCellParameterName(name) else @@ -356,9 +342,9 @@ function simpleICCellVariationNames(name::String) end end -function simpleICECMVariationNames(name::String) +function shortICECMVariationName(name::String) if name == "ic_ecm_variation_id" - return "ICECMVarID" + return shortLocationVariationID(String, "ic_ecm") elseif startswith(name, "layer") return getICECMParameterName(name) else diff --git a/src/VCTCreation.jl b/src/VCTCreation.jl index 13fa01ab..32a5cb31 100644 --- a/src/VCTCreation.jl +++ b/src/VCTCreation.jl @@ -32,12 +32,13 @@ function createProject(project_dir::String="."; clone_physicell::Bool=true, temp data_dir = joinpath(project_dir, "data") setUpInputs(data_dir, physicell_dir, template_as_default) + setUpComponents(data_dir, physicell_dir) setUpVCT(project_dir, 
physicell_dir, data_dir, template_as_default, terse) end function getLatestReleaseTag(repo_url::String) api_url = replace(repo_url, "github.com" => "api.github.com/repos") * "/releases/latest" - # include this header for CI testing to not exceed request limit (I think?): macos for some reason raised a `RequestError: HTTP/2 403`; users should not need to set this ENV variable + #! include this header for CI testing to not exceed request limit (I think?): macos for some reason raised a `RequestError: HTTP/2 403`; users should not need to set this ENV variable headers = haskey(ENV, "PCVCT_PUBLIC_REPO_AUTH") ? Dict("Authorization" => "token $(ENV["PCVCT_PUBLIC_REPO_AUTH"])") : Pair{String,String}[] response = Downloads.download(api_url; headers=headers) release_info = JSON3.read(response, Dict{String, Any}) @@ -63,7 +64,7 @@ function setUpPhysiCell(project_dir::String, clone_physicell::Bool) run(`git clone --branch $latest_tag --depth 1 https://github.com/drbergman/PhysiCell $(physicell_dir)`) end else - # download drbergman/Pysicell main branch + #! download the latest release of drbergman/PhysiCell println("Downloading PhysiCell repository") url = "https://api.github.com/repos/drbergman/PhysiCell/releases/latest" headers = haskey(ENV, "PCVCT_PUBLIC_REPO_AUTH") ? Dict("Authorization" => "token $(ENV["PCVCT_PUBLIC_REPO_AUTH"])") : Pair{String,String}[] @@ -83,11 +84,25 @@ return physicell_dir end +function setUpComponents(data_dir::String, physicell_dir::String) + components_dir = joinpath(data_dir, "components") + mkpath(components_dir) + + #! make sbml roadrunner components and populate with an example sbml for a roadrunner model + roadrunner_components_dir = joinpath(components_dir, "roadrunner") + mkpath(roadrunner_components_dir) + cp(joinpath(physicell_dir, "sample_projects_intracellular", "ode", "ode_energy", "config", "Toy_Metabolic_Model.xml"), joinpath(roadrunner_components_dir, "Toy_Metabolic_Model.xml"); force=true) +end + function setUpInputs(data_dir::String, physicell_dir::String, template_as_default::Bool) if isdir(data_dir) println("Data directory already exists ($(data_dir)).
Skipping setup of data directory.") return end + + mkpath(data_dir) + createInputsTOMLTemplate(joinpath(data_dir, "inputs.toml")) + inputs_dir = joinpath(data_dir, "inputs") mkpath(inputs_dir) @@ -97,6 +112,7 @@ function setUpInputs(data_dir::String, physicell_dir::String, template_as_defaul mkpath(joinpath(inputs_dir, "ics", ic)) end mkpath(joinpath(inputs_dir, "rulesets_collections")) + mkpath(joinpath(inputs_dir, "intracellulars")) if template_as_default setUpTemplate(physicell_dir, inputs_dir) @@ -115,6 +131,20 @@ function setUpRequiredFolders(path_to_template::String, inputs_dir::String, fold cp(joinpath(path_to_template, "Makefile"), joinpath(custom_codes_folder, "Makefile")) end +function icFilename(table_name::String) + if table_name == "cells" + return "cells.csv" + elseif table_name == "substrates" + return "substrates.csv" + elseif table_name == "ecms" + return "ecm.csv" + elseif table_name == "dcs" + return "dcs.csv" + else + throw(ArgumentError("table_name must be 'cells', 'substrates', 'ecms', or `dcs`.")) + end +end + function setUpICFolder(path_to_template::String, inputs_dir::String, ic_name::String, folder::String) ic_folder = joinpath(inputs_dir, "ics", ic_name, folder) mkpath(ic_folder) @@ -130,13 +160,13 @@ function setUpTemplate(physicell_dir::String, inputs_dir::String) rulesets_collection_folder = joinpath(inputs_dir, "rulesets_collections", "0_template") mkpath(rulesets_collection_folder) open(joinpath(rulesets_collection_folder, "base_rulesets.csv"), "w") do f - write(f, "default,pressure,decreases,cycle entry,0.0,0.5,4,0") # actually add a rule for example's sake + write(f, "default,pressure,decreases,cycle entry,0.0,0.5,4,0") #! actually add a rule for example's sake end setUpICFolder(path_to_template, inputs_dir, "cells", "0_template") setUpICFolder(path_to_template, inputs_dir, "substrates", "0_template") - # also set up a ic cell folder using the xml-based version + #! also set up a ic cell folder using the xml-based version pcvct.createICCellXMLTemplate(joinpath(inputs_dir, "ics", "cells", "1_xml")) end @@ -165,14 +195,16 @@ function setUpVCT(project_dir::String, physicell_dir::String, data_dir::String, tersify(s::String) = (terse ? "" : s) generate_data_lines = """ using pcvct - initializeVCT(\"$(abspath(physicell_dir))\", \"$(abspath(data_dir))\") - + initializeModelManager() # this works if launching from the project directory, i.e. 
the directory containing the VCT folder + # initializeModelManager(\"$(abspath(physicell_dir))\", \"$(abspath(data_dir))\") # use this if not calling this from the project directory + ############ set up ############ config_folder = $(config_folder) - rulesets_collection_folder = $(rulesets_collection_folder) custom_code_folder = $(custom_code_folder) - + rulesets_collection_folder = $(rulesets_collection_folder) + intracellular_folder = \"\" # optionally add this folder with intracellular.xml to $(joinpath(path_to_ics, "intracellulars")) + ic_cell_folder = $(ic_cell_folder) ic_substrate_folder = \"\" # optionally add this folder with substrates.csv to $(joinpath(path_to_ics, "substrates")) ic_ecm_folder = \"\" # optionally add this folder with ecms.csv to $(joinpath(path_to_ics, "ecms")) @@ -183,6 +215,7 @@ function setUpVCT(project_dir::String, physicell_dir::String, data_dir::String, """))\ inputs = InputFolders(config_folder, custom_code_folder; rulesets_collection=rulesets_collection_folder, + intracellular=intracellular_folder, ic_cell=ic_cell_folder, ic_substrate=ic_substrate_folder, ic_ecm=ic_ecm_folder, @@ -229,7 +262,7 @@ function setUpVCT(project_dir::String, physicell_dir::String, data_dir::String, $(tersify(""" # assume you have the template project with \"default\" as a cell type... # ...let's vary their cycle durations and apoptosis rates - + # get the xml path to duration of phase 0 of the default cell type # this is a list of strings in which each string is either... # \t1) the name of a tag in the xml file OR @@ -272,7 +305,7 @@ function setUpVCT(project_dir::String, physicell_dir::String, data_dir::String, $(tersify(""" # you can change this default behavior on your machine by setting an environment variable... # called PCVCT_NUM_PARALLEL_SIMS - # this is read during `initializeVCT`... + # this is read during `initializeModelManager`... # meaning subsequent calls to `setNumberOfParallelSims` will overwrite the value # A simple way to use this when running the script is to run in your shell: # `PCVCT_NUM_PARALLEL_SIMS=4 julia $(path_to_generate_data)` @@ -289,9 +322,9 @@ function setUpVCT(project_dir::String, physicell_dir::String, data_dir::String, """))\ """ - # Remove leading whitespace + #! Remove leading whitespace generate_data_lines = join(map(x -> lstrip(c->c==' ', x), split(generate_data_lines, '\n')), '\n') - + open(path_to_generate_data, "w") do f write(f, generate_data_lines) end diff --git a/src/VCTDatabase.jl b/src/VCTDatabase.jl index 3f959cd6..e5b33fe5 100644 --- a/src/VCTDatabase.jl +++ b/src/VCTDatabase.jl @@ -1,4 +1,4 @@ -export printSimulationsTable, printVariationsTable, simulationsTable +export printSimulationsTable, simulationsTable db::SQLite.DB = SQLite.DB() @@ -6,7 +6,7 @@ db::SQLite.DB = SQLite.DB() function initializeDatabase(path_to_database::String; auto_upgrade::Bool=false) if db.file == ":memory:" || abspath(db.file) != abspath(path_to_database) - println(rpad("Path to database:", 20, ' ') * path_to_database) + println(rpad("Path to database:", 25, ' ') * path_to_database) end is_new_db = !isfile(path_to_database) global db = SQLite.DB(path_to_database) @@ -34,7 +34,7 @@ function reinitializeDatabase() return end global initialized = false - if db.file == ":memory:" # if the database is in memory, re-initialize it + if db.file == ":memory:" #! 
if the database is in memory, re-initialize it initializeDatabase() else initializeDatabase(db.file; auto_upgrade=true) @@ -42,168 +42,60 @@ function reinitializeDatabase() end function createSchema(is_new_db::Bool; auto_upgrade::Bool=false) - # make sure necessary directories are present - data_dir_contents = readdir(joinpath(data_dir, "inputs"); sort=false) - if !necessaryInputsPresent(data_dir_contents) + #! make sure necessary directories are present + if !necessaryInputsPresent() return false end - # start with pcvct version info + #! start with pcvct version info if !resolvePCVCTVersion(is_new_db, auto_upgrade) println("Could not successfully upgrade database. Please check the logs for more information.") return false end - # initialize and populate physicell_versions table + #! initialize and populate physicell_versions table createPCVCTTable("physicell_versions", physicellVersionsSchema()) global current_physicell_version_id = physicellVersionID() - # initialize and populate custom_codes table - custom_codes_schema = """ - custom_code_id INTEGER PRIMARY KEY, - folder_name UNIQUE, - description TEXT - """ - createPCVCTTable("custom_codes", custom_codes_schema) - - custom_codes_folders = readdir(joinpath(data_dir, "inputs", "custom_codes"); sort=false) |> filter(x->isdir(joinpath(data_dir, "inputs", "custom_codes", x))) - if isempty(custom_codes_folders) - println("No folders in $(joinpath(data_dir, "inputs", "custom_codes")) found. Add custom_modules, main.cpp, and Makefile to a folder here to move forward.") - return false - end - for custom_codes_folder in custom_codes_folders - DBInterface.execute(db, "INSERT OR IGNORE INTO custom_codes (folder_name) VALUES ('$(custom_codes_folder)');") - end - - # initialize and populate ics tables - createICTable("cells"; data_dir_contents=data_dir_contents) - createICTable("substrates"; data_dir_contents=data_dir_contents) - createICTable("ecms"; data_dir_contents=data_dir_contents) - createICTable("dcs"; data_dir_contents=data_dir_contents) - - # initialize and populate configs table - configs_schema = """ - config_id INTEGER PRIMARY KEY, - folder_name UNIQUE, - description TEXT - """ - createPCVCTTable("configs", configs_schema) - - config_folders = readdir(joinpath(data_dir, "inputs", "configs"); sort=false) |> filter(x -> isdir(joinpath(data_dir, "inputs", "configs", x))) - if isempty(config_folders) - println("No folders in $(joinpath(data_dir, "inputs", "configs")) found. 
Add PhysiCell_settings.xml and rules files here.") - return false - end - for config_folder in config_folders - description = metadataDescription(joinpath(data_dir, "inputs", "configs", config_folder)) - DBInterface.execute(db, "INSERT OR IGNORE INTO configs (folder_name, description) VALUES ('$(config_folder)', '$(description)');") - db_config_variations = joinpath(data_dir, "inputs", "configs", config_folder, "config_variations.db") |> SQLite.DB - createPCVCTTable("config_variations", "config_variation_id INTEGER PRIMARY KEY"; db=db_config_variations) - DBInterface.execute(db_config_variations, "INSERT OR IGNORE INTO config_variations (config_variation_id) VALUES(0);") - end - - # initialize and populate rulesets_collections table - rulesets_collections_schema = """ - rulesets_collection_id INTEGER PRIMARY KEY, - folder_name UNIQUE, - description TEXT - """ - createPCVCTTable("rulesets_collections", rulesets_collections_schema) - - if "rulesets_collections" in data_dir_contents - rulesets_collections_folders = readdir(joinpath(data_dir, "inputs", "rulesets_collections"); sort=false) |> filter(x -> isdir(joinpath(data_dir, "inputs", "rulesets_collections", x))) - for rulesets_collection_folder in rulesets_collections_folders - description = metadataDescription(joinpath(data_dir, "inputs", "rulesets_collections", rulesets_collection_folder)) - DBInterface.execute(db, "INSERT OR IGNORE INTO rulesets_collections (folder_name, description) VALUES ('$(rulesets_collection_folder)', '$(description)');") - db_rulesets_variations = joinpath(data_dir, "inputs", "rulesets_collections", rulesets_collection_folder, "rulesets_collection_variations.db") |> SQLite.DB - createPCVCTTable("rulesets_collection_variations", "rulesets_collection_variation_id INTEGER PRIMARY KEY"; db=db_rulesets_variations) - DBInterface.execute(db_rulesets_variations, "INSERT OR IGNORE INTO rulesets_collection_variations (rulesets_collection_variation_id) VALUES(0);") + #! initialize tables for all inputs + for (location, location_dict) in pairs(inputs_dict) + table_name = tableName(location) + table_schema = """ + $(locationIDName(location)) INTEGER PRIMARY KEY, + folder_name UNIQUE, + description TEXT + """ + createPCVCTTable(table_name, table_schema) + + folders = readdir(locationPath(location); sort=false) |> filter(x -> isdir(joinpath(locationPath(location), x))) + if location_dict["required"] && isempty(folders) + println("No folders in $(locationPath(location)) found. 
This is where to put the folders for $(tableName(location)).") + return false end - end - - # initialize and populate ic_cells variations dbs - path_to_ics = joinpath(data_dir, "inputs", "ics") - path_to_ic_cells = joinpath(path_to_ics, "cells") - if "ics" in data_dir_contents && "cells" in readdir(path_to_ics, sort=false) - ic_cells_folders = readdir(path_to_ic_cells, sort=false) |> filter(x -> isdir(joinpath(path_to_ic_cells, x))) - for ic_cell_folder in ic_cells_folders - DBInterface.execute(db, "INSERT OR IGNORE INTO ic_cells (folder_name) VALUES ('$(ic_cell_folder)');") - path_to_folder = joinpath(path_to_ic_cells, ic_cell_folder) - is_csv = isfile(joinpath(path_to_folder, "cells.csv")) - # ⊻ = XOR (make sure exactly one of the files is present) - @assert is_csv ⊻ isfile(joinpath(path_to_folder, "cells.xml")) "Must have one of cells.csv or cells.xml in $(joinpath(path_to_folder))" - if is_csv - continue # no variations allowed on csv files - end - db_ic_cell = joinpath(path_to_folder, "ic_cell_variations.db") |> SQLite.DB - createPCVCTTable("ic_cell_variations", "ic_cell_variation_id INTEGER PRIMARY KEY"; db=db_ic_cell) - DBInterface.execute(db_ic_cell, "INSERT OR IGNORE INTO ic_cell_variations (ic_cell_variation_id) VALUES(0);") + for folder in folders + insertFolder(location, folder) end end - # initialize and populate ic_ecms variations dbs - path_to_ic_ecms = joinpath(path_to_ics, "ecms") - if "ics" in data_dir_contents && "ecms" in readdir(path_to_ics, sort=false) - ic_ecms_folders = readdir(path_to_ic_ecms, sort=false) |> filter(x -> isdir(joinpath(path_to_ic_ecms, x))) - for ic_ecm_folder in ic_ecms_folders - DBInterface.execute(db, "INSERT OR IGNORE INTO ic_ecms (folder_name) VALUES ('$(ic_ecm_folder)');") - path_to_folder = joinpath(path_to_ic_ecms, ic_ecm_folder) - is_csv = isfile(joinpath(path_to_folder, "ecm.csv")) - # ⊻ = XOR (make sure exactly one of the files is present) - @assert is_csv ⊻ isfile(joinpath(path_to_folder, "ecm.xml")) "Must have one of ecm.csv or ecm.xml in $(joinpath(path_to_folder))" - if is_csv - continue # no variations allowed on csv files - end - db_ic_ecm = joinpath(path_to_folder, "ic_ecm_variations.db") |> SQLite.DB - createPCVCTTable("ic_ecm_variations", "ic_ecm_variation_id INTEGER PRIMARY KEY"; db=db_ic_ecm) - DBInterface.execute(db_ic_ecm, "INSERT OR IGNORE INTO ic_ecm_variations (ic_ecm_variation_id) VALUES(0);") - end - end - - # initialize simulations table simulations_schema = """ simulation_id INTEGER PRIMARY KEY, physicell_version_id INTEGER, - custom_code_id INTEGER, - ic_cell_id INTEGER, - ic_substrate_id INTEGER, - ic_ecm_id INTEGER, - ic_dc_id INTEGER, - config_id INTEGER, - rulesets_collection_id INTEGER, - config_variation_id INTEGER, - rulesets_collection_variation_id INTEGER, - ic_cell_variation_id INTEGER, - ic_ecm_variation_id INTEGER, + $(inputIDsSubSchema()), + $(inputVariationIDsSubSchema()), status_code_id INTEGER, - FOREIGN KEY (physicell_version_id) - REFERENCES physicell_versions (physicell_version_id), - FOREIGN KEY (custom_code_id) - REFERENCES custom_codes (custom_code_id), - FOREIGN KEY (ic_cell_id) - REFERENCES ic_cells (ic_cell_id), - FOREIGN KEY (ic_substrate_id) - REFERENCES ic_substrates (ic_substrate_id), - FOREIGN KEY (ic_ecm_id) - REFERENCES ic_ecms (ic_ecm_id), - FOREIGN KEY (ic_dc_id) - REFERENCES ic_dcs (ic_dc_id), - FOREIGN KEY (config_id) - REFERENCES configs (config_id), - FOREIGN KEY (rulesets_collection_id) - REFERENCES rulesets_collections (rulesets_collection_id), + 
$(abstractSamplingForeignReferenceSubSchema()), FOREIGN KEY (status_code_id) REFERENCES status_codes (status_code_id) """ createPCVCTTable("simulations", simulations_schema) - # initialize monads table + #! initialize monads table createPCVCTTable("monads", monadsSchema()) - # initialize samplings table + #! initialize samplings table createPCVCTTable("samplings", samplingsSchema()) - # initialize trials table + #! initialize trials table trials_schema = """ trial_id INTEGER PRIMARY KEY, datetime TEXT, @@ -216,15 +108,17 @@ function createSchema(is_new_db::Bool; auto_upgrade::Bool=false) return true end -function necessaryInputsPresent(data_dir_contents::Vector{String}) +function necessaryInputsPresent() success = true - if "custom_codes" ∉ data_dir_contents - println("No $(joinpath(data_dir, "inputs", "custom_codes")) found. This is where to put the folders for custom_modules, main.cpp, and Makefile.") - success = false - end - if "configs" ∉ data_dir_contents - println("No $(joinpath(data_dir, "inputs", "configs")) found. This is where to put the folders for config files and rules files.") - success = false + for (location, location_dict) in pairs(inputs_dict) + if !location_dict["required"] + continue + end + + if !(locationPath(location) |> isdir) + println("No $(locationPath(location)) found. This is where to put the folders for $(tableName(location)).") + success = false + end end return success end @@ -243,88 +137,42 @@ function monadsSchema() return """ monad_id INTEGER PRIMARY KEY, physicell_version_id INTEGER, - custom_code_id INTEGER, - ic_cell_id INTEGER, - ic_substrate_id INTEGER, - ic_ecm_id INTEGER, - ic_dc_id INTEGER, - config_id INTEGER, - rulesets_collection_id INTEGER, - config_variation_id INTEGER, - rulesets_collection_variation_id INTEGER, - ic_cell_variation_id INTEGER, - ic_ecm_variation_id INTEGER, - FOREIGN KEY (physicell_version_id) - REFERENCES physicell_versions (physicell_version_id), - FOREIGN KEY (custom_code_id) - REFERENCES custom_codes (custom_code_id), - FOREIGN KEY (ic_cell_id) - REFERENCES ic_cells (ic_cell_id), - FOREIGN KEY (ic_substrate_id) - REFERENCES ic_substrates (ic_substrate_id), - FOREIGN KEY (ic_ecm_id) - REFERENCES ic_ecms (ic_ecm_id), - FOREIGN KEY (ic_dc_id) - REFERENCES ic_dcs (ic_dc_id), - FOREIGN KEY (config_id) - REFERENCES configs (config_id), - FOREIGN KEY (rulesets_collection_id) - REFERENCES rulesets_collections (rulesets_collection_id), - UNIQUE (physicell_version_id,custom_code_id,ic_cell_id,ic_substrate_id,ic_ecm_id,ic_dc_id,config_id,rulesets_collection_id,config_variation_id,rulesets_collection_variation_id,ic_cell_variation_id,ic_ecm_variation_id) + $(inputIDsSubSchema()), + $(inputVariationIDsSubSchema()), + $(abstractSamplingForeignReferenceSubSchema()), + UNIQUE (physicell_version_id, + $(join([locationIDName(k) for k in keys(inputs_dict)], ",\n")), + $(join([locationVarIDName(k) for (k, d) in pairs(inputs_dict) if any(d["varied"])], ",\n")) + ) """ end -function samplingsSchema() +function inputIDsSubSchema() + return join(["$(locationIDName(k)) INTEGER" for k in keys(inputs_dict)], ",\n") +end + +function inputVariationIDsSubSchema() + return join(["$(locationVarIDName(k)) INTEGER" for (k, d) in pairs(inputs_dict) if any(d["varied"])], ",\n") +end + +function abstractSamplingForeignReferenceSubSchema() return """ - sampling_id INTEGER PRIMARY KEY, - physicell_version_id INTEGER, - custom_code_id INTEGER, - ic_cell_id INTEGER, - ic_substrate_id INTEGER, - ic_ecm_id INTEGER, - ic_dc_id INTEGER, - config_id INTEGER, - 
rulesets_collection_id INTEGER, FOREIGN KEY (physicell_version_id) REFERENCES physicell_versions (physicell_version_id), - FOREIGN KEY (custom_code_id) - REFERENCES custom_codes (custom_code_id), - FOREIGN KEY (ic_cell_id) - REFERENCES ic_cells (ic_cell_id), - FOREIGN KEY (ic_substrate_id) - REFERENCES ic_substrates (ic_substrate_id), - FOREIGN KEY (ic_ecm_id) - REFERENCES ic_ecms (ic_ecm_id), - FOREIGN KEY (ic_dc_id) - REFERENCES ic_dcs (ic_dc_id), - FOREIGN KEY (config_id) - REFERENCES configs (config_id), - FOREIGN KEY (rulesets_collection_id) - REFERENCES rulesets_collections (rulesets_collection_id) + $(join([""" + FOREIGN KEY ($(locationIDName(k))) + REFERENCES $(tableName(k)) ($(locationIDName(k)))\ + """ for k in keys(inputs_dict)], ",\n")) """ end -function createICTable(ic_name::String; data_dir_contents=String[]) - table_name = "ic_$(ic_name)" - schema = """ - $(table_name[1:end-1])_id INTEGER PRIMARY KEY, - folder_name UNIQUE, - description TEXT +function samplingsSchema() + return """ + sampling_id INTEGER PRIMARY KEY, + physicell_version_id INTEGER, + $(inputIDsSubSchema()), + $(abstractSamplingForeignReferenceSubSchema()) """ - createPCVCTTable(table_name, schema) - if "ics" in data_dir_contents && ic_name in readdir(joinpath(data_dir, "inputs", "ics"), sort=false) - ic_folders = readdir(joinpath(data_dir, "inputs", "ics", ic_name), sort=false) |> filter(x -> isdir(joinpath(data_dir, "inputs", "ics", ic_name, x))) - if !isempty(ic_folders) - for ic_folder in ic_folders - if !isfile(joinpath(data_dir, "inputs", "ics", ic_name, ic_folder, icFilename(ic_name))) - continue - end - description = metadataDescription(joinpath(data_dir, "inputs", "ics", ic_name, ic_folder)) - DBInterface.execute(db, "INSERT OR IGNORE INTO $(table_name) (folder_name, description) VALUES ('$(ic_folder)', '$(description)');") - end - end - end - return end function metadataDescription(path_to_folder::AbstractString) @@ -342,35 +190,22 @@ function metadataDescription(path_to_folder::AbstractString) return description end -function icFilename(table_name::String) - if table_name == "cells" - return "cells.csv" - elseif table_name == "substrates" - return "substrates.csv" - elseif table_name == "ecms" - return "ecm.csv" - elseif table_name == "dcs" - return "dcs.csv" - else - throw(ArgumentError("table_name must be 'cells', 'substrates', 'ecms', or `dcs`.")) - end -end - function createPCVCTTable(table_name::String, schema::String; db::SQLite.DB=db) - # check that table_name ends in "s" + #! check that table_name ends in "s" if last(table_name) != 's' s = "Table name must end in 's'." s *= "\n\tThis helps to normalize what the id names are for these entries." s *= "\n\tYour table $(table_name) does not end in 's'." - s *= "\n\tSee retrieveID(table_name::String, folder_name::String; db::SQLite.DB=db)." + s *= "\n\tSee retrieveID(location::Symbol, folder_name::String; db::SQLite.DB=db)." error(s) end - # check that schema has PRIMARY KEY named as table_name without the s followed by _id - if !occursin("$(table_name[1:end-1])_id INTEGER PRIMARY KEY", schema) - s = "Schema must have PRIMARY KEY named as $(table_name[1:end-1])_id." + #! check that schema has PRIMARY KEY named as table_name without the s followed by _id + id_name = locationIDName(Symbol(table_name[1:end-1])) + if !occursin("$(id_name) INTEGER PRIMARY KEY", schema) + s = "Schema must have PRIMARY KEY named as $(id_name)." s *= "\n\tThis helps to normalize what the id names are for these entries." 
- s *= "\n\tYour schema $(schema) does not have \"$(table_name[1:end-1])_id INTEGER PRIMARY KEY\"." - s *= "\n\tSee retrieveID(table_name::String, folder_name::String; db::SQLite.DB=db)." + s *= "\n\tYour schema $(schema) does not have \"$(id_name) INTEGER PRIMARY KEY\"." + s *= "\n\tSee retrieveID(location::Symbol, folder_name::String; db::SQLite.DB=db)." error(s) end SQLite.execute(db, "CREATE TABLE IF NOT EXISTS $(table_name) ( @@ -380,6 +215,22 @@ function createPCVCTTable(table_name::String, schema::String; db::SQLite.DB=db) return end +function insertFolder(location::Symbol, folder::String, description::String="") + path_to_folder = locationPath(location, folder) + old_description = metadataDescription(path_to_folder) + description = isempty(old_description) ? description : old_description + query = "INSERT OR IGNORE INTO $(tableName(location)) (folder_name, description) VALUES ('$folder', '$description') RETURNING $(locationIDName(location));" + df = queryToDataFrame(query) + if !folderIsVaried(location, folder) + return + end + db_variations = joinpath(locationPath(location, folder), "$(location)_variations.db") |> SQLite.DB + createPCVCTTable(variationsTableName(location), "$(locationVarIDName(location)) INTEGER PRIMARY KEY"; db=db_variations) + DBInterface.execute(db_variations, "INSERT OR IGNORE INTO $(location)_variations ($(locationVarIDName(location))) VALUES(0);") + input_folder = InputFolder(location, folder) + prepareBaseFile(input_folder) +end + function createDefaultStatusCodesTable() status_codes_schema = """ status_code_id INTEGER PRIMARY KEY, @@ -407,7 +258,7 @@ The check and status update are done in a transaction to ensure that the status """ function isStarted(simulation_id::Int; new_status_code::Union{Missing,String}=missing) query = constructSelectQuery("simulations", "WHERE simulation_id=$(simulation_id)"; selection="status_code_id") - mode = ismissing(new_status_code) ? "DEFERRED" : "EXCLUSIVE" # if we are possibly going to update, then set to exclusive mode + mode = ismissing(new_status_code) ? "DEFERRED" : "EXCLUSIVE" #! 
if we are possibly going to update, then set to exclusive mode SQLite.transaction(db, mode) status_code = queryToDataFrame(query; is_row=true) |> x -> x[1,:status_code_id] is_started = status_code != getStatusCodeID("Not Started") @@ -424,45 +275,26 @@ isStarted(simulation::Simulation; new_status_code::Union{Missing,String}=missing ################## DB Interface Functions ################## -configDB(config_folder::String) = joinpath(data_dir, "inputs", "configs", config_folder, "config_variations.db") |> SQLite.DB -configDB(config_id::Int) = configFolder(config_id) |> configDB -configDB(S::AbstractSampling) = configDB(S.inputs.config.folder) - -function rulesetsCollectionDB(rulesets_collection_folder::String) - if rulesets_collection_folder == "" - return nothing - end - path_to_folder = joinpath(data_dir, "inputs", "rulesets_collections", rulesets_collection_folder) - return joinpath(path_to_folder, "rulesets_collection_variations.db") |> SQLite.DB -end -rulesetsCollectionDB(S::AbstractSampling) = rulesetsCollectionDB(S.inputs.rulesets_collection.folder) -rulesetsCollectionDB(rulesets_collection_id::Int) = rulesetsCollectionFolder(rulesets_collection_id) |> rulesetsCollectionDB - -function icCellDB(ic_cell_folder::String) - if ic_cell_folder == "" +function variationsDatabase(location::Symbol, folder::String) + if folder == "" return nothing end - path_to_folder = joinpath(data_dir, "inputs", "ics", "cells", ic_cell_folder) - if isfile(joinpath(path_to_folder, "cells.csv")) + path_to_db = joinpath(locationPath(location, folder), "$(location)_variations.db") + if !isfile(path_to_db) return missing end - return joinpath(path_to_folder, "ic_cell_variations.db") |> SQLite.DB + return path_to_db |> SQLite.DB end -icCellDB(ic_cell_id::Int) = icCellFolder(ic_cell_id) |> icCellDB -icCellDB(S::AbstractSampling) = icCellDB(S.inputs.ic_cell.folder) -function icECMDB(ic_ecm_folder::String) - if ic_ecm_folder == "" - return nothing - end - path_to_folder = joinpath(data_dir, "inputs", "ics", "ecms", ic_ecm_folder) - if isfile(joinpath(path_to_folder, "ecm.csv")) - return missing - end - return joinpath(path_to_folder, "ic_ecm_variations.db") |> SQLite.DB +function variationsDatabase(location::Symbol, id::Int) + folder = inputFolderName(location, id) + return variationsDatabase(location, folder) +end + +function variationsDatabase(location::Symbol, S::AbstractSampling) + folder = S.inputs[location].folder + return variationsDatabase(location, folder) end -icECMDB(ic_ecm_id::Int) = icECMFolder(ic_ecm_id) |> icECMDB -icECMDB(S::AbstractSampling) = icECMDB(S.inputs.ic_ecm.folder) ########### Retrieving Database Information Functions ########### @@ -478,44 +310,29 @@ end constructSelectQuery(table_name::String, condition_stmt::String=""; selection::String="*") = "SELECT $(selection) FROM $(table_name) $(condition_stmt);" -function getFolder(table_name::String, id_name::String, id::Int; db::SQLite.DB=db) - query = constructSelectQuery(table_name, "WHERE $(id_name)=$(id);"; selection="folder_name") +function inputFolderName(location::Symbol, id::Int) + if id == -1 + return "" + end + + query = constructSelectQuery(tableName(location), "WHERE $(locationIDName(location))=$(id)"; selection="folder_name") return queryToDataFrame(query; is_row=true) |> x -> x.folder_name[1] end -getOptionalFolder(table_name::String, id_name::String, id::Int; db::SQLite.DB=db) = id == -1 ? 
"" : getFolder(table_name, id_name, id; db=db) - -configFolder(config_id::Int) = getFolder("configs", "config_id", config_id) -icCellFolder(ic_cell_id::Int) = getOptionalFolder("ic_cells", "ic_cell_id", ic_cell_id) -icSubstrateFolder(ic_substrate_id::Int) = getOptionalFolder("ic_substrates", "ic_substrate_id", ic_substrate_id) -icECMFolder(ic_ecm_id::Int) = getOptionalFolder("ic_ecms", "ic_ecm_id", ic_ecm_id) -icDCFolder(ic_dc_id::Int) = getOptionalFolder("ic_dcs", "ic_dc_id", ic_dc_id) -rulesetsCollectionFolder(rulesets_collection_id::Int) = getOptionalFolder("rulesets_collections", "rulesets_collection_id", rulesets_collection_id) -customCodesFolder(custom_code_id::Int) = getFolder("custom_codes", "custom_code_id", custom_code_id) - -function retrieveID(table_name::String, folder_name::String; db::SQLite.DB=db) +function retrieveID(location::Symbol, folder_name::String; db::SQLite.DB=db) if folder_name == "" return -1 end - primary_key_string = "$(rstrip(table_name,'s'))_id" - query = constructSelectQuery(table_name, "WHERE folder_name='$(folder_name)'"; selection=primary_key_string) + primary_key_string = locationIDName(location) + query = constructSelectQuery(tableName(location), "WHERE folder_name='$(folder_name)'"; selection=primary_key_string) df = queryToDataFrame(query; is_row=true) return df[1, primary_key_string] end ########### Summarizing Database Functions ########### -configVariationIDs(M::AbstractMonad) = [M.variation_ids.config] -configVariationIDs(sampling::Sampling) = [vid.config for vid in sampling.variation_ids] - -rulesetsVariationIDs(M::AbstractMonad) = [M.variation_ids.rulesets_collection] -rulesetsVariationIDs(sampling::Sampling) = [vid.rulesets_collection for vid in sampling.variation_ids] - -icCellVariationIDs(M::AbstractMonad) = [M.variation_ids.ic_cell] -icCellVariationIDs(sampling::Sampling) = [vid.ic_cell for vid in sampling.variation_ids] - -icECMVariationIDs(M::AbstractMonad) = [M.variation_ids.ic_ecm] -icECMVariationIDs(sampling::Sampling) = [vid.ic_ecm for vid in sampling.variation_ids] +variationIDs(location::Symbol, M::AbstractMonad) = [M.variation_id[location]] +variationIDs(location::Symbol, sampling::Sampling) = [variation_id[location] for variation_id in sampling.variation_ids] function variationsTable(query::String, db::SQLite.DB; remove_constants::Bool=false) df = queryToDataFrame(query, db=db) @@ -527,143 +344,64 @@ function variationsTable(query::String, db::SQLite.DB; remove_constants::Bool=fa return df end -function configVariationsTable(config_variations_db::SQLite.DB, config_variation_ids::AbstractVector{<:Integer}; remove_constants::Bool=false) - query = constructSelectQuery("config_variations", "WHERE config_variation_id IN ($(join(config_variation_ids,",")));") - df = variationsTable(query, config_variations_db; remove_constants=remove_constants) - rename!(simpleConfigVariationNames, df) - return df -end - -configVariationsTable(S::AbstractSampling; remove_constants::Bool=false) = configVariationsTable(configDB(S), configVariationIDs(S); remove_constants=remove_constants) - -function rulesetsVariationsTable(rulesets_variations_db::SQLite.DB, rulesets_collection_variation_ids::AbstractVector{<:Integer}; remove_constants::Bool=false) - rulesets_collection_variation_ids = filter(x -> x != -1, rulesets_collection_variation_ids) # rulesets_collection_variation_id = -1 means no ruleset being used - query = constructSelectQuery("rulesets_collection_variations", "WHERE rulesets_collection_variation_id IN 
($(join(rulesets_collection_variation_ids,",")));") - df = variationsTable(query, rulesets_variations_db; remove_constants=remove_constants) - rename!(simpleRulesetsVariationNames, df) - return df -end - -function rulesetsVariationsTable(::Nothing, rulesets_collection_variation_ids::AbstractVector{<:Integer}; kwargs...) - @assert all(x -> x == -1, rulesets_collection_variation_ids) "If the rulesets_collection_variation_id is missing, then all rulesets_collection_variation_ids must be -1." - return DataFrame(RulesVarID=rulesets_collection_variation_ids) -end - -rulesetsVariationsTable(S::AbstractSampling; remove_constants::Bool=false) = rulesetsVariationsTable(rulesetsCollectionDB(S), rulesetsVariationIDs(S); remove_constants=remove_constants) - -function icCellVariationsTable(ic_cell_variations_db::SQLite.DB, ic_cell_variation_ids::AbstractVector{<:Integer}; remove_constants::Bool=false) - query = constructSelectQuery("ic_cell_variations", "WHERE ic_cell_variation_id IN ($(join(ic_cell_variation_ids,",")));") - df = variationsTable(query, ic_cell_variations_db; remove_constants=remove_constants) - rename!(simpleICCellVariationNames, df) - return df -end - -function icCellVariationsTable(::Nothing, ic_cell_variation_ids::AbstractVector{<:Integer}; kwargs...) - @assert all(x -> x == -1, ic_cell_variation_ids) "If no ic_cell_folder is given, then all ic_cell_variation_ids must be -1." - return DataFrame(ICCellVarID=ic_cell_variation_ids) -end - -function icCellVariationsTable(::Missing, ic_cell_variation_ids::AbstractVector{<:Integer}; kwargs...) - @assert all(x -> x == 0, ic_cell_variation_ids) "If the ic_cell_folder contains a cells.csv, then all ic_cell_variation_ids must be 0." - return DataFrame(ICCellVarID=ic_cell_variation_ids) -end - -icCellVariationsTable(S::AbstractSampling; remove_constants::Bool=false) = icCellVariationsTable(icCellDB(S), icCellVariationIDs(S); remove_constants=remove_constants) - -function icECMVariationsTable(ic_ecm_variations_db::SQLite.DB, ic_ecm_variation_ids::AbstractVector{<:Integer}; remove_constants::Bool=false) - query = constructSelectQuery("ic_ecm_variations", "WHERE ic_ecm_variation_id IN ($(join(ic_ecm_variation_ids,",")));") - df = variationsTable(query, ic_ecm_variations_db; remove_constants=remove_constants) - rename!(simpleICECMVariationNames, df) +function variationsTable(location::Symbol, variations_database::SQLite.DB, variation_ids::AbstractVector{<:Integer}; remove_constants::Bool=false) + used_variation_ids = filter(x -> x != -1, variation_ids) #! variation_id = -1 means this input is not even being used + query = constructSelectQuery(variationsTableName(location), "WHERE $(locationVarIDName(location)) IN ($(join(used_variation_ids,",")))") + df = variationsTable(query, variations_database; remove_constants=remove_constants) + rename!(name -> shortVariationName(location, name), df) return df end -function icECMVariationsTable(::Nothing, ic_ecm_variation_ids::AbstractVector{<:Integer}; kwargs...) - @assert all(x -> x == -1, ic_ecm_variation_ids) "If no ic_ecm_folder is given, then all ic_ecm_variation_ids must be -1." - return DataFrame(ICECMVarID=ic_ecm_variation_ids) -end - -function icECMVariationsTable(::Missing, ic_ecm_variation_ids::AbstractVector{<:Integer}; kwargs...) - @assert all(x -> x == 0, ic_ecm_variation_ids) "If the ic_ecm_folder contains a ecm.csv, then all ic_ecm_variation_ids must be 0." 
- return DataFrame(ICECMVarID=ic_ecm_variation_ids) -end - -icECMVariationsTable(S::AbstractSampling; remove_constants::Bool=false) = icECMVariationsTable(icECMDB(S), icECMVariationIDs(S); remove_constants=remove_constants) - -function variationsTableFromSimulations(query::String, id_name::Symbol, getVariationsTableFn::Function; remove_constants::Bool=false) - df = queryToDataFrame(query) - unique_tuples = [(row[1], row[2]) for row in eachrow(df)] |> unique - var_df = DataFrame(id_name=>Int[]) - for unique_tuple in unique_tuples - append!(var_df, getVariationsTableFn(unique_tuple), cols=:union) - end - if remove_constants && size(var_df, 1) > 1 - col_names = names(var_df) - filter!(n -> length(unique(var_df[!,n])) > 1, col_names) - select!(var_df, col_names) - end - return var_df -end - -function configVariationsTable(simulation_ids::AbstractVector{<:Integer}; remove_constants::Bool=false) - query = constructSelectQuery("simulations", "WHERE simulation_id IN ($(join(simulation_ids,",")));", selection="config_id, config_variation_id") - getVariationsTableFn = x -> configVariationsTable(configDB(x[1]), [x[2]]; remove_constants=remove_constants) - return variationsTableFromSimulations(query, :ConfigVarID, getVariationsTableFn) -end - - -function rulesetsVariationsTable(simulation_ids::AbstractVector{<:Integer}; remove_constants::Bool=false) - query = constructSelectQuery("simulations", "WHERE simulation_id IN ($(join(simulation_ids,",")));", selection="rulesets_collection_id, rulesets_collection_variation_id") - getVariationsTableFn = x -> rulesetsVariationsTable(rulesetsCollectionDB(x[1]), [x[2]]; remove_constants=remove_constants) - return variationsTableFromSimulations(query, :RulesVarID, getVariationsTableFn) +function variationsTable(location::Symbol, S::AbstractSampling; remove_constants::Bool=false) + return variationsTable(location, variationsDatabase(location, S), variationIDs(location, S); remove_constants=remove_constants) end -function icCellVariationsTable(simulation_ids::AbstractVector{<:Integer}; remove_constants::Bool=false) - query = constructSelectQuery("simulations", "WHERE simulation_id IN ($(join(simulation_ids,",")));", selection="ic_cell_id, ic_cell_variation_id") - getVariationsTableFn = x -> icCellVariationsTable(icCellDB(x[1]), [x[2]]; remove_constants=remove_constants) - return variationsTableFromSimulations(query, :ICCellVarID, getVariationsTableFn) +function variationsTable(location::Symbol, ::Nothing, variation_ids::AbstractVector{<:Integer}; kwargs...) + @assert all(x -> x == -1, variation_ids) "If the $(location)_variation database is missing, then all $(locationVarIDName(location))s must be -1." + return DataFrame(shortLocationVariationID(location)=>variation_ids) end -function icECMVariationsTable(simulation_ids::AbstractVector{<:Integer}; remove_constants::Bool=false) - query = constructSelectQuery("simulations", "WHERE simulation_id IN ($(join(simulation_ids,",")));", selection="ic_ecm_id, ic_ecm_variation_id") - getVariationsTableFn = x -> icECMVariationsTable(icECMDB(x[1]), [x[2]]; remove_constants=remove_constants) - return variationsTableFromSimulations(query, :ICECMVarID, getVariationsTableFn) +function variationsTable(location::Symbol, ::Missing, variation_ids::AbstractVector{<:Integer}; kwargs...) + @assert all(x -> x == 0, variation_ids) "If the $(location)_folder does not contain a $(location)_variations.db, then all $(locationVarIDName(location))s must be 0." 
+ return DataFrame(shortLocationVariationID(location)=>variation_ids) end function addFolderColumns!(df::DataFrame) - col_names = ["custom_code", "config", "rulesets_collection", "ic_cell", "ic_substrate", "ic_ecm", "ic_dc"] - get_function = [getFolder, getFolder, getOptionalFolder, getOptionalFolder, getOptionalFolder, getOptionalFolder] - for (col_name, get_function) in zip(col_names, get_function) - if !("$(col_name)_id" in names(df)) + for (location, location_dict) in pairs(inputs_dict) + if !(locationIDName(location) in names(df)) continue end - unique_ids = unique(df[!,"$(col_name)_id"]) - D = Dict{Int, String}() - for id in unique_ids - D[id] = get_function("$(col_name)s", "$(col_name)_id", id) + unique_ids = unique(df[!,locationIDName(location)]) + folder_names_dict = [id => inputFolderName(location, id) for id in unique_ids] |> Dict{Int,String} + if location_dict["required"] + @assert !any(folder_names_dict |> values .|> isempty) "Some $(location) folders are empty/missing, but they are required." end - df[!,"$(col_name)_folder"] .= [D[id] for id in df[!,"$(col_name)_id"]] + df[!,"$(location)_folder"] .= [folder_names_dict[id] for id in df[!,locationIDName(location)]] end return df end -function simulationsTableFromQuery(query::String; remove_constants::Bool=true, sort_by::Vector{String}=String[], sort_ignore::Vector{String}=["SimID", "ConfigVarID", "RulesVarID", "ICCellVarID"]) +function simulationsTableFromQuery(query::String; remove_constants::Bool=true, + sort_by=String[], + sort_ignore=[:SimID; shortLocationVariationID.(project_locations.varied)]) + #! preprocess sort kwargs + sort_by = (sort_by isa Vector ? sort_by : [sort_by]) .|> Symbol + sort_ignore = (sort_ignore isa Vector ? sort_ignore : [sort_ignore]) .|> Symbol + df = queryToDataFrame(query) - id_col_names_to_remove = names(df) # a bunch of ids that we don't want to show - filter!(n -> !(n in ["simulation_id", "config_variation_id", "ic_cell_id", "rulesets_collection_variation_id", "ic_cell_variation_id", "ic_ecm_variation_id"]), id_col_names_to_remove) # keep the simulation_id and config_variation_id columns - addFolderColumns!(df) # add the folder columns - select!(df, Not(id_col_names_to_remove)) # remove the id columns - - # handle each of the varying inputs - unique_tuples_config = [(row.config_folder, row.config_variation_id) for row in eachrow(df)] |> unique - df = appendVariations(df, unique_tuples_config, configDB, configVariationsTable, :config_folder => :folder_name, :config_variation_id => :ConfigVarID) - unique_tuples_rulesets_collection = [(row.rulesets_collection_folder, row.rulesets_collection_variation_id) for row in eachrow(df)] |> unique - df = appendVariations(df, unique_tuples_rulesets_collection, rulesetsCollectionDB, rulesetsVariationsTable, :rulesets_collection_folder => :folder_name, :rulesets_collection_variation_id => :RulesVarID) - unique_tuples_ic_cell = [(row.ic_cell_folder, row.ic_cell_variation_id) for row in eachrow(df)] |> unique - df = appendVariations(df, unique_tuples_ic_cell, icCellDB, icCellVariationsTable, :ic_cell_folder => :folder_name, :ic_cell_variation_id => :ICCellVarID) - unique_tuples_ic_ecm = [(row.ic_ecm_folder, row.ic_ecm_variation_id) for row in eachrow(df)] |> unique - df = appendVariations(df, unique_tuples_ic_ecm, icECMDB, icECMVariationsTable, :ic_ecm_folder => :folder_name, :ic_ecm_variation_id => :ICECMVarID) - - rename!(df, [:simulation_id => :SimID, :config_variation_id => :ConfigVarID, :rulesets_collection_variation_id => :RulesVarID, :ic_cell_variation_id 
=> :ICCellVarID, :ic_ecm_variation_id => :ICECMVarID]) + id_col_names_to_remove = names(df) #! a bunch of ids that we don't want to show + locations = project_locations.varied + + filter!(n -> !(n in ["simulation_id"; [locationVarIDName(loc) for loc in locations]]), id_col_names_to_remove) #! but do not throw away the variation ids or the sim id, we want to show these + addFolderColumns!(df) #! add the folder columns + select!(df, Not(id_col_names_to_remove)) #! remove the id columns + + #! handle each of the varying inputs + for loc in locations + df = appendVariations(loc, df) + end + + rename!(df, [:simulation_id => :SimID; [(locationVarIDName(loc) |> Symbol) => shortLocationVariationID(loc) for loc in locations]]) col_names = names(df) if remove_constants && size(df, 1) > 1 filter!(n -> length(unique(df[!, n])) > 1, col_names) @@ -672,18 +410,22 @@ function simulationsTableFromQuery(query::String; remove_constants::Bool=true, s if isempty(sort_by) sort_by = deepcopy(col_names) end - sort_by = [n for n in sort_by if !(n in sort_ignore) && (n in col_names)] # sort by columns in sort_by (overridden by sort_ignore) and in the dataframe + sort_by = [n for n in sort_by if !(n in sort_ignore) && (n in col_names)] #! sort by columns in sort_by (overridden by sort_ignore) and in the dataframe sort!(df, sort_by) return df end -function appendVariations(df::DataFrame, unique_tuples::Vector{Tuple{String, Int}}, getDBFn::Function, getVariationsTableFn::Function, folder_pair::Pair{Symbol, Symbol}, id_pair::Pair{Symbol, Symbol}) - var_df = DataFrame(id_pair[2] => Int[]) +function appendVariations(location::Symbol, df::DataFrame) + short_var_name = shortLocationVariationID(location) + var_df = DataFrame(short_var_name => Int[], :folder_name => String[]) + unique_tuples = [(row["$(location)_folder"], row[locationVarIDName(location)]) for row in eachrow(df)] |> unique for unique_tuple in unique_tuples - temp_df = getVariationsTableFn(getDBFn(unique_tuple[1]), [unique_tuple[2]]; remove_constants=false) + temp_df = variationsTable(location, variationsDatabase(location, unique_tuple[1]), [unique_tuple[2]]; remove_constants=false) temp_df[!,:folder_name] .= unique_tuple[1] append!(var_df, temp_df, cols=:union) end + folder_pair = ("$(location)_folder" |> Symbol) => :folder_name + id_pair = (locationVarIDName(location) |> Symbol) => short_var_name return outerjoin(df, var_df, on = [folder_pair, id_pair]) end @@ -704,11 +446,6 @@ end ########### Printing Database Functions ########### -printConfigVariationsTable(args...; kwargs...) = configVariationsTable(args...; kwargs...) |> println -printRulesetsVariationsTable(args...; kwargs...) = rulesetsVariationsTable(args...; kwargs...) |> println -printICCellVariationsTable(args...; kwargs...) = icCellVariationsTable(args...; kwargs...) |> println -printICECMVariationsTable(args...; kwargs...) = icECMVariationsTable(args...; kwargs...) |> println - """ printSimulationsTable() diff --git a/src/VCTDeletion.jl b/src/VCTDeletion.jl index e5d24e17..9e2a1371 100644 --- a/src/VCTDeletion.jl +++ b/src/VCTDeletion.jl @@ -24,49 +24,27 @@ function deleteSimulations(simulation_ids::AbstractVector{<:Union{Integer,Missin filter!(x -> !ismissing(x), simulation_ids) where_stmt = "WHERE simulation_id IN ($(join(simulation_ids,","))) $(and_constraints)" sim_df = constructSelectQuery("simulations", where_stmt) |> queryToDataFrame - simulation_ids = sim_df.simulation_id # update based on the constraints added + simulation_ids = sim_df.simulation_id #! 
update based on the constraints added DBInterface.execute(db,"DELETE FROM simulations WHERE simulation_id IN ($(join(simulation_ids,",")));") + for row in eachrow(sim_df) rm_hpc_safe(outputFolder("simulation", row.simulation_id); force=true, recursive=true) - config_folder = configFolder(row.config_id) - result_df = constructSelectQuery( - "simulations", - "WHERE config_id = $(row.config_id) AND config_variation_id = $(row.config_variation_id);"; - selection="COUNT(*)" - ) |> queryToDataFrame - if result_df.var"COUNT(*)"[1] == 0 - rm_hpc_safe(joinpath(data_dir, "inputs", "configs", config_folder, "config_variations", "config_variation_$(row.config_variation_id).xml"); force=true) - end - - rulesets_collection_folder = rulesetsCollectionFolder(row.rulesets_collection_id) - result_df = constructSelectQuery( - "simulations", - "WHERE rulesets_collection_id = $(row.rulesets_collection_id) AND rulesets_collection_variation_id = $(row.rulesets_collection_variation_id);"; - selection="COUNT(*)" - ) |> queryToDataFrame - if result_df.var"COUNT(*)"[1] == 0 - rm_hpc_safe(joinpath(data_dir, "inputs", "rulesets_collections", rulesets_collection_folder, "rulesets_collections_variations", "rulesets_variation_$(row.rulesets_collection_variation_id).xml"); force=true) - end - - ic_cell_folder = icCellFolder(row.ic_cell_id) - result_df = constructSelectQuery( - "simulations", - "WHERE ic_cell_id = $(row.ic_cell_id) AND ic_cell_variation_id = $(row.ic_cell_variation_id);"; - selection="COUNT(*)" - ) |> queryToDataFrame - if result_df.var"COUNT(*)"[1] == 0 - rm_hpc_safe(joinpath(data_dir, "inputs", "ic_cells", ic_cell_folder, "ic_cell_variations", "ic_cell_variation_$(row.ic_cell_variation_id).xml"); force=true) - end - - ic_ecm_folder = icECMFolder(row.ic_ecm_id) - result_df = constructSelectQuery( - "simulations", - "WHERE ic_ecm_id = $(row.ic_ecm_id) AND ic_ecm_variation_id = $(row.ic_ecm_variation_id);"; - selection="COUNT(*)" - ) |> queryToDataFrame - if result_df.var"COUNT(*)"[1] == 0 - rm_hpc_safe(joinpath(data_dir, "inputs", "ic_ecms", ic_ecm_folder, "ic_ecm_variations", "ic_ecm_variation_$(row.ic_ecm_variation_id).xml"); force=true) + for (location, location_dict) in pairs(inputs_dict) + if !any(location_dict["varied"]) + continue + end + id_name = locationIDName(location) + row_id = row[id_name] + folder = inputFolderName(location, row_id) + result_df = constructSelectQuery( + "simulations", + "WHERE $(id_name) = $(row_id) AND $(locationVarIDName(location)) = $(row[locationVarIDName(location)])"; + selection="COUNT(*)" + ) |> queryToDataFrame + if result_df.var"COUNT(*)"[1] == 0 + rm_hpc_safe(joinpath(locationPath(location, folder), variationsTableName(location), "$(location)_variation_$(row[locationVarIDName(location)]).xml"); force=true) + end end end @@ -78,7 +56,7 @@ function deleteSimulations(simulation_ids::AbstractVector{<:Union{Integer,Missin monad_ids_to_delete = Int[] for monad_id in monad_ids monad_simulation_ids = readMonadSimulationIDs(monad_id) - if !any(x -> x in simulation_ids, monad_simulation_ids) # if none of the monad simulation ids are among those to be deleted, then nothing to do here + if !any(x -> x in simulation_ids, monad_simulation_ids) #! 
if none of the monad simulation ids are among those to be deleted, then nothing to do here continue end filter!(x -> !(x in simulation_ids), monad_simulation_ids) @@ -95,7 +73,7 @@ function deleteSimulations(simulation_ids::AbstractVector{<:Union{Integer,Missin end deleteSimulations(simulation_id::Int; delete_supers::Bool=true, and_constraints::String="") = deleteSimulations([simulation_id]; delete_supers=delete_supers, and_constraints=and_constraints) -deleteSimulation = deleteSimulations # alias +deleteSimulation = deleteSimulations #! alias deleteAllSimulations(; delete_supers::Bool=true, and_constraints::String="") = getSimulationIDs() |> x -> deleteSimulations(x; delete_supers=delete_supers, and_constraints=and_constraints) function deleteMonad(monad_ids::AbstractVector{<:Integer}; delete_subs::Bool=true, delete_supers::Bool=true) @@ -119,7 +97,7 @@ function deleteMonad(monad_ids::AbstractVector{<:Integer}; delete_subs::Bool=tru sampling_ids_to_delete = Int[] for sampling_id in sampling_ids sampling_monad_ids = readSamplingMonadIDs(sampling_id) - if !any(x -> x in monad_ids, sampling_monad_ids) # if none of the sampling monad ids are among those to be deleted, then nothing to do here + if !any(x -> x in monad_ids, sampling_monad_ids) #! if none of the sampling monad ids are among those to be deleted, then nothing to do here continue end filter!(x -> !(x in monad_ids), sampling_monad_ids) @@ -150,11 +128,11 @@ function deleteSampling(sampling_ids::AbstractVector{<:Integer}; delete_subs::Bo all_sampling_ids = constructSelectQuery("samplings"; selection="sampling_id") |> queryToDataFrame |> x -> x.sampling_id for sampling_id in all_sampling_ids if sampling_id in sampling_ids - continue # skip the samplings to be deleted (we want to delete their monads) + continue #! skip the samplings to be deleted (we want to delete their monads) end - # this is then a sampling that we are not deleting, do not delete their monads!! + #! this is then a sampling that we are not deleting, do not delete their monads!! monad_ids = readSamplingMonadIDs(sampling_id) - filter!(x -> !(x in monad_ids), monad_ids_to_delete) # if a monad to delete is in the sampling to keep, then do not delete it!! (or more in line with logic here: if a monad marked for deletion is not in this sampling we are keeping, then leave it in the deletion list) + filter!(x -> !(x in monad_ids), monad_ids_to_delete) #! if a monad to delete is in the sampling to keep, then do not delete it!! (or more in line with logic here: if a monad marked for deletion is not in this sampling we are keeping, then leave it in the deletion list) end deleteMonad(monad_ids_to_delete; delete_subs=true, delete_supers=false) end @@ -167,7 +145,7 @@ function deleteSampling(sampling_ids::AbstractVector{<:Integer}; delete_subs::Bo trial_ids_to_delete = Int[] for trial_id in trial_ids trial_sampling_ids = readTrialSamplingIDs(trial_id) - if !any(x -> x in sampling_ids, trial_sampling_ids) # if none of the trial sampling ids are among those to be deleted, then nothing to do here + if !any(x -> x in sampling_ids, trial_sampling_ids) #! 
if none of the trial sampling ids are among those to be deleted, then nothing to do here continue end filter!(x -> !(x in sampling_ids), trial_sampling_ids) @@ -198,11 +176,11 @@ function deleteTrial(trial_ids::AbstractVector{<:Integer}; delete_subs::Bool=tru all_trial_ids = constructSelectQuery("trials"; selection="trial_id") |> queryToDataFrame |> x -> x.trial_id for trial_id in all_trial_ids if trial_id in trial_ids - continue # skip the trials to be deleted (we want to delete their samplings) + continue #! skip the trials to be deleted (we want to delete their samplings) end - # this is then a trial that we are not deleting, do not delete their samplings!! + #! this is then a trial that we are not deleting, do not delete their samplings!! sampling_ids = readTrialSamplingIDs(trial_id) - filter!(x -> !(x in sampling_ids), sampling_ids_to_delete) # if a sampling to delete is in the trial to keep, then do not delete it!! (or more in line with logic here: if a sampling marked for deletion is not in this trial we are keeping, then leave it in the deletion list) + filter!(x -> !(x in sampling_ids), sampling_ids_to_delete) #! if a sampling to delete is in the trial to keep, then do not delete it!! (or more in line with logic here: if a sampling marked for deletion is not in this trial we are keeping, then leave it in the deletion list) end deleteSampling(sampling_ids_to_delete; delete_subs=true, delete_supers=false) end @@ -225,15 +203,15 @@ If the user aborts the reset, the user will then be asked if they want to contin """ function resetDatabase(; force_reset::Bool=false, force_continue::Bool=false) if !force_reset - # prompt user to confirm + #! prompt user to confirm println("Are you sure you want to reset the database? (y/n)") response = readline() - if response != "y" # make user be very specific about resetting + if response != "y" #! make user be very specific about resetting println("\tYou entered '$response'.\n\tResetting the database has been cancelled.") if !force_continue println("\nDo you want to continue with the script? (y/n)") response = readline() - if response != "y" # make user be very specific about continuing + if response != "y" #! 
make user be very specific about continuing println("\tYou entered '$response'.\n\tThe script has been cancelled.") error("Script cancelled.") end @@ -246,43 +224,21 @@ function resetDatabase(; force_reset::Bool=false, force_continue::Bool=false) rm_hpc_safe(joinpath(data_dir, "outputs", folder); force=true, recursive=true) end - for config_folder in (readdir(joinpath(data_dir, "inputs", "configs"), sort=false, join=true) |> filter(x -> isdir(x))) - resetConfigFolder(config_folder) - end - - config_folders = constructSelectQuery("configs"; selection="folder_name") |> queryToDataFrame |> x -> x.folder_name - for config_folder in config_folders - resetConfigFolder(joinpath(data_dir, "inputs", "configs", config_folder)) - end - - for path_to_rulesets_collection_folder in (readdir(joinpath(data_dir, "inputs", "rulesets_collections"), sort=false, join=true) |> filter(x->isdir(x))) - resetRulesetsCollectionFolder(path_to_rulesets_collection_folder) - end - - rulesets_collection_folders = constructSelectQuery("rulesets_collections"; selection="folder_name") |> queryToDataFrame |> x -> x.folder_name - for rulesets_collection_folder in rulesets_collection_folders - resetRulesetsCollectionFolder(joinpath(data_dir, "inputs", "rulesets_collections", rulesets_collection_folder)) - end - - for ic_cell_folder in (readdir(joinpath(data_dir, "inputs", "ics", "cells"), sort=false, join=true) |> filter(x -> isdir(x))) - resetICCellFolder(ic_cell_folder) - end - - ic_cell_folders = constructSelectQuery("ic_cells"; selection="folder_name") |> queryToDataFrame |> x -> x.folder_name - for ic_cell_folder in ic_cell_folders - resetICCellFolder(joinpath(data_dir, "inputs", "ics", "cells", ic_cell_folder)) - end - - for ic_ecm_folder in (readdir(joinpath(data_dir, "inputs", "ics", "ecms"), sort=false, join=true) |> filter(x -> isdir(x))) - resetICECMFolder(joinpath(ic_ecm_folder, "ic_ecm_variations")) + for (location, location_dict) in pairs(inputs_dict) + if !any(location_dict["varied"]) + continue + end + path_to_location = locationPath(location) + for folder in (readdir(path_to_location, sort=false, join=true) |> filter(x->isdir(x))) + resetFolder(location, folder) + end + folders = constructSelectQuery(tableName(location); selection="folder_name") |> queryToDataFrame |> x -> x.folder_name + for folder in folders + resetFolder(location, joinpath(path_to_location, folder)) + end end - ic_ecm_folders = constructSelectQuery("ic_ecms"; selection="folder_name") |> queryToDataFrame |> x -> x.folder_name - for ic_ecm_folder in ic_ecm_folders - resetICECMFolder(joinpath(data_dir, "inputs", "ics", "ecms", ic_ecm_folder, "ic_ecm_variations")) - end - - for custom_code_folder in (readdir(joinpath(data_dir, "inputs", "custom_codes"), sort=false, join=true) |> filter(x->isdir(x))) + for custom_code_folder in (readdir(locationPath(:custom_code), sort=false, join=true) |> filter(x->isdir(x))) files = [baseToExecutable("project"), "compilation.log", "compilation.err", "macros.txt"] for file in files rm_hpc_safe(joinpath(custom_code_folder, file); force=true) @@ -291,7 +247,7 @@ function resetDatabase(; force_reset::Bool=false, force_continue::Bool=false) custom_code_folders = constructSelectQuery("custom_codes"; selection="folder_name") |> queryToDataFrame |> x -> x.folder_name for custom_code_folder in custom_code_folders - rm_hpc_safe(joinpath(data_dir, "inputs", "custom_codes", custom_code_folder, baseToExecutable("project")); force=true) + rm_hpc_safe(joinpath(locationPath(:custom_code, custom_code_folder), 
baseToExecutable("project")); force=true) end if db.file == ":memory:" @@ -303,39 +259,24 @@ function resetDatabase(; force_reset::Bool=false, force_continue::Bool=false) return nothing end -function resetConfigFolder(path_to_config_folder::String) - if !isdir(path_to_config_folder) - return - end - rm_hpc_safe(joinpath(path_to_config_folder, "config_variations.db"); force=true) - rm_hpc_safe(joinpath(path_to_config_folder, "config_variations"); force=true, recursive=true) -end - -function resetRulesetsCollectionFolder(path_to_rulesets_collection_folder::String) - if !isdir(path_to_rulesets_collection_folder) +function resetFolder(location::Symbol, folder::String) + inputs_dict_entry = inputs_dict[location] + path_to_folder = locationPath(location, folder) + if !isdir(path_to_folder) return end - if isfile(joinpath(path_to_rulesets_collection_folder, "base_rulesets.csv")) - rm_hpc_safe(joinpath(path_to_rulesets_collection_folder, "base_rulesets.xml"); force=true) - end - rm_hpc_safe(joinpath(path_to_rulesets_collection_folder, "rulesets_collection_variations.db"); force=true) - rm_hpc_safe(joinpath(path_to_rulesets_collection_folder, "rulesets_collections_variations"); force=true, recursive=true) -end - -function resetICCellFolder(path_to_ic_cell_folder::String) - if !isdir(path_to_ic_cell_folder) || !isfile(joinpath(path_to_ic_cell_folder, "cells.xml")) - return - end - rm_hpc_safe(joinpath(path_to_ic_cell_folder, "ic_cell_variations.db"); force=true) - rm_hpc_safe(joinpath(path_to_ic_cell_folder, "ic_cell_variations"); force=true, recursive=true) -end - -function resetICECMFolder(path_to_ic_ecm_folder::String) - if !isdir(path_to_ic_ecm_folder) || !isfile(joinpath(path_to_ic_ecm_folder, "ecm.xml")) - return + if inputs_dict_entry["basename"] isa Vector + #! keep the most elementary of these and remove the rest + ind = findfirst(x -> joinpath(path_to_folder, x) |> isfile, inputs_dict_entry["basename"]) + if isnothing(ind) + return #! probably should not end up here, but it could happen if a location folder was created but never populated with the base file + end + for base_file in inputs_dict_entry["basename"][ind+1:end] + rm_hpc_safe(joinpath(path_to_folder, base_file); force=true) + end end - rm_hpc_safe(joinpath(path_to_ic_ecm_folder, "ic_ecm_variations.db"); force=true) - rm_hpc_safe(joinpath(path_to_ic_ecm_folder, "ic_ecm_variations"); force=true, recursive=true) + rm_hpc_safe(joinpath(path_to_folder, "$(location)_variations.db"); force=true) + rm_hpc_safe(joinpath(path_to_folder, variationsTableName(location)); force=true, recursive=true) end """ @@ -366,7 +307,7 @@ function deleteSimulationsByStatus(status_codes_to_delete::Vector{String}=["Fail println("Are you sure you want to delete all $(length(simulation_ids)) simulations with status code '$status_code'? (y/n)") response = readline() println("You entered '$response'.") - if response != "y" # make user be very specific about resetting + if response != "y" #! 
make user be very specific about resetting println("\tDeleting simulations with status code '$status_code' has been cancelled.") continue end @@ -387,19 +328,15 @@ This is used when running simulations if they error so that the monads no longer """ function eraseSimulationID(simulation_id::Int; monad_id::Union{Missing,Int}=missing) if ismissing(monad_id) - query = constructSelectQuery("simulations", "WHERE simulation_id = $(simulation_id);") + query = constructSelectQuery("simulations", "WHERE simulation_id = $(simulation_id)") df = queryToDataFrame(query) - where_stmt = """ - WHERE (config_id, config_variation_id,\ - rulesets_collection_id, rulesets_collection_variation_id,\ - ic_cell_id, ic_cell_variation_id,\ - ic_ecm_id, ic_ecm_variation_id) = \ - ($(df.config_id[1]), $(df.config_variation_id[1]),\ - $(df.rulesets_collection_id[1]),\ - $(df.rulesets_collection_variation_id[1]),\ - $(df.ic_cell_id[1]), $(df.ic_cell_variation_id[1]),\ - $(df.ic_ecm_id[1]), $(df.ic_ecm_variation_id[1])); - """ + all_id_features = [locationIDName(loc) for loc in project_locations.varied] #! project_locations.varied is a Tuple, so doing locationIDName.(project_locations.varied) makes a Tuple, not a Vector + add_id_values = [df[1, id_feature] for id_feature in all_id_features] + all_variation_id_features = [locationVarIDName(loc) for loc in project_locations.varied] #! project_locations.varied is a Tuple, so doing locationVarIDName.(project_locations.varied) makes a Tuple, not a Vector + all_variation_id_values = [df[1, variation_id_feature] for variation_id_feature in all_variation_id_features] + all_features = [all_id_features; all_variation_id_features] + all_values = [add_id_values; all_variation_id_values] + where_stmt = "WHERE ($(join(all_features, ", "))) = ($(join(all_values, ", ")))" query = constructSelectQuery("monads", where_stmt; selection="monad_id") df = queryToDataFrame(query) monad_id = df.monad_id[1] @@ -407,11 +344,11 @@ function eraseSimulationID(simulation_id::Int; monad_id::Union{Missing,Int}=miss simulation_ids = readMonadSimulationIDs(monad_id) index = findfirst(x->x==simulation_id, simulation_ids) if isnothing(index) - return # maybe this could happen? so let's check just in case + return #! maybe this could happen? so let's check just in case end if length(simulation_ids)==1 - # then this was the only simulation in this monad; delete the monad and any samplings, etc. that depend on it - # do not delete the given simulation from the database so that we can check the output files + #! then this was the only simulation in this monad; delete the monad and any samplings, etc. that depend on it + #! do not delete the given simulation from the database so that we can check the output files deleteMonad(monad_id; delete_subs=false, delete_supers=true) return end @@ -427,7 +364,7 @@ function rm_hpc_safe(path::String; force::Bool=false, recursive::Bool=false) if !ispath(path) return end - # NFS filesystem could stop the deletion by putting a lock on the folder or something + #! 
NFS filesystem could stop the deletion by putting a lock on the folder or something src = path path_rel_to_data = replace(path, "$(data_dir)/" => "") date_time = Dates.format(now(),"yymmdd") diff --git a/src/VCTExport.jl b/src/VCTExport.jl index 5844ce6a..875b0d45 100644 --- a/src/VCTExport.jl +++ b/src/VCTExport.jl @@ -51,70 +51,69 @@ function prepareFolder(simulation::Simulation, export_folder::AbstractString) query = constructSelectQuery("simulations", "WHERE simulation_id = $(simulation.id)") row = queryToDataFrame(query; is_row=true) - # config file - config_folder = simulation.inputs.config.folder - path_to_xml = joinpath(data_dir, "inputs", "configs", config_folder, "config_variations", "config_variation_$(row.config_variation_id[1]).xml") + #! config file + path_to_xml = joinpath(locationPath(:config, simulation), "config_variations", "config_variation_$(row.config_variation_id[1]).xml") cp(path_to_xml, joinpath(export_config_folder, "PhysiCell_settings.xml")) - # custom code - custom_code_folder = simulation.inputs.custom_code.folder - path_to_custom_codes_folder = joinpath(data_dir, "inputs", "custom_codes", custom_code_folder) + #! custom code + path_to_custom_codes_folder = locationPath(:custom_code, simulation) for filename in ["main.cpp", "Makefile"] cp(joinpath(path_to_custom_codes_folder, filename), joinpath(export_folder, filename)) end cp(joinpath(path_to_custom_codes_folder, "custom_modules"), joinpath(export_folder, "custom_modules")) - # rulesets + #! rulesets if row.rulesets_collection_id[1] != -1 - rulesets_collection_folder = simulation.inputs.rulesets_collection.folder - path_to_xml = joinpath(data_dir, "inputs", "rulesets_collections", rulesets_collection_folder, "rulesets_collections_variations", "rulesets_variation_$(row.rulesets_collection_variation_id[1]).xml") + path_to_xml = joinpath(locationPath(:rulesets_collection, simulation), "rulesets_collection_variations", "rulesets_collection_variation_$(row.rulesets_collection_variation_id[1]).xml") path_to_csv = joinpath(export_folder, "config", "cell_rules.csv") exportRulesToCSV(path_to_csv, path_to_xml) end - # ic cells + #! intracellulars + if row.intracellular_id[1] != -1 + path_to_intracellular = joinpath(locationPath(:intracellular, simulation), "intracellular.xml") + cp(path_to_intracellular, joinpath(export_folder, "config", "intracellular.xml")) + end + + #! ic cells if row.ic_cell_id[1] != -1 - ic_cell_folder = simulation.inputs.ic_cell.folder - path_to_ic_cells_folder = joinpath(data_dir, "inputs", "ics", "cells", ic_cell_folder) + path_to_ic_cells_folder = locationPath(:ic_cell, simulation) ic_cell_file_name = readdir(path_to_ic_cells_folder) filter!(x -> x in ["cells.csv", "cells.xml"], ic_cell_file_name) ic_cell_file_name = ic_cell_file_name[1] if endswith(ic_cell_file_name, ".xml") - # rel path from ic_cells_folder + #! rel path from ic_cells_folder ic_cell_file_name = joinpath("ic_cell_variations", "ic_cell_variation_$(row.ic_cell_variation_id[1])_s$(simulation.id).csv") end cp(joinpath(path_to_ic_cells_folder, ic_cell_file_name), joinpath(export_folder, "config", "cells.csv")) end - # ic substrates + #! ic substrates if row.ic_substrate_id[1] != -1 - ic_substrate_folder = simulation.inputs.ic_substrate.folder - path_to_file = joinpath(data_dir, "inputs", "ics", "substrates", ic_substrate_folder, "substrates.csv") + path_to_file = joinpath(locationPath(:ic_substrate, simulation), "substrates.csv") cp(path_to_file, joinpath(export_folder, "config", "substrates.csv")) end - # ic ecm + #! 
ic ecm if row.ic_ecm_id[1] != -1 - ic_ecm_folder = simulation.inputs.ic_ecm.folder - path_to_ic_ecm_folder = joinpath(data_dir, "inputs", "ics", "ecms", ic_ecm_folder) + path_to_ic_ecm_folder = locationPath(:ic_ecm, simulation) ic_ecm_file_name = readdir(path_to_ic_ecm_folder) filter!(x -> x in ["ecm.csv", "ecm.xml"], ic_ecm_file_name) ic_ecm_file_name = ic_ecm_file_name[1] if endswith(ic_ecm_file_name, ".xml") - # rel path from ic_ecm_folder + #! rel path from ic_ecm_folder ic_ecm_file_name = joinpath("ic_ecm_variations", "ic_ecm_variation_$(row.ic_ecm_variation_id[1])_s$(simulation.id).csv") end cp(joinpath(path_to_ic_ecm_folder, ic_ecm_file_name), joinpath(export_folder, "config", "ecm.csv")) end - # ic dcs + #! ic dcs if row.ic_dc_id[1] != -1 - ic_dc_folder = simulation.inputs.ic_dc.folder - path_to_file = joinpath(data_dir, "inputs", "ics", "dcs", ic_dc_folder, "dcs.csv") + path_to_file = joinpath(locationPath(:ic_dc, simulation), "dcs.csv") cp(path_to_file, joinpath(export_folder, "config", "dcs.csv")) end - # get physicell version + #! get physicell version physicell_version_id = row.physicell_version_id[1] query = constructSelectQuery("physicell_versions", "WHERE physicell_version_id = $physicell_version_id") row = queryToDataFrame(query; is_row=true) @@ -168,7 +167,7 @@ function revertMain(export_folder::AbstractString, physicell_version::AbstractSt parsing_block = """ // load and parse settings file(s) - + bool XML_status = false; char copy_command [1024]; if( argc > 1 ) @@ -185,13 +184,6 @@ function revertMain(export_folder::AbstractString, physicell_version::AbstractSt { exit(-1); } """ lines = [lines[1:(idx1-1)]; parsing_block; lines[idx1:end]] - # main_idx = findfirst(x -> contains(x, "int main"), lines) - # if contains(lines[main_idx], "{") - # lines = [lines[1:main_idx]; parsing_block; lines[main_idx+1:end]] - # else - # # this might not be 100% robust, but I have to hope for some standardization of the main function - # lines = [lines[1:main_idx+1]; parsing_block; lines[main_idx+2:end]] - # end open(path_to_main, "w") do io for line in lines @@ -202,7 +194,7 @@ function revertMain(export_folder::AbstractString, physicell_version::AbstractSt end function revertMakefile(export_folder::AbstractString, physicell_version::AbstractString) - return true # nothing to do as of yet for the Makefile + return true #! nothing to do as of yet for the Makefile end function revertConfig(export_folder::AbstractString, physicell_version::AbstractString) @@ -210,18 +202,18 @@ function revertConfig(export_folder::AbstractString, physicell_version::Abstract path_to_config = joinpath(path_to_config_folder, "PhysiCell_settings.xml") xml_doc = openXML(path_to_config) - # output folder + #! output folder folder_element = retrieveElement(xml_doc, ["save", "folder"]) set_content(folder_element, "output") - # ic substrate + #! ic substrate substrate_ic_element = retrieveElement(xml_doc, ["microenvironment_setup", "options", "initial_condition"]) using_substrate_ics = isfile(joinpath(path_to_config_folder, "substrates.csv")) set_attributes(substrate_ic_element; type="csv", enabled=string(using_substrate_ics)) filename_element = find_element(substrate_ic_element, "filename") set_content(filename_element, joinpath(".", "config", "substrates.csv")) - - # ic cells + + #! 
ic cells cell_ic_element = retrieveElement(xml_doc, ["initial_conditions", "cell_positions"]) using_cell_ics = isfile(joinpath(path_to_config_folder, "cells.csv")) set_attributes(cell_ic_element; type="csv", enabled=string(using_substrate_ics)) @@ -230,20 +222,20 @@ function revertConfig(export_folder::AbstractString, physicell_version::Abstract filename_element = find_element(cell_ic_element, "filename") set_content(filename_element, "cells.csv") - # ic ecm + #! ic ecm using_ecm_ics = isfile(joinpath(path_to_config_folder, "ecm.csv")) if using_ecm_ics setECMSetupElement(xml_doc) end - # ic dcs + #! ic dcs dc_ic_element = retrieveElement(xml_doc, ["microenvironment_setup", "options", "dirichlet_nodes"]) using_dc_ics = isfile(joinpath(path_to_config_folder, "dcs.csv")) set_attributes(dc_ic_element; type="csv", enabled=string(using_dc_ics)) filename_element = find_element(dc_ic_element, "filename") set_content(filename_element, joinpath("config", "dcs.csv")) - - # rulesets + + #! rulesets rules_element = retrieveElement(xml_doc, ["cell_rules", "rulesets", "ruleset"]) using_rules = isfile(joinpath(path_to_config_folder, "cell_rules.csv")) set_attributes(rules_element; type="csv", enabled=string(using_rules)) @@ -252,6 +244,9 @@ function revertConfig(export_folder::AbstractString, physicell_version::Abstract filename_element = find_element(rules_element, "filename") set_content(filename_element, "cell_rules.csv") + #! intracellulars + #! lol, not supported for export yet + closeXML(xml_doc) return true end @@ -285,7 +280,7 @@ function revertCustomModules(export_folder::AbstractString, physicell_version::A end function revertCustomHeader(path_to_custom_modules::AbstractString, physicell_version::AbstractString) - return true # nothing to do as of yet for the custom header + return true #! nothing to do as of yet for the custom header end function revertCustomCPP(path_to_custom_modules::AbstractString, physicell_version::AbstractString) diff --git a/src/VCTHPC.jl b/src/VCTHPC.jl index fd0a1b64..4af4380c 100644 --- a/src/VCTHPC.jl +++ b/src/VCTHPC.jl @@ -49,7 +49,7 @@ end setJobOptions(options::Dict) Set the default job options for use with SLURM. - + For any key-value pair in `options`, the corresponding key in the global `sbatch_options` dictionary is set to the value. A flag is then added to the sbatch command for each key-value pair in `options`: `--key=value`. When running simulations, any values in this dictionary that are `Function`'s will be assumed to be functions of the simulation id. diff --git a/src/VCTICCell.jl b/src/VCTICCell.jl index 5de7071e..161047e7 100644 --- a/src/VCTICCell.jl +++ b/src/VCTICCell.jl @@ -6,7 +6,7 @@ using PhysiCellCellCreator Create folder with a template XML file for IC cells. See the PhysiCellCellCreator.jl documentation for more information on IC cells and how this function works outside of pcvct. -This pcvct function runs the `createICCellXMLTemplate` function from PhysiCellCellCreator.jl and then reinitializes the database. +This pcvct function runs the `createICCellXMLTemplate` function from PhysiCellCellCreator.jl and then updates the database. Furthermore, the folder can be passed in just as the name of the folder located in `data/inputs/ics/cells/` rather than the full path. This functionality is run outside of a PhysiCell runtime. @@ -23,9 +23,25 @@ Importantly, no two simulations will use the same CSV file. 
""" function createICCellXMLTemplate(folder::String) if length(splitpath(folder)) == 1 - # then the folder is just the name of the ics/cells/folder folder - folder = joinpath(data_dir, "inputs", "ics", "cells", folder) + @assert initialized "Must supply a full path to the folder if the database is not initialized." + #! then the folder is just the name of the ics/cells/folder folder + path_to_folder = locationPath(:ic_cell, folder) + else + path_to_folder = folder + folder = splitpath(folder)[end] end - PhysiCellCellCreator.createICCellXMLTemplate(folder) - reinitializeDatabase() + + if isfile(joinpath(path_to_folder, "cells.xml")) + println("cells.xml already exists in $path_to_folder. Skipping.") + return folder + end + + PhysiCellCellCreator.createICCellXMLTemplate(path_to_folder) + + #! finish by adding this folder to the database + if initialized + insertFolder(:ic_cell, folder) + end + + return folder end \ No newline at end of file diff --git a/src/VCTICECM.jl b/src/VCTICECM.jl index 9161a78e..056ce870 100644 --- a/src/VCTICECM.jl +++ b/src/VCTICECM.jl @@ -6,7 +6,7 @@ using PhysiCellECMCreator Create folder with a template XML file for IC ECM. See the PhysiCellECMCreator.jl documentation for more information on IC ECM and how this function works outside of pcvct. -This pcvct function runs the `createICECMXMLTemplate` function from PhysiCellECMCreator.jl and then reinitializes the database. +This pcvct function runs the `createICECMXMLTemplate` function from PhysiCellECMCreator.jl and then updates the database. Furthermore, the folder can be passed in just as the name of the folder located in `data/inputs/ics/ecms/` rather than the full path. This functionality is run outside of a PhysiCell runtime. @@ -21,9 +21,25 @@ Importantly, no two simulations will use the same CSV file. """ function createICECMXMLTemplate(folder::String) if length(splitpath(folder)) == 1 - # then the folder is just the name of the ics/ecms/folder folder - folder = joinpath(data_dir, "inputs", "ics", "ecms", folder) + @assert initialized "Must supply a full path to the folder if the database is not initialized." + #! then the folder is just the name of the ics/ecms/folder folder + path_to_folder = locationPath(:ic_ecm, folder) + else + path_to_folder = folder + folder = splitpath(folder)[end] end - PhysiCellECMCreator.createICECMXMLTemplate(folder) - reinitializeDatabase() + + if isfile(joinpath(path_to_folder, "ecm.xml")) + println("ecm.xml already exists in $path_to_folder. Skipping.") + return folder + end + + PhysiCellECMCreator.createICECMXMLTemplate(path_to_folder) + + #! 
finish by adding this folder to the database + if initialized + insertFolder(:ic_ecm, folder) + end + + return folder end \ No newline at end of file diff --git a/src/VCTImport.jl b/src/VCTImport.jl index dc69aed0..5487a06c 100644 --- a/src/VCTImport.jl +++ b/src/VCTImport.jl @@ -26,6 +26,7 @@ struct ImportSources makefile::ImportSource custom_modules::ImportSource rulesets_collection::ImportSource + intracellular::ImportSource ic_cell::ImportSource ic_substrate::ImportSource ic_ecm::ImportSource @@ -38,14 +39,15 @@ function ImportSources(src::Dict) main = ImportSource(src, "main", "", "main.cpp", "file", required; input_folder_key = :custom_code) makefile = ImportSource(src, "makefile", "", "Makefile", "file", required; input_folder_key = :custom_code) custom_modules = ImportSource(src, "custom_modules", "", "custom_modules", "folder", required; input_folder_key = :custom_code) - + required = false rules = ImportSource(src, "rules", "config", "cell_rules.csv", "file", required; pcvct_name="base_rulesets.csv") + intracellular = ImportSource(src, "intracellular", "config", "intracellular.xml", "file", required) ic_cell = ImportSource(src, "ic_cell", "config", "cells.csv", "file", required) ic_substrate = ImportSource(src, "ic_substrate", "config", "substrates.csv", "file", required) ic_ecm = ImportSource(src, "ic_ecm", "config", "ecm.csv", "file", required) ic_dc = ImportSource(src, "ic_dc", "config", "dcs.csv", "file", required) - return ImportSources(config, main, makefile, custom_modules, rules, ic_cell, ic_substrate, ic_ecm, ic_dc) + return ImportSources(config, main, makefile, custom_modules, rules, intracellular, ic_cell, ic_substrate, ic_ecm, ic_dc) end mutable struct ImportDestFolder @@ -58,6 +60,7 @@ struct ImportDestFolders config::ImportDestFolder custom_code::ImportDestFolder rules::ImportDestFolder + intracellular::ImportDestFolder ic_cell::ImportDestFolder ic_substrate::ImportDestFolder ic_ecm::ImportDestFolder @@ -70,17 +73,18 @@ function ImportDestFolders(path_to_project::AbstractString, dest::Dict) created = false description = "Imported from project at $(path_to_project)." - # required folders + #! required folders config = ImportDestFolder(path_fn("config", "configs"), created, description) custom_code = ImportDestFolder(path_fn("custom_code", "custom_codes"), created, description) - # optional folders + #! 
optional folders rules = ImportDestFolder(path_fn("rules", "rulesets_collections"), created, description) + intracellular = ImportDestFolder(path_fn("intracellular", "intracellular"), created, description) ic_cell = ImportDestFolder(path_fn("ic_cell", joinpath("ics", "cells")), created, description) ic_substrate = ImportDestFolder(path_fn("ic_substrate", joinpath("ics", "substrates")), created, description) ic_ecm = ImportDestFolder(path_fn("ic_ecm", joinpath("ics", "ecms")), created, description) ic_dc = ImportDestFolder(path_fn("ic_dc", joinpath("ics", "dcs")), created, description) - return ImportDestFolders(config, custom_code, rules, ic_cell, ic_substrate, ic_ecm, ic_dc) + return ImportDestFolders(config, custom_code, rules, intracellular, ic_cell, ic_substrate, ic_ecm, ic_dc) end """ @@ -98,31 +102,34 @@ The following keys are recognized: $(join(["`$fn`" for fn in fieldnames(ImportDe """ function importProject(path_to_project::AbstractString, src=Dict(), dest=Dict(); extreme_caution::Bool=false) project_sources = ImportSources(src) - input_folders = ImportDestFolders(path_to_project, dest) + import_dest_folders = ImportDestFolders(path_to_project, dest) success = resolveProjectSources!(project_sources, path_to_project) if success - success = createInputFolders!(input_folders, project_sources) - success = success && copyFilesToFolders(path_to_project, project_sources, input_folders) # only copy if successful so far - success = success && adaptProject(input_folders) + success = createInputFolders!(import_dest_folders, project_sources) + success = success && copyFilesToFolders(path_to_project, project_sources, import_dest_folders) #! only copy if successful so far + success = success && adaptProject(import_dest_folders) end if success msg = """ Imported project from $(path_to_project) into $(joinpath(data_dir, "inputs")): - - $(input_folders.config.path_from_inputs) - - $(input_folders.custom_code.path_from_inputs) + - $(import_dest_folders.config.path_from_inputs) + - $(import_dest_folders.custom_code.path_from_inputs) """ - if input_folders.rules.created - msg *= " - $(input_folders.rules.path_from_inputs)" + if import_dest_folders.rules.created + msg *= " - $(import_dest_folders.rules.path_from_inputs)" + end + if import_dest_folders.intracellular.created + msg *= " - $(import_dest_folders.intracellular.path_from_inputs)" end ics_started = false for ic in ["cell", "substrate", "ecm", "dc"] - input_folder = getfield(input_folders, Symbol("ic_$(ic)"))::ImportDestFolder - if input_folder.created + import_dest_folder = getfield(import_dest_folders, Symbol("ic_$(ic)"))::ImportDestFolder + if import_dest_folder.created if !ics_started msg *= "\n - ICs:" ics_started = true end - msg *= "\n - $(splitpath(input_folder.path_from_inputs)[2:end] |> joinpath)" + msg *= "\n - $(splitpath(import_dest_folder.path_from_inputs)[2:end] |> joinpath)" end end println(msg) @@ -147,9 +154,9 @@ function importProject(path_to_project::AbstractString, src=Dict(), dest=Dict(); end path_to_inputs = joinpath(data_dir, "inputs") for fieldname in fieldnames(ImportDestFolders) - input_folder = getfield(input_folders, fieldname) - if input_folder.created - path_to_folder = joinpath(path_to_inputs, input_folder.path_from_inputs) + import_dest_folder = getfield(import_dest_folders, fieldname) + if import_dest_folder.created + path_to_folder = joinpath(path_to_inputs, import_dest_folder.path_from_inputs) if extreme_caution println("Deleting the newly created $(fieldname) folder at $(path_to_folder). 
Proceed with deletion? (y/n)") response = readline() @@ -191,27 +198,27 @@ function resolveProjectSource!(project_source::ImportSource, path_to_project::Ab return false end -function createInputFolders!(input_folders::ImportDestFolders, project_sources::ImportSources) - # required folders - success = createInputFolder!(input_folders.config) - success &= createInputFolder!(input_folders.custom_code) +function createInputFolders!(import_dest_folders::ImportDestFolders, project_sources::ImportSources) + #! required folders + success = createInputFolder!(import_dest_folders.config) + success &= createInputFolder!(import_dest_folders.custom_code) - # optional folders + #! optional folders for fieldname in fieldnames(ImportSources) if fieldname in [:config, :main, :makefile, :custom_modules] continue end project_source = getfield(project_sources, fieldname) if project_source.found - success &= createInputFolder!(getfield(input_folders, project_source.src_key)::ImportDestFolder) + success &= createInputFolder!(getfield(import_dest_folders, project_source.src_key)::ImportDestFolder) end end return success end -function createInputFolder!(input_folder::ImportDestFolder) +function createInputFolder!(import_dest_folder::ImportDestFolder) path_to_inputs = joinpath(data_dir, "inputs") - path_from_inputs_vec = splitpath(input_folder.path_from_inputs) + path_from_inputs_vec = splitpath(import_dest_folder.path_from_inputs) path_from_inputs_to_collection = joinpath(path_from_inputs_vec[1:end-1]...) folder_base = path_from_inputs_vec[end] collection_contents = joinpath(path_to_inputs, path_from_inputs_to_collection) |> readdir @@ -222,12 +229,12 @@ function createInputFolder!(input_folder::ImportDestFolder) n += 1 folder_name = "$(folder_base)_$(n)" end - input_folder.path_from_inputs = joinpath(path_from_inputs_to_collection, folder_name) - path_to_folder = joinpath(path_to_inputs, input_folder.path_from_inputs) + import_dest_folder.path_from_inputs = joinpath(path_from_inputs_to_collection, folder_name) + path_to_folder = joinpath(path_to_inputs, import_dest_folder.path_from_inputs) mkpath(path_to_folder) path_to_metadata = joinpath(path_to_folder, "metadata.xml") - writeDescriptionToMetadata(path_to_metadata, input_folder.description) - input_folder.created = true + writeDescriptionToMetadata(path_to_metadata, import_dest_folder.description) + import_dest_folder.created = true return true end @@ -241,7 +248,7 @@ function writeDescriptionToMetadata(path_to_metadata::AbstractString, descriptio return end -function copyFilesToFolders(path_to_project::AbstractString, project_sources::ImportSources, input_folders::ImportDestFolders) +function copyFilesToFolders(path_to_project::AbstractString, project_sources::ImportSources, import_dest_folders::ImportDestFolders) success = true for fieldname in fieldnames(ImportSources) project_source = getfield(project_sources, fieldname)::ImportSource @@ -249,8 +256,8 @@ function copyFilesToFolders(path_to_project::AbstractString, project_sources::Im continue end src = joinpath(path_to_project, project_source.path_from_project) - input_folder = getfield(input_folders, project_source.input_folder_key) - dest = joinpath(data_dir, "inputs", input_folder.path_from_inputs, project_source.pcvct_name) + import_dest_folder = getfield(import_dest_folders, project_source.input_folder_key) + dest = joinpath(data_dir, "inputs", import_dest_folder.path_from_inputs, project_source.pcvct_name) if dest |> (project_source.type == "file" ? 
isfile : isdir) msg = """ In copying $(src) to $(dest), found a $(project_source.type) with the same name. @@ -266,14 +273,14 @@ function copyFilesToFolders(path_to_project::AbstractString, project_sources::Im return success end -function adaptProject(input_folders::ImportDestFolders) - success = adaptConfig(input_folders.config) - success &= adaptCustomCode(input_folders.custom_code) +function adaptProject(import_dest_folders::ImportDestFolders) + success = adaptConfig(import_dest_folders.config) + success &= adaptCustomCode(import_dest_folders.custom_code) return success end function adaptConfig(config::ImportDestFolder) - return true # nothing to do for now + return true #! nothing to do for now end function adaptCustomCode(custom_code::ImportDestFolder) @@ -288,7 +295,7 @@ function adaptMain(path_from_inputs::AbstractString) lines = readlines(path_to_main) if any(x->contains(x, "argument_parser.parse"), lines) - # already adapted the main.cpp + #! already adapted the main.cpp return true end @@ -327,7 +334,7 @@ function adaptMain(path_from_inputs::AbstractString) // load and parse settings file(s) load_PhysiCell_config_file(); - + char copy_command [1024]; sprintf( copy_command , "cp %s %s/PhysiCell_settings.xml" , argument_parser.path_to_config_file.c_str(), PhysiCell_settings.folder.c_str() ); //, PhysiCell_settings.folder.c_str() ); @@ -343,7 +350,7 @@ function adaptMain(path_from_inputs::AbstractString) end function adaptMakefile(path_from_inputs::AbstractString) - return true # nothing to do for now + return true #! nothing to do for now end function adaptCustomModules(path_from_inputs::AbstractString) @@ -353,7 +360,7 @@ function adaptCustomModules(path_from_inputs::AbstractString) end function adaptCustomHeader(path_from_inputs::AbstractString) - return true # nothing to do for now + return true #! nothing to do for now end function adaptCustomCPP(path_from_inputs::AbstractString) diff --git a/src/VCTLoader.jl b/src/VCTLoader.jl index caf1c2f9..80b344bb 100644 --- a/src/VCTLoader.jl +++ b/src/VCTLoader.jl @@ -61,7 +61,7 @@ function getLabels!(labels::Vector{String}, xml_doc::XMLDocument) append!(labels, label_name) else if label_name == "elapsed_time_in_phase" && label_name in labels - label_name = "elapsed_time_in_phase_2" # hack to get around a MultiCellDS duplicate? + label_name = "elapsed_time_in_phase_2" #! hack to get around a MultiCellDS duplicate? 
end push!(labels, label_name) end @@ -165,7 +165,7 @@ indexToFilename(index::Int) = "output$(lpad(index,8,"0"))" function PhysiCellSnapshot(folder::String, index::Union{Int, Symbol}; include_cells::Bool=false, cell_type_to_name_dict::Dict{Int, String}=Dict{Int, String}(), labels::Vector{String}=String[], include_substrates::Bool=false, substrate_names::Vector{String}=String[], include_mesh::Bool=false) filepath_base = joinpath(folder, indexToFilename(index)) xml_doc = openXML("$(filepath_base).xml") - time = getField(xml_doc, ["metadata","current_time"]) |> x->parse(Float64, x) + time = getContent(xml_doc, ["metadata","current_time"]) |> x->parse(Float64, x) cells = DataFrame() if include_cells loadCells!(cells, filepath_base, cell_type_to_name_dict, labels) @@ -210,7 +210,7 @@ end function loadSubstrates(filepath_base::String, substrate_names::Vector{String}) getSubstrateNames!(substrate_names, "$(filepath_base).xml") mat_file = "$(filepath_base)_microenvironment0.mat" - A = matread(mat_file) |> values |> first # julia seems to read in the multiscale_microenvironment and assign the key multiscale_microenvironmen (note the missing 't'); do this to make sure we get the data + A = matread(mat_file) |> values |> first #! julia seems to read in the multiscale_microenvironment and assign the key multiscale_microenvironmen (note the missing 't'); do this to make sure we get the data substrates = DataFrame(A', [:x; :y; :z; :volume; substrate_names]) return substrates, substrate_names end @@ -221,13 +221,13 @@ function loadSubstrates!(substrates::DataFrame, filepath_base::String, substrate end getSubstrateNames!(substrate_names, "$(filepath_base).xml") mat_file = "$(filepath_base)_microenvironment0.mat" - A = matread(mat_file) |> values |> first # julia seems to read in the multiscale_microenvironment and assign the key multiscale_microenvironmen (note the missing 't'); do this to make sure we get the data + A = matread(mat_file) |> values |> first #! julia seems to read in the multiscale_microenvironment and assign the key multiscale_microenvironmen (note the missing 't'); do this to make sure we get the data labels = [:x; :y; :z; :volume; substrate_names] for (label, row) in zip(labels, eachrow(A)) substrates[!, label] = row end end - + function loadSubstrates!(snapshot::PhysiCellSnapshot, substrate_names::Vector{String}=String[]) loadSubstrates!(snapshot.substrates, joinpath(snapshot.folder, "$(indexToFilename(snapshot.index))"), substrate_names) end @@ -248,7 +248,6 @@ function loadMesh!(snapshot::PhysiCellSnapshot) closeXML(xml_doc) end - function PhysiCellSequence(folder::String; include_cells::Bool=false, include_substrates::Bool=false, include_mesh::Bool=false) cell_type_to_name_dict = Dict{Int, String}() if include_cells @@ -331,7 +330,7 @@ function getCellDataSequence(sequence::PhysiCellSequence, labels::Vector{String} if all(length.(values(temp_dict)) .== 1) return data end - C(v, label) = begin # Concatenation of columns that belong together + C(v, label) = begin #! 
Concatenation of columns that belong together if length(temp_dict[label]) == 1 return v[temp_dict[label][1]] end diff --git a/src/VCTModule.jl b/src/VCTModule.jl index 87dd917a..cd8a4a6f 100644 --- a/src/VCTModule.jl +++ b/src/VCTModule.jl @@ -1,13 +1,14 @@ using SQLite, DataFrames, LightXML, Dates, CSV, Tables, Distributions, Statistics, Random, QuasiMonteCarlo, Sobol using PhysiCellXMLRules, PhysiCellCellCreator -export initializeVCT, getSimulationIDs, setNumberOfParallelSims +export initializeModelManager, getSimulationIDs, setNumberOfParallelSims -# put these first as they define classes the rest rely on +#! put these first as they define classes the rest rely on include("VCTClasses.jl") include("VCTPruner.jl") include("VCTVariations.jl") +include("VCTProjectConfiguration.jl") include("VCTCompilation.jl") include("VCTConfiguration.jl") include("VCTCreation.jl") @@ -21,6 +22,7 @@ include("VCTUp.jl") include("VCTVersion.jl") include("VCTPhysiCellVersion.jl") include("VCTHPC.jl") +include("VCTComponents.jl") include("VCTUserAPI.jl") @@ -36,12 +38,14 @@ include("VCTExport.jl") VERSION >= v"1.11" && include("public.julia") +inputs_dict = Dict{Symbol, Any}() + initialized = false physicell_dir::String = abspath("PhysiCell") current_physicell_version_id = missing data_dir::String = abspath("data") -PHYSICELL_CPP::String = haskey(ENV, "PHYSICELL_CPP") ? ENV["PHYSICELL_CPP"] : "/opt/homebrew/bin/g++-14" +PHYSICELL_CPP::String = haskey(ENV, "PHYSICELL_CPP") ? ENV["PHYSICELL_CPP"] : "g++" if Sys.iswindows() baseToExecutable(s::String) = "$(s).exe" else @@ -50,15 +54,16 @@ end run_on_hpc = isRunningOnHPC() max_number_of_parallel_simulations = 1 -march_flag = run_on_hpc ? "x86-64" : "native" +march_flag::String = run_on_hpc ? "x86-64" : "native" -sbatch_options = defaultJobOptions() # this is a dictionary that will be used to pass options to the sbatch command +sbatch_options::Dict{String,Any} = defaultJobOptions() #! this is a dictionary that will be used to pass options to the sbatch command function __init__() global max_number_of_parallel_simulations = haskey(ENV, "PCVCT_NUM_PARALLEL_SIMS") ? parse(Int, ENV["PCVCT_NUM_PARALLEL_SIMS"]) : 1 global path_to_python = haskey(ENV, "PCVCT_PYTHON_PATH") ? ENV["PCVCT_PYTHON_PATH"] : missing global path_to_studio = haskey(ENV, "PCVCT_STUDIO_PATH") ? ENV["PCVCT_STUDIO_PATH"] : missing end + ################## Initialization Functions ################## """ @@ -86,7 +91,7 @@ function pcvctLogo() end """ - initializeVCT(path_to_physicell::String, path_to_data::String) + initializeModelManager(path_to_physicell::String, path_to_data::String) Initialize the VCT environment by setting the paths to PhysiCell and data directories, and initializing the database. @@ -94,28 +99,39 @@ Initialize the VCT environment by setting the paths to PhysiCell and data direct - `path_to_physicell::String`: Path to the PhysiCell directory as either an absolute or relative path. - `path_to_data::String`: Path to the data directory as either an absolute or relative path. """ -function initializeVCT(path_to_physicell::String, path_to_data::String; auto_upgrade::Bool=false) - # print big logo of PCVCT here +function initializeModelManager(path_to_physicell::String, path_to_data::String; auto_upgrade::Bool=false) + #! 
print big logo of PCVCT here println(pcvctLogo()) println("----------INITIALIZING----------") global physicell_dir = abspath(path_to_physicell) global data_dir = abspath(path_to_data) - println(rpad("Path to PhysiCell:", 20, ' ') * physicell_dir) - println(rpad("Path to data:", 20, ' ') * data_dir) + println(rpad("Path to PhysiCell:", 25, ' ') * physicell_dir) + println(rpad("Path to data:", 25, ' ') * data_dir) + success = parseProjectInputsConfigurationFile() + if !success + println("Project configuration file parsing failed.") + return + end success = initializeDatabase(joinpath(data_dir, "vct.db"); auto_upgrade=auto_upgrade) if !success global db = SQLite.DB() println("Database initialization failed.") return end - println(rpad("PhysiCell version:", 20, ' ') * physicellInfo()) - println(rpad("pcvct version:", 20, ' ') * string(pcvctVersion())) - println(rpad("Compiler:", 20, ' ') * PHYSICELL_CPP) - println(rpad("Running on HPC:", 20, ' ') * string(run_on_hpc)) - println(rpad("Max parallel sims:", 20, ' ') * string(max_number_of_parallel_simulations)) + println(rpad("PhysiCell version:", 25, ' ') * physicellInfo()) + println(rpad("pcvct version:", 25, ' ') * string(pcvctVersion())) + println(rpad("Compiler:", 25, ' ') * PHYSICELL_CPP) + println(rpad("Running on HPC:", 25, ' ') * string(run_on_hpc)) + println(rpad("Max parallel sims:", 25, ' ') * string(max_number_of_parallel_simulations)) flush(stdout) end +function initializeModelManager() + physicell_dir = "PhysiCell" + data_dir = "data" + return initializeModelManager(physicell_dir, data_dir) +end + ################## Selection Functions ################## function readConstituentIDs(path_to_csv::String) @@ -195,7 +211,7 @@ end function outputFolder(T::AbstractTrial) name = typeof(T) |> string |> lowercase - name = split(name, ".")[end] # remove module name that comes with the type, e.g. main.vctmodule.sampling -> sampling + name = split(name, ".")[end] #! remove module name that comes with the type, e.g. main.vctmodule.sampling -> sampling return outputFolder(name, T.id) end diff --git a/src/VCTPhysiCellStudio.jl b/src/VCTPhysiCellStudio.jl index 54205e9f..6280b51b 100644 --- a/src/VCTPhysiCellStudio.jl +++ b/src/VCTPhysiCellStudio.jl @@ -45,18 +45,18 @@ end function setUpStudioInputs(simulation_id::Int) path_to_output = joinpath(outputFolder("simulation", simulation_id), "output") - + physicell_version = physicellVersion(Simulation(simulation_id)) upstream_version = split(physicell_version, "-")[1] |> VersionNumber - + rules_header = ["cell_type", "signal", "response", "behavior", "base_response", "max_response", "half_max", "hill_power", "applies_to_dead"] if upstream_version < v"1.14.0" output_rules_file = "cell_rules.csv" - else # starting in 1.14.1, export the v3 rules to cell_rules_parsed.csv + else #! starting in PhysiCell v1.14.1, export the v3 rules to cell_rules_parsed.csv output_rules_file = "cell_rules_parsed.csv" filter!(h -> h != "base_response", rules_header) end - + path_to_xml = joinpath(path_to_output, "PhysiCell_settings.xml") xml_doc = openXML(path_to_xml) makeXMLPath(xml_doc, ["save", "folder"]) diff --git a/src/VCTPhysiCellVersion.jl b/src/VCTPhysiCellVersion.jl index 91388d45..b45cc452 100644 --- a/src/VCTPhysiCellVersion.jl +++ b/src/VCTPhysiCellVersion.jl @@ -24,34 +24,34 @@ function physicellVersionID() repo_is_dirty = true end - # then, get the current commit hash + #! then, get the current commit hash commit_hash = readchomp(`git -C $physicell_dir rev-parse HEAD`) commit_hash *= repo_is_dirty ? 
"-dirty" : "" - # then, compare that hash with hashes in the database + #! then, compare that hash with hashes in the database query = constructSelectQuery("physicell_versions", "WHERE commit_hash = '$commit_hash'") current_entry_df = queryToDataFrame(query) @assert size(current_entry_df, 1) <= 1 "The database should have unique 'commit_hash' entries." is_hash_in_db = !isempty(current_entry_df) no_entries_missing = is_hash_in_db && all(.!ismissing.([x[1] for x in eachcol(current_entry_df)])) if no_entries_missing - # if the commit hash is already in the database, and it has a tag, then we are done + #! if the commit hash is already in the database, and it has a tag, then we are done return current_entry_df.physicell_version_id[1] end entry_dict = Dict{String,String}() - - # then, compare that hash with remote hashes to identify the tag, repo owner, and date + + #! then, compare that hash with remote hashes to identify the tag, repo owner, and date hash_to_tag_dict = getCommitHashToTagDict(physicell_dir) if !repo_is_dirty && haskey(hash_to_tag_dict, commit_hash) entry_dict["tag"] = hash_to_tag_dict[commit_hash] else entry_dict["tag"] = "NULL" end - + entry_dict["repo_owner"] = repo_is_dirty ? "NULL" : repoOwner(commit_hash, entry_dict["tag"]) entry_dict["date"] = repo_is_dirty ? "NULL" : readchomp(`git -C $physicell_dir show -s --format=%ci $commit_hash`) - - db_entry_dict = [k => v=="NULL" ? v : "'$v'" for (k,v) in entry_dict] |> Dict # surround non-NULL values with single quotes, so NULL really go in as NULL + + db_entry_dict = [k => v=="NULL" ? v : "'$v'" for (k,v) in entry_dict] |> Dict #! surround non-NULL values with single quotes, so NULL really go in as NULL if is_hash_in_db for (name, col) in pairs(eachcol(current_entry_df)) if !ismissing(col[1]) @@ -70,7 +70,7 @@ end function physicellIsGit() is_git = isdir(joinpath(physicell_dir, ".git")) - if !is_git # possible it is a submodule + if !is_git #! possible it is a submodule path_to_file = joinpath(physicell_dir, ".git") if isfile(path_to_file) lines = readlines(path_to_file) @@ -86,7 +86,7 @@ function physicellIsGit() end function gitDirectoryIsClean(dir::String) - cmd = `git -C $dir status --porcelain` # -C flag is for changing directory, --porcelain flag is for machine-readable output (much easier to tell if clean this way) + cmd = `git -C $dir status --porcelain` #! -C flag is for changing directory, --porcelain flag is for machine-readable output (much easier to tell if clean this way) output = read(cmd, String) is_clean = length(output) == 0 if is_clean @@ -96,11 +96,15 @@ function gitDirectoryIsClean(dir::String) "licenses", "matlab", "output", "povray", "protocols", "sample_projects", "sample_projects_intracellular", "sample_projects_physipkpd", "tests", "unit_tests", "user_projects"] + files_to_ignore = ["ALL_CITATIONS.txt"] lines = split(output, "\n") filter!(x -> x != "", lines) for folder in folders_to_ignore filter!(x -> !contains(x, " $folder/"), lines) end + for file in files_to_ignore + filter!(x -> !contains(x, " $file"), lines) + end is_clean = isempty(lines) if !is_clean println("PhysiCell repository is dirty. 
The following files are modified in the PhysiCell repository:") diff --git a/src/VCTProjectConfiguration.jl b/src/VCTProjectConfiguration.jl new file mode 100644 index 00000000..b3380636 --- /dev/null +++ b/src/VCTProjectConfiguration.jl @@ -0,0 +1,162 @@ +using TOML + +project_locations = NamedTuple() + +struct ProjectLocations{L,M,N} + all::NTuple{L,Symbol} + required::NTuple{M,Symbol} + varied::NTuple{N,Symbol} + + function ProjectLocations(inputs_dict::Dict{Symbol,Any}) + all_locations = (location for location in keys(inputs_dict)) |> collect |> sort |> Tuple + required = (location for (location, location_dict) in pairs(inputs_dict) if location_dict["required"]) |> collect |> sort |> Tuple + varied_locations = (location for (location,location_dict) in pairs(inputs_dict) if any(location_dict["varied"])) |> collect |> sort |> Tuple + return new{length(all_locations),length(required),length(varied_locations)}(all_locations, required, varied_locations) + end +end + +function sanitizePathElements(path_elements::Vector{String}) + for element in path_elements + #! Disallow `..` to prevent directory traversal + if element == ".." + throw(ArgumentError("Path element '..' is not allowed")) + end + + #! Disallow absolute paths + if isabspath(element) + throw(ArgumentError("Absolute paths are not allowed")) + end + + #! Disallow special characters or sequences (e.g., `~`, `*`, etc.) + if contains(element, r"[~*?<>|:]") + throw(ArgumentError("Path element contains invalid characters")) + end + end + return path_elements +end + +sanitizePathElements(path_element::String) = sanitizePathElements([path_element]) + +function parseProjectInputsConfigurationFile() + inputs_dict_temp = Dict{String, Any}() + try + inputs_dict_temp = TOML.parsefile(joinpath(data_dir, "inputs.toml")) + catch e + println("Error parsing project configuration file: ", e) + return false + end + for (location, location_dict) in pairs(inputs_dict_temp) + if !("path_from_inputs" in keys(location_dict)) + location_dict["path_from_inputs"] = tableName(location) + else + location_dict["path_from_inputs"] = location_dict["path_from_inputs"] |> sanitizePathElements |> joinpath + end + if !("basename" in keys(location_dict)) + location_dict["basename"] = missing + else + @assert haskey(location_dict, "varied") "inputs.toml: $(location): basename must be accompanied by varied." + if location_dict["varied"] isa Vector + @assert location_dict["basename"] isa Vector && length(location_dict["varied"]) == length(location_dict["basename"]) "inputs.toml: $(location): varied must be a Bool or a Vector of the same length as basename." 
+ end + end + end + global inputs_dict = [Symbol(location) => location_dict for (location, location_dict) in pairs(inputs_dict_temp)] |> Dict{Symbol, Any} + global project_locations = ProjectLocations(inputs_dict) + createSimpleInputFolders() + println(rpad("Path to inputs.toml:", 25, ' ') * joinpath(data_dir, "inputs.toml")) + return true +end + +locationIDName(location::Union{String,Symbol}) = "$(location)_id" +locationVarIDName(location::Union{String,Symbol}) = "$(location)_variation_id" +locationIDNames() = (locationIDName(loc) for loc in project_locations.all) +locationVariationIDNames() = (locationVarIDName(loc) for loc in project_locations.varied) +tableName(location::Union{String,Symbol}) = "$(location)s" +variationsTableName(location::Union{String,Symbol}) = "$(location)_variations" + +function locationPath(location::Symbol, folder=missing) + location_dict = inputs_dict[Symbol(location)] + path_to_locations = joinpath(data_dir, "inputs", location_dict["path_from_inputs"]) + return ismissing(folder) ? path_to_locations : joinpath(path_to_locations, folder) +end + +function locationPath(input_folder::InputFolder) + return locationPath(input_folder.location, input_folder.folder) +end + +function locationPath(location::Symbol, S::AbstractSampling) + return locationPath(location, S.inputs[location].folder) +end + +function folderIsVaried(location::Symbol, folder::String) + location_dict = inputs_dict[location] + varieds = location_dict["varied"] + if !any(varieds) + return false #! if none of the basenames are declared to be varied, then the folder is not varied + end + basenames = location_dict["basename"] + basenames = basenames isa Vector ? basenames : [basenames] + @assert varieds isa Bool || length(varieds) == length(basenames) "varied must be a Bool or a Vector of the same length as basename" + varieds = varieds isa Vector ? varieds : fill(varieds, length(basenames)) + + #! look for the first basename in the folder. if that one is varied, then this is a potential target for varying + path_to_folder = locationPath(location, folder) + for (basename, varied) in zip(basenames, varieds) + path_to_file = joinpath(path_to_folder, basename) + if isfile(path_to_file) + return varied + end + end + throw(ErrorException("No basename files found in folder $(path_to_folder). 
Must be one of $(basenames)")) +end + +function createInputsTOMLTemplate(path_to_toml::String) + s = """ + [config] + required = true + varied = true + basename = "PhysiCell_settings.xml" + + [custom_code] + required = true + varied = false + + [rulesets_collection] + required = false + varied = true + basename = ["base_rulesets.csv", "base_rulesets.xml"] + + [intracellular] + required = false + varied = true + basename = "intracellular.xml" + + [ic_cell] + path_from_inputs = ["ics", "cells"] + required = false + varied = [false, true] + basename = ["cells.csv", "cells.xml"] + + [ic_substrate] + path_from_inputs = ["ics", "substrates"] + required = false + varied = false + basename = "substrates.csv" + + [ic_ecm] + path_from_inputs = ["ics", "ecms"] + required = false + varied = [false, true] + basename = ["ecm.csv", "ecm.xml"] + + [ic_dc] + path_from_inputs = ["ics", "dcs"] + required = false + varied = false + basename = "dcs.csv" + """ + open(path_to_toml, "w") do f + write(f, s) + end + return +end diff --git a/src/VCTRecorder.jl b/src/VCTRecorder.jl index 167fa752..9e631c4b 100644 --- a/src/VCTRecorder.jl +++ b/src/VCTRecorder.jl @@ -36,22 +36,22 @@ end function compressIDs(ids::AbstractArray{Int}) ids = ids |> vec |> unique |> sort lines = String[] - while !isempty(ids) # while there are still ids to compress - if length(ids) == 1 # if there's only one id left - next_line = string(ids[1]) # just add it to the list - popfirst!(ids) # and remove it from the list of ids - else # if there's more than one id left - I = findfirst(diff(ids) .!= 1) # find the first index where the difference between consecutive ids is greater than 1 - I = isnothing(I) ? length(ids) : I # if none found, then all the diffs are 1 so we want to take the entire list - if I > 1 # if compressing multiple ids - next_line = "$(ids[1]):$(ids[I])" # add the first and last id separated by a colon - ids = ids[I+1:end] # remove the ids that were just compressed - else # if only compressing one id - next_line = string(ids[1]) # just add the id to the list - popfirst!(ids) # and remove it from the list of ids + while !isempty(ids) #! while there are still ids to compress + if length(ids) == 1 #! if there's only one id left + next_line = string(ids[1]) #! just add it to the list + popfirst!(ids) #! and remove it from the list of ids + else #! if there's more than one id left + I = findfirst(diff(ids) .!= 1) #! find the first index where the difference between consecutive ids is greater than 1 + I = isnothing(I) ? length(ids) : I #! if none found, then all the diffs are 1 so we want to take the entire list + if I > 1 #! if compressing multiple ids + next_line = "$(ids[1]):$(ids[I])" #! add the first and last id separated by a colon + ids = ids[I+1:end] #! remove the ids that were just compressed + else #! if only compressing one id + next_line = string(ids[1]) #! just add the id to the list + popfirst!(ids) #! and remove it from the list of ids end end - push!(lines, next_line) # add the compressed id(s) to the list of lines + push!(lines, next_line) #! 
add the compressed id(s) to the list of lines end return Tables.table(lines) end diff --git a/src/VCTRunner.jl b/src/VCTRunner.jl index 9307457a..dac84b8c 100644 --- a/src/VCTRunner.jl +++ b/src/VCTRunner.jl @@ -7,10 +7,9 @@ function prepareSimulationCommand(simulation::Simulation, monad_id::Int, do_full mkpath(path_to_simulation_output) if do_full_setup - loadConfiguration(simulation) - loadRulesets(simulation) - loadICCells(simulation) - loadICECM(simulation) + for loc in project_locations.varied + prepareVariedInputFolder(loc, simulation) + end success = loadCustomCode(simulation; force_recompile=force_recompile) if !success simulationFailedToRun(simulation, monad_id) @@ -18,10 +17,10 @@ function prepareSimulationCommand(simulation::Simulation, monad_id::Int, do_full end end - executable_str = joinpath(data_dir, "inputs", "custom_codes", simulation.inputs.custom_code.folder, baseToExecutable("project")) # path to executable - config_str = joinpath(data_dir, "inputs", "configs", simulation.inputs.config.folder, "config_variations", "config_variation_$(simulation.variation_ids.config).xml") + executable_str = joinpath(locationPath(:custom_code, simulation), baseToExecutable("project")) #! path to executable + config_str = joinpath(locationPath(:config, simulation), "config_variations", "config_variation_$(simulation.variation_id[:config]).xml") flags = ["-o", path_to_simulation_output] - if simulation.inputs.ic_cell.id != -1 + if simulation.inputs[:ic_cell].id != -1 try append!(flags, ["-i", pathToICCell(simulation)]) catch e @@ -30,26 +29,30 @@ function prepareSimulationCommand(simulation::Simulation, monad_id::Int, do_full return nothing end end - if simulation.inputs.ic_substrate.id != -1 - append!(flags, ["-s", joinpath(data_dir, "inputs", "ics", "substrates", simulation.inputs.ic_substrate.folder, "substrates.csv")]) # if ic file included (id != -1), then include this in the command + if simulation.inputs[:ic_substrate].id != -1 + append!(flags, ["-s", joinpath(locationPath(:ic_substrate, simulation), "substrates.csv")]) #! if ic file included (id != -1), then include this in the command end - if simulation.inputs.ic_ecm.id != -1 + if simulation.inputs[:ic_ecm].id != -1 try - append!(flags, ["-e", pathToICECM(simulation)]) # if ic file included (id != -1), then include this in the command + append!(flags, ["-e", pathToICECM(simulation)]) #! if ic file included (id != -1), then include this in the command catch e println("\nWARNING: Simulation $(simulation.id) failed to initialize the IC ECM file.\n\tCause: $e\n") simulationFailedToRun(simulation, monad_id) return nothing end end - if simulation.inputs.ic_dc.id != -1 - append!(flags, ["-d", joinpath(data_dir, "inputs", "ics", "dcs", simulation.inputs.ic_dc.folder, "dcs.csv")]) # if ic file included (id != -1), then include this in the command + if simulation.inputs[:ic_dc].id != -1 + append!(flags, ["-d", joinpath(locationPath(:ic_dc, simulation), "dcs.csv")]) #! 
if ic file included (id != -1), then include this in the command end - if simulation.variation_ids.rulesets_collection != -1 - path_to_rules_file = joinpath(data_dir, "inputs", "rulesets_collections", simulation.inputs.rulesets_collection.folder, "rulesets_collections_variations", "rulesets_variation_$(simulation.variation_ids.rulesets_collection).xml") + if simulation.variation_id[:rulesets_collection] != -1 + path_to_rules_file = joinpath(locationPath(:rulesets_collection, simulation), "rulesets_collection_variations", "rulesets_collection_variation_$(simulation.variation_id[:rulesets_collection]).xml") append!(flags, ["-r", path_to_rules_file]) end - return `$executable_str $config_str $flags` + if simulation.variation_id[:intracellular] != -1 + path_to_intracellular_file = joinpath(locationPath(:intracellular, simulation), "intracellular_variations", "intracellular_variation_$(simulation.variation_id[:intracellular]).xml") + append!(flags, ["-n", path_to_intracellular_file]) + end + return Cmd(`$executable_str $config_str $flags`; env=ENV, dir=physicell_dir) end function simulationFailedToRun(simulation::Simulation, monad_id::Int) @@ -97,16 +100,21 @@ end function prepareHPCCommand(cmd::Cmd, simulation_id::Int) path_to_simulation_folder = outputFolder("simulation", simulation_id) base_cmd_str = "sbatch" - flags = ["--wrap=$(prepCmdForWrap(cmd))", "--wait", "--output=$(joinpath(path_to_simulation_folder, "output.log"))", "--error=$(joinpath(path_to_simulation_folder, "output.err"))"] + flags = ["--wrap=$(prepCmdForWrap(Cmd(cmd.exec)))", + "--wait", + "--output=$(joinpath(path_to_simulation_folder, "output.log"))", + "--error=$(joinpath(path_to_simulation_folder, "output.err"))", + "--chdir=$(physicell_dir)" + ] for (k, v) in sbatch_options - if k in ["wrap", "output", "error", "wait"] + if k in ["wrap", "output", "error", "wait", "chdir"] println("WARNING: The key $k is reserved for pcvct to set in the sbatch command. Skipping this key.") continue end if typeof(v) <: Function v = v(simulation_id) end - # check if v has any spaces + #! check if v has any spaces if occursin(" ", v) v = "\"$v\"" end @@ -128,10 +136,10 @@ function resolveSimulation(simulation_process::SimulationProcess, prune_options: DBInterface.execute(db,"UPDATE simulations SET status_code_id=$(getStatusCodeID("Completed")) WHERE simulation_id=$(simulation.id);" ) else println("\nWARNING: Simulation $(simulation.id) failed. Please check $(path_to_err) for more information.\n") - # write the execution command to output.err + #! write the execution command to output.err lines = readlines(path_to_err) open(path_to_err, "w+") do io - # read the lines of the output.err file + #! read the lines of the output.err file println(io, "Execution command: $(p.cmd)") println(io, "\n---stderr from PhysiCell---") for line in lines @@ -152,21 +160,21 @@ function runMonad(monad::Monad; do_full_setup::Bool=true, force_recompile::Bool= if do_full_setup compilation_success = loadCustomCode(monad; force_recompile=force_recompile) if !compilation_success - return Task[] # do not delete simulations or the monad as these could have succeeded in the past (or on other nodes, etc.) + return Task[] #! do not delete simulations or the monad as these could have succeeded in the past (or on other nodes, etc.) 
end end - loadConfiguration(monad) - loadRulesets(monad) - loadICCells(monad) - loadICECM(monad) + + for loc in project_locations.varied + prepareVariedInputFolder(loc, monad) + end simulation_tasks = Task[] for simulation_id in monad.simulation_ids if isStarted(simulation_id; new_status_code="Queued") - continue # if the simulation has already been started (or even completed), then don't run it again + continue #! if the simulation has already been started (or even completed), then don't run it again end simulation = Simulation(simulation_id) - + push!(simulation_tasks, @task SimulationProcess(simulation; monad_id=monad.id, do_full_setup=false, force_recompile=false)) end @@ -178,13 +186,13 @@ function runSampling(sampling::Sampling; force_recompile::Bool=false) compilation_success = loadCustomCode(sampling; force_recompile=force_recompile) if !compilation_success - return Task[] # do not delete simulations, monads, or the sampling as these could have succeeded in the past (or on other nodes, etc.) + return Task[] #! do not delete simulations, monads, or the sampling as these could have succeeded in the past (or on other nodes, etc.) end simulation_tasks = [] for index in eachindex(sampling.variation_ids) - monad = Monad(sampling, index) # instantiate a monad with the variation_id and the simulation ids already found - append!(simulation_tasks, runMonad(monad, do_full_setup=false, force_recompile=false)) # run the monad and add the number of new simulations to the total + monad = Monad(sampling, index) #! instantiate a monad with the variation_id and the simulation ids already found + append!(simulation_tasks, runMonad(monad, do_full_setup=false, force_recompile=false)) #! run the monad and add the number of new simulations to the total end return simulation_tasks @@ -195,8 +203,8 @@ function runTrial(trial::Trial; force_recompile::Bool=false) simulation_tasks = [] for i in eachindex(trial.sampling_ids) - sampling = Sampling(trial, i) # instantiate a sampling with the variation_ids and the simulation ids already found - append!(simulation_tasks, runSampling(sampling; force_recompile=force_recompile)) # run the sampling and add the number of new simulations to the total + sampling = Sampling(trial, i) #! instantiate a sampling with the variation_ids and the simulation ids already found + append!(simulation_tasks, runSampling(sampling; force_recompile=force_recompile)) #! run the sampling and add the number of new simulations to the total end return simulation_tasks @@ -219,7 +227,6 @@ See also [`run`](@ref). """ function collectSimulationTasks(T::AbstractTrial; force_recompile::Bool=false, prune_options::PruneOptions=PruneOptions=PruneOptions()) end - struct PCVCTOutput trial::AbstractTrial n_scheduled::Int @@ -242,8 +249,6 @@ Also print out messages to the console to inform the user about the progress and - `prune_options::PruneOptions=PruneOptions()`: Options for pruning simulations. 
""" function run(T::AbstractTrial; force_recompile::Bool=false, prune_options::PruneOptions=PruneOptions()) - cd(()->run(pipeline(`make clean`; stdout=devnull)), physicell_dir) # remove all *.o files so that a future recompile will re-compile all the files - simulation_tasks = collectSimulationTasks(T; force_recompile=force_recompile) n_simulation_tasks = length(simulation_tasks) n_success = 0 @@ -254,19 +259,19 @@ function run(T::AbstractTrial; force_recompile::Bool=false, prune_options::Prune queue_channel = Channel{Task}(n_simulation_tasks) result_channel = Channel{Bool}(n_simulation_tasks) @async for simulation_task in simulation_tasks - put!(queue_channel, simulation_task) # if the queue_channel is full, this will block until there is space + put!(queue_channel, simulation_task) #! if the queue_channel is full, this will block until there is space end - for _ in 1:num_parallel_sims # start one task per allowed num of parallel sims - @async for simulation_task in queue_channel # do not let the creation of this task block the creation of the other tasks - # once the simulation_task is processed, put it in the result_channel and move on to the next simulation_task in the queue_channel + for _ in 1:num_parallel_sims #! start one task per allowed num of parallel sims + @async for simulation_task in queue_channel #! do not let the creation of this task block the creation of the other tasks + #! once the simulation_task is processed, put it in the result_channel and move on to the next simulation_task in the queue_channel put!(result_channel, processSimulationTask(simulation_task, prune_options)) end end - # this code block effectively blocks the main thread until all the simulation_tasks have been processed - for _ in 1:n_simulation_tasks # take exactly the number of expected outputs - success = take!(result_channel) # wait until the result_channel has a value to take + #! this code block effectively blocks the main thread until all the simulation_tasks have been processed + for _ in 1:n_simulation_tasks #! take exactly the number of expected outputs + success = take!(result_channel) #! wait until the result_channel has a value to take n_success += success end @@ -289,7 +294,7 @@ function run(T::AbstractTrial; force_recompile::Bool=false, prune_options::Prune if print_low_success_warning println(" ($(repeat("*", n_asterisks)))") asterisks["low_success_warning"] = n_asterisks - n_asterisks += 1 # in case something gets added later + n_asterisks += 1 #! in case something gets added later else println() end @@ -305,7 +310,7 @@ end """ runAbstractTrial(T::AbstractTrial; force_recompile::Bool=false, prune_options::PruneOptions=PruneOptions()) - + Alias for [`run`](@ref), but only with this particular signature. Does not work on `Cmd` objects as `Base.run` is built for. Also, does not work with `run`ning sensitivity samplings. """ diff --git a/src/VCTSensitivity.jl b/src/VCTSensitivity.jl index ffd509c9..e1cfd437 100644 --- a/src/VCTSensitivity.jl +++ b/src/VCTSensitivity.jl @@ -1,5 +1,5 @@ using Distributions, DataFrames, CSV, Sobol, FFTW -import GlobalSensitivity # do not bring in their definition of Sobol as it conflicts with the Sobol module +import GlobalSensitivity #! 
do not bring in their definition of Sobol as it conflicts with the Sobol module export MOAT, Sobolʼ, RBD @@ -30,7 +30,7 @@ getSimulationIDs(gsa_sampling::GSASampling) = getSimulationIDs(gsa_sampling.samp function methodString(gsa_sampling::GSASampling) method = typeof(gsa_sampling) |> string |> lowercase - method = split(method, ".")[end] # remove module name that comes with the type, e.g. main.vctmodule.moatsampling -> moatsampling + method = split(method, ".")[end] #! remove module name that comes with the type, e.g. main.vctmodule.moatsampling -> moatsampling return endswith(method, "sampling") ? method[1:end-8] : method end @@ -49,11 +49,9 @@ Alternatively, the third argument, `inputs`, can be replaced with a `reference:: This should be preferred to setting reference variation IDs manually, i.e., if not using the base files in the input folders. # Keyword Arguments -The three `reference_` keyword arguments are only compatible when the third argument is of type `InputFolders`. -- `reference_config_variation_id::Int=0`: the reference config variation ID -- `reference_rulesets_variation_id::Int=0`: the reference rulesets variation ID -- `reference_ic_cell_variation_id::Int=0`: the reference IC cell variation ID -- `reference_ic_ecm_variation_id::Int=0`: the reference IC ECM variation ID +The `reference_variation_id` keyword argument is only compatible when the third argument is of type `InputFolders`. +Otherwise, the `reference` simulation/monad will set the reference variation values. +- `reference_variation_id::VariationID`: the reference variation IDs as a `VariationID` - `ignore_indices::Vector{Int}=[]`: indices into `avs` to ignore when perturbing the parameters. Only used for Sobolʼ. See [`Sobolʼ`](@ref) for a use case. - `force_recompile::Bool=false`: whether to force recompilation of the simulation code - `prune_options::PruneOptions=PruneOptions()`: the options for pruning the simulation results @@ -71,12 +69,7 @@ function run(method::GSAMethod, n_replicates::Integer, inputs::InputFolders, avs end function run(method::GSAMethod, n_replicates::Integer, reference::AbstractMonad, avs::Union{AbstractVariation,Vector{<:AbstractVariation}}; functions::Vector{<:Function}=Function[], kwargs...) - return run(method, n_replicates, reference.inputs, avs; - reference_config_variation_id=reference.variation_ids.config, - reference_rulesets_variation_id=reference.variation_ids.rulesets_collection, - reference_ic_cell_variation_id=reference.variation_ids.ic_cell, - reference_ic_ecm_variation_id=reference.variation_ids.ic_ecm, - functions=functions, kwargs...) + return run(method, n_replicates, reference.inputs, avs; reference_variation_id=reference.variation_id, functions, kwargs...) end function sensitivityResults!(gsa_sampling::GSASampling, functions::Vector{<:Function}) @@ -127,7 +120,7 @@ struct MOAT <: GSAMethod lhs_variation::LHSVariation end -MOAT() = MOAT(LHSVariation(15)) # default to 15 points +MOAT() = MOAT(LHSVariation(15)) #! default to 15 points MOAT(n::Int; kwargs...) = MOAT(LHSVariation(n; kwargs...)) """ @@ -148,34 +141,33 @@ end MOATSampling(sampling::Sampling, monad_ids_df::DataFrame) = MOATSampling(sampling, monad_ids_df, Dict{Function, GlobalSensitivity.MorrisResult}()) -function _runSensitivitySampling(method::MOAT, n_replicates::Int, inputs::InputFolders, pv::ParsedVariations; - reference_config_variation_id::Int=0, reference_rulesets_variation_id::Int=0, - reference_ic_cell_variation_id::Int=inputs.ic_cell.folder=="" ? 
-1 : 0, - reference_ic_ecm_variation_id::Int=inputs.ic_ecm.folder=="" ? -1 : 0, +function _runSensitivitySampling(method::MOAT, n_replicates::Int, inputs::InputFolders, pv::ParsedVariations; reference_variation_id::VariationID=VariationID(inputs), ignore_indices::Vector{Int}=Int[], force_recompile::Bool=false, prune_options::PruneOptions=PruneOptions(), use_previous::Bool=true) if !isempty(ignore_indices) error("MOAT does not support ignoring indices...yet? Only Sobolʼ does for now.") end - reference_variation_ids = VariationIDs(reference_config_variation_id, reference_rulesets_variation_id, reference_ic_cell_variation_id, reference_ic_ecm_variation_id) - config_variation_ids, rulesets_collection_variation_ids, ic_cell_variation_ids, ic_ecm_variation_ids = addVariations(method.lhs_variation, inputs, pv, reference_variation_ids) - perturbed_config_variation_ids = repeat(config_variation_ids, 1, length(pv.sz)) - perturbed_rulesets_variation_ids = repeat(rulesets_collection_variation_ids, 1, length(pv.sz)) - perturbed_ic_cell_variation_ids = repeat(ic_cell_variation_ids, 1, length(pv.sz)) - perturbed_ic_ecm_variation_ids = repeat(ic_ecm_variation_ids, 1, length(pv.sz)) - for (base_point_ind, (config_variation_id, rulesets_collection_variation_id, ic_cell_variation_id, ic_ecm_variation_id)) in enumerate(zip(config_variation_ids, rulesets_collection_variation_ids, ic_cell_variation_ids, ic_ecm_variation_ids)) # for each base point in the LHS - for d in eachindex(pv.sz) # perturb each feature one time - perturbed_config_variation_ids[base_point_ind, d] = perturbVariation(pv, config_variation_id, inputs.config.folder, d, :config) - perturbed_rulesets_variation_ids[base_point_ind, d] = perturbVariation(pv, rulesets_collection_variation_id, inputs.rulesets_collection.folder, d, :rulesets_collection) - perturbed_ic_cell_variation_ids[base_point_ind, d] = perturbVariation(pv, ic_cell_variation_id, inputs.ic_cell.folder, d, :ic_cell) - perturbed_ic_ecm_variation_ids[base_point_ind, d] = perturbVariation(pv, ic_ecm_variation_id, inputs.ic_ecm.folder, d, :ic_ecm) + add_variations_result = addVariations(method.lhs_variation, inputs, pv, reference_variation_id) + variation_ids = add_variations_result.all_variation_ids + base_variation_ids = Dict{Symbol, Vector{Int}}() + perturbed_variation_ids = Dict{Symbol, Matrix{Int}}() + for location in project_locations.varied + base_variation_ids[location] = [variation_id[location] for variation_id in variation_ids] + perturbed_variation_ids[location] = repeat(base_variation_ids[location], 1, length(pv.sz)) + end + for (base_point_ind, variation_id) in enumerate(variation_ids) #! for each base point in the LHS + for d in eachindex(pv.sz) #! 
perturb each feature one time + for location in project_locations.varied + perturbed_variation_ids[location][base_point_ind, d] = perturbVariation(location, pv, inputs[location].folder, variation_id[location], d) + end end end - all_config_variation_ids = hcat(config_variation_ids, perturbed_config_variation_ids) - all_rulesets_variation_ids = hcat(rulesets_collection_variation_ids, perturbed_rulesets_variation_ids) - all_ic_cell_variation_ids = hcat(ic_cell_variation_ids, perturbed_ic_cell_variation_ids) - all_ic_ecm_variation_ids = hcat(ic_ecm_variation_ids, perturbed_ic_ecm_variation_ids) - monad_dict, monad_ids = variationsToMonads(inputs, all_config_variation_ids, all_rulesets_variation_ids, all_ic_cell_variation_ids, all_ic_ecm_variation_ids, use_previous) + all_variation_ids = Dict{Symbol, Matrix{Int}}() + for location in project_locations.varied + all_variation_ids[location] = hcat(base_variation_ids[location], perturbed_variation_ids[location]) + end + location_variation_dict = (loc => all_variation_ids[loc] for loc in project_locations.varied) |> Dict + monad_dict, monad_ids = variationsToMonads(inputs, location_variation_dict, use_previous) header_line = ["base"; columnName.(pv.variations)] monad_ids_df = DataFrame(monad_ids, header_line) sampling = Sampling(n_replicates, monad_dict |> values |> collect) @@ -183,75 +175,32 @@ function _runSensitivitySampling(method::MOAT, n_replicates::Int, inputs::InputF return MOATSampling(sampling, monad_ids_df) end -function perturbVariation(pv::ParsedVariations, reference_variation_id::Int, folder::String, d::Int, location::Symbol) - matching_dims = getfield(pv, Symbol("$(location)_variation_indices")) .== d - variations = getfield(pv, Symbol("$(location)_variations"))[matching_dims] # all the variations associated with the dth feature - if isempty(variations) +function perturbVariation(location::Symbol, pv::ParsedVariations, folder::String, reference_variation_id::Int, d::Int) + matching_dims = pv[location].indices .== d + evs = pv[location].variations[matching_dims] #! all the variations associated with the dth feature + if isempty(evs) return reference_variation_id end - if location == :config - baseValFn = configValue - fns = prepareConfigVariationFunctions(retrieveID("configs", folder), variations; reference_config_variation_id=reference_variation_id) - elseif location == :rulesets_collection - baseValFn = rulesetsValue - fns = prepareRulesetsVariationFunctions(retrieveID("rulesets_collections", folder); reference_rulesets_variation_id=reference_variation_id) - elseif location == :ic_cell - baseValFn = icCellBaseValue - fns = prepareICCellVariationFunctions(retrieveID("ic_cells", folder); reference_ic_cell_variation_id=reference_variation_id) - elseif location == :ic_ecm - baseValFn = icECMBaseValue - fns = prepareICECMVariationFunctions(retrieveID("ic_ecms", folder); reference_ic_ecm_variation_id=reference_variation_id) - else - error("Unknown location: $location") - end - base_values = baseValFn.(variations, reference_variation_id, folder) - addFn = (evs) -> gridToDB(evs, fns...) - return makePerturbation(variations, base_values, addFn) -end + base_values = variationValue.(evs, reference_variation_id, folder) -function makePerturbation(evs::Vector{<:ElementaryVariation}, base_values::Vector{<:Real}, addFn::Function) cdfs_at_base = [cdf(ev, bv) for (ev, bv) in zip(evs, base_values)] @assert maximum(cdfs_at_base) - minimum(cdfs_at_base) < 1e-10 "All base values must have the same CDF (within tolerance).\nInstead, got $cdfs_at_base." 
dcdf = cdfs_at_base[1] < 0.5 ? 0.5 : -0.5 - new_values = _values.(evs, cdfs_at_base[1] + dcdf) # note, this is a vector of values + new_values = _values.(evs, cdfs_at_base[1] + dcdf) #! note, this is a vector of values discrete_variations = [DiscreteVariation(target(ev), new_value) for (ev, new_value) in zip(evs, new_values)] - new_variation_id = addFn(discrete_variations) + new_variation_id = gridToDB(discrete_variations, retrieveID(location, folder), reference_variation_id) @assert length(new_variation_id) == 1 "Only doing one perturbation at a time." return new_variation_id[1] end -configValue(ev::ElementaryVariation, args...) = configValue(columnName(ev), args...) - -function configValue(column_name::String, config_variation_id::Int, folder::String) - query = constructSelectQuery("config_variations", "WHERE config_variation_id=$config_variation_id;"; selection="\"$(column_name)\"") - variation_value_df = queryToDataFrame(query; db=configDB(folder), is_row=true) +function variationValue(ev::ElementaryVariation, variation_id::Int, folder::String) + loc = location(ev) + query = constructSelectQuery("$(loc)_variations", "WHERE $(locationVarIDName(loc))=$variation_id"; selection="\"$(columnName(ev))\"") + variation_value_df = queryToDataFrame(query; db=variationsDatabase(loc, folder), is_row=true) return variation_value_df[1,1] -end - -rulesetsValue(ev::ElementaryVariation, args...) = rulesetsValue(columnName(ev), args...) -function rulesetsValue(column_name::String, rulesets_collection_variation_id::Int, folder::String) - query = constructSelectQuery("rulesets_collection_variations", "WHERE rulesets_collection_variation_id=$rulesets_collection_variation_id;"; selection="\"$(column_name)\"") - variation_value_df = queryToDataFrame(query; db=rulesetsCollectionDB(folder), is_row=true) - return variation_value_df[1,1] -end - -icCellBaseValue(ev::ElementaryVariation, args...) = icCellBaseValue(columnName(ev), args...) - -function icCellBaseValue(column_name::String, ic_cell_variation_id::Int, folder::String) - query = constructSelectQuery("ic_cell_variations", "WHERE ic_cell_variation_id=$ic_cell_variation_id;"; selection="\"$(column_name)\"") - variation_value_df = queryToDataFrame(query; db=icCellDB(folder), is_row=true) - return variation_value_df[1,1] -end - -icECMBaseValue(ev::ElementaryVariation, args...) = icECMBaseValue(columnName(ev), args...) - -function icECMBaseValue(column_name::String, ic_ecm_variation_id::Int, folder::String) - query = constructSelectQuery("ic_ecm_variations", "WHERE ic_ecm_variation_id=$ic_ecm_variation_id;"; selection="\"$(column_name)\"") - variation_value_df = queryToDataFrame(query; db=icECMDB(folder), is_row=true) - return variation_value_df[1,1] end function calculateGSA!(moat_sampling::MOATSampling, f::Function) @@ -259,7 +208,7 @@ function calculateGSA!(moat_sampling::MOATSampling, f::Function) return end values = evaluateFunctionOnSampling(moat_sampling, f) - effects = 2 * (values[:,2:end] .- values[:,1]) # all diffs in the design matrix are 0.5 + effects = 2 * (values[:,2:end] .- values[:,1]) #! all diffs in the design matrix are 0.5 means = mean(effects, dims=1) means_star = mean(abs.(effects), dims=1) variances = var(effects, dims=1) @@ -294,7 +243,7 @@ Sobolʼ(15; skip_start=false) # force the Sobol' sequence to start at the beginn Sobolʼ(15; include_one=true) # force the Sobol' sequence to include 1 in the sequence ``` """ -struct Sobolʼ <: GSAMethod # the prime symbol is used to avoid conflict with the Sobol module +struct Sobolʼ <: GSAMethod #! 
the prime symbol is used to avoid conflict with the Sobol module sobol_variation::SobolVariation sobol_index_methods::NamedTuple{(:first_order, :total_order), Tuple{Symbol, Symbol}} end @@ -322,60 +271,30 @@ end SobolSampling(sampling::Sampling, monad_ids_df::DataFrame; sobol_index_methods::NamedTuple{(:first_order, :total_order), Tuple{Symbol, Symbol}}=(first_order=:Jansen1999, total_order=:Jansen1999)) = SobolSampling(sampling, monad_ids_df, Dict{Function, GlobalSensitivity.SobolResult}(), sobol_index_methods) -function _runSensitivitySampling(method::Sobolʼ, n_replicates::Int, inputs::InputFolders, pv::ParsedVariations; - reference_config_variation_id::Int=0, reference_rulesets_variation_id::Int=0, - reference_ic_cell_variation_id::Int=inputs.ic_cell.folder=="" ? -1 : 0, - reference_ic_ecm_variation_id::Int=inputs.ic_ecm.folder=="" ? -1 : 0, +function _runSensitivitySampling(method::Sobolʼ, n_replicates::Int, inputs::InputFolders, pv::ParsedVariations; reference_variation_id::VariationID=VariationID(inputs), ignore_indices::Vector{Int}=Int[], force_recompile::Bool=false, prune_options::PruneOptions=PruneOptions(), use_previous::Bool=true) - config_id = retrieveID("configs", inputs.config.folder) - rulesets_collection_id = retrieveID("rulesets_collections", inputs.rulesets_collection.folder) - ic_cell_id = retrieveID("ic_cells", inputs.ic_cell.folder) - ic_ecm_id = retrieveID("ic_ecms", inputs.ic_ecm.folder) - reference_variation_ids = VariationIDs(reference_config_variation_id, reference_rulesets_variation_id, reference_ic_cell_variation_id, reference_ic_ecm_variation_id) - config_variation_ids, rulesets_collection_variation_ids, ic_cell_variation_ids, ic_ecm_variation_ids, cdfs = - addVariations(method.sobol_variation, inputs, pv, reference_variation_ids) + add_variations_result = addVariations(method.sobol_variation, inputs, pv, reference_variation_id) + all_variation_ids = add_variations_result.all_variation_ids + cdfs = add_variations_result.cdfs d = length(pv.sz) focus_indices = [i for i in 1:d if !(i in ignore_indices)] - config_variation_ids_A = config_variation_ids[:,1] - rulesets_variation_ids_A = rulesets_collection_variation_ids[:,1] - ic_cell_variation_ids_A = ic_cell_variation_ids[:,1] - ic_ecm_variation_ids_A = ic_ecm_variation_ids[:,1] - A = cdfs[:,1,:] # cdfs is of size (d, 2, n) - config_variation_ids_B = config_variation_ids[:,2] - rulesets_variation_ids_B = rulesets_collection_variation_ids[:,2] - ic_cell_variation_ids_B = ic_cell_variation_ids[:,2] - ic_ecm_variation_ids_B = ic_ecm_variation_ids[:,2] + location_variation_ids_A = [loc => [variation_id[loc] for variation_id in all_variation_ids[:,1]] for loc in project_locations.varied] |> Dict + A = cdfs[:,1,:] #! 
cdfs is of size (d, 2, n) + location_variation_ids_B = [loc => [variation_id[loc] for variation_id in all_variation_ids[:,2]] for loc in project_locations.varied] |> Dict B = cdfs[:,2,:] Aᵦ = [i => copy(A) for i in focus_indices] |> Dict - config_variation_ids_Aᵦ = [i => copy(config_variation_ids_A) for i in focus_indices] |> Dict - rulesets_variation_ids_Aᵦ = [i => copy(rulesets_variation_ids_A) for i in focus_indices] |> Dict - ic_cell_variation_ids_Aᵦ = [i => copy(ic_cell_variation_ids_A) for i in focus_indices] |> Dict - ic_ecm_variation_ids_Aᵦ = [i => copy(ic_ecm_variation_ids_A) for i in focus_indices] |> Dict + location_variation_ids_Aᵦ = [loc => [i => copy(location_variation_ids_A[loc]) for i in focus_indices] |> Dict for loc in project_locations.varied] |> Dict for i in focus_indices Aᵦ[i][i,:] .= B[i,:] - if i in pv.config_variation_indices - fns = prepareConfigVariationFunctions(config_id, pv.config_variations; reference_config_variation_id=reference_config_variation_id) - config_variation_ids_Aᵦ[i][:] .= cdfsToVariations(Aᵦ[i]', pv.config_variations, fns..., pv.config_variation_indices) - end - if i in pv.rulesets_collection_variation_indices - fns = prepareRulesetsVariationFunctions(rulesets_collection_id; reference_rulesets_variation_id=reference_rulesets_variation_id) - rulesets_variation_ids_Aᵦ[i][:] .= cdfsToVariations(Aᵦ[i]', pv.rulesets_collection_variations, fns..., pv.rulesets_collection_variation_indices) - end - if i in pv.ic_cell_variation_indices - fns = prepareICCellVariationFunctions(ic_cell_id; reference_ic_cell_variation_id=reference_ic_cell_variation_id) - ic_cell_variation_ids_Aᵦ[i][:] .= cdfsToVariations(Aᵦ[i]', pv.ic_cell_variations, fns..., pv.ic_cell_variation_indices) - end - if i in pv.ic_ecm_variation_indices - fns = prepareICECMVariationFunctions(ic_ecm_id; reference_ic_ecm_variation_id=reference_ic_ecm_variation_id) - ic_ecm_variation_ids_Aᵦ[i][:] .= cdfsToVariations(Aᵦ[i]', pv.ic_ecm_variations, fns..., pv.ic_ecm_variation_indices) + for loc in project_locations.varied + if i in pv[loc].indices + location_variation_ids_Aᵦ[loc][i][:] .= cdfsToVariations(loc, pv, inputs[loc].id, reference_variation_id[loc], Aᵦ[i]') + end end end - all_config_variation_ids = hcat(config_variation_ids_A, config_variation_ids_B, [config_variation_ids_Aᵦ[i] for i in focus_indices]...) # make sure to the values from the dict in the expected order - all_rulesets_variation_ids = hcat(rulesets_variation_ids_A, rulesets_variation_ids_B, [rulesets_variation_ids_Aᵦ[i] for i in focus_indices]...) - all_ic_cell_variation_ids = hcat(ic_cell_variation_ids_A, ic_cell_variation_ids_B, [ic_cell_variation_ids_Aᵦ[i] for i in focus_indices]...) - all_ic_ecm_variation_ids = hcat(ic_ecm_variation_ids_A, ic_ecm_variation_ids_B, [ic_ecm_variation_ids_Aᵦ[i] for i in focus_indices]...) - monad_dict, monad_ids = variationsToMonads(inputs, all_config_variation_ids, all_rulesets_variation_ids, all_ic_cell_variation_ids, all_ic_ecm_variation_ids, use_previous) + location_variation_ids_dict = [loc => hcat(location_variation_ids_A[loc], location_variation_ids_B[loc], [location_variation_ids_Aᵦ[loc][i] for i in focus_indices]...) 
for loc in project_locations.varied] |> Dict{Symbol,Matrix{Int}} + monad_dict, monad_ids = variationsToMonads(inputs, location_variation_ids_dict, use_previous) monads = monad_dict |> values |> collect header_line = ["A"; "B"; columnName.(pv.variations[focus_indices])] monad_ids_df = DataFrame(monad_ids, header_line) @@ -393,36 +312,36 @@ function calculateGSA!(sobol_sampling::SobolSampling, f::Function) A_values = @view values[:, 1] B_values = @view values[:, 2] Aᵦ_values = [values[:, 2+i] for i in 1:d] - expected_value² = mean(A_values .* B_values) # see Saltelli, 2002 Eq 21 + expected_value² = mean(A_values .* B_values) #! see Saltelli, 2002 Eq 21 total_variance = var([A_values; B_values]) first_order_variances = zeros(Float64, d) total_order_variances = zeros(Float64, d) si_method = sobol_sampling.sobol_index_methods.first_order st_method = sobol_sampling.sobol_index_methods.total_order for (i, Aᵦ) in enumerate(Aᵦ_values) - # I found Jansen, 1999 to do best for first order variances on a simple test of f(x,y) = x.^2 + y.^2 + c with a uniform distribution on [0,1] x [0,1] including with noise added + #! I found Jansen, 1999 to do best for first order variances on a simple test of f(x,y) = x.^2 + y.^2 + c with a uniform distribution on [0,1] x [0,1] including with noise added if si_method == :Sobol1993 - first_order_variances[i] = mean(B_values .* Aᵦ) .- expected_value² # Sobol, 1993 + first_order_variances[i] = mean(B_values .* Aᵦ) .- expected_value² #! Sobol, 1993 elseif si_method == :Jansen1999 - first_order_variances[i] = total_variance - 0.5 * mean((B_values .- Aᵦ) .^ 2) # Jansen, 1999 + first_order_variances[i] = total_variance - 0.5 * mean((B_values .- Aᵦ) .^ 2) #! Jansen, 1999 elseif si_method == :Saltelli2010 - first_order_variances[i] = mean(B_values .* (Aᵦ .- A_values)) # Saltelli, 2010 + first_order_variances[i] = mean(B_values .* (Aᵦ .- A_values)) #! Saltelli, 2010 end - # I found Jansen, 1999 to do best for total order variances on a simple test of f(x,y) = x.^2 + y.^2 + c with a uniform distribution on [0,1] x [0,1] including with noise added + #! I found Jansen, 1999 to do best for total order variances on a simple test of f(x,y) = x.^2 + y.^2 + c with a uniform distribution on [0,1] x [0,1] including with noise added if st_method == :Homma1996 - total_order_variances[i] = total_variance - mean(A_values .* Aᵦ) + expected_value² # Homma, 1996 + total_order_variances[i] = total_variance - mean(A_values .* Aᵦ) + expected_value² #! Homma, 1996 elseif st_method == :Jansen1999 - total_order_variances[i] = 0.5 * mean((Aᵦ .- A_values) .^ 2) # Jansen, 1999 + total_order_variances[i] = 0.5 * mean((Aᵦ .- A_values) .^ 2) #! Jansen, 1999 elseif st_method == :Sobol2007 - total_order_variances[i] = mean(A_values .* (A_values .- Aᵦ)) # Sobol, 2007 + total_order_variances[i] = mean(A_values .* (A_values .- Aᵦ)) #! Sobol, 2007 end end first_order_indices = first_order_variances ./ total_variance total_order_indices = total_order_variances ./ total_variance - sobol_sampling.results[f] = GlobalSensitivity.SobolResult(first_order_indices, nothing, nothing, nothing, total_order_indices, nothing) # do not yet support (S1 CIs, second order indices (S2), S2 CIs, or ST CIs) + sobol_sampling.results[f] = GlobalSensitivity.SobolResult(first_order_indices, nothing, nothing, nothing, total_order_indices, nothing) #! 
do not yet support (S1 CIs, second order indices (S2), S2 CIs, or ST CIs) return end @@ -451,7 +370,7 @@ RBD(15; num_harmonics=10) # use 10 harmonics RBD(15; use_sobol=false) # opt out of using the Sobol' sequence, instead using a random sequence in each dimension ``` """ -struct RBD <: GSAMethod # the prime symbol is used to avoid conflict with the Sobol module +struct RBD <: GSAMethod #! the prime symbol is used to avoid conflict with the Sobol module rbd_variation::RBDVariation num_harmonics::Int end @@ -480,17 +399,14 @@ end RBDSampling(sampling::Sampling, monad_ids_df::DataFrame, num_cycles; num_harmonics::Int=6) = RBDSampling(sampling, monad_ids_df, Dict{Function, GlobalSensitivity.SobolResult}(), num_harmonics, num_cycles) -function _runSensitivitySampling(method::RBD, n_replicates::Int, inputs::InputFolders, pv::ParsedVariations; - reference_config_variation_id::Int=0, reference_rulesets_variation_id::Int=0, - reference_ic_cell_variation_id::Int=inputs.ic_cell.folder=="" ? -1 : 0, - reference_ic_ecm_variation_id::Int=inputs.ic_ecm.folder=="" ? -1 : 0, +function _runSensitivitySampling(method::RBD, n_replicates::Int, inputs::InputFolders, pv::ParsedVariations; reference_variation_id::VariationID=VariationID(inputs), ignore_indices::Vector{Int}=Int[], force_recompile::Bool=false, prune_options::PruneOptions=PruneOptions(), use_previous::Bool=true) if !isempty(ignore_indices) error("RBD does not support ignoring indices...yet? Only Sobolʼ does for now.") end - reference_variation_ids = VariationIDs(reference_config_variation_id, reference_rulesets_variation_id, reference_ic_cell_variation_id, reference_ic_ecm_variation_id) - config_variation_ids, rulesets_collection_variation_ids, ic_cell_variation_ids, ic_ecm_variation_ids, config_variations_matrix, rulesets_variations_matrix, ic_cell_variations_matrix, ic_ecm_variations_matrix = addVariations(method.rbd_variation, inputs, pv, reference_variation_ids) - monad_dict, monad_ids = variationsToMonads(inputs, config_variations_matrix, rulesets_variations_matrix, ic_cell_variations_matrix, ic_ecm_variations_matrix, use_previous) + add_variations_result = addVariations(method.rbd_variation, inputs, pv, reference_variation_id) + location_variation_ids_dict = add_variations_result.location_variation_ids_dict + monad_dict, monad_ids = variationsToMonads(inputs, location_variation_ids_dict, use_previous) monads = monad_dict |> values |> collect header_line = columnName.(pv.variations) monad_ids_df = DataFrame(monad_ids, header_line) @@ -528,7 +444,7 @@ function evaluateFunctionOnSampling(gsa_sampling::GSASampling, f::Function) value_dict = Dict{Int, Float64}() values = zeros(Float64, size(monad_id_df)) for (ind, monad_id) in enumerate(monad_id_df |> Matrix) - if !(monad_id in keys(value_dict)) + if !haskey(value_dict, monad_id) simulation_ids = readMonadSimulationIDs(monad_id) sim_values = [f(simulation_id) for simulation_id in simulation_ids] value = sim_values |> mean @@ -540,29 +456,29 @@ function evaluateFunctionOnSampling(gsa_sampling::GSASampling, f::Function) end """ - variationsToMonads(inputs::InputFolders, all_config_variation_ids::Matrix{Int}, all_rulesets_variation_ids::Matrix{Int}, all_ic_cell_variation_ids::Matrix{Int}, all_ic_ecm_variation_ids::Matrix{Int}, use_previous::Bool) + variationsToMonads(inputs::InputFolders, variation_ids::Dict{Symbol,Matrix{Int}}, use_previous::Bool) Return a dictionary of monads and a matrix of monad IDs based on the given variation IDs. 
-The four matrix inputs together define a single matrix of variation IDs. +The five matrix inputs together define a single matrix of variation IDs. This information, together with the `inputs`, identifies the monads to be used. The `use_previous` flag determines whether to use previous simulations, if they exist. # Returns -- `monad_dict::Dict{VariationIDs, Monad}`: a dictionary of the monads to be used without duplicates. +- `monad_dict::Dict{VariationID, Monad}`: a dictionary of the monads to be used without duplicates. - `monad_ids::Matrix{Int}`: a matrix of the monad IDs to be used. Matches the shape of the input IDs matrices. """ -function variationsToMonads(inputs::InputFolders, all_config_variation_ids::Matrix{Int}, all_rulesets_variation_ids::Matrix{Int}, all_ic_cell_variation_ids::Matrix{Int}, all_ic_ecm_variation_ids::Matrix{Int}, use_previous::Bool) - monad_dict = Dict{VariationIDs, Monad}() - monad_ids = zeros(Int, size(all_config_variation_ids)) - for (i, (config_variation_id, rulesets_collection_variation_id, ic_cell_variation_id, ic_ecm_variation_id)) in enumerate(zip(all_config_variation_ids, all_rulesets_variation_ids, all_ic_cell_variation_ids, all_ic_ecm_variation_ids)) - variation_ids = VariationIDs(config_variation_id, rulesets_collection_variation_id, ic_cell_variation_id, ic_ecm_variation_id) - if variation_ids in keys(monad_dict) - monad_ids[i] = monad_dict[variation_ids].id +function variationsToMonads(inputs::InputFolders, location_variation_ids_dict::Dict{Symbol,Matrix{Int}}, use_previous::Bool) + monad_dict = Dict{VariationID, Monad}() + monad_ids = zeros(Int, size(location_variation_ids_dict |> values |> first)) + for i in eachindex(monad_ids) + monad_variation_id = [loc => location_variation_ids_dict[loc][i] for loc in project_locations.varied] |> VariationID + if haskey(monad_dict, monad_variation_id) + monad_ids[i] = monad_dict[monad_variation_id].id continue end - monad = Monad(inputs, variation_ids; use_previous=use_previous) - monad_dict[variation_ids] = monad + monad = Monad(inputs, monad_variation_id; use_previous=use_previous) + monad_dict[monad_variation_id] = monad monad_ids[i] = monad.id end return monad_dict, monad_ids diff --git a/src/VCTUp.jl b/src/VCTUp.jl index dc5b936c..3e389c7a 100644 --- a/src/VCTUp.jl +++ b/src/VCTUp.jl @@ -1,7 +1,7 @@ function upgradePCVCT(from_version::VersionNumber, to_version::VersionNumber, auto_upgrade::Bool) println("Upgrading pcvct from version $(from_version) to $(to_version)...") milestone_versions = [v"0.0.1", v"0.0.3", v"0.0.10", v"0.0.11", v"0.0.13", v"0.0.15"] - next_milestone_inds = findall(x -> from_version < x, milestone_versions) # this could be simplified to take advantage of this list being sorted, but who cares? It's already so fast + next_milestone_inds = findall(x -> from_version < x, milestone_versions) #! this could be simplified to take advantage of this list being sorted, but who cares? It's already so fast next_milestones = milestone_versions[next_milestone_inds] success = true for next_milestone in next_milestones @@ -26,7 +26,7 @@ end function populateTableOnFeatureSubset(db::SQLite.DB, source_table::String, target_table::String; column_mapping::Dict{String, String}=Dict{String,String}()) source_columns = queryToDataFrame("PRAGMA table_info($(source_table));") |> x -> x[!, :name] - target_columns = [c in keys(column_mapping) ? column_mapping[c] : c for c in source_columns] + target_columns = [haskey(column_mapping, c) ? 
column_mapping[c] : c for c in source_columns] insert_into_cols = "(" * join(target_columns, ",") * ")" select_cols = join(source_columns, ",") query = "INSERT INTO $(target_table) $(insert_into_cols) SELECT $(select_cols) FROM $(source_table);" @@ -37,10 +37,10 @@ function upgradeToV0_0_1(::Bool) println("\t- Upgrading to version 0.0.1...") data_dir_contents = readdir(joinpath(data_dir, "inputs"); sort=false) if "rulesets_collections" in data_dir_contents - rulesets_collections_folders = readdir(joinpath(data_dir, "inputs", "rulesets_collections"); sort=false) |> filter(x -> isdir(joinpath(data_dir, "inputs", "rulesets_collections", x))) - for rulesets_collection_folder in rulesets_collections_folders - path_to_rulesets_collections_folder = joinpath(data_dir, "inputs", "rulesets_collections", rulesets_collection_folder) - path_to_rulesets_variations_db = joinpath(path_to_rulesets_collections_folder, "rulesets_variations.db") + rulesets_collection_folders = readdir(locationPath(:rulesets_collection); sort=false) |> filter(x -> isdir(locationPath(:rulesets_collection, x))) + for rulesets_collection_folder in rulesets_collection_folders + path_to_rulesets_collection_folder = locationPath(:rulesets_collection, rulesets_collection_folder) + path_to_rulesets_variations_db = joinpath(path_to_rulesets_collection_folder, "rulesets_variations.db") if !isfile(joinpath(path_to_rulesets_variations_db)) continue end @@ -51,14 +51,14 @@ function upgradeToV0_0_1(::Bool) end column_names = queryToDataFrame("PRAGMA table_info(rulesets_variations);"; db=db_rulesets_variations) |> x -> x[!, :name] filter!(x -> x != "rulesets_collection_variation_id", column_names) - path_to_xml = joinpath(path_to_rulesets_collections_folder, "base_rulesets.xml") + path_to_xml = joinpath(path_to_rulesets_collection_folder, "base_rulesets.xml") if !isfile(path_to_xml) - writeRules(path_to_xml, joinpath(path_to_rulesets_collections_folder, "base_rulesets.csv")) + writeRules(path_to_xml, joinpath(path_to_rulesets_collection_folder, "base_rulesets.csv")) end xml_doc = openXML(path_to_xml) for column_name in column_names xml_path = columnNameToXMLPath(column_name) - base_value = getField(xml_doc, xml_path) + base_value = getContent(xml_doc, xml_path) query = "UPDATE rulesets_variations SET '$(column_name)'=$(base_value) WHERE rulesets_collection_variation_id=0;" DBInterface.execute(db_rulesets_variations, query) end @@ -85,7 +85,7 @@ function upgradeToV0_0_3(auto_upgrade::Bool) return false end println("\t- Upgrading to version 0.0.3...") - # first get vct.db right changing simulations and monads tables + #! first get vct.db right changing simulations and monads tables if DBInterface.execute(db, "SELECT 1 FROM pragma_table_info('simulations') WHERE name='config_variation_id';") |> DataFrame |> isempty DBInterface.execute(db, "ALTER TABLE simulations RENAME COLUMN variation_id TO config_variation_id;") end @@ -102,27 +102,27 @@ function upgradeToV0_0_3(auto_upgrade::Bool) DBInterface.execute(db, "UPDATE monads_temp SET ic_cell_variation_id=CASE WHEN ic_cell_id=-1 THEN -1 ELSE 0 END;") DBInterface.execute(db, "DROP TABLE monads;") createPCVCTTable("monads", monadsSchema()) - # drop the previous unique constraint on monads - # insert from monads_temp all values except ic_cell_variation_id (set that to -1 if ic_cell_id is -1 and to 0 if ic_cell_id is not -1) + #! drop the previous unique constraint on monads + #! 
insert from monads_temp all values except ic_cell_variation_id (set that to -1 if ic_cell_id is -1 and to 0 if ic_cell_id is not -1) populateTableOnFeatureSubset(db, "monads_temp", "monads") DBInterface.execute(db, "DROP TABLE monads_temp;") end - # now get the config_variations.db's right + #! now get the config_variations.db's right config_folders = queryToDataFrame(constructSelectQuery("configs"; selection="folder_name")) |> x -> x.folder_name for config_folder in config_folders - path_to_config_folder = joinpath(data_dir, "inputs", "configs", config_folder) + path_to_config_folder = locationPath(:config, config_folder) if !isfile(joinpath(path_to_config_folder, "variations.db")) continue end - # rename all "variation" to "config_variation" in filenames and in databases + #! rename all "variation" to "config_variation" in filenames and in databases old_db_file = joinpath(path_to_config_folder, "variations.db") db_file = joinpath(path_to_config_folder, "config_variations.db") if isfile(old_db_file) mv(old_db_file, db_file) end db_config_variations = db_file |> SQLite.DB - # check if variations is a table name in the database + #! check if variations is a table name in the database if DBInterface.execute(db_config_variations, "SELECT name FROM sqlite_master WHERE type='table' AND name='variations';") |> DataFrame |> x -> (length(x.name)==1) DBInterface.execute(db_config_variations, "ALTER TABLE variations RENAME TO config_variations;") end @@ -227,7 +227,7 @@ function upgradeToV0_0_13(::Bool) end rulesets_collection_folders = queryToDataFrame(constructSelectQuery("rulesets_collections"; selection="folder_name")) |> x -> x.folder_name for rulesets_collection_folder in rulesets_collection_folders - path_to_rulesets_collection_folder = joinpath(data_dir, "inputs", "rulesets_collections", rulesets_collection_folder) + path_to_rulesets_collection_folder = locationPath(:rulesets_collection, rulesets_collection_folder) path_to_new_db = joinpath(path_to_rulesets_collection_folder, "rulesets_collection_variations.db") if isfile(path_to_new_db) continue @@ -265,7 +265,7 @@ function upgradeToV0_0_15(auto_upgrade::Bool) end println("\t- Upgrading to version 0.0.15...") - # first include ic_ecm_variation_id in simulations and monads tables + #! first include ic_ecm_variation_id in simulations and monads tables if DBInterface.execute(db, "SELECT 1 FROM pragma_table_info('simulations') WHERE name='ic_ecm_variation_id';") |> DataFrame |> isempty DBInterface.execute(db, "ALTER TABLE simulations ADD COLUMN ic_ecm_variation_id INTEGER;") DBInterface.execute(db, "UPDATE simulations SET ic_ecm_variation_id=CASE WHEN ic_ecm_id=-1 THEN -1 ELSE 0 END;") @@ -280,7 +280,7 @@ function upgradeToV0_0_15(auto_upgrade::Bool) DBInterface.execute(db, "DROP TABLE monads_temp;") end - # now add ic_dc_id to simulations and monads tables + #! now add ic_dc_id to simulations and monads tables if DBInterface.execute(db, "SELECT 1 FROM pragma_table_info('simulations') WHERE name='ic_dc_id';") |> DataFrame |> isempty DBInterface.execute(db, "ALTER TABLE simulations ADD COLUMN ic_dc_id INTEGER;") DBInterface.execute(db, "UPDATE simulations SET ic_dc_id=-1;") @@ -295,4 +295,45 @@ function upgradeToV0_0_15(auto_upgrade::Bool) DBInterface.execute(db, "DROP TABLE monads_temp;") end return true +end + +function upgradeToV0_0_16(auto_upgrade::Bool) + warning_msg = """ + \t- Upgrading to version 0.0.16... + \nWARNING: Upgrading to version 0.0.16 will change the database schema. 
+ See info at https://drbergman.github.io/pcvct/stable/misc/database_upgrades/ + + ------IF ANOTHER INSTANCE OF PCVCT IS USING THIS DATABASE, PLEASE CLOSE IT BEFORE PROCEEDING.------ + + Continue upgrading to version 0.0.16? (y/n): + """ + println(warning_msg) + response = auto_upgrade ? "y" : readline() + if response != "y" + println("Upgrade to version 0.0.16 aborted.") + return false + end + println("\t- Upgrading to version 0.0.16...") + + #! add intracellular_id and intracellular_variation_id to simulations and monads tables + if DBInterface.execute(db, "SELECT 1 FROM pragma_table_info('simulations') WHERE name='intracellular_id';") |> DataFrame |> isempty + DBInterface.execute(db, "ALTER TABLE simulations ADD COLUMN intracellular_id INTEGER;") + DBInterface.execute(db, "UPDATE simulations SET intracellular_id=-1;") + DBInterface.execute(db, "ALTER TABLE simulations ADD COLUMN intracellular_variation_id INTEGER;") + DBInterface.execute(db, "UPDATE simulations SET intracellular_variation_id=-1;") + end + if DBInterface.execute(db, "SELECT 1 FROM pragma_table_info('monads') WHERE name='intracellular_id';") |> DataFrame |> isempty + DBInterface.execute(db, "ALTER TABLE monads ADD COLUMN intracellular_id INTEGER;") + DBInterface.execute(db, "UPDATE monads SET intracellular_id=-1;") + DBInterface.execute(db, "ALTER TABLE monads ADD COLUMN intracellular_variation_id INTEGER;") + DBInterface.execute(db, "UPDATE monads SET intracellular_variation_id=-1;") + DBInterface.execute(db, "CREATE TABLE monads_temp AS SELECT * FROM monads;") + DBInterface.execute(db, "UPDATE monads_temp SET intracellular_id=-1;") + DBInterface.execute(db, "UPDATE monads_temp SET intracellular_variation_id=-1;") + DBInterface.execute(db, "DROP TABLE monads;") + createPCVCTTable("monads", monadsSchema()) + populateTableOnFeatureSubset(db, "monads_temp", "monads") + DBInterface.execute(db, "DROP TABLE monads_temp;") + end + return true end \ No newline at end of file diff --git a/src/VCTUserAPI.jl b/src/VCTUserAPI.jl index b1fc3156..e4fe7e76 100644 --- a/src/VCTUserAPI.jl +++ b/src/VCTUserAPI.jl @@ -41,7 +41,7 @@ sampling = createTrial(monad, dv_apoptosis; n_replicates=2) # uses the max time """ function createTrial(method::AddVariationMethod, inputs::InputFolders, avs::Vector{<:AbstractVariation}=AbstractVariation[]; n_replicates::Integer=1, use_previous::Bool=true) - return _createTrial(method, inputs, VariationIDs(inputs), avs, n_replicates, use_previous) + return _createTrial(method, inputs, VariationID(inputs), avs, n_replicates, use_previous) end function createTrial(method::AddVariationMethod, inputs::InputFolders, av::AbstractVariation; kwargs...) @@ -50,10 +50,9 @@ end createTrial(inputs::InputFolders, args...; kwargs...) = createTrial(GridVariation(), inputs, args...; kwargs...) - function createTrial(method::AddVariationMethod, reference::AbstractMonad, avs::Vector{<:AbstractVariation}=AbstractVariation[]; n_replicates::Integer=1, use_previous::Bool=true) - return _createTrial(method, reference.inputs, reference.variation_ids, avs, n_replicates, use_previous) + return _createTrial(method, reference.inputs, reference.variation_id, avs, n_replicates, use_previous) end function createTrial(method::AddVariationMethod, reference::AbstractMonad, av::AbstractVariation; kwargs...) @@ -62,24 +61,26 @@ end createTrial(reference::AbstractMonad, args...; kwargs...) = createTrial(GridVariation(), reference, args...; kwargs...) 
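(Aside, not part of the patch.) With this refactor a reference `Simulation`/`Monad` carries a single `variation_id::VariationID` that seeds every varied location at once, in place of the old per-location reference variation ID keywords. A minimal usage sketch following the docstring examples above; `reference_monad` is assumed to already exist:

```julia
#! sketch only; reference_monad is assumed to already exist
dv = DiscreteVariation(["overall", "max_time"], 1440)      #! a config variation, as in the docstring examples
trial = createTrial(reference_monad, dv; n_replicates=2)   #! the reference monad's variation_id seeds all varied locations
```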
-function _createTrial(method::AddVariationMethod, inputs::InputFolders, reference_variation_ids::VariationIDs, +function _createTrial(method::AddVariationMethod, inputs::InputFolders, reference_variation_id::VariationID, avs::Vector{<:AbstractVariation}, n_replicates::Integer, use_previous::Bool) - - config_variation_ids, rulesets_collection_variation_ids, ic_cell_variation_ids, ic_ecm_variation_ids = addVariations(method, inputs, avs, reference_variation_ids) - if length(config_variation_ids) == 1 - variation_ids = VariationIDs(config_variation_ids[1], rulesets_collection_variation_ids[1], ic_cell_variation_ids[1], ic_ecm_variation_ids[1]) + + add_variations_result = addVariations(method, inputs, avs, reference_variation_id) + all_variation_ids = add_variations_result.all_variation_ids + if length(all_variation_ids) == 1 + variation_ids = all_variation_ids[1] monad = Monad(n_replicates, inputs, variation_ids, use_previous) if n_replicates != 1 return monad end return Simulation(monad.simulation_ids[end]) else + location_variation_ids = [loc => [variation_id[loc] for variation_id in all_variation_ids] for loc in project_locations.varied] |> + Dict{Symbol,Union{Integer,AbstractArray{<:Integer}}} + return Sampling(inputs; n_replicates=n_replicates, - config_variation_ids=config_variation_ids, - rulesets_collection_variation_ids=rulesets_collection_variation_ids, - ic_cell_variation_ids=ic_cell_variation_ids, - ic_ecm_variation_ids=ic_ecm_variation_ids, - use_previous=use_previous) + location_variation_ids=location_variation_ids, + use_previous=use_previous + ) end end diff --git a/src/VCTVariations.jl b/src/VCTVariations.jl index 28e3d0e8..ba3680c4 100644 --- a/src/VCTVariations.jl +++ b/src/VCTVariations.jl @@ -40,7 +40,7 @@ abstract type ElementaryVariation <: AbstractVariation end The location, target, and values of a discrete variation. # Fields -- `location::Symbol`: The location of the variation. Can be `:config`, `:rulesets`, `:ic_cell`, `:ic_ecm`. The location is inferred from the target. +- `location::Symbol`: The location of the variation. Can be `:config`, `:rulesets_collection`, `:intracellular`, `:ic_cell`, `:ic_ecm`. The location is inferred from the target. - `target::XMLPath`: The target of the variation. The target is a vector of strings that represent the XML path to the element being varied. See [`XMLPath`](@ref) for more information. - `values::Vector{T}`: The values of the variation. The values are the possible values that the target can take on. 
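A brief illustration (not part of the patch) of how the `location` field is inferred from the root of the XML path, reflecting the rename of `:rulesets` to `:rulesets_collection` and the new `:intracellular` location; `location` is the package's internal accessor shown later in this file:

```julia
#! sketch only: the location is inferred from the first entry of the XML path
xml_path = ["hypothesis_ruleset:name:default", "behavior:name:cycle entry", "decreasing_signals", "max_response"]
dv = DiscreteVariation(xml_path, 0)
location(dv)   #! :rulesets_collection (was :rulesets); paths rooted at "intracellulars" now map to :intracellular
```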
@@ -55,7 +55,7 @@ DiscreteVariation{Float64}(:config, pcvct.XMLPath(["overall", "max_time"]), [144 xml_path = ["hypothesis_ruleset:name:default","behavior:name:cycle entry","decreasing_signals","max_response"] DiscreteVariation(xml_path, 0) # output -DiscreteVariation{Int64}(:rulesets, pcvct.XMLPath(["hypothesis_ruleset:name:default", "behavior:name:cycle entry", "decreasing_signals", "max_response"]), [0]) +DiscreteVariation{Int64}(:rulesets_collection, pcvct.XMLPath(["hypothesis_ruleset:name:default", "behavior:name:cycle entry", "decreasing_signals", "max_response"]), [0]) ``` ```jldoctest xml_path = ["cell_patches:name:default","patch_collection:type:disc","patch:ID:1","x0"] @@ -73,7 +73,7 @@ struct DiscreteVariation{T} <: ElementaryVariation location::Symbol target::XMLPath values::Vector{T} - + function DiscreteVariation(target::Vector{<:AbstractString}, values::Vector{T}) where T return DiscreteVariation(XMLPath(target), values) end @@ -101,7 +101,7 @@ Analagousy to [`DiscreteVariation`](@ref), instances of `DistributedVariation` c Alternatively, users can use the [`UniformDistributedVariation`](@ref) and [`NormalDistributedVariation`](@ref) functions to create instances of `DistributedVariation`. # Fields -- `location::Symbol`: The location of the variation. Can be `:config`, `:rulesets`, `:ic_cell`, or `:ic_ecm`. The location is inferred from the target. +- `location::Symbol`: The location of the variation. Can be `:config`, `:rulesets_collection`, `:intracellular`, `:ic_cell`, or `:ic_ecm`. The location is inferred from the target. - `target::XMLPath`: The target of the variation. The target is a vector of strings that represent the XML path to the element being varied. See [`XMLPath`](@ref) for more information. - `distribution::Distribution`: The distribution of the variation. - `flip::Bool=false`: Whether to flip the distribution, i.e., when asked for the iCDF of `x`, return the iCDF of `1-x`. Useful for [`CoVariation`](@ref)'s. @@ -141,7 +141,7 @@ target(ev::ElementaryVariation) = ev.target location(ev::ElementaryVariation) = ev.location columnName(ev::ElementaryVariation) = target(ev).xml_path |> xmlPathToColumnName -Base.length(::DistributedVariation) = -1 # set to -1 to be a convention +Base.length(::DistributedVariation) = -1 #! set to -1 to be a convention """ UniformDistributedVariation(xml_path::Vector{<:AbstractString}, lb::T, ub::T) where {T<:Real} @@ -165,7 +165,7 @@ _values(discrete_variation::DiscreteVariation) = discrete_variation.values function _values(discrete_variation::DiscreteVariation, cdf::Vector{<:Real}) index = floor.(Int, cdf * length(discrete_variation)) .+ 1 - index[index.==(length(discrete_variation)+1)] .= length(discrete_variation) # if cdf = 1, index = length(discrete_variation)+1, so we set it to length(discrete_variation) + index[index.==(length(discrete_variation)+1)] .= length(discrete_variation) #! if cdf = 1, index = length(discrete_variation)+1, so we set it to length(discrete_variation) return discrete_variation.values[index] end @@ -181,7 +181,7 @@ _values(::DistributedVariation) = error("A cdf must be provided for a Distribute _values(ev::ElementaryVariation, cdf) = error("values not defined for $(typeof(ev)) with type of cdf = $(typeof(cdf))") function dataType(discrete_variation::DiscreteVariation) - return typeof(discrete_variation).parameters[1] # typeof(discrete_variation).parameters[1] is the type parameter T in the definition of DiscreteVariation{T} + return typeof(discrete_variation).parameters[1] #! 
typeof(discrete_variation).parameters[1] is the type parameter T in the definition of DiscreteVariation{T} end function dataType(dv::DistributedVariation) @@ -190,6 +190,19 @@ end dataType(ev::ElementaryVariation) = error("dataType not defined for $(typeof(ev))") +function sqliteDataType(ev::ElementaryVariation) + data_type = dataType(ev) + if data_type == Bool + return "TEXT" + elseif data_type <: Integer + return "INT" + elseif data_type <: Real + return "REAL" + else + return "TEXT" + end +end + function cdf(discrete_variation::DiscreteVariation, x::Real) if !(x in discrete_variation.values) error("Value not in elementary variation values.") @@ -209,7 +222,9 @@ cdf(ev::ElementaryVariation, ::Real) = error("cdf not defined for $(typeof(ev))" function variationLocation(xp::XMLPath) if startswith(xp.xml_path[1], "hypothesis_ruleset:name:") - return :rulesets + return :rulesets_collection + elseif xp.xml_path[1] == "intracellulars" + return :intracellular elseif startswith(xp.xml_path[1], "cell_patches:name:") return :ic_cell elseif startswith(xp.xml_path[1], "layer:ID:") @@ -313,7 +328,7 @@ function addDomainVariationDimension!(evs::Vector{<:ElementaryVariation}, domain dim_char = dim_chars[ind] tag = "$(dim_char)_$(dim_side)" xml_path = ["domain", tag] - push!(evs, DiscreteVariation(xml_path, value)) # do this to make sure that singletons and vectors are converted to vectors + push!(evs, DiscreteVariation(xml_path, value)) #! do this to make sure that singletons and vectors are converted to vectors end end @@ -349,7 +364,25 @@ end ################## Database Interface Functions ################## -function addColumns(xps::Vector{XMLPath}, table_name::String, id_column_name::String, db_columns::SQLite.DB, path_to_xml::String, dataTypeRulesFn::Function) +function addColumns(loc::Symbol, folder_id::Int, evs::Vector{<:ElementaryVariation}) + @assert all(location.(evs) .== loc) "All variations must be in the same location to do addColumns. Somehow found $(unique(location.(evs))) here." + folder = inputFolderName(loc, folder_id) + db_columns = variationsDatabase(loc, folder) + basenames = inputs_dict[loc]["basename"] + basenames = basenames isa Vector ? basenames : [basenames] #! force basenames to be a vector to handle all the same way + basename_is_varied = inputs_dict[loc]["varied"] .&& ([splitext(bn)[2] .== ".xml" for bn in basenames]) #! the varied entry is either a singleton Boolean or a vector of the same length as basenames + basename_ind = findall(basename_is_varied .&& isfile.([joinpath(locationPath(loc, folder), bn) for bn in basenames])) + @assert !isnothing(basename_ind) "Folder $(folder) does not contain a valid $(loc) file to support variations. The options are $(basenames[basename_is_varied])." + @assert length(basename_ind) == 1 "Folder $(folder) contains multiple valid $(loc) files to support variations. The options are $(basenames[basename_is_varied])." 
+ + path_to_xml = joinpath(locationPath(loc, folder), basenames[basename_ind[1]]) + return addColumns(evs, loc, db_columns, path_to_xml) +end + +function addColumns(evs::Vector{<:ElementaryVariation}, location::Symbol, db_columns::SQLite.DB, path_to_xml::String) + xps = target.(evs) + table_name = variationsTableName(location) + id_column_name = locationVarIDName(location) column_names = queryToDataFrame("PRAGMA table_info($(table_name));"; db=db_columns) |> x->x[!,:name] filter!(x -> x != id_column_name, column_names) varied_column_names = [xmlPathToColumnName(xp.xml_path) for xp in xps] @@ -357,20 +390,20 @@ function addColumns(xps::Vector{XMLPath}, table_name::String, id_column_name::St is_new_column = [!(varied_column_name in column_names) for varied_column_name in varied_column_names] if any(is_new_column) new_column_names = varied_column_names[is_new_column] + new_column_data_types = evs[is_new_column] .|> sqliteDataType xml_doc = openXML(path_to_xml) - default_values_for_new = [getField(xml_doc, xp.xml_path) for xp in xps[is_new_column]] + default_values_for_new = [getContent(xml_doc, xp.xml_path) for xp in xps[is_new_column]] closeXML(xml_doc) - for (i, new_column_name) in enumerate(new_column_names) - sqlite_data_type = dataTypeRulesFn(i, new_column_name) - DBInterface.execute(db_columns, "ALTER TABLE $(table_name) ADD COLUMN '$(new_column_name)' $(sqlite_data_type);") + for (new_column_name, data_type) in zip(new_column_names, new_column_data_types) + DBInterface.execute(db_columns, "ALTER TABLE $(table_name) ADD COLUMN '$(new_column_name)' $(data_type);") end - DBInterface.execute(db_columns, "UPDATE $(table_name) SET ($(join("\"".*new_column_names.*"\"",",")))=($(join("\"".*default_values_for_new.*"\"",",")));") # set newly added columns to default values + DBInterface.execute(db_columns, "UPDATE $(table_name) SET ($(join("\"".*new_column_names.*"\"",",")))=($(join("\"".*default_values_for_new.*"\"",",")));") #! set newly added columns to default values index_name = "$(table_name)_index" - SQLite.dropindex!(db_columns, index_name; ifexists=true) # remove previous index + SQLite.dropindex!(db_columns, index_name; ifexists=true) #! remove previous index index_columns = deepcopy(column_names) append!(index_columns, new_column_names) - SQLite.createindex!(db_columns, table_name, index_name, index_columns; unique=true, ifnotexists=false) # add new index to make sure no variations are repeated + SQLite.createindex!(db_columns, table_name, index_name, index_columns; unique=true, ifnotexists=false) #! 
add new index to make sure no variations are repeated end static_column_names = deepcopy(column_names) @@ -380,106 +413,27 @@ function addColumns(xps::Vector{XMLPath}, table_name::String, id_column_name::St return static_column_names, varied_column_names end -function addConfigVariationColumns(config_id::Int, xps::Vector{XMLPath}, variable_types::Vector{DataType}) - config_folder = configFolder(config_id) - db_columns = configDB(config_folder) - path_to_xml = joinpath(data_dir, "inputs", "configs", config_folder, "PhysiCell_settings.xml") - dataTypeRulesFn = (i, _) -> begin - if variable_types[i] == Bool - "TEXT" - elseif variable_types[i] <: Integer - "INT" - elseif variable_types[i] <: Real - "REAL" - else - "TEXT" - end - end - return addColumns(xps, "config_variations", "config_variation_id", db_columns, path_to_xml, dataTypeRulesFn) -end - -function addRulesetsVariationsColumns(rulesets_collection_id::Int, xps::Vector{XMLPath}) - rulesets_collection_folder = rulesetsCollectionFolder(rulesets_collection_id) - db_columns = rulesetsCollectionDB(rulesets_collection_folder) - path_to_rulesets_collection_folder = joinpath(data_dir, "inputs", "rulesets_collections", rulesets_collection_folder) - path_to_base_xml = joinpath(path_to_rulesets_collection_folder, "base_rulesets.xml") - if !isfile(path_to_base_xml) - writeRules(path_to_base_xml, joinpath(path_to_rulesets_collection_folder, "base_rulesets.csv")) - end - dataTypeRulesFn = (_, name) -> occursin("applies_to_dead", name) ? "INT" : "REAL" - return addColumns(xps, "rulesets_collection_variations", "rulesets_collection_variation_id", db_columns, path_to_base_xml, dataTypeRulesFn) -end - -function addICCellVariationColumns(ic_cell_id::Int, xps::Vector{XMLPath}) - ic_cell_folder = icCellFolder(ic_cell_id) - db_columns = icCellDB(ic_cell_folder) - @assert db_columns isa SQLite.DB "ic_cell_folder must contain a cells.xml file to support variations." - path_to_ic_cell_folder = joinpath(data_dir, "inputs", "ics", "cells", ic_cell_folder) - path_to_base_xml = joinpath(path_to_ic_cell_folder, "cells.xml") - dataTypeRulesFn = (_, name) -> endswith(name, "number") ? "INT" : "REAL" - return addColumns(xps, "ic_cell_variations", "ic_cell_variation_id", db_columns, path_to_base_xml, dataTypeRulesFn) -end - -function addICECMVariationColumns(ic_ecm_id::Int, xps::Vector{XMLPath}) - ic_ecm_folder = icECMFolder(ic_ecm_id) - db_columns = icECMDB(ic_ecm_folder) - @assert db_columns isa SQLite.DB "ic_ecm_folder must contain a ecm.xml file to support variations." - path_to_ic_ecm_folder = joinpath(data_dir, "inputs", "ics", "ecms", ic_ecm_folder) - path_to_base_xml = joinpath(path_to_ic_ecm_folder, "ecm.xml") - dataTypeRulesFn = (_, name) -> endswith(name, "number") ? 
"INT" : "REAL" - return addColumns(xps, "ic_ecm_variations", "ic_ecm_variation_id", db_columns, path_to_base_xml, dataTypeRulesFn) -end +function addVariationRow(location::Symbol, folder_id::Int, table_features::String, values::String) + db_columns = variationsDatabase(location, folder_id) + table_name = variationsTableName(location) + variation_id_name = locationVarIDName(location) -function addRow(db_columns::SQLite.DB, table_name::String, id_name::String, table_features::String, values::String) - new_id = DBInterface.execute(db_columns, "INSERT OR IGNORE INTO $(table_name) ($(table_features)) VALUES($(values)) RETURNING $(id_name);") |> DataFrame |> x->x[!,1] + new_id = DBInterface.execute(db_columns, "INSERT OR IGNORE INTO $(table_name) ($(table_features)) VALUES($(values)) RETURNING $(variation_id_name);") |> DataFrame |> x->x[!,1] new_added = length(new_id)==1 if !new_added - query = constructSelectQuery(table_name, "WHERE ($(table_features))=($(values))"; selection=id_name) + query = constructSelectQuery(table_name, "WHERE ($(table_features))=($(values))"; selection=variation_id_name) new_id = queryToDataFrame(query; db=db_columns, is_row=true) |> x->x[!,1] end return new_id[1] end -function addConfigVariationRow(config_id::Int, table_features::String, values::String) - db_columns = configDB(config_id) - return addRow(db_columns, "config_variations", "config_variation_id", table_features, values) -end - -function addConfigVariationRow(config_id::Int, table_features::String, static_values::String, varied_values::String) - return addConfigVariationRow(config_id, table_features, "$(static_values)$(varied_values)") -end - -function addRulesetsVariationRow(rulesets_collection_id::Int, table_features::String, values::String) - db_columns = rulesetsCollectionDB(rulesets_collection_id) - return addRow(db_columns, "rulesets_collection_variations", "rulesets_collection_variation_id", table_features, values) +function addVariationRow(location::Symbol, folder_id::Int, table_features::String, static_values::String, varied_values::String) + return addVariationRow(location, folder_id, table_features, "$(static_values)$(varied_values)") end -function addRulesetsVariationRow(rulesets_collection_id::Int, table_features::String, static_values::String, varied_values::String) - return addRulesetsVariationRow(rulesets_collection_id, table_features, "$(static_values)$(varied_values)") -end - -function addICCellVariationRow(ic_cell_id::Int, table_features::String, values::String) - db_columns = icCellDB(ic_cell_id) - return addRow(db_columns, "ic_cell_variations", "ic_cell_variation_id", table_features, values) -end - -function addICCellVariationRow(ic_cell_id::Int, table_features::String, static_values::String, varied_values::String) - return addICCellVariationRow(ic_cell_id, table_features, "$(static_values)$(varied_values)") -end - -function addICECMVariationRow(ic_ecm_id::Int, table_features::String, values::String) - db_columns = icECMDB(ic_ecm_id) - return addRow(db_columns, "ic_ecm_variations", "ic_ecm_variation_id", table_features, values) -end - -function addICECMVariationRow(ic_ecm_id::Int, table_features::String, static_values::String, varied_values::String) - return addICECMVariationRow(ic_ecm_id, table_features, "$(static_values)$(varied_values)") -end - -function setUpColumns(evs::Vector{<:ElementaryVariation}, addColumnsByPathsFn::Function, prepareAddNewFn::Function) - xps = target.(evs) - static_column_names, varied_column_names = addColumnsByPathsFn(xps) - return 
prepareAddNewFn(static_column_names, varied_column_names) +function setUpColumns(location::Symbol, evs::Vector{<:ElementaryVariation}, folder_id::Int, reference_variation_id::Int) + static_column_names, varied_column_names = addColumns(location, folder_id, evs) + return prepareAddNewVariations(location, folder_id, static_column_names, varied_column_names, reference_variation_id) end function prepareAddNew(db_columns::SQLite.DB, static_column_names::Vector{String}, varied_column_names::Vector{String}, table_name::String, id_name::String, reference_id::Int) @@ -499,24 +453,11 @@ function prepareAddNew(db_columns::SQLite.DB, static_column_names::Vector{String return static_values, table_features end -function prepareAddNewConfigVariations(config_id::Int, static_column_names::Vector{String}, varied_column_names::Vector{String}; reference_config_variation_id::Int=0) - db_columns = configDB(config_id) - return prepareAddNew(db_columns, static_column_names, varied_column_names, "config_variations", "config_variation_id", reference_config_variation_id) -end - -function prepareAddNewRulesetsVariations(rulesets_collection_id::Int, static_column_names::Vector{String}, varied_column_names::Vector{String}; reference_rulesets_variation_id::Int=0) - db_columns = rulesetsCollectionDB(rulesets_collection_id) - return prepareAddNew(db_columns, static_column_names, varied_column_names, "rulesets_collection_variations", "rulesets_collection_variation_id", reference_rulesets_variation_id) -end - -function prepareAddNewICCellVariations(ic_cell_id::Int, static_column_names::Vector{String}, varied_column_names::Vector{String}; reference_ic_cell_variation_id::Int=ic_cell_id==-1 ? -1 : 0) - db_columns = icCellDB(ic_cell_id) - return prepareAddNew(db_columns, static_column_names, varied_column_names, "ic_cell_variations", "ic_cell_variation_id", reference_ic_cell_variation_id) -end - -function prepareAddNewICECMVariations(ic_ecm_id::Int, static_column_names::Vector{String}, varied_column_names::Vector{String}; reference_ic_ecm_variation_id::Int=ic_ecm_id==-1 ? -1 : 0) - db_columns = icECMDB(ic_ecm_id) - return prepareAddNew(db_columns, static_column_names, varied_column_names, "ic_ecm_variations", "ic_ecm_variation_id", reference_ic_ecm_variation_id) +function prepareAddNewVariations(location::Symbol, folder_id::Int, static_column_names::Vector{String}, varied_column_names::Vector{String}, reference_variation_id::Int) + db_columns = variationsDatabase(location, folder_id) + table_name = variationsTableName(location) + variation_id_name = locationVarIDName(location) + return prepareAddNew(db_columns, static_column_names, varied_column_names, table_name, variation_id_name, reference_variation_id) end ################## Specialized Variations ################## @@ -647,7 +588,7 @@ struct RBDVariation <: AddVariationMethod function RBDVariation(n::Int, rng::AbstractRNG, use_sobol::Bool, pow2_diff::Union{Missing, Int}, num_cycles::Union{Missing, Int, Rational}) if use_sobol - k = log2(n) |> round |> Int # nearest power of 2 to n + k = log2(n) |> round |> Int #! nearest power of 2 to n if ismissing(pow2_diff) pow2_diff = n - 2^k else @@ -660,7 +601,7 @@ struct RBDVariation <: AddVariationMethod @assert num_cycles == 1//2 "num_cycles must be 1//2 for RBDVariation with Sobol sequence" end else - pow2_diff = missing # not used in this case + pow2_diff = missing #! 
not used in this case if ismissing(num_cycles) num_cycles = 1 else @@ -673,134 +614,94 @@ end RBDVariation(n::Int; rng::AbstractRNG=Random.GLOBAL_RNG, use_sobol::Bool=true, pow2_diff=missing, num_cycles=missing) = RBDVariation(n, rng, use_sobol, pow2_diff, num_cycles) -function addVariations(method::AddVariationMethod, inputs::InputFolders, avs::Vector{<:AbstractVariation}; - reference_config_variation_id::Int=0, reference_rulesets_variation_id::Int=0, - reference_ic_cell_variation_id::Int=inputs.ic_cell.folder=="" ? -1 : 0, - reference_ic_ecm_variation_id::Int=inputs.ic_ecm.folder=="" ? -1 : 0) - - reference_variation_ids = VariationIDs(reference_config_variation_id, reference_rulesets_variation_id, reference_ic_cell_variation_id, reference_ic_ecm_variation_id) - return addVariations(method, inputs, avs, reference_variation_ids) -end +abstract type AddVariationsResult end -function addVariations(method::AddVariationMethod, inputs::InputFolders, avs::Vector{<:AbstractVariation}, reference_variation_ids::VariationIDs) +function addVariations(method::AddVariationMethod, inputs::InputFolders, avs::Vector{<:AbstractVariation}, reference_variation_id::VariationID=VariationID(inputs)) pv = ParsedVariations(avs) - return addVariations(method, inputs, pv, reference_variation_ids) + return addVariations(method, inputs, pv, reference_variation_id) end -struct ParsedVariations - sz::Vector{Int} - variations::Vector{<:AbstractVariation} # - - config_variations::Vector{<:ElementaryVariation} - rulesets_collection_variations::Vector{<:ElementaryVariation} - ic_cell_variations::Vector{<:ElementaryVariation} - ic_ecm_variations::Vector{<:ElementaryVariation} - - config_variation_indices::Vector{Int} - rulesets_collection_variation_indices::Vector{Int} - ic_cell_variation_indices::Vector{Int} - ic_ecm_variation_indices::Vector{Int} - - function ParsedVariations(sz::Vector{Int}, variations::Vector{<:AbstractVariation}, - config_variations::Vector{<:ElementaryVariation}, - rulesets_collection_variations::Vector{<:ElementaryVariation}, - ic_cell_variations::Vector{<:ElementaryVariation}, - ic_ecm_variations::Vector{<:ElementaryVariation}, - config_variation_indices::Vector{Int}, - rulesets_collection_variation_indices::Vector{Int}, - ic_cell_variation_indices::Vector{Int}, - ic_ecm_variation_indices::Vector{Int}) - - @assert length(config_variations) == length(config_variation_indices) "config_variations and config_variation_indices must have the same length" - @assert length(rulesets_collection_variations) == length(rulesets_collection_variation_indices) "rulesets_collection_variations and rulesets_collection_variation_indices must have the same length" - @assert length(ic_cell_variations) == length(ic_cell_variation_indices) "ic_cell_variations and ic_cell_variation_indices must have the same length" - @assert length(ic_ecm_variations) == length(ic_ecm_variation_indices) "ic_ecm_variations and ic_ecm_variation_indices must have the same length" - return new(sz, variations, config_variations, rulesets_collection_variations, ic_cell_variations, ic_ecm_variations, config_variation_indices, rulesets_collection_variation_indices, ic_cell_variation_indices, ic_ecm_variation_indices) +struct LocationParsedVariations + variations::Vector{<:ElementaryVariation} + indices::Vector{Int} + function LocationParsedVariations(variations::Vector{<:ElementaryVariation}, indices::Vector{Int}) + @assert length(variations) == length(indices) "variations and indices must have the same length" + return new(variations, indices) 
end end -function ParsedVariations(avs::Vector{<:AbstractVariation}) - sz = length.(avs) +struct ParsedVariations + sz::Vector{Int} + variations::Vector{<:AbstractVariation} - config_variations = ElementaryVariation[] - rulesets_collection_variations = ElementaryVariation[] - ic_cell_variations = ElementaryVariation[] - ic_ecm_variations = ElementaryVariation[] + location_parsed_variations::NamedTuple - config_variation_indices = Int[] - rulesets_collection_variation_indices = Int[] - ic_cell_variation_indices = Int[] - ic_ecm_variation_indices = Int[] + function ParsedVariations(avs::Vector{<:AbstractVariation}) + sz = length.(avs) - for (i, av) in enumerate(avs) - if av isa ElementaryVariation - av = CoVariation(av) # wrap it in a covariation + location_variations_dict = Dict{Symbol, Any}() + for loc in project_locations.varied + location_variations_dict[loc] = (ElementaryVariation[], Int[]) end - @assert av isa CoVariation "Everything at this point should have been converted to a CoVariation" - for ev in av.variations - loc = location(ev) - if loc == :config - push!(config_variations, ev) - push!(config_variation_indices, i) - elseif loc == :rulesets - push!(rulesets_collection_variations, ev) - push!(rulesets_collection_variation_indices, i) - elseif loc == :ic_cell - push!(ic_cell_variations, ev) - push!(ic_cell_variation_indices, i) - elseif loc == :ic_ecm - push!(ic_ecm_variations, ev) - push!(ic_ecm_variation_indices, i) - else - error("Variation type not recognized.") + + for (i, av) in enumerate(avs) + if av isa ElementaryVariation + av = CoVariation(av) #! wrap it in a covariation + end + @assert av isa CoVariation "Everything at this point should have been converted to a CoVariation" + for ev in av.variations + push!(location_variations_dict[location(ev)][1], ev) + push!(location_variations_dict[location(ev)][2], i) end end + for (_, variation_indices) in values(location_variations_dict) + @assert issorted(variation_indices) "Variation indices must be sorted after parsing." + end + location_parsed_variations = [loc => LocationParsedVariations(variations, variation_indices) for (loc, (variations, variation_indices)) in pairs(location_variations_dict)] |> NamedTuple + return new(sz, avs, location_parsed_variations) end - for v in [config_variation_indices, rulesets_collection_variation_indices, ic_cell_variation_indices, ic_ecm_variation_indices] - @assert issorted(v) "Variation indices must be sorted after parsing." 
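The rewritten constructor above replaces the four hard-coded per-location vectors with a dictionary keyed by location symbol, recording for each ElementaryVariation which entry of avs it came from. A rough standalone sketch of that grouping step, using plain symbols and strings instead of the package's types (the locations and targets below are made up for illustration):

varied_locations = [:config, :rulesets_collection, :ic_cell]      #! stand-in for project_locations.varied
evs = [(:config, "max_time"), (:config, "dt"), (:ic_cell, "x0")]  #! fake (location, target) pairs

grouped = Dict(loc => (String[], Int[]) for loc in varied_locations)
for (i, (loc, target)) in enumerate(evs)
    push!(grouped[loc][1], target)   #! the variation itself
    push!(grouped[loc][2], i)        #! index of the entry of avs it came from
end
grouped[:config]                     #! (["max_time", "dt"], [1, 2]); :rulesets_collection stays empty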
- end - return ParsedVariations(sz, avs, config_variations, rulesets_collection_variations, ic_cell_variations, ic_ecm_variations, config_variation_indices, rulesets_collection_variation_indices, ic_cell_variation_indices, ic_ecm_variation_indices) end +Base.getindex(pv::ParsedVariations, loc::Symbol) = pv.location_parsed_variations[loc] + ################## Grid Variations ################## -function addVariations(::GridVariation, inputs::InputFolders, pv::ParsedVariations, reference_variation_ids::VariationIDs) +struct AddGridVariationsResult <: AddVariationsResult + all_variation_ids::AbstractArray{VariationID} +end + +function addVariations(::GridVariation, inputs::InputFolders, pv::ParsedVariations, reference_variation_id::VariationID) @assert all(pv.sz .!= -1) "GridVariation only works with DiscreteVariations" - return [addLocationGridVariations(inputs, pv, reference_variation_ids, location) for location in [:config, :rulesets_collection, :ic_cell, :ic_ecm]] + all_location_variation_ids = [addLocationGridVariations(loc, inputs, pv, reference_variation_id) for loc in project_locations.varied] + return [([loc => loc_var_ids[i] for (loc, loc_var_ids) in zip(project_locations.varied, all_location_variation_ids)] |> VariationID) for i in eachindex(all_location_variation_ids[1])] |> AddGridVariationsResult end -function addLocationGridVariations(inputs::InputFolders, pv::ParsedVariations, reference_variation_ids::VariationIDs, location::Symbol) - variations_field = Symbol("$(location)_variations") - if isempty(getfield(pv, variations_field)) - return fill(getfield(reference_variation_ids, location), pv.sz...) +function addLocationGridVariations(location::Symbol, inputs::InputFolders, pv::ParsedVariations, reference_variation_id::VariationID) + if isempty(pv[location].variations) + return fill(reference_variation_id[location], pv.sz...) end - variation_indices_field = Symbol("$(location)_variation_indices") - fns = prepareVariationFunctions(location, inputs, pv, reference_variation_ids) - out = gridToDB(getfield(pv, variations_field), fns..., getfield(pv, variation_indices_field)) - dim_szs = [d in getfield(pv, variation_indices_field) ? pv.sz[d] : 1 for d in eachindex(pv.sz)] + out = gridToDB(location, pv, inputs[location].id, reference_variation_id[location]) + dim_szs = [d in pv[location].indices ? pv.sz[d] : 1 for d in eachindex(pv.sz)] out = reshape(out, dim_szs...) other_dims = [dim_szs[d] == 1 ? pv.sz[d] : 1 for d in eachindex(pv.sz)] return repeat(out, other_dims...) 
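The function above only builds database rows along the dimensions this location actually varies, then broadcasts the resulting ids across the remaining grid dimensions via reshape and repeat. A minimal standalone sketch of that broadcasting step, with made-up grid sizes and variation ids:

sz = [2, 3]                    #! full grid: 2 values along dim 1, 3 along dim 2
location_dims = [2]            #! suppose this location varies only along dim 2
ids = [10, 11, 12]             #! one made-up variation id per value along dim 2

dim_szs = [d in location_dims ? sz[d] : 1 for d in eachindex(sz)]   #! [1, 3]
out = reshape(ids, dim_szs...)                                      #! 1×3 matrix
other_dims = [dim_szs[d] == 1 ? sz[d] : 1 for d in eachindex(sz)]   #! [2, 1]
repeat(out, other_dims...)                                          #! 2×3; both rows carry the same ids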
end -function prepareVariationFunctions(location::Symbol, inputs::InputFolders, pv::ParsedVariations, reference_variation_ids::VariationIDs) - if location == :config - return prepareConfigVariationFunctions(inputs.config.id, pv.config_variations; reference_config_variation_id=reference_variation_ids.config) - elseif location == :rulesets_collection - return prepareRulesetsVariationFunctions(inputs.rulesets_collection.id; reference_rulesets_variation_id=reference_variation_ids.rulesets_collection) - elseif location == :ic_cell - return prepareICCellVariationFunctions(inputs.ic_cell.id; reference_ic_cell_variation_id=reference_variation_ids.ic_cell) - elseif location == :ic_ecm - return prepareICECMVariationFunctions(inputs.ic_ecm.id; reference_ic_ecm_variation_id=reference_variation_ids.ic_ecm) - else - error("Location not recognized.") - end +function gridToDB(location::Symbol, pv::ParsedVariations, folder_id::Int, reference_variation_id::Int) + return gridToDB(location, pv[location].variations, folder_id, reference_variation_id, pv[location].indices) end -function gridToDB(evs::Vector{<:ElementaryVariation}, addColumnsByPathsFn::Function, prepareAddNewFn::Function, addRowFn::Function, ev_dims::AbstractVector{Int}=1:length(evs)) - static_values, table_features = setUpColumns(evs, addColumnsByPathsFn, prepareAddNewFn) - +function gridToDB(evs::Vector{<:ElementaryVariation}, folder_id::Int, reference_variation_id::Int) + locs = location.(evs) + @assert all(locs .== locs[1]) "All variations must be in the same location to do gridToDB. Instead got $(locs)." + loc = locs[1] + return gridToDB(loc, evs, folder_id, reference_variation_id, 1:length(evs)) +end + +function gridToDB(location::Symbol, evs::Vector{<:ElementaryVariation}, folder_id::Int, reference_variation_id::Int, ev_dims::AbstractVector{Int}) + static_values, table_features = setUpColumns(location, evs, folder_id, reference_variation_id) + all_values = [] for ev_dim in unique(ev_dims) dim_indices = findall(ev_dim .== ev_dims) @@ -811,9 +712,9 @@ function gridToDB(evs::Vector{<:ElementaryVariation}, addColumnsByPathsFn::Funct sz_variations = size(NDG[1]) variation_ids = zeros(Int, sz_variations) for i in eachindex(NDG[1]) - dim_vals_as_vecs = [[A[i]...] for A in NDG] # ith entry is a vector of the values for the ith dimension + dim_vals_as_vecs = [[A[i]...] for A in NDG] #! ith entry is a vector of the values for the ith dimension varied_values = vcat(dim_vals_as_vecs...) .|> string |> x -> join("\"" .* x .* "\"", ",") - variation_ids[i] = addRowFn(table_features, static_values, varied_values) + variation_ids[i] = addVariationRow(location, folder_id, table_features, static_values, varied_values) end return variation_ids end @@ -824,55 +725,26 @@ function orthogonalLHS(k::Int, d::Int) n = k^d lhs_inds = zeros(Int, (n, d)) for i in 1:d - n_bins = k^(i - 1) # number of bins from previous dims (a bin has sampled points that are in the same subelement up through i-1 dim and need to be separated in subsequent dims) - bin_size = k^(d-i+1) # number of sampled points in each bin + n_bins = k^(i - 1) #! number of bins from previous dims (a bin has sampled points that are in the same subelement up through i-1 dim and need to be separated in subsequent dims) + bin_size = k^(d-i+1) #! 
number of sampled points in each bin if i == 1 lhs_inds[:, 1] = 1:n else - bin_inds_gps = [(j - 1) * bin_size .+ (1:bin_size) |> collect for j in 1:n_bins] # the indices belonging to each of the bins (this relies on the sorting step below to easily find which points are currently in the same box and need to be separated along the ith dimension) - for pt_ind = 1:bin_size # pick ith coordinate for each point in the bin; each iter here will work up the ith coordinates assigning one to each bin at each iter - ind = zeros(Int, n_bins) # indices where the next set of ith coordinates will go - for (j, bin_inds) in enumerate(bin_inds_gps) # pick a random, remaining element for each bin - rand_ind_of_ind = rand(1:length(bin_inds)) # pick the index of a remaining index - ind[j] = popat!(bin_inds, rand_ind_of_ind) # get the random index and remove it so we don't pick it again + bin_inds_gps = [(j - 1) * bin_size .+ (1:bin_size) |> collect for j in 1:n_bins] #! the indices belonging to each of the bins (this relies on the sorting step below to easily find which points are currently in the same box and need to be separated along the ith dimension) + for pt_ind = 1:bin_size #! pick ith coordinate for each point in the bin; each iter here will work up the ith coordinates assigning one to each bin at each iter + ind = zeros(Int, n_bins) #! indices where the next set of ith coordinates will go + for (j, bin_inds) in enumerate(bin_inds_gps) #! pick a random, remaining element for each bin + rand_ind_of_ind = rand(1:length(bin_inds)) #! pick the index of a remaining index + ind[j] = popat!(bin_inds, rand_ind_of_ind) #! get the random index and remove it so we don't pick it again end - lhs_inds[ind,i] = shuffle(1:n_bins) .+ (pt_ind - 1) * n_bins # for the selected inds, shuffle the next set of ith coords into them + lhs_inds[ind,i] = shuffle(1:n_bins) .+ (pt_ind - 1) * n_bins #! for the selected inds, shuffle the next set of ith coords into them end end - lhs_inds[:, 1:i] = sortslices(lhs_inds[:, 1:i], dims=1, by=x -> (x ./ (n / k) .|> ceil .|> Int)) # sort the found values so that sampled points in the same box upon projection into the 1:i dims are adjacent + lhs_inds[:, 1:i] = sortslices(lhs_inds[:, 1:i], dims=1, by=x -> (x ./ (n / k) .|> ceil .|> Int)) #! 
sort the found values so that sampled points in the same box upon projection into the 1:i dims are adjacent end return lhs_inds end -# function orthogonalLHS_relaxed(k::Int, d::Int) -# # I have this here because this technically gives all possible orthogonal lhs samples, but my orthogonalLHS gives a more uniform LHS -# n = k^d -# lhs_inds = zeros(Int, (n, d)) -# for i in 1:d -# bin_size = n / (k^(i - 1)) |> ceil |> Int # number of sampled points grouped by all previous dims -# n_bins = k^(i - 1) # number of bins in this dimension -# if i == 1 -# lhs_inds[:, 1] = 1:n -# continue -# else -# bin_inds_gps = [(j - 1) * bin_size .+ (1:bin_size) |> collect for j in 1:n_bins] # the indexes in y corresponding to each of the bins (this relies on the sorting step below to easily find which points are currently in the same box and need to be separated along the ith dimension) -# for pt_ind = 1:k -# y_vals = shuffle((pt_ind - 1) * Int(n / k) .+ (1:Int(n / k))) -# inds = zeros(Int, Int(n / k)) -# for (j, bin_inds) in enumerate(bin_inds_gps) -# for s in 1:Int(n / k^(i)) -# rand_ind_of_ind = rand(1:length(bin_inds)) -# rand_ind = popat!(bin_inds, rand_ind_of_ind) # random value remaining in bin, remove it so we don't pick it again -# inds[(j-1)*Int(n / k^(i))+s] = rand_ind # record the index -# end -# end -# lhs_inds[inds, i] = y_vals -# end -# end -# lhs_inds[:, 1:i] = sortslices(lhs_inds[:, 1:i], dims=1, by=x -> (x ./ (n / k) .|> ceil .|> Int)) # sort the found values so that sampled points in the same box upon projection into the 1:i dims are adjacent -# end -# end - """ generateLHSCDFs(n::Int, d::Int[; add_noise::Bool=false, rng::AbstractRNG=Random.GLOBAL_RNG, orthogonalize::Bool=true]) @@ -897,31 +769,34 @@ size(cdfs) ``` """ function generateLHSCDFs(n::Int, d::Int; add_noise::Bool=false, rng::AbstractRNG=Random.GLOBAL_RNG, orthogonalize::Bool=true) - cdfs = (Float64.(1:n) .- (add_noise ? rand(rng, Float64, n) : 0.5)) / n # permute below for each parameter separately + cdfs = (Float64.(1:n) .- (add_noise ? rand(rng, Float64, n) : 0.5)) / n #! permute below for each parameter separately k = n ^ (1 / d) |> round |> Int if orthogonalize && (n == k^d) - # then good to do the orthogonalization + #! then good to do the orthogonalization lhs_inds = orthogonalLHS(k, d) else - lhs_inds = reduce(hcat, [shuffle(rng, 1:n) for _ in 1:d]) # each shuffled index vector is added as a column + lhs_inds = reduce(hcat, [shuffle(rng, 1:n) for _ in 1:d]) #! 
each shuffled index vector is added as a column end return cdfs[lhs_inds] end -function addVariations(lhs_variation::LHSVariation, inputs::InputFolders, pv::ParsedVariations, reference_variation_ids::VariationIDs) +struct AddLHSVariationsResult <: AddVariationsResult + all_variation_ids::AbstractArray{VariationID} +end + +function addVariations(lhs_variation::LHSVariation, inputs::InputFolders, pv::ParsedVariations, reference_variation_id::VariationID) d = length(pv.sz) cdfs = generateLHSCDFs(lhs_variation.n, d; add_noise=lhs_variation.add_noise, rng=lhs_variation.rng, orthogonalize=lhs_variation.orthogonalize) - return [addLocationCDFVariations(inputs, pv, reference_variation_ids, location, cdfs) for location in [:config, :rulesets_collection, :ic_cell, :ic_ecm]] + all_location_variation_ids = [addLocationCDFVariations(loc, inputs, pv, reference_variation_id, cdfs) for loc in project_locations.varied] + return [([loc => loc_var_ids[i] for (loc, loc_var_ids) in zip(project_locations.varied, all_location_variation_ids)] |> VariationID) for i in eachindex(all_location_variation_ids[1])] |> AddLHSVariationsResult end -function addLocationCDFVariations(inputs::InputFolders, pv::ParsedVariations, reference_variation_ids::VariationIDs, location::Symbol, cdfs::AbstractMatrix{Float64}) - variations_field = Symbol("$(location)_variations") - if isempty(getfield(pv, variations_field)) - return fill(getfield(reference_variation_ids, location), size(cdfs, 1)) +function addLocationCDFVariations(location::Symbol, inputs::InputFolders, pv::ParsedVariations, reference_variation_id::VariationID, cdfs::AbstractMatrix{Float64}) + if isempty(pv[location].variations) + #! if the location is not varied, just return the reference variation id + return fill(reference_variation_id[location], size(cdfs, 1)) end - variation_indices_field = Symbol("$(location)_variation_indices") - fns = prepareVariationFunctions(location, inputs, pv, reference_variation_ids) - return cdfsToVariations(cdfs, getfield(pv, variations_field), fns..., getfield(pv, variation_indices_field)) + return cdfsToVariations(location, pv, inputs[location].id, reference_variation_id[location], cdfs) end ################## Sobol Sequence Sampling Functions ################## @@ -971,27 +846,27 @@ size(cdfs) """ function generateSobolCDFs(n::Int, d::Int; n_matrices::Int=1, T::Type=Float64, randomization::RandomizationMethod=NoRand(), skip_start::Union{Missing, Bool, Int}=missing, include_one::Union{Missing, Bool}=missing) s = Sobol.SobolSeq(d * n_matrices) - if ismissing(skip_start) # default to this - if ispow2(n + 1) # then n = 2^k - 1 - skip_start = 1 # skip the first point (0) + if ismissing(skip_start) #! default to this + if ispow2(n + 1) #! then n = 2^k - 1 + skip_start = 1 #! skip the first point (0) else - skip_start = false # don't skip the first point (0) - if ispow2(n - 1) # then n = 2^k + 1 - include_one |= ismissing(include_one) # unless otherwise specified, assume the +1 is to get the boundary 1 included as well - elseif ispow2(n) # then n = 2^k - nothing # including 0, grab the first 2^k points - else # not within 1 of a power of 2, just start at the beginning? + skip_start = false #! don't skip the first point (0) + if ispow2(n - 1) #! then n = 2^k + 1 + include_one |= ismissing(include_one) #! unless otherwise specified, assume the +1 is to get the boundary 1 included as well + elseif ispow2(n) #! then n = 2^k + nothing #! including 0, grab the first 2^k points + else #! not within 1 of a power of 2, just start at the beginning? 
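The branching above (which closes just below) picks defaults for skip_start and include_one based on how n sits relative to a power of two. A rough summary of those defaults as a standalone sketch; sobol_defaults is a hypothetical helper for illustration only, not the package function, and it ignores the case where the caller supplies include_one explicitly:

#! hypothetical helper summarizing the defaults; illustration only
function sobol_defaults(n::Int)
    if ispow2(n + 1)         #! n = 2^k - 1: skip the leading 0 point
        return (skip_start = 1, include_one = false)
    elseif ispow2(n - 1)     #! n = 2^k + 1: keep 0 and append the boundary 1
        return (skip_start = false, include_one = true)
    else                     #! n = 2^k, or not near a power of 2: just take the first n points starting from 0
        return (skip_start = false, include_one = false)
    end
end
sobol_defaults(7), sobol_defaults(8), sobol_defaults(9)  #! skip_start = 1 for 7; include_one = true for 9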
nothing end end end - n_draws = n - (include_one===true) # if include_one is true, then we need to draw n-1 points and then append 1 to the end - if skip_start == false # false or 0 - cdfs = randomize(reduce(hcat, [zeros(T, n_matrices * d), [next!(s) for i in 1:n_draws-1]...]), randomization) # n_draws-1 because the SobolSeq already skips 0 + n_draws = n - (include_one===true) #! if include_one is true, then we need to draw n-1 points and then append 1 to the end + if skip_start == false #! false or 0 + cdfs = randomize(reduce(hcat, [zeros(T, n_matrices * d), [next!(s) for i in 1:n_draws-1]...]), randomization) #! n_draws-1 because the SobolSeq already skips 0 else cdfs = Matrix{T}(undef, d * n_matrices, n_draws) num_to_skip = skip_start === true ? ((1 << (floor(Int, log2(n_draws - 1)) + 1))) : skip_start - num_to_skip -= 1 # the SobolSeq already skips 0 + num_to_skip -= 1 #! the SobolSeq already skips 0 for _ in 1:num_to_skip Sobol.next!(s) end @@ -1000,7 +875,7 @@ function generateSobolCDFs(n::Int, d::Int; n_matrices::Int=1, T::Type=Float64, r end cdfs = randomize(cdfs, randomization) end - if include_one===true # cannot compare missing==true, but can make this comparison + if include_one===true #! cannot compare missing==true, but can make this comparison cdfs = hcat(cdfs, ones(T, d * n_matrices)) end return reshape(cdfs, (d, n_matrices, n)) @@ -1008,14 +883,20 @@ end generateSobolCDFs(sobol_variation::SobolVariation, d::Int) = generateSobolCDFs(sobol_variation.n, d; n_matrices=sobol_variation.n_matrices, randomization=sobol_variation.randomization, skip_start=sobol_variation.skip_start, include_one=sobol_variation.include_one) -function addVariations(sobol_variation::SobolVariation, inputs::InputFolders, pv::ParsedVariations, reference_variation_ids::VariationIDs) +struct AddSobolVariationsResult <: AddVariationsResult + all_variation_ids::AbstractArray{VariationID} + cdfs::Array{Float64, 3} +end + +function addVariations(sobol_variation::SobolVariation, inputs::InputFolders, pv::ParsedVariations, reference_variation_id::VariationID) d = length(pv.sz) - cdfs = generateSobolCDFs(sobol_variation, d) # cdfs is (d, sobol_variation.n_matrices, sobol_variation.n) - cdfs_reshaped = reshape(cdfs, (d, sobol_variation.n_matrices * sobol_variation.n)) # reshape to (d, sobol_variation.n_matrices * sobol_variation.n) so that each column is a sobol sample - cdfs_reshaped = cdfs_reshaped' # transpose so that each row is a sobol sample - cvis, rvis, ivis, evis = [addLocationCDFVariations(inputs, pv, reference_variation_ids, location, cdfs_reshaped) for location in [:config, :rulesets_collection, :ic_cell, :ic_ecm]] .|> - x -> reshape(x, (sobol_variation.n_matrices, sobol_variation.n))' # first, each sobol matrix variation indices goes into a row so that each column is the kth sample for each matrix; take the transpose so that each column corresponds to a matrix - return cvis, rvis, ivis, evis, cdfs + cdfs = generateSobolCDFs(sobol_variation, d) #! cdfs is (d, sobol_variation.n_matrices, sobol_variation.n) + cdfs_reshaped = reshape(cdfs, (d, sobol_variation.n_matrices * sobol_variation.n)) #! reshape to (d, sobol_variation.n_matrices * sobol_variation.n) so that each column is a sobol sample + cdfs_reshaped = cdfs_reshaped' #! 
transpose so that each row is a sobol sample + all_location_variation_ids = [addLocationCDFVariations(location, inputs, pv, reference_variation_id, cdfs_reshaped) for location in project_locations.varied] + all_variation_ids = [([loc => loc_var_ids[i] for (loc, loc_var_ids) in zip(project_locations.varied, all_location_variation_ids)] |> VariationID) for i in eachindex(all_location_variation_ids[1])] + all_variation_ids = reshape(all_variation_ids, (sobol_variation.n_matrices, sobol_variation.n)) |> permutedims + return AddSobolVariationsResult(all_variation_ids, cdfs) end ################## Random Balance Design Sampling Functions ################## @@ -1036,7 +917,7 @@ function generateRBDCDFs(rbd_variation::RBDVariation, d::Int) else skip_start = false end - S = generateSobolCDFs(rbd_variation.n, d; n_matrices=1, randomization=NoRand(), skip_start=skip_start, include_one=rbd_variation.pow2_diff==1) # pre_s is (d, n_matrices, rbd_variation.n) + S = generateSobolCDFs(rbd_variation.n, d; n_matrices=1, randomization=NoRand(), skip_start=skip_start, include_one=rbd_variation.pow2_diff==1) #! pre_s is (d, n_matrices, rbd_variation.n) S = reshape(S, d, rbd_variation.n)' cdfs = deepcopy(S) end @@ -1050,41 +931,6 @@ function generateRBDCDFs(rbd_variation::RBDVariation, d::Int) return cdfs, S end -# This function could be used to get a Sobol sequence for the RBD using all [-π, π] values -# function generateRBDCDFs(n::Int, d::Int; rng::AbstractRNG=Random.GLOBAL_RNG, use_sobol::Bool=true) -# if use_sobol -# println("Using Sobol sequence for RBD.") -# sobol_seq = SobolSeq(d) -# pre_s = zeros(Float64, (d, n-1)) -# for col in eachcol(pre_s) -# next!(sobol_seq, col) -# end -# S = -π .+ 2π * pre_s' -# if n == 1 -# cdfs = 0.5 .+ zeros(Float64, (1,d)) -# elseif n == 2 -# cdfs = 0.5 .+ zeros(Float64, (2, d)) -# else -# cdfs_all = range(0, stop=1, length=Int(n / 2) + 1) |> collect # all the cdf values that will be used -# cdfs_all = vcat(cdfs_all, reverse(cdfs_all[2:end-1])) # make them go 0->1->0 but not repeat the 0 and 1 -# cdfs_all = circshift(cdfs_all, Int(n / 4)) # shift so that the first value is 0.5 and we begin by decreasing to 0 -# # now use the values from cdfs_all to create the cdfs for each parameter based on the pre_s values -# cdfs = zeros(Float64, (n, d)) -# for (pre_s_row, cdfs_col) in zip(eachrow(pre_s), eachcol(cdfs)) -# ord = sortperm(pre_s_row) |> invperm # this ranks all of the pre_s values for this parameter (remember, the first one at 0 is omitted by SobolSeq) -# cdfs_col .= [0.5; cdfs_all[ord.+1]] # the first one (0.5) comes from the omitted 0.0 to start the SobolSeq; the rest are the cdfs based on the pre_s values (the +1 is because ord comes from pre_s which omits the first element, but this first element is included in cdfs_all) -# end -# end -# # cdfs = 0.5 .+ [zeros(Float64, (1,d)); asin.(sin.(S)) ./ π] # this is the simpler line to do this, but floating point arithmetic introduces some differences that should not exist when using a n=2^k Sobol sequence -# else -# pre_s = range(-π, stop = π, length = n+1) |> collect -# pop!(pre_s) -# S = [s0[randperm(rng, n)] for _ in 1:d] |> x->reduce(hcat, x) -# cdfs = 0.5 .+ asin.(sin.(S)) ./ π -# end -# return cdfs, S -# end - function createSortedRBDMatrix(variation_ids::Vector{Int}, S::AbstractMatrix{Float64}) variations_matrix = Array{Int}(undef, size(S)) for (vm_col, s_col) in zip(eachcol(variations_matrix), eachcol(S)) @@ -1093,23 +939,32 @@ function createSortedRBDMatrix(variation_ids::Vector{Int}, S::AbstractMatrix{Flo return 
variations_matrix end -function addVariations(rbd_variation::RBDVariation, inputs::InputFolders, pv::ParsedVariations, reference_variation_ids::VariationIDs) +struct AddRBDVariationsResult <: AddVariationsResult + all_variation_ids::AbstractArray{VariationID} + location_variation_ids_dict::Dict{Symbol, Matrix{Int}} +end + +function addVariations(rbd_variation::RBDVariation, inputs::InputFolders, pv::ParsedVariations, reference_variation_id::VariationID) d = length(pv.sz) cdfs, S = generateRBDCDFs(rbd_variation, d) - config_vids, rules_vids, ic_cell_vids, ic_ecm_vids = [addLocationCDFVariations(inputs, pv, reference_variation_ids, location, cdfs) for location in [:config, :rulesets_collection, :ic_cell, :ic_ecm]] - config_var_matrix, rules_var_matrix, ic_cell_var_matrix, ic_ecm_var_matrix = [createSortedRBDMatrix(vids, S) for vids in [config_vids, rules_vids, ic_cell_vids, ic_ecm_vids]] - return config_vids, rules_vids, ic_cell_vids, ic_ecm_vids, config_var_matrix, rules_var_matrix, ic_cell_var_matrix, ic_ecm_var_matrix + all_location_variation_ids = [addLocationCDFVariations(location, inputs, pv, reference_variation_id, cdfs) for location in project_locations.varied] + variation_matrices = [createSortedRBDMatrix(vids, S) for vids in all_location_variation_ids] + all_variation_ids = [([loc => loc_var_ids[i] for (loc, loc_var_ids) in zip(project_locations.varied, all_location_variation_ids)] |> VariationID) for i in eachindex(all_location_variation_ids[1])] + location_variation_ids_dict = [loc => variation_matrices[i] for (i, loc) in enumerate(project_locations.varied)] |> Dict + return AddRBDVariationsResult(all_variation_ids, location_variation_ids_dict) end ################## Sampling Helper Functions ################## -function cdfsToVariations(cdfs::AbstractMatrix{Float64}, evs::Vector{<:ElementaryVariation}, addColumnsByPathsFn::Function, prepareAddNewFn::Function, addRowFn::Function, ev_dims::AbstractVector{Int}=1:length(evs)) - static_values, table_features = setUpColumns(evs, addColumnsByPathsFn, prepareAddNewFn) +function cdfsToVariations(location::Symbol, pv::ParsedVariations, folder_id::Int, reference_variation_id::Int, cdfs::AbstractMatrix{Float64}) + evs = pv[location].variations + static_values, table_features = setUpColumns(location, evs, folder_id, reference_variation_id) n = size(cdfs, 1) new_values = [] + ev_dims = pv[location].indices for (ev, col_ind) in zip(evs, ev_dims) - new_value = _values(ev, cdfs[:,col_ind]) # ok, all the new values for the given parameter + new_value = _values(ev, cdfs[:,col_ind]) #! ok, all the new values for the given parameter push!(new_values, new_value) end @@ -1117,35 +972,7 @@ function cdfsToVariations(cdfs::AbstractMatrix{Float64}, evs::Vector{<:Elementar for i in 1:n varied_values = [new_value[i] for new_value in new_values] .|> string |> x -> join("\"" .* x .* "\"", ",") - variation_ids[i] = addRowFn(table_features, static_values, varied_values) + variation_ids[i] = addVariationRow(location, folder_id, table_features, static_values, varied_values) end return variation_ids -end - -function prepareConfigVariationFunctions(config_id::Int, evs::Vector{<:ElementaryVariation}; reference_config_variation_id=0) - addColumnsByPathsFn = (paths) -> addConfigVariationColumns(config_id, paths, dataType.(evs)) - prepareAddNewFn = (args...) -> prepareAddNewConfigVariations(config_id, args...; reference_config_variation_id=reference_config_variation_id) - addRowFn = (args...) -> addConfigVariationRow(config_id, args...) 
- return addColumnsByPathsFn, prepareAddNewFn, addRowFn -end - -function prepareRulesetsVariationFunctions(rulesets_collection_id::Int; reference_rulesets_variation_id::Int=0) - addColumnsByPathsFn = (paths) -> addRulesetsVariationsColumns(rulesets_collection_id, paths) - prepareAddNewFn = (args...) -> prepareAddNewRulesetsVariations(rulesets_collection_id, args...; reference_rulesets_variation_id=reference_rulesets_variation_id) - addRowFn = (args...) -> addRulesetsVariationRow(rulesets_collection_id, args...) - return addColumnsByPathsFn, prepareAddNewFn, addRowFn -end - -function prepareICCellVariationFunctions(ic_cell_id::Int; reference_ic_cell_variation_id::Int=0) - addColumnsByPathsFn = (paths) -> addICCellVariationColumns(ic_cell_id, paths) - prepareAddNewFn = (args...) -> prepareAddNewICCellVariations(ic_cell_id, args...; reference_ic_cell_variation_id=reference_ic_cell_variation_id) - addRowFn = (args...) -> addICCellVariationRow(ic_cell_id, args...) - return addColumnsByPathsFn, prepareAddNewFn, addRowFn -end - -function prepareICECMVariationFunctions(ic_ecm_id::Int; reference_ic_ecm_variation_id::Int=0) - addColumnsByPathsFn = (paths) -> addICECMVariationColumns(ic_ecm_id, paths) - prepareAddNewFn = (args...) -> prepareAddNewICECMVariations(ic_ecm_id, args...; reference_ic_ecm_variation_id=reference_ic_ecm_variation_id) - addRowFn = (args...) -> addICECMVariationRow(ic_ecm_id, args...) - return addColumnsByPathsFn, prepareAddNewFn, addRowFn end \ No newline at end of file diff --git a/src/VCTVersion.jl b/src/VCTVersion.jl index 72fb6080..3eefc60a 100644 --- a/src/VCTVersion.jl +++ b/src/VCTVersion.jl @@ -12,7 +12,7 @@ function pcvctVersion() end function pcvctDBVersion(is_new_db::Bool) - # check if versions table exists + #! check if versions table exists table_name = "pcvct_version" versions_exists = DBInterface.execute(db, "SELECT name FROM sqlite_master WHERE type='table' AND name='$(table_name)';") |> DataFrame |> x -> (length(x.name)==1) if !versions_exists diff --git a/test.txt b/test.txt deleted file mode 100644 index 1c943a98..00000000 --- a/test.txt +++ /dev/null @@ -1,3 +0,0 @@ -a -b -c \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index 030cdd8e..3dcaded3 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,36 +1,41 @@ using pcvct, Test -include("./test-project/VCT/PrintHelpers.jl") +include("./test-scripts/PrintHelpers.jl") @testset "pcvct.jl" begin - # Write your tests here. - include("./test-project/VCT/CreateProjectTests.jl") - - include("./test-project/VCT/GenerateData.jl") # this file is created by CreateProjectTests.jl - - include("./test-project/VCT/RunnerTests.jl") - include("./test-project/VCT/UserAPITests.jl") - include("./test-project/VCT/ImportTests.jl") - include("./test-project/VCT/PrunerTests.jl") - include("./test-project/VCT/ConfigurationTests.jl") - include("./test-project/VCT/ICCellTests.jl") - include("./test-project/VCT/ICECMTests.jl") - include("./test-project/VCT/ExportTests.jl") - include("./test-project/VCT/SensitivityTests.jl") - include("./test-project/VCT/DatabaseTests.jl") - include("./test-project/VCT/ClassesTests.jl") - include("./test-project/VCT/LoaderTests.jl") + #! Write your tests here. + include("./test-scripts/CreateProjectTests.jl") + + include("./VCT/GenerateData.jl") #! 
this file is created by CreateProjectTests.jl + + include("./test-scripts/ProjectConfigurationTests.jl") + + include("./test-scripts/RunnerTests.jl") + include("./test-scripts/UserAPITests.jl") + include("./test-scripts/ImportTests.jl") + include("./test-scripts/PrunerTests.jl") + include("./test-scripts/ConfigurationTests.jl") + + include("./test-scripts/IntracellularTests.jl") + include("./test-scripts/ICCellTests.jl") + include("./test-scripts/ICECMTests.jl") + + include("./test-scripts/ExportTests.jl") + include("./test-scripts/SensitivityTests.jl") + include("./test-scripts/DatabaseTests.jl") + include("./test-scripts/ClassesTests.jl") + include("./test-scripts/LoaderTests.jl") if Sys.isapple() - include("./test-project/VCT/MovieTests.jl") + include("./test-scripts/MovieTests.jl") end - include("./test-project/VCT/PopulationTests.jl") - include("./test-project/VCT/SubstrateTests.jl") - include("./test-project/VCT/VariationsTests.jl") - include("./test-project/VCT/HPCTests.jl") - include("./test-project/VCT/ModuleTests.jl") - include("./test-project/VCT/PhysiCellVersionTests.jl") - include("./test-project/VCT/PhysiCellStudioTests.jl") - - # probably want this one last (it deletes/resets things) - include("./test-project/VCT/DeletionTests.jl") -end + include("./test-scripts/PopulationTests.jl") + include("./test-scripts/SubstrateTests.jl") + include("./test-scripts/VariationsTests.jl") + include("./test-scripts/HPCTests.jl") + include("./test-scripts/ModuleTests.jl") + include("./test-scripts/PhysiCellVersionTests.jl") + include("./test-scripts/PhysiCellStudioTests.jl") + + #! probably want this one last (it deletes/resets things) + include("./test-scripts/DeletionTests.jl") +end \ No newline at end of file diff --git a/test/test-project/VCT/CreateProjectTests.jl b/test/test-project/VCT/CreateProjectTests.jl deleted file mode 100644 index 45c97d44..00000000 --- a/test/test-project/VCT/CreateProjectTests.jl +++ /dev/null @@ -1,8 +0,0 @@ -using LightXML -filename = @__FILE__ -filename = split(filename, "/") |> last -str = "TESTING WITH $(filename)" -hashBorderPrint(str) - -project_dir = "./test-project" -createProject(project_dir) \ No newline at end of file diff --git a/test/test-project/VCT/ClassesTests.jl b/test/test-scripts/ClassesTests.jl similarity index 68% rename from test/test-project/VCT/ClassesTests.jl rename to test/test-scripts/ClassesTests.jl index afad32c0..b029a9c1 100644 --- a/test/test-project/VCT/ClassesTests.jl +++ b/test/test-scripts/ClassesTests.jl @@ -24,9 +24,9 @@ rulesets_collection_variation_ids = [1, 1] ic_cell_variation_ids = [0, 0] sampling = Sampling(inputs; n_replicates=n_replicates, - config_variation_ids=config_variation_ids, - rulesets_collection_variation_ids=rulesets_collection_variation_ids, - ic_cell_variation_ids=ic_cell_variation_ids + location_variation_ids=Dict{Symbol,Union{Integer,AbstractArray{<:Integer}}}(:config => config_variation_ids, + :rulesets_collection => rulesets_collection_variation_ids, + :ic_cell => ic_cell_variation_ids) ) @test sampling isa Sampling @@ -46,4 +46,4 @@ trial = Trial(samplings) # misc tests inputs = InputFolders(; config="0_template", custom_code="0_template") simulation = Simulation(Monad(1)) -sampling = Sampling(inputs; config_variation_ids=0, rulesets_collection_variation_ids=-1, ic_cell_variation_ids=-1) \ No newline at end of file +sampling = Sampling(inputs; location_variation_ids=Dict{Symbol,Union{Integer,AbstractArray{<:Integer}}}(:config => 0, :rulesets_collection => -1, :ic_cell => -1)) \ No newline at end 
of file diff --git a/test/test-project/VCT/ConfigurationTests.jl b/test/test-scripts/ConfigurationTests.jl similarity index 88% rename from test/test-project/VCT/ConfigurationTests.jl rename to test/test-scripts/ConfigurationTests.jl index f2ad6530..42b8f410 100644 --- a/test/test-project/VCT/ConfigurationTests.jl +++ b/test/test-scripts/ConfigurationTests.jl @@ -10,7 +10,7 @@ inputs = InputFolders(config_folder, custom_code_folder; rulesets_collection=rul n_replicates = 2 -path_to_xml = joinpath("test-project", "data", "inputs", "configs", config_folder, "PhysiCell_settings.xml") +path_to_xml = joinpath("data", "inputs", "configs", config_folder, "PhysiCell_settings.xml") cell_type = "default" @@ -32,7 +32,7 @@ element_paths = [ xml_doc = pcvct.openXML(path_to_xml) for ep in element_paths ce = pcvct.retrieveElement(xml_doc, ep[2]; required=false) - @test !isnothing(ce) # make sure the element was found + @test !isnothing(ce) #! make sure the element was found end pcvct.closeXML(xml_doc) @@ -129,4 +129,10 @@ discrete_variations = DiscreteVariation[] addAttackRateVariationDimension!(discrete_variations, cell_type, cell_type, [0.1]) out = run(reference_monad, discrete_variations; n_replicates=n_replicates) -@test out.n_success == length(out.trial) \ No newline at end of file +@test out.n_success == length(out.trial) + +@test isnothing(pcvct.prepareVariedInputFolder(:custom_code, Sampling(1))) #! returns nothing because custom codes is not varied +@test_throws ArgumentError pcvct.shortLocationVariationID(:not_a_location) +@test_nowarn pcvct.shortVariationName(:intracellular, "not_a_var") +@test_nowarn pcvct.shortVariationName(:intracellular, "intracellular_variation_id") +@test_throws ArgumentError pcvct.shortVariationName(:not_a_location, "not_a_var") \ No newline at end of file diff --git a/test/test-scripts/CreateProjectTests.jl b/test/test-scripts/CreateProjectTests.jl new file mode 100644 index 00000000..c36c4b5f --- /dev/null +++ b/test/test-scripts/CreateProjectTests.jl @@ -0,0 +1,12 @@ +using LightXML +filename = @__FILE__ +filename = split(filename, "/") |> last +str = "TESTING WITH $(filename)" +hashBorderPrint(str) + +project_dir = "." 
+createProject(project_dir) + +# tests for coverage +@test pcvct.icFilename("ecms") == "ecm.csv" +@test pcvct.icFilename("dcs") == "dcs.csv" \ No newline at end of file diff --git a/test/test-project/VCT/DatabaseTests.jl b/test/test-scripts/DatabaseTests.jl similarity index 59% rename from test/test-project/VCT/DatabaseTests.jl rename to test/test-scripts/DatabaseTests.jl index 5f6349d5..8f100d44 100644 --- a/test/test-project/VCT/DatabaseTests.jl +++ b/test/test-scripts/DatabaseTests.jl @@ -8,10 +8,6 @@ hashBorderPrint(str) simulationsTable() simulation_ids = 1:5 printSimulationsTable(simulation_ids) -pcvct.printConfigVariationsTable(simulation_ids) -pcvct.printRulesetsVariationsTable(simulation_ids) -pcvct.printICCellVariationsTable(simulation_ids) -pcvct.printICECMVariationsTable(simulation_ids) # test required folders config_src_folder = joinpath(pcvct.data_dir, "inputs", "configs") @@ -41,25 +37,25 @@ schema_without_primary_id = "" @test_throws ArgumentError pcvct.icFilename("ecm") # misc tests -config_db = pcvct.configDB(Simulation(1)) +config_db = pcvct.variationsDatabase(:config, Simulation(1)) @test config_db isa SQLite.DB -ic_cell_db = pcvct.icCellDB(Simulation(1)) +ic_cell_db = pcvct.variationsDatabase(:ic_cell, Simulation(1)) @test ic_cell_db isa Missing -ic_ecm_db = pcvct.icECMDB(Simulation(1)) +ic_ecm_db = pcvct.variationsDatabase(:ic_ecm, Simulation(1)) @test ic_ecm_db isa Nothing -pcvct.configVariationIDs(Simulation(1)) -pcvct.configVariationIDs(Sampling(1)) -pcvct.rulesetsVariationIDs(Simulation(1)) -pcvct.rulesetsVariationIDs(Sampling(1)) -pcvct.icCellVariationIDs(Simulation(1)) -pcvct.icCellVariationIDs(Sampling(1)) -pcvct.icECMVariationIDs(Simulation(1)) -pcvct.icECMVariationIDs(Sampling(1)) - -pcvct.configVariationsTable(Sampling(1); remove_constants=true) -pcvct.rulesetsVariationsTable(Sampling(1); remove_constants=true) -pcvct.icCellVariationsTable(Sampling(1); remove_constants=true) -pcvct.icECMVariationsTable(Sampling(1); remove_constants=true) \ No newline at end of file +pcvct.variationIDs(:config, Simulation(1)) +pcvct.variationIDs(:config, Sampling(1)) +pcvct.variationIDs(:rulesets_collection, Simulation(1)) +pcvct.variationIDs(:rulesets_collection, Sampling(1)) +pcvct.variationIDs(:ic_cell, Simulation(1)) +pcvct.variationIDs(:ic_cell, Sampling(1)) +pcvct.variationIDs(:ic_ecm, Simulation(1)) +pcvct.variationIDs(:ic_ecm, Sampling(1)) + +pcvct.variationsTable(:config, Sampling(1); remove_constants=true) +pcvct.variationsTable(:rulesets_collection, Sampling(1); remove_constants=true) +pcvct.variationsTable(:ic_cell, Sampling(1); remove_constants=true) +pcvct.variationsTable(:ic_ecm, Sampling(1); remove_constants=true) \ No newline at end of file diff --git a/test/test-project/VCT/DeletionTests.jl b/test/test-scripts/DeletionTests.jl similarity index 85% rename from test/test-project/VCT/DeletionTests.jl rename to test/test-scripts/DeletionTests.jl index e8bb55ed..4128adcd 100644 --- a/test/test-project/VCT/DeletionTests.jl +++ b/test/test-scripts/DeletionTests.jl @@ -19,7 +19,7 @@ pcvct.deleteSampling(1) pcvct.deleteTrial(1) input_buffer = IOBuffer("n") -old_stdin = stdin # Save the original stdin +old_stdin = stdin #! Save the original stdin Base.stdin = input_buffer deleteSimulationsByStatus(["Queued", "Failed"]) Base.stdin = old_stdin @@ -30,19 +30,19 @@ pcvct.deleteAllSimulations() resetDatabase(; force_reset=true) input_buffer = IOBuffer("y") -old_stdin = stdin # Save the original stdin +old_stdin = stdin #! 
Save the original stdin Base.stdin = input_buffer resetDatabase() Base.stdin = old_stdin input_buffer = IOBuffer("n\nn\n") -old_stdin = stdin # Save the original stdin +old_stdin = stdin #! Save the original stdin Base.stdin = input_buffer @test_throws ErrorException resetDatabase() Base.stdin = old_stdin input_buffer = IOBuffer("n\ny\n") -old_stdin = stdin # Save the original stdin +old_stdin = stdin #! Save the original stdin Base.stdin = input_buffer resetDatabase() Base.stdin = old_stdin \ No newline at end of file diff --git a/test/test-project/VCT/ExportTests.jl b/test/test-scripts/ExportTests.jl similarity index 100% rename from test/test-project/VCT/ExportTests.jl rename to test/test-scripts/ExportTests.jl diff --git a/test/test-project/VCT/HPCTests.jl b/test/test-scripts/HPCTests.jl similarity index 94% rename from test/test-project/VCT/HPCTests.jl rename to test/test-scripts/HPCTests.jl index 56aed781..c99b936a 100644 --- a/test/test-project/VCT/HPCTests.jl +++ b/test/test-scripts/HPCTests.jl @@ -11,7 +11,7 @@ simulation = Simulation(1) monad = Monad(simulation) cmd_local = pcvct.prepareSimulationCommand(simulation, monad.id, true, false) -cmd_local_str = string(cmd_local) +cmd_local_str = string(Cmd(cmd_local.exec)) cmd_local_str = strip(cmd_local_str, '`') cmd_hpc = pcvct.prepareHPCCommand(cmd_local, simulation.id) @@ -36,7 +36,7 @@ end_of_day = DateTime(Dates.year(current_time), Dates.month(current_time), Dates threshold_time = end_of_day - Second(threshold_seconds) is_about_to_be_next_day = current_time >= threshold_time if is_about_to_be_next_day - # if it's about to be the next day, wait until it is the next day + #! if it's about to be the next day, wait until it is the next day sleep(threshold_seconds + 1) end path_to_dummy_file = joinpath(pcvct.data_dir, "test.txt") diff --git a/test/test-project/VCT/ICCellTests.jl b/test/test-scripts/ICCellTests.jl similarity index 84% rename from test/test-project/VCT/ICCellTests.jl rename to test/test-scripts/ICCellTests.jl index a3b3affc..feed842d 100644 --- a/test/test-project/VCT/ICCellTests.jl +++ b/test/test-scripts/ICCellTests.jl @@ -32,7 +32,7 @@ hashBorderPrint("SUCCESSFULLY CREATED SAMPLING WITH IC CELL VARIATION!") @test out.n_success == length(out.trial) -simulation_with_ic_cell_xml_id = getSimulationIDs(out.trial)[1] # used in ExportTests.jl +simulation_with_ic_cell_xml_id = getSimulationIDs(out.trial)[1] #! used in ExportTests.jl hashBorderPrint("SUCCESSFULLY RAN SAMPLING WITH IC CELL VARIATION!") @@ -43,10 +43,12 @@ push!(discrete_variations, DiscreteVariation(xml_path, 300.0)) out_fail = run(Monad(out.trial.monad_ids[1]), discrete_variations; n_replicates=n_replicates) @test out_fail.n_success == 0 -pcvct.createICCellXMLTemplate("2_xml") -@test isdir(joinpath(pcvct.data_dir, "inputs", "ics", "cells", "2_xml")) +ic_cell_folder = pcvct.createICCellXMLTemplate("2_xml") +@test ic_cell_folder == "2_xml" +@test isdir(pcvct.locationPath(:ic_cell, ic_cell_folder)) +@test_nowarn pcvct.createICECMXMLTemplate(ic_cell_folder) xml_path = ["cell_patches:name:default", "patch_collection:type:disc", "patch:ID:1", "x0"] -dv1 = DiscreteVariation(xml_path, -1e6) # outside the domain so none can be placed +dv1 = DiscreteVariation(xml_path, -1e6) #! 
outside the domain so none can be placed out = run(inputs, dv1) @test out.n_success == 0 \ No newline at end of file diff --git a/test/test-project/VCT/ICECMTests.jl b/test/test-scripts/ICECMTests.jl similarity index 90% rename from test/test-project/VCT/ICECMTests.jl rename to test/test-scripts/ICECMTests.jl index 460d260c..b1f4d7fd 100644 --- a/test/test-project/VCT/ICECMTests.jl +++ b/test/test-scripts/ICECMTests.jl @@ -6,7 +6,8 @@ str = "TESTING WITH $(filename)" hashBorderPrint(str) ic_ecm_folder = "1_xml" -pcvct.createICECMXMLTemplate(ic_ecm_folder) +ic_ecm_folder = pcvct.createICECMXMLTemplate(ic_ecm_folder) +@test_nowarn pcvct.createICECMXMLTemplate(ic_ecm_folder) config_folder = "template-ecm" custom_code_folder = "template-ecm" diff --git a/test/test-project/VCT/ImportTests.jl b/test/test-scripts/ImportTests.jl similarity index 69% rename from test/test-project/VCT/ImportTests.jl rename to test/test-scripts/ImportTests.jl index 64b862ce..205fad02 100644 --- a/test/test-project/VCT/ImportTests.jl +++ b/test/test-scripts/ImportTests.jl @@ -6,7 +6,7 @@ hashBorderPrint(str) config_folder = "immune_sample" custom_code_folder = rulesets_collection_folder = ic_cell_folder = "immune_function" -path_to_project = "./test-project/PhysiCell/sample_projects/immune_function" +path_to_project = joinpath("PhysiCell", "sample_projects", "immune_function") dest = Dict() dest["config"] = config_folder @@ -31,25 +31,25 @@ out = run(sampling; force_recompile=false) success = importProject(path_to_project, src, dest) @test success -@test isdir(joinpath(pcvct.data_dir, "inputs", "configs", "immune_sample_1")) +@test isdir(pcvct.locationPath(:config, "immune_sample_1")) src["rules"] = "not_rules.csv" success = importProject(path_to_project, src, dest) @test !success -path_to_fake_project = joinpath("test-project", "PhysiCell", "sample_projects", "not_a_project") +path_to_fake_project = joinpath("PhysiCell", "sample_projects", "not_a_project") success = importProject(path_to_fake_project) @test !success -path_to_project = joinpath("test-project", "PhysiCell", "sample_projects", "template") +path_to_project = joinpath("PhysiCell", "sample_projects", "template") success = importProject(path_to_project) @test success # intentionally sabotage the import -path_to_bad_project = joinpath("test-project", "PhysiCell", "sample_projects", "bad_template") -cp(path_to_project, "./test-project/PhysiCell/sample_projects/bad_template") +path_to_bad_project = joinpath("PhysiCell", "sample_projects", "bad_template") +cp(path_to_project, joinpath("PhysiCell", "sample_projects", "bad_template")) -path_to_main = joinpath("test-project", "PhysiCell", "sample_projects", "bad_template", "main.cpp") +path_to_main = joinpath("PhysiCell", "sample_projects", "bad_template", "main.cpp") lines = readlines(path_to_main) idx = findfirst(x->contains(x, "argument_parser"), lines) lines[idx] = " //no longer parsing because this is now a bad project" @@ -61,7 +61,7 @@ open(path_to_main, "w") do f end end -path_to_custom_cpp = joinpath("test-project", "PhysiCell", "sample_projects", "bad_template", "custom_modules", "custom.cpp") +path_to_custom_cpp = joinpath("PhysiCell", "sample_projects", "bad_template", "custom_modules", "custom.cpp") lines = readlines(path_to_custom_cpp) idx = findfirst(x->contains(x, "load_initial_cells"), lines) lines[idx] = " //no longer loading initial cells because this is now a bad project" @@ -75,11 +75,16 @@ success = importProject(path_to_bad_project) @test !success # import the ecm project to actually use 
-path_to_project = joinpath("test-project", "PhysiCell", "sample_projects", "template-ecm") +path_to_project = joinpath("PhysiCell", "sample_projects", "template-ecm") success = importProject(path_to_project) @test success # import the dirichlet conditions from file project -path_to_project = joinpath("test-project", "PhysiCell", "sample_projects", "dirichlet_from_file") +path_to_project = joinpath("PhysiCell", "sample_projects", "dirichlet_from_file") +success = importProject(path_to_project) +@test success + +# import the combined sbml project +path_to_project = joinpath("PhysiCell", "sample_projects_intracellular", "combined", "template-combined") success = importProject(path_to_project) @test success \ No newline at end of file diff --git a/test/test-scripts/IntracellularTests.jl b/test/test-scripts/IntracellularTests.jl new file mode 100644 index 00000000..a08aa8a3 --- /dev/null +++ b/test/test-scripts/IntracellularTests.jl @@ -0,0 +1,23 @@ +filename = @__FILE__ +filename = split(filename, "/") |> last +str = "TESTING WITH $(filename)" +hashBorderPrint(str) + +config = "template-combined" +custom_code = "template-combined" +cell_to_components_dict = Dict("default" => pcvct.PhysiCellComponent("roadrunner", "Toy_Metabolic_Model.xml")) +intracellular = assembleIntracellular!(cell_to_components_dict; name="template-combined") +inputs = InputFolders(config, custom_code; intracellular=intracellular) + +dv1 = DiscreteVariation(["overall", "max_time"], 12.0) +xml_path = ["intracellulars", "intracellular:ID:1", "sbml", "model", "listOfReactions", "reaction:id:Aerobic", "kineticLaw", "math", "apply", "apply", "cn"] +dv2 = DiscreteVariation(xml_path, [5, 6]) +out = run(inputs, [dv1, dv2]) +@test out.n_success == 2 + +macros_lines = pcvct.readMacrosFile(out.trial) +@test "ADDON_ROADRUNNER" in macros_lines + +#! more test coverage +intracellular = assembleIntracellular!(cell_to_components_dict; name="template-combined") +@test intracellular == "template-combined" #! 
should not need to make a new folder, the assembly.toml file should show they match \ No newline at end of file diff --git a/test/test-project/VCT/LoaderTests.jl b/test/test-scripts/LoaderTests.jl similarity index 87% rename from test/test-project/VCT/LoaderTests.jl rename to test/test-scripts/LoaderTests.jl index f710f0d0..3752edb9 100644 --- a/test/test-project/VCT/LoaderTests.jl +++ b/test/test-scripts/LoaderTests.jl @@ -15,7 +15,7 @@ push!(discrete_variations, DiscreteVariation(["save","SVG","interval"], 6.0)) out = run(inputs, discrete_variations; use_previous=false) @test out.trial isa Simulation -sequence = pcvct.PhysiCellSequence(joinpath("test-project", "data", "outputs", "simulations", string(out.trial.id), "output"); include_cells=true, include_substrates=true) +sequence = pcvct.PhysiCellSequence(joinpath("data", "outputs", "simulations", string(out.trial.id), "output"); include_cells=true, include_substrates=true) seq_dict = getCellDataSequence(sequence, "elapsed_time_in_phase"; include_dead=true) diff --git a/test/test-project/VCT/ModuleTests.jl b/test/test-scripts/ModuleTests.jl similarity index 100% rename from test/test-project/VCT/ModuleTests.jl rename to test/test-scripts/ModuleTests.jl diff --git a/test/test-project/VCT/MovieTests.jl b/test/test-scripts/MovieTests.jl similarity index 100% rename from test/test-project/VCT/MovieTests.jl rename to test/test-scripts/MovieTests.jl diff --git a/test/test-project/VCT/PhysiCellStudioTests.jl b/test/test-scripts/PhysiCellStudioTests.jl similarity index 100% rename from test/test-project/VCT/PhysiCellStudioTests.jl rename to test/test-scripts/PhysiCellStudioTests.jl diff --git a/test/test-project/VCT/PhysiCellVersionTests.jl b/test/test-scripts/PhysiCellVersionTests.jl similarity index 81% rename from test/test-project/VCT/PhysiCellVersionTests.jl rename to test/test-scripts/PhysiCellVersionTests.jl index 7349629c..b45ca0a2 100644 --- a/test/test-project/VCT/PhysiCellVersionTests.jl +++ b/test/test-scripts/PhysiCellVersionTests.jl @@ -6,7 +6,7 @@ hashBorderPrint(str) @test pcvct.physicellVersion() == readchomp(joinpath(pcvct.physicell_dir, "VERSION.txt")) @test pcvct.physicellVersion(Simulation(1)) == readchomp(joinpath(pcvct.physicell_dir, "VERSION.txt")) -path_to_file = "./test-project/PhysiCell/Makefile" +path_to_file = joinpath("PhysiCell", "Makefile") lines = readlines(path_to_file) lines[1] *= " " @@ -17,7 +17,7 @@ open(path_to_file, "w") do f end @test !pcvct.gitDirectoryIsClean(pcvct.physicell_dir) -initializeVCT(pcvct.physicell_dir, pcvct.data_dir) +initializeModelManager(pcvct.physicell_dir, pcvct.data_dir) lines[1] = lines[1][1:end-1] open(path_to_file, "w") do f @@ -36,6 +36,6 @@ project_dir = "./test-project-download" createProject(project_dir; clone_physicell=false) data_dir = joinpath(project_dir, "data") physicell_dir = joinpath(project_dir, "PhysiCell") -initializeVCT(physicell_dir, data_dir) +initializeModelManager(physicell_dir, data_dir) -initializeVCT(original_physicell_dir, original_data_dir) \ No newline at end of file +initializeModelManager(original_physicell_dir, original_data_dir) \ No newline at end of file diff --git a/test/test-project/VCT/PopulationTests.jl b/test/test-scripts/PopulationTests.jl similarity index 100% rename from test/test-project/VCT/PopulationTests.jl rename to test/test-scripts/PopulationTests.jl diff --git a/test/test-project/VCT/PrintHelpers.jl b/test/test-scripts/PrintHelpers.jl similarity index 82% rename from test/test-project/VCT/PrintHelpers.jl rename to 
test/test-scripts/PrintHelpers.jl index 22a07163..9cc10eb6 100644 --- a/test/test-project/VCT/PrintHelpers.jl +++ b/test/test-scripts/PrintHelpers.jl @@ -7,14 +7,14 @@ function printBetweenHashes(s::String) s_split = split(s) s_length = length(s) s_lengths = [length(x) for x in s_split] - sub_lengths = cumsum(s_lengths .+ (0:(length(s_split)-1))) # length of string after combining through the ith token, joining with spaces + sub_lengths = cumsum(s_lengths .+ (0:(length(s_split)-1))) #! length of string after combining through the ith token, joining with spaces I = findlast(2 .* sub_lengths .< s_length) if isnothing(I) - # then the first token is too long - s_split = [s_split[1][1:15]; s_split[1][16:end]; s_split[2:end]] # force the split 15 chars into the first token + #! then the first token is too long + s_split = [s_split[1][1:15]; s_split[1][16:end]; s_split[2:end]] #! force the split 15 chars into the first token I = 1 end - + S1 = join(s_split[1:I], " ") |> printBetweenHashes S2 = join(s_split[I+1:end], " ") |> printBetweenHashes return [S1; S2] diff --git a/test/test-scripts/ProjectConfigurationTests.jl b/test/test-scripts/ProjectConfigurationTests.jl new file mode 100644 index 00000000..b5cb57e8 --- /dev/null +++ b/test/test-scripts/ProjectConfigurationTests.jl @@ -0,0 +1,10 @@ +filename = @__FILE__ +filename = split(filename, "/") |> last +str = "TESTING WITH $(filename)" +hashBorderPrint(str) + +@test_throws ArgumentError pcvct.sanitizePathElements("..") +@test_throws ArgumentError pcvct.sanitizePathElements("~") +@test_throws ArgumentError pcvct.sanitizePathElements("/looks/like/absolute/path") + +@test_throws ErrorException pcvct.folderIsVaried(:config, "not-a-config-folder") \ No newline at end of file diff --git a/test/test-project/VCT/PrunerTests.jl b/test/test-scripts/PrunerTests.jl similarity index 100% rename from test/test-project/VCT/PrunerTests.jl rename to test/test-scripts/PrunerTests.jl diff --git a/test/test-project/VCT/RunnerTests.jl b/test/test-scripts/RunnerTests.jl similarity index 88% rename from test/test-project/VCT/RunnerTests.jl rename to test/test-scripts/RunnerTests.jl index 649fa11d..30a74943 100644 --- a/test/test-project/VCT/RunnerTests.jl +++ b/test/test-scripts/RunnerTests.jl @@ -31,7 +31,7 @@ out2 = run(inputs, discrete_variations) @test out.trial.id == out2.trial.id @test out.trial.inputs == out2.trial.inputs -@test out.trial.variation_ids == out2.trial.variation_ids +@test out.trial.variation_id == out2.trial.variation_id hashBorderPrint("SIMULATION SUCCESSFULLY RUN!") @@ -71,7 +71,7 @@ out2 = run(simulation, discrete_variations; n_replicates=n_replicates, force_rec hashBorderPrint("SUCCESSFULLY `run` WITHOUT CREATING SAMPLING!") -n_simulations = length(sampling) # number of simulations recorded (in .csvs) for this sampling +n_simulations = length(sampling) #! number of simulations recorded (in .csvs) for this sampling n_expected_sims = n_replicates for discrete_variation in discrete_variations global n_expected_sims *= length(discrete_variation) @@ -79,9 +79,9 @@ end n_variations = length(sampling.variation_ids) # make sure the number of simulations in this sampling is what we expected based on... -@test n_simulations == n_expected_sims # the discrete_variations... 
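These assertions (the old and new forms of the same checks) encode the expected bookkeeping: the number of simulations recorded for the sampling should equal the replicate count times the number of grid points implied by the discrete variations. A quick standalone illustration of that count with made-up sizes:

n_replicates = 2
variation_lengths = [3, 2]           #! made-up lengths of the discrete variations
n_expected_sims = n_replicates
for len in variation_lengths
    global n_expected_sims *= len    #! same accumulation the test performs
end
n_expected_sims                      #! 12 = 2 replicates × 6 grid points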
-@test n_simulations == n_variations * n_replicates # ...how many variation ids we recorded (number of rulesets_variations_ids must match variation_ids on construction of sampling)
-@test n_simulations == out.n_success # ...how many simulations succeeded
+@test n_simulations == n_expected_sims #! the discrete_variations...
+@test n_simulations == n_variations * n_replicates #! ...how many variation ids we recorded (number of rulesets_variations_ids must match variation_ids on construction of sampling)
+@test n_simulations == out.n_success #! ...how many simulations succeeded
hashBorderPrint("SAMPLING SUCCESSFULLY IN CSVS!")
diff --git a/test/test-project/VCT/SensitivityTests.jl b/test/test-scripts/SensitivityTests.jl
similarity index 66%
rename from test/test-project/VCT/SensitivityTests.jl
rename to test/test-scripts/SensitivityTests.jl
index b0ea48e4..dfd675d9 100644
--- a/test/test-project/VCT/SensitivityTests.jl
+++ b/test/test-scripts/SensitivityTests.jl
@@ -18,10 +18,8 @@ dv_save_full_data_interval = DiscreteVariation(["save", "full_data", "interval"]
dv_save_svg_data_interval = DiscreteVariation(["save","SVG","interval"], 6.0)
discrete_variations = [dv_max_time, dv_save_full_data_interval, dv_save_svg_data_interval]
-reference_config_variation_id, reference_rulesets_variation_id, reference_ic_cell_variation_id = pcvct.addVariations(GridVariation(), inputs, discrete_variations)
-reference_config_variation_id = reference_config_variation_id[1]
-reference_rulesets_variation_id = reference_rulesets_variation_id[1]
-reference_ic_cell_variation_id = reference_ic_cell_variation_id[1]
+add_variations_result = pcvct.addVariations(GridVariation(), inputs, discrete_variations)
+reference_variation_id = add_variations_result.all_variation_ids[1]
xml_path = [pcvct.cyclePath(cell_type); "phase_durations"; "duration:index:0"]
lower_bound = 250.0 - 50.0
@@ -46,11 +44,11 @@ n_replicates = 1
gs_fn(simulation_id::Int) = finalPopulationCount(simulation_id)[cell_type]
-moat_sampling = run(MOAT(n_points), n_replicates, inputs, avs; force_recompile=force_recompile, reference_config_variation_id=reference_config_variation_id, reference_rulesets_variation_id=reference_rulesets_variation_id, reference_ic_cell_variation_id=reference_ic_cell_variation_id, functions=[gs_fn])
-moat_sampling = run(MOAT(), n_replicates, inputs, avs; force_recompile=force_recompile, reference_config_variation_id=reference_config_variation_id, reference_rulesets_variation_id=reference_rulesets_variation_id, reference_ic_cell_variation_id=reference_ic_cell_variation_id, functions=[gs_fn])
-moat_sampling = run(MOAT(4; orthogonalize=true), n_replicates, inputs, avs; force_recompile=force_recompile, reference_config_variation_id=reference_config_variation_id, reference_rulesets_variation_id=reference_rulesets_variation_id, reference_ic_cell_variation_id=reference_ic_cell_variation_id, functions=[gs_fn])
-sobol_sampling = run(Sobolʼ(n_points), n_replicates, inputs, avs; force_recompile=force_recompile, reference_config_variation_id=reference_config_variation_id, reference_rulesets_variation_id=reference_rulesets_variation_id, reference_ic_cell_variation_id=reference_ic_cell_variation_id, functions=[gs_fn])
-rbd_sampling = run(RBD(n_points), n_replicates, inputs, avs; force_recompile=force_recompile, reference_config_variation_id=reference_config_variation_id, reference_rulesets_variation_id=reference_rulesets_variation_id, reference_ic_cell_variation_id=reference_ic_cell_variation_id, functions=[gs_fn])
+moat_sampling = run(MOAT(n_points), n_replicates, inputs, avs; force_recompile=force_recompile, reference_variation_id=reference_variation_id, functions=[gs_fn])
+moat_sampling = run(MOAT(), n_replicates, inputs, avs; force_recompile=force_recompile, reference_variation_id=reference_variation_id, functions=[gs_fn])
+moat_sampling = run(MOAT(4; orthogonalize=true), n_replicates, inputs, avs; force_recompile=force_recompile, reference_variation_id=reference_variation_id, functions=[gs_fn])
+sobol_sampling = run(Sobolʼ(n_points), n_replicates, inputs, avs; force_recompile=force_recompile, reference_variation_id=reference_variation_id, functions=[gs_fn])
+rbd_sampling = run(RBD(n_points), n_replicates, inputs, avs; force_recompile=force_recompile, reference_variation_id=reference_variation_id, functions=[gs_fn])
pcvct.calculateGSA!(moat_sampling, gs_fn)
pcvct.calculateGSA!(sobol_sampling, gs_fn)
@@ -69,10 +67,8 @@ dv_save_full_data_interval = DiscreteVariation(["save", "full_data", "interval"]
dv_save_svg_data_interval = DiscreteVariation(["save","SVG","interval"], 6.0)
discrete_variations = [dv_max_time, dv_save_full_data_interval, dv_save_svg_data_interval]
-reference_config_variation_id, reference_rulesets_variation_id, reference_ic_cell_variation_id = pcvct.addVariations(GridVariation(), inputs, discrete_variations)
-reference_config_variation_id = reference_config_variation_id[1]
-reference_rulesets_variation_id = reference_rulesets_variation_id[1]
-reference_ic_cell_variation_id = reference_ic_cell_variation_id[1]
+add_variations_result = pcvct.addVariations(GridVariation(), inputs, discrete_variations)
+reference_variation_id = add_variations_result.all_variation_ids[1]
xml_path = [pcvct.cyclePath(cell_type); "phase_durations"; "duration:index:0"]
lower_bound = 250.0 - 50.0
@@ -90,14 +86,14 @@ dv4 = UniformDistributedVariation(xml_path, 0.25, 0.75)
av = CoVariation(dv1, dv2, dv3, dv4)
-moat_sampling = run(MOAT(n_points), n_replicates, inputs, av; force_recompile=force_recompile, reference_config_variation_id=reference_config_variation_id, reference_rulesets_variation_id=reference_rulesets_variation_id, reference_ic_cell_variation_id=reference_ic_cell_variation_id, functions=[gs_fn])
+moat_sampling = run(MOAT(n_points), n_replicates, inputs, av; force_recompile=force_recompile, reference_variation_id=reference_variation_id, functions=[gs_fn])
n_simulations_expected = n_points * (1 + 1) * n_replicates
@test length(moat_sampling.sampling) == n_simulations_expected
sobol_index_methods = (first_order=:Sobol1993, total_order=:Homma1996)
-sobol_sampling = run(Sobolʼ(n_points; sobol_index_methods=sobol_index_methods), n_replicates, inputs, av; force_recompile=force_recompile, reference_config_variation_id=reference_config_variation_id, reference_rulesets_variation_id=reference_rulesets_variation_id, reference_ic_cell_variation_id=reference_ic_cell_variation_id, functions=[gs_fn])
+sobol_sampling = run(Sobolʼ(n_points; sobol_index_methods=sobol_index_methods), n_replicates, inputs, av; force_recompile=force_recompile, reference_variation_id=reference_variation_id, functions=[gs_fn])
sobol_index_methods = (first_order=:Saltelli2010, total_order=:Sobol2007)
-sobol_sampling = run(Sobolʼ(n_points; sobol_index_methods=sobol_index_methods), n_replicates, inputs, av; force_recompile=force_recompile, reference_config_variation_id=reference_config_variation_id, reference_rulesets_variation_id=reference_rulesets_variation_id, reference_ic_cell_variation_id=reference_ic_cell_variation_id, functions=[gs_fn])
+sobol_sampling = run(Sobolʼ(n_points; sobol_index_methods=sobol_index_methods), n_replicates, inputs, av; force_recompile=force_recompile, reference_variation_id=reference_variation_id, functions=[gs_fn])
reference = getSimulationIDs(sobol_sampling)[1] |> Simulation
sobol_sampling = run(Sobolʼ(2), n_replicates, reference, av)
@@ -115,8 +111,8 @@ dv_pressure_hfm = UniformDistributedVariation(["hypothesis_ruleset:name:default"
dv_x0 = UniformDistributedVariation(["cell_patches:name:default", "patch_collection:type:disc", "patch:ID:1", "x0"], -100.0, 0.0, flip)
dv_anisotropy = UniformDistributedVariation(["layer:ID:2", "patch_collection:type:elliptical_disc", "patch:ID:1", "anisotropy"], 0.0, 1.0)
-cv1 = CoVariation([dv_apop, dv_cycle]) # I think wanted these to only be config variations?
-cv2 = CoVariation([dv_necr, dv_pressure_hfm, dv_x0, dv_anisotropy]) # I think I wanted these to be all different locations?
+cv1 = CoVariation([dv_apop, dv_cycle]) #! I think wanted these to only be config variations?
+cv2 = CoVariation([dv_necr, dv_pressure_hfm, dv_x0, dv_anisotropy]) #! I think I wanted these to be all different locations?
avs = [cv1, cv2]
method = MOAT(4)
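The SensitivityTests.jl hunks above collapse the three reference keywords (reference_config_variation_id, reference_rulesets_variation_id, reference_ic_cell_variation_id) into a single reference_variation_id taken from the pcvct.addVariations result. A minimal sketch of the updated call pattern, reusing only names that appear in those hunks and assuming the rest of that script's setup (inputs, discrete_variations, avs, gs_fn, n_points, n_replicates, force_recompile):

#! sketch of the consolidated reference-variation interface, not a verbatim excerpt from the test script
add_variations_result = pcvct.addVariations(GridVariation(), inputs, discrete_variations)
reference_variation_id = add_variations_result.all_variation_ids[1] #! one id replaces the former config/rulesets/ic_cell triple
moat_sampling = run(MOAT(n_points), n_replicates, inputs, avs;
    force_recompile=force_recompile,
    reference_variation_id=reference_variation_id,
    functions=[gs_fn])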
diff --git a/test/test-project/VCT/SubstrateTests.jl b/test/test-scripts/SubstrateTests.jl
similarity index 100%
rename from test/test-project/VCT/SubstrateTests.jl
rename to test/test-scripts/SubstrateTests.jl
diff --git a/test/test-project/VCT/UserAPITests.jl b/test/test-scripts/UserAPITests.jl
similarity index 100%
rename from test/test-project/VCT/UserAPITests.jl
rename to test/test-scripts/UserAPITests.jl
diff --git a/test/test-project/VCT/VariationsTests.jl b/test/test-scripts/VariationsTests.jl
similarity index 70%
rename from test/test-project/VCT/VariationsTests.jl
rename to test/test-scripts/VariationsTests.jl
index 8199fcd9..9e2216da 100644
--- a/test/test-project/VCT/VariationsTests.jl
+++ b/test/test-scripts/VariationsTests.jl
@@ -30,51 +30,51 @@ vals = [1.0, 2.0]
push!(discrete_variations, DiscreteVariation(xml_path, vals))
# Test edge cases of addGrid
-config_variation_ids, rulesets_collection_variation_ids, ic_cell_variation_ids = pcvct.addVariations(GridVariation(), inputs, discrete_variations)
+add_variations_result = pcvct.addVariations(GridVariation(), inputs, discrete_variations)
# Test edge cases of addLHS
-config_variation_ids, rulesets_collection_variation_ids, ic_cell_variation_ids = pcvct.addVariations(LHSVariation(4), inputs, discrete_variations)
+add_variations_result = pcvct.addVariations(LHSVariation(4), inputs, discrete_variations)
discrete_variations = DiscreteVariation[]
xml_path = [pcvct.cyclePath(cell_type); "phase_durations"; "duration:index:0"]
push!(discrete_variations, DiscreteVariation(xml_path, [1.0, 2.0]))
-config_variation_ids, rulesets_collection_variation_ids, ic_cell_variation_ids = pcvct.addVariations(LHSVariation(4), inputs, discrete_variations)
+add_variations_result = pcvct.addVariations(LHSVariation(4), inputs, discrete_variations)
discrete_variations = DiscreteVariation[]
xml_path = ["cell_patches:name:default", "patch_collection:type:disc", "patch:ID:1", "x0"]
vals = [0.0, -100.0]
push!(discrete_variations, DiscreteVariation(xml_path, vals))
-config_variation_ids, rulesets_collection_variation_ids, ic_cell_variation_ids = pcvct.addVariations(LHSVariation(4), inputs, discrete_variations)
+add_variations_result = pcvct.addVariations(LHSVariation(4), inputs, discrete_variations)
# Test edge cases of addSobol
-config_variation_ids, rulesets_collection_variation_ids, ic_cell_variation_ids = pcvct.addVariations(pcvct.SobolVariation(5), inputs, discrete_variations)
+add_variations_result = pcvct.addVariations(pcvct.SobolVariation(5), inputs, discrete_variations)
discrete_variations = DiscreteVariation[]
xml_path = [pcvct.cyclePath(cell_type); "phase_durations"; "duration:index:0"]
push!(discrete_variations, DiscreteVariation(xml_path, [1.0, 2.0]))
-config_variation_ids, rulesets_collection_variation_ids, ic_cell_variation_ids = pcvct.addVariations(pcvct.SobolVariation(5; skip_start=false), inputs, discrete_variations)
+add_variations_result = pcvct.addVariations(pcvct.SobolVariation(5; skip_start=false), inputs, discrete_variations)
discrete_variations = DiscreteVariation[]
xml_path = ["hypothesis_ruleset:name:default","behavior:name:cycle entry","decreasing_signals","max_response"]
vals = [1.0, 2.0]
push!(discrete_variations, DiscreteVariation(xml_path, vals))
-config_variation_ids, rulesets_collection_variation_ids, ic_cell_variation_ids = pcvct.addVariations(pcvct.SobolVariation(5; skip_start=4, include_one=true), inputs, discrete_variations)
+add_variations_result = pcvct.addVariations(pcvct.SobolVariation(5; skip_start=4, include_one=true), inputs, discrete_variations)
# Test edge cases of addRBD
-config_variation_ids, rulesets_collection_variation_ids, ic_cell_variation_ids = pcvct.addVariations(pcvct.RBDVariation(1), inputs, discrete_variations)
+add_variations_result = pcvct.addVariations(pcvct.RBDVariation(1), inputs, discrete_variations)
discrete_variations = DiscreteVariation[]
xml_path = ["cell_patches:name:default", "patch_collection:type:disc", "patch:ID:1", "x0"]
vals = [0.0, -100.0]
push!(discrete_variations, DiscreteVariation(xml_path, vals))
-config_variation_ids, rulesets_collection_variation_ids, ic_cell_variation_ids = pcvct.addVariations(pcvct.RBDVariation(2), inputs, discrete_variations)
+add_variations_result = pcvct.addVariations(pcvct.RBDVariation(2), inputs, discrete_variations)
discrete_variations = DiscreteVariation[]
xml_path = [pcvct.cyclePath(cell_type); "phase_durations"; "duration:index:0"]
push!(discrete_variations, DiscreteVariation(xml_path, [1.0, 2.0]))
-config_variation_ids, rulesets_collection_variation_ids, ic_cell_variation_ids = pcvct.addVariations(pcvct.RBDVariation(3), inputs, discrete_variations)
+add_variations_result = pcvct.addVariations(pcvct.RBDVariation(3), inputs, discrete_variations)
-config_variation_ids, rulesets_collection_variation_ids, ic_cell_variation_ids = pcvct.addVariations(pcvct.RBDVariation(3; use_sobol=false), inputs, discrete_variations)
+add_variations_result = pcvct.addVariations(pcvct.RBDVariation(3; use_sobol=false), inputs, discrete_variations)
# test deprecation of ElementaryVariation
@test_warn "`ElementaryVariation` is deprecated in favor of the more descriptive `DiscreteVariation`." ElementaryVariation(xml_path, [0.0, 1.0])
@@ -101,7 +101,7 @@ df = pcvct.simulationsTable(sampling)
drs = df[!, Symbol("default: apop death rate")]
pdurs = df[!, Symbol("default: duration:index:0")]
for (dr, pdur) in zip(drs, pdurs)
- @test (val_1.==dr) == (val_2.==pdur) # make sure they are using the same index in both
+ @test (val_1.==dr) == (val_2.==pdur) #! make sure they are using the same index in both
end
max_response_path = ["hypothesis_ruleset:name:default","behavior:name:cycle entry","decreasing_signals","max_response"]
@@ -125,6 +125,13 @@ sampling = createTrial(LHSVariation(5), inputs, cv; n_replicates=3)
@test pcvct.location(cv) == [:config, :config]
@test pcvct.target(cv) == pcvct.XMLPath.([apoptosis_rate_path, cycle_rate_path])
-cv = CoVariation(cv.variations[1], cv.variations[2]) # CoVariation(ev1, ev2, ...)
+cv = CoVariation(cv.variations[1], cv.variations[2]) #! CoVariation(ev1, ev2, ...)
sampling = createTrial(SobolVariation(7), inputs, cv; n_replicates=2)
@test length(sampling.monad_ids) == 7
+
+# more tests for coverage
+ev = DiscreteVariation(["hypothesis_ruleset:name:default", "behavior:name:cycle entry", "decreasing_signals", "signal:name:pressure", "applies_to_dead"], [true, false])
+@test pcvct.sqliteDataType(ev) == "TEXT"
+
+ev = DiscreteVariation(["options", "random_seed"], ["system_clock", 0])
+@test pcvct.sqliteDataType(ev) == "TEXT"
\ No newline at end of file
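Taken together, the VariationsTests.jl hunks show both CoVariation constructors and how a CoVariation feeds into createTrial. A brief sketch assembled only from calls appearing in those hunks, with dv_apop, dv_cycle, and inputs assumed from the surrounding script:

#! sketch only: CoVariation accepts a vector of variations or the variations as separate arguments
cv = CoVariation([dv_apop, dv_cycle]) #! vector form
cv = CoVariation(cv.variations[1], cv.variations[2]) #! separate-argument form, i.e. CoVariation(ev1, ev2, ...)
sampling = createTrial(SobolVariation(7), inputs, cv; n_replicates=2) #! the covariation is sampled when building the trial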