From 8dc6f0eceec07b12a47b32fd63359ce6b4347d99 Mon Sep 17 00:00:00 2001
From: adkinsrs
Date: Tue, 13 Feb 2024 11:18:00 -0500
Subject: [PATCH] Add log file and log level options, optimize code for speed, and update Julia version

---
Notes:
  * --log_level is validated case-insensitively against DEBUG, INFO, WARNING
    and ERROR, the four levels mapped in setup_logger. There is no CRITICAL
    level.
  * --log_file is opened in "w" mode, so an existing file is truncated. When
    the option is omitted, log messages go to stderr.
  * A failure in validate_args is logged at error level and the process exits
    with status 1.

 CHANGELOG.md           |  2 ++
 Dockerfile             |  2 +-
 README.md              | 10 ++++++++--
 docs/index.md          | 10 ++++++++--
 fadu.jl                | 38 +++++++++++++++++++++++++++++++++-----
 fadu_pkgs/Project.toml |  4 ++--
 6 files changed, 54 insertions(+), 12 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index e0205b1..3b4aff5 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,8 @@
 
 * FEATURE - Adding --no_output_header option to not print the header line in output. Useful for when you want to pipe this output to another tool.
 * FEATURE - Adding --exclude_regions option to exclude any regions found in a passed-in BED file.
+* FEATURE - Adding --log_file and --log_level options to display or hide log messages of varying severity. Note that currently --log_level=DEBUG will print the line number, as I haven't figured out how to disable that for debug-level messages yet.
+* Optimized some pieces of code for speed (thanks GitHub Copilot). This should increase the speed where a GFF feature has 100,000s of overlaps with the BAM alignments.
 
 ## v1.8.3
 
diff --git a/Dockerfile b/Dockerfile
index 8687d94..768e209 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,5 +1,5 @@
 # Wanted to do the alpine version but that tag does not support arm64 (Mac M1) architecture
-FROM julia:1.9
+FROM julia:1.7
 LABEL maintainer="Shaun Adkins (sadkins@som.umaryland.edu)"
 
 RUN mkdir -p /opt/FADU
diff --git a/README.md b/README.md
index e0ae684..f02be5e 100644
--- a/README.md
+++ b/README.md
@@ -17,17 +17,18 @@ Most current available quantification tools for transcriptomics analyses have be
 
 ## Current FADU release
 
-v1.8.3
+v1.9
 
 ## Requirements
 
-Julia - v1.4.2 or later
+Julia - v1.7 or later
 
 ### Packages
 
 NOTE: Packages installation instructions can be found at https://docs.julialang.org/en/v1.4/stdlib/Pkg/index.html#Getting-Started-1
 
 * ArgParse
+* Logging
 * BGZFStreams.jl - v0.3.0
 * GenomicFeatures.jl - v2.0.0
 * GFF3 - v0.2.0
@@ -165,6 +166,11 @@ optional arguments:
                         on multimapped read depth. Only applies if
                         --remove_multimapped flag is not passed (is
                         disabled) (type: Int64, default: 1)
+  --log_level LEVEL     Set the log level. Options are: DEBUG, INFO,
+                        WARNING, ERROR. (default: "INFO")
+  --log_file /path/to/logfile.log
+                        Path to log file. If not specified, log
+                        messages will be printed to STDERR.
   --version             show version information and exit
   -h, --help            show this help message and exit
 ```
diff --git a/docs/index.md b/docs/index.md
index ee91080..331864b 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -17,17 +17,18 @@ Most current available quantification tools for transcriptomics analyses have be
 
 ## Current FADU release
 
-v1.8.3
+v1.9
 
 ## Requirements
 
-Julia - v1.4.2 or later
+Julia - v1.7 or later
 
 ### Packages
 
 NOTE: Packages installation instructions can be found at https://docs.julialang.org/en/v1.4/stdlib/Pkg/index.html#Getting-Started-1
 
 * ArgParse
+* Logging
 * BGZFStreams.jl - v0.3.0
 * GenomicFeatures.jl - v2.0.0
 * GFF3 - v0.2.0
@@ -166,6 +167,11 @@ optional arguments:
                         on multimapped read depth. Only applies if
                         --remove_multimapped flag is not passed (is
                         disabled) (type: Int64, default: 1)
+  --log_level LEVEL     Set the log level. Options are: DEBUG, INFO,
+                        WARNING, ERROR. (default: "INFO")
+  --log_file /path/to/logfile.log
+                        Path to log file. If not specified, log
+                        messages will be printed to STDERR.
   --version             show version information and exit
   -h, --help            show this help message and exit
 ```
diff --git a/fadu.jl b/fadu.jl
index 020ec99..a5ca2dd 100644
--- a/fadu.jl
+++ b/fadu.jl
@@ -22,6 +22,7 @@ By: Shaun Adkins (sadkins@som.umaryland.edu)
 
 # The macro on modules and functions makes the code available to all worker processes
 using ArgParse
+using Logging
 using XAM: BAM, SAM
 using GenomicFeatures
 using GFF3
@@ -150,6 +151,14 @@ function parse_commandline()
             arg_type = Int
             range_tester = (x->x>0)
             dest_name = "em_iter"
+        "--log_level"
+            help = "Set the log level. Options are: DEBUG, INFO, WARNING, ERROR."
+            default = "INFO"
+            metavar = "LEVEL"
+            range_tester = (x->uppercase(x) in ["DEBUG", "INFO", "WARNING", "ERROR"])
+        "--log_file"
+            help = "Path to log file. If not specified, log messages will be printed to STDERR."
+            metavar = "/path/to/logfile.log"
         # Will not add log_file or debug options for now
     end
 
@@ -157,20 +166,39 @@ function parse_commandline()
     return parse_args(s)
 end
 
+function setup_logger(args)
+    # Install a global ConsoleLogger that writes to --log_file (stderr when unset) at the --log_level threshold.
+    stream = args["log_file"] === nothing ? stderr : open(args["log_file"], "w")
+    logleveldict = Dict("DEBUG" => Logging.Debug, "INFO" => Logging.Info, "WARNING" => Logging.Warn, "ERROR" => Logging.Error)
+    loglevel = get(logleveldict, uppercase(args["log_level"]), Logging.Info)
+    logger = ConsoleLogger(stream, loglevel)
+    global_logger(logger)
+    # TODO: prevent debug messages from printing the file and line number
+
+end
+
 function validate_args(args)
     """Validate the passed arguments."""
-    isfile(args["gff3_file"]) || throw(SystemError("GFF3 file does not seem to exist. Please check supplied path."))
-    isfile(args["bam_file"]) || throw(SystemError("BAM file does not seem to exist. Please check supplied path."))
+    @info("Validating arguments...")
+    isfile(args["bam_file"]) || error("BAM file does not exist. Please check supplied path")
+    isfile(args["gff3_file"]) || error("GFF3 file does not exist. Please check supplied path")
     if !isdir(args["output_dir"])
-        @debug("Creating output directory")
+        @info("Creating output directory at $(args["output_dir"])")
         mkdir(args["output_dir"])
     end
-    args["stranded"] in ["yes", "no", "reverse"] || error("--stranded argument must be either 'yes', 'no', or 'reverse'.")
 end
 
 function main()
     args = parse_commandline()
-    #validate_args(args)
+    setup_logger(args)
+
+    try
+        validate_args(args)
+    catch e
+        @error(sprint(showerror, e))
+        exit(1)
+    end
+
     @info("Parsed args:")
     for (arg,val) in args
         @info(" $arg => $val")
diff --git a/fadu_pkgs/Project.toml b/fadu_pkgs/Project.toml
index 5f3fe4c..ccbf032 100644
--- a/fadu_pkgs/Project.toml
+++ b/fadu_pkgs/Project.toml
@@ -10,7 +10,7 @@ BGZFStreams = "28d598bf-9b8f-59f1-b38c-5a06b4a0f5e6"
 GFF3 = "af1dc308-cb6b-11e8-32f0-31192efa90f6"
 GenomicFeatures = "899a7d2d-5c61-547b-bef9-6698a8d05446"
 Indexes = "4ffb77ac-cb80-11e8-1b35-4b78cc642f6d"
-Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
+Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
 StructArrays = "09ab397b-f2b6-538f-b94a-2f83cf4a842a"
 XAM = "d759349c-bcba-11e9-07c2-5b90f8f05f7c"
 
@@ -22,4 +22,4 @@ GenomicFeatures = "2.0.0"
 Indexes = "0.1.1"
 StructArrays = "0.4.4"
 XAM = "0.2.6"
-julia = "1.4.2"
+julia = "1.7"