Skip to content

Commit

Permalink
Merge pull request #460 from kedhammar/improve-finding-run-dirs
Browse files Browse the repository at this point in the history
Improve identifying run dirs
  • Loading branch information
kedhammar authored Jan 22, 2025
2 parents 048c781 + d7b8a92 commit e60b055
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 37 deletions.
4 changes: 4 additions & 0 deletions VERSIONLOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# TACA Version Log

## 20250122.2

Improve the way TACA identifies run dirs in the "bioinfo_deliveries --update" command (bioinfo_tab.py).

## 20250122.1

Ruff formatting.
Expand Down
57 changes: 20 additions & 37 deletions taca/utils/bioinfo_tab.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,52 +26,36 @@ def __init__(self, value=None):

def collect_runs():
"""Update command."""
# NOTE(review): this span is a rendered diff of collect_runs(), not a runnable
# function body. Indentation has been stripped and lines from the old and the
# new version are interleaved without +/- markers (see the two consecutive
# `for run_dir in ...` headers further down).
# What the visible lines do: for every instrument brand and data dir listed in
# CONFIG["bioinfo_tab"]["data_dirs"], glob candidate directories and call
# update_statusdb(run_dir, inst_brand) for those accepted as run dirs.
# NOTE(review): found_runs is only appended to (illumina branch below) and is
# never read in this span; presumably it is dropped by this commit - confirm.
found_runs = {"illumina": [], "element": []}

# Pattern explained:
# 6-8Digits_(maybe ST-)AnythingLetterornumberNumber_Number_AorBLetterornumberordash
# NOTE(review): both pattern literals are plain (non-raw) strings containing
# `\d` / `\w`; Python treats these as invalid string escapes and newer versions
# warn about them. A raw string (r"...") would give the same regex.
illumina_rundir_re = re.compile("\d{6,8}_[ST-]*\w+\d+_\d+_[AB]?[A-Z0-9\-]+")
# E.g. 20250121_AV242106_B2425434199
# The middle token AV242106 is matched literally, so names with any other
# token there are rejected (presumably a single instrument ID - confirm).
element_rundir_re = re.compile("\d{8}_AV242106_[AB]\d+")

for inst_brand in CONFIG["bioinfo_tab"]["data_dirs"]:
for data_dir in CONFIG["bioinfo_tab"]["data_dirs"][inst_brand]:
if os.path.exists(data_dir):
# Candidates are the direct children of data_dir.
potential_run_dirs = glob.glob(os.path.join(data_dir, "*"))
# NOTE(review): the following block (through the ONT branch) appears to be the
# pre-commit per-brand handling of the top-level candidates - confirm.
for run_dir in potential_run_dirs:
if os.path.isdir(run_dir):
if inst_brand == "illumina" and illumina_rundir_re.match(
os.path.basename(os.path.abspath(run_dir))
):
found_runs[inst_brand].append(os.path.basename(run_dir))
logger.info(f"Working on {run_dir}")
update_statusdb(run_dir, inst_brand)
elif inst_brand == "element":
# Skip no sync dirs, they will be checked below
if run_dir == os.path.join(data_dir, "nosync"):
continue
logger.info(f"Working on {run_dir}")
update_statusdb(run_dir, inst_brand)
elif inst_brand == "ont":
# Skip archived, no_backup, nosync and qc folders
if re.match(
ONT_RUN_PATTERN,
os.path.basename(os.path.abspath(run_dir)),
):
logger.info(f"Working on {run_dir}")
update_statusdb(run_dir, inst_brand)
# Children of data_dir/nosync are appended to the same candidate list, so one
# loop can cover both locations.
potential_run_dirs += glob.glob(os.path.join(data_dir, "nosync", "*"))

# NOTE(review): the separate nosync listing below appears to be the pre-commit
# form that the `+=` line above replaces - confirm.
nosync_data_dir = os.path.join(data_dir, "nosync")
potential_nosync_run_dirs = glob.glob(
os.path.join(nosync_data_dir, "*")
)
# Two loop headers in a row: old (nosync-only list) and new (combined list).
for run_dir in potential_nosync_run_dirs:
for run_dir in potential_run_dirs:
if os.path.isdir(run_dir):
# The condition below mixes two versions. One accepts every element/ont
# directory except nosync/archived; the other requires the basename to match
# a brand-specific pattern. Pattern.match() anchors at the start of the name
# only, so trailing characters after the pattern are still accepted.
if (
inst_brand == "illumina"
and illumina_rundir_re.match(
os.path.basename(os.path.abspath(run_dir))
(
inst_brand == "illumina"
and illumina_rundir_re.match(os.path.basename(run_dir))
)
) or (inst_brand == "element" or inst_brand == "ont"):
# Skip archived dirs
if run_dir == os.path.join(nosync_data_dir, "archived"):
continue
or (
inst_brand == "element"
and element_rundir_re.match(os.path.basename(run_dir))
)
or (
inst_brand == "ont"
# ONT_RUN_PATTERN is defined outside this span; calling .match() on it here
# assumes it is a compiled pattern object - confirm.
and ONT_RUN_PATTERN.match(os.path.basename(run_dir))
)
):
logger.info(f"Working on {run_dir}")
update_statusdb(run_dir, inst_brand)


Expand All @@ -89,7 +73,6 @@ def update_statusdb(run_dir, inst_brand):
# WARNING - Run parameters file not found for ElementRun(<run_dir>), might not be ready yet
return
elif inst_brand == "ont":
run_dir = os.path.abspath(run_dir)
try:
ont_run = ONT_run(run_dir)
except AssertionError as e:
Expand Down Expand Up @@ -320,7 +303,7 @@ def get_ss_projects_illumina(run_dir):
proj_tree = Tree()
lane_pattern = re.compile("^([1-8]{1,2})$")
sample_proj_pattern = re.compile("^((P[0-9]{3,5})_[0-9]{3,5})")
run_name = os.path.basename(os.path.abspath(run_dir))
run_name = os.path.basename(run_dir)
run_date = run_name.split("_")[0]
if len(run_date) == 6:
current_year = "20" + run_date[0:2]
Expand Down

0 comments on commit e60b055

Please sign in to comment.