-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsplit_pPb8160_dataset.sh
executable file
·63 lines (53 loc) · 1.69 KB
/
split_pPb8160_dataset.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#!/bin/bash
#
# Split a list file into numbered sublists of at most N lines each.
#
# Usage:
#   split_pPb8160_dataset.sh <input_file> <N> <sample_name> <direction> <pd_number>
#
# Arguments:
#   input_file   File to split, one entry per line.
#   N            Maximum number of lines per sublist (positive decimal integer).
#   sample_name  Selects the sublist name prefix:
#                  DATA_MB        -> MB_PD<pd_number>_<direction>
#                  DATA_HM185     -> HM185_PD<pd_number>_<direction>
#                  DATA_HM250     -> HM250_<direction>
#                  anything else  -> PAEGJet_<direction>
#   direction    Embedded verbatim in the prefix.
#   pd_number    Embedded in the prefix for DATA_MB and DATA_HM185 only.
#
# Output:
#   Writes <prefix>_1.list, <prefix>_2.list, ... into
#   $PWD/input/pPb8160/<YYYYMMDD>/ (created if missing) and prints the number
#   of sublists on stdout. Diagnostics go to stderr so that stdout carries
#   only that count. Exits 1 on any error.

# Print a message on stderr and abort with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Check if the correct number of arguments is provided
if [[ $# -ne 5 ]]; then
  die "Usage: $0 <input_file> <N> <sample_name> <direction> <pd_number>"
fi

# Date of submission; names the output directory.
formatted_date=$(date +"%Y%m%d")

input_file="$1"   # Input file name
N="$2"            # Number of entries for each sublist
sample_name="$3"  # Sample name
direction="$4"    # Direction
pd_number="$5"    # PD number

# Prefix of every sublist file name.
case "$sample_name" in
  DATA_MB)
    sample_prefix="MB_PD${pd_number}_${direction}"
    ;;
  DATA_HM185)
    sample_prefix="HM185_PD${pd_number}_${direction}"
    ;;
  DATA_HM250)
    sample_prefix="HM250_${direction}"
    ;;
  *)
    sample_prefix="PAEGJet_${direction}"
    ;;
esac

# Check if the input file exists
if [[ ! -f "$input_file" ]]; then
  die "Input file '$input_file' not found."
fi

# N is used as a divisor and as a chunk size, so it must be a positive
# integer. Reject anything else up front instead of failing later with an
# arithmetic error.
if [[ ! "$N" =~ ^[0-9]+$ ]] || (( 10#$N == 0 )); then
  die "N must be a positive integer, got '$N'."
fi
# Force base 10 so that a leading zero (e.g. "010") is not read as octal.
N=$((10#$N))

# Count the lines with awk rather than `wc -l`: wc counts newline characters
# and would therefore miss a final line that has no trailing newline.
# The file is fed on stdin so that a name containing '=' is never mistaken
# by awk for a variable assignment.
total_lines=$(awk 'END { print NR }' < "$input_file") \
  || die "Failed to read '$input_file'."

# Number of sublists needed (ceiling division).
num_sublists=$(( (total_lines + N - 1) / N ))

output_dir="$PWD/input/pPb8160/${formatted_date}"
mkdir -p "$output_dir" || die "Cannot create directory '$output_dir'."

# Create the sublists in a single pass over the input: every N-th line opens
# the next <prefix>_<k>.list file, and each line is appended to the file that
# is currently open. The path stem travels through the environment because
# `awk -v` would interpret backslash escapes in it. Note that awk terminates
# every written line with a newline, including a final unterminated one.
SUBLIST_STEM="$output_dir/$sample_prefix" awk -v n="$N" '
  (NR - 1) % n == 0 {
    if (out != "") close(out)
    out = sprintf("%s_%d.list", ENVIRON["SUBLIST_STEM"], ++idx)
  }
  { print > out }
' < "$input_file" || die "Failed to write sublists to '$output_dir'."

printf '%s\n' "$num_sublists"