-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathparse_check_GenPipes_log.sh
More file actions
executable file
·112 lines (101 loc) · 3.69 KB
/
parse_check_GenPipes_log.sh
File metadata and controls
executable file
·112 lines (101 loc) · 3.69 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
#!/bin/bash
# Print the command-line synopsis and terminate the script with status 1.
# The text is written to stderr: this function is only reached on invalid
# or missing options, next to diagnostics that already go to stderr, so
# stdout stays free of error output.
usage() {
  printf 'Usage: %s -l log_file.txt -o output_path\n' "$0" >&2
  exit 1
}
# Read the command-line flags: -l stores its value in LOG_FILE and -o
# stores its value in OUTPUT_PATH. The leading ':' in the option string
# selects getopts' silent mode, so the two failure cases are reported here
# (on stderr) before handing over to usage().
while getopts ":l:o:" opt; do
  case "$opt" in
    l) LOG_FILE="$OPTARG" ;;
    o) OUTPUT_PATH="$OPTARG" ;;
    :)
      # A flag that needs a value was given without one.
      echo "Option -$OPTARG requires an argument." >&2
      usage
      ;;
    \?)
      # A flag that is not part of the option string.
      echo "Invalid option: -$OPTARG" >&2
      usage
      ;;
  esac
done
# Both -l and -o are mandatory: continue only when LOG_FILE and OUTPUT_PATH
# are non-empty, otherwise show the usage text (which ends the script).
[[ -n "$LOG_FILE" && -n "$OUTPUT_PATH" ]] || usage
# Print the field that sits k positions before the last one after splitting
# a line with awk.
#   $1 - text to split
#   $2 - awk field-separator regex
#   $3 - k, offset counted back from the last field
_field_from_end() {
  awk -F"$2" -v k="$3" '{ print $(NF - k) }' <<< "$1"
}

# Read a log file line by line and append one record per relevant line to
# a file under the output directory.
#
# A line containing "Checking" sets the current context (timestamp,
# pipeline, protocol, patient), which later lines reuse. Files written:
#   empty_json.<pipeline>.<protocol>.txt      "<json path>,<timestamp>"
#   missing_files.<pipeline>.<protocol>.txt   "<job list>,<timestamp>"
#   missing_files.RnaSeqLight.txt             "<job list>"
#   running|error|warning|success.<pipeline>.<protocol>.txt
#                                             "<patient>,<timestamp>"
# <protocol> is the empty string whenever the pipeline is RnaSeqLight.
#
# Arguments:
#   $1 - path of the log file to read
#   $2 - directory that receives the output files
# Returns:
#   non-zero when the log file cannot be opened; otherwise the status of
#   the read loop.
parse_genpipes_log() {
  local log_file=$1 output_path=$2
  local -r status_re='INFO|ERROR|WARNING|SUCCESS'
  local current_date='' pipeline='' protocol='' patient=''
  local line first_word status status_file json_path job_list out_file

  # "|| [[ -n $line ]]" keeps a final line that has no trailing newline;
  # a plain "read" loop would silently drop it.
  while IFS= read -r line || [[ -n $line ]]; do
    # Skip log_report.py verbosity lines.
    # NOTE(review): the second test is an exact match, so it only skips a
    # line that consists solely of "WARNING:" - confirm whether a prefix
    # match was intended.
    if [[ $line == *"ERROR:__main__:"* || $line == "WARNING:" ]]; then
      continue

    # Context line: remember timestamp, pipeline, protocol and patient.
    elif [[ $line == *"Checking"* ]]; then
      current_date=$(grep -oE '[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}\.[0-9]{2}\.[0-9]{2}' <<< "$line")
      pipeline=$(_field_from_end "$line" '[/.]' 8)
      # When that field is the literal "genpipes_submission", the pipeline
      # name is taken from the next field instead.
      if [[ $pipeline == "genpipes_submission" ]]; then
        pipeline=$(_field_from_end "$line" '[/.]' 7)
      fi
      if [[ $pipeline == "RnaSeqLight" ]]; then
        protocol=""
      else
        protocol=$(_field_from_end "$line" '[/.]' 7)
      fi
      patient=$(_field_from_end "$line" '[/.]' 6)

    # Empty GenPipes json: record the path that follows "Cf. ".
    elif [[ $line == *"ERROR: GenPipes json file is empty"* ]]; then
      pipeline=$(_field_from_end "$line" '[/.]' 4)
      if [[ $pipeline == "RnaSeqLight" ]]; then
        protocol=""
      else
        protocol=$(_field_from_end "$line" '[._]' 4)
      fi
      json_path=$(awk -F'Cf. ' '{print $2}' <<< "$line")
      out_file="${output_path}/empty_json.${pipeline}.${protocol}.txt"
      printf '%s\n' "${json_path},${current_date}" >> "${out_file}"

    # Missing files: record the first word after "in ", minus its last
    # character.
    elif [[ $line == *"WARNING: Missing files"* ]]; then
      pipeline=$(_field_from_end "$line" '[/.]' 8)
      job_list=$(awk -F'in ' '{print $2}' <<< "$line" | awk '{print $1}' | sed 's/.$//')
      if [[ $pipeline == "RnaSeqLight" ]]; then
        # NOTE(review): this record carries no timestamp and the file name
        # has no protocol slot, unlike every other output - kept as found;
        # confirm that consumers expect this.
        out_file="${output_path}/missing_files.${pipeline}.txt"
        printf '%s\n' "${job_list}" >> "${out_file}"
      else
        protocol=$(_field_from_end "$line" '[._]' 8)
        # Quoted right-hand side: compare literally, not as a glob pattern.
        if [[ $pipeline == "$protocol" ]]; then
          pipeline=$(_field_from_end "$line" '[/.]' 9)
        fi
        out_file="${output_path}/missing_files.${pipeline}.${protocol}.txt"
        printf '%s\n' "${job_list},${current_date}" >> "${out_file}"
      fi

    # Status line: the first word (colons removed) selects the output file.
    elif [[ $line =~ $status_re ]]; then
      read -r first_word _ <<< "$line"
      status=${first_word//:/}
      if [[ $pipeline == "RnaSeqLight" ]]; then
        protocol=""
      fi
      case "$status" in
        INFO)    status_file="${output_path}/running.${pipeline}.${protocol}.txt" ;;
        ERROR)   status_file="${output_path}/error.${pipeline}.${protocol}.txt" ;;
        WARNING) status_file="${output_path}/warning.${pipeline}.${protocol}.txt" ;;
        SUCCESS) status_file="${output_path}/success.${pipeline}.${protocol}.txt" ;;
        *)
          # The keyword only appears later in the line, so there is no
          # status to file under. Skip the line instead of appending to
          # whichever file an earlier line selected.
          continue
          ;;
      esac
      printf '%s\n' "${patient},${current_date}" >> "${status_file}"
    fi
  done < "$log_file"
}

parse_genpipes_log "$LOG_FILE" "$OUTPUT_PATH"