Skip to content

Commit 2abd017

Browse files
authored
hotfix: remove bad timeline HIPO files and allow Step 2 to finish (#299)
1 parent 4a468e9 commit 2abd017

2 files changed

Lines changed: 47 additions & 8 deletions

File tree

bin/hipo-check.sh

Lines changed: 26 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -4,17 +4,33 @@ set -e
44
set -u
55
source $(dirname $0)/environ.sh
66

7+
# default options
8+
rm_bad=false
9+
hipoFiles=()
10+
711
# arguments
812
if [ $# -lt 1 ]; then
9-
printError "no HIPO files specified for $(basename $0)"
1013
echo """
11-
USAGE: $0 [HIPO_FILE(S)]...
14+
USAGE: $0 [OPTIONS]... [HIPO_FILE(S)]...
1215
1316
Checks each [HIPO_FILE] for corruption, etc.
17+
18+
OPTIONS
19+
--rm-bad delete (rm) bad HIPO files
1420
""" >&2
1521
exit 101
1622
fi
17-
hipoFiles=$@
23+
for arg in "$@"; do
24+
if [[ $arg =~ ^- ]]; then
25+
case $arg in
26+
--rm-bad) rm_bad=true ;;
27+
*) printError "unknown option $arg" && exit 100 ;;
28+
esac
29+
else
30+
hipoFiles+=($arg)
31+
fi
32+
done
33+
[ ${#hipoFiles[@]} -lt 1 ] && printError "no HIPO files specified for $(basename $0)" && exit 100
1834

1935
# minimum file size for a valid HIPO file
2036
# - seems to be 192 bytes, but setting the threshold slightly higher may be safer
@@ -56,5 +72,12 @@ if [ ${#badFiles[@]} -gt 0 ]; then
5672
for badFile in ${badFiles[@]}; do
5773
echo " - $badFile" >&2
5874
done
75+
# remove bad HIPO files
76+
if $rm_bad; then
77+
printError "These HIPO files will now be REMOVED!"
78+
for badFile in ${badFiles[@]}; do
79+
rm -v $badFile >&2
80+
done
81+
fi
5982
exit 100
6083
fi

bin/run-detectors-timelines.sh

Lines changed: 21 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -242,7 +242,6 @@ if ${modes['focus-all']} || ${modes['focus-timelines']}; then
242242
echo "PREMATURE EXIT, since --debug option was used"
243243
exit
244244
else
245-
#sleep 1
246245
java $TIMELINE_JAVA_OPTS $run_detectors_script $timelineObj $inputDir > $logFile.out 2> $logFile.err || touch $logFile.fail &
247246
job_ids+=($!)
248247
job_names+=($timelineObj)
@@ -279,9 +278,13 @@ if ${modes['focus-all']} || ${modes['focus-timelines']}; then
279278
esac
280279
done
281280

282-
# check timelines
281+
# check timelines; remove and complain about any bad ones
282+
echo ">>> running hipo-check on timeline HIPO files..."
283283
outputFiles=$(find . -name "*.hipo")
284-
[ -n "$outputFiles" ] && $TIMELINESRC/bin/hipo-check.sh $outputFiles
284+
if [ -n "$outputFiles" ]; then
285+
logFile=$logDir/hipo-check
286+
$TIMELINESRC/bin/hipo-check.sh --rm-bad $outputFiles > $logFile.out 2> $logFile.err || touch $logFile.fail
287+
fi
285288

286289
# remove any empty directories
287290
echo ">>> removing any empty directories..."
@@ -326,31 +329,44 @@ $logDir/*.err
326329

327330
# exit nonzero if any jobs exited nonzero
328331
failedJobs=($(find $logDir -name "*.fail" | xargs -I{} basename {} .fail))
332+
somethingFailed=false
329333
if [ ${#failedJobs[@]} -gt 0 ]; then
330334
for failedJob in ${failedJobs[@]}; do
331335
echo $sep
332336
printError "job '$failedJob' returned non-zero exit code; error log dump:"
333337
cat $logDir/$failedJob.err
338+
if [ "$failedJob" = "hipo-check" ]; then
339+
printWarning "These HIPO files are TIMELINE files; if this '$failedJob' job is the ONLY failed job, you may proceed with timeline deployment, but these failed timelines will not be deployed."
340+
fi
334341
done
335342
if [ -z "$singleTimeline" -a ${modes['focus-qa']} = false ]; then
336343
echo $sep
337344
echo "To re-run only the failed timelines, for debugging, try one of the following commands:"
338345
for failedJob in ${failedJobs[@]}; do
339346
if [ "$failedJob" = "qa" ]; then
340347
echo " $0 $@ --focus-qa"
348+
elif [ "$failedJob" = "hipo-check" ]; then
349+
echo " $0 $@ --focus-timelines -t [BAD_TIMELINE]"
350+
echo " where [BAD_TIMELINE] is any timeline that failed 'hipo-check'"
341351
else
342352
echo " $0 $@ --focus-timelines -t $failedJob"
343353
fi
344354
done
345355
fi
346-
exit 100
356+
somethingFailed=true
347357
else
348358
echo "All jobs exitted normally"
349359
fi
350360

351361
# grep for suspicious things in error logs
352362
errPattern="error:|exception:|warning"
353-
echo """To look for any quieter errors, running \`grep -iE '$errPattern'\` on *.err files:
363+
echo """Now scanning for any quieter errors, by running \`grep -iE '$errPattern'\` on *.err files:
354364
$sep"""
355365
grep -iE --color "$errPattern" $logDir/*.err || echo "Good news: grep found no errors, but you still may want to take a look yourself..."
356366
echo $sep
367+
368+
# exit nonzero if something failed
369+
if $somethingFailed; then
370+
printWarning "At least one job had issues; look above or in the log files to see what's wrong."
371+
exit 100
372+
fi

0 commit comments

Comments
 (0)