llvm/bolt/utils/bughunter.sh

#!/bin/bash
##===- bolt/utils/bughunter.sh - Help locate BOLT bugs -------*- Script -*-===##
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
##===----------------------------------------------------------------------===##
#
# This script attempts to narrow down an llvm-bolt bug to a single function in
# the input binary.
#
# If such a function is found, llvm-bolt can be run on just that function,
# which simplifies debugging.
#
# The following envvars are used by this script:
#
#   BOLT              - path to llvm-bolt
#
#   BOLT_OPTIONS      - options to be used by llvm-bolt
#
#   INPUT_BINARY      - input for llvm-bolt
#
#   PRE_COMMAND       - command to execute prior to running optimized binary
#
#   POST_COMMAND      - command to filter results of running optimized binary
#
#   TIMEOUT_OR_CMD    - optional timeout or wrapper for the optimized binary
#                       command.  If the value is a number with an optional
#                       trailing letter [smhd], it is passed as the duration to
#                       "timeout"; otherwise it is a shell command that wraps
#                       the optimized binary command.
#
#   COMMAND_LINE      - command line options to run optimized binary with
#
#   IGNORE_ERROR      - ignore error codes returned from optimized binary
#
#   GOLD_FILE         - file containing expected output from optimized binary
#
#   FUNC_NAMES        - if set, path to an initial list of function names to
#                       search.  Otherwise, nm is used on the original binary.
#
#   OFFLINE           - if set, bughunter will produce the binaries but will not
#                       run them, and will depend on you telling whether it
#                       succeeded or not.
#
#   MAX_FUNCS         - if set, use -max-funcs to narrow down the offending
#                       function.  if non-zero, start -max-funcs at $MAX_FUNCS
#                       otherwise, count the number of symbols in the binary.
#
#   MAX_FUNCS_FLAG    - BOLT command line option to use for MAX_FUNCS search.
#                       Default is -max-funcs.  Can also be used for relocation
#                       debugging, e.g. -max-data-relocations.
#
#   VERBOSE           - if non-empty, set the script to echo mode.
#
##===----------------------------------------------------------------------===##

# Fall back to "llvm-bolt" when the caller did not provide BOLT.
: "${BOLT:=llvm-bolt}"

# Make a pipeline report failure when any of its stages fails, and set the
# core file size limit to zero.
set -o pipefail
ulimit -c 0

# Trace every command when VERBOSE is non-empty.
[[ -z "$VERBOSE" ]] || set -x

# Stop right away unless INPUT_BINARY names something executable.
[[ -x $INPUT_BINARY ]] || {
    echo "INPUT_BINARY must be set to an executable file"
    exit 1
}

# Defaults for the run harness: a no-op (":") stands in for PRE_COMMAND and a
# pass-through filter ("cat") for POST_COMMAND when they are unset or empty.
PRE_COMMAND=${PRE_COMMAND:-:}
POST_COMMAND=${POST_COMMAND:-cat}

# A bare duration (digits plus an optional s/m/h/d suffix) is turned into a
# "timeout -s KILL" prefix; any other value is kept as the wrapper command.
# An empty value never matches, because the pattern needs at least one digit.
if [[ $TIMEOUT_OR_CMD =~ ^[0-9]+[smhd]?$ ]] ; then
    TIMEOUT_OR_CMD="timeout -s KILL $TIMEOUT_OR_CMD"
fi

# BOLT option driven by the MAX_FUNCS search.
MAX_FUNCS_FLAG=${MAX_FUNCS_FLAG:--max-funcs}

# Choose names for the scratch artifacts.  "mktemp -u" only prints an unused
# name; the files themselves are created later by BOLT and by the test runs.
# INPUT_BINARY is quoted (and protected with "--") so that a path containing
# whitespace or a leading dash still yields a template based on its file name.
OPTIMIZED_BINARY=$(mktemp -t -u --suffix=.bolt "$(basename -- "$INPUT_BINARY").XXX")
OUTPUT_FILE="${OPTIMIZED_BINARY}.out"
BOLT_LOG=$(mktemp -t -u --suffix=.log boltXXX)

# Baseline check: unless OFFLINE is set, run the unmodified INPUT_BINARY
# through the PRE_COMMAND / TIMEOUT_OR_CMD / POST_COMMAND harness and report
# whether it passes.  The result is only reported; the script continues
# either way.
if [[ -z "$OFFLINE" ]]; then
    echo "Verify input binary passes"
    echo "  INPUT_BINARY: $PRE_COMMAND && $TIMEOUT_OR_CMD $INPUT_BINARY $COMMAND_LINE |& $POST_COMMAND >& $OUTPUT_FILE"
    # PRE_COMMAND, TIMEOUT_OR_CMD, COMMAND_LINE and POST_COMMAND hold command
    # lines and are intentionally left unquoted so that they are word-split.
    # The binary and output paths are quoted so that whitespace in them does
    # not break the command or turn the redirection into an ambiguous one.
    ($PRE_COMMAND && $TIMEOUT_OR_CMD "$INPUT_BINARY" $COMMAND_LINE |& $POST_COMMAND >& "$OUTPUT_FILE")
    STATUS=$?
    if [[ "$IGNORE_ERROR" == "1" ]]; then
        FAIL=0
    else
        FAIL=$STATUS
    fi
    # When a gold file exists, the comparison against it decides the outcome
    # and replaces whatever the exit status said.
    if [[ -e "$GOLD_FILE" ]] ; then
        cmp -s "$OUTPUT_FILE" "$GOLD_FILE"
        FAIL=$?
    fi
    if [[ "$FAIL" -ne 0 ]] ; then
        echo "  Warning: input binary failed"
    else
        echo "  Input binary passes."
    fi
fi

# Confirm the premise of the search: with the full BOLT_OPTIONS the optimized
# binary should either fail to build (BOLT exits non-zero) or fail when run.
echo "Verify optimized binary fails"
# BOLT and BOLT_OPTIONS are word-split on purpose; paths are quoted.
($BOLT $BOLT_OPTIONS "$INPUT_BINARY" -o "$OPTIMIZED_BINARY" >& "$BOLT_LOG")
FAIL=$?
if [[ "$FAIL" -eq 0 ]]; then
    if [[ -z "$OFFLINE" ]]; then
        echo "  OPTIMIZED_BINARY: $PRE_COMMAND && $TIMEOUT_OR_CMD $OPTIMIZED_BINARY $COMMAND_LINE |& $POST_COMMAND >& $OUTPUT_FILE"
        # Command-line variables stay unquoted so they are word-split; the
        # binary and output paths are quoted.
        ($PRE_COMMAND && $TIMEOUT_OR_CMD "$OPTIMIZED_BINARY" $COMMAND_LINE |& $POST_COMMAND >& "$OUTPUT_FILE")
        STATUS=$?
        if [[ "$IGNORE_ERROR" == "1" ]]; then
            FAIL=0
        else
            FAIL=$STATUS
        fi
        # A gold file, when present, decides the outcome.
        if [[ -e "$GOLD_FILE" ]] ; then
            cmp -s "$OUTPUT_FILE" "$GOLD_FILE"
            FAIL=$?
        fi
    else
        echo "Did it pass? Type the return code [0 = pass, 1 = fail]"
        # Store the answer in FAIL, the variable tested below.  Reading it
        # into a separate variable left FAIL at BOLT's exit status (0), so
        # the user's verdict was ignored.
        read -r -n1 FAIL
    fi
    if [[ "$FAIL" -eq 0 ]] ; then
        echo "  Warning: optimized binary passes."
    else
        echo "  Optimized binary fails as expected."
    fi
else
    echo "  Bolt crashes while generating optimized binary."
fi

# Collect function names: defined symbols of type T/t/W/w reported by nm,
# minus names containing "._" and empty names, sorted and de-duplicated.
# "grep -E" replaces the obsolescent "egrep"; paths are quoted.
FF=$(mktemp -t -u --suffix=.txt func-names.XXX)
nm --defined-only -p "$INPUT_BINARY" \
    | grep " [TtWw] " \
    | cut -d ' ' -f 3 \
    | grep -Ev '\._|^$' \
    | sort -u > "$FF"

# Use function names or numbers
if [[ -z "$MAX_FUNCS" ]] ; then
    # Do binary search on function names.  A caller-supplied list replaces
    # the one produced by nm.
    if [[ -n "$FUNC_NAMES" ]]; then
        FF=$FUNC_NAMES
    fi
    NUM_FUNCS=$(wc -l < "$FF")
    HALF=$(( (NUM_FUNCS + 1) / 2 ))
    # Split the list into two halves, <PREFIX>aa and <PREFIX>ab, and start
    # the search with the first one.
    PREFIX=$(mktemp -t -u --suffix=.txt func-names.XXX)
    FF0=${PREFIX}aa
    FF1=${PREFIX}ab
    split -a 2 -l "$HALF" "$FF" "$PREFIX"
    FF=$FF0
    NUM_FUNCS=$(wc -l < "$FF")
    CONTINUE=$(( NUM_FUNCS > 0 ))
else
    # Search on a count in the range (P, Q].  MAX_FUNCS=0 means "use the
    # number of collected symbols" as the upper bound.
    P=0
    if [[ "$MAX_FUNCS" -eq 0 ]]; then
        Q=$(wc -l < "$FF")
    else
        Q=$MAX_FUNCS
    fi
    I=$Q
    CONTINUE=$(( Q - P > 1 ))
fi

# Bisection loop.  Each iteration rebuilds the optimized binary with BOLT
# restricted either to the candidate list in $FF (name mode, MAX_FUNCS empty)
# or to $MAX_FUNCS_FLAG=I (count mode), decides whether that build passes,
# and narrows the candidates accordingly.
ITER=0
while [[ "$CONTINUE" -ne 0 ]] ; do
    rm -f -- "$OPTIMIZED_BINARY"
    if [[ -z "$MAX_FUNCS" ]] ; then
        echo "Iteration $ITER, trying $FF / $HALF functions"
        SEARCH_OPT="-funcs-file-no-regex=$FF"
    else
        I=$(( (Q + P) / 2 ))
        echo "Iteration $ITER, P=$P, Q=$Q, I=$I"
        SEARCH_OPT="$MAX_FUNCS_FLAG=$I"
    fi
    echo "  BOLT: $BOLT $BOLT_OPTIONS $INPUT_BINARY $SEARCH_OPT -o $OPTIMIZED_BINARY >& $BOLT_LOG"
    # BOLT and BOLT_OPTIONS are word-split on purpose; paths and the single
    # search option are quoted.
    ($BOLT $BOLT_OPTIONS "$INPUT_BINARY" "$SEARCH_OPT" -o "$OPTIMIZED_BINARY" >& "$BOLT_LOG")
    FAIL=$?
    echo "  BOLT failure=$FAIL"
    rm -f -- "$OUTPUT_FILE"
    if [[ "$FAIL" -eq 0 ]] ; then
        if [[ -z "$OFFLINE" ]]; then
            echo "  OPTIMIZED_BINARY: $PRE_COMMAND && $TIMEOUT_OR_CMD $OPTIMIZED_BINARY $COMMAND_LINE |& $POST_COMMAND >& $OUTPUT_FILE"
            # Command-line variables stay unquoted so they are word-split.
            ($PRE_COMMAND && $TIMEOUT_OR_CMD "$OPTIMIZED_BINARY" $COMMAND_LINE |& $POST_COMMAND >& "$OUTPUT_FILE")
            STATUS=$?
            if [[ "$IGNORE_ERROR" == "1" ]]; then
                FAIL=0
            else
                FAIL=$STATUS
            fi
            # A gold file, when present, decides the outcome.
            if [[ -e "$GOLD_FILE" ]] ; then
                cmp -s "$OUTPUT_FILE" "$GOLD_FILE"
                FAIL=$?
            fi
            echo "  OPTIMIZED_BINARY failure=$FAIL"
        else
            echo "Did it pass? Type the return code [0 = pass, 1 = fail]"
            # Store the answer in FAIL, which drives the search below.
            # Reading it into another variable left FAIL at 0, so every
            # offline iteration was treated as a pass.
            read -r -n1 FAIL
        fi
    else
        # A BOLT failure counts as a failing iteration.
        FAIL=1
    fi

    if [[ -z "$MAX_FUNCS" ]] ; then
        if [[ "$FAIL" -eq 0 ]] ; then
            # The current list passes.  If it was already the second half,
            # nothing is left to try; otherwise move on to the second half.
            if [[ "$FF" == "$FF1" ]]; then
                NUM_FUNCS=0
                break
            fi
            FF=$FF1
            NUM_FUNCS=$(wc -l < "$FF")
        else
            # The current list fails: split it in two and try the first half.
            HALF=$(( (NUM_FUNCS + 1) / 2 ))
            PREFIX=$(mktemp -t -u --suffix=.txt func-names.XXX)
            split -a 2 -l "$HALF" "$FF" "$PREFIX"
            FF0=${PREFIX}aa
            FF1=${PREFIX}ab
            FF=$FF0
            NUM_FUNCS=$(wc -l < "$FF")
            # A one-line list with no second half cannot be split further.
            if [[ "$NUM_FUNCS" -eq 1 && ! -e "$FF1" ]]; then
                break
            fi
        fi
        CONTINUE=$(( NUM_FUNCS > 0 ))
    else
        # Count mode: P moves up to I on a pass, Q moves down to I on a fail.
        if [[ "$FAIL" -eq 0 ]] ; then
            P=$I
        else
            Q=$I
        fi
        FF=$I
        HALF=$I
        CONTINUE=$(( Q - P > 1 ))
    fi
    ITER=$(( ITER + 1 ))
done

# Summarise the search result.
if [[ -z "$MAX_FUNCS" ]] ; then
    # Name mode: a non-zero NUM_FUNCS means $FF holds the suspects.
    if [[ "$NUM_FUNCS" -ne 0 ]] ; then
        FAILED="The function(s) that failed are in $FF"
    fi
else
    if [[ "$P" -ne "$Q" ]] ; then
        # NOTE(review): the name is taken from the log of the last BOLT run,
        # which used I; when that run passed, I equals P rather than Q.
        # Confirm the reported name is the intended one in that case.
        FF=$(grep "processing ending" "$BOLT_LOG" | sed -e "s/BOLT-INFO: processing ending on \(.*\)/\1/g" | tail -n 1)
        FAILED="The item that failed is $FF @ $Q"
        # Q is the failing bound named in the message, so the reproduce
        # command must use it.  The option left over from the loop holds the
        # last tried I, which is the passing bound when the final iteration
        # passed.
        SEARCH_OPT="$MAX_FUNCS_FLAG=$Q"
    fi
fi

if [[ -n "$FAILED" ]] ; then
    echo "$FAILED"
    echo "To reproduce, run: $BOLT $BOLT_OPTIONS $INPUT_BINARY $SEARCH_OPT -o $OPTIMIZED_BINARY"
else
    echo "Unable to reproduce bug."
fi

# Some of these files may never have been created (for example the output
# file in OFFLINE mode); -f keeps the cleanup quiet and the exit status clean.
rm -f -- "$OPTIMIZED_BINARY" "$OUTPUT_FILE" "$BOLT_LOG"