#!/bin/sh

#
# This script parses the output of "fossil diff" and uses an external
# graphical difference tool like mgdiff or xxdiff to show you the
# differences one file at a time.
#
#                                             -- Paul Serice
#

FOSSIL_MGDIFF_VERSION=2.0.11

# Defaults; both can be overridden through the environment.
: "${MGDIFF:=/usr/bin/mgdiff}"
: "${TMP:=/tmp}"

# MSYS hijacks "/tmp", "$TMP", and "$TEMP", redirecting all of them to
# "%LOCALAPPDATA%\Temp", which breaks native tools that don't do the
# same redirection (which is almost all of them).  A further
# complication is that Emacs' (ediff) command has trouble parsing file
# names under Windows that start with the drive (like "C:/...").  So,
# to work around all of this, just use the current working directory
# if MSYS is detected.
if [ -n "$MSYSTEM" ] ; then
    TMP=.
fi

# Because this script has to clean up temporary files, an EXIT trap
# called clean_up() is established.  After cleaning up the files,
# clean_up() calls "exit $exit_rv".  So do not exit the program via
# "exit n"; instead, do "exit_rv=n; exit".  This will trigger the EXIT
# trap and pass n back to the shell.
exit_rv=0

##
# Print the usage message.  When the pseudo file descriptor is "1" the
# message goes to stdout; for any other value it goes to stderr.  The
# program name shown is taken from the global "$progname".
#
# @param[in] fd pseudo file descriptor
usage() {
    u_fd="$1"

    msg="\

Usage: $progname [options] [-r <rev1>] [-r <rev2>] [<files>|<dirs>]

    Options:

            -d : Print debugging information including the command
                 used to generate the diff.

      -g <gui> : Graphical difference program to use.

            -h : Print help message.

            -n : No gui is used.  Only the log is printed to stdout.

      -r <rev> : Specify one or two revisions.  To specify two
                 revisions, use the -r flag twice or use one of \"-r
                 <rev1>:<rev2>\" or \"-r <rev1>..<rev2>\".

            -v : Print version.

      -w <file>: Walk the <file> by diffing each version with its
                 predecessor.

    Purpose:

        The purpose of this program is to help automate the repetitive
        task of running recursive differences over a fossil source
        code repository.

    Examples:

        # Recursively diff the working tree with the checked out revision.
        fossil-mgdiff

        # Diff against a specific tag.
        fossil-mgdiff -r foo
"

    # Help that was asked for goes to stdout ...
    if [ "$u_fd" -eq 1 ] ; then
        echo "$msg"
        return
    fi

    # ... everything else is treated as an error report.
    echo "$msg" 1>&2
}

##
# Make sure a program can be found by the shell (via "type").  If it
# cannot, report the problem on stderr and leave the script through
# the "exit_rv=n; exit" convention.
#
# @param[in] pname program name
verify_exec() {
    if type "$1" 1>/dev/null 2>&1 ; then
        return 0
    fi

    echo "*** $progname: Error: Unable to find executable for \"$1\"." 1>&2
    exit_rv=1
    exit
}

##
# Trap handler: delete the two temporary files named by "$tmp_file_1"
# and "$tmp_file_2", then leave the script with the status recorded in
# "$exit_rv".
clean_up() {
    rm -f "$tmp_file_1" "$tmp_file_2"
    exit $exit_rv
}

##
# Helper for getunique(): create one temporary file in "$TMP" whose
# name starts with "$tmp_base" and write its canonical path to stdout.
# If the path cannot be canonicalized the freshly created file is
# removed again so that nothing is left behind.
#
# @return 0 on success (path on stdout), non-zero on failure
gu_make_temp() {
    gmt_raw=`mktemp -q "$TMP/${tmp_base}XXXXXX"` || return 1
    if ! realpath "$gmt_raw" ; then
        rm -f "$gmt_raw"
        return 1
    fi
}

##
# Set "tmp_file_1" and "tmp_file_2" to be the names of two unique
# temporary files.  The files are created with a umask of 077; the
# caller's umask is restored before returning or exiting.
#
# On failure an error is printed to stderr and the script is left via
# the "exit_rv=1; exit" convention.  Because the caller may not have
# registered the clean-up trap yet, a first file that was already
# created is removed here when the second allocation fails.
getunique() {
    old_umask=`umask`
    umask 077

    # The status tested is that of the whole allocation (mktemp and
    # realpath), not just of the last command that happened to run.
    if ! tmp_file_1=`gu_make_temp` ; then
        umask "$old_umask"
        echo "*** $progname:" \
             "Error: Unable to allocate a necessary temporary file." 1>&2
        exit_rv=1
        exit
    fi

    if ! tmp_file_2=`gu_make_temp` ; then
        rm -f "$tmp_file_1"
        umask "$old_umask"
        echo "*** $progname:" \
             "Error: Unable to allocate a necessary temporary file." 1>&2
        exit_rv=1
        exit
    fi

    umask "$old_umask"
}

##
# Extract "rev1" from a combined revision of the form "rev1:rev2" or
# "rev1..rev2" and write it to stdout.  When both separators occur,
# the one that appears first in the string is the one that counts.
# When neither occurs, the argument is written back unchanged.
#
# @param[in] crev combined revision
# @return first revision
get_first_revision() {
    gfr_crev="$1"

    "$NAWK" -v crev="$gfr_crev" 'BEGIN {
        # Position of each separator (0 when it is absent).
        colon_at = index(crev, ":")
        dots_at = index(crev, "..")

        if ((colon_at > 0) && ((dots_at == 0) || (colon_at < dots_at))) {
            # The colon is the only, or the earlier, separator.
            print substr(crev, 1, colon_at - 1)
        } else if (dots_at > 0) {
            # The dotdot is the only, or the earlier, separator.
            print substr(crev, 1, dots_at - 1)
        } else {
            # No separator at all.
            print crev
        }
    }'
}

##
# Extract "rev2" from a combined revision of the form "rev1:rev2" or
# "rev1..rev2" and write it to stdout.  When both separators occur,
# the one that appears first in the string is the one that counts, and
# everything after it is returned.  When neither occurs, an empty line
# is written.
#
# @param[in] crev combined revision
# @return second revision
get_second_revision() {
    gsr_crev="$1"

    "$NAWK" -v crev="$gsr_crev" 'BEGIN {
        # Position of each separator (0 when it is absent).
        colon_at = index(crev, ":")
        dots_at = index(crev, "..")

        if ((colon_at > 0) && ((dots_at == 0) || (colon_at < dots_at))) {
            # Skip the one-character colon separator.
            print substr(crev, colon_at + 1)
        } else if (dots_at > 0) {
            # Skip the two-character dotdot separator.
            print substr(crev, dots_at + 2)
        } else {
            # No separator, so there is no second revision.
            print ""
        }
    }'
}

##
# This function gets the comment for a particular revision and writes
# it to stdout.  The comment is taken from the first line of "fossil
# artifact <rev>" that starts with "C ".
#
# @param[in] rev revision
# @return comment for revision printed to stdout
get_comment() {
    rev="$1"

    fossil artifact "$rev" | "$NAWK" '
        /^C / {
            # In the artifact text every space is written as "\s" and
            # every new line as "\n", so the whole comment is a single
            # field.  Undo both escapes before printing.
            text = $2
            gsub(/\\s/, " ", text)
            gsub(/\\n/, "\n", text)
            print text
            exit
        }'
}

##
# Get the sha1 of the commit and write it to stdout.  The value is
# taken from the first line of "fossil timeline ancestors <commit>
# -n 1" that starts with a digit; its second field is the hash wrapped
# in square brackets, which are stripped.
#
# The awk implementation selected by the script ("$NAWK") is used, like
# everywhere else in this file, instead of whatever "awk" happens to be
# first in the PATH.
#
# @param[in] commit commit
# @return sha1 of commit
get_sha1_of_commit() {
    gsc_commit="$1"

    fossil timeline ancestors "$gsc_commit" -n 1 \
           | "$NAWK" '/^[0-9]/ {
                 print substr($2, 2, length($2) - 2)
                 exit
             }'
}

##
# Convert the commit to a branch and write the result to stdout.  The
# branch is the value of the first "branch=..." line printed by
# "fossil tag --raw list <commit>"; errors from fossil are discarded.
# This function does not attempt to do anything special with the
# pseudo-commit "current".
#
# @param[in] commit commit
# @return branch associated with commit
branch_of_commit() {
    boc_commit="$1"

    fossil tag --raw list "$boc_commit" 2>/dev/null \
      | "$NAWK" '/^branch=/ {
            # Drop the seven characters of the leading "branch=".
            print substr($0, 8)
            exit
        }'
}

##
# This function cats the revision of a file to stdout.  It works in
# three steps: resolve the revision to a commit hash, look up the
# file's content hash in that commit's artifact, and print the content
# artifact.  If either lookup comes back empty, an error is printed to
# stderr and the script is left via "exit_rv=1; exit".
#
# @param[in] rev revision of file
# @param[in] fname file name
# @return revision of file printed to stdout
fossilcat() {
    fossilcat_rev="$1"
    fossilcat_fname="$2"

    # Step 1: the sha1 value of the commit object for this revision.
    # The first line of the timeline output is skipped; the hash is
    # the second field of the next line without its square brackets.
    fossilcat_commit_sha1=$(
        fossil timeline ancestors "$fossilcat_rev" -n 1 2>/dev/null \
          | "$NAWK" 'NR > 1 {
                print substr($2, 2, length($2) - 2)
                exit
            }')
    if [ -z "$fossilcat_commit_sha1" ] ; then
        echo "*** $progname: Error: Invalid commit: $fossilcat_rev" 1>&2
        exit_rv=1
        exit
    fi

    # Step 2: the sha1 value of the object holding the file's content.
    fossilcat_obj_sha1=$(
        fossil artifact "$fossilcat_commit_sha1" 2>/dev/null \
          | "$NAWK" -v fname="$fossilcat_fname" '
            BEGIN {
                # "fossil artifact" writes each space in a file name
                # as the escape sequence "\s".  Escaping our own name
                # the same way lets the names be compared directly.
                gsub(/ /, "\\\\s", fname)
            }
            # The first "F" line naming exactly our file wins.
            ($1 == "F") && ($2 == fname) {
                print $3
                exit
            }')
    if [ -z "$fossilcat_obj_sha1" ] ; then
        echo "*** $progname: Error: Invalid file: $fossilcat_fname" 1>&2
        exit_rv=1
        exit
    fi

    # Step 3: the file contents for this version.
    fossil artifact "$fossilcat_obj_sha1" 2>/dev/null
}

##
# Walk all the commits associated with a particular file.  The
# revisions listed by "fossil finfo" are read one per line; each one
# is paired with the revision read just before it, the comment of that
# earlier-read revision is printed, and (when the gui is enabled) this
# script is run again as "fossil-mgdiff" to compare the file across
# the pair.
#
# @param[in] fname file name
walk_single_file() {
    wsf_fname="$1"

    # Exactly one file is accepted.
    if [ $# -ne 1 ] ; then
        echo "***" 1>&2
        echo "*** $progname: Error: Exactly one file must be specified: $@" 1>&2
        echo "***" 1>&2
        exit_rv=1
        exit
    fi

    # Reduce the history to a bare list of revisions and walk it.
    fossil finfo "$wsf_fname" \
      | "$NAWK" '/^[0-9]/ {print substr($2, 2, length($2) - 2)}' \
      | {
            while read prev ; do

                # Nothing can be shown until two revisions were read;
                # "$curr" holds the one read on the previous pass.
                if [ -n "$curr" ] ; then
                    printf "%s%s\n" \
                           "------------------------------------" \
                           "------------------------------------"
                    comment=`get_comment "$curr"`
                    echo "[$curr] $comment"

                    # Show what the commit "$curr" did to the file.
                    if [ "$usegui" = "true" ] ; then
                        fossil-mgdiff -r "$prev:$curr" "$1" 1>/dev/null 2>&1
                    fi
                fi

                curr="$prev"
            done

            # Close the log with a final separator.
            if [ -n "$curr" ] ; then
                printf "%s%s\n" \
                       "------------------------------------" \
                       "------------------------------------"
            fi
        }
}

##
# Helper for diff_revisions(): run the graphical difference program
# "$MGDIFF" on two files.  The command line is assembled as a real
# argument list, never as a string handed to "eval", so file names and
# revisions containing quotes, dollar signs or back quotes are passed
# through untouched.  "$QUIET_OPT", "$TITLE_1_OPT" and "$TITLE_2_OPT"
# are added only when they are non-empty.
#
# @param[in] title1 title for the first (old) file
# @param[in] title2 title for the second (new) file
# @param[in] file1 path of the first (old) file
# @param[in] file2 path of the second (new) file
dr_show_gui() {
    dsg_title_1="$1"
    dsg_title_2="$2"
    dsg_file_1="$3"
    dsg_file_2="$4"

    # Native Windows executables need native Windows paths.  The test
    # for a native Windows executable is just whether its name ends
    # with ".exe" or ".EXE".  Only the local copies are converted so
    # the script's own temporary file names stay as they were.
    case "$mgdiff_basename" in
        *.exe|*.EXE)
            dsg_file_1=`cygpath -w "$dsg_file_1"`
            dsg_file_2=`cygpath -w "$dsg_file_2"`
            ;;
    esac

    # Build the argument list in this function's positional parameters.
    set -- "$MGDIFF"
    if [ x"$QUIET_OPT" != x ] ; then
        set -- "$@" "$QUIET_OPT"
    fi
    if [ x"$TITLE_1_OPT" != x ] ; then
        set -- "$@" "$TITLE_1_OPT" "$dsg_title_1"
    fi
    if [ x"$TITLE_2_OPT" != x ] ; then
        set -- "$@" "$TITLE_2_OPT" "$dsg_title_2"
    fi
    set -- "$@" "$dsg_file_1" "$dsg_file_2"

    "$@"
}

##
# This function is the main part of the script.  It runs "fossil diff"
# (with "--from $rev0" and "--to $rev1" when those are set, followed
# by the function's own arguments) and parses the output.  It writes a
# simple log to stdout of the files that are NEW, OLD, or DIF
# (i.e. different).  For the DIF files, it invokes the graphical diff
# program when "$usegui" is "true".
#
# The old side of each comparison is written to "$tmp_file_1"; when two
# revisions are given the new side is written to "$tmp_file_2",
# otherwise the file in the working tree is used.
#
# @param[in] ... optional files or directories passed to "fossil diff"
diff_revisions() {

    # Determine the "fossil diff" command to use.  The string is also
    # what gets shown when debugging is enabled.
    fossil_diff_cmd="fossil diff \
${rev0:+--from \"$rev0\"} \
${rev1:+--to \"$rev1\"} \
${1+\"\$@\"}"

    if [ x"$debug" != x ] ; then
        echo "$fossil_diff_cmd ${1+  [where \$@ = $@]}"
    fi

    # The awk filter classifies each interesting line and prefixes it
    # with DIF, NEW or OLD.  Classifying here, while the original line
    # is still intact, keeps a changed file whose name merely starts
    # with "ADDED", "DELETED" or "MISSING" from being mistaken for a
    # status line.
    eval "$fossil_diff_cmd" \
        | "$NAWK" '/^\+\+\+ / {
                       print "DIF " substr($0, 5)
                       next
                   }
                   /^ADDED_BY_MERGE / {
                       sub(/^ADDED_BY_MERGE[ \t]*/, "")
                       print "NEW " $0
                       next
                   }
                   /^ADDED / {
                       sub(/^ADDED[ \t]*/, "")
                       print "NEW " $0
                       next
                   }
                   /^DELETED / {
                       sub(/^DELETED[ \t]*/, "")
                       print "OLD " $0
                       next
                   }
                   /^MISSING / {
                       sub(/^MISSING[ \t]*/, "")
                       print "OLD " $0
                       next
                   }' \
        | while read -r dr_kind fname ; do

        # Skip diffing the added and deleted files, but at least print
        # them so the user will know about them.
        case "$dr_kind" in
            NEW|OLD)
                echo "$dr_kind $fname"
                continue
                ;;
        esac

        echo "DIF $fname"

        if [ x"$rev0" != x ] && [ x"$rev1" != x ] ; then

            # Two revisions: copy both sides to the temporary files.
            fossilcat "$rev0" "$fname" > "$tmp_file_1"
            fossilcat "$rev1" "$fname" > "$tmp_file_2"

            # Pop up the graphical diff.
            if [ x"$usegui" = x"true" ] ; then
                dr_show_gui "$fname (rev $rev0)" "$fname (rev $rev1)" \
                            "$tmp_file_1" "$tmp_file_2"
            fi

        else

            #
            # Here, we force all the GUIs to use a temporary file even
            # if they claim to be able to handle input on stdin.  The
            # reason for this is that most of the GUIs leave temporary
            # files behind in /tmp when they exit unexpectedly.  By
            # using the temporary files created by this script, they
            # are virtually guaranteed to be cleaned up in all cases.
            #

            curr_sha1=`get_sha1_of_commit current`

            if [ x"$rev0" != x ] ; then
                title_rev="$rev0"
                dr_base_rev="$rev0"
            else
                # By default you diff against what was checked out.
                # The title shows the branch name derived from the
                # current SHA-1, while the content is fetched by the
                # SHA-1 itself so that it is the checked out version
                # that ends up on the left.
                title_rev=`branch_of_commit "$curr_sha1"`
                dr_base_rev="$curr_sha1"
            fi

            # The convention that "diff" uses is that the old file is
            # on the left and the new file is on the right.  We use
            # this to display the files.  A working file that cannot
            # be read is replaced by /dev/null.
            file_first="$tmp_file_1"
            file_second="$fname"
            if [ ! -r "$file_second" ] ; then
                file_second="/dev/null"
            fi
            file_second=`realpath "$file_second"`

            # Copy the old side to a temporary file.
            fossilcat "$dr_base_rev" "$fname" > "$tmp_file_1"

            # Pop up the graphical diff.
            if [ x"$usegui" = x"true" ] ; then
                dr_show_gui "$fname (rev $title_rev)" "$fname" \
                            "$file_first" "$file_second"
            fi
        fi

    done
}

#
# Script Starts Here !!!
#

progname="fossil-mgdiff.sh"

# Start with empty names so that clean_up() can never remove a file
# named by a stray "tmp_file_1" or "tmp_file_2" inherited from the
# environment.
tmp_file_1=""
tmp_file_2=""

# Signal Trap handler to clean up temporary files in "$TMP".  It is
# registered before anything that can fail because the "exit_rv=n;
# exit" convention used by the functions above only reports "n" once
# the EXIT trap is in place; without it, a bare "exit" reports the
# status of the preceding assignment, which is 0.
trap 'clean_up' HUP INT QUIT TERM EXIT
if [ $? -ne 0 ] ; then
    echo "$progname: Unable to register signal handler." >&2
    exit_rv=1
    exit 1
fi

verify_exec "basename"
progname=`basename "$0"`

# getunique() and clean_up() need these, so check them first.
verify_exec "mktemp"
verify_exec "realpath"
verify_exec "rm"

tmp_base="fossil-mgdiff-$$."
getunique

# Use GNU awk if possible.
for NAWK in gawk nawk awk ; do
    type "$NAWK" 1>/dev/null 2>&1
    if [ $? -eq 0 ] ; then
        break
    fi
done

verify_exec "echo"
verify_exec "expr"
verify_exec "fossil"
verify_exec "$NAWK"
verify_exec "printf"

# Get fossil revision(s) to use.
usegui="true"
while getopts "dg:hnr:vw" OPT ; do
    case "$OPT" in

        d)  debug="true"
            ;;

        # Exported so that the copies of this script started by the
        # -w walk use the same gui.
        g)  MGDIFF="$OPTARG"
            export MGDIFF
            ;;

        h)  usage 1
            exit
            ;;

        n)  usegui="false"
            ;;

        # Allow the user to pass in two revisions at once with the
        # form "-r <rev0>..<rev1>" or "-r <rev0>:<rev1>".
        r)  tmp=`get_first_revision "$OPTARG"`
            if [ x"$tmp" != x ] ; then
                if [ x"$rev0" = x ] ; then
                    rev0="$tmp"
                elif [ x"$rev1" = x ] ; then
                    rev1="$tmp"
                else
                    echo "*** $progname: Error: too many revisions" \
                         "\"$rev0\", \"$rev1\", \"$OPTARG\"" 1>&2
                    exit_rv=1
                    exit
                fi
            fi

            tmp=`get_second_revision "$OPTARG"`
            if [ x"$tmp" != x ] ; then
                if [ x"$rev0" = x ] ; then
                    rev0="$tmp"
                elif [ x"$rev1" = x ] ; then
                    rev1="$tmp"
                else
                    echo "*** $progname: Error: too many revisions" \
                         "\"$rev0\", \"$rev1\", \"$OPTARG\"" 1>&2
                    exit_rv=1
                    exit
                fi
            fi
            ;;

        v)  echo "$progname: ${FOSSIL_MGDIFF_VERSION}"
            exit
            ;;

        # If the user specifies the -w flag and a single file, this
        # script will walk the commit tree and pop up the gui to show
        # how the single file changed after each commit.
        w)  walk="true"
            ;;

        \?) usage 2
            exit_rv=1
            exit
            ;;
    esac
done
shift `expr $OPTIND - 1`

# The gui can only be checked now that -g and -n have been seen: -g
# may have replaced the default program, and with -n no gui is run at
# all, so it does not have to exist.
if [ x"$usegui" = x"true" ] ; then
    verify_exec "$MGDIFF"
fi
mgdiff_basename=`basename "$MGDIFF"`

#
# Portability issues.
#
#       QUIET_OPT -- How to prevent the gui from starting if there are no diffs.
#     TITLE_1_OPT -- How to override the name of file1.
#     TITLE_2_OPT -- How to override the name of file2.
#
if [ "$mgdiff_basename" = "mgdiff" ] ; then
    QUIET_OPT="-quit"
elif [ "$mgdiff_basename" = "tkdiff" ] ; then
    TITLE_1_OPT="-L"
    TITLE_2_OPT="-L"
elif [ "$mgdiff_basename" = "xdiff" ] ; then
    QUIET_OPT="-D"
elif [ "$mgdiff_basename" = "xxdiff" ] ; then
    QUIET_OPT="-D"
    TITLE_1_OPT="--title1"
    TITLE_2_OPT="--title2"
elif [ "$mgdiff_basename" = "WinMergeU.exe" ] ; then
    QUIET_OPT="/x"
    TITLE_1_OPT="/dl"
    TITLE_2_OPT="/dr"
fi

if [ x"$walk" != x ] ; then
    walk_single_file "$@"
else
    diff_revisions "$@"
fi