#!/usr/bin/env python
# Copyright (C) 2002 Dekel Tsur <dekelts@tau.ac.il>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

#  WARNING:
#  Ldiff may have unwanted effects on the current directory.
#  It is wise to back up your files before running it.

# Version string printed by the -v / --version option.
version = "0.5"

import getopt,os,sys,re,string,difflib

# LaTeX macro names written into the generated file to mark changes.
# wdiff() encloses text that is new in the second file between
# add_begin{} and add_end{}, and text that exists only in the first file
# between del_begin{} and del_end{}.  When dvipost is disabled, the script
# defines these four macros itself with \newcommand.
add_begin = "\\changestart"
add_end = "\\changeend"
del_begin = "\\overstrikeon"
del_end = "\\overstrikeoff"

###########################################################################

def half(L):
    """Return the even-indexed items of L (L[0], L[2], L[4], ...) as a list.

    wdiff() applies this to the result of re.split() with a capturing
    group, where words sit at the even indices and the separators between
    them at the odd indices, so it yields the list of words.
    """
    return list(L[::2])

def wdiff(text1, text2, show_deleted):
    """Return text2 with change markers from a word-level diff against text1.

    text1        -- the old text (a string)
    text2        -- the new text (a string)
    show_deleted -- if true, words found only in text1 are copied into the
                    result between del_begin{} and del_end{}

    Words are the pieces between runs of whitespace and '~'.  Words of
    text2 that were inserted or replaced are enclosed between add_begin{}
    and add_end{}.  The result is a single string.
    """
    text1 = re.split(r"([\s~]+)", text1)
    text2 = re.split(r"([\s~]+)", text2)

    # The split lists alternate word, separator, word, ...; word number k
    # is therefore at index 2*k and the separator before it at 2*k-1.
    text1b = half(text1)
    text2b = half(text2)

    # Sentinel (separator, word) pair.  A change that reaches the end of
    # text2 addresses index 2*len(text2b) or 2*len(text2b)-1, which is past
    # the end of the split list; without the sentinels the assignments
    # below raise IndexError.  Empty strings do not alter the joined result.
    text2 += ["", ""]

    opcodes = difflib.SequenceMatcher(None, text1b, text2b).get_opcodes()
    L = [op for op in opcodes if op[0] != 'equal']

    # Merge two adjacent changed blocks if the common block between them is small
    i = 0
    while i < len(L)-1:
        x = L[i]
        y = L[i+1]
        if (x[0] == 'replace' or y[0] == 'replace') and y[1] <= x[2]+2 and \
               (x[2]-x[1]+y[2]-y[1] > y[1]-x[2] or x[4]-x[3]+y[4]-y[3] > y[3]-x[4]):
            L[i] = ('replace', x[1], y[2], x[3], y[4])
            del L[i+1]
        else:
            i += 1

    for x in L:
        tag = x[0]
        # Convert word indices into indices of the split lists
        y = [2*a for a in x[1:]]
        if tag != 'insert' and show_deleted:
            # the -1 removes the space at the end of the deleted text
            # NOTE(review): the slice still holds the original separators,
            # so joining with " " pads each of them with extra spaces;
            # presumably harmless in LaTeX -- confirm before changing.
            deleted_text = " ".join(text1[y[0]:y[1]-1])
            if deleted_text != '':
                deleted_text = '%\n'+del_begin+'{}'+deleted_text+'%\n'+del_end+'{} '
        else:
            deleted_text = ""

        if tag != 'delete':
            # Start marker goes before the first new word, end marker before
            # the separator that follows the last new word.
            text2[y[2]] = deleted_text+'%\n'+add_begin+'{}'+text2[y[2]]
            text2[y[3]-1] = '%\n'+add_end+'{}'+text2[y[3]-1]
        else:
            text2[y[2]] = deleted_text+text2[y[2]]

    return "".join(text2)

###########################################################################
# Regular expression for the commands that enter or leave math mode:
# $, \( \), \[ \], and \begin{...} / \end{...} of the equation, eqnarray
# and align environments (with an optional '*').  It contains only
# non-capturing groups; preprocess() and postprocess() wrap it in one
# capturing group and pass it to re.split().
math_rexp = r"\$|\\\(|\\\)|\\\[|\\\]|\\(?:begin|end)\{(?:equation|eqnarray|align)\*?\}"

def system(command):
    """Print the shell command, run it, and return os.system()'s result.

    command -- a complete shell command line (a string)
    """
    print("Running "+command)
    # Flush first: when stdout is buffered (e.g. redirected to a file) the
    # child's output would otherwise appear before this announcement.
    sys.stdout.flush()
    return os.system(command)

def read_file(file, revision):
    """Return the lines of `file`, optionally as of a CVS revision.

    file     -- path of the latex or lyx file
    revision -- "" reads the file as it is on disk;
                "-1" runs `cvs diff` without a -r flag;
                any other value is passed to `cvs diff` as -r<revision>.

    For a non-empty revision the output of `cvs diff -u` is applied in
    reverse with `patch -R` into a temporary file, which is read and then
    removed.
    """
    if revision == "":
        return read_file2(file)

    # Put the temporary file next to the original.  Prefixing the whole
    # path would turn "dir/a.tex" into "ldiff_tmp_dir/a.tex".
    dirname, basename = os.path.split(file)
    tmpname = os.path.join(dirname, "ldiff_tmp_" + basename)
    if revision == "-1":
        revision_flag = ""
    else:
        revision_flag = "-r"+revision
    system("cvs diff %s -u %s | patch -R -o%s" % (revision_flag, file, tmpname))
    try:
        return read_file2(tmpname)
    finally:
        # Clean up even if reading fails; the file is missing if patch failed.
        if os.path.exists(tmpname):
            os.remove(tmpname)

def read_file2(file):
    """Return the lines of `file`, exporting it to latex first if it is a lyx file.

    A name ending in "lyx" is exported with `<lyx> -e latex <file>`, where
    <lyx> is the value of the LYX environment variable, or "lyx" when that
    variable is unset or empty.  The exported .tex file is read and then
    removed.  Any other file is read directly.
    """
    if file[-3:] != "lyx":
        return read_file3(file)

    # os.getenv() returns None (not "") for an unset variable, so test for
    # both; concatenating None below would raise TypeError.
    lyx = os.getenv("LYX")
    if not lyx:
        lyx = "lyx"
    system(lyx+" -e latex " + file)
    file2 = file[:-3]+"tex"
    try:
        return read_file3(file2)
    finally:
        # Remove the exported file even if reading it fails.
        if os.path.exists(file2):
            os.remove(file2)

def read_file3(file):
    """Return the contents of `file` as a list of lines (line ends kept)."""
    fh = open(file)
    try:
        return fh.readlines()
    finally:
        # Close the handle even when reading raises.
        fh.close()

def get_documentclass(lines):
    """Return the document class named by the first \\documentclass or
    \\documentstyle command in `lines`, or "article" if there is none.

    lines -- list of the lines of a latex file

    An optional [...] argument between the command and the class name is
    skipped.  Only the first {...} group after the command is used, so
    later braces on the same line (another command, a comment) are ignored.
    """
    rexp = r"\\document(?:class|style)\s*(?:\[[^\]]*\])?\s*\{([^{}]*)\}"
    for line in lines:
        mo = re.search(rexp, line)
        if mo:
            return mo.group(1)
    return "article"

def find_rexp(rexp, lines):
    """Return the index of the first line in `lines` that `rexp` matches
    anywhere (re.search semantics), or -1 if no line matches.

    rexp  -- regular expression (a string)
    lines -- list of strings
    """
    search = re.compile(rexp).search
    for i, line in enumerate(lines):
        if search(line):
            return i
    return -1

def preprocess(lines, separate_title):
    """Clean the lines of an input file and split them into three strings.

    lines          -- lines of the input file
    separate_title -- if true, the lines from \\begin{document} up to (not
                      including) the line containing \\maketitle are
                      returned separately instead of as part of the body

    Returns (preamble, title, body):
      preamble -- the lines before \\begin{document}
      title    -- the separated title part, or "" (see separate_title)
      body     -- the rest of the file after cleaning: comments are
                  stripped, "%\\n" line breaks are inserted around commands,
                  and spacing inside math mode is normalized so that the
                  word-level diff produces smaller changes
    """
    preamble_end = find_rexp(r"\\begin{document}", lines)
    if preamble_end < 0:
        # No \begin{document}: treat the whole input as body.  Using the -1
        # returned by find_rexp as a slice/range bound would put all but
        # the last line in the preamble and start the body at lines[-1].
        preamble_end = 0
    preamble_text = "".join(lines[:preamble_end])
    text_begin = preamble_end

    title_text = ""
    if separate_title:
        title_end = find_rexp(r"\\maketitle", lines)
        if title_end > preamble_end:
            title_text = "".join(lines[preamble_end:title_end])
            text_begin = title_end

    body = []
    for line in lines[text_begin:]:
        # Remove comments
        line = re.sub(r"(?<!\\)%.*", r"%", line)
        # Put %\n before commands
        line = re.sub(r"(\\(\w+){)", "%\n\\1", line)
        # Put %\n after \emph{
        line = re.sub(r"(\\(emph|textbf){)", "\\1%\n", line)
        body.append(line)
    text = "".join(body)

    # After the split, odd items are math delimiters and even items are the
    # text between them.
    x = re.split("("+math_rexp+")", text)
    math_mode = 0
    for i in range(len(x)):
        if i % 2:
            math_mode = not math_mode
            # Separate the delimiter from the formula with a space
            if math_mode:
                x[i] = x[i]+" "
            else:
                x[i] = " "+x[i]
        elif math_mode:
            # Replace x^a by x^{a}.
            # This should give smaller diffs if x^a is replaced by x^b
            x[i] = re.sub(r"(?<!\\)([_^])(\\\w+|[^{}\\])", r"\1{\2}", x[i])
            ##x[i] = re.sub(r"(\\\w+|[^{}\\])([_^])", r"{\1}\2", x[i])
            # Add some space in order to reduce the diff
            # We try to add spaces in "safe" positions, but some spaces are
            # added in unwanted places and then removed later
            # NOTE(review): inside these character classes "+-," is a range
            # from '+' to ',', so '-' itself is not matched -- confirm
            # whether that is intended.
            x[i] = re.sub(r"(?<!\\)([=<>+-,()}\\])", r" \1", x[i])
            x[i] = re.sub(r"(?<!\\)([=<>+-,(){}])", r"\1 ", x[i])
            # Remove some of the spaces that were added above
            x[i] = re.sub(r"\\(begin|end|label|ref|cite|text|textrm|mbox){ (\w+) }", r"\\\1{\2}", x[i])
            x[i] = re.sub(r"} {", r"}{", x[i])
        else:
            # put space between \begin{<env>} and the optional argument
            x[i] = re.sub(r"(\\begin{\w+})\[", r"\1 [", x[i])

    return preamble_text, title_text, "".join(x)

# Maps each of \( \) \[ \] to its counterpart (opening <-> closing).
invert_array = {
    "\\(": "\\)",
    "\\)": "\\(",
    "\\[": "\\]",
    "\\]": "\\[",
}

def invert_command(command):
    """Return the math command that pairs with `command`.

    \\( <-> \\), \\[ <-> \\], \\begin{env} <-> \\end{env}.  Any other
    command (e.g. "$", which is its own counterpart) is returned unchanged.
    """
    if command in invert_array:
        return invert_array[command]
    elif command[:6] == "\\begin":
        return "\\end"+command[6:]
    elif command[:4] == "\\end":
        return "\\begin"+command[4:]
    else:
        return command


def postprocess(text):
    """Adjust the marked-up diff text to try to prevent latex errors.

    text -- the diff output containing del_begin{} ... del_end{} blocks

    The text is split on math-mode commands and on the delete-block
    markers.  While walking over the pieces the function tracks whether it
    is in math mode and whether it is inside a deleted block, and:
      * makes the math mode at the end of a deleted block equal to the mode
        at its beginning;
      * drops deleted blocks that contain nothing;
      * wraps text in \\mbox{} when math mode is left inside a deleted
        block in a way that is not considered safe;
      * inside deleted blocks, removes \\label{...} commands and balances
        unescaped braces.
    Returns the adjusted text as one string.
    """
    # Odd items of x are the separators matched by the capturing group
    # (math commands or delete markers); even items are the text between.
    x = re.split("("+math_rexp+"|\\"+del_begin+"\\{\\}|\\"+del_end+"\\{\\})", text)
    math_mode = 0             # true while inside math mode
    math_mode_save = 0        # math_mode at the start of the current deleted block
    math_mode_diff = 0        # '{' minus '}' seen in the current math block
    delete_mode = 0           # true while inside a deleted block
    delete_mode_start = 0     # index in x of the current del_begin marker
    delete_math_balance = 0   # reset at each del_begin; not read in this function
    last_math_command = ""    # last math command that toggled math_mode
    for i in range(len(x)):
        if i % 2:
            # x[i] is either empty, mathmode start/end command,
            # or delete block start/end command
            if x[i] == del_begin+"{}":
                delete_mode = 1
                delete_mode_start = i
                delete_math_balance = 0
                math_mode_save = math_mode
            elif x[i] == del_end+"{}":
                delete_mode = 0
                if math_mode_save != math_mode:
                    # We need to make sure that the mode at the end of the deleted
                    # block is the same as the beginning
                    x[i] = "{}"+invert_command(last_math_command)+x[i]
                    math_mode = math_mode_save
                elif x[delete_mode_start+1:i] == [""]*(i-delete_mode_start-1):
                    # There is nothing in the deleted block, so remove the
                    # delete block start & end commands
                    x[delete_mode_start] = x[i] = ""
            elif x[i] != "": # math start/end
                if math_mode and delete_mode and ( \
                   math_mode_diff != 0 or \
                   invert_command(x[i]) != last_math_command ):
                    # If we exit from math mode inside a deleted block, and it is
                    # not "safe", (namely the math block doesn't have balanced
                    # brackets, or the command used to exit math mode does
                    # not match the command in which the math block begins)
                    # then use mbox to go into text mode
                    x[i] = ""
                    x[i+1] = "\\mbox{"+x[i+1]+"}"
                    if i+2 < len(x) and x[i+2] != del_end+"{}":
                        x[i+2] = ""
                elif x[i+1:i+3] == ["", del_end+"{}"] and \
                     math_mode_save == math_mode:
                    # The deleted block is about to end, and
                    # the current math command will cause a mismatch of modes
                    x[i] = ""
                else:
                    math_mode = not math_mode
                    last_math_command = x[i]
                    math_mode_diff = 0
        else:
            # check balance of brackets
            diff = len(re.findall(r"(?<!\\){", x[i])) - \
                   len(re.findall(r"(?<!\\)}", x[i]))
            if delete_mode:
                # Remove labels in deleted blocks as they may appear
                # in changed block
                x[i] = re.sub(r"\\(label)\{.*?\}", "", x[i])
                if diff > 0:
                    # If the number of '{' is greater than the number of '}'
                    # add diff closing brackets at the end
                    x[i] += "}"*diff
                elif diff < 0:
                    # If the number of '}' is greater than the number of '{'
                    # remove the first -diff brackets
                    x[i] = re.sub(r"(?<!\\)}", "", x[i], -diff)
            elif math_mode:
                math_mode_diff += diff

    return "".join(x)


def usage():
    """Print the command-line help text to standard output."""
    # Parenthesized single-argument form: prints the same text whether
    # print is a statement or a function.
    print("""Usage: ldiff [options] [<file1>] <file2>
Show the differences between two latex/lyx files.
ldiff <file1> <file2> to compare two files.
ldiff <file> to compare <file> with the most recent version checked into CVS.
ldiff -r<rev> <file> to compare <file> with revision <rev> of <file>.
ldiff -r<rev1> -r<rev2> <file> to compare revision <rev1> with revision <rev2>.

Options:
    -h, --help                  This information
    -v, --version               Output version information
    -b, --nocolor               Do not colorize the changed text
    -d, --nodeleted             Don't show deleted text
    -t, --notitle               Don't show differences in the title
    -l, --latex                 Produce only the latex file
    -p, --nodvipost             Don't use dvipost             
    -s, --separation            Separation between change bars and text
                                (default value = -50)
""")

# Long option names accepted on the command line ("=" marks an option
# that takes a value).
_options = ["help", "version", "nocolor", "nodeleted", "notitle", "latex",
            "nodvipost", "separation="]
try:
    opts, args = getopt.getopt(sys.argv[1:], "hvbdtlps:r:", _options)
except getopt.error:
    usage()
    sys.exit(1)

rev_list = []    # revisions given with -r, in command-line order
deleted = 1      # show deleted text (cleared by -d)
colorize = 1     # colorize the changed text (cleared by -b)
onlylatex = 0    # stop after writing the latex file (set by -l)
notitle = 0      # set by -t; passed to preprocess() as separate_title
dvipost = 1      # use dvipost (cleared by -p)
sep = "-50"      # value written into \dvipost{cbsep=...pt} (set by -s)
for o, a in opts:
    # Each option matches exactly one branch.
    if o in ("-h", "--help"):
        usage()
        sys.exit()
    elif o in ("-v", "--version"):
        print("ldiff, version "+version)
        sys.exit()
    elif o in ("-d", "--nodeleted"):
        deleted = 0
    elif o in ("-b", "--nocolor"):
        colorize = 0
    elif o in ("-t", "--notitle"):
        notitle = 1
    elif o in ("-l", "--latex"):
        onlylatex = 1
    elif o in ("-p", "--nodvipost"):
        dvipost = 0
    elif o in ("-s", "--separation"):
        sep = a
    elif o == "-r":
        rev_list.append(a)

# Read the two versions to compare and choose the base name of the output
# files (input name without its extension, plus "-diff").
if len(args) == 2:
    # Two files: revisions make no sense here.
    if rev_list != []:
        usage()
        sys.exit(1)
    text1 = read_file(args[0], "")
    text2 = read_file(args[1], "")
    # splitext instead of [:-4]: also correct for names whose extension is
    # not exactly three letters long.
    filebase = os.path.splitext(args[1])[0]+"-diff"
elif len(args) == 1:
    # One file: compare two versions of it.
    if len(rev_list) > 2:
        # At most two revisions can be compared.
        usage()
        sys.exit(1)
    if len(rev_list) == 0:
        # "-1" makes read_file() run cvs diff without -r; "" is the file on disk
        rev_list = ["-1", ""]
    elif len(rev_list) == 1:
        rev_list.append("")
    text1 = read_file(args[0], rev_list[0])
    text2 = read_file(args[0], rev_list[1])
    filebase = os.path.splitext(args[0])[0]+"-diff"
else:
    # Wrong number of file arguments is an error, so exit non-zero like
    # the other usage errors.
    usage()
    sys.exit(1)

# Build the marked-up latex file <filebase>.tex from the two inputs.

# Different document classes: pass notitle=1 to preprocess() so the title
# part is written from the second file without being diffed.
if get_documentclass(text1) != get_documentclass(text2):
    notitle = 1
preamble1, title1, text1 = preprocess(text1, notitle)
preamble2, title2, text2 = preprocess(text2, notitle)

lines = wdiff(text1, text2, deleted).split("\n")
filetex = filebase+".tex"

fh = open(filetex, 'w')
try:
    # The output uses the preamble of the second file.
    fh.write(preamble2)

    if dvipost:
        fh.write(r"""
\usepackage{dvipost}
\dvipost{cbexp=0pt}
\dvipost{cbsep=%spt}
""" % sep)
        if colorize:
            fh.write(r"""
\dvipost{cbstart color push Blue}
\dvipost{cbend color pop}
\dvipost{osstart color push Red}
\dvipost{osend color pop}
""")
    else:
        # Without dvipost, define the four marker macros directly.
        fh.write(r"""
\newcommand{%s}{\special{color push Blue}}
\newcommand{%s}{\special{color push Black}}
\newcommand{%s}{\special{color push Red}}
\newcommand{%s}{\special{color push Black}}
""" % (add_begin,add_end,del_begin,del_end))

    # Redefine \maketitle, \thanks, \footnote, \endfigure and \endtable so
    # that each also emits the end markers (the same string fills all ten
    # %s slots).
    fh.write(r"""
\makeatletter
\let\ldiff@old@maketitle=\maketitle
\let\ldiff@old@thanks=\thanks
\let\ldiff@old@footnote=\footnote
\let\ldiff@old@endfigure=\endfigure
\let\ldiff@old@endtable=\endtable
\def\maketitle{\ldiff@old@maketitle%s%s}
\def\thanks#1{\ldiff@old@thanks{#1%s}%s}
\long\def\footnote#1{\ldiff@old@footnote{#1%s}%s}
\def\endfigure{%s\ldiff@old@endfigure%s}
\def\endtable{%s\ldiff@old@endtable%s}
\makeatother
""" % (( (del_end+add_end)*2+"{}",)*10) )

    fh.write(title2)
    pieces = []
    for line in lines:
        # Drop an unescaped % that precedes del_end on the same line.  A
        # function is used as the replacement because del_end begins with
        # a backslash, which re would parse as an escape in a template.
        line = re.sub(r"(?<!\\)%(.*)"+'\\'+del_end,
                      lambda mo: mo.group(1)+del_end, line)
        # Move any remaining first unescaped % to the end of the line
        line = re.sub(r"(?<!\\)%(.*)", r"\1%", line)
        pieces.append(line+"\n")
    text = "".join(pieces)

    if deleted:
        text = postprocess(text)
    fh.write(text)
finally:
    # Close the output file even if an error occurs while writing it.
    fh.close()

# Unless only the latex file was requested, run latex/bibtex (and dvipost)
# to produce <filebase>.ps, report warnings and errors, and clean up.
if onlylatex:
    sys.exit()

filedvi = filebase+".dvi"
fileps = filebase+".ps"

latex_command = "latex --interaction=batchmode "
os.system(latex_command+filetex)
os.system("bibtex "+filebase)
os.system(latex_command+filetex)
os.system(latex_command+filetex)
if dvipost:
    os.system("dvipost %s %s" % (filedvi, filedvi))
os.system("dvips %s -o %s" % (filedvi, fileps))
print("\nLatex Warnings:")
# Flush so that the heading precedes grep's output when stdout is buffered.
sys.stdout.flush()
os.system("grep Warning "+filebase+".log")
print("\nLatex Errors:")
sys.stdout.flush()
os.system("grep ^! "+filebase+".log")
# Remove the intermediate files one by one.  "rm file.{aux,...}" depends on
# brace expansion, which the /bin/sh used by os.system need not support.
for ext in ("aux", "bbl", "blg", "dvi", "log"):
    leftover = "%s.%s" % (filebase, ext)
    # Some of these may not exist (e.g. no .bbl without a bibliography).
    if os.path.exists(leftover):
        os.remove(leftover)
