# HG changeset patch
# User mpm@selenic.com
# Date 1117251514 28800
# Node ID 697f05bfe976294479e28531fdece9a379d7e7e7
# Parent 767916673e16a6f3f9833e2a750dc780b4c46592
Improved binary diff from Christopher Li

This is more intelligent/efficient by combining neighboring inserts,
replaces and deletes. Passes test of converting kernel repo, but doesn't
appear to substantially affect compression or performance.

Review note: two bounds checks in the added sortdiff() differ from the
original submission. The match loop now compares la against lena (it
compared against the builtin len, which does not bound la), and the loop
that skips lines of b now checks la < lena before indexing a[la]. Without
these, a[la] raises IndexError once a is exhausted while b still has lines
(e.g. a=["x"], b=["x", "y"]).

diff -r 767916673e16 -r 697f05bfe976 mercurial/mdiff.py
--- a/mercurial/mdiff.py	Fri May 27 13:30:35 2005 -0800
+++ b/mercurial/mdiff.py	Fri May 27 19:38:34 2005 -0800
@@ -19,28 +19,25 @@
 
 def sortdiff(a, b):
     la = lb = 0
-
+    lena = len(a)
+    lenb = len(b)
     while 1:
-        if la >= len(a) or lb >= len(b): break
-        if b[lb] < a[la]:
-            si = lb
-            while lb < len(b) and b[lb] < a[la] : lb += 1
-            yield "insert", la, la, si, lb
-        elif a[la] < b[lb]:
-            si = la
-            while la < len(a) and a[la] < b[lb]: la += 1
-            yield "delete", si, la, lb, lb
-        else:
+        am, bm, = la, lb
+        while lb < lenb and la < lena and a[la] == b[lb] :
             la += 1
             lb += 1
-
-    if lb < len(b):
-        yield "insert", la, la, lb, len(b)
-
-    if la < len(a):
-        yield "delete", la, len(a), lb, lb
+        if la>am: yield (am, bm, la-am)
+        while la < lena and lb < lenb and b[lb] < a[la]: lb += 1
+        if lb>=lenb: break
+        while la < lena and b[lb] > a[la]: la += 1
+        if la>=lena: break
+    yield (lena, lenb, 0)
 
 def diff(a, b, sorted=0):
+    if not a:
+        s = "".join(b)
+        return s and (struct.pack(">lll", 0, 0, len(s)) + s)
+
     bin = []
     p = [0]
     for i in a: p.append(p[-1] + len(i))
@@ -48,13 +45,16 @@
     if sorted:
         d = sortdiff(a, b)
     else:
-        d = difflib.SequenceMatcher(None, a, b).get_opcodes()
-
-    for o, m, n, s, t in d:
-        if o == 'equal': continue
-        s = "".join(b[s:t])
-        bin.append(struct.pack(">lll", p[m], p[n], len(s)) + s)
-
+        d = difflib.SequenceMatcher(None, a, b).get_matching_blocks()
+    la = 0
+    lb = 0
+    for am, bm, size in d:
+        s = "".join(b[lb:bm])
+        if am > la or s:
+            bin.append(struct.pack(">lll", p[la], p[am], len(s)) + s)
+        la = am + size
+        lb = bm + size
+
     return "".join(bin)
 
 def patchtext(bin):