annotate mercurial/mdiff.py @ 184:697f05bfe976

Improved binary diff from Christopher Li This is more intelligent/efficient by combining neighboring inserts, replaces and deletes. Passes test of converting kernel repo, but doesn't appear to substantially affect compression or performance.
author mpm@selenic.com
date Fri, 27 May 2005 19:38:34 -0800
parents e6c621a825f2
children 75840796e8e2 afe895fcc0d0
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
1 #!/usr/bin/python
127
44538462d3c8 Fix braindamaged import in mdiff.
mpm@selenic.com
parents: 125
diff changeset
2 import difflib, struct, mmap
44538462d3c8 Fix braindamaged import in mdiff.
mpm@selenic.com
parents: 125
diff changeset
3 from mercurial.mpatch import *
0
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
4
64
b3e2ddff0159 Diff in subdirectories from Jake Edge
mpm@selenic.com
parents: 35
diff changeset
5 def unidiff(a, ad, b, bd, fn):
35
9197c26a414b unidiff: punt on comparing empty files
mpm@selenic.com
parents: 0
diff changeset
6 if not a and not b: return ""
0
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
7 a = a.splitlines(1)
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
8 b = b.splitlines(1)
64
b3e2ddff0159 Diff in subdirectories from Jake Edge
mpm@selenic.com
parents: 35
diff changeset
9 l = list(difflib.unified_diff(a, b, "a/" + fn, "b/" + fn, ad, bd))
170
e6c621a825f2 hg diff: fix missing final newline bug
mpm@selenic.com
parents: 127
diff changeset
10
e6c621a825f2 hg diff: fix missing final newline bug
mpm@selenic.com
parents: 127
diff changeset
11 for ln in xrange(len(l)):
e6c621a825f2 hg diff: fix missing final newline bug
mpm@selenic.com
parents: 127
diff changeset
12 if l[ln][-1] != '\n':
e6c621a825f2 hg diff: fix missing final newline bug
mpm@selenic.com
parents: 127
diff changeset
13 l[ln] += "\n\ No newline at end of file\n"
e6c621a825f2 hg diff: fix missing final newline bug
mpm@selenic.com
parents: 127
diff changeset
14
0
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
15 return "".join(l)
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
16
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
17 def textdiff(a, b):
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
18 return diff(a.splitlines(1), b.splitlines(1))
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
19
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
20 def sortdiff(a, b):
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
21 la = lb = 0
184
697f05bfe976 Improved binary diff from Christopher Li
mpm@selenic.com
parents: 170
diff changeset
22 lena = len(a)
697f05bfe976 Improved binary diff from Christopher Li
mpm@selenic.com
parents: 170
diff changeset
23 lenb = len(b)
0
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
24 while 1:
184
697f05bfe976 Improved binary diff from Christopher Li
mpm@selenic.com
parents: 170
diff changeset
25 am, bm, = la, lb
697f05bfe976 Improved binary diff from Christopher Li
mpm@selenic.com
parents: 170
diff changeset
26 while lb < lenb and la < len and a[la] == b[lb] :
0
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
27 la += 1
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
28 lb += 1
184
697f05bfe976 Improved binary diff from Christopher Li
mpm@selenic.com
parents: 170
diff changeset
29 if la>am: yield (am, bm, la-am)
697f05bfe976 Improved binary diff from Christopher Li
mpm@selenic.com
parents: 170
diff changeset
30 while lb < lenb and b[lb] < a[la]: lb += 1
697f05bfe976 Improved binary diff from Christopher Li
mpm@selenic.com
parents: 170
diff changeset
31 if lb>=lenb: break
697f05bfe976 Improved binary diff from Christopher Li
mpm@selenic.com
parents: 170
diff changeset
32 while la < lena and b[lb] > a[la]: la += 1
697f05bfe976 Improved binary diff from Christopher Li
mpm@selenic.com
parents: 170
diff changeset
33 if la>=lena: break
697f05bfe976 Improved binary diff from Christopher Li
mpm@selenic.com
parents: 170
diff changeset
34 yield (lena, lenb, 0)
0
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
35
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
36 def diff(a, b, sorted=0):
184
697f05bfe976 Improved binary diff from Christopher Li
mpm@selenic.com
parents: 170
diff changeset
37 if not a:
697f05bfe976 Improved binary diff from Christopher Li
mpm@selenic.com
parents: 170
diff changeset
38 s = "".join(b)
697f05bfe976 Improved binary diff from Christopher Li
mpm@selenic.com
parents: 170
diff changeset
39 return s and (struct.pack(">lll", 0, 0, len(s)) + s)
697f05bfe976 Improved binary diff from Christopher Li
mpm@selenic.com
parents: 170
diff changeset
40
0
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
41 bin = []
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
42 p = [0]
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
43 for i in a: p.append(p[-1] + len(i))
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
44
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
45 if sorted:
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
46 d = sortdiff(a, b)
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
47 else:
184
697f05bfe976 Improved binary diff from Christopher Li
mpm@selenic.com
parents: 170
diff changeset
48 d = difflib.SequenceMatcher(None, a, b).get_matching_blocks()
697f05bfe976 Improved binary diff from Christopher Li
mpm@selenic.com
parents: 170
diff changeset
49 la = 0
697f05bfe976 Improved binary diff from Christopher Li
mpm@selenic.com
parents: 170
diff changeset
50 lb = 0
697f05bfe976 Improved binary diff from Christopher Li
mpm@selenic.com
parents: 170
diff changeset
51 for am, bm, size in d:
697f05bfe976 Improved binary diff from Christopher Li
mpm@selenic.com
parents: 170
diff changeset
52 s = "".join(b[lb:bm])
697f05bfe976 Improved binary diff from Christopher Li
mpm@selenic.com
parents: 170
diff changeset
53 if am > la or s:
697f05bfe976 Improved binary diff from Christopher Li
mpm@selenic.com
parents: 170
diff changeset
54 bin.append(struct.pack(">lll", p[la], p[am], len(s)) + s)
697f05bfe976 Improved binary diff from Christopher Li
mpm@selenic.com
parents: 170
diff changeset
55 la = am + size
697f05bfe976 Improved binary diff from Christopher Li
mpm@selenic.com
parents: 170
diff changeset
56 lb = bm + size
697f05bfe976 Improved binary diff from Christopher Li
mpm@selenic.com
parents: 170
diff changeset
57
0
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
58 return "".join(bin)
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
59
120
bae6f0328f63 Add a function to return the new text from a binary diff
mpm@selenic.com
parents: 75
diff changeset
60 def patchtext(bin):
bae6f0328f63 Add a function to return the new text from a binary diff
mpm@selenic.com
parents: 75
diff changeset
61 pos = 0
bae6f0328f63 Add a function to return the new text from a binary diff
mpm@selenic.com
parents: 75
diff changeset
62 t = []
bae6f0328f63 Add a function to return the new text from a binary diff
mpm@selenic.com
parents: 75
diff changeset
63 while pos < len(bin):
bae6f0328f63 Add a function to return the new text from a binary diff
mpm@selenic.com
parents: 75
diff changeset
64 p1, p2, l = struct.unpack(">lll", bin[pos:pos + 12])
bae6f0328f63 Add a function to return the new text from a binary diff
mpm@selenic.com
parents: 75
diff changeset
65 pos += 12
bae6f0328f63 Add a function to return the new text from a binary diff
mpm@selenic.com
parents: 75
diff changeset
66 t.append(bin[pos:pos + l])
bae6f0328f63 Add a function to return the new text from a binary diff
mpm@selenic.com
parents: 75
diff changeset
67 pos += l
bae6f0328f63 Add a function to return the new text from a binary diff
mpm@selenic.com
parents: 75
diff changeset
68 return "".join(t)
bae6f0328f63 Add a function to return the new text from a binary diff
mpm@selenic.com
parents: 75
diff changeset
69
0
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
70 def patch(a, bin):
72
4a6ab4d80dc4 Add an O(m + nlog n) patching extension
mpm@selenic.com
parents: 71
diff changeset
71 return patches(a, [bin])