annotate mercurial/mdiff.py @ 71:47c9a869adee

Add mdiff.patches to speed up applying thousands of patches to the manifest
author mpm@selenic.com
date Sat, 14 May 2005 10:27:14 -0800
parents b3e2ddff0159
children 4a6ab4d80dc4
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
1 #!/usr/bin/python
71
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
2 import difflib, struct, mmap
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
3
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
4 devzero = file("/dev/zero")
0
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
5
64
b3e2ddff0159 Diff in subdirectories from Jake Edge
mpm@selenic.com
parents: 35
diff changeset
6 def unidiff(a, ad, b, bd, fn):
35
9197c26a414b unidiff: punt on comparing empty files
mpm@selenic.com
parents: 0
diff changeset
7 if not a and not b: return ""
0
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
8 a = a.splitlines(1)
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
9 b = b.splitlines(1)
64
b3e2ddff0159 Diff in subdirectories from Jake Edge
mpm@selenic.com
parents: 35
diff changeset
10 l = list(difflib.unified_diff(a, b, "a/" + fn, "b/" + fn, ad, bd))
0
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
11 return "".join(l)
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
12
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
13 def textdiff(a, b):
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
14 return diff(a.splitlines(1), b.splitlines(1))
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
15
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
16 def sortdiff(a, b):
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
17 la = lb = 0
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
18
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
19 while 1:
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
20 if la >= len(a) or lb >= len(b): break
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
21 if b[lb] < a[la]:
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
22 si = lb
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
23 while lb < len(b) and b[lb] < a[la] : lb += 1
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
24 yield "insert", la, la, si, lb
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
25 elif a[la] < b[lb]:
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
26 si = la
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
27 while la < len(a) and a[la] < b[lb]: la += 1
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
28 yield "delete", si, la, lb, lb
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
29 else:
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
30 la += 1
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
31 lb += 1
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
32
64
b3e2ddff0159 Diff in subdirectories from Jake Edge
mpm@selenic.com
parents: 35
diff changeset
33 if lb < len(b):
b3e2ddff0159 Diff in subdirectories from Jake Edge
mpm@selenic.com
parents: 35
diff changeset
34 yield "insert", la, la, lb, len(b)
0
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
35
64
b3e2ddff0159 Diff in subdirectories from Jake Edge
mpm@selenic.com
parents: 35
diff changeset
36 if la < len(a):
b3e2ddff0159 Diff in subdirectories from Jake Edge
mpm@selenic.com
parents: 35
diff changeset
37 yield "delete", la, len(a), lb, lb
0
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
38
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
39 def diff(a, b, sorted=0):
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
40 bin = []
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
41 p = [0]
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
42 for i in a: p.append(p[-1] + len(i))
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
43
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
44 if sorted:
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
45 d = sortdiff(a, b)
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
46 else:
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
47 d = difflib.SequenceMatcher(None, a, b).get_opcodes()
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
48
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
49 for o, m, n, s, t in d:
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
50 if o == 'equal': continue
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
51 s = "".join(b[s:t])
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
52 bin.append(struct.pack(">lll", p[m], p[n], len(s)) + s)
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
53
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
54 return "".join(bin)
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
55
71
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
56 # This attempts to apply a series of patches in time proportional to
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
57 # the total size of the patches, rather than patches * len(text). This
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
58 # means rather than shuffling strings around, we shuffle around
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
59 # pointers to fragments with fragment lists.
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
60 #
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
61 # When the fragment lists get too long, we collapse them. To do this
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
62 # efficiently, we do all our operations inside a buffer created by
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
63 # mmap and simply use memmove. This avoids creating a bunch of large
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
64 # temporary string buffers.
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
65
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
66 def patches(a, bins):
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
67 if not bins: return a
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
68
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
69 plens = [len(x) for x in bins]
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
70 pl = sum(plens)
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
71 bl = len(a) + pl
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
72 tl = bl + bl + pl # enough for the patches and two working texts
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
73 b1, b2 = 0, bl
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
74 m = mmap.mmap(devzero.fileno(), tl, mmap.MAP_PRIVATE)
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
75
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
76 # load our original text
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
77 m.write(a)
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
78 frags = [(len(a), b1)]
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
79
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
80 # copy all the patches into our segment so we can memmove from them
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
81 pos = b2 + bl
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
82 m.seek(pos)
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
83 for p in bins: m.write(p)
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
84
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
85 def pull(dst, src, l): # pull l bytes from src
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
86 while l:
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
87 f = src.pop(0)
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
88 if f[0] > l: # do we need to split?
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
89 src.insert(0, (f[0] - l, f[1] + l))
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
90 dst.append((l, f[1]))
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
91 return
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
92 dst.append(f)
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
93 l -= f[0]
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
94
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
95 def collect(buf, list):
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
96 start = buf
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
97 for l, p in list:
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
98 m.move(buf, p, l)
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
99 buf += l
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
100 return (buf - start, start)
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
101
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
102 for plen in plens:
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
103 # if our list gets too long, execute it
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
104 if len(frags) > 128:
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
105 b2, b1 = b1, b2
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
106 frags = [collect(b1, frags)]
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
107
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
108 new = []
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
109 end = pos + plen
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
110 last = 0
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
111 while pos < end:
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
112 p1, p2, l = struct.unpack(">lll", m[pos:pos + 12])
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
113 pull(new, frags, p1 - last) # what didn't change
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
114 pull([], frags, p2 - p1) # what got deleted
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
115 new.append((l, pos + 12)) # what got added
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
116 pos += l + 12
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
117 last = p2
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
118 frags = new + frags # what was left at the end
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
119
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
120 t = collect(b2, frags)
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
121
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
122 return m[t[1]:t[1] + t[0]]
47c9a869adee Add mdiff.patches to speed up applying thousands of patches to the manifest
mpm@selenic.com
parents: 64
diff changeset
123
0
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
124 def patch(a, bin):
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
125 last = pos = 0
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
126 r = []
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
127
64
b3e2ddff0159 Diff in subdirectories from Jake Edge
mpm@selenic.com
parents: 35
diff changeset
128 c = 0
0
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
129 while pos < len(bin):
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
130 p1, p2, l = struct.unpack(">lll", bin[pos:pos + 12])
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
131 pos += 12
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
132 r.append(a[last:p1])
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
133 r.append(bin[pos:pos + l])
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
134 pos += l
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
135 last = p2
64
b3e2ddff0159 Diff in subdirectories from Jake Edge
mpm@selenic.com
parents: 35
diff changeset
136 c += 1
0
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
137 r.append(a[last:])
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
138
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
139 return "".join(r)
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
140
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
141
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
142
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
143
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
144