# HG changeset patch # User mpm@selenic.com # Date 1116095234 28800 # Node ID 47c9a869adee981f852af88af5d5cba4c7d1e62f # Parent ce080e8eccd7afffcf5c466e0102d17406413d1c Add mdiff.patches to speed up applying thousands of patches to the manifest diff -r ce080e8eccd7 -r 47c9a869adee mercurial/mdiff.py --- a/mercurial/mdiff.py Sat May 14 10:13:49 2005 -0800 +++ b/mercurial/mdiff.py Sat May 14 10:27:14 2005 -0800 @@ -1,6 +1,7 @@ #!/usr/bin/python -import difflib, struct -from cStringIO import StringIO +import difflib, struct, mmap + +devzero = file("/dev/zero") def unidiff(a, ad, b, bd, fn): if not a and not b: return "" @@ -52,6 +53,74 @@ return "".join(bin) +# This attempts to apply a series of patches in time proportional to +# the total size of the patches, rather than patches * len(text). This +# means rather than shuffling strings around, we shuffle around +# pointers to fragments with fragment lists. +# +# When the fragment lists get too long, we collapse them. To do this +# efficiently, we do all our operations inside a buffer created by +# mmap and simply use memmove. This avoids creating a bunch of large +# temporary string buffers. + +def patches(a, bins): + if not bins: return a + + plens = [len(x) for x in bins] + pl = sum(plens) + bl = len(a) + pl + tl = bl + bl + pl # enough for the patches and two working texts + b1, b2 = 0, bl + m = mmap.mmap(devzero.fileno(), tl, mmap.MAP_PRIVATE) + + # load our original text + m.write(a) + frags = [(len(a), b1)] + + # copy all the patches into our segment so we can memmove from them + pos = b2 + bl + m.seek(pos) + for p in bins: m.write(p) + + def pull(dst, src, l): # pull l bytes from src + while l: + f = src.pop(0) + if f[0] > l: # do we need to split? + src.insert(0, (f[0] - l, f[1] + l)) + dst.append((l, f[1])) + return + dst.append(f) + l -= f[0] + + def collect(buf, list): + start = buf + for l, p in list: + m.move(buf, p, l) + buf += l + return (buf - start, start) + + for plen in plens: + # if our list gets too long, execute it + if len(frags) > 128: + b2, b1 = b1, b2 + frags = [collect(b1, frags)] + + new = [] + end = pos + plen + last = 0 + while pos < end: + p1, p2, l = struct.unpack(">lll", m[pos:pos + 12]) + pull(new, frags, p1 - last) # what didn't change + pull([], frags, p2 - p1) # what got deleted + new.append((l, pos + 12)) # what got added + pos += l + 12 + last = p2 + frags = new + frags # what was left at the end + + t = collect(b2, frags) + + return m[t[1]:t[1] + t[0]] + def patch(a, bin): last = pos = 0 r = [] diff -r ce080e8eccd7 -r 47c9a869adee mercurial/revlog.py --- a/mercurial/revlog.py Sat May 14 10:13:49 2005 -0800 +++ b/mercurial/revlog.py Sat May 14 10:27:14 2005 -0800 @@ -41,7 +41,7 @@ n = 0 i = self.opener(self.indexfile).read() s = struct.calcsize(indexformat) - for f in range(0, len(i), s): + for f in xrange(0, len(i), s): # offset, size, base, linkrev, p1, p2, nodeid e = struct.unpack(indexformat, i[f:f + s]) self.nodemap[e[6]] = n @@ -87,9 +87,6 @@ def diff(self, a, b): return mdiff.textdiff(a, b) - def patch(self, text, patch): - return mdiff.patch(text, patch) - def revision(self, node): if node == nullid: return "" if self.cache and self.cache[0] == node: return self.cache[2] @@ -114,12 +111,14 @@ last = self.length(base) text = decompress(data[:last]) + bins = [] for r in xrange(base + 1, rev + 1): s = self.length(r) - b = decompress(data[last:last + s]) - text = self.patch(text, b) + bins.append(decompress(data[last:last + s])) last = last + s + text = mdiff.patches(text, bins) + (p1, p2) = self.parents(node) if node != hash(text, p1, p2): raise "integrity check failed on %s:%d" % (self.datafile, rev) @@ -301,14 +300,12 @@ # helper to reconstruct intermediate versions def construct(text, base, rev): - for r in range(base + 1, rev + 1): - b = decompress(chunks[r]) - text = self.patch(text, b) - return text + bins = [decompress(chunks[r]) for r in xrange(base + 1, rev + 1)] + return mdiff.patches(text, bins) # build deltas deltas = [] - for d in range(0, len(revs) - 1): + for d in xrange(0, len(revs) - 1): a, b = revs[d], revs[d + 1] n = self.node(b)