# HG changeset patch
# User mpm@selenic.com
# Date 1115714449 28800
# Node ID 93e868fa0db86845c07164f39a145a81de5a17f5
# Parent  f2b2d5daec30981e6757a6e4c5a4ac497ae0bc10
Add changegroup support

diff -r f2b2d5daec30 -r 93e868fa0db8 hg
--- a/hg	Tue May 10 00:34:57 2005 -0800
+++ b/hg	Tue May 10 00:40:49 2005 -0800
@@ -8,12 +8,13 @@
 # This software may be used and distributed according to the terms
 # of the GNU General Public License, incorporated herein by reference.
 
-# the psyco compiler makes commits about twice as fast
-try:
-    import psyco
-    psyco.full()
-except:
-    pass
+# the psyco compiler makes commits a bit faster
+# and makes changegroup merge about 20 times slower!
+# try:
+#     import psyco
+#     psyco.full()
+# except:
+#     pass
 
 import sys, os, time
 from mercurial import hg, mdiff, fancyopts
@@ -175,6 +176,15 @@
     prev = repo.changelog.parents(node)[0]
     diff(None, prev, node)
 
+elif cmd == "debugchangegroup":
+    newer = repo.newer(repo.changelog.lookup(args[0]))
+    cg = repo.changegroup(newer)
+    sys.stdout.write(cg)
+
+elif cmd == "debugaddchangegroup":
+    data = sys.stdin.read()
+    repo.addchangegroup(data)
+
 elif cmd == "addremove":
     (c, a, d) = repo.diffdir(repo.root, repo.current)
     repo.add(a)
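The two debug commands are the whole transport for the moment: one repository serializes everything the other lacks, and the other side replays that blob from stdin. A minimal sketch of the round trip (not part of the patch; src and dst stand for two repository objects opened the way the hg script opens repo, and "rev" for any revision identifier dst already has):

    # every changeset src has after "rev", as one compressed blob,
    # replayed into dst
    nodes = src.newer(src.changelog.lookup("rev"))
    cg = src.changegroup(nodes)
    dst.addchangegroup(cg)

Piping the output of hg debugchangegroup into hg debugaddchangegroup run in the destination repository amounts to the same thing over any channel that can carry bytes.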
diff -r f2b2d5daec30 -r 93e868fa0db8 mercurial/hg.py
--- a/mercurial/hg.py	Tue May 10 00:34:57 2005 -0800
+++ b/mercurial/hg.py	Tue May 10 00:40:49 2005 -0800
@@ -559,6 +559,151 @@
         for f in list:
             dl.write(f + "\n")
 
+    def newer(self, node):
+        nodes = []
+        for i in xrange(self.changelog.rev(node) + 1, self.changelog.count()):
+            nodes.append(self.changelog.node(i))
+
+        return nodes
+
+    def changegroup(self, nodes):
+        # construct the link map
+        linkmap = {}
+        for n in nodes:
+            linkmap[self.changelog.rev(n)] = n
+
+        # construct a list of all changed files
+        changed = {}
+        for n in nodes:
+            c = self.changelog.read(n)
+            for f in c[3]:
+                changed[f] = 1
+        changed = changed.keys()
+        changed.sort()
+
+        # the changegroup is changesets + manifests + all file revs
+        cg = []
+        revs = [ self.changelog.rev(n) for n in nodes ]
+
+        g = self.changelog.group(linkmap)
+        cg.append(g)
+        g = self.manifest.group(linkmap)
+        cg.append(g)
+
+        for f in changed:
+            g = self.file(f).group(linkmap)
+            if not g: raise "couldn't find change to %s" % f
+            l = struct.pack(">l", len(f))
+            cg += [l, f, g]
+
+        return compress("".join(cg))
+
+    def addchangegroup(self, data):
+        data = decompress(data)
+        def getlen(data, pos):
+            return struct.unpack(">l", data[pos:pos + 4])[0]
+
+        tr = self.transaction()
+        simple = True
+
+        print "merging changesets"
+        # pull off the changeset group
+        l = getlen(data, 0)
+        csg = data[0:l]
+        pos = l
+        co = self.changelog.tip()
+        cn = self.changelog.addgroup(csg, lambda x: self.changelog.count(), tr)
+
+        print "merging manifests"
+        # pull off the manifest group
+        l = getlen(data, pos)
+        mfg = data[pos: pos + l]
+        pos += l
+        mo = self.manifest.tip()
+        mn = self.manifest.addgroup(mfg, lambda x: self.changelog.rev(x), tr)
+
+        # do we need a resolve?
+        if self.changelog.ancestor(co, cn) != co:
+            print "NEED RESOLVE"
+            simple = False
+            resolverev = self.changelog.count()
+
+        # process the files
+        print "merging files"
+        new = {}
+        while pos < len(data):
+            l = getlen(data, pos)
+            pos += 4
+            f = data[pos:pos + l]
+            pos += l
+
+            l = getlen(data, pos)
+            fg = data[pos: pos + l]
+            pos += l
+
+            fl = self.file(f)
+            o = fl.tip()
+            n = fl.addgroup(fg, lambda x: self.changelog.rev(x), tr)
+            if not simple:
+                new[f] = fl.resolvedag(o, n, tr, resolverev)
+
+        # For simple merges, we don't need to resolve manifests or changesets
+        if simple:
+            tr.close()
+            return
+
+        # resolve the manifest to point to all the merged files
+        self.ui.status("resolving manifests\n")
+        ma = self.manifest.ancestor(mn, mo)
+        mmap = self.manifest.read(mn) # mine
+        omap = self.manifest.read(mo) # other
+        amap = self.manifest.read(ma) # ancestor
+        nmap = {}
+
+        for f, mid in mmap.iteritems():
+            if f in omap:
+                if mid != omap[f]:
+                    nmap[f] = new.get(f, mid) # use merged version
+                else:
+                    nmap[f] = new.get(f, mid) # they're the same
+                del omap[f]
+            elif f in amap:
+                if mid != amap[f]:
+                    pass # we should prompt here
+                else:
+                    pass # other deleted it
+            else:
+                nmap[f] = new.get(f, mid) # we created it
+
+        del mmap
+
+        for f, oid in omap.iteritems():
+            if f in amap:
+                if oid != amap[f]:
+                    pass # this is the nasty case, we should prompt
+                else:
+                    pass # probably safe
+            else:
+                nmap[f] = new.get(f, oid) # remote created it
+
+        del omap
+        del amap
+
+        node = self.manifest.add(nmap, tr, resolverev, mn, mo)
+
+        # Now all files and manifests are merged, we add the changed files
+        # and manifest id to the changelog
+        self.ui.status("committing merge changeset\n")
+        new = new.keys()
+        new.sort()
+        if co == cn: cn = -1
+
+        edittext = "\n"+"".join(["HG: changed %s\n" % f for f in new])
+        edittext = self.ui.edit(edittext)
+        n = self.changelog.add(node, new, edittext, tr, co, cn)
+
+        tr.close()
+
 class ui:
     def __init__(self, verbose=False, debug=False):
         self.verbose = verbose
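The three-way manifest resolve in addchangegroup above reduces to a small decision table against the common ancestor. A condensed restatement of the same logic (a sketch for illustration; mine, other and anc stand for the mmap/omap/amap dicts mapping filename to node, merged for the new dict of freshly merged file revisions):

    def resolvemanifests(mine, other, anc, merged):
        nmap = {}
        for f, n in mine.iteritems():
            if f in other:                  # changed on both sides or neither:
                nmap[f] = merged.get(f, n)  # prefer the merged file rev
            elif f not in anc:              # we created it: keep it
                nmap[f] = merged.get(f, n)
            elif n != anc[f]:               # we changed it, they deleted it
                pass                        # should prompt the user here
            # else: untouched here, deleted there; it stays deleted
        for f, n in other.iteritems():
            if f in mine:
                continue                    # handled in the first loop
            if f not in anc:                # they created it: keep it
                nmap[f] = merged.get(f, n)
            elif n != anc[f]:               # they changed it, we deleted it
                pass                        # the nasty case; should prompt
            # else: deleted here, unchanged there; probably safe to drop
        return nmap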
diff -r f2b2d5daec30 -r 93e868fa0db8 mercurial/revlog.py
--- a/mercurial/revlog.py	Tue May 10 00:34:57 2005 -0800
+++ b/mercurial/revlog.py	Tue May 10 00:40:49 2005 -0800
@@ -227,3 +227,186 @@
 
         # return the unmerged heads for later resolving
         return (old, self.tip())
+
+    def group(self, linkmap):
+        # given a list of changeset revs, return a set of deltas and
+        # metadata corresponding to those nodes. the first delta is
+        # parent(nodes[0]) -> nodes[0]; the receiver is guaranteed to
+        # have this parent as it has all history before these
+        # changesets. parent is parent[0]
+
+        revs = []
+        needed = {}
+
+        # find file nodes/revs that match changeset revs
+        for i in xrange(0, self.count()):
+            if self.index[i][3] in linkmap:
+                revs.append(i)
+                needed[i] = 1
+
+        # if we don't have any revisions touched by these changesets, bail
+        if not revs: return struct.pack(">l", 0)
+
+        # add the parent of the first rev
+        p = self.parents(self.node(revs[0]))[0]
+        revs.insert(0, self.rev(p))
+
+        # for each delta that isn't contiguous in the log, we need to
+        # reconstruct the base, reconstruct the result, and then
+        # calculate the delta. we also need to do this where we've
+        # stored a full version and not a delta
+        for i in xrange(0, len(revs) - 1):
+            a, b = revs[i], revs[i + 1]
+            if a + 1 != b or self.base(b) == b:
+                for j in xrange(self.base(a), a + 1):
+                    needed[j] = 1
+                for j in xrange(self.base(b), b + 1):
+                    needed[j] = 1
+
+        # calculate spans to retrieve from datafile
+        needed = needed.keys()
+        needed.sort()
+        spans = []
+        for n in needed:
+            if n < 0: continue
+            o = self.start(n)
+            l = self.length(n)
+            spans.append((o, l, [(n, l)]))
+
+        # merge spans
+        merge = [spans.pop(0)]
+        while spans:
+            e = spans.pop(0)
+            f = merge[-1]
+            if e[0] == f[0] + f[1]:
+                merge[-1] = (f[0], f[1] + e[1], f[2] + e[2])
+            else:
+                merge.append(e)
+
+        # read spans in, divide up chunks
+        chunks = {}
+        for span in merge:
+            # we reopen the file for each span to make http happy for now
+            f = self.opener(self.datafile)
+            f.seek(span[0])
+            data = f.read(span[1])
+
+            # divide up the span
+            pos = 0
+            for r, l in span[2]:
+                chunks[r] = data[pos: pos + l]
+                pos += l
+
+        # helper to reconstruct intermediate versions
+        def construct(text, base, rev):
+            for r in range(base + 1, rev + 1):
+                b = decompress(chunks[r])
+                text = self.patch(text, b)
+            return text
+
+        # build deltas
+        deltas = []
+        for d in range(0, len(revs) - 1):
+            a, b = revs[d], revs[d + 1]
+            n = self.node(b)
+
+            if a + 1 != b or self.base(b) == b:
+                if a >= 0:
+                    base = self.base(a)
+                    ta = decompress(chunks[self.base(a)])
+                    ta = construct(ta, base, a)
+                else:
+                    ta = ""
+
+                base = self.base(b)
+                if a > base:
+                    base = a
+                    tb = ta
+                else:
+                    tb = decompress(chunks[self.base(b)])
+                tb = construct(tb, base, b)
+                d = self.diff(ta, tb)
+            else:
+                d = decompress(chunks[b])
+
+            p = self.parents(n)
+            meta = n + p[0] + p[1] + linkmap[self.linkrev(n)]
+            l = struct.pack(">l", len(meta) + len(d) + 4)
+            deltas.append(l + meta + d)
+
+        l = struct.pack(">l", sum(map(len, deltas)) + 4)
+        deltas.insert(0, l)
+        return "".join(deltas)
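+
+    # the byte stream built above is framed as a 4-byte big-endian
+    # total length (which counts those 4 bytes), followed by one chunk
+    # per delta: a 4-byte chunk length (counting its own 84-byte
+    # header), the 20-byte node, p1, p2 and cs hashes, then the delta;
+    # addgroup() below walks exactly this layout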
+
+    def addgroup(self, data, linkmapper, transaction):
+        # given a set of deltas, add them to the revision log. the
+        # first delta is against its parent, which should be in our
+        # log, the rest are against the previous delta.
+
+        if len(data) <= 4: return
+
+        # retrieve the parent revision of the delta chain
+        chain = data[28:48]
+        text = self.revision(chain)
+
+        # track the base of the current delta log
+        r = self.count()
+        t = r - 1
+
+        base = prev = -1
+        start = end = 0
+        if r:
+            start = self.start(self.base(t))
+            end = self.end(t)
+            measure = self.length(self.base(t))
+            base = self.base(t)
+            prev = self.tip()
+
+        transaction.add(self.datafile, end)
+        transaction.add(self.indexfile, r * struct.calcsize(indexformat))
+        dfh = self.opener(self.datafile, "a")
+        ifh = self.opener(self.indexfile, "a")
+
+        # loop through our set of deltas
+        pos = 4
+        while pos < len(data):
+            l, node, p1, p2, cs = struct.unpack(">l20s20s20s20s",
+                                                data[pos:pos+84])
+            link = linkmapper(cs)
+            delta = data[pos + 84:pos + l]
+            pos += l
+
+            # full versions are inserted when the needed deltas become
+            # comparable to the uncompressed text or when the previous
+            # version is not the one we have a delta against. We use
+            # the size of the previous full rev as a proxy for the
+            # current size.
+
+            if chain == prev:
+                cdelta = compress(delta)
+
+            if chain != prev or (end - start + len(cdelta)) > measure * 2:
+                # flush our writes here so we can read it in revision
+                dfh.flush()
+                ifh.flush()
+                text = self.revision(self.node(t))
+                text = self.patch(text, delta)
+                chk = self.addrevision(text, transaction, link, p1, p2)
+                if chk != node:
+                    raise "consistency error adding group"
+                measure = len(text)
+            else:
+                e = (end, len(cdelta), self.base(t), link, p1, p2, node)
+                self.index.append(e)
+                self.nodemap[node] = r
+                dfh.write(cdelta)
+                ifh.write(struct.pack(indexformat, *e))
+
+            t, r = r, r + 1
+            chain = prev
+            start = self.start(self.base(t))
+            end = self.end(t)
+
+        dfh.close()
+        ifh.close()
+        return node
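For reference, the stream layout that group() writes and addgroup() walks, as one self-contained reader (a sketch, not part of the patch; readgroup and readchangegroup are made-up names, and data is assumed to be a changegroup already passed through decompress()):

    import struct

    def readgroup(data, pos):
        # a group opens with a 4-byte big-endian total length that
        # counts itself; group() encodes an empty group as length 0
        total = struct.unpack(">l", data[pos:pos + 4])[0]
        if total == 0:
            return [], pos + 4
        end = pos + total
        pos += 4
        chunks = []
        while pos < end:
            # chunk header: 4-byte length (counting these 84 bytes),
            # then the node, p1, p2 and linked changeset hashes
            l, node, p1, p2, cs = struct.unpack(">l20s20s20s20s",
                                                data[pos:pos + 84])
            chunks.append((node, p1, p2, cs, data[pos + 84:pos + l]))
            pos += l
        return chunks, end

    def readchangegroup(data):
        csets, pos = readgroup(data, 0)        # changelog deltas
        manifests, pos = readgroup(data, pos)  # manifest deltas
        files = {}
        while pos < len(data):
            # per changed file: a 4-byte name length, the name, then
            # a group of deltas for that file's revlog
            l = struct.unpack(">l", data[pos:pos + 4])[0]
            name = data[pos + 4:pos + 4 + l]
            files[name], pos = readgroup(data, pos + 4 + l)
        return csets, manifests, files

Every length field counts its own header bytes, so a reader can always advance by exactly the length it just read.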