# HG changeset patch # User Chris Mason # Date 1144541446 14400 # Node ID 345107e167a05b972682d07234900c5172552f85 # Parent 856f0ba200bc59c05e304cc1aca2865e0e831870# Parent c0b945c5df0872bc07a33f2acf0b001a03bc58f9 merge 0.8.1 with revlogng diff -r c0b945c5df08 -r 345107e167a0 mercurial/appendfile.py --- a/mercurial/appendfile.py Fri Apr 07 20:08:11 2006 -0500 +++ b/mercurial/appendfile.py Sat Apr 08 20:10:46 2006 -0400 @@ -42,9 +42,19 @@ # seek and read can be fast. self.fpsize = os.fstat(fp.fileno()).st_size - def seek(self, offset): + def end(self): + self.tmpfp.flush() # make sure the stat is correct + return self.fpsize + os.fstat(self.tmpfp.fileno()).st_size + + def seek(self, offset, whence=0): '''virtual file offset spans real file and temp file.''' - self.offset = offset + if whence == 0: + self.offset = offset + elif whence == 1: + self.offset += offset + elif whence == 2: + self.offset = self.end() + offset + if self.offset < self.fpsize: self.realfp.seek(self.offset) else: @@ -103,8 +113,16 @@ self.fp = fp self.offset = 0 - def seek(self, offset): - self.offset = offset + def tell(self): + return self.offset + + def seek(self, offset, whence=0): + if whence == 0: + self.offset = offset + elif whence == 1: + self.offset += offset + elif whence == 2: + self.offset = self.fp.end() + offset def read(self, count=-1): try: @@ -143,7 +161,7 @@ '''open file. return same cached appendfile object for every later call.''' - assert mode in 'ra' + assert mode in 'ra+' fp = self.fps.get(name) if fp is None: fp = appendfile(self.realopener(name, 'a+')) @@ -162,11 +180,15 @@ # not mixed up together. class appendchangelog(changelog.changelog, appendopener): - def __init__(self, opener): + def __init__(self, opener, version): appendopener.__init__(self, opener) - changelog.changelog.__init__(self, self) + changelog.changelog.__init__(self, self, version) + def checkinlinesize(self, fp, tr): + return class appendmanifest(manifest.manifest, appendopener): - def __init__(self, opener): + def __init__(self, opener, version): appendopener.__init__(self, opener) - manifest.manifest.__init__(self, self) + manifest.manifest.__init__(self, self, version) + def checkinlinesize(self, fp, tr): + return diff -r c0b945c5df08 -r 345107e167a0 mercurial/bundlerepo.py --- a/mercurial/bundlerepo.py Fri Apr 07 20:08:11 2006 -0500 +++ b/mercurial/bundlerepo.py Sat Apr 08 20:10:46 2006 -0400 @@ -31,6 +31,7 @@ # revlog.revlog.__init__(self, opener, indexfile, datafile) self.bundlefile = bundlefile + self.basemap = {} def chunkpositer(): for chunk in changegroup.chunkiter(bundlefile): pos = bundlefile.tell() @@ -58,7 +59,8 @@ if not prev: prev = p1 # start, size, base is not used, link, p1, p2, delta ref - e = (start, size, None, link, p1, p2, node, prev) + e = (start, size, None, link, p1, p2, node) + self.basemap[n] = prev self.index.append(e) self.nodemap[node] = n prev = node @@ -68,9 +70,9 @@ """is rev from the bundle""" if rev < 0: return False - return len(self.index[rev]) > 7 - def bundlebase(self, rev): return self.index[rev][7] - def chunk(self, rev): + return rev in self.basemap + def bundlebase(self, rev): return self.basemap[rev] + def chunk(self, rev, df=None): # Warning: in case of bundle, the diff is against bundlebase, # not against rev - 1 # XXX: could use some caching diff -r c0b945c5df08 -r 345107e167a0 mercurial/changelog.py --- a/mercurial/changelog.py Fri Apr 07 20:08:11 2006 -0500 +++ b/mercurial/changelog.py Sat Apr 08 20:10:46 2006 -0400 @@ -11,8 +11,9 @@ demandload(globals(), "os time util") class changelog(revlog): - def __init__(self, opener): - revlog.__init__(self, opener, "00changelog.i", "00changelog.d") + def __init__(self, opener, defversion=0): + revlog.__init__(self, opener, "00changelog.i", "00changelog.d", + defversion) def extract(self, text): if not text: diff -r c0b945c5df08 -r 345107e167a0 mercurial/commands.py --- a/mercurial/commands.py Fri Apr 07 20:08:11 2006 -0500 +++ b/mercurial/commands.py Sat Apr 08 20:10:46 2006 -0400 @@ -1268,7 +1268,7 @@ def debugancestor(ui, index, rev1, rev2): """find the ancestor revision of two revisions in a given index""" - r = revlog.revlog(util.opener(os.getcwd(), audit=False), index, "") + r = revlog.revlog(util.opener(os.getcwd(), audit=False), index, "", 0) a = r.ancestor(r.lookup(rev1), r.lookup(rev2)) ui.write("%d:%s\n" % (r.rev(a), hex(a))) @@ -1372,7 +1372,7 @@ def debugdata(ui, file_, rev): """dump the contents of an data file revision""" r = revlog.revlog(util.opener(os.getcwd(), audit=False), - file_[:-2] + ".i", file_) + file_[:-2] + ".i", file_, 0) try: ui.write(r.revision(r.lookup(rev))) except KeyError: @@ -1380,18 +1380,19 @@ def debugindex(ui, file_): """dump the contents of an index file""" - r = revlog.revlog(util.opener(os.getcwd(), audit=False), file_, "") + r = revlog.revlog(util.opener(os.getcwd(), audit=False), file_, "", 0) ui.write(" rev offset length base linkrev" + " nodeid p1 p2\n") for i in range(r.count()): - e = r.index[i] + node = r.node(i) + pp = r.parents(node) ui.write("% 6d % 9d % 7d % 6d % 7d %s %s %s\n" % ( - i, e[0], e[1], e[2], e[3], - short(e[6]), short(e[4]), short(e[5]))) + i, r.start(i), r.length(i), r.base(i), r.linkrev(node), + short(node), short(pp[0]), short(pp[1]))) def debugindexdot(ui, file_): """dump an index DAG as a .dot file""" - r = revlog.revlog(util.opener(os.getcwd(), audit=False), file_, "") + r = revlog.revlog(util.opener(os.getcwd(), audit=False), file_, "", 0) ui.write("digraph G {\n") for i in range(r.count()): e = r.index[i] diff -r c0b945c5df08 -r 345107e167a0 mercurial/filelog.py --- a/mercurial/filelog.py Fri Apr 07 20:08:11 2006 -0500 +++ b/mercurial/filelog.py Sat Apr 08 20:10:46 2006 -0400 @@ -11,10 +11,11 @@ demandload(globals(), "bdiff") class filelog(revlog): - def __init__(self, opener, path): + def __init__(self, opener, path, defversion=0): revlog.__init__(self, opener, os.path.join("data", self.encodedir(path + ".i")), - os.path.join("data", self.encodedir(path + ".d"))) + os.path.join("data", self.encodedir(path + ".d")), + defversion) # This avoids a collision between a file named foo and a dir named # foo.i or foo.d diff -r c0b945c5df08 -r 345107e167a0 mercurial/localrepo.py --- a/mercurial/localrepo.py Fri Apr 07 20:08:11 2006 -0500 +++ b/mercurial/localrepo.py Sat Apr 08 20:10:46 2006 -0400 @@ -10,8 +10,8 @@ from node import * from i18n import gettext as _ from demandload import * -demandload(globals(), "re lock transaction tempfile stat mdiff errno ui") demandload(globals(), "appendfile changegroup") +demandload(globals(), "re lock transaction tempfile stat mdiff errno ui revlog") class localrepository(object): def __del__(self): @@ -35,8 +35,31 @@ self.ui = ui.ui(parentui=parentui) self.opener = util.opener(self.path) self.wopener = util.opener(self.root) - self.manifest = manifest.manifest(self.opener) - self.changelog = changelog.changelog(self.opener) + + try: + self.ui.readconfig(self.join("hgrc"), self.root) + except IOError: + pass + + v = self.ui.revlogopts + self.revlogversion = int(v.get('format', 0)) + flags = 0 + for x in v.get('flags', "").split(): + flags |= revlog.flagstr(x) + + v = self.revlogversion | flags + self.manifest = manifest.manifest(self.opener, v) + self.changelog = changelog.changelog(self.opener, v) + + # the changelog might not have the inline index flag + # on. If the format of the changelog is the same as found in + # .hgrc, apply any flags found in the .hgrc as well. + # Otherwise, just version from the changelog + v = self.changelog.version + if v == self.revlogversion: + v |= flags + self.revlogversion = v + self.tagscache = None self.nodetagscache = None self.encodepats = None @@ -48,11 +71,6 @@ os.mkdir(self.join("data")) self.dirstate = dirstate.dirstate(self.opener, self.ui, self.root) - try: - self.ui.readconfig(self.join("hgrc"), self.root) - except IOError: - pass - def hook(self, name, throw=False, **args): def runhook(name, cmd): self.ui.note(_("running hook %s: %s\n") % (name, cmd)) @@ -150,6 +168,7 @@ try: return self.changelog.lookup(key) except: + raise raise repo.RepoError(_("unknown revision '%s'") % key) def dev(self): @@ -167,7 +186,7 @@ def file(self, f): if f[0] == '/': f = f[1:] - return filelog.filelog(self.opener, f) + return filelog.filelog(self.opener, f, self.revlogversion) def getcwd(self): return self.dirstate.getcwd() @@ -1394,7 +1413,7 @@ # write changelog and manifest data to temp files so # concurrent readers will not see inconsistent view - cl = appendfile.appendchangelog(self.opener) + cl = appendfile.appendchangelog(self.opener, self.changelog.version) oldheads = len(cl.heads()) @@ -1408,7 +1427,7 @@ cnr = cor changesets = cnr - cor - mf = appendfile.appendmanifest(self.opener) + mf = appendfile.appendmanifest(self.opener, self.manifest.version) # pull off the manifest group self.ui.status(_("adding manifests\n")) @@ -1436,8 +1455,10 @@ cl.writedata() # make changelog and manifest see real files again - self.changelog = changelog.changelog(self.opener) - self.manifest = manifest.manifest(self.opener) + self.changelog = changelog.changelog(self.opener, self.changelog.version) + self.manifest = manifest.manifest(self.opener, self.manifest.version) + self.changelog.checkinlinesize(tr) + self.manifest.checkinlinesize(tr) newheads = len(self.changelog.heads()) heads = "" diff -r c0b945c5df08 -r 345107e167a0 mercurial/manifest.py --- a/mercurial/manifest.py Fri Apr 07 20:08:11 2006 -0500 +++ b/mercurial/manifest.py Sat Apr 08 20:10:46 2006 -0400 @@ -12,10 +12,11 @@ demandload(globals(), "bisect array") class manifest(revlog): - def __init__(self, opener): + def __init__(self, opener, defversion=0): self.mapcache = None self.listcache = None - revlog.__init__(self, opener, "00manifest.i", "00manifest.d") + revlog.__init__(self, opener, "00manifest.i", "00manifest.d", + defversion) def read(self, node): if node == nullid: return {} # don't upset local cache diff -r c0b945c5df08 -r 345107e167a0 mercurial/mdiff.py --- a/mercurial/mdiff.py Fri Apr 07 20:08:11 2006 -0500 +++ b/mercurial/mdiff.py Sat Apr 08 20:10:46 2006 -0400 @@ -192,4 +192,5 @@ return mpatch.patches(a, [bin]) patches = mpatch.patches +patchedsize = mpatch.patchedsize textdiff = bdiff.bdiff diff -r c0b945c5df08 -r 345107e167a0 mercurial/mpatch.c --- a/mercurial/mpatch.c Fri Apr 07 20:08:11 2006 -0500 +++ b/mercurial/mpatch.c Sat Apr 08 20:10:46 2006 -0400 @@ -354,8 +354,44 @@ return result; } +/* calculate size of a patched file directly */ +static PyObject * +patchedsize(PyObject *self, PyObject *args) +{ + long orig, start, end, len, outlen = 0, last = 0; + int patchlen; + char *bin, *binend; + char decode[12]; /* for dealing with alignment issues */ + + if (!PyArg_ParseTuple(args, "ls#", &orig, &bin, &patchlen)) + return NULL; + + binend = bin + patchlen; + + while (bin < binend) { + memcpy(decode, bin, 12); + start = ntohl(*(uint32_t *)decode); + end = ntohl(*(uint32_t *)(decode + 4)); + len = ntohl(*(uint32_t *)(decode + 8)); + bin += 12 + len; + outlen += start - last; + last = end; + outlen += len; + } + + if (bin != binend) { + if (!PyErr_Occurred()) + PyErr_SetString(mpatch_Error, "patch cannot be decoded"); + return NULL; + } + + outlen += orig - last; + return Py_BuildValue("l", outlen); +} + static PyMethodDef methods[] = { {"patches", patches, METH_VARARGS, "apply a series of patches\n"}, + {"patchedsize", patchedsize, METH_VARARGS, "calculed patched size\n"}, {NULL, NULL} }; diff -r c0b945c5df08 -r 345107e167a0 mercurial/revlog.py --- a/mercurial/revlog.py Fri Apr 07 20:08:11 2006 -0500 +++ b/mercurial/revlog.py Sat Apr 08 20:10:46 2006 -0400 @@ -16,6 +16,18 @@ demandload(globals(), "binascii changegroup errno heapq mdiff os") demandload(globals(), "sha struct zlib") +# revlog version strings +REVLOGV0 = 0 +REVLOGNG = 1 + +# revlog flags +REVLOGNGINLINEDATA = (1 << 16) + +def flagstr(flag): + if flag == "inline": + return REVLOGNGINLINEDATA + raise RevlogError(_("unknown revlog flag %s" % flag)) + def hash(text, p1, p2): """generate a hash from the given text and its parent hashes @@ -51,49 +63,148 @@ if t == 'u': return bin[1:] raise RevlogError(_("unknown compression type %r") % t) -indexformat = ">4l20s20s20s" +indexformatv0 = ">4l20s20s20s" +v0shaoffset = 56 +# index ng: +# 6 bytes offset +# 2 bytes flags +# 4 bytes compressed length +# 4 bytes uncompressed length +# 4 bytes: base rev +# 4 bytes link rev +# 4 bytes parent 1 rev +# 4 bytes parent 2 rev +# 32 bytes: nodeid +indexformatng = ">Qiiiiii20s12x" +ngshaoffset = 32 +versionformat = ">i" class lazyparser(object): """ this class avoids the need to parse the entirety of large indices - - By default we parse and load 1000 entries at a time. - - If no position is specified, we load the whole index, and replace - the lazy objects in revlog with the underlying objects for - efficiency in cases where we look at most of the nodes. """ - def __init__(self, data, revlog): - self.data = data + def __init__(self, dataf, size, indexformat, shaoffset): + self.dataf = dataf + self.format = indexformat self.s = struct.calcsize(indexformat) - self.l = len(data)/self.s + self.indexformat = indexformat + self.datasize = size + self.l = size/self.s self.index = [None] * self.l self.map = {nullid: -1} + self.allmap = 0 self.all = 0 - self.revlog = revlog + self.mapfind_count = 0 + self.shaoffset = shaoffset - def trunc(self, pos): - self.l = pos/self.s + def loadmap(self): + """ + during a commit, we need to make sure the rev being added is + not a duplicate. This requires loading the entire index, + which is fairly slow. loadmap can load up just the node map, + which takes much less time. + """ + if self.allmap: return + start = 0 + end = self.datasize + self.allmap = 1 + cur = 0 + count = 0 + blocksize = self.s * 256 + self.dataf.seek(0) + while cur < end: + data = self.dataf.read(blocksize) + off = 0 + for x in xrange(256): + n = data[off + self.shaoffset:off + self.shaoffset + 20] + self.map[n] = count + count += 1 + if count >= self.l: + break + off += self.s + cur += blocksize + + def loadblock(self, blockstart, blocksize, data=None): + if self.all: return + if data is None: + self.dataf.seek(blockstart) + data = self.dataf.read(blocksize) + lend = len(data) / self.s + i = blockstart / self.s + off = 0 + for x in xrange(lend): + if self.index[i + x] == None: + b = data[off : off + self.s] + self.index[i + x] = b + n = b[self.shaoffset:self.shaoffset + 20] + self.map[n] = i + x + off += self.s - def load(self, pos=None): + def findnode(self, node): + """search backwards through the index file for a specific node""" + if self.allmap: return None + + # hg log will cause many many searches for the manifest + # nodes. After we get called a few times, just load the whole + # thing. + if self.mapfind_count > 8: + self.loadmap() + if node in self.map: + return node + return None + self.mapfind_count += 1 + last = self.l - 1 + while self.index[last] != None: + if last == 0: + self.all = 1 + self.allmap = 1 + return None + last -= 1 + end = (last + 1) * self.s + blocksize = self.s * 256 + while end >= 0: + start = max(end - blocksize, 0) + self.dataf.seek(start) + data = self.dataf.read(end - start) + findend = end - start + while True: + # we're searching backwards, so weh have to make sure + # we don't find a changeset where this node is a parent + off = data.rfind(node, 0, findend) + findend = off + if off >= 0: + i = off / self.s + off = i * self.s + n = data[off + self.shaoffset:off + self.shaoffset + 20] + if n == node: + self.map[n] = i + start / self.s + return node + else: + break + end -= blocksize + return None + + def loadindex(self, i=None, end=None): if self.all: return - if pos is not None: - block = pos / 1000 - i = block * 1000 - end = min(self.l, i + 1000) + all = False + if i == None: + blockstart = 0 + blocksize = (512 / self.s) * self.s + end = self.datasize + all = True else: - self.all = 1 - i = 0 - end = self.l - self.revlog.index = self.index - self.revlog.nodemap = self.map - - while i < end: - d = self.data[i * self.s: (i + 1) * self.s] - e = struct.unpack(indexformat, d) - self.index[i] = e - self.map[e[6]] = i - i += 1 + if end: + blockstart = i * self.s + end = end * self.s + blocksize = end - blockstart + else: + blockstart = (i & ~(32)) * self.s + blocksize = self.s * 64 + end = blockstart + blocksize + while blockstart < end: + self.loadblock(blockstart, blocksize) + blockstart += blocksize + if all: self.all = True class lazyindex(object): """a lazy version of the index array""" @@ -104,39 +215,43 @@ def load(self, pos): if pos < 0: pos += len(self.p.index) - self.p.load(pos) + self.p.loadindex(pos) return self.p.index[pos] def __getitem__(self, pos): - return self.p.index[pos] or self.load(pos) + ret = self.p.index[pos] or self.load(pos) + if isinstance(ret, str): + ret = struct.unpack(self.p.indexformat, ret) + return ret + def __setitem__(self, pos, item): + self.p.index[pos] = item def __delitem__(self, pos): del self.p.index[pos] def append(self, e): self.p.index.append(e) - def trunc(self, pos): - self.p.trunc(pos) class lazymap(object): """a lazy version of the node map""" def __init__(self, parser): self.p = parser def load(self, key): - if self.p.all: return - n = self.p.data.find(key) - if n < 0: + n = self.p.findnode(key) + if n == None: raise KeyError(key) - pos = n / self.p.s - self.p.load(pos) def __contains__(self, key): - self.p.load() + if key in self.p.map: + return True + self.p.loadmap() return key in self.p.map def __iter__(self): yield nullid for i in xrange(self.p.l): - try: - yield self.p.index[i][6] - except: - self.p.load(i) - yield self.p.index[i][6] + ret = self.p.index[i] + if not ret: + self.p.loadindex(i) + ret = self.p.index[i] + if isinstance(ret, str): + ret = struct.unpack(self.p.indexformat, ret) + yield ret[-1] def __getitem__(self, key): try: return self.p.map[key] @@ -178,7 +293,7 @@ remove data, and can use some simple techniques to avoid the need for locking while reading. """ - def __init__(self, opener, indexfile, datafile): + def __init__(self, opener, indexfile, datafile, defversion=0): """ create a revlog object @@ -192,11 +307,15 @@ self.indexstat = None self.cache = None self.chunkcache = None + self.defversion = defversion self.load() def load(self): + v = self.defversion try: f = self.opener(self.indexfile) + i = f.read(4) + f.seek(0) except IOError, inst: if inst.errno != errno.ENOENT: raise @@ -213,56 +332,164 @@ and st.st_mtime == oldst.st_mtime and st.st_ctime == oldst.st_ctime): return - self.indexstat = st - i = f.read() + self.indexstat = st + if len(i) > 0: + v = struct.unpack(versionformat, i)[0] + flags = v & ~0xFFFF + fmt = v & 0xFFFF + if fmt == 0: + if flags: + raise RevlogError(_("index %s invalid flags %x for format v0" % + (self.indexfile, flags))) + elif fmt == REVLOGNG: + if flags & ~REVLOGNGINLINEDATA: + raise RevlogError(_("index %s invalid flags %x for revlogng" % + (self.indexfile, flags))) + else: + raise RevlogError(_("index %s invalid format %d" % + (self.indexfile, fmt))) + self.version = v + if v == 0: + self.indexformat = indexformatv0 + shaoffset = v0shaoffset + else: + self.indexformat = indexformatng + shaoffset = ngshaoffset - if i and i[:4] != "\0\0\0\0": - raise RevlogError(_("incompatible revlog signature on %s") % - self.indexfile) - - if len(i) > 10000: - # big index, let's parse it on demand - parser = lazyparser(i, self) - self.index = lazyindex(parser) - self.nodemap = lazymap(parser) + if i: + if not self.inlinedata() and st and st.st_size > 10000: + # big index, let's parse it on demand + parser = lazyparser(f, st.st_size, self.indexformat, shaoffset) + self.index = lazyindex(parser) + self.nodemap = lazymap(parser) + else: + i = f.read() + self.parseindex(i) + if self.inlinedata(): + # we've already got the entire data file read in, save it + # in the chunk data + self.chunkcache = (0, i) + if self.version != 0: + e = list(self.index[0]) + type = self.ngtype(e[0]) + e[0] = self.offset_type(0, type) + self.index[0] = e else: - s = struct.calcsize(indexformat) - l = len(i) / s - self.index = [None] * l - m = [None] * l + self.nodemap = { nullid: -1} + self.index = [] + + + def parseindex(self, data): + s = struct.calcsize(self.indexformat) + l = len(data) + self.index = [] + self.nodemap = {nullid: -1} + inline = self.inlinedata() + off = 0 + n = 0 + while off < l: + e = struct.unpack(self.indexformat, data[off:off + s]) + self.index.append(e) + self.nodemap[e[-1]] = n + n += 1 + off += s + if inline: + off += e[1] - n = 0 - for f in xrange(0, l * s, s): - # offset, size, base, linkrev, p1, p2, nodeid - e = struct.unpack(indexformat, i[f:f + s]) - m[n] = (e[6], n) - self.index[n] = e - n += 1 + def ngoffset(self, q): + if q & 0xFFFF: + raise RevlogError(_('%s: incompatible revision flag %x') % + (self.indexfile, type)) + return long(q >> 16) + + def ngtype(self, q): + return int(q & 0xFFFF) + + def offset_type(self, offset, type): + return long(long(offset) << 16 | type) - self.nodemap = dict(m) - self.nodemap[nullid] = -1 + def loadindex(self, start, end): + """load a block of indexes all at once from the lazy parser""" + if isinstance(self.index, lazyindex): + self.index.p.loadindex(start, end) + def loadindexmap(self): + """loads both the map and the index from the lazy parser""" + if isinstance(self.index, lazyindex): + p = self.index.p + p.loadindex() + self.nodemap = p.map + + def loadmap(self): + """loads the map from the lazy parser""" + if isinstance(self.nodemap, lazymap): + self.nodemap.p.loadmap() + self.nodemap = self.nodemap.p.map + + def inlinedata(self): return self.version & REVLOGNGINLINEDATA def tip(self): return self.node(len(self.index) - 1) def count(self): return len(self.index) - def node(self, rev): return (rev < 0) and nullid or self.index[rev][6] + def node(self, rev): + return (rev < 0) and nullid or self.index[rev][-1] def rev(self, node): try: return self.nodemap[node] except KeyError: raise RevlogError(_('%s: no node %s') % (self.indexfile, hex(node))) - def linkrev(self, node): return self.index[self.rev(node)][3] + def linkrev(self, node): return self.index[self.rev(node)][-4] def parents(self, node): if node == nullid: return (nullid, nullid) - return self.index[self.rev(node)][4:6] + r = self.rev(node) + d = self.index[r][-3:-1] + if self.version == 0: + return d + return [ self.node(x) for x in d ] + def start(self, rev): + if rev < 0: + return -1 + if self.version != 0: + return self.ngoffset(self.index[rev][0]) + return self.index[rev][0] + + def end(self, rev): return self.start(rev) + self.length(rev) + + def size(self, rev): + """return the length of the uncompressed text for a given revision""" + l = -1 + if self.version != 0: + l = self.index[rev][2] + if l >= 0: + return l - def start(self, rev): return (rev < 0) and -1 or self.index[rev][0] + t = self.revision(self.node(rev)) + return len(t) + + # alternate implementation, The advantage to this code is it + # will be faster for a single revision. But, the results are not + # cached, so finding the size of every revision will be slower. + """ + if self.cache and self.cache[1] == rev: + return len(self.cache[2]) + + base = self.base(rev) + if self.cache and self.cache[1] >= base and self.cache[1] < rev: + base = self.cache[1] + text = self.cache[2] + else: + text = self.revision(self.node(base)) + + l = len(text) + for x in xrange(base + 1, rev + 1): + l = mdiff.patchedsize(l, self.chunk(x)) + return l + """ + def length(self, rev): if rev < 0: return 0 else: return self.index[rev][1] - def end(self, rev): return self.start(rev) + self.length(rev) - def base(self, rev): return (rev < 0) and rev or self.index[rev][2] + def base(self, rev): return (rev < 0) and rev or self.index[rev][-5] def reachable(self, rev, stop=None): reachable = {} @@ -501,18 +728,24 @@ """apply a list of patches to a string""" return mdiff.patches(t, pl) - def chunk(self, rev): + def chunk(self, rev, df=None, cachelen=4096): start, length = self.start(rev), self.length(rev) + inline = self.inlinedata() + if inline: + start += (rev + 1) * struct.calcsize(self.indexformat) end = start + length - - def loadcache(): - cache_length = max(4096 * 1024, length) # 4Mo - df = self.opener(self.datafile) + def loadcache(df): + cache_length = max(cachelen, length) # 4k + if not df: + if inline: + df = self.opener(self.indexfile) + else: + df = self.opener(self.datafile) df.seek(start) self.chunkcache = (start, df.read(cache_length)) if not self.chunkcache: - loadcache() + loadcache(df) cache_start = self.chunkcache[0] cache_end = cache_start + len(self.chunkcache[1]) @@ -520,7 +753,7 @@ # it is cached offset = start - cache_start else: - loadcache() + loadcache(df) offset = 0 #def checkchunk(): @@ -555,16 +788,24 @@ rev = self.rev(node) base = self.base(rev) + if self.inlinedata(): + # we probably have the whole chunk cached + df = None + else: + df = self.opener(self.datafile) + # do we have useful data cached? if self.cache and self.cache[1] >= base and self.cache[1] < rev: base = self.cache[1] text = self.cache[2] + self.loadindex(base, rev + 1) else: - text = self.chunk(base) + self.loadindex(base, rev + 1) + text = self.chunk(base, df=df) bins = [] for r in xrange(base + 1, rev + 1): - bins.append(self.chunk(r)) + bins.append(self.chunk(r, df=df)) text = self.patches(text, bins) @@ -576,6 +817,45 @@ self.cache = (node, rev, text) return text + def checkinlinesize(self, tr, fp=None): + if not self.inlinedata(): + return + if not fp: + fp = self.opener(self.indexfile, 'r') + fp.seek(0, 2) + size = fp.tell() + if size < 131072: + return + tr.add(self.datafile, 0) + df = self.opener(self.datafile, 'w') + calc = struct.calcsize(self.indexformat) + for r in xrange(self.count()): + start = self.start(r) + (r + 1) * calc + length = self.length(r) + fp.seek(start) + d = fp.read(length) + df.write(d) + fp.close() + df.close() + fp = self.opener(self.indexfile, 'w', atomictemp=True) + self.version &= ~(REVLOGNGINLINEDATA) + if self.count(): + x = self.index[0] + e = struct.pack(self.indexformat, *x)[4:] + l = struct.pack(versionformat, self.version) + fp.write(l) + fp.write(e) + + for i in xrange(1, self.count()): + x = self.index[i] + e = struct.pack(self.indexformat, *x) + fp.write(e) + + # if we don't call rename, the temp file will never replace the + # real index + fp.rename() + self.chunkcache = None + def addrevision(self, text, transaction, link, p1=None, p2=None, d=None): """add a revision to the log @@ -621,25 +901,64 @@ if t >= 0: offset = self.end(t) - e = (offset, l, base, link, p1, p2, node) + if self.version == 0: + e = (offset, l, base, link, p1, p2, node) + else: + e = (self.offset_type(offset, 0), l, len(text), + base, link, self.rev(p1), self.rev(p2), node) self.index.append(e) self.nodemap[node] = n - entry = struct.pack(indexformat, *e) + entry = struct.pack(self.indexformat, *e) - transaction.add(self.datafile, e[0]) - f = self.opener(self.datafile, "a") - if data[0]: + if not self.inlinedata(): + transaction.add(self.datafile, offset) + transaction.add(self.indexfile, n * len(entry)) + f = self.opener(self.datafile, "a") + if data[0]: + f.write(data[0]) + f.write(data[1]) + f = self.opener(self.indexfile, "a") + else: + f = self.opener(self.indexfile, "a+") + f.seek(0, 2) + transaction.add(self.indexfile, f.tell()) + + if len(self.index) == 1 and self.version != 0: + l = struct.pack(versionformat, self.version) + f.write(l) + entry = entry[4:] + + f.write(entry) + + if self.inlinedata(): f.write(data[0]) - f.write(data[1]) - transaction.add(self.indexfile, n * len(entry)) - self.opener(self.indexfile, "a").write(entry) + f.write(data[1]) + self.checkinlinesize(transaction, f) self.cache = (node, n, text) return node def ancestor(self, a, b): """calculate the least common ancestor of nodes a and b""" + + # start with some short cuts for the linear cases + if a == b: + return a + ra = self.rev(a) + rb = self.rev(b) + if ra < rb: + last = b + first = a + else: + last = a + first = b + + # reachable won't include stop in the list, so we have to use a parent + reachable = self.reachable(last, stop=self.parents(first)[0]) + if first in reachable: + return first + # calculate the distance of every node from root dist = {nullid: 0} for i in xrange(self.count()): @@ -746,18 +1065,18 @@ node = None base = prev = -1 - start = end = measure = 0 + start = end = textlen = 0 if r: - base = self.base(t) - start = self.start(base) end = self.end(t) - measure = self.length(base) - prev = self.tip() - transaction.add(self.datafile, end) - transaction.add(self.indexfile, r * struct.calcsize(indexformat)) - dfh = self.opener(self.datafile, "a") - ifh = self.opener(self.indexfile, "a") + ifh = self.opener(self.indexfile, "a+") + ifh.seek(0, 2) + transaction.add(self.indexfile, ifh.tell()) + if self.inlinedata(): + dfh = None + else: + transaction.add(self.datafile, end) + dfh = self.opener(self.datafile, "a") # loop through our set of deltas chain = None @@ -791,31 +1110,48 @@ if chain == prev: tempd = compress(delta) cdelta = tempd[0] + tempd[1] + textlen = mdiff.patchedsize(textlen, delta) - if chain != prev or (end - start + len(cdelta)) > measure * 2: + if chain != prev or (end - start + len(cdelta)) > textlen * 2: # flush our writes here so we can read it in revision - dfh.flush() + if dfh: + dfh.flush() ifh.flush() text = self.revision(chain) text = self.patches(text, [delta]) chk = self.addrevision(text, transaction, link, p1, p2) if chk != node: raise RevlogError(_("consistency error adding group")) - measure = len(text) + textlen = len(text) else: - e = (end, len(cdelta), base, link, p1, p2, node) + if self.version == 0: + e = (end, len(cdelta), base, link, p1, p2, node) + else: + e = (self.offset_type(end, 0), len(cdelta), textlen, base, + link, self.rev(p1), self.rev(p2), node) self.index.append(e) self.nodemap[node] = r - dfh.write(cdelta) - ifh.write(struct.pack(indexformat, *e)) + if self.inlinedata(): + ifh.write(struct.pack(self.indexformat, *e)) + ifh.write(cdelta) + self.checkinlinesize(transaction, ifh) + if not self.inlinedata(): + dfh = self.opener(self.datafile, "a") + ifh = self.opener(self.indexfile, "a") + else: + if not dfh: + # addrevision switched from inline to conventional + # reopen the index + dfh = self.opener(self.datafile, "a") + ifh = self.opener(self.indexfile, "a") + dfh.write(cdelta) + ifh.write(struct.pack(self.indexformat, *e)) t, r, chain, prev = r, r + 1, node, node base = self.base(t) start = self.start(base) end = self.end(t) - dfh.close() - ifh.close() if node is None: raise RevlogError(_("group to be added is empty")) return node @@ -824,32 +1160,37 @@ if self.count() == 0 or rev >= self.count(): return + if isinstance(self.index, lazyindex): + self.loadindexmap() + # When stripping away a revision, we need to make sure it # does not actually belong to an older changeset. # The minlink parameter defines the oldest revision # we're allowed to strip away. - while minlink > self.index[rev][3]: + while minlink > self.index[rev][-4]: rev += 1 if rev >= self.count(): return # first truncate the files on disk end = self.start(rev) - self.opener(self.datafile, "a").truncate(end) - end = rev * struct.calcsize(indexformat) - self.opener(self.indexfile, "a").truncate(end) + if not self.inlinedata(): + df = self.opener(self.datafile, "a") + df.truncate(end) + end = rev * struct.calcsize(self.indexformat) + else: + end += rev * struct.calcsize(self.indexformat) + + indexf = self.opener(self.indexfile, "a") + indexf.truncate(end) # then reset internal state in memory to forget those revisions self.cache = None self.chunkcache = None - for p in self.index[rev:]: - del self.nodemap[p[6]] - del self.index[rev:] + for x in xrange(rev, self.count()): + del self.nodemap[self.node(x)] - # truncating the lazyindex also truncates the lazymap. - if isinstance(self.index, lazyindex): - self.index.trunc(end) - + del self.index[rev:] def checksize(self): expected = 0 @@ -870,9 +1211,15 @@ f = self.opener(self.indexfile) f.seek(0, 2) actual = f.tell() - s = struct.calcsize(indexformat) + s = struct.calcsize(self.indexformat) i = actual / s di = actual - (i * s) + if self.inlinedata(): + databytes = 0 + for r in xrange(self.count()): + databytes += self.length(r) + dd = 0 + di = actual - self.count() * s - databytes except IOError, inst: if inst.errno != errno.ENOENT: raise diff -r c0b945c5df08 -r 345107e167a0 mercurial/statichttprepo.py --- a/mercurial/statichttprepo.py Fri Apr 07 20:08:11 2006 -0500 +++ b/mercurial/statichttprepo.py Sat Apr 08 20:10:46 2006 -0400 @@ -32,6 +32,7 @@ def __init__(self, ui, path): self.path = (path + "/.hg") self.ui = ui + self.revlogversion = 0 self.opener = opener(self.path) self.manifest = manifest.manifest(self.opener) self.changelog = changelog.changelog(self.opener) diff -r c0b945c5df08 -r 345107e167a0 mercurial/ui.py --- a/mercurial/ui.py Fri Apr 07 20:08:11 2006 -0500 +++ b/mercurial/ui.py Sat Apr 08 20:10:46 2006 -0400 @@ -29,6 +29,7 @@ self.diffcache = None self.header = [] self.prev_header = [] + self.revlogopts = self.configrevlog() else: # parentui may point to an ui object which is already a child self.parentui = parentui.parentui or parentui @@ -134,6 +135,12 @@ result.append(path) return result + def configrevlog(self): + ret = {} + for x in self.configitems("revlog"): + k = x[0].lower() + ret[k] = x[1] + return ret def diffopts(self): if self.diffcache: return self.diffcache diff -r c0b945c5df08 -r 345107e167a0 mercurial/util.py --- a/mercurial/util.py Fri Apr 07 20:08:11 2006 -0500 +++ b/mercurial/util.py Sat Apr 08 20:10:46 2006 -0400 @@ -431,20 +431,33 @@ os.chmod(temp, st.st_mode) return temp - class atomicfile(file): - """the file will only be copied on close""" - def __init__(self, name, mode, atomic=False): + class atomictempfile(file): + """the file will only be copied when rename is called""" + def __init__(self, name, mode): self.__name = name self.temp = mktempcopy(name) file.__init__(self, self.temp, mode) - def close(self): + def rename(self): if not self.closed: file.close(self) rename(self.temp, self.__name) def __del__(self): - self.close() + if not self.closed: + try: + os.unlink(self.temp) + except: pass + file.close(self) - def o(path, mode="r", text=False, atomic=False): + class atomicfile(atomictempfile): + """the file will only be copied on close""" + def __init__(self, name, mode): + atomictempfile.__init__(self, name, mode) + def close(self): + self.rename() + def __del__(self): + self.rename() + + def o(path, mode="r", text=False, atomic=False, atomictemp=False): if audit_p: audit_path(path) f = os.path.join(p, path) @@ -462,6 +475,8 @@ else: if atomic: return atomicfile(f, mode) + elif atomictemp: + return atomictempfile(f, mode) if nlink > 1: rename(mktempcopy(f), f) return file(f, mode)