# HG changeset patch
# User mason@suse.com
# Date 1144183124 14400
# Node ID 441ea218414e83dd0d91e2bf8438d796828f89c1
# Parent  4d0700ae0991221fd9357d04b1c02c1a78109ffc
Fill in the uncompressed size during revlog.addgroup

This uses code from Matt to calculate the size change that would result
from applying a delta to keep an accurate running total of the text size
during revlog.addgroup

diff -r 4d0700ae0991 -r 441ea218414e mercurial/mdiff.py
--- a/mercurial/mdiff.py	Tue Apr 04 16:38:44 2006 -0400
+++ b/mercurial/mdiff.py	Tue Apr 04 16:38:44 2006 -0400
@@ -192,4 +192,5 @@
     return mpatch.patches(a, [bin])
 
 patches = mpatch.patches
+patchedsize = mpatch.patchedsize
 textdiff = bdiff.bdiff
diff -r 4d0700ae0991 -r 441ea218414e mercurial/mpatch.c
--- a/mercurial/mpatch.c	Tue Apr 04 16:38:44 2006 -0400
+++ b/mercurial/mpatch.c	Tue Apr 04 16:38:44 2006 -0400
@@ -354,8 +354,59 @@
 	return result;
 }
 
+/*
+ * calculate size of a patched file directly
+ *
+ * Python arguments: (orig, bin), where orig is the length of the
+ * original text and bin is a binary delta.  The delta is read as a
+ * series of hunks: three big-endian 32-bit integers (start, end, len)
+ * followed by len bytes of data.  Returns the resulting text length;
+ * sets mpatch_Error and returns NULL if the delta cannot be decoded.
+ */
+static PyObject *
+patchedsize(PyObject *self, PyObject *args)
+{
+	long orig, start, end, len, outlen = 0, last = 0;
+	int patchlen;
+	char *bin, *binend;
+	uint32_t decode[3]; /* aligned copy of one 12-byte hunk header */
+
+	if (!PyArg_ParseTuple(args, "ls#", &orig, &bin, &patchlen))
+		return NULL;
+
+	binend = bin + patchlen;
+
+	while (bin < binend) {
+		/* a partial header means the delta is truncated */
+		if (binend - bin < 12)
+			break;
+		memcpy(decode, bin, 12);
+		start = ntohl(decode[0]);
+		end = ntohl(decode[1]);
+		len = ntohl(decode[2]);
+		/* the hunk data must fit in what is left of the delta */
+		if (len < 0 || len > binend - bin - 12)
+			break;
+		bin += 12 + len;
+		/* count the untouched gap before this hunk, then its data */
+		outlen += start - last;
+		last = end;
+		outlen += len;
+	}
+
+	if (bin != binend) {
+		if (!PyErr_Occurred())
+			PyErr_SetString(mpatch_Error, "patch cannot be decoded");
+		return NULL;
+	}
+
+	outlen += orig - last;
+	return Py_BuildValue("l", outlen);
+}
+
 static PyMethodDef methods[] = {
 	{"patches", patches, METH_VARARGS, "apply a series of patches\n"},
+	{"patchedsize", patchedsize, METH_VARARGS, "calculate patched size\n"},
 	{NULL, NULL}
 };
 
diff -r 4d0700ae0991 -r 441ea218414e mercurial/revlog.py
--- a/mercurial/revlog.py	Tue Apr 04 16:38:44 2006 -0400
+++ b/mercurial/revlog.py	Tue Apr 04 16:38:44 2006 -0400
@@ -342,8 +342,40 @@
         if self.version != 0:
             return self.ngoffset(self.index[rev][0])
         return self.index[rev][0]
+
     def end(self, rev): return self.start(rev) + self.length(rev)
 
+    def size(self, rev):
+        """return the length of the uncompressed text for a given revision"""
+        l = -1
+        if self.version != 0:
+            l = self.index[rev][2]
+            if l >= 0:
+                return l
+
+        t = self.revision(self.node(rev))
+        return len(t)
+
+        # Alternate implementation.  The advantage of this code is that it
+        # will be faster for a single revision.  But the results are not
+        # cached, so finding the size of every revision will be slower.
+        """
+        if self.cache and self.cache[1] == rev:
+            return len(self.cache[2])
+
+        base = self.base(rev)
+        if self.cache and self.cache[1] >= base and self.cache[1] < rev:
+            base = self.cache[1]
+            text = self.cache[2]
+        else:
+            text = self.revision(self.node(base))
+
+        l = len(text)
+        for x in xrange(base + 1, rev + 1):
+            l = mdiff.patchedsize(l, self.chunk(x))
+        return l
+        """
+
     def length(self, rev):
         if rev < 0:
             return 0
@@ -904,7 +936,7 @@
         node = None
 
         base = prev = -1
-        start = end = measure = 0
+        start = end = textlen = 0
         if r:
             end = self.end(t)
 
@@ -949,8 +981,9 @@
             if chain == prev:
                 tempd = compress(delta)
                 cdelta = tempd[0] + tempd[1]
+                textlen = mdiff.patchedsize(textlen, delta)
 
-            if chain != prev or (end - start + len(cdelta)) > measure * 2:
+            if chain != prev or (end - start + len(cdelta)) > textlen * 2:
                 # flush our writes here so we can read it in revision
                 if dfh:
                     dfh.flush()
@@ -960,12 +993,12 @@
                 chk = self.addrevision(text, transaction, link, p1, p2)
                 if chk != node:
                     raise RevlogError(_("consistency error adding group"))
-                measure = len(text)
+                textlen = len(text)
             else:
                 if self.version == 0:
                     e = (end, len(cdelta), base, link, p1, p2, node)
                 else:
-                    e = (self.offset_type(end, 0), len(cdelta), -1, base,
+                    e = (self.offset_type(end, 0), len(cdelta), textlen, base,
                          link, self.rev(p1), self.rev(p2), node)
                 self.index.append(e)
                 self.nodemap[node] = r