# HG changeset patch # User Matt Mackall # Date 1174624664 18000 # Node ID cd7b36b7869cda3f0b98b7913540c3aa7a421de4 # Parent bdbfc2193524232076844c04b9df6f5588b9c8fe restructure changelog file appending - make appending code proper part of changelog with delayupdate/finalize - use simplified appender that tracks pending data in memory - eliminate old appendfile and helper classes - update addchangegroup to use new interface and reuse the existing changelog diff -r bdbfc2193524 -r cd7b36b7869c mercurial/appendfile.py --- a/mercurial/appendfile.py Thu Mar 22 20:10:46 2007 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,152 +0,0 @@ -# appendfile.py - special classes to make repo updates atomic -# -# Copyright 2006 Vadim Gelfer -# -# This software may be used and distributed according to the terms -# of the GNU General Public License, incorporated herein by reference. - -import cStringIO, changelog, errno, manifest, os, tempfile, util - -# writes to metadata files are ordered. reads: changelog, manifest, -# normal files. writes: normal files, manifest, changelog. - -# manifest contains pointers to offsets in normal files. changelog -# contains pointers to offsets in manifest. if reader reads old -# changelog while manifest or normal files are written, it has no -# pointers into new parts of those files that are maybe not consistent -# yet, so will not read them. - -# localrepo.addchangegroup thinks it writes changelog first, then -# manifest, then normal files (this is order they are available, and -# needed for computing linkrev fields), but uses appendfile to hide -# updates from readers. data not written to manifest or changelog -# until all normal files updated. write manifest first, then -# changelog. - -# with this write ordering, readers cannot see inconsistent view of -# repo during update. - -class appendfile(object): - '''implement enough of file protocol to append to revlog file. - appended data is written to temp file. 
reads and seeks span real - file and temp file. readers cannot see appended data until - writedata called.''' - - def __init__(self, fp, tmpname): - if tmpname: - self.tmpname = tmpname - self.tmpfp = util.posixfile(self.tmpname, 'ab+') - else: - fd, self.tmpname = tempfile.mkstemp(prefix="hg-appendfile-") - os.close(fd) - self.tmpfp = util.posixfile(self.tmpname, 'ab+') - self.realfp = fp - self.offset = fp.tell() - # real file is not written by anyone else. cache its size so - # seek and read can be fast. - self.realsize = util.fstat(fp).st_size - self.name = fp.name - - def end(self): - self.tmpfp.flush() # make sure the stat is correct - return self.realsize + util.fstat(self.tmpfp).st_size - - def tell(self): - return self.offset - - def flush(self): - self.tmpfp.flush() - - def close(self): - self.realfp.close() - self.tmpfp.close() - - def seek(self, offset, whence=0): - '''virtual file offset spans real file and temp file.''' - if whence == 0: - self.offset = offset - elif whence == 1: - self.offset += offset - elif whence == 2: - self.offset = self.end() + offset - - if self.offset < self.realsize: - self.realfp.seek(self.offset) - else: - self.tmpfp.seek(self.offset - self.realsize) - - def read(self, count=-1): - '''only trick here is reads that span real file and temp file.''' - fp = cStringIO.StringIO() - old_offset = self.offset - if self.offset < self.realsize: - s = self.realfp.read(count) - fp.write(s) - self.offset += len(s) - if count > 0: - count -= len(s) - if count != 0: - if old_offset != self.offset: - self.tmpfp.seek(self.offset - self.realsize) - s = self.tmpfp.read(count) - fp.write(s) - self.offset += len(s) - return fp.getvalue() - - def write(self, s): - '''append to temp file.''' - self.tmpfp.seek(0, 2) - self.tmpfp.write(s) - # all writes are appends, so offset must go to end of file. 
- self.offset = self.realsize + self.tmpfp.tell() - -class appendopener(object): - '''special opener for files that only read or append.''' - - def __init__(self, opener): - self.realopener = opener - self.tmpname = None - - def __call__(self, name, mode='r'): - '''open file.''' - # only handle .i file - if not name.endswith("."): - return self.realopener(name, mode) - assert mode in 'ra+' - try: - realfp = self.realopener(name, 'r') - except IOError, err: - if err.errno != errno.ENOENT: raise - self.realfp = self.realopener(name, 'w+') - fp = appendfile(realfp, self.tmpname) - if tmpname is None: - self.tmpname = fp.tmpname - self.name = name - return fp - - def writedata(self): - '''copy data from temp files to real files.''' - if not self.tmpname: - return - ifp = open(self.tmpname, 'rb') - ofp = self.realopener(self.name, 'a') - for chunk in util.filechunkiter(ifp): - ofp.write(chunk) - ifp.close() - os.unlink(self.tmpname) - ofp.close() - - def cleanup(self): - '''delete temp files (this discards unwritten data!)''' - if self.tmpname: - os.unlink(self.tmpname) - -# files for changelog and manifest are in different appendopeners, so -# not mixed up together. 
class appender:
    '''the changelog index must be updated last on disk, so we use this
    class to delay writes to it

    Writes are collected in a caller-supplied list instead of going to
    the real file. Reads, seeks and tell() operate on a virtual file
    made of the real file followed by the pending data.'''

    def __init__(self, fp, buf):
        '''fp is the already-open real file; buf is the list that
        receives pending writes (the caller keeps a reference to it)'''
        self.data = buf
        self.fp = fp
        self.offset = fp.tell()
        self.size = util.fstat(fp).st_size

    def end(self):
        '''return the virtual size: real file size plus pending data'''
        # add up chunk lengths instead of joining just to measure
        return self.size + sum(map(len, self.data))

    def tell(self):
        '''return the current virtual offset'''
        return self.offset

    def flush(self):
        '''no-op: pending data is only held in memory'''
        pass

    def close(self):
        '''close the underlying real file'''
        # was close(self.fp), which is an undefined global name
        self.fp.close()

    def seek(self, offset, whence=0):
        '''virtual file offset spans real file and data'''
        if whence == 0:
            self.offset = offset
        elif whence == 1:
            self.offset += offset
        elif whence == 2:
            self.offset = self.end() + offset
        # the real file only needs repositioning when the new offset
        # falls inside it; offsets past it are served from self.data
        if self.offset < self.size:
            self.fp.seek(self.offset)

    def read(self, count=-1):
        '''only trick here is reads that span real file and data

        a negative count reads up to the end of the pending data'''
        ret = ""
        if self.offset < self.size:
            s = self.fp.read(count)
            ret = s
            self.offset += len(s)
            if count > 0:
                count -= len(s)
        if count != 0:
            doff = self.offset - self.size
            # collapse the pending chunks in place: the list object is
            # the one handed to __init__, so it must not be rebound
            self.data[:] = ["".join(self.data)]
            if count < 0:
                # doff + count would be a bogus end index here and
                # would cut off the tail of the pending data
                s = self.data[0][doff:]
            else:
                s = self.data[0][doff:doff + count]
            self.offset += len(s)
            ret += s
        return ret

    def write(self, s):
        '''queue s as pending data and advance the virtual offset'''
        self.data.append(s)
        self.offset += len(s)
tr): + "finalize index updates" + self.opener = self._realopener + if self._delaybuf: + fp = self.opener(self.indexfile, 'a') + fp.write("".join(self._delaybuf)) + fp.close() + del self._delaybuf + self.checkinlinesize(tr) + + def _appendopener(self, name, mode='r'): + fp = self._realopener(name, mode) + if not name == self.indexfile: + return fp + return appender(fp, self._delaybuf) + + def checkinlinesize(self, tr, fp=None): + if self.opener == self._appendopener: + return + return revlog.checkinlinesize(self, tr, fp) + def decode_extra(self, text): extra = {} for l in text.split('\0'): diff -r bdbfc2193524 -r cd7b36b7869c mercurial/localrepo.py --- a/mercurial/localrepo.py Thu Mar 22 20:10:46 2007 -0500 +++ b/mercurial/localrepo.py Thu Mar 22 23:37:44 2007 -0500 @@ -7,7 +7,7 @@ from node import * from i18n import _ -import repo, appendfile, changegroup +import repo, changegroup import changelog, dirstate, filelog, manifest, context import re, lock, transaction, tempfile, stat, mdiff, errno, ui import os, revlog, time, util @@ -1782,52 +1782,45 @@ # write changelog data to temp files so concurrent readers will not see # inconsistent view - cl = None - try: - cl = appendfile.appendchangelog(self.sopener) - oldheads = len(cl.heads()) + cl = self.changelog + cl.delayupdate() + oldheads = len(cl.heads()) + + # pull off the changeset group + self.ui.status(_("adding changesets\n")) + cor = cl.count() - 1 + chunkiter = changegroup.chunkiter(source) + if cl.addgroup(chunkiter, csmap, tr, 1) is None: + raise util.Abort(_("received changelog group is empty")) + cnr = cl.count() - 1 + changesets = cnr - cor - # pull off the changeset group - self.ui.status(_("adding changesets\n")) - cor = cl.count() - 1 - chunkiter = changegroup.chunkiter(source) - if cl.addgroup(chunkiter, csmap, tr, 1) is None: - raise util.Abort(_("received changelog group is empty")) - cnr = cl.count() - 1 - changesets = cnr - cor + # pull off the manifest group + self.ui.status(_("adding 
manifests\n")) + chunkiter = changegroup.chunkiter(source) + # no need to check for empty manifest group here: + # if the result of the merge of 1 and 2 is the same in 3 and 4, + # no new manifest will be created and the manifest group will + # be empty during the pull + self.manifest.addgroup(chunkiter, revmap, tr) - # pull off the manifest group - self.ui.status(_("adding manifests\n")) + # process the files + self.ui.status(_("adding file changes\n")) + while 1: + f = changegroup.getchunk(source) + if not f: + break + self.ui.debug(_("adding %s revisions\n") % f) + fl = self.file(f) + o = fl.count() chunkiter = changegroup.chunkiter(source) - # no need to check for empty manifest group here: - # if the result of the merge of 1 and 2 is the same in 3 and 4, - # no new manifest will be created and the manifest group will - # be empty during the pull - self.manifest.addgroup(chunkiter, revmap, tr) - - # process the files - self.ui.status(_("adding file changes\n")) - while 1: - f = changegroup.getchunk(source) - if not f: - break - self.ui.debug(_("adding %s revisions\n") % f) - fl = self.file(f) - o = fl.count() - chunkiter = changegroup.chunkiter(source) - if fl.addgroup(chunkiter, revmap, tr) is None: - raise util.Abort(_("received file revlog group is empty")) - revisions += fl.count() - o - files += 1 - - cl.writedata() - finally: - if cl: - cl.cleanup() + if fl.addgroup(chunkiter, revmap, tr) is None: + raise util.Abort(_("received file revlog group is empty")) + revisions += fl.count() - o + files += 1 # make changelog see real files again - self.changelog = changelog.changelog(self.sopener) - self.changelog.checkinlinesize(tr) + cl.finalize(tr) newheads = len(self.changelog.heads()) heads = ""