changeset 2089:cb99c711c59f

make appendfile simpler so that it does not break with revlogng on windows. appendfile used to cache open files. this broke revlogng, which needs to rename files when splitting .i into .i/.d, because open files cannot be renamed or unlinked on windows. the new code is a bit slower, but it is safe on both linux and windows. the proper fix for the excessive opening and closing of the changelog and manifest belongs in a different place; such a fix can win back the 10% speed lost here.
author Vadim Gelfer <vadim.gelfer@gmail.com>
date Wed, 19 Apr 2006 08:33:46 -0700
parents f16435b45780
children eb40db373717
files mercurial/appendfile.py mercurial/revlog.py
diffstat 2 files changed, 52 insertions(+), 88 deletions(-) [+]
line wrap: on
line diff
--- a/mercurial/appendfile.py	Wed Apr 19 07:09:15 2006 +0200
+++ b/mercurial/appendfile.py	Wed Apr 19 08:33:46 2006 -0700
@@ -6,7 +6,7 @@
 # of the GNU General Public License, incorporated herein by reference.
 
 from demandload import *
-demandload(globals(), "cStringIO changelog manifest os tempfile")
+demandload(globals(), "cStringIO changelog errno manifest os tempfile")
 
 # writes to metadata files are ordered.  reads: changelog, manifest,
 # normal files.  writes: normal files, manifest, changelog.
@@ -33,18 +33,32 @@
     file and temp file.  readers cannot see appended data until
     writedata called.'''
 
-    def __init__(self, fp):
-        fd, self.tmpname = tempfile.mkstemp()
-        self.tmpfp = os.fdopen(fd, 'ab+')
+    def __init__(self, fp, tmpname):
+        if tmpname:
+            self.tmpname = tmpname
+            self.tmpfp = open(self.tmpname, 'ab+')
+        else:
+            fd, self.tmpname = tempfile.mkstemp()
+            self.tmpfp = os.fdopen(fd, 'ab+')
         self.realfp = fp
         self.offset = fp.tell()
         # real file is not written by anyone else. cache its size so
         # seek and read can be fast.
-        self.fpsize = os.fstat(fp.fileno()).st_size
+        self.realsize = os.fstat(fp.fileno()).st_size
 
     def end(self):
         self.tmpfp.flush() # make sure the stat is correct
-        return self.fpsize + os.fstat(self.tmpfp.fileno()).st_size
+        return self.realsize + os.fstat(self.tmpfp.fileno()).st_size
+
+    def tell(self):
+        return self.offset
+
+    def flush(self):
+        self.tmpfp.flush()
+
+    def close(self):
+        self.realfp.close()
+        self.tmpfp.close()
 
     def seek(self, offset, whence=0):
         '''virtual file offset spans real file and temp file.'''
@@ -55,16 +69,16 @@
         elif whence == 2:
             self.offset = self.end() + offset
 
-        if self.offset < self.fpsize:
+        if self.offset < self.realsize:
             self.realfp.seek(self.offset)
         else:
-            self.tmpfp.seek(self.offset - self.fpsize)
+            self.tmpfp.seek(self.offset - self.realsize)
 
     def read(self, count=-1):
         '''only trick here is reads that span real file and temp file.'''
         fp = cStringIO.StringIO()
         old_offset = self.offset
-        if self.offset < self.fpsize:
+        if self.offset < self.realsize:
             s = self.realfp.read(count)
             fp.write(s)
             self.offset += len(s)
@@ -72,7 +86,7 @@
                 count -= len(s)
         if count != 0:
             if old_offset != self.offset:
-                self.tmpfp.seek(self.offset - self.fpsize)
+                self.tmpfp.seek(self.offset - self.realsize)
             s = self.tmpfp.read(count)
             fp.write(s)
             self.offset += len(s)
@@ -83,98 +97,47 @@
         self.tmpfp.seek(0, 2)
         self.tmpfp.write(s)
         # all writes are appends, so offset must go to end of file.
-        self.offset = self.fpsize + self.tmpfp.tell()
-
-    def writedata(self):
-        '''copy data from temp file to real file.'''
-        self.tmpfp.seek(0)
-        s = self.tmpfp.read()
-        self.tmpfp.close()
-        self.realfp.seek(0, 2)
-        # small race here.  we write all new data in one call, but
-        # reader can see partial update due to python or os. file
-        # locking no help: slow, not portable, not reliable over nfs.
-        # only safe thing is write to temp file every time and rename,
-        # but performance bad when manifest or changelog gets big.
-        self.realfp.write(s)
-        self.realfp.close()
-
-    def __del__(self):
-        '''delete temp file even if exception raised.'''
-        try: os.unlink(self.tmpname)
-        except: pass
-
-class sharedfile(object):
-    '''let file objects share a single appendfile safely.  each
-    sharedfile has own offset, syncs up with appendfile offset before
-    read and after read and write.'''
-
-    def __init__(self, fp):
-        self.fp = fp
-        self.offset = 0
-
-    def tell(self):
-        return self.offset
-
-    def seek(self, offset, whence=0):
-        if whence == 0:
-            self.offset = offset
-        elif whence == 1:
-            self.offset += offset
-        elif whence == 2:
-            self.offset = self.fp.end() + offset
-
-    def read(self, count=-1):
-        try:
-            if self.offset != self.fp.offset:
-                self.fp.seek(self.offset)
-            return self.fp.read(count)
-        finally:
-            self.offset = self.fp.offset
-
-    def write(self, s):
-        try:
-            return self.fp.write(s)
-        finally:
-            self.offset = self.fp.offset
-
-    def close(self):
-        # revlog wants this.
-        pass
-
-    def flush(self):
-        # revlog wants this.
-        pass
-
-    def writedata(self):
-        self.fp.writedata()
+        self.offset = self.realsize + self.tmpfp.tell()
 
 class appendopener(object):
     '''special opener for files that only read or append.'''
 
     def __init__(self, opener):
         self.realopener = opener
-        # key: file name, value: appendfile object
-        self.fps = {}
+        # key: file name, value: appendfile name
+        self.tmpnames = {}
 
     def __call__(self, name, mode='r'):
-        '''open file.  return same cached appendfile object for every
-        later call.'''
+        '''open file.'''
 
         assert mode in 'ra+'
-        fp = self.fps.get(name)
-        if fp is None:
-            fp = appendfile(self.realopener(name, 'a+'))
-            self.fps[name] = fp
-        return sharedfile(fp)
+        try:
+            realfp = self.realopener(name, 'r')
+        except IOError, err:
+            if err.errno != errno.ENOENT: raise
+            realfp = self.realopener(name, 'w+')
+        tmpname = self.tmpnames.get(name)
+        fp = appendfile(realfp, tmpname)
+        if tmpname is None:
+            self.tmpnames[name] = fp.tmpname
+        return fp
 
     def writedata(self):
         '''copy data from temp files to real files.'''
         # write .d file before .i file.
-        fps = self.fps.items()
-        fps.sort()
-        for name, fp in fps:
-            fp.writedata()
+        tmpnames = self.tmpnames.items()
+        tmpnames.sort()
+        for name, tmpname in tmpnames:
+            fp = open(tmpname, 'rb')
+            s = fp.read()
+            fp.close()
+            fp = self.realopener(name, 'a')
+            fp.write(s)
+            fp.close()
+
+    def __del__(self):
+        for tmpname in self.tmpnames.itervalues():
+            os.unlink(tmpname)
 
 # files for changelog and manifest are in different appendopeners, so
 # not mixed up together.
--- a/mercurial/revlog.py	Wed Apr 19 07:09:15 2006 +0200
+++ b/mercurial/revlog.py	Wed Apr 19 08:33:46 2006 -0700
@@ -928,6 +928,7 @@
             if data[0]:
                 f.write(data[0])
             f.write(data[1])
+            f.close()
             f = self.opener(self.indexfile, "a")
         else:
             f = self.opener(self.indexfile, "a+")